Skip to content

Commit 1344b65

Browse files
[SCEV] Fix incorrect NUW inference (#70521)
This patch fixes a miscompile in LSR caused by incorrect inference of the NUW flag for an AddRec: we shouldn't infer no-wrap flags from a comparison that doesn't fully control the loop exit.
1 parent f706411 commit 1344b65

File tree

2 files changed

+18
-18
lines changed

2 files changed

+18
-18
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12640,6 +12640,11 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
1264012640
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
1264112641
if (AR && AR->getLoop() == L && AR->isAffine()) {
1264212642
auto canProveNUW = [&]() {
12643+
// We can use the comparison to infer no-wrap flags only if it fully
12644+
// controls the loop exit.
12645+
if (!ControlsOnlyExit)
12646+
return false;
12647+
1264312648
if (!isLoopInvariant(RHS, L))
1264412649
return false;
1264512650

llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,38 +5,33 @@
55
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
66
target triple = "x86_64-unknown-linux-gnu"
77

8-
; FIXME: the returned value should be equal to
9-
; zext (trunk (%phi-1) to i16) to i64
10-
; or simply
8+
; The returned value should be equal to
119
; zext (%phi-1) to i64
12-
; which means it should be equal to 1209. Currently, due to a bug in SCEV, it's
13-
; over 65534.
10+
; or simply 1209.
1411
define noundef i64 @test() {
1512
; CHECK-LABEL: define noundef i64 @test() {
1613
; CHECK-NEXT: bb2:
1714
; CHECK-NEXT: br label [[BB3:%.*]]
1815
; CHECK: bb3:
19-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB10:%.*]] ], [ 0, [[BB2:%.*]] ]
20-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], 65535
21-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV]], 65407
22-
; CHECK-NEXT: [[ICMP5:%.*]] = icmp ult i64 [[TMP1]], -256
23-
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
24-
; CHECK-NEXT: [[ICMP6:%.*]] = icmp ult i32 [[TMP2]], 128
16+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB10:%.*]] ], [ -1, [[BB2:%.*]] ]
17+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LSR_IV]], 65536
18+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP0]], 65535
19+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[AND]] to i64
20+
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i64 [[ZEXT]], -128
21+
; CHECK-NEXT: [[ICMP5:%.*]] = icmp ult i64 [[ADD4]], -256
22+
; CHECK-NEXT: [[ICMP6:%.*]] = icmp ult i32 [[AND]], 128
2523
; CHECK-NEXT: [[OR:%.*]] = or i1 [[ICMP5]], [[ICMP6]]
2624
; CHECK-NEXT: br i1 [[OR]], label [[BB10]], label [[BB7:%.*]]
2725
; CHECK: bb7:
28-
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[LSR_IV]] to i32
26+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[LSR_IV]], 1
2927
; CHECK-NEXT: call void @foo(i32 [[TMP1]])
3028
; CHECK-NEXT: unreachable
3129
; CHECK: bb10:
32-
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
33-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_NEXT]], -1
34-
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[TMP2]] to i32
35-
; CHECK-NEXT: [[ICMP12:%.*]] = icmp ult i32 [[TMP]], 1210
30+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
31+
; CHECK-NEXT: [[ICMP12:%.*]] = icmp ult i32 [[LSR_IV_NEXT]], 1210
3632
; CHECK-NEXT: br i1 [[ICMP12]], label [[BB3]], label [[BB13:%.*]]
3733
; CHECK: bb13:
38-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[LSR_IV_NEXT]], 65534
39-
; CHECK-NEXT: ret i64 [[TMP3]]
34+
; CHECK-NEXT: ret i64 [[ZEXT]]
4035
;
4136
bb2:
4237
br label %bb3

0 commit comments

Comments
 (0)