Skip to content

Commit a616f57

Browse files
authored
[CorrelatedValuePropagation] Fold calls to UCMP/SCMP when we know that ranges of operands do not overlap (#97235)
This patch adds folds for calls to `ucmp`/`scmp` intrinsics where we can establish that the range of the first operand is strictly to the left or strictly to the right of the range of the second operand.
1 parent c49c386 commit a616f57

File tree

2 files changed

+293
-0
lines changed

2 files changed

+293
-0
lines changed

llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ STATISTIC(NumOverflows, "Number of overflow checks removed");
8585
STATISTIC(NumSaturating,
8686
"Number of saturating arithmetics converted to normal arithmetics");
8787
STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
88+
STATISTIC(NumCmpIntr, "Number of llvm.[us]cmp intrinsics removed");
8889
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
8990
STATISTIC(NumSMinMax,
9091
"Number of llvm.s{min,max} intrinsics simplified to unsigned");
@@ -548,6 +549,35 @@ static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
548549
return false;
549550
}
550551

552+
/// Try to fold a call to the llvm.scmp / llvm.ucmp intrinsic into a constant.
///
/// The ranges of both operands are queried from LVI at their use in the call.
/// The call is replaced by:
///   *  1 if the LHS range compares greater than the RHS range,
///   * -1 if the LHS range compares less than the RHS range,
///   *  0 if the LHS range compares equal to the RHS range,
/// where "greater"/"less" use the signed predicates for scmp and the unsigned
/// predicates for ucmp.
///
/// \returns true if the intrinsic call was replaced and erased, false if it
/// was left untouched.
static bool processCmpIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
  const bool Signed = II->getIntrinsicID() == Intrinsic::scmp;

  const ConstantRange LHSRange = LVI->getConstantRangeAtUse(
      II->getOperandUse(0), /*UndefAllowed*/ false);
  const ConstantRange RHSRange = LVI->getConstantRangeAtUse(
      II->getOperandUse(1), /*UndefAllowed*/ false);

  // Pick the constant the call folds to, if any. The checks are tried in the
  // order greater-than, less-than, equal; the first one that holds wins.
  Value *Folded = nullptr;
  if (LHSRange.icmp(Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT,
                    RHSRange))
    Folded = ConstantInt::get(II->getType(), 1);
  else if (LHSRange.icmp(Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
                         RHSRange))
    Folded = ConstantInt::getSigned(II->getType(), -1);
  else if (LHSRange.icmp(ICmpInst::ICMP_EQ, RHSRange))
    Folded = ConstantInt::get(II->getType(), 0);

  // None of the range comparisons held: nothing to do.
  if (!Folded)
    return false;

  ++NumCmpIntr;
  II->replaceAllUsesWith(Folded);
  II->eraseFromParent();
  return true;
}
580+
551581
// See if this min/max intrinsic always picks its one specific operand.
552582
// If not, check whether we can canonicalize signed minmax into unsigned version
553583
static bool processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) {
@@ -639,6 +669,11 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
639669
return processAbsIntrinsic(&cast<IntrinsicInst>(CB), LVI);
640670
}
641671

672+
if (CB.getIntrinsicID() == Intrinsic::scmp ||
673+
CB.getIntrinsicID() == Intrinsic::ucmp) {
674+
return processCmpIntrinsic(&cast<IntrinsicInst>(CB), LVI);
675+
}
676+
642677
if (auto *MM = dyn_cast<MinMaxIntrinsic>(&CB)) {
643678
return processMinMaxIntrinsic(MM, LVI);
644679
}
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
3+
4+
; If nothing is known we can't change anything
5+
define i8 @ucmp_0(i32 %x, i32 %y) {
6+
; CHECK-LABEL: @ucmp_0(
7+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
8+
; CHECK-NEXT: ret i8 [[TMP1]]
9+
;
10+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
11+
ret i8 %1
12+
}
13+
14+
define i8 @scmp_0(i32 %x, i32 %y) {
15+
; CHECK-LABEL: @scmp_0(
16+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
17+
; CHECK-NEXT: ret i8 [[TMP1]]
18+
;
19+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
20+
ret i8 %1
21+
}
22+
23+
; If we know that range of LHS < range of RHS then return -1
24+
define i8 @ucmp_1(i32 %x, i32 %y) {
25+
; X is within [4, 8)
26+
; CHECK-LABEL: @ucmp_1(
27+
; CHECK-NEXT: [[COND1:%.*]] = icmp uge i32 [[X:%.*]], 4
28+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
29+
; CHECK-NEXT: [[COND2:%.*]] = icmp ult i32 [[X]], 8
30+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
31+
; CHECK-NEXT: [[COND3:%.*]] = icmp uge i32 [[Y:%.*]], 8
32+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND3]])
33+
; CHECK-NEXT: ret i8 -1
34+
;
35+
%cond1 = icmp uge i32 %x, 4
36+
call void @llvm.assume(i1 %cond1)
37+
%cond2 = icmp ult i32 %x, 8
38+
call void @llvm.assume(i1 %cond2)
39+
; Y is within [8, UNSIGNED_MAX]
40+
%cond3 = icmp uge i32 %y, 8
41+
call void @llvm.assume(i1 %cond3)
42+
43+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
44+
ret i8 %1
45+
}
46+
47+
define i8 @scmp_1(i32 %x, i32 %y) {
48+
; X is within [-5, 3)
49+
; CHECK-LABEL: @scmp_1(
50+
; CHECK-NEXT: [[COND1:%.*]] = icmp sge i32 [[X:%.*]], -5
51+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
52+
; CHECK-NEXT: [[COND2:%.*]] = icmp slt i32 [[X]], 3
53+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
54+
; CHECK-NEXT: [[COND3:%.*]] = icmp sge i32 [[Y:%.*]], 3
55+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND3]])
56+
; CHECK-NEXT: ret i8 -1
57+
;
58+
%cond1 = icmp sge i32 %x, -5
59+
call void @llvm.assume(i1 %cond1)
60+
%cond2 = icmp slt i32 %x, 3
61+
call void @llvm.assume(i1 %cond2)
62+
; Y is within [3, SIGNED_MAX]
63+
%cond3 = icmp sge i32 %y, 3
64+
call void @llvm.assume(i1 %cond3)
65+
66+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
67+
ret i8 %1
68+
}
69+
70+
; If we know that range of LHS > range of RHS then return 1
71+
define i8 @ucmp_2(i32 %x, i32 %y) {
72+
; X is within [4, UNSIGNED_MAX]
73+
; CHECK-LABEL: @ucmp_2(
74+
; CHECK-NEXT: [[COND1:%.*]] = icmp uge i32 [[X:%.*]], 4
75+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
76+
; CHECK-NEXT: [[COND2:%.*]] = icmp ult i32 [[Y:%.*]], 4
77+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
78+
; CHECK-NEXT: ret i8 1
79+
;
80+
%cond1 = icmp uge i32 %x, 4
81+
call void @llvm.assume(i1 %cond1)
82+
; Y is within [0, 4)
83+
%cond2 = icmp ult i32 %y, 4
84+
call void @llvm.assume(i1 %cond2)
85+
86+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
87+
ret i8 %1
88+
}
89+
90+
define i8 @scmp_2(i32 %x, i32 %y) {
91+
; X is within [4, SIGNED_MAX]
92+
; CHECK-LABEL: @scmp_2(
93+
; CHECK-NEXT: [[COND1:%.*]] = icmp sge i32 [[X:%.*]], 4
94+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
95+
; CHECK-NEXT: [[COND2:%.*]] = icmp slt i32 [[Y:%.*]], 4
96+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
97+
; CHECK-NEXT: ret i8 1
98+
;
99+
%cond1 = icmp sge i32 %x, 4
100+
call void @llvm.assume(i1 %cond1)
101+
; Y is within [SIGNED_MIN, 4)
102+
%cond2 = icmp slt i32 %y, 4
103+
call void @llvm.assume(i1 %cond2)
104+
105+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
106+
ret i8 %1
107+
}
108+
109+
; If we know that LHS and RHS are both the same constant then return 0
110+
define i8 @ucmp_5(i32 %x, i32 %y) {
111+
; CHECK-LABEL: @ucmp_5(
112+
; CHECK-NEXT: [[COND1:%.*]] = icmp eq i32 [[X:%.*]], 4
113+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
114+
; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[Y:%.*]], 4
115+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
116+
; CHECK-NEXT: ret i8 0
117+
;
118+
%cond1 = icmp eq i32 %x, 4
119+
call void @llvm.assume(i1 %cond1)
120+
%cond2 = icmp eq i32 %y, 4
121+
call void @llvm.assume(i1 %cond2)
122+
123+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
124+
ret i8 %1
125+
}
126+
127+
define i8 @scmp_5(i32 %x, i32 %y) {
128+
; CHECK-LABEL: @scmp_5(
129+
; CHECK-NEXT: [[COND1:%.*]] = icmp eq i32 [[X:%.*]], -5
130+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
131+
; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[Y:%.*]], -5
132+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
133+
; CHECK-NEXT: ret i8 0
134+
;
135+
%cond1 = icmp eq i32 %x, -5
136+
call void @llvm.assume(i1 %cond1)
137+
%cond2 = icmp eq i32 %y, -5
138+
call void @llvm.assume(i1 %cond2)
139+
140+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
141+
ret i8 %1
142+
}
143+
144+
; We can infer ranges based on the location where a UCMP/SCMP result is used
145+
define i8 @scmp_6(i32 noundef %x) {
146+
; CHECK-LABEL: @scmp_6(
147+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 10
148+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 -1, i8 5
149+
; CHECK-NEXT: ret i8 [[TMP2]]
150+
;
151+
%1 = icmp slt i32 %x, 10
152+
%2 = call i8 @llvm.scmp(i32 %x, i32 10)
153+
%3 = select i1 %1, i8 %2, i8 5
154+
ret i8 %3
155+
}
156+
157+
; Negative test: ranges overlap
158+
define i8 @ucmp_3(i32 %x, i32 %y) {
159+
; X is within [4, UNSIGNED_MAX]
160+
; CHECK-LABEL: @ucmp_3(
161+
; CHECK-NEXT: [[COND1:%.*]] = icmp uge i32 [[X:%.*]], 4
162+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
163+
; CHECK-NEXT: [[COND2:%.*]] = icmp ult i32 [[Y:%.*]], 6
164+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
165+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
166+
; CHECK-NEXT: ret i8 [[TMP1]]
167+
;
168+
%cond1 = icmp uge i32 %x, 4
169+
call void @llvm.assume(i1 %cond1)
170+
; Y is within [0, 6)
171+
%cond2 = icmp ult i32 %y, 6
172+
call void @llvm.assume(i1 %cond2)
173+
174+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
175+
ret i8 %1
176+
}
177+
178+
define i8 @scmp_3(i32 %x, i32 %y) {
179+
; X is within [2, SIGNED_MAX]
180+
; CHECK-LABEL: @scmp_3(
181+
; CHECK-NEXT: [[COND1:%.*]] = icmp sge i32 [[X:%.*]], 2
182+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
183+
; CHECK-NEXT: [[COND2:%.*]] = icmp slt i32 [[Y:%.*]], 4
184+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
185+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
186+
; CHECK-NEXT: ret i8 [[TMP1]]
187+
;
188+
%cond1 = icmp sge i32 %x, 2
189+
call void @llvm.assume(i1 %cond1)
190+
; Y is within [SIGNED_MIN, 4)
191+
%cond2 = icmp slt i32 %y, 4
192+
call void @llvm.assume(i1 %cond2)
193+
194+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
195+
ret i8 %1
196+
}
197+
198+
; Negative test: mismatched signedness of range-establishing comparisons and
199+
; of the intrinsic
200+
define i8 @ucmp_4(i32 %x, i32 %y) {
201+
; X is within [4, SIGNED_MAX]
202+
; CHECK-LABEL: @ucmp_4(
203+
; CHECK-NEXT: [[COND1:%.*]] = icmp sge i32 [[X:%.*]], 4
204+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
205+
; CHECK-NEXT: [[COND2:%.*]] = icmp slt i32 [[Y:%.*]], 4
206+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
207+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
208+
; CHECK-NEXT: ret i8 [[TMP1]]
209+
;
210+
%cond1 = icmp sge i32 %x, 4
211+
call void @llvm.assume(i1 %cond1)
212+
; Y is within [SIGNED_MIN, 4)
213+
%cond2 = icmp slt i32 %y, 4
214+
call void @llvm.assume(i1 %cond2)
215+
216+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
217+
ret i8 %1
218+
}
219+
220+
define i8 @scmp_4(i32 %x, i32 %y) {
221+
; X is within [4, UNSIGNED_MAX]
222+
; CHECK-LABEL: @scmp_4(
223+
; CHECK-NEXT: [[COND1:%.*]] = icmp uge i32 [[X:%.*]], 4
224+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
225+
; CHECK-NEXT: [[COND2:%.*]] = icmp ult i32 [[Y:%.*]], 4
226+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
227+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
228+
; CHECK-NEXT: ret i8 [[TMP1]]
229+
;
230+
%cond1 = icmp uge i32 %x, 4
231+
call void @llvm.assume(i1 %cond1)
232+
; Y is within [0, 4)
233+
%cond2 = icmp ult i32 %y, 4
234+
call void @llvm.assume(i1 %cond2)
235+
236+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
237+
ret i8 %1
238+
}
239+
240+
; Negative test: ranges are the same, but we can't be sure the values are equal
241+
define i8 @ucmp_6(i32 %x, i32 %y) {
242+
; Both X and Y are within [0, 10]
243+
; CHECK-LABEL: @ucmp_6(
244+
; CHECK-NEXT: [[COND1:%.*]] = icmp ule i32 [[X:%.*]], 10
245+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND1]])
246+
; CHECK-NEXT: [[COND2:%.*]] = icmp ule i32 [[Y:%.*]], 10
247+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]])
248+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
249+
; CHECK-NEXT: ret i8 [[TMP1]]
250+
;
251+
%cond1 = icmp ule i32 %x, 10
252+
call void @llvm.assume(i1 %cond1)
253+
%cond2 = icmp ule i32 %y, 10
254+
call void @llvm.assume(i1 %cond2)
255+
256+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
257+
ret i8 %1
258+
}

0 commit comments

Comments
 (0)