Skip to content

Commit 0a75153

Browse files
committed
[LoopUnswitch] Allow i1 truncs in loop unswitch
Since llvm#84628, truncs to i1 are emitted as the conditions of branch instructions. This caused significant performance regressions in cases that were previously improved by loop unswitching, because the partial-unswitch condition check only accepted compare instructions. Also accepting truncs to i1 as conditions restores the previous performance.
1 parent a9689c6 commit 0a75153

File tree

3 files changed

+232
-1
lines changed

3 files changed

+232
-1
lines changed

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1930,7 +1930,15 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
19301930
if (!TI || !TI->isConditional())
19311931
return {};
19321932

1933-
auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
1933+
Instruction *CondI = nullptr;
1934+
CondI = dyn_cast<CmpInst>(TI->getCondition());
1935+
1936+
if (!CondI) {
1937+
CondI = dyn_cast<TruncInst>(TI->getCondition());
1938+
if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) {
1939+
return {};
1940+
}
1941+
}
19341942
// The case with the condition outside the loop should already be handled
19351943
// earlier.
19361944
if (!CondI || !L.contains(CondI))

llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,96 @@ for.inc: ; preds = %for.cond5
106106
store i8 0, ptr @b, align 1
107107
br label %for.cond5
108108
}
109+
110+
define void @e() {
111+
; CHECK-LABEL: @e(
112+
; CHECK-NEXT: entry:
113+
; CHECK-NEXT: br label [[FOR_COND:%.*]]
114+
; CHECK: for.cond:
115+
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_COND]]
116+
; CHECK: for.end:
117+
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr null, align 2
118+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[TMP0]] to i1
119+
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_SPLIT:%.*]], label [[FOR_END_SPLIT_US:%.*]]
120+
; CHECK: for.end.split.us:
121+
; CHECK-NEXT: br label [[G_US:%.*]]
122+
; CHECK: g.us:
123+
; CHECK-NEXT: br label [[G_SPLIT_US6:%.*]]
124+
; CHECK: for.cond1.us1:
125+
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr null, align 2
126+
; CHECK-NEXT: [[TOBOOL4_NOT_US:%.*]] = trunc i16 [[TMP2]] to i1
127+
; CHECK-NEXT: br i1 [[TOBOOL4_NOT_US]], label [[FOR_COND5_PREHEADER_US4:%.*]], label [[G_LOOPEXIT_US:%.*]]
128+
; CHECK: for.cond5.us2:
129+
; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT_US5:%.*]], label [[FOR_INC_US3:%.*]]
130+
; CHECK: for.inc.us3:
131+
; CHECK-NEXT: store i8 0, ptr @b, align 1
132+
; CHECK-NEXT: br label [[FOR_COND5_US2:%.*]]
133+
; CHECK: for.cond5.preheader.us4:
134+
; CHECK-NEXT: br label [[FOR_COND5_US2]]
135+
; CHECK: for.cond1.loopexit.us5:
136+
; CHECK-NEXT: br label [[FOR_COND1_US1:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
137+
; CHECK: g.loopexit.us:
138+
; CHECK-NEXT: br label [[G_US]]
139+
; CHECK: g.split.us6:
140+
; CHECK-NEXT: br label [[FOR_COND1_US1]]
141+
; CHECK: for.end.split:
142+
; CHECK-NEXT: br label [[G:%.*]]
143+
; CHECK: g.loopexit:
144+
; CHECK-NEXT: br label [[G]], !llvm.loop [[LOOP4:![0-9]+]]
145+
; CHECK: g:
146+
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr null, align 2
147+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i16 [[TMP3]] to i1
148+
; CHECK-NEXT: br i1 [[TMP4]], label [[G_SPLIT_US:%.*]], label [[G_SPLIT:%.*]]
149+
; CHECK: g.split.us:
150+
; CHECK-NEXT: br label [[FOR_COND1_US:%.*]]
151+
; CHECK: for.cond1.us:
152+
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER_US:%.*]]
153+
; CHECK: for.cond5.us:
154+
; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT_US:%.*]], label [[FOR_INC_US:%.*]]
155+
; CHECK: for.inc.us:
156+
; CHECK-NEXT: store i8 0, ptr @b, align 1
157+
; CHECK-NEXT: br label [[FOR_COND5_US:%.*]]
158+
; CHECK: for.cond5.preheader.us:
159+
; CHECK-NEXT: br label [[FOR_COND5_US]]
160+
; CHECK: for.cond1.loopexit.us:
161+
; CHECK-NEXT: br label [[FOR_COND1_US]]
162+
; CHECK: g.split:
163+
; CHECK-NEXT: br label [[FOR_COND1:%.*]]
164+
; CHECK: for.cond1.loopexit:
165+
; CHECK-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP3]]
166+
; CHECK: for.cond1:
167+
; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr null, align 2
168+
; CHECK-NEXT: [[TOBOOL4_NOT:%.*]] = trunc i16 [[TMP5]] to i1
169+
; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label [[FOR_COND5_PREHEADER:%.*]], label [[G_LOOPEXIT:%.*]]
170+
; CHECK: for.cond5.preheader:
171+
; CHECK-NEXT: br label [[FOR_COND5:%.*]]
172+
; CHECK: for.cond5:
173+
; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT:%.*]], label [[FOR_INC:%.*]]
174+
; CHECK: for.inc:
175+
; CHECK-NEXT: store i8 0, ptr @b, align 1
176+
; CHECK-NEXT: br label [[FOR_COND5]]
177+
;
178+
entry:
179+
br label %for.cond
180+
181+
for.cond: ; preds = %for.cond, %entry
182+
br i1 false, label %for.end, label %for.cond
183+
184+
for.end: ; preds = %for.cond
185+
br label %g
186+
187+
g: ; preds = %for.cond1, %for.end
188+
br label %for.cond1
189+
190+
for.cond1: ; preds = %for.cond5, %g
191+
%0 = load i16, ptr null, align 2
192+
%tobool4.not = trunc i16 %0 to i1
193+
br i1 %tobool4.not, label %for.cond5, label %g
194+
195+
for.cond5: ; preds = %for.inc, %for.cond1
196+
br i1 false, label %for.cond1, label %for.inc
197+
198+
for.inc: ; preds = %for.cond5
199+
store i8 0, ptr @b, align 1
200+
br label %for.cond5
201+
}

llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1326,6 +1326,136 @@ exit:
13261326
ret i32 10
13271327
}
13281328

1329+
define i32 @partial_unswitch_true_successor_trunc(ptr %ptr, i32 %N) {
1330+
; CHECK-LABEL: @partial_unswitch_true_successor_trunc(
1331+
; CHECK-NEXT: entry:
1332+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
1333+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
1334+
; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
1335+
; CHECK: entry.split.us:
1336+
; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]]
1337+
; CHECK: loop.header.us:
1338+
; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
1339+
; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]]
1340+
; CHECK: noclobber.us:
1341+
; CHECK-NEXT: br label [[LOOP_LATCH_US]]
1342+
; CHECK: loop.latch.us:
1343+
; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
1344+
; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
1345+
; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
1346+
; CHECK: exit.split.us:
1347+
; CHECK-NEXT: br label [[EXIT:%.*]]
1348+
; CHECK: entry.split:
1349+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
1350+
; CHECK: loop.header:
1351+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1352+
; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[PTR]], align 4
1353+
; CHECK-NEXT: [[SC:%.*]] = trunc i32 [[LV]] to i1
1354+
; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
1355+
; CHECK: noclobber:
1356+
; CHECK-NEXT: br label [[LOOP_LATCH]]
1357+
; CHECK: clobber:
1358+
; CHECK-NEXT: call void @clobber()
1359+
; CHECK-NEXT: br label [[LOOP_LATCH]]
1360+
; CHECK: loop.latch:
1361+
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
1362+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
1363+
; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]]
1364+
; CHECK: exit.split:
1365+
; CHECK-NEXT: br label [[EXIT]]
1366+
; CHECK: exit:
1367+
; CHECK-NEXT: ret i32 10
1368+
;
1369+
entry:
1370+
br label %loop.header
1371+
1372+
loop.header:
1373+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
1374+
%lv = load i32, ptr %ptr
1375+
%sc = trunc i32 %lv to i1
1376+
br i1 %sc, label %noclobber, label %clobber
1377+
1378+
noclobber:
1379+
br label %loop.latch
1380+
1381+
clobber:
1382+
call void @clobber()
1383+
br label %loop.latch
1384+
1385+
loop.latch:
1386+
%c = icmp ult i32 %iv, %N
1387+
%iv.next = add i32 %iv, 1
1388+
br i1 %c, label %loop.header, label %exit
1389+
1390+
exit:
1391+
ret i32 10
1392+
}
1393+
1394+
define i32 @partial_unswitch_false_successor_trunc(ptr %ptr, i32 %N) {
1395+
; CHECK-LABEL: @partial_unswitch_false_successor_trunc(
1396+
; CHECK-NEXT: entry:
1397+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
1398+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
1399+
; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]]
1400+
; CHECK: entry.split.us:
1401+
; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]]
1402+
; CHECK: loop.header.us:
1403+
; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
1404+
; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]]
1405+
; CHECK: noclobber.us:
1406+
; CHECK-NEXT: br label [[LOOP_LATCH_US]]
1407+
; CHECK: loop.latch.us:
1408+
; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
1409+
; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
1410+
; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
1411+
; CHECK: exit.split.us:
1412+
; CHECK-NEXT: br label [[EXIT:%.*]]
1413+
; CHECK: entry.split:
1414+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
1415+
; CHECK: loop.header:
1416+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1417+
; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[PTR]], align 4
1418+
; CHECK-NEXT: [[SC:%.*]] = trunc i32 [[LV]] to i1
1419+
; CHECK-NEXT: br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]]
1420+
; CHECK: clobber:
1421+
; CHECK-NEXT: call void @clobber()
1422+
; CHECK-NEXT: br label [[LOOP_LATCH]]
1423+
; CHECK: noclobber:
1424+
; CHECK-NEXT: br label [[LOOP_LATCH]]
1425+
; CHECK: loop.latch:
1426+
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
1427+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
1428+
; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
1429+
; CHECK: exit.split:
1430+
; CHECK-NEXT: br label [[EXIT]]
1431+
; CHECK: exit:
1432+
; CHECK-NEXT: ret i32 10
1433+
;
1434+
entry:
1435+
br label %loop.header
1436+
1437+
loop.header:
1438+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
1439+
%lv = load i32, ptr %ptr
1440+
%sc = trunc i32 %lv to i1
1441+
br i1 %sc, label %clobber, label %noclobber
1442+
1443+
clobber:
1444+
call void @clobber()
1445+
br label %loop.latch
1446+
1447+
noclobber:
1448+
br label %loop.latch
1449+
1450+
loop.latch:
1451+
%c = icmp ult i32 %iv, %N
1452+
%iv.next = add i32 %iv, 1
1453+
br i1 %c, label %loop.header, label %exit
1454+
1455+
exit:
1456+
ret i32 10
1457+
}
1458+
13291459
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[UNSWITCH_PARTIAL_DISABLE:![0-9]+]]}
13301460
; CHECK: [[UNSWITCH_PARTIAL_DISABLE]] = !{!"llvm.loop.unswitch.partial.disable"}
13311461
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[UNSWITCH_PARTIAL_DISABLE]]}

0 commit comments

Comments
 (0)