Skip to content

Commit db267be

Browse files
fhahn authored and github-actions[bot] committed
Automerge: [AArch64] Runtime-unroll small multi-exit loops on Apple Silicon. (#124751)
Extend unrolling preferences to allow more aggressive unrolling of search loops with 2 exits, building on the TTI hook added in llvm/llvm-project@ad9da92. In combination with llvm/llvm-project@eac23a5 this enables unrolling loops like std::find, which can improve performance significantly (+15% end-to-end on a workload that makes heavy use of std::find). It increases the total number of unrolled loops by ~2.5% across a very large corpus of workloads. For SPEC2017, +1.6% more loops are unrolled and the following workloads increase in size (`__text`):

workload          base        patch
500.perlbench_r   1682884.00  1694104.00  0.7%
523.xalancbmk_r   3001716.00  3003832.00  0.1%

PR: llvm/llvm-project#124751
2 parents 62e465d + 46a13a5 commit db267be

File tree

2 files changed

+164
-23
lines changed

2 files changed

+164
-23
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4167,15 +4167,14 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
41674167
TargetTransformInfo::UnrollingPreferences &UP,
41684168
AArch64TTIImpl &TTI) {
41694169
// Limit loops with structure that is highly likely to benefit from runtime
4170-
// unrolling; that is we exclude outer loops, loops with multiple exits and
4171-
// many blocks (i.e. likely with complex control flow). Note that the
4172-
// heuristics here may be overly conservative and we err on the side of
4173-
// avoiding runtime unrolling rather than unroll excessively. They are all
4174-
// subject to further refinement.
4175-
if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)
4170+
// unrolling; that is we exclude outer loops and loops with many blocks (i.e.
4171+
// likely with complex control flow). Note that the heuristics here may be
4172+
// overly conservative and we err on the side of avoiding runtime unrolling
4173+
// rather than unroll excessively. They are all subject to further refinement.
4174+
if (!L->isInnermost() || L->getNumBlocks() > 8)
41764175
return;
41774176

4178-
const SCEV *BTC = SE.getBackedgeTakenCount(L);
4177+
const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
41794178
if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
41804179
(SE.getSmallConstantMaxTripCount(L) > 0 &&
41814180
SE.getSmallConstantMaxTripCount(L) <= 32))
@@ -4194,6 +4193,28 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
41944193
}
41954194
}
41964195

4196+
// Small search loops with multiple exits can be highly beneficial to unroll.
4197+
if (!L->getExitBlock()) {
4198+
if (L->getNumBlocks() == 2 && Size < 6 &&
4199+
all_of(
4200+
L->getBlocks(),
4201+
[](BasicBlock *BB) {
4202+
return isa<BranchInst>(BB->getTerminator());
4203+
})) {
4204+
UP.RuntimeUnrollMultiExit = true;
4205+
UP.Runtime = true;
4206+
// Limit unroll count.
4207+
UP.DefaultUnrollRuntimeCount = 4;
4208+
// Allow slightly more costly trip-count expansion to catch search loops
4209+
// with pointer inductions.
4210+
UP.SCEVExpansionBudget = 5;
4211+
}
4212+
return;
4213+
}
4214+
4215+
if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
4216+
return;
4217+
41974218
// Limit to loops with trip counts that are cheap to expand.
41984219
UP.SCEVExpansionBudget = 1;
41994220

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll

Lines changed: 136 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,78 @@ define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
1313
; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
1414
; APPLE-NEXT: [[ENTRY:.*]]:
1515
; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
16+
; APPLE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1617
; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
1718
; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
18-
; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
19-
; APPLE: [[LOOP_HEADER]]:
20-
; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
19+
; APPLE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
20+
; APPLE-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
21+
; APPLE-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
22+
; APPLE-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
23+
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 3
24+
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
25+
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
26+
; APPLE: [[LOOP_HEADER_PROL_PREHEADER]]:
27+
; APPLE-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
28+
; APPLE: [[LOOP_HEADER_PROL]]:
29+
; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
30+
; APPLE-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
2131
; APPLE-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
2232
; APPLE-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
23-
; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
24-
; APPLE: [[LOOP_LATCH]]:
33+
; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
34+
; APPLE: [[LOOP_LATCH_PROL]]:
2535
; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
2636
; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
27-
; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
28-
; APPLE: [[EXIT]]:
29-
; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
37+
; APPLE-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
38+
; APPLE-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
39+
; APPLE-NEXT: br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
40+
; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
41+
; APPLE-NEXT: [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
42+
; APPLE-NEXT: [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
43+
; APPLE-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
44+
; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT]]:
45+
; APPLE-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
46+
; APPLE-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
47+
; APPLE-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 3
48+
; APPLE-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
49+
; APPLE: [[ENTRY_NEW]]:
50+
; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
51+
; APPLE: [[LOOP_HEADER]]:
52+
; APPLE-NEXT: [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[RES:%.*]], %[[LOOP_LATCH_3:.*]] ]
53+
; APPLE-NEXT: [[L1:%.*]] = load i8, ptr [[PTR_IV1]], align 8
54+
; APPLE-NEXT: [[C_4:%.*]] = icmp eq i8 [[L1]], [[TGT]]
55+
; APPLE-NEXT: br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
56+
; APPLE: [[LOOP_LATCH]]:
57+
; APPLE-NEXT: [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 1
58+
; APPLE-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT1]], align 8
59+
; APPLE-NEXT: [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
60+
; APPLE-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
61+
; APPLE: [[LOOP_LATCH_1]]:
62+
; APPLE-NEXT: [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 1
63+
; APPLE-NEXT: [[L_2:%.*]] = load i8, ptr [[PTR_IV_NEXT_1]], align 8
64+
; APPLE-NEXT: [[C_1_2:%.*]] = icmp eq i8 [[L_2]], [[TGT]]
65+
; APPLE-NEXT: br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
66+
; APPLE: [[LOOP_LATCH_2]]:
67+
; APPLE-NEXT: [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 1
68+
; APPLE-NEXT: [[L_3:%.*]] = load i8, ptr [[PTR_IV_NEXT_2]], align 8
69+
; APPLE-NEXT: [[C_1_3:%.*]] = icmp eq i8 [[L_3]], [[TGT]]
70+
; APPLE-NEXT: br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
71+
; APPLE: [[LOOP_LATCH_3]]:
72+
; APPLE-NEXT: [[RES]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 1
3073
; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
31-
; APPLE-NEXT: ret i1 [[C_3]]
74+
; APPLE-NEXT: br i1 [[C_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
75+
; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
76+
; APPLE-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
77+
; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA:.*]]
78+
; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
79+
; APPLE-NEXT: [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
80+
; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
81+
; APPLE: [[EXIT_UNR_LCSSA]]:
82+
; APPLE-NEXT: [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
83+
; APPLE-NEXT: br label %[[EXIT]]
84+
; APPLE: [[EXIT]]:
85+
; APPLE-NEXT: [[RES1:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
86+
; APPLE-NEXT: [[C_5:%.*]] = icmp eq ptr [[RES1]], [[END]]
87+
; APPLE-NEXT: ret i1 [[C_5]]
3288
;
3389
; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
3490
; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -80,22 +136,81 @@ define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
80136
; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
81137
; APPLE-NEXT: [[ENTRY:.*]]:
82138
; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
139+
; APPLE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
83140
; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
84141
; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
85142
; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
143+
; APPLE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
86144
; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
87-
; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
88-
; APPLE: [[LOOP_HEADER]]:
89-
; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
145+
; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
146+
; APPLE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
147+
; APPLE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
148+
; APPLE-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
149+
; APPLE-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
150+
; APPLE-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], -1
151+
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 3
152+
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
153+
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
154+
; APPLE: [[LOOP_HEADER_PROL_PREHEADER]]:
155+
; APPLE-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
156+
; APPLE: [[LOOP_HEADER_PROL]]:
157+
; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
158+
; APPLE-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
90159
; APPLE-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
91160
; APPLE-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
92-
; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
93-
; APPLE: [[LOOP_LATCH]]:
161+
; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
162+
; APPLE: [[LOOP_LATCH_PROL]]:
94163
; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
95164
; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
96-
; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
165+
; APPLE-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
166+
; APPLE-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
167+
; APPLE-NEXT: br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
168+
; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
169+
; APPLE-NEXT: [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
170+
; APPLE-NEXT: [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
171+
; APPLE-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
172+
; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT]]:
173+
; APPLE-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
174+
; APPLE-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
175+
; APPLE-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 3
176+
; APPLE-NEXT: br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
177+
; APPLE: [[ENTRY_NEW]]:
178+
; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
179+
; APPLE: [[LOOP_HEADER]]:
180+
; APPLE-NEXT: [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_3:%.*]], %[[LOOP_LATCH_3:.*]] ]
181+
; APPLE-NEXT: [[L1:%.*]] = load ptr, ptr [[PTR_IV1]], align 8
182+
; APPLE-NEXT: [[C_4:%.*]] = icmp eq ptr [[L1]], [[TGT]]
183+
; APPLE-NEXT: br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
184+
; APPLE: [[LOOP_LATCH]]:
185+
; APPLE-NEXT: [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 8
186+
; APPLE-NEXT: [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT1]], align 8
187+
; APPLE-NEXT: [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
188+
; APPLE-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
189+
; APPLE: [[LOOP_LATCH_1]]:
190+
; APPLE-NEXT: [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 8
191+
; APPLE-NEXT: [[L_2:%.*]] = load ptr, ptr [[PTR_IV_NEXT_1]], align 8
192+
; APPLE-NEXT: [[C_1_2:%.*]] = icmp eq ptr [[L_2]], [[TGT]]
193+
; APPLE-NEXT: br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
194+
; APPLE: [[LOOP_LATCH_2]]:
195+
; APPLE-NEXT: [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 8
196+
; APPLE-NEXT: [[L_3:%.*]] = load ptr, ptr [[PTR_IV_NEXT_2]], align 8
197+
; APPLE-NEXT: [[C_1_3:%.*]] = icmp eq ptr [[L_3]], [[TGT]]
198+
; APPLE-NEXT: br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
199+
; APPLE: [[LOOP_LATCH_3]]:
200+
; APPLE-NEXT: [[PTR_IV_NEXT_3]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 8
201+
; APPLE-NEXT: [[C_2_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT_3]], [[END]]
202+
; APPLE-NEXT: br i1 [[C_2_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
203+
; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
204+
; APPLE-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
205+
; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA:.*]]
206+
; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
207+
; APPLE-NEXT: [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
208+
; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
209+
; APPLE: [[EXIT_UNR_LCSSA]]:
210+
; APPLE-NEXT: [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
211+
; APPLE-NEXT: br label %[[EXIT]]
97212
; APPLE: [[EXIT]]:
98-
; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
213+
; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
99214
; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
100215
; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
101216
; APPLE-NEXT: ret i1 [[C_3]]
@@ -393,3 +508,8 @@ exit.2:
393508
}
394509

395510
declare void @llvm.assume(i1 noundef)
511+
;.
512+
; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
513+
; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
514+
; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
515+
;.

0 commit comments

Comments
 (0)