Skip to content

Commit ef7b44b

Browse files
committed
[VPlan] Try to hoist Previous (and operands), if sinking fails for FORs.
In some cases, Previous (and its operands) can be hoisted. This allows supporting additional cases where sinking all users of the FOR fails, e.g. due to having to sink recipes with side-effects. This fixes a crash where we fail to create a scalar VPlan for a first-order recurrence, but can create a vector VPlan, because the trunc instruction of an IV which generates the previous value of the recurrence has been optimized to a truncated induction recipe, thus hoisting it to the beginning. Fixes #106523.
1 parent 50d55dc commit ef7b44b

File tree

5 files changed

+225
-14
lines changed

5 files changed

+225
-14
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,72 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
771771
return true;
772772
}
773773

774+
/// Try to hoist \p Previous and the recipes defining its operands to the first
775+
/// non-phi position of the block containing \p FOR (the vector header).
/// The caller uses this as a fallback when sinking the users of \p FOR after
/// \p Previous fails. \p VPDT is used to order the hoisted recipes by
/// dominance.
/// \returns true if every collected recipe was placed at the hoist point,
/// false if hoisting is not possible. All bail-outs happen before any recipe
/// is moved.
776+
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
777+
VPRecipeBase *Previous,
778+
VPDominatorTree &VPDT) {
779+
// NOTE(review): no pattern matcher appears to be used in this function; this
// using-directive looks removable -- confirm.
using namespace llvm::VPlanPatternMatch;
780+
// Previous itself must be free of side effects and memory reads to be moved.
if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
781+
return false;
782+
// Collect recipes that need hoisting.
783+
SmallVector<VPRecipeBase *> WorkList;
784+
SmallPtrSet<VPRecipeBase *, 8> Seen;
785+
// Returns false if hoisting must be abandoned, true otherwise. The candidate
// is appended to WorkList only when it actually has to be moved.
auto TryToPushHoistCandidate = [&](VPRecipeBase *HoistCandidate) {
786+
// If we reach FOR, it means the original Previous depends on some other
787+
// recurrence that in turn depends on FOR. If that is the case, we would
788+
// also need to hoist recipes involving the other FOR, which may break
789+
// dependencies.
790+
if (HoistCandidate == FOR)
791+
return false;
792+
793+
// Hoist candidate is outside any region, no need to hoist.
794+
if (!HoistCandidate->getParent()->getParent())
795+
return true;
796+
797+
// Hoist candidate is a header phi or already visited, no need to hoist.
798+
if (isa<VPHeaderPHIRecipe>(HoistCandidate) ||
799+
!Seen.insert(HoistCandidate).second)
800+
return true;
801+
802+
// Don't move candidates with side effects, as we do not yet analyze the
803+
// recipes between the candidate and the hoist destination.
// NOTE(review): unlike the check on Previous above, candidates that may read
// from memory are not rejected here -- confirm this is intended.
804+
if (HoistCandidate->mayHaveSideEffects())
805+
return false;
806+
807+
WorkList.push_back(HoistCandidate);
808+
return true;
809+
};
810+
811+
// Starting from Previous, transitively collect the recipes defining its
812+
// operands as hoist candidates.
if (!TryToPushHoistCandidate(Previous))
813+
return false;
814+
// Index-based loop: WorkList grows while it is being traversed.
for (unsigned I = 0; I != WorkList.size(); ++I) {
815+
VPRecipeBase *Current = WorkList[I];
816+
assert(Current->getNumDefinedValues() == 1 &&
817+
"only recipes with a single defined value expected");
818+
819+
// Operands without a defining recipe need no hoisting and are skipped.
for (VPValue *Op : Current->operands())
820+
if (auto *R = Op->getDefiningRecipe())
821+
if (!TryToPushHoistCandidate(R))
822+
return false;
823+
}
824+
825+
// Keep recipes to hoist ordered by dominance so earlier instructions are
826+
// processed first.
827+
sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
828+
return VPDT.properlyDominates(A, B);
829+
});
830+
831+
// Every candidate is inserted directly before the first non-phi recipe of
// FOR's block, in the sorted order established above.
auto HoistPoint = FOR->getParent()->getFirstNonPhi();
832+
for (VPRecipeBase *HoistCandidate : WorkList) {
833+
// The candidate already sits at the hoist point; nothing to move.
// NOTE(review): candidates processed after a skipped one are still inserted
// before HoistPoint, i.e. before the skipped recipe -- confirm this cannot
// place a user ahead of its operand.
if (HoistPoint == HoistCandidate->getIterator())
834+
continue;
835+
HoistCandidate->moveBefore(*FOR->getParent(), HoistPoint);
836+
}
837+
return true;
838+
}
839+
774840
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
775841
VPBuilder &LoopBuilder) {
776842
VPDominatorTree VPDT;
@@ -795,7 +861,8 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
795861
}
796862

797863
if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
798-
return false;
864+
if (!hoistPreviousBeforeFORUsers(FOR, Previous, VPDT))
865+
return false;
799866

800867
// Introduce a recipe to combine the incoming and previous values of a
801868
// fixed-order recurrence.

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,13 @@ struct VPlanTransforms {
3636
GetIntOrFpInductionDescriptor,
3737
ScalarEvolution &SE, const TargetLibraryInfo &TLI);
3838

39-
/// Sink users of fixed-order recurrences after the recipe defining their
40-
/// previous value. Then introduce FirstOrderRecurrenceSplice VPInstructions
41-
/// to combine the value from the recurrence phis and previous values. The
42-
/// current implementation assumes all users can be sunk after the previous
43-
/// value, which is enforced by earlier legality checks.
39+
/// Try to move users of fixed-order recurrences after the recipe defining
40+
/// their previous value, either by sinking them or hoisting the recipe
41+
/// defining their previous value (and its operands). Then introduce
42+
/// FirstOrderRecurrenceSplice VPInstructions to combine the value from the
43+
/// recurrence phis and previous values. The current implementation assumes
44+
/// all users can be sunk after the previous value, which is enforced by
45+
/// earlier legality checks.
4446
/// \returns true if all users of fixed-order recurrences could be re-arranged
4547
/// as needed or false if it is not possible. In the latter case, \p Plan is
4648
/// not valid.

llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,3 +285,68 @@ exit:
285285
store double %.lcssa, ptr %C
286286
ret i64 %.in.lcssa
287287
}
288+
289+
; Test for https://github.com/llvm/llvm-project/issues/106523.
290+
define void @for_iv_trunc_optimized(ptr %dst) {
291+
; CHECK-LABEL: @for_iv_trunc_optimized(
292+
; CHECK-NEXT: bb:
293+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
294+
; CHECK: vector.ph:
295+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
296+
; CHECK: vector.body:
297+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
298+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 1>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
299+
; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
300+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
301+
; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
302+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
303+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
304+
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
305+
; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], zeroinitializer
306+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
307+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
308+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
309+
; CHECK-NEXT: store i32 [[TMP6]], ptr [[DST:%.*]], align 4
310+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
311+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
312+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336
313+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
314+
; CHECK: middle.block:
315+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
316+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
317+
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
318+
; CHECK: scalar.ph:
319+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 337, [[MIDDLE_BLOCK]] ], [ 1, [[BB:%.*]] ]
320+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[BB]] ]
321+
; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
322+
; CHECK-NEXT: br label [[LOOP:%.*]]
323+
; CHECK: loop:
324+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
325+
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ [[TRUNC:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
326+
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ [[OR:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ]
327+
; CHECK-NEXT: [[OR]] = or i32 [[FOR_1]], 0
328+
; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1
329+
; CHECK-NEXT: store i32 [[FOR_2]], ptr [[DST]], align 4
330+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337
331+
; CHECK-NEXT: [[TRUNC]] = trunc i64 [[IV]] to i32
332+
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
333+
; CHECK: exit:
334+
; CHECK-NEXT: ret void
335+
;
336+
bb:
337+
br label %loop
338+
339+
loop:
340+
%iv = phi i64 [ %add, %loop ], [ 1, %bb ]
341+
%for.1 = phi i32 [ %trunc, %loop ], [ 1, %bb ]
342+
%for.2 = phi i32 [ %or, %loop ], [ 0, %bb ]
343+
%or = or i32 %for.1, 0
344+
%add = add i64 %iv, 1
345+
store i32 %for.2, ptr %dst, align 4
346+
%icmp = icmp ult i64 %iv, 337
347+
%trunc = trunc i64 %iv to i32
348+
br i1 %icmp, label %loop, label %exit
349+
350+
exit:
351+
ret void
352+
}

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,53 @@ exit:
154154
; FOR (for.y) should be moved which is not currently supported.
155155
define i32 @test_chained_first_order_recurrences_4(ptr %base) {
156156
; CHECK-LABEL: 'test_chained_first_order_recurrences_4'
157-
; CHECK: No VPlans built.
158-
157+
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
158+
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
159+
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
160+
; CHECK-NEXT: Live-in ir<4098> = original trip-count
161+
; CHECK-EMPTY:
162+
; CHECK-NEXT: vector.ph:
163+
; CHECK-NEXT: Successor(s): vector loop
164+
; CHECK-EMPTY:
165+
; CHECK-NEXT: <x1> vector loop: {
166+
; CHECK-NEXT: vector.body:
167+
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
168+
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next>
169+
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev>
170+
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
171+
; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<0>, ir<0>
172+
; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next>
173+
; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32
174+
; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev>
175+
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]>
176+
; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64
177+
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
178+
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%for.y.i64>
179+
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
180+
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
181+
; CHECK-NEXT: No successors
182+
; CHECK-NEXT: }
183+
; CHECK-NEXT: Successor(s): middle.block
184+
; CHECK-EMPTY:
185+
; CHECK-NEXT: middle.block:
186+
; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-from-end ir<%for.x.next>, ir<1>
187+
; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]> = extract-from-end ir<%for.x.prev>, ir<1>
188+
; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]>
189+
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]>
190+
; CHECK-NEXT: Successor(s): ir-bb<ret>, scalar.ph
191+
; CHECK-EMPTY:
192+
; CHECK-NEXT: ir-bb<ret>:
193+
; CHECK-NEXT: No successors
194+
; CHECK-EMPTY:
195+
; CHECK-NEXT: scalar.ph:
196+
; CHECK-NEXT: EMIT vp<[[RESUME_X:%.+]]> = resume-phi vp<[[EXT_X]]>, ir<0>
197+
; CHECK-NEXT: EMIT vp<[[RESUME_Y:%.+]]> = resume-phi vp<[[EXT_Y]]>, ir<0>
198+
; CHECK-NEXT: No successors
199+
; CHECK-EMPTY:
200+
; CHECK-NEXT: Live-out i64 %for.x = vp<[[RESUME_X]]>
201+
; CHECK-NEXT: Live-out i32 %for.y = vp<[[RESUME_Y]]>
202+
; CHECK-NEXT: }
203+
;
159204
entry:
160205
br label %loop
161206

llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -385,19 +385,51 @@ exit:
385385
define void @hoist_previous_value_and_operands(ptr %dst, i64 %mask) {
386386
; CHECK-LABEL: @hoist_previous_value_and_operands(
387387
; CHECK-NEXT: bb:
388+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
389+
; CHECK: vector.ph:
390+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MASK:%.*]], i64 0
391+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
392+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
393+
; CHECK: vector.body:
394+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
395+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
396+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
397+
; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
398+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
399+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
400+
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
401+
; CHECK-NEXT: [[TMP2]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
402+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
403+
; CHECK-NEXT: [[TMP4]] = or <4 x i32> [[TMP3]], zeroinitializer
404+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
405+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP0]]
406+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
407+
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
408+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
409+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
410+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336
411+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
412+
; CHECK: middle.block:
413+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
414+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
415+
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
416+
; CHECK: scalar.ph:
417+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 337, [[MIDDLE_BLOCK]] ], [ 1, [[BB:%.*]] ]
418+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[BB]] ]
419+
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
388420
; CHECK-NEXT: br label [[LOOP:%.*]]
389421
; CHECK: loop:
390-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD:%.*]], [[LOOP]] ], [ 1, [[BB:%.*]] ]
391-
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ [[TRUNC:%.*]], [[LOOP]] ], [ 1, [[BB]] ]
392-
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ [[OR:%.*]], [[LOOP]] ], [ 0, [[BB]] ]
422+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
423+
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ [[TRUNC:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
424+
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ [[OR:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ]
393425
; CHECK-NEXT: [[OR]] = or i32 [[FOR_1]], 0
394426
; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1
395-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[IV]]
427+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
396428
; CHECK-NEXT: store i32 [[FOR_2]], ptr [[GEP]], align 4
397429
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337
398-
; CHECK-NEXT: [[A:%.*]] = and i64 [[IV]], [[MASK:%.*]]
430+
; CHECK-NEXT: [[A:%.*]] = and i64 [[IV]], [[MASK]]
399431
; CHECK-NEXT: [[TRUNC]] = trunc i64 [[A]] to i32
400-
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
432+
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
401433
; CHECK: exit:
402434
; CHECK-NEXT: ret void
403435
;

0 commit comments

Comments
 (0)