Commit c88bf3e

[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop
Currently, when we encounter a negative step in the induction variable, isDereferenceableAndAlignedInLoop bails out because the element size compares signed-greater-than the step. This patch adds support for negative steps in cases where we detect that the start address of the load has the form base + offset. In that case the address decrements on each iteration, so we need to calculate the access size differently. The motivation for this patch comes from PR #88385, where a reviewer requested reusing isDereferenceableAndAlignedInLoop, but that PR itself does not yet support reverse loops.
1 parent: 95c97eb
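
For context, here is a minimal C++ sketch (assumed shape, loosely modelled on the test_rev_loops_deref_loads test below; the function and variable names are illustrative) of the kind of reverse loop this change lets the analysis reason about:

// Reverse loop with a conditional load. The load address starts at
// src + 1023 (base + offset) and decrements each iteration; if the whole
// array can be proven dereferenceable and aligned, the vectorizer can emit
// the load unconditionally rather than scalarizing it behind the predicate.
void rev_loop(int *dest, const int *src, const int *cmp) {
  for (long i = 1023; i >= 0; --i)
    if (cmp[i] != 3)
      dest[i] = src[i] << 2;
}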

2 files changed: +101 -71 lines changed

llvm/lib/Analysis/Loads.cpp

Lines changed: 50 additions & 19 deletions
@@ -293,47 +293,78 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
 
   // TODO: Handle overlapping accesses.
   // We should be computing AccessSize as (TC - 1) * Step + EltSize.
-  if (EltSize.sgt(Step->getAPInt()))
+  bool StepIsNegative = Step->getAPInt().isNegative();
+  APInt AbsStep = Step->getAPInt().abs();
+  if (EltSize.ugt(AbsStep))
+    return false;
+
+  // For the moment, restrict ourselves to the case where the access size is a
+  // multiple of the requested alignment and the base is aligned.
+  // TODO: generalize if a case found which warrants
+  if (EltSize.urem(Alignment.value()) != 0)
     return false;
 
   // Compute the total access size for access patterns with unit stride and
   // patterns with gaps. For patterns with unit stride, Step and EltSize are the
   // same.
   // For patterns with gaps (i.e. non unit stride), we are
   // accessing EltSize bytes at every Step.
-  APInt AccessSize = TC * Step->getAPInt();
+  APInt AccessSize = TC * AbsStep;
 
   assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
          "implied by addrec definition");
   Value *Base = nullptr;
   if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
+    if (StepIsNegative)
+      return false;
     Base = StartS->getValue();
   } else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
-    // Handle (NewBase + offset) as start value.
-    const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
-    const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
-    if (StartS->getNumOperands() == 2 && Offset && NewBase) {
+    const SCEV *End = AddRec->evaluateAtIteration(
+        SE.getConstant(StartS->getType(), TC - 1), SE);
+
+    // The step recurrence could be negative so it's necessary to find the min
+    // and max accessed addresses in the loop.
+    const SCEV *Min = SE.getUMinExpr(StartS, End);
+    const SCEV *Max = SE.getUMaxExpr(StartS, End);
+    if (isa<SCEVCouldNotCompute>(Min) || isa<SCEVCouldNotCompute>(Max))
+      return false;
+
+    // Now calculate the total access size, which is (max - min) + element_size.
+    const SCEV *Diff = SE.getMinusSCEV(Max, Min);
+    if (isa<SCEVCouldNotCompute>(Diff))
+      return false;
+
+    const SCEV *AS = SE.getAddExpr(
+        Diff, SE.getConstant(Diff->getType(), EltSize.getZExtValue()));
+    auto *ASC = dyn_cast<SCEVConstant>(AS);
+    if (!ASC)
+      return false;
+
+    if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(Min)) {
+      Base = NewBase->getValue();
+      AccessSize = ASC->getAPInt();
+    } else if (auto *MinAddRec = dyn_cast<SCEVAddExpr>(Min)) {
+      if (MinAddRec->getNumOperands() != 2)
+        return false;
+
+      const auto *Offset = dyn_cast<SCEVConstant>(MinAddRec->getOperand(0));
+      const auto *NewBase = dyn_cast<SCEVUnknown>(MinAddRec->getOperand(1));
+      if (!Offset || !NewBase)
+        return false;
+
       // For the moment, restrict ourselves to the case where the offset is a
       // multiple of the requested alignment and the base is aligned.
       // TODO: generalize if a case found which warrants
       if (Offset->getAPInt().urem(Alignment.value()) != 0)
         return false;
-      Base = NewBase->getValue();
-      bool Overflow = false;
-      AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow);
-      if (Overflow)
-        return false;
-    }
-  }
 
-  if (!Base)
+      AccessSize = ASC->getAPInt() + Offset->getAPInt();
+      Base = NewBase->getValue();
+    } else
+      return false;
+  } else
     return false;
 
-  // For the moment, restrict ourselves to the case where the access size is a
-  // multiple of the requested alignment and the base is aligned.
-  // TODO: generalize if a case found which warrants
-  if (EltSize.urem(Alignment.value()) != 0)
-    return false;
   return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
                                             HeaderFirstNonPHI, AC, &DT);
 }
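
To see what the new min/max logic computes, here is a hedged worked example in plain C++ integers rather than APInt/SCEV (the trip count, step, and start offset are assumed for illustration; they correspond to walking a 1024-element i32 array in reverse):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int64_t TC = 1024;       // loop trip count
  const int64_t EltSize = 4;     // i32 elements
  const int64_t Step = -4;       // reverse loop: the address drops 4 bytes/iter
  const int64_t StartOff = 4092; // start address is base + 4092

  // Offset of the access in the final iteration, i.e. the add recurrence
  // evaluated at iteration TC - 1 (evaluateAtIteration in the patch).
  const int64_t EndOff = StartOff + (TC - 1) * Step; // 4092 - 4092 = 0

  // With a negative step the first access is the highest address, so take
  // min/max before sizing the range (getUMinExpr/getUMaxExpr in the patch).
  const int64_t Min = std::min(StartOff, EndOff);
  const int64_t Max = std::max(StartOff, EndOff);

  // Total access size is (max - min) + element_size.
  const int64_t AccessSize = (Max - Min) + EltSize; // 4092 + 4 = 4096

  assert(Min == 0 && AccessSize == 4096); // the whole 4 KiB array from base
  return 0;
}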

llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll

Lines changed: 51 additions & 52 deletions
@@ -213,7 +213,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
 ; CHECK: vector.ph:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]]
@@ -223,32 +223,33 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], <i32 3, i32 3>
 ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 -1
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP0]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
-; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i32 [[TMP8]], 2
-; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2
+; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_STORE_IF]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
-; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
-; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], -1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP15]], 2
-; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
-; CHECK: pred.store.continue2:
-; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF1]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP16]], 2
+; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -257,13 +258,13 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP20]], 3
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP19]], 3
 ; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK: if.then:
 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP21]], 2
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP20]], 2
 ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT: br label [[FOR_INC]]
@@ -541,35 +542,33 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], <i32 3, i32 3>
 ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
 ; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
-; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP10]], 2
-; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2
+; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_STORE_IF]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
-; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
+; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], -1
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP15]]
-; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i32 [[TMP18]], 2
-; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i32 [[TMP12]], 2
+; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
 ; CHECK: pred.store.continue2:
-; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP18]], [[PRED_STORE_IF1]] ]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 -2, i64 -2>
-; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
-; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -578,14 +577,14 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP23]], 3
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP21]], 3
 ; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK: if.then:
 ; CHECK-NEXT: [[INDVARS_IV_STRIDED:%.*]] = mul i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[INDVARS_IV_STRIDED]]
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP24]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP22]], 2
 ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT: br label [[FOR_INC]]
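
For reference, a hedged C++ sketch of the strided reverse loop these CHECK lines exercise (reconstructed from the test_rev_loops_strided_deref_loads checks above; names are illustrative): the compare and store walk i downwards while the load reads src[i * 2], a gap pattern, and the two scalar loads per vector iteration are now emitted unconditionally ahead of the predicated stores.

// Strided reverse loop: i counts down from 511 and the load index is 2 * i,
// so successive iterations read every other i32 of src (non-unit stride).
void rev_strided(int *dest, const int *src, const int *cmp) {
  for (long i = 511; i >= 0; --i)
    if (cmp[i] != 3)
      dest[i] = src[i * 2] << 2;
}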
