Skip to content

Commit 3e3a167

Browse files
committed
[VPlan] Don't cost FOR splice if unused in legacy cost model
Fixes llvm#131359. After llvm#129645, a first-order recurrence will no longer have its splice costed if the VPInstruction::FirstOrderRecurrenceSplice has no users and is dead. The legacy cost model didn't account for this, so update it to avoid the "VPlan cost model and legacy cost model disagreed" assertion. Alternatively, we could also account for this in planContainsAdditionalSimplifications.
1 parent fc8b2bf commit 3e3a167

File tree

2 files changed

+66
-0
lines changed

2 files changed

+66
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6541,6 +6541,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
65416541
// TODO: Consider vscale_range info.
65426542
if (VF.isScalable() && VF.getKnownMinValue() == 1)
65436543
return InstructionCost::getInvalid();
6544+
// If a FOR has no users inside the loop we won't generate a splice.
6545+
if (none_of(Phi->users(), [this](User *U) {
6546+
return TheLoop->contains(cast<Instruction>(U));
6547+
}))
6548+
return 0;
65446549
SmallVector<int> Mask(VF.getKnownMinValue());
65456550
std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
65466551
return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -p loop-vectorize -S %s | FileCheck %s
3+
4+
; Make sure the legacy cost model doesn't add a cost for a splice when the
5+
; first-order recurrence isn't used inside the loop. The VPlan cost model
6+
; eliminates the dead VPInstruction::FirstOrderRecurrenceSplice so the two cost
7+
; models would go out of sync otherwise.
8+
9+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
10+
target triple = "x86_64"
11+
12+
define void @h() {
13+
; CHECK-LABEL: define void @h() {
14+
; CHECK-NEXT: [[ENTRY:.*]]:
15+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16+
; CHECK: [[VECTOR_PH]]:
17+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
18+
; CHECK: [[VECTOR_BODY]]:
19+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
20+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
22+
; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
23+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
24+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
25+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
26+
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
27+
; CHECK: [[MIDDLE_BLOCK]]:
28+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
29+
; CHECK-NEXT: br i1 false, label %[[F_EXIT:.*]], label %[[SCALAR_PH]]
30+
; CHECK: [[SCALAR_PH]]:
31+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
32+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
33+
; CHECK-NEXT: br label %[[FOR_COND_I:.*]]
34+
; CHECK: [[FOR_COND_I]]:
35+
; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[FOR_COND_I]] ]
36+
; CHECK-NEXT: [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[FOR_COND_I]] ]
37+
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
38+
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
39+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[F_EXIT]], label %[[FOR_COND_I]], !llvm.loop [[LOOP3:![0-9]+]]
40+
; CHECK: [[F_EXIT]]:
41+
; CHECK-NEXT: ret void
42+
;
43+
entry:
44+
br label %for.cond.i
45+
46+
for.cond.i:
47+
%d.0.i = phi i32 [ 0, %entry ], [ %e.0.i, %for.cond.i ]
48+
%e.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.cond.i ]
49+
%inc.i = add i32 %e.0.i, 1
50+
%exitcond.not.i = icmp eq i32 %e.0.i, 43
51+
br i1 %exitcond.not.i, label %f.exit, label %for.cond.i
52+
53+
f.exit:
54+
ret void
55+
}
56+
;.
57+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
58+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
59+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
60+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
61+
;.

0 commit comments

Comments
 (0)