Skip to content

Commit e258bca

Browse files
authored
[VPlan] Only skip expansion for SCEVUnknown if it isn't an instruction. (llvm#125235)
Update getOrCreateVPValueForSCEVExpr to only skip expansion of SCEVUnknown if the underlying value isn't an instruction. Instructions may be defined in a loop, and using them without expansion may break LCSSA form. SCEVExpander will take care of preserving LCSSA if needed. We could also try to pass LoopInfo, but there are some users of the function where it won't be available, and the main benefit from skipping expansion is slightly more concise VPlans. Note that SCEVExpander is now used to expand SCEVUnknown with floats. Adjust the check in expandCodeFor to only check the types and casts if the type of the value is different from the requested type. Otherwise we crash when trying to expand a float and requesting a float type. Fixes llvm#121518. PR: llvm#125235
1 parent 2a5050a commit e258bca

File tree

5 files changed

+127
-8
lines changed

5 files changed

+127
-8
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,7 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
14511451
// Expand the code for this SCEV.
14521452
Value *V = expand(SH);
14531453

1454-
if (Ty) {
1454+
if (Ty && Ty != V->getType()) {
14551455
assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
14561456
"non-trivial casts should be done with the SCEVs directly!");
14571457
V = InsertNoopCastOfTo(V, Ty);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3404,7 +3404,7 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) {
34043404
}
34053405

34063406
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3407-
SCEVExpander Exp(SE, DL, "induction");
3407+
SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true);
34083408

34093409
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
34103410
&*State.Builder.GetInsertPoint());

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,18 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3030
VPValue *Expanded = nullptr;
3131
if (auto *E = dyn_cast<SCEVConstant>(Expr))
3232
Expanded = Plan.getOrAddLiveIn(E->getValue());
33-
else if (auto *E = dyn_cast<SCEVUnknown>(Expr))
34-
Expanded = Plan.getOrAddLiveIn(E->getValue());
3533
else {
36-
Expanded = new VPExpandSCEVRecipe(Expr, SE);
37-
Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
34+
auto *U = dyn_cast<SCEVUnknown>(Expr);
35+
// Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
36+
// value. Otherwise the value may be defined in a loop and using it directly
37+
// will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
38+
// form.
39+
if (U && !isa<Instruction>(U->getValue())) {
40+
Expanded = Plan.getOrAddLiveIn(U->getValue());
41+
} else {
42+
Expanded = new VPExpandSCEVRecipe(Expr, SE);
43+
Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
44+
}
3845
}
3946
Plan.addSCEVExpansion(Expr, Expanded);
4047
return Expanded;
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -p loop-vectorize -vectorizer-min-trip-count=8 -mcpu=skylake-avx512 -S %s | FileCheck %s
3+
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
; Test case for https://github.com/llvm/llvm-project/issues/121518. Make sure
7+
; that we preserve LCSSA form when using %iv.1 from loop.1 in the trip count
8+
; expression when vectorizing loop.2
9+
define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr %dst) {
10+
; CHECK-LABEL: define void @value_defined_in_loop1_used_for_trip_counts(
11+
; CHECK-SAME: i32 [[START:%.*]], i1 [[C:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[ENTRY:.*]]:
13+
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], i32 0, i32 7
14+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
15+
; CHECK-NEXT: br label %[[LOOP_1:.*]]
16+
; CHECK: [[LOOP_1]]:
17+
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ZEXT]], %[[LOOP_1]] ]
18+
; CHECK-NEXT: br i1 false, label %[[LOOP_1_EXIT:.*]], label %[[LOOP_1]]
19+
; CHECK: [[LOOP_1_EXIT]]:
20+
; CHECK-NEXT: [[IV_1_LCSSA2:%.*]] = phi i64 [ [[IV_1]], %[[LOOP_1]] ]
21+
; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i64 [ [[IV_1]], %[[LOOP_1]] ]
22+
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_2_PREHEADER:.*]], label %[[LOOP_3_PREHEADER:.*]]
23+
; CHECK: [[LOOP_3_PREHEADER]]:
24+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
25+
; CHECK: [[VECTOR_PH]]:
26+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[IV_1_LCSSA2]], 15
27+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
28+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
29+
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[IV_1_LCSSA2]], 1
30+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
31+
; CHECK: [[VECTOR_BODY]]:
32+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
33+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
34+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT]]
35+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 0
36+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
37+
; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> zeroinitializer, ptr [[TMP2]], i32 1, <16 x i1> [[TMP0]])
38+
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
39+
; CHECK: [[MIDDLE_BLOCK]]:
40+
; CHECK-NEXT: br i1 true, label %[[EXIT_1_LOOPEXIT1:.*]], label %[[SCALAR_PH]]
41+
; CHECK: [[SCALAR_PH]]:
42+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_3_PREHEADER]] ]
43+
; CHECK-NEXT: br label %[[LOOP_3:.*]]
44+
; CHECK: [[LOOP_2_PREHEADER]]:
45+
; CHECK-NEXT: br label %[[LOOP_2:.*]]
46+
; CHECK: [[LOOP_2]]:
47+
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ 0, %[[LOOP_2_PREHEADER]] ]
48+
; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP_2]] ], [ [[START]], %[[LOOP_2_PREHEADER]] ]
49+
; CHECK-NEXT: [[IV_3_NEXT]] = add i32 [[IV_3]], 1
50+
; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], 1
51+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[IV_3]], 1
52+
; CHECK-NEXT: [[ZEXT8:%.*]] = zext i32 [[SHL]] to i64
53+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[ZEXT8]]
54+
; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
55+
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_2]], [[IV_1_LCSSA]]
56+
; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT_1_LOOPEXIT:.*]]
57+
; CHECK: [[LOOP_3]]:
58+
; CHECK-NEXT: [[IV_4:%.*]] = phi i64 [ [[IV_4_NEXT:%.*]], %[[LOOP_3]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
59+
; CHECK-NEXT: [[GEP_DST_2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_4]]
60+
; CHECK-NEXT: store i8 0, ptr [[GEP_DST_2]], align 1
61+
; CHECK-NEXT: [[IV_4_NEXT]] = add i64 [[IV_4]], 1
62+
; CHECK-NEXT: [[EC_3:%.*]] = icmp ult i64 [[IV_4_NEXT]], [[IV_1_LCSSA]]
63+
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP_3]], label %[[EXIT_1_LOOPEXIT1]], !llvm.loop [[LOOP0:![0-9]+]]
64+
; CHECK: [[EXIT_1_LOOPEXIT]]:
65+
; CHECK-NEXT: br label %[[EXIT_1:.*]]
66+
; CHECK: [[EXIT_1_LOOPEXIT1]]:
67+
; CHECK-NEXT: br label %[[EXIT_1]]
68+
; CHECK: [[EXIT_1]]:
69+
; CHECK-NEXT: ret void
70+
;
71+
entry:
72+
%select = select i1 %c, i32 0, i32 7
73+
%zext = zext i32 %select to i64
74+
br label %loop.1
75+
76+
loop.1:
77+
%iv.1 = phi i64 [ 0, %entry ], [ %zext, %loop.1 ]
78+
br i1 false, label %loop.1.exit, label %loop.1
79+
80+
loop.1.exit:
81+
br i1 %c, label %loop.2, label %loop.3
82+
83+
loop.2:
84+
%iv.2 = phi i64 [ 0, %loop.1.exit ], [ %iv.2.next, %loop.2 ]
85+
%iv.3 = phi i32 [ %start, %loop.1.exit ], [ %iv.3.next, %loop.2 ]
86+
%iv.3.next = add i32 %iv.3, 1
87+
%iv.2.next = add i64 %iv.2, 1
88+
%shl = shl i32 %iv.3, 1
89+
%zext8 = zext i32 %shl to i64
90+
%gep.dst = getelementptr i8, ptr %dst, i64 %zext8
91+
store i16 0, ptr %gep.dst, align 2
92+
%ec.2 = icmp ult i64 %iv.2, %iv.1
93+
br i1 %ec.2, label %loop.2, label %exit.1
94+
95+
loop.3:
96+
%iv.4 = phi i64 [ 0, %loop.1.exit ], [ %iv.4.next, %loop.3 ]
97+
%gep.dst.2 = getelementptr i8, ptr %dst, i64 %iv.4
98+
store i8 0, ptr %gep.dst.2, align 1
99+
%iv.4.next = add i64 %iv.4, 1
100+
%ec.3 = icmp ult i64 %iv.4.next, %iv.1
101+
br i1 %ec.3, label %loop.3, label %exit.1
102+
103+
exit.1:
104+
ret void
105+
}
106+
;.
107+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
108+
; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
109+
; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
110+
;.

llvm/test/Transforms/LoopVectorize/float-induction.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -842,14 +842,16 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
842842
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]]
843843
; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
844844
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
845-
; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
846-
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], splat (float 4.000000e+00)
845+
; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
846+
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> poison, <4 x i32> zeroinitializer
847847
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
848848
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
849849
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
850850
; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer
851851
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
852852
; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]]
853+
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
854+
; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer
853855
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
854856
; VEC4_INTERL2: vector.body:
855857
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

0 commit comments

Comments
 (0)