Skip to content

Commit 1484f82

Browse files
authored
[VPlan] Add VPInstruction::StepVector and use it in VPWidenIntOrFpInductionRecipe (#129508)
Split off from #118638, this adds VPInstruction::StepVector, which generates integer step vectors (0, 1, 2, ..., VF-1). This is a step towards eventually modelling all the separate parts of VPWidenIntOrFpInductionRecipe in VPlan. This is then used by VPWidenIntOrFpInductionRecipe, where we materialize it just before unrolling so the operands stay in a fixed position. The need for a separate operand in VPWidenIntOrFpInductionRecipe, as well as the need to update it in optimizeVectorInductionWidthForTCAndVFUF, should be removed with #118638 when everything is expanded in convertToConcreteRecipes.
1 parent d9bdc2d commit 1484f82

21 files changed

+116
-58
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7783,6 +7783,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77837783
"Trying to execute plan with unsupported VF");
77847784
assert(BestVPlan.hasUF(BestUF) &&
77857785
"Trying to execute plan with unsupported UF");
7786+
VPlanTransforms::materializeStepVectors(BestVPlan);
77867787
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
77877788
// cost model is complete for better cost estimates.
77887789
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
902902
/// Scale the first operand (vector step) by the second operand
903903
/// (scalar-step). Casts both operands to the result type if needed.
904904
WideIVStep,
905+
// Creates an integer step vector <0, 1, ..., VF-1>, i.e. starting at 0 with a step of 1.
906+
StepVector,
905907

906908
};
907909

@@ -1072,7 +1074,15 @@ class VPInstructionWithType : public VPInstruction {
10721074
if (R->isScalarCast())
10731075
return true;
10741076
auto *VPI = dyn_cast<VPInstruction>(R);
1075-
return VPI && VPI->getOpcode() == VPInstruction::WideIVStep;
1077+
if (!VPI)
1078+
return false;
1079+
switch (VPI->getOpcode()) {
1080+
case VPInstruction::WideIVStep:
1081+
case VPInstruction::StepVector:
1082+
return true;
1083+
default:
1084+
return false;
1085+
}
10761086
}
10771087

10781088
static inline bool classof(const VPUser *R) {
@@ -1869,7 +1879,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18691879
TruncInst *Trunc;
18701880

18711881
// If this recipe is unrolled it will have 2 additional operands.
1872-
bool isUnrolled() const { return getNumOperands() == 5; }
1882+
bool isUnrolled() const { return getNumOperands() == 6; }
18731883

18741884
public:
18751885
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
@@ -1918,6 +1928,16 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
19181928
VPValue *getVFValue() { return getOperand(2); }
19191929
const VPValue *getVFValue() const { return getOperand(2); }
19201930

1931+
// TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded in
1932+
// convertToConcreteRecipes.
1933+
VPInstructionWithType *getStepVector() {
1934+
auto *StepVector =
1935+
cast<VPInstructionWithType>(getOperand(3)->getDefiningRecipe());
1936+
assert(StepVector->getOpcode() == VPInstruction::StepVector &&
1937+
"step vector operand must be a VPInstruction::StepVector");
1938+
return StepVector;
1939+
}
1940+
19211941
VPValue *getSplatVFValue() {
19221942
// If the recipe has been unrolled return the VPValue for the induction
19231943
// increment.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
934934
case VPInstruction::Not:
935935
case VPInstruction::PtrAdd:
936936
case VPInstruction::WideIVStep:
937+
case VPInstruction::StepVector:
937938
return false;
938939
default:
939940
return true;
@@ -1085,8 +1086,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10851086

10861087
void VPInstructionWithType::execute(VPTransformState &State) {
10871088
State.setDebugLocFrom(getDebugLoc());
1088-
assert(vputils::onlyFirstLaneUsed(this) &&
1089-
"Codegen only implemented for first lane.");
10901089
switch (getOpcode()) {
10911090
case Instruction::ZExt:
10921091
case Instruction::Trunc: {
@@ -1096,6 +1095,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
10961095
State.set(this, Cast, VPLane(0));
10971096
break;
10981097
}
1098+
case VPInstruction::StepVector: {
1099+
Value *StepVector =
1100+
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
1101+
State.set(this, StepVector);
1102+
break;
1103+
}
10991104
default:
11001105
llvm_unreachable("opcode not implemented yet");
11011106
}
@@ -1113,6 +1118,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
11131118
O << "wide-iv-step ";
11141119
printOperands(O, SlotTracker);
11151120
break;
1121+
case VPInstruction::StepVector:
1122+
O << "step-vector " << *ResultTy;
1123+
break;
11161124
default:
11171125
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
11181126
O << Instruction::getOpcodeName(getOpcode()) << " ";
@@ -1880,7 +1888,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
18801888
/// (0 * Step, 1 * Step, 2 * Step, ...)
18811889
/// to each vector element of Val.
18821890
/// \p Opcode is relevant for FP induction variable.
1883-
static Value *getStepVector(Value *Val, Value *Step,
1891+
/// \p InitVec is an integer step vector from 0 with a step of 1.
1892+
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
18841893
Instruction::BinaryOps BinOp, ElementCount VF,
18851894
IRBuilderBase &Builder) {
18861895
assert(VF.isVector() && "only vector VFs are supported");
@@ -1894,15 +1903,6 @@ static Value *getStepVector(Value *Val, Value *Step,
18941903
"Induction Step must be an integer or FP");
18951904
assert(Step->getType() == STy && "Step has wrong type");
18961905

1897-
// Create a vector of consecutive numbers from zero to VF.
1898-
VectorType *InitVecValVTy = ValVTy;
1899-
if (STy->isFloatingPointTy()) {
1900-
Type *InitVecValSTy =
1901-
IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
1902-
InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1903-
}
1904-
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1905-
19061906
if (STy->isIntegerTy()) {
19071907
Step = Builder.CreateVectorSplat(VLen, Step);
19081908
assert(Step->getType() == Val->getType() && "Invalid step vec");
@@ -1969,8 +1969,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
19691969
}
19701970

19711971
Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1972-
Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1973-
State.VF, State.Builder);
1972+
Value *SteppedStart =
1973+
::getStepVector(SplatStart, Step, State.get(getStepVector()),
1974+
ID.getInductionOpcode(), State.VF, State.Builder);
19741975

19751976
// We create vector phi nodes for both integer and floating-point induction
19761977
// variables. Here, we determine the kind of arithmetic we will perform.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,16 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
12231223
WideIV->setStartValue(NewStart);
12241224
auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
12251225
WideIV->setStepValue(NewStep);
1226+
// TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded.
1227+
VPInstructionWithType *OldStepVector = WideIV->getStepVector();
1228+
assert(OldStepVector->getNumUsers() == 1 &&
1229+
"step vector should only be used by single "
1230+
"VPWidenIntOrFpInductionRecipe");
1231+
auto *NewStepVector = new VPInstructionWithType(
1232+
VPInstruction::StepVector, {}, NewIVTy, OldStepVector->getDebugLoc());
1233+
NewStepVector->insertAfter(OldStepVector->getDefiningRecipe());
1234+
OldStepVector->replaceAllUsesWith(NewStepVector);
1235+
OldStepVector->eraseFromParent();
12261236

12271237
auto *NewBTC = new VPWidenCastRecipe(
12281238
Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
@@ -2585,6 +2595,27 @@ void VPlanTransforms::handleUncountableEarlyExit(
25852595
LatchExitingBranch->eraseFromParent();
25862596
}
25872597

2598+
void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
2599+
for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
2600+
auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
2601+
if (!IVR)
2602+
continue;
2603+
2604+
Type *Ty = IVR->getPHINode()->getType();
2605+
if (TruncInst *Trunc = IVR->getTruncInst())
2606+
Ty = Trunc->getType();
2607+
if (Ty->isFloatingPointTy())
2608+
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
2609+
2610+
VPBuilder Builder(Plan.getVectorPreheader());
2611+
VPInstruction *StepVector = Builder.createNaryOp(
2612+
VPInstruction::StepVector, {}, Ty, {}, IVR->getDebugLoc());
2613+
assert(IVR->getNumOperands() == 3 &&
2614+
"can only add step vector before unrolling");
2615+
IVR->addOperand(StepVector);
2616+
}
2617+
}
2618+
25882619
void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
25892620
if (Plan.hasScalarVFOnly())
25902621
return;

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,11 @@ struct VPlanTransforms {
199199
optimizeInductionExitUsers(VPlan &Plan,
200200
DenseMap<VPValue *, VPValue *> &EndValues);
201201

202+
/// Materialize VPInstruction::StepVectors for VPWidenIntOrFpInductionRecipes.
203+
/// TODO: Remove once all of VPWidenIntOrFpInductionRecipe is expanded in
204+
/// convertToConcreteRecipes.
205+
static void materializeStepVectors(VPlan &Plan);
206+
202207
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
203208
static void materializeBroadcasts(VPlan &Plan);
204209

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1616
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
1818
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
19+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
1920
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
2021
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
21-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2222
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2323
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2424
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
@@ -100,9 +100,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
100100
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
101101
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
102102
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
103+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
103104
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
104105
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
105-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
106106
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
107107
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
108108
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
123123
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
124124
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
125125
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
126+
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
126127
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
127128
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
128-
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
129129
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
130130
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
131131
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
@@ -246,9 +246,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
246246
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
247247
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
248248
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
249+
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
249250
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
250251
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
251-
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252252
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253253
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254254
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]

llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -517,13 +517,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
517517
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
518518
; DEFAULT-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
519519
; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
520+
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
520521
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
521522
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
522523
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
523524
; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
524525
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
525526
; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
526-
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
527527
; DEFAULT-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
528528
; DEFAULT-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
529529
; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
@@ -593,13 +593,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
593593
; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
594594
; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
595595
; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
596+
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
596597
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
597598
; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
598599
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
599600
; OPTSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
600601
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
601602
; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
602-
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
603603
; OPTSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
604604
; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
605605
; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
@@ -669,13 +669,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
669669
; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
670670
; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
671671
; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
672+
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
672673
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
673674
; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
674675
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
675676
; MINSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
676677
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
677678
; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
678-
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
679679
; MINSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
680680
; MINSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
681681
; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
2626
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
2727
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
2828
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
29+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
2930
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
3031
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
31-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
3232
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
3333
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
3434
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]

0 commit comments

Comments
 (0)