Skip to content

Commit 47e1c87

Browse files
authored
[VPlan] Set debug location for VPReduction/VPWidenIntrinsicRecipe. (#120054)
This patch adds the missing debug location for VPReduction/VPWidenIntrinsicRecipe.
1 parent 9d3f9f4 commit 47e1c87

File tree

6 files changed

+112
-10
lines changed

6 files changed

+112
-10
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9739,9 +9739,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
97399739
if (CM.blockNeedsPredicationForAnyReason(BB))
97409740
CondOp = RecipeBuilder.getBlockInMask(BB);
97419741

9742-
VPReductionRecipe *RedRecipe =
9743-
new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
9744-
CondOp, CM.useOrderedReductions(RdxDesc));
9742+
auto *RedRecipe = new VPReductionRecipe(
9743+
RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
9744+
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
97459745
// Append the recipe to the end of the VPBasicBlock because we need to
97469746
// ensure that it comes after all of its inputs, including CondOp.
97479747
// Note that this transformation may leave over dead recipes (including

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1654,7 +1654,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
16541654
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
16551655
ArrayRef<VPValue *> CallArguments, Type *Ty,
16561656
DebugLoc DL = {})
1657-
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
1657+
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
16581658
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
16591659
LLVMContext &Ctx = Ty->getContext();
16601660
AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
@@ -2648,8 +2648,9 @@ class VPReductionRecipe : public VPSingleDefRecipe {
26482648
protected:
26492649
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
26502650
Instruction *I, ArrayRef<VPValue *> Operands,
2651-
VPValue *CondOp, bool IsOrdered)
2652-
: VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
2651+
VPValue *CondOp, bool IsOrdered, DebugLoc DL)
2652+
: VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
2653+
IsOrdered(IsOrdered) {
26532654
if (CondOp) {
26542655
IsConditional = true;
26552656
addOperand(CondOp);
@@ -2659,16 +2660,17 @@ class VPReductionRecipe : public VPSingleDefRecipe {
26592660
public:
26602661
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
26612662
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2662-
bool IsOrdered)
2663+
bool IsOrdered, DebugLoc DL = {})
26632664
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
26642665
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2665-
IsOrdered) {}
2666+
IsOrdered, DL) {}
26662667

26672668
~VPReductionRecipe() override = default;
26682669

26692670
VPReductionRecipe *clone() override {
26702671
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2671-
getVecOp(), getCondOp(), IsOrdered);
2672+
getVecOp(), getCondOp(), IsOrdered,
2673+
getDebugLoc());
26722674
}
26732675

26742676
static inline bool classof(const VPRecipeBase *R) {
@@ -2723,7 +2725,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
27232725
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
27242726
cast_or_null<Instruction>(R.getUnderlyingValue()),
27252727
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2726-
R.isOrdered()) {}
2728+
R.isOrdered(), R.getDebugLoc()) {}
27272729

27282730
~VPReductionEVLRecipe() override = default;
27292731

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2175,6 +2175,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
21752175
// Propagate the fast-math flags carried by the underlying instruction.
21762176
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
21772177
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2178+
State.setDebugLocFrom(getDebugLoc());
21782179
Value *NewVecOp = State.get(getVecOp());
21792180
if (VPValue *Cond = getCondOp()) {
21802181
Value *NewCond = State.get(Cond, State.VF.isScalar());
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; RUN: opt -passes=debugify,loop-vectorize \
2+
; RUN: -force-tail-folding-style=data-with-evl \
3+
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
4+
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s
5+
6+
; Test that the debug location of the generated vector intrinsic is the same as
7+
; that of its scalar counterpart.
8+
9+
define void @vp_select(ptr %a, ptr %b, ptr %c, i64 %N) {
10+
; DEBUGLOC-LABEL: define void @vp_select(
11+
; DEBUGLOC: vector.body:
12+
; DEBUGLOC: = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %{{.+}}, <vscale x 4 x i32> %{{.+}}, <vscale x 4 x i32> %{{.+}}, i32 %{{.+}}), !dbg ![[SELLOC:[0-9]+]]
13+
; DEBUGLOC: loop:
14+
; DEBUGLOC: = select i1 %{{.+}}, i32 %{{.+}}, i32 %{{.+}}, !dbg ![[SELLOC]]
15+
;
16+
entry:
17+
br label %loop
18+
19+
loop:
20+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
21+
%gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
22+
%load.b = load i32, ptr %gep.b, align 4
23+
%gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
24+
%load.c = load i32, ptr %gep.c, align 4
25+
%cmp = icmp sgt i32 %load.b, %load.c
26+
%neg.c = sub i32 0, %load.c
27+
%sel = select i1 %cmp, i32 %load.c, i32 %neg.c
28+
%add = add i32 %sel, %load.b
29+
%gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
30+
store i32 %add, ptr %gep.a, align 4
31+
%iv.next = add nuw nsw i64 %iv, 1
32+
%exitcond = icmp eq i64 %iv.next, %N
33+
br i1 %exitcond, label %exit, label %loop
34+
35+
exit:
36+
ret void
37+
}
38+
39+
; DEBUGLOC: [[SELLOC]] = !DILocation(line: 9

llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,36 @@ exit:
109109
ret void
110110
}
111111

112+
define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) {
113+
; DEBUGLOC-LABEL: define void @widen_intrinsic_dbg(
114+
; DEBUGLOC: vector.body:
115+
; DEBUGLOC: = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !dbg ![[INTRINSIC_LOC:[0-9]+]]
116+
; DEBUGLOC: loop:
117+
; DEBUGLOC: = call float @llvm.sqrt.f32(float %{{.+}}), !dbg ![[INTRINSIC_LOC]]
118+
;
119+
entry:
120+
br label %loop
121+
122+
loop:
123+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
124+
%gep.y = getelementptr inbounds float, ptr %y, i64 %iv
125+
%load = load float, ptr %gep.y, align 4
126+
%call = call float @llvm.sqrt.f32(float %load)
127+
%gep.x = getelementptr inbounds float, ptr %x, i64 %iv
128+
store float %call, ptr %gep.x, align 4
129+
%iv.next = add i64 %iv, 1
130+
%exitcond = icmp eq i64 %iv.next, %n
131+
br i1 %exitcond, label %exit, label %loop
132+
133+
exit:
134+
ret void
135+
}
136+
112137
!0 = !{!0, !1}
113138
!1 = !{!"llvm.loop.vectorize.width", i32 4}
114139
; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4}
115140
; CHECK: !{!"llvm.loop.isvectorized", i32 1}
116141

117142
; DEBUGLOC: ![[RESUMELOC]] = !DILocation(line: 2
118143
; DEBUGLOC: ![[PTRIVLOC]] = !DILocation(line: 12
144+
; DEBUGLOC: ![[INTRINSIC_LOC]] = !DILocation(line: 44
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
2+
3+
; Test that the debug locations of the generated vector instructions are the same as
4+
; those of their scalar counterparts.
5+
6+
define i32 @reduction_sum(ptr %A, ptr %B) {
7+
; DEBUGLOC-LABEL: define i32 @reduction_sum(
8+
; DEBUGLOC: vector.body:
9+
; DEBUGLOC: = load <4 x i32>, ptr %{{.+}}, align 4, !dbg ![[LOADLOC:[0-9]+]]
10+
; DEBUGLOC: = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %{{.+}}), !dbg ![[REDLOC:[0-9]+]]
11+
; DEBUGLOC: loop:
12+
; DEBUGLOC: %[[LOAD:.+]] = load i32, ptr %{{.+}}, align 4, !dbg ![[LOADLOC]]
13+
; DEBUGLOC: = add i32 %{{.+}}, %[[LOAD]], !dbg ![[REDLOC]]
14+
;
15+
entry:
16+
br label %loop
17+
18+
loop:
19+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
20+
%red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
21+
%gep = getelementptr inbounds i32, ptr %A, i64 %iv
22+
%load = load i32, ptr %gep, align 4
23+
%red.next = add i32 %red, %load
24+
%iv.next = add i64 %iv, 1
25+
%exitcond = icmp eq i64 %iv.next, 256
26+
br i1 %exitcond, label %exit, label %loop
27+
28+
exit:
29+
%red.lcssa = phi i32 [ %red.next, %loop ]
30+
ret i32 %red.lcssa
31+
}
32+
33+
; DEBUGLOC: ![[LOADLOC]] = !DILocation(line: 5
34+
; DEBUGLOC: ![[REDLOC]] = !DILocation(line: 6

0 commit comments

Comments
 (0)