Skip to content

Commit bbd6a2d

Browse files
authored
[RISCV] Convert implicit_def tuples to noreg in post-isel peephole (#91173)
If a segmented load has an undefined passthru, it is selected as a reg_sequence with implicit_def operands, which currently slips through the implicit_def -> noreg peephole. This patch extends the peephole to handle that case, so that we can infer whether the passthru is undefined without looking through vreg definitions via MachineRegisterInfo. This will help with moving RISCVInsertVSETVLI to LiveIntervals in #70549.
1 parent e44600f commit bbd6a2d

File tree

5 files changed

+30
-109
lines changed

5 files changed

+30
-109
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3478,8 +3478,15 @@ static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
34783478
}
34793479

34803480
static bool isImplicitDef(SDValue V) {
3481-
return V.isMachineOpcode() &&
3482-
V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3481+
if (!V.isMachineOpcode())
3482+
return false;
3483+
if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3484+
for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3485+
if (!isImplicitDef(V.getOperand(I)))
3486+
return false;
3487+
return true;
3488+
}
3489+
return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
34833490
}
34843491

34853492
// Optimize masked RVV pseudo instructions with a known all-ones mask to their

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 10 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -173,44 +173,18 @@ static bool isMaskRegOp(const MachineInstr &MI) {
173173
/// Note that this is different from "agnostic" as defined by the vector
174174
/// specification. Agnostic requires each lane to either be undisturbed, or
175175
/// take the value -1; no other value is allowed.
176-
static bool hasUndefinedMergeOp(const MachineInstr &MI,
177-
const MachineRegisterInfo &MRI) {
176+
static bool hasUndefinedMergeOp(const MachineInstr &MI) {
178177

179178
unsigned UseOpIdx;
180179
if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
181180
// If there is no passthrough operand, then the pass through
182181
// lanes are undefined.
183182
return true;
184183

185-
// If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose
186-
// operands are solely IMPLICIT_DEFS, then the pass through lanes are
187-
// undefined.
184+
// All undefined passthrus should be $noreg: see
185+
// RISCVDAGToDAGISel::doPeepholeNoRegPassThru
188186
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
189-
if (UseMO.getReg() == RISCV::NoRegister)
190-
return true;
191-
192-
if (UseMO.isUndef())
193-
return true;
194-
if (UseMO.getReg().isPhysical())
195-
return false;
196-
197-
MachineInstr *UseMI = MRI.getUniqueVRegDef(UseMO.getReg());
198-
assert(UseMI);
199-
if (UseMI->isImplicitDef())
200-
return true;
201-
202-
if (UseMI->isRegSequence()) {
203-
for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
204-
MachineInstr *SourceMI =
205-
MRI.getUniqueVRegDef(UseMI->getOperand(i).getReg());
206-
assert(SourceMI);
207-
if (!SourceMI->isImplicitDef())
208-
return false;
209-
}
210-
return true;
211-
}
212-
213-
return false;
187+
return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
214188
}
215189

216190
/// Which subfields of VL or VTYPE have values we need to preserve?
@@ -429,7 +403,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
429403
// this for any tail agnostic operation, but we can't as TA requires
430404
// tail lanes to either be the original value or -1. We are writing
431405
// unknown bits to the lanes here.
432-
if (hasUndefinedMergeOp(MI, *MRI)) {
406+
if (hasUndefinedMergeOp(MI)) {
433407
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
434408
Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
435409
else
@@ -913,7 +887,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
913887

914888
bool TailAgnostic = true;
915889
bool MaskAgnostic = true;
916-
if (!hasUndefinedMergeOp(MI, *MRI)) {
890+
if (!hasUndefinedMergeOp(MI)) {
917891
// Start with undisturbed.
918892
TailAgnostic = false;
919893
MaskAgnostic = false;
@@ -1109,7 +1083,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
11091083
// * The LMUL1 restriction is for machines whose latency may depend on VL.
11101084
// * As above, this is only legal for tail "undefined" not "agnostic".
11111085
if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
1112-
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
1086+
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI)) {
11131087
Used.VLAny = false;
11141088
Used.VLZeroness = true;
11151089
Used.LMUL = false;
@@ -1121,8 +1095,9 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
11211095
// immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
11221096
// Since a splat is non-constant time in LMUL, we do need to be careful to not
11231097
// increase the number of active vector registers (unlike for vmv.s.x.)
1124-
if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
1125-
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
1098+
if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
1099+
Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
1100+
hasUndefinedMergeOp(MI)) {
11261101
Used.LMUL = false;
11271102
Used.SEWLMULRatio = false;
11281103
Used.VLAny = false;

llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
1313
; SPILL-O0-NEXT: csrr a2, vlenb
1414
; SPILL-O0-NEXT: slli a2, a2, 1
1515
; SPILL-O0-NEXT: sub sp, sp, a2
16-
; SPILL-O0-NEXT: # implicit-def: $v8
17-
; SPILL-O0-NEXT: # implicit-def: $v9
18-
; SPILL-O0-NEXT: # implicit-def: $v10
19-
; SPILL-O0-NEXT: # implicit-def: $v9
20-
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
21-
; SPILL-O0-NEXT: vmv1r.v v9, v10
2216
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
17+
; SPILL-O0-NEXT: # implicit-def: $v8_v9
2318
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
2419
; SPILL-O0-NEXT: vmv1r.v v8, v9
2520
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -95,13 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
9590
; SPILL-O0-NEXT: csrr a2, vlenb
9691
; SPILL-O0-NEXT: slli a2, a2, 1
9792
; SPILL-O0-NEXT: sub sp, sp, a2
98-
; SPILL-O0-NEXT: # implicit-def: $v8
99-
; SPILL-O0-NEXT: # implicit-def: $v9
100-
; SPILL-O0-NEXT: # implicit-def: $v10
101-
; SPILL-O0-NEXT: # implicit-def: $v9
102-
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
103-
; SPILL-O0-NEXT: vmv1r.v v9, v10
10493
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
94+
; SPILL-O0-NEXT: # implicit-def: $v8_v9
10595
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
10696
; SPILL-O0-NEXT: vmv1r.v v8, v9
10797
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -177,13 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
177167
; SPILL-O0-NEXT: csrr a2, vlenb
178168
; SPILL-O0-NEXT: slli a2, a2, 1
179169
; SPILL-O0-NEXT: sub sp, sp, a2
180-
; SPILL-O0-NEXT: # implicit-def: $v8m2
181-
; SPILL-O0-NEXT: # implicit-def: $v10m2
182-
; SPILL-O0-NEXT: # implicit-def: $v12m2
183-
; SPILL-O0-NEXT: # implicit-def: $v10m2
184-
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2
185-
; SPILL-O0-NEXT: vmv2r.v v10, v12
186170
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
171+
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
187172
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
188173
; SPILL-O0-NEXT: vmv2r.v v8, v10
189174
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -262,13 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
262247
; SPILL-O0-NEXT: csrr a2, vlenb
263248
; SPILL-O0-NEXT: slli a2, a2, 2
264249
; SPILL-O0-NEXT: sub sp, sp, a2
265-
; SPILL-O0-NEXT: # implicit-def: $v8m4
266-
; SPILL-O0-NEXT: # implicit-def: $v12m4
267-
; SPILL-O0-NEXT: # implicit-def: $v16m4
268-
; SPILL-O0-NEXT: # implicit-def: $v12m4
269-
; SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
270-
; SPILL-O0-NEXT: vmv4r.v v12, v16
271250
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
251+
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
272252
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
273253
; SPILL-O0-NEXT: vmv4r.v v8, v12
274254
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -347,16 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
347327
; SPILL-O0-NEXT: csrr a2, vlenb
348328
; SPILL-O0-NEXT: slli a2, a2, 1
349329
; SPILL-O0-NEXT: sub sp, sp, a2
350-
; SPILL-O0-NEXT: # implicit-def: $v8m2
351-
; SPILL-O0-NEXT: # implicit-def: $v10m2
352-
; SPILL-O0-NEXT: # implicit-def: $v16m2
353-
; SPILL-O0-NEXT: # implicit-def: $v10m2
354-
; SPILL-O0-NEXT: # implicit-def: $v14m2
355-
; SPILL-O0-NEXT: # implicit-def: $v10m2
356-
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2
357-
; SPILL-O0-NEXT: vmv2r.v v10, v16
358-
; SPILL-O0-NEXT: vmv2r.v v12, v14
359330
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
331+
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
360332
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
361333
; SPILL-O0-NEXT: vmv2r.v v8, v10
362334
; SPILL-O0-NEXT: addi a0, sp, 16

llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
1313
; SPILL-O0-NEXT: csrr a2, vlenb
1414
; SPILL-O0-NEXT: slli a2, a2, 1
1515
; SPILL-O0-NEXT: sub sp, sp, a2
16-
; SPILL-O0-NEXT: # implicit-def: $v8
17-
; SPILL-O0-NEXT: # implicit-def: $v9
18-
; SPILL-O0-NEXT: # implicit-def: $v10
19-
; SPILL-O0-NEXT: # implicit-def: $v9
20-
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
21-
; SPILL-O0-NEXT: vmv1r.v v9, v10
2216
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
17+
; SPILL-O0-NEXT: # implicit-def: $v8_v9
2318
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
2419
; SPILL-O0-NEXT: vmv1r.v v8, v9
2520
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -95,13 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
9590
; SPILL-O0-NEXT: csrr a2, vlenb
9691
; SPILL-O0-NEXT: slli a2, a2, 1
9792
; SPILL-O0-NEXT: sub sp, sp, a2
98-
; SPILL-O0-NEXT: # implicit-def: $v8
99-
; SPILL-O0-NEXT: # implicit-def: $v9
100-
; SPILL-O0-NEXT: # implicit-def: $v10
101-
; SPILL-O0-NEXT: # implicit-def: $v9
102-
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
103-
; SPILL-O0-NEXT: vmv1r.v v9, v10
10493
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
94+
; SPILL-O0-NEXT: # implicit-def: $v8_v9
10595
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
10696
; SPILL-O0-NEXT: vmv1r.v v8, v9
10797
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -177,13 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
177167
; SPILL-O0-NEXT: csrr a2, vlenb
178168
; SPILL-O0-NEXT: slli a2, a2, 1
179169
; SPILL-O0-NEXT: sub sp, sp, a2
180-
; SPILL-O0-NEXT: # implicit-def: $v8m2
181-
; SPILL-O0-NEXT: # implicit-def: $v10m2
182-
; SPILL-O0-NEXT: # implicit-def: $v12m2
183-
; SPILL-O0-NEXT: # implicit-def: $v10m2
184-
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2
185-
; SPILL-O0-NEXT: vmv2r.v v10, v12
186170
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
171+
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
187172
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
188173
; SPILL-O0-NEXT: vmv2r.v v8, v10
189174
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -262,13 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
262247
; SPILL-O0-NEXT: csrr a2, vlenb
263248
; SPILL-O0-NEXT: slli a2, a2, 2
264249
; SPILL-O0-NEXT: sub sp, sp, a2
265-
; SPILL-O0-NEXT: # implicit-def: $v8m4
266-
; SPILL-O0-NEXT: # implicit-def: $v12m4
267-
; SPILL-O0-NEXT: # implicit-def: $v16m4
268-
; SPILL-O0-NEXT: # implicit-def: $v12m4
269-
; SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
270-
; SPILL-O0-NEXT: vmv4r.v v12, v16
271250
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
251+
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
272252
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
273253
; SPILL-O0-NEXT: vmv4r.v v8, v12
274254
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -347,16 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
347327
; SPILL-O0-NEXT: csrr a2, vlenb
348328
; SPILL-O0-NEXT: slli a2, a2, 1
349329
; SPILL-O0-NEXT: sub sp, sp, a2
350-
; SPILL-O0-NEXT: # implicit-def: $v8m2
351-
; SPILL-O0-NEXT: # implicit-def: $v10m2
352-
; SPILL-O0-NEXT: # implicit-def: $v16m2
353-
; SPILL-O0-NEXT: # implicit-def: $v10m2
354-
; SPILL-O0-NEXT: # implicit-def: $v14m2
355-
; SPILL-O0-NEXT: # implicit-def: $v10m2
356-
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2
357-
; SPILL-O0-NEXT: vmv2r.v v10, v16
358-
; SPILL-O0-NEXT: vmv2r.v v12, v14
359330
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
331+
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
360332
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
361333
; SPILL-O0-NEXT: vmv2r.v v8, v10
362334
; SPILL-O0-NEXT: addi a0, sp, 16

llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,7 @@ define i64 @test_vlseg2ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
6666
; CHECK-NEXT: {{ $}}
6767
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
6868
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
69-
; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
70-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
71-
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vr = IMPLICIT_DEF
72-
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vr = IMPLICIT_DEF
73-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vrn2m1 = REG_SEQUENCE [[DEF]], %subreg.sub_vrm1_0, [[DEF2]], %subreg.sub_vrm1_1
74-
; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[REG_SEQUENCE]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
69+
; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
7570
; CHECK-NEXT: $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_1]]
7671
; CHECK-NEXT: PseudoRET implicit $x10
7772
entry:

0 commit comments

Comments
 (0)