Skip to content

Commit 7f8451c

Browse files
authored
[RISCV] Use vsetvli instead of vlenb in Prologue/Epilogue (#113756)
Currently, we use `csrr` with `vlenb` to obtain `VLEN` (in bytes), but this is not the only option. We can also use `vsetvli` with `e8`/`m1` to get `VLMAX`, which is equal to `vlenb`, i.e. the VLEN in bytes. This method is preferable on some microarchitectures and makes it easier to obtain values like `VLEN * 2`, `VLEN * 4`, or `VLEN * 8` (by selecting `m2`, `m4`, or `m8`), reducing the number of instructions needed to calculate VLEN multiples. However, this approach is *NOT* always interchangeable, as it changes the state of `VTYPE` and `VL`, which can alter the behavior of vector instructions, potentially causing incorrect code generation if applied after vsetvli insertion. Therefore, we limit its use to the prologue/epilogue for now, as there are no vector operations within the prologue/epilogue sequence. With further analysis, we may extend this approach beyond the prologue/epilogue in the future, but starting here should be a good first step. This feature is guarded by the `+prefer-vsetvli-over-read-vlenb` feature, which is disabled by default for now.
1 parent 77edfbb commit 7f8451c

12 files changed

+2044
-12
lines changed

llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
5656
MachineBasicBlock::iterator MBBI);
5757
bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
5858
MachineBasicBlock::iterator MBBI);
59+
bool expandPseudoReadVLENBViaVSETVLIX0(MachineBasicBlock &MBB,
60+
MachineBasicBlock::iterator MBBI);
5961
#ifndef NDEBUG
6062
unsigned getInstSizeInBytes(const MachineFunction &MF) const {
6163
unsigned Size = 0;
@@ -164,6 +166,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
164166
case RISCV::PseudoVMSET_M_B64:
165167
// vmset.m vd => vmxnor.mm vd, vd, vd
166168
return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM);
169+
case RISCV::PseudoReadVLENBViaVSETVLIX0:
170+
return expandPseudoReadVLENBViaVSETVLIX0(MBB, MBBI);
167171
}
168172

169173
return false;
@@ -415,6 +419,24 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
415419
return true;
416420
}
417421

422+
bool RISCVExpandPseudo::expandPseudoReadVLENBViaVSETVLIX0(
423+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
424+
DebugLoc DL = MBBI->getDebugLoc();
425+
Register Dst = MBBI->getOperand(0).getReg();
426+
unsigned Mul = MBBI->getOperand(1).getImm();
427+
RISCVVType::VLMUL VLMUL = RISCVVType::encodeLMUL(Mul, /*Fractional=*/false);
428+
unsigned VTypeImm = RISCVVType::encodeVTYPE(
429+
VLMUL, /*SEW=*/8, /*TailAgnostic=*/true, /*MaskAgnostic=*/true);
430+
431+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoVSETVLIX0))
432+
.addReg(Dst, RegState::Define)
433+
.addReg(RISCV::X0, RegState::Kill)
434+
.addImm(VTypeImm);
435+
436+
MBBI->eraseFromParent();
437+
return true;
438+
}
439+
418440
class RISCVPreRAExpandPseudo : public MachineFunctionPass {
419441
public:
420442
const RISCVSubtarget *STI;

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,6 +1591,12 @@ def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "V
15911591
def TuneVXRMPipelineFlush : SubtargetFeature<"vxrm-pipeline-flush", "HasVXRMPipelineFlush",
15921592
"true", "VXRM writes causes pipeline flush">;
15931593

1594+
def TunePreferVsetvliOverReadVLENB
1595+
: SubtargetFeature<"prefer-vsetvli-over-read-vlenb",
1596+
"PreferVsetvliOverReadVLENB",
1597+
"true",
1598+
"Prefer vsetvli over read vlenb CSR to calculate VLEN">;
1599+
15941600
// Assume that lock-free native-width atomics are available, even if the target
15951601
// and operating system combination would not usually provide them. The user
15961602
// is responsible for providing any necessary __sync implementations. Code

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2195,6 +2195,17 @@ bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
21952195
const MachineFunction *MF = MBB.getParent();
21962196
const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
21972197

2198+
// Make sure VTYPE and VL are not live-in since we will use vsetvli in the
2199+
// prologue to get the VLEN, and that will clobber these registers.
2200+
//
2201+
// We could also check whether the stack contains objects with scalable vector
2202+
// types, but that would require iterating over all the stack objects, which is
2203+
// probably not worthwhile since the situation is rare. We can add a further
2204+
// check in the future if we find it is necessary.
2205+
if (STI.preferVsetvliOverReadVLENB() &&
2206+
(MBB.isLiveIn(RISCV::VTYPE) || MBB.isLiveIn(RISCV::VL)))
2207+
return false;
2208+
21982209
if (!RVFI->useSaveRestoreLibCalls(*MF))
21992210
return true;
22002211

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6051,6 +6051,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
60516051
[(set GPR:$rd, (riscv_read_vlenb))]>,
60526052
PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVLENB.Encoding, X0)>,
60536053
Sched<[WriteRdVLENB]>;
6054+
let Defs = [VL, VTYPE] in {
6055+
def PseudoReadVLENBViaVSETVLIX0 : Pseudo<(outs GPR:$rd), (ins uimm5:$shamt),
6056+
[]>,
6057+
Sched<[WriteVSETVLI, ReadVSETVLI]>;
6058+
}
60546059
}
60556060

60566061
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,

llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -226,21 +226,48 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
226226
assert(isInt<32>(ScalableValue / (RISCV::RVVBitsPerBlock / 8)) &&
227227
"Expect the number of vector registers within 32-bits.");
228228
uint32_t NumOfVReg = ScalableValue / (RISCV::RVVBitsPerBlock / 8);
229-
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
230-
.setMIFlag(Flag);
231-
232-
if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
233-
(NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
234-
unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
235-
(NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
236-
BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
237-
.addReg(ScratchReg, RegState::Kill).addReg(SrcReg)
229+
// Only use vsetvli rather than vlenb if adjusting in the prologue or
230+
// epilogue, otherwise it may disturb the VTYPE and VL status.
231+
bool IsPrologueOrEpilogue =
232+
Flag == MachineInstr::FrameSetup || Flag == MachineInstr::FrameDestroy;
233+
bool UseVsetvliRatherThanVlenb =
234+
IsPrologueOrEpilogue && ST.preferVsetvliOverReadVLENB();
235+
if (UseVsetvliRatherThanVlenb && (NumOfVReg == 1 || NumOfVReg == 2 ||
236+
NumOfVReg == 4 || NumOfVReg == 8)) {
237+
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENBViaVSETVLIX0),
238+
ScratchReg)
239+
.addImm(NumOfVReg)
238240
.setMIFlag(Flag);
239-
} else {
240-
TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
241241
BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
242-
.addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
242+
.addReg(SrcReg)
243+
.addReg(ScratchReg, RegState::Kill)
243244
.setMIFlag(Flag);
245+
} else {
246+
if (UseVsetvliRatherThanVlenb)
247+
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENBViaVSETVLIX0),
248+
ScratchReg)
249+
.addImm(1)
250+
.setMIFlag(Flag);
251+
else
252+
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
253+
.setMIFlag(Flag);
254+
255+
if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
256+
(NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
257+
unsigned Opc = NumOfVReg == 2
258+
? RISCV::SH1ADD
259+
: (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
260+
BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
261+
.addReg(ScratchReg, RegState::Kill)
262+
.addReg(SrcReg)
263+
.setMIFlag(Flag);
264+
} else {
265+
TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
266+
BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
267+
.addReg(SrcReg)
268+
.addReg(ScratchReg, RegState::Kill)
269+
.setMIFlag(Flag);
270+
}
244271
}
245272
SrcReg = DestReg;
246273
KillSrcReg = true;

0 commit comments

Comments
 (0)