Skip to content

Commit 201bfb1

Browse files
committed
[RISCV] Add sink-and-fold support for RISC-V.
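This uses the sink-and-fold support recently introduced in MachineSink (https://reviews.llvm.org/D152828) to fold an ADDI into the address of a load/store instruction. The feature is off by default and is controlled by the hidden `-riscv-enable-sink-fold` option; enabling it by default will be done in a separate PR.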
1 parent 8ee38f3 commit 201bfb1

9 files changed

+388
-391
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

+68
Original file line numberDiff line numberDiff line change
@@ -1907,6 +1907,74 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
19071907
return true;
19081908
}
19091909

1910+
bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
1911+
const MachineInstr &AddrI,
1912+
ExtAddrMode &AM) const {
1913+
switch (MemI.getOpcode()) {
1914+
default:
1915+
return false;
1916+
case RISCV::LB:
1917+
case RISCV::LBU:
1918+
case RISCV::LH:
1919+
case RISCV::LHU:
1920+
case RISCV::LW:
1921+
case RISCV::LWU:
1922+
case RISCV::LD:
1923+
case RISCV::FLH:
1924+
case RISCV::FLW:
1925+
case RISCV::FLD:
1926+
case RISCV::SB:
1927+
case RISCV::SH:
1928+
case RISCV::SW:
1929+
case RISCV::SD:
1930+
case RISCV::FSH:
1931+
case RISCV::FSW:
1932+
case RISCV::FSD:
1933+
break;
1934+
}
1935+
1936+
if (MemI.getOperand(0).getReg() == Reg)
1937+
return false;
1938+
1939+
if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
1940+
!AddrI.getOperand(2).isImm())
1941+
return false;
1942+
1943+
int64_t OldOffset = MemI.getOperand(2).getImm();
1944+
int64_t Disp = AddrI.getOperand(2).getImm();
1945+
int64_t NewOffset = OldOffset + Disp;
1946+
if (!STI.is64Bit())
1947+
NewOffset = SignExtend64<32>(NewOffset);
1948+
1949+
if (!isInt<12>(NewOffset))
1950+
return false;
1951+
1952+
AM.BaseReg = AddrI.getOperand(1).getReg();
1953+
AM.ScaledReg = 0;
1954+
AM.Scale = 0;
1955+
AM.Displacement = NewOffset;
1956+
AM.Form = ExtAddrMode::Formula::Basic;
1957+
return true;
1958+
}
1959+
1960+
/// Re-emit the load/store \p MemI so that it addresses memory through \p AM.
///
/// Only the base-register-plus-displacement form is handled; an address mode
/// with a scaled register trips the assertion. The new instruction reuses
/// \p MemI's opcode, operand-0 register, memory operands, MI flags and debug
/// location, and is built at \p MemI's position in its parent block. \p MemI
/// itself is not erased here.
///
/// \returns the newly built instruction.
MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
                                               const ExtAddrMode &AM) const {
  assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
         "Addressing mode not supported for folding");

  // Operand 0 is flagged as a definition when MemI may load; otherwise it is
  // added with no extra register-state flags.
  unsigned ValueRegFlags = 0;
  if (MemI.mayLoad())
    ValueRegFlags = RegState::Define;

  MachineInstrBuilder NewMemI =
      BuildMI(*MemI.getParent(), MemI, MemI.getDebugLoc(),
              get(MemI.getOpcode()));
  NewMemI.addReg(MemI.getOperand(0).getReg(), ValueRegFlags);
  NewMemI.addReg(AM.BaseReg);
  NewMemI.addImm(AM.Displacement);
  NewMemI.setMemRefs(MemI.memoperands());
  NewMemI.setMIFlags(MemI.getFlags());
  return NewMemI;
}
1977+
19101978
// Return true if get the base operand, byte offset of an instruction and the
19111979
// memory width. Width is the size of memory that is being loaded/stored.
19121980
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(

llvm/lib/Target/RISCV/RISCVInstrInfo.h

+7
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,13 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
137137
bool verifyInstruction(const MachineInstr &MI,
138138
StringRef &ErrInfo) const override;
139139

140+
/// Hook used by MachineSink's sink-and-fold support: reports whether the ADDI
/// \p AddrI can be folded into the address of the scalar load/store \p MemI.
/// On success, \p AM receives the folded address as a base register plus a
/// displacement that fits a signed 12-bit immediate. Returns false when
/// \p Reg is operand 0 of \p MemI.
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
141+
const MachineInstr &AddrI,
142+
ExtAddrMode &AM) const override;
143+
144+
/// Builds a copy of the load/store \p MemI that addresses memory through
/// \p AM (base register + displacement) and returns the new instruction.
/// Address modes with a scaled register are rejected by an assertion.
MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
145+
const ExtAddrMode &AM) const override;
146+
140147
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
141148
const MachineOperand *&BaseOp,
142149
int64_t &Offset, unsigned &Width,

llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
275275
Changed |= handleSubReg(MF, MI, DLD);
276276
if (MI.isImplicitDef()) {
277277
auto DstReg = MI.getOperand(0).getReg();
278-
if (isVectorRegClass(DstReg))
278+
if (DstReg.isVirtual() && isVectorRegClass(DstReg))
279279
Changed |= handleImplicitDef(MBB, I);
280280
}
281281
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ static cl::opt<bool> EnableRISCVDeadRegisterElimination(
7878
" them with stores to x0"),
7979
cl::init(true));
8080

81+
// Hidden command-line switch, off by default. RISCVPassConfig's constructor
// forwards its value to setEnableSinkAndFold().
static cl::opt<bool>
82+
EnableSinkFold("riscv-enable-sink-fold",
83+
cl::desc("Enable sinking and folding of instruction copies"),
84+
cl::init(false), cl::Hidden);
85+
8186
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
8287
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
8388
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -242,7 +247,9 @@ namespace {
242247
class RISCVPassConfig : public TargetPassConfig {
243248
public:
244249
RISCVPassConfig(RISCVTargetMachine &TM, PassManagerBase &PM)
245-
: TargetPassConfig(TM, PM) {}
250+
: TargetPassConfig(TM, PM) {
251+
setEnableSinkAndFold(EnableSinkFold);
252+
}
246253

247254
RISCVTargetMachine &getRISCVTargetMachine() const {
248255
return getTM<RISCVTargetMachine>();

llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll

+14-18
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3-
; RUN: | FileCheck -check-prefix=RV32I %s
3+
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I %s
44
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -code-model=medium < %s \
5-
; RUN: | FileCheck -check-prefix=RV32I-MEDIUM %s
5+
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I-MEDIUM %s
66
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
7-
; RUN: | FileCheck -check-prefix=RV64I %s
7+
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I %s
88
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -code-model=medium < %s \
9-
; RUN: | FileCheck -check-prefix=RV64I-MEDIUM %s
9+
; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I-MEDIUM %s
1010

1111
; We can often fold an ADDI into the offset of load/store instructions:
1212
; (load (addi base, off1), off2) -> (load base, off1+off2)
@@ -769,14 +769,13 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
769769
; RV32I-NEXT: li s3, 0
770770
; RV32I-NEXT: li s4, 0
771771
; RV32I-NEXT: slli a0, a0, 4
772-
; RV32I-NEXT: add a0, s0, a0
773-
; RV32I-NEXT: addi s7, a0, 8
772+
; RV32I-NEXT: add s7, s0, a0
774773
; RV32I-NEXT: .LBB20_5: # %for.body
775774
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
776775
; RV32I-NEXT: mv a0, s0
777776
; RV32I-NEXT: call f@plt
778-
; RV32I-NEXT: lw a0, 4(s7)
779-
; RV32I-NEXT: lw a1, 0(s7)
777+
; RV32I-NEXT: lw a0, 12(s7)
778+
; RV32I-NEXT: lw a1, 8(s7)
780779
; RV32I-NEXT: add a0, a0, s4
781780
; RV32I-NEXT: add s3, a1, s3
782781
; RV32I-NEXT: sltu s4, s3, a1
@@ -835,14 +834,13 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
835834
; RV32I-MEDIUM-NEXT: li s3, 0
836835
; RV32I-MEDIUM-NEXT: li s4, 0
837836
; RV32I-MEDIUM-NEXT: slli a0, a0, 4
838-
; RV32I-MEDIUM-NEXT: add a0, s0, a0
839-
; RV32I-MEDIUM-NEXT: addi s7, a0, 8
837+
; RV32I-MEDIUM-NEXT: add s7, s0, a0
840838
; RV32I-MEDIUM-NEXT: .LBB20_5: # %for.body
841839
; RV32I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
842840
; RV32I-MEDIUM-NEXT: mv a0, s0
843841
; RV32I-MEDIUM-NEXT: call f@plt
844-
; RV32I-MEDIUM-NEXT: lw a0, 4(s7)
845-
; RV32I-MEDIUM-NEXT: lw a1, 0(s7)
842+
; RV32I-MEDIUM-NEXT: lw a0, 12(s7)
843+
; RV32I-MEDIUM-NEXT: lw a1, 8(s7)
846844
; RV32I-MEDIUM-NEXT: add a0, a0, s4
847845
; RV32I-MEDIUM-NEXT: add s3, a1, s3
848846
; RV32I-MEDIUM-NEXT: sltu s4, s3, a1
@@ -883,13 +881,12 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
883881
; RV64I-NEXT: mv s1, a1
884882
; RV64I-NEXT: li s2, 0
885883
; RV64I-NEXT: slli a0, a0, 4
886-
; RV64I-NEXT: add a0, a2, a0
887-
; RV64I-NEXT: addi s3, a0, 8
884+
; RV64I-NEXT: add s3, a2, a0
888885
; RV64I-NEXT: .LBB20_2: # %for.body
889886
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
890887
; RV64I-NEXT: mv a0, s0
891888
; RV64I-NEXT: call f@plt
892-
; RV64I-NEXT: ld a0, 0(s3)
889+
; RV64I-NEXT: ld a0, 8(s3)
893890
; RV64I-NEXT: addi s1, s1, -1
894891
; RV64I-NEXT: add s2, a0, s2
895892
; RV64I-NEXT: bnez s1, .LBB20_2
@@ -920,13 +917,12 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
920917
; RV64I-MEDIUM-NEXT: mv s1, a1
921918
; RV64I-MEDIUM-NEXT: li s2, 0
922919
; RV64I-MEDIUM-NEXT: slli a0, a0, 4
923-
; RV64I-MEDIUM-NEXT: add a0, a2, a0
924-
; RV64I-MEDIUM-NEXT: addi s3, a0, 8
920+
; RV64I-MEDIUM-NEXT: add s3, a2, a0
925921
; RV64I-MEDIUM-NEXT: .LBB20_2: # %for.body
926922
; RV64I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
927923
; RV64I-MEDIUM-NEXT: mv a0, s0
928924
; RV64I-MEDIUM-NEXT: call f@plt
929-
; RV64I-MEDIUM-NEXT: ld a0, 0(s3)
925+
; RV64I-MEDIUM-NEXT: ld a0, 8(s3)
930926
; RV64I-MEDIUM-NEXT: addi s1, s1, -1
931927
; RV64I-MEDIUM-NEXT: add s2, a0, s2
932928
; RV64I-MEDIUM-NEXT: bnez s1, .LBB20_2

0 commit comments

Comments
 (0)