Skip to content

Commit daa4a90

Browse files
committed
[RISCV] Fold addi into load / store even if they are in different BBs.
Currently, since ISel only looks at one basic block at a time, we miss some opportunities to combine a load / store with an `addi`. Such opportunities may occur when a GEP and the use of that GEP are in different basic blocks. In this PR we combine `addi` with the memory access in `RISCVTargetLowering::finalizeLowering`.
1 parent 02981c9 commit daa4a90

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ using namespace llvm;
4949
#define DEBUG_TYPE "riscv-lower"
5050

5151
STATISTIC(NumTailCalls, "Number of tail calls");
52+
// Incremented once per load/store whose address absorbed an ADDI, so a single
// ADDI that feeds several memory accesses is counted several times.
STATISTIC(NumADDIsMerged, "Number of ADDIs merged.");
5253

5354
static cl::opt<unsigned> ExtensionMaxWebSize(
5455
DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
@@ -2278,6 +2279,81 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
22782279
}
22792280
}
22802281

2282+
/// Try to fold the ADDI \p MI into the address operands of \p UseMI.
///
/// Given
///   %d = ADDI %s, Imm
///   ... = Lx/Sx ..., %d, Off
/// the memory access is rewritten in place to use base %s and offset
/// Imm + Off, provided the combined offset still fits a signed 12-bit
/// immediate.
///
/// \param UseMI candidate user of the ADDI result; only scalar integer
///              loads/stores (LB/LH/LW/LD/LBU/LHU/SB/SH/SW/SD) are handled.
/// \param MI    the ADDI whose result is used by \p UseMI.
/// \returns true if \p UseMI was rewritten and no longer reads the ADDI
///          result in any operand; false if \p UseMI was left untouched.
///
/// The ADDI itself is never removed here; the caller decides whether it is
/// dead once all of its users have been visited.
static bool tryToFoldInstIntoUse(MachineInstr &UseMI, MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::ADDI)
    return false;

  // Operand 1 of an ADDI may be something other than a register (e.g. a frame
  // index); such forms cannot be folded.
  const MachineOperand &DefMO = MI.getOperand(0);
  const MachineOperand &SrcMO = MI.getOperand(1);
  if (!DefMO.isReg() || !SrcMO.isReg())
    return false;

  // Only virtual registers give the single-definition guarantee that makes it
  // safe to move the read of the source register down to the memory access.
  // X0 is accepted as a source because it is a constant register.
  const Register DefReg = DefMO.getReg();
  const Register SrcReg = SrcMO.getReg();
  if (!DefReg.isVirtual())
    return false;
  if (!SrcReg.isVirtual() && SrcReg != RISCV::X0)
    return false;

  switch (UseMI.getOpcode()) {
  default:
    return false;
  case RISCV::LB:
  case RISCV::LH:
  case RISCV::LW:
  case RISCV::LD:
  case RISCV::LBU:
  case RISCV::LHU:
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
    break;
  }

  // Operand layout of these loads/stores: (value, base, offset).
  constexpr unsigned BaseIdx = 1;
  constexpr unsigned OffsetIdx = 2;

  MachineOperand &BaseMO = UseMI.getOperand(BaseIdx);
  if (!BaseMO.isReg() || BaseMO.getReg() != DefReg)
    return false;

  // The ADDI result may also be read through another operand, e.g. as the
  // value being stored ("SW %d, 0(%d)"). Rewriting only the base would then
  // report a successful fold while %d is still live, and the caller would
  // erase its definition. Refuse the fold in that case.
  for (unsigned Idx = 0, E = UseMI.getNumOperands(); Idx != E; ++Idx) {
    if (Idx == BaseIdx)
      continue;
    const MachineOperand &MO = UseMI.getOperand(Idx);
    if (MO.isReg() && MO.getReg() == DefReg)
      return false;
  }

  // Both offsets must be plain immediates; symbolic offsets (e.g. %lo(sym))
  // cannot be added together here.
  MachineOperand &OffsetMO = UseMI.getOperand(OffsetIdx);
  const MachineOperand &ADDIOffsetMO = MI.getOperand(2);
  if (!OffsetMO.isImm() || !ADDIOffsetMO.isImm())
    return false;

  const int64_t TotalOffset = OffsetMO.getImm() + ADDIOffsetMO.getImm();
  if (!isInt<12>(TotalOffset))
    return false;

  OffsetMO.setImm(TotalOffset);
  BaseMO.setReg(SrcReg);

  // The source register is now read later than before, possibly in another
  // basic block, so any kill flag already present on one of its uses may be
  // stale.
  if (SrcReg.isVirtual())
    UseMI.getMF()->getRegInfo().clearKillFlags(SrcReg);

  NumADDIsMerged++;
  return true;
}
2326+
2327+
/// Post-ISel hook: after running the generic finalization, fold ADDIs into
/// the load/store instructions that use their result as a base address, even
/// when the ADDI and the memory access live in different basic blocks.
///
/// For every ADDI defining a virtual register, each non-debug user is offered
/// to tryToFoldInstIntoUse(). If every user absorbed the ADDI, the ADDI is
/// dead and is erased; otherwise it is kept for the remaining users.
void RISCVTargetLowering::finalizeLowering(MachineFunction &MF) const {
  TargetLoweringBase::finalizeLowering(MF);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Erasure is deferred so that the instruction lists are not modified while
  // they are being walked.
  SmallVector<MachineInstr *, 8> ToErase;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() != RISCV::ADDI)
        continue;

      // Physical registers are not in SSA form: their use list is not tied to
      // this particular definition, so neither folding nor erasing is safe.
      const MachineOperand &DefMO = MI.getOperand(0);
      if (!DefMO.isReg() || !DefMO.getReg().isVirtual())
        continue;
      const Register DefReg = DefMO.getReg();

      // Snapshot the users first: a successful fold rewrites the use operand,
      // which changes the use list of DefReg. Debug instructions are skipped
      // so that the presence of debug info cannot change the generated code.
      SmallVector<MachineInstr *, 4> Users;
      for (MachineInstr &UseMI : MRI.use_nodbg_instructions(DefReg))
        Users.push_back(&UseMI);

      bool AllUsesWereFolded = true;
      for (MachineInstr *UseMI : Users)
        AllUsesWereFolded &= tryToFoldInstIntoUse(*UseMI, MI);

      if (AllUsesWereFolded)
        ToErase.push_back(&MI);
    }
  }

  for (MachineInstr *MI : ToErase) {
    // Any remaining users are debug instructions; detach them from the
    // register whose definition is about to disappear.
    MRI.markUsesInDebugValueAsUndef(MI->getOperand(0).getReg());
    MI->eraseFromParent();
  }
}
22812357

22822358
unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
22832359
return NumRepeatedDivisors;

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,8 @@ class RISCVTargetLowering : public TargetLowering {
951951
return false;
952952
};
953953

954+
/// Runs the base-class finalization and then folds ADDI instructions into
/// the load/store instructions that use their result as a base register,
/// including users located in other basic blocks.
void finalizeLowering(MachineFunction &MF) const override;
955+
954956
/// For available scheduling models FDIV + two independent FMULs are much
955957
/// faster than two FDIVs.
956958
unsigned combineRepeatedFPDivisors() const override;

0 commit comments

Comments
 (0)