Skip to content

Commit 540cf25

Browse files
authored
[RISCV] Split f64 loads/stores for RV32+Zdinx during isel instead of post-RA. (#139840)
This avoids a bunch of complexity around making sure the offset doesn't exceed 4093 so we can add 4 after splitting later. By splitting early, the split loads/stores will get selected independently. There's a bit of follow-up work to do, particularly around splitting a constant-pool load. Overall I think this is cleaner, with fewer edge cases.
1 parent 4265733 commit 540cf25

12 files changed

+474
-461
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2576,8 +2576,7 @@ bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
25762576
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
25772577
const MVT VT, const RISCVSubtarget *Subtarget,
25782578
SDValue Addr, SDValue &Base, SDValue &Offset,
2579-
bool IsPrefetch = false,
2580-
bool IsRV32Zdinx = false) {
2579+
bool IsPrefetch = false) {
25812580
if (!isa<ConstantSDNode>(Addr))
25822581
return false;
25832582

@@ -2591,9 +2590,6 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
25912590
if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
25922591
if (IsPrefetch && (Lo12 & 0b11111) != 0)
25932592
return false;
2594-
if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2595-
return false;
2596-
25972593
if (Hi) {
25982594
int64_t Hi20 = (Hi >> 12) & 0xfffff;
25992595
Base = SDValue(
@@ -2617,8 +2613,6 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
26172613
Lo12 = Seq.back().getImm();
26182614
if (IsPrefetch && (Lo12 & 0b11111) != 0)
26192615
return false;
2620-
if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2621-
return false;
26222616

26232617
// Drop the last instruction.
26242618
Seq.pop_back();
@@ -2710,47 +2704,22 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
27102704
}
27112705

27122706
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2713-
SDValue &Offset, bool IsRV32Zdinx) {
2707+
SDValue &Offset) {
27142708
if (SelectAddrFrameIndex(Addr, Base, Offset))
27152709
return true;
27162710

27172711
SDLoc DL(Addr);
27182712
MVT VT = Addr.getSimpleValueType();
27192713

27202714
if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2721-
// If this is non RV32Zdinx we can always fold.
2722-
if (!IsRV32Zdinx) {
2723-
Base = Addr.getOperand(0);
2724-
Offset = Addr.getOperand(1);
2725-
return true;
2726-
}
2727-
2728-
// For RV32Zdinx we need to have more than 4 byte alignment so we can add 4
2729-
// to the offset when we expand in RISCVExpandPseudoInsts.
2730-
if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
2731-
const DataLayout &DL = CurDAG->getDataLayout();
2732-
Align Alignment = commonAlignment(
2733-
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2734-
if (Alignment > 4) {
2735-
Base = Addr.getOperand(0);
2736-
Offset = Addr.getOperand(1);
2737-
return true;
2738-
}
2739-
}
2740-
if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
2741-
Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
2742-
if (Alignment > 4) {
2743-
Base = Addr.getOperand(0);
2744-
Offset = Addr.getOperand(1);
2745-
return true;
2746-
}
2747-
}
2715+
Base = Addr.getOperand(0);
2716+
Offset = Addr.getOperand(1);
2717+
return true;
27482718
}
27492719

2750-
int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
27512720
if (CurDAG->isBaseWithConstantOffset(Addr)) {
27522721
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2753-
if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2722+
if (isInt<12>(CVal) && isInt<12>(CVal)) {
27542723
Base = Addr.getOperand(0);
27552724
if (Base.getOpcode() == RISCVISD::ADD_LO) {
27562725
SDValue LoOperand = Base.getOperand(1);
@@ -2763,8 +2732,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
27632732
const DataLayout &DL = CurDAG->getDataLayout();
27642733
Align Alignment = commonAlignment(
27652734
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2766-
if ((CVal == 0 || Alignment > CVal) &&
2767-
(!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) {
2735+
if ((CVal == 0 || Alignment > CVal)) {
27682736
int64_t CombinedOffset = CVal + GA->getOffset();
27692737
Base = Base.getOperand(0);
27702738
Offset = CurDAG->getTargetGlobalAddress(
@@ -2785,13 +2753,13 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
27852753
// Handle ADD with large immediates.
27862754
if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
27872755
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2788-
assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2756+
assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
27892757
"simm12 not already handled?");
27902758

27912759
// Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
27922760
// an ADDI for part of the offset and fold the rest into the load/store.
27932761
// This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2794-
if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
2762+
if (CVal >= -4096 && CVal <= 4094) {
27952763
int64_t Adj = CVal < 0 ? -2048 : 2047;
27962764
Base = SDValue(
27972765
CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
@@ -2809,7 +2777,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
28092777
// instructions.
28102778
if (isWorthFoldingAdd(Addr) &&
28112779
selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2812-
Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
2780+
Offset, /*IsPrefetch=*/false)) {
28132781
// Insert an ADD instruction with the materialized Hi52 bits.
28142782
Base = SDValue(
28152783
CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
@@ -2819,7 +2787,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
28192787
}
28202788

28212789
if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2822-
/*IsPrefetch=*/false, RV32ZdinxRange))
2790+
/*IsPrefetch=*/false))
28232791
return true;
28242792

28252793
Base = Addr;

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
4646
std::vector<SDValue> &OutOps) override;
4747

4848
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
49-
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset,
50-
bool IsRV32Zdinx = false);
51-
bool SelectAddrRegImmRV32Zdinx(SDValue Addr, SDValue &Base, SDValue &Offset) {
52-
return SelectAddrRegImm(Addr, Base, Offset, true);
53-
}
49+
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
5450
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset);
5551

5652
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount,

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
586586
if (!Subtarget.is64Bit())
587587
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
588588

589+
if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
590+
!Subtarget.is64Bit()) {
591+
setOperationAction(ISD::LOAD, MVT::f64, Custom);
592+
setOperationAction(ISD::STORE, MVT::f64, Custom);
593+
}
594+
589595
if (Subtarget.hasStdExtZfa()) {
590596
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
591597
setOperationAction(FPRndMode, MVT::f64, Legal);
@@ -7708,19 +7714,42 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
77087714
}
77097715
case ISD::LOAD: {
77107716
auto *Load = cast<LoadSDNode>(Op);
7711-
EVT VecTy = Load->getMemoryVT();
7717+
EVT VT = Load->getValueType(0);
7718+
if (VT == MVT::f64) {
7719+
assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
7720+
!Subtarget.is64Bit() && "Unexpected custom legalisation");
7721+
7722+
// Replace a double precision load with two i32 loads and a BuildPairF64.
7723+
SDLoc DL(Op);
7724+
SDValue BasePtr = Load->getBasePtr();
7725+
SDValue Chain = Load->getChain();
7726+
7727+
SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, BasePtr,
7728+
Load->getPointerInfo(), Load->getOriginalAlign(),
7729+
Load->getMemOperand()->getFlags());
7730+
BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
7731+
SDValue Hi = DAG.getLoad(
7732+
MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
7733+
Load->getOriginalAlign(), Load->getMemOperand()->getFlags());
7734+
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7735+
Hi.getValue(1));
7736+
7737+
SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7738+
return DAG.getMergeValues({Pair, Chain}, DL);
7739+
}
7740+
77127741
// Handle normal vector tuple load.
7713-
if (VecTy.isRISCVVectorTuple()) {
7742+
if (VT.isRISCVVectorTuple()) {
77147743
SDLoc DL(Op);
77157744
MVT XLenVT = Subtarget.getXLenVT();
7716-
unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7717-
unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7745+
unsigned NF = VT.getRISCVVectorTupleNumFields();
7746+
unsigned Sz = VT.getSizeInBits().getKnownMinValue();
77187747
unsigned NumElts = Sz / (NF * 8);
77197748
int Log2LMUL = Log2_64(NumElts) - 3;
77207749

77217750
auto Flag = SDNodeFlags();
77227751
Flag.setNoUnsignedWrap(true);
7723-
SDValue Ret = DAG.getUNDEF(VecTy);
7752+
SDValue Ret = DAG.getUNDEF(VT);
77247753
SDValue BasePtr = Load->getBasePtr();
77257754
SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
77267755
VROffset =
@@ -7734,7 +7763,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
77347763
MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
77357764
BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
77367765
OutChains.push_back(LoadVal.getValue(1));
7737-
Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7766+
Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
77387767
DAG.getVectorIdxConstant(i, DL));
77397768
BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
77407769
}
@@ -7752,6 +7781,27 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
77527781
auto *Store = cast<StoreSDNode>(Op);
77537782
SDValue StoredVal = Store->getValue();
77547783
EVT VT = StoredVal.getValueType();
7784+
if (VT == MVT::f64) {
7785+
assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
7786+
!Subtarget.is64Bit() && "Unexpected custom legalisation");
7787+
7788+
// Replace a double precision store with a SplitF64 and i32 stores.
7789+
SDValue DL(Op);
7790+
SDValue BasePtr = Store->getBasePtr();
7791+
SDValue Chain = Store->getChain();
7792+
SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
7793+
DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
7794+
7795+
SDValue Lo = DAG.getStore(
7796+
Chain, DL, Split.getValue(0), BasePtr, Store->getPointerInfo(),
7797+
Store->getOriginalAlign(), Store->getMemOperand()->getFlags());
7798+
BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
7799+
SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
7800+
Store->getPointerInfo().getWithOffset(4),
7801+
Store->getOriginalAlign(),
7802+
Store->getMemOperand()->getFlags());
7803+
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7804+
}
77557805
if (VT == MVT::i64) {
77567806
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
77577807
"Unexpected custom legalisation");

llvm/lib/Target/RISCV/RISCVInstrInfoD.td

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ def : GINodeEquiv<G_MERGE_VALUES, RISCVBuildPairF64>;
2727
def RISCVSplitF64 : RVSDNode<"SplitF64", SDT_RISCVSplitF64>;
2828
def : GINodeEquiv<G_UNMERGE_VALUES, RISCVSplitF64>;
2929

30-
def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmRV32Zdinx">;
31-
3230
//===----------------------------------------------------------------------===//
3331
// Operand and SDNode transformation definitions.
3432
//===----------------------------------------------------------------------===//
@@ -529,21 +527,14 @@ defm Select_FPR64IN32X : SelectCC_GPR_rrirr<FPR64IN32X, f64>;
529527
def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>;
530528

531529
/// Loads
532-
let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in
530+
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in
533531
def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
534532

535533
/// Stores
536-
let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in
534+
let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in
537535
def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
538536
} // Predicates = [HasStdExtZdinx, IsRV32]
539537

540-
let Predicates = [HasStdExtZdinx, NoHasStdExtZilsd, IsRV32] in {
541-
def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))),
542-
(PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>;
543-
def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
544-
(PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>;
545-
}
546-
547538
let Predicates = [HasStdExtZdinx, HasStdExtZilsd, IsRV32] in {
548539
def : LdPat<load, LD_RV32, f64>;
549540
def : StPat<store, SD_RV32, GPRPair, f64>;

llvm/test/CodeGen/RISCV/double-calling-conv.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,10 @@ define double @callee_double_stack(i64 %a, i64 %b, i64 %c, i64 %d, double %e, do
165165
;
166166
; RV32IZFINXZDINX-LABEL: callee_double_stack:
167167
; RV32IZFINXZDINX: # %bb.0:
168-
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
169168
; RV32IZFINXZDINX-NEXT: lw a1, 12(sp)
170-
; RV32IZFINXZDINX-NEXT: lw a2, 0(sp)
171169
; RV32IZFINXZDINX-NEXT: lw a3, 4(sp)
170+
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
171+
; RV32IZFINXZDINX-NEXT: lw a2, 0(sp)
172172
; RV32IZFINXZDINX-NEXT: fadd.d a0, a2, a0
173173
; RV32IZFINXZDINX-NEXT: ret
174174
%1 = fadd double %e, %f

0 commit comments

Comments
 (0)