[X86][GlobalISel] Support addr matching in SDAG patterns #130445
Conversation
addr matching was the only gatekeeper for starting to select G_LOAD and G_STORE using SDAG patterns.

* Replace getLoadStoreOp with getPtrLoadStoreOp in the Load/Store selector, as the GlobalISel matcher or emitter can't map the pointer type onto the i32/i64 types used in SDAG patterns for pointers. So the load and store selection of pointers is still manual.
* X86SelectAddress is now used for both pattern matching and manual selection. As a result it accumulates all the code that was previously distributed among different selection functions.
* Introduce a complex renderer `gi_addr` for `addr`. In this patch only the existing functionality has been implemented. The renderer's name is the same as in SDAG: `selectAddr`.
* Since truncating stores are not supported, we custom legalize them by matching the types of the store and its MMO.
* Introduce a ConstantPool flag in X86AddressMode, because otherwise we would need to introduce a GlobalISel copy of X86ISelAddressMode.
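As a quick illustration (a hypothetical example, not taken from the patch or its tests, with an illustrative function name): the simplest case this unlocks is a G_LOAD whose address is a G_PTR_ADD with a constant offset. With `gi_addr` wired to `selectAddr`, the imported SDAG patterns can match such an address as a [base + disp] memory operand instead of going through manual selection.

; Hypothetical IR: the GEP becomes a G_PTR_ADD with a constant offset,
; which X86SelectAddress folds into the displacement of the address.
define i32 @load_base_disp(ptr %p) {
  %q = getelementptr inbounds i8, ptr %p, i64 16
  %v = load i32, ptr %q, align 4
  ret i32 %v
}

On x86-64 this would be expected to select to something like movl 16(%rdi), %eax.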
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-x86 Author: Evgenii Kudriashov (e-kud) Changes
Patch is 95.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130445.diff 37 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 9e5d4d34f24d2..571ec6dd7e9ba 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -66,6 +66,9 @@ class GMemOperation : public GenericMachineInstr {
/// memory operations can't be reordered.
bool isUnordered() const { return getMMO().isUnordered(); }
+ /// Return the minimum known alignment in bytes of the actual memory
+ /// reference.
+ Align getAlign() const { return getMMO().getAlign(); }
/// Returns the size in bytes of the memory access.
LocationSize getMemSize() const { return getMMO().getSize(); }
/// Returns the size in bits of the memory access.
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index d7f315d82b832..828494ae9cfbf 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -73,6 +73,9 @@ class X86InstructionSelector : public InstructionSelector {
// TODO: remove after supported by Tablegen-erated instruction selection.
unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
Align Alignment) const;
+ // TODO: remove once p0<->i32/i64 matching is available
+ unsigned getPtrLoadStoreOp(const LLT &Ty, const RegisterBank &RB,
+ unsigned Opc) const;
bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
@@ -119,6 +122,8 @@ class X86InstructionSelector : public InstructionSelector {
bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ ComplexRendererFns selectAddr(MachineOperand &Root) const;
+
// emit insert subreg instruction and insert it before MachineInstr &I
bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
MachineRegisterInfo &MRI, MachineFunction &MF) const;
@@ -445,6 +450,17 @@ bool X86InstructionSelector::select(MachineInstr &I) {
return false;
}
+unsigned X86InstructionSelector::getPtrLoadStoreOp(const LLT &Ty,
+ const RegisterBank &RB,
+ unsigned Opc) const {
+ bool Isload = (Opc == TargetOpcode::G_LOAD);
+ if (Ty == LLT::pointer(0, 32) && X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV32rm : X86::MOV32mr;
+ if (Ty == LLT::pointer(0, 64) && X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV64rm : X86::MOV64mr;
+ return Opc;
+}
+
unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
const RegisterBank &RB,
unsigned Opc,
@@ -460,7 +476,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
} else if (Ty == LLT::scalar(16)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV16rm : X86::MOV16mr;
- } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
+ } else if (Ty == LLT::scalar(32)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV32rm : X86::MOV32mr;
if (X86::VECRRegBankID == RB.getID())
@@ -472,7 +488,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
X86::MOVSSmr);
if (X86::PSRRegBankID == RB.getID())
return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
- } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
+ } else if (Ty == LLT::scalar(64)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV64rm : X86::MOV64mr;
if (X86::VECRRegBankID == RB.getID())
@@ -530,30 +546,75 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
}
// Fill in an address from the given instruction.
-static void X86SelectAddress(const MachineInstr &I,
+static bool X86SelectAddress(MachineInstr &I, const X86TargetMachine &TM,
const MachineRegisterInfo &MRI,
- X86AddressMode &AM) {
- assert(I.getOperand(0).isReg() && "unsupported opperand.");
+ const X86Subtarget &STI, X86AddressMode &AM) {
+ assert(I.getOperand(0).isReg() && "unsupported operand.");
assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
"unsupported type.");
- if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_FRAME_INDEX:
+ AM.Base.FrameIndex = I.getOperand(1).getIndex();
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ return true;
+ case TargetOpcode::G_PTR_ADD: {
if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
if (isInt<32>(Imm)) { // Check for displacement overflow.
AM.Disp = static_cast<int32_t>(Imm);
AM.Base.Reg = I.getOperand(1).getReg();
- return;
+ return true;
}
}
- } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
- AM.Base.FrameIndex = I.getOperand(1).getIndex();
- AM.BaseType = X86AddressMode::FrameIndexBase;
- return;
+ break;
}
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ auto GV = I.getOperand(1).getGlobal();
+ if (GV->isThreadLocal()) {
+ return false; // TODO: we don't support TLS yet.
+ }
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+ AM.GV = GV;
+ AM.GVOpFlags = STI.classifyGlobalReference(GV);
+
+ // TODO: The ABI requires an extra load. not supported yet.
+ if (isGlobalStubReference(AM.GVOpFlags))
+ return false;
+ // TODO: This reference is relative to the pic base. not supported yet.
+ if (isGlobalRelativeToPICBase(AM.GVOpFlags))
+ return false;
+
+ if (STI.isPICStyleRIPRel()) {
+ // Use rip-relative addressing.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ return true;
+ }
+ case TargetOpcode::G_CONSTANT_POOL: {
+ // TODO: Need a separate move for Large model
+ if (TM.getCodeModel() == CodeModel::Large)
+ return false;
+
+ AM.GVOpFlags = STI.classifyLocalReference(nullptr);
+ if (AM.GVOpFlags == X86II::MO_GOTOFF)
+ AM.Base.Reg = STI.getInstrInfo()->getGlobalBaseReg(I.getMF());
+ else if (STI.is64Bit())
+ AM.Base.Reg = X86::RIP;
+ AM.CP = true;
+ AM.Disp = I.getOperand(1).getIndex();
+ return true;
+ }
+ }
// Default behavior.
AM.Base.Reg = I.getOperand(0).getReg();
+ return true;
}
bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
@@ -586,36 +647,18 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
}
}
- unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign());
+ unsigned NewOpc = getPtrLoadStoreOp(Ty, RB, Opc);
if (NewOpc == Opc)
return false;
I.setDesc(TII.get(NewOpc));
MachineInstrBuilder MIB(MF, I);
- const MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg());
-
- if (Ptr->getOpcode() == TargetOpcode::G_CONSTANT_POOL) {
- assert(Opc == TargetOpcode::G_LOAD &&
- "Only G_LOAD from constant pool is expected");
- // TODO: Need a separate move for Large model
- if (TM.getCodeModel() == CodeModel::Large)
- return false;
-
- unsigned char OpFlag = STI.classifyLocalReference(nullptr);
- unsigned PICBase = 0;
- if (OpFlag == X86II::MO_GOTOFF)
- PICBase = TII.getGlobalBaseReg(&MF);
- else if (STI.is64Bit())
- PICBase = X86::RIP;
-
- I.removeOperand(1);
- addConstantPoolReference(MIB, Ptr->getOperand(1).getIndex(), PICBase,
- OpFlag);
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
+ MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg());
X86AddressMode AM;
- X86SelectAddress(*Ptr, MRI, AM);
+ if (!X86SelectAddress(*Ptr, TM, MRI, STI, AM))
+ return false;
+
if (Opc == TargetOpcode::G_LOAD) {
I.removeOperand(1);
addFullAddress(MIB, AM);
@@ -673,33 +716,10 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
"unexpected instruction");
- auto GV = I.getOperand(1).getGlobal();
- if (GV->isThreadLocal()) {
- return false; // TODO: we don't support TLS yet.
- }
-
- // Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
- return false;
-
X86AddressMode AM;
- AM.GV = GV;
- AM.GVOpFlags = STI.classifyGlobalReference(GV);
-
- // TODO: The ABI requires an extra load. not supported yet.
- if (isGlobalStubReference(AM.GVOpFlags))
+ if (!X86SelectAddress(I, TM, MRI, STI, AM))
return false;
- // TODO: This reference is relative to the pic base. not supported yet.
- if (isGlobalRelativeToPICBase(AM.GVOpFlags))
- return false;
-
- if (STI.isPICStyleRIPRel()) {
- // Use rip-relative addressing.
- assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
- AM.Base.Reg = X86::RIP;
- }
-
const Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
unsigned NewOpc = getLeaOP(Ty, STI);
@@ -1880,6 +1900,49 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
return true;
}
+InstructionSelector::ComplexRendererFns
+X86InstructionSelector::selectAddr(MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineIRBuilder MIRBuilder(*MI);
+
+ MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+ MachineInstr *Ptr = MRI.getVRegDef(Root.getReg());
+ X86AddressMode AM;
+ X86SelectAddress(*Ptr, TM, MRI, STI, AM);
+
+ if (AM.Scale != 1)
+ return std::nullopt;
+
+ if (AM.IndexReg)
+ return std::nullopt;
+
+ return {// Base
+ {[=](MachineInstrBuilder &MIB) {
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addUse(AM.Base.Reg);
+ else {
+ assert(AM.BaseType == X86AddressMode::FrameIndexBase &&
+ "Unknown type of address base");
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ }
+ },
+ // Scale
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(AM.Scale); },
+ // Index
+ [=](MachineInstrBuilder &MIB) { MIB.addUse(0); },
+ // Disp
+ [=](MachineInstrBuilder &MIB) {
+ if (AM.GV)
+ MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
+ else if (AM.CP)
+ MIB.addConstantPoolIndex(AM.Disp, 0, AM.GVOpFlags);
+ else
+ MIB.addImm(AM.Disp);
+ },
+ // Segment
+ [=](MachineInstrBuilder &MIB) { MIB.addUse(0); }}};
+}
+
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
const X86Subtarget &Subtarget,
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 24bf0dd378641..83bca5adde3b9 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -368,22 +368,16 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
// load/store: add more corner cases
for (unsigned Op : {G_LOAD, G_STORE}) {
auto &Action = getActionDefinitionsBuilder(Op);
- Action.legalForTypesWithMemDesc({{s8, p0, s1, 1},
- {s8, p0, s8, 1},
- {s16, p0, s8, 1},
+ Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
{s16, p0, s16, 1},
- {s32, p0, s8, 1},
- {s32, p0, s16, 1},
{s32, p0, s32, 1},
{s80, p0, s80, 1},
{p0, p0, p0, 1},
{v4s8, p0, v4s8, 1}});
if (Is64Bit)
- Action.legalForTypesWithMemDesc({{s64, p0, s8, 1},
- {s64, p0, s16, 1},
- {s64, p0, s32, 1},
- {s64, p0, s64, 1},
+ Action.legalForTypesWithMemDesc({{s64, p0, s64, 1},
{v2s32, p0, v2s32, 1}});
+
if (HasSSE1)
Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}});
if (HasSSE2)
@@ -402,6 +396,22 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
{v32s16, p0, v32s16, 1},
{v16s32, p0, v16s32, 1},
{v8s64, p0, v8s64, 1}});
+
+ // X86 supports extending loads but not stores for GPRs
+ if (Op == G_LOAD) {
+ Action.legalForTypesWithMemDesc({{s8, p0, s1, 1},
+ {s16, p0, s8, 1},
+ {s32, p0, s8, 1},
+ {s32, p0, s16, 1}});
+ if (Is64Bit)
+ Action.legalForTypesWithMemDesc({{s64, p0, s8, 1},
+ {s64, p0, s16, 1},
+ {s64, p0, s32, 1}});
+ } else {
+ Action.customIf([=](const LegalityQuery &Query) {
+ return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
+ });
+ }
Action.widenScalarToNextPow2(0, /*Min=*/8)
.clampScalar(0, s8, sMaxScalar)
.scalarize(0);
@@ -655,6 +665,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeFPTOUI(MI, MRI, Helper);
case TargetOpcode::G_UITOFP:
return legalizeUITOFP(MI, MRI, Helper);
+ case TargetOpcode::G_STORE:
+ return legalizeNarrowingStore(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -749,6 +761,22 @@ bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
return false;
}
+bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ auto &Store = cast<GStore>(MI);
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineMemOperand &MMO = **Store.memoperands_begin();
+ MachineFunction &MF = MIRBuilder.getMF();
+ LLT ValTy = MRI.getType(Store.getValueReg());
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), ValTy);
+
+ Helper.Observer.changingInstr(Store);
+ Store.setMemRefs(MF, {NewMMO});
+ Helper.Observer.changedInstr(Store);
+ return true;
+}
+
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 39bd9892e2f16..54f776456397b 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -45,6 +45,9 @@ class X86LegalizerInfo : public LegalizerInfo {
bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+
+ bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/X86InstrBuilder.h b/llvm/lib/Target/X86/X86InstrBuilder.h
index 45c5f8aa82e97..4243c831726d6 100644
--- a/llvm/lib/Target/X86/X86InstrBuilder.h
+++ b/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -55,10 +55,11 @@ struct X86AddressMode {
int Disp;
const GlobalValue *GV;
unsigned GVOpFlags;
+ bool CP;
X86AddressMode()
- : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(nullptr),
- GVOpFlags(0) {
+ : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(nullptr),
+ GVOpFlags(0), CP(false) {
Base.Reg = 0;
}
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index a7ef72be9316f..f9d70d1bb5d85 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -354,6 +354,8 @@ def X86cmpccxadd : SDNode<"X86ISD::CMPCCXADD", SDTX86Cmpccxadd,
// Define X86-specific addressing mode.
let WantsParent = true in
def addr : ComplexPattern<iPTR, 5, "selectAddr">;
+def gi_addr : GIComplexOperandMatcher<s32, "selectAddr">,
+ GIComplexPatternEquiv<addr>;
def lea32addr : ComplexPattern<i32, 5, "selectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, xor, frameindex],
[]>;
@@ -444,7 +446,11 @@ def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
//
def imm_su : PatLeaf<(imm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
+}]> {
+ // TODO : introduce the same check as in SDAG
+ let GISelPredicateCode = [{ return true; }];
+}
+
def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
@@ -500,7 +506,9 @@ def loadi8 : PatFrag<(ops node:$ptr), (i8 (unindexedload node:$ptr)), [{
ISD::LoadExtType ExtType = LD->getExtensionType();
return ExtType == ISD::NON_EXTLOAD || ExtType == ISD::EXTLOAD ||
ExtType == ISD::ZEXTLOAD;
-}]>;
+}]> {
+ let GISelPredicateCode = [{ return isa<GLoad>(MI); }];
+}
// It's always safe to treat a anyext i16 load as a i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
@@ -512,7 +520,16 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
return LD->getAlign() >= 2 && LD->isSimple();
return false;
-}]>;
+}]> {
+ let GISelPredicateCode = [{
+ auto &Load = cast<GLoad>(MI);
+ LLT Ty = MRI.getType(Load.getDstReg());
+ // Non extending load has MMO and destination types of the same size
+ if (Load.getMemSizeInBits() == Ty.getSizeInBits())
+ return true;
+ return Load.getAlign() >= 2 && Load.isSimple();
+ }];
+}
def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
@@ -522,7 +539,16 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
return LD->getAlign() >= 4 && LD->isSimple();
return false;
-}]>;
+}]> {
+ let GISelPredicateCode = [{
+ auto &Load = cast<GLoad>(MI);
+ LLT Ty = MRI.getType(Load.getDstReg());
+ // Non extending load has MMO and destination types of the same size
+ if (Load.getMemSizeInBits() == Ty.getSizeInBits())
+ return true;
+ return Load.getAlign() >= 4 && Load.isSimple();
+ }];
+}
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
def loadf16 : PatFrag<(ops node:$ptr), (f16 (load node:$ptr))>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index de70570481fc2..0c20ffed77e77 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1008,13 +1008,23 @@ def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
auto *St = cast<StoreSDNode>(N);
return St->getAlign() >= St->getMemoryVT().getStoreSize();
-}]>;
+}]> {
+ let GISelPredicateCode = [{
+ auto &LdSt = cast<GLoadStore>(MI);
+ return LdSt.getAlign() >= LdSt.getMemSize().getValue();
+ }];
+}
// Like 'load', but always requires vector size alignment.
def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
auto *Ld = cast<LoadSDNode>(N);
return Ld->getAlign() >= Ld->getMemoryVT().getStoreSize();
-}]>;
+}]> {
+ let GISelPredicateCode = [{
+ auto &LdSt = cast<GLoadStore>(MI);
+ return LdSt.getAlign() >= LdSt.getMemSize().getValue();
+ }];
+}
// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
diff --git a/llvm/test/CodeGen/X86/GlobalISel/GV.ll b/llvm/test/CodeGen/X86/GlobalISel/GV.ll
index 8c14b54864752...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Tentative ping.
; GISEL-X86-LABEL: or_imm8_i32:
; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: movl $-5, %eax
; GISEL-X86-NEXT: orl {{[0-9]+}}(%esp), %eax |
Is GISel missing constant to RHS canonicalization or is something else going on?
No, GISel matches differently compared to SDAG due to the absence of a target hook in `GIM_CheckIsSafeToFold` similar to `IsProfitableToFold` in `OPC_CheckFoldableChainNode`.
unsigned X86InstructionSelector::getPtrLoadStoreOp(const LLT &Ty,
                                                   const RegisterBank &RB,
                                                   unsigned Opc) const {
bool Isload = (Opc == TargetOpcode::G_LOAD); |
assert((Opc == TargetOpcode::G_LOAD || Opc == TargetOpcode::G_STORE) && "Unknown memory opcode");
Thanks! I blindly copied existing code but should've reread it.
unsigned X86InstructionSelector::getPtrLoadStoreOp(const LLT &Ty,
                                                   const RegisterBank &RB,
                                                   unsigned Opc) const {
bool Isload = (Opc == TargetOpcode::G_LOAD); |
(style) Isload -> IsLoad
if (STI.isPICStyleRIPRel()) {
  // Use rip-relative addressing.
assert(AM.Base.Reg == 0 && AM.IndexReg == 0); |
(style) assert message
I also collected stats: enabling ...
Fix clang-format warnings?
Done. I think the column structure is more readable, but I don't mind.
@RKSimon ping
if (Ty == LLT::pointer(0, 32) && X86::GPRRegBankID == RB.getID())
  return IsLoad ? X86::MOV32rm : X86::MOV32mr;
if (Ty == LLT::pointer(0, 64) && X86::GPRRegBankID == RB.getID()) |
Check isPointer and the size? This will fail on non-0 address spaces
I think we'll start seeing problems in the Legalizer because there are a lot of assumptions about `p0`, for stores and loads in particular. But I've changed it anyway.
Is the X86 backend known to use non-zero addr spaces?
I can think of two use cases:
- `FS`, `GS` segment registers
- User-defined address spaces. IIRC, garbage collection passes assume that managed pointers should be in address space 1.

But I think this code will be fixed before we start caring about different address spaces.
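For context (not part of this patch): LLVM already models the x86 segment registers as non-zero address spaces, 256 for GS and 257 for FS. A minimal IR sketch, with an illustrative function name:

; Address space 257 is FS-relative on x86 (256 is GS-relative);
; the load below is expected to lower to an %fs:-prefixed access.
define i32 @load_fs_relative(ptr addrspace(257) %p) {
  %v = load i32, ptr addrspace(257) %p, align 4
  ret i32 %v
}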
Ping
LGTM - cheers (the X32 check prefix tweak isn't required - although it would be nice to clean these up at some point).
@@ -32,7 +32,7 @@ define i64 @test_ret_i64() {
define i8 @test_arg_i8(i8 %a) {
; X32-LABEL: test_arg_i8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
(pet peeve) - we should use X86 for i686 checks and use X32 for gnux32 triples (this goes 100x for calling convention tests!)
I'm going to introduce fastcall convention support and will address this issue in the corresponding tests.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/16334. Here is the relevant piece of the build log for reference:
addr matching was the only gatekeeper for starting to select G_LOAD and G_STORE using SDAG patterns.

* Introduce a complex renderer gi_addr for addr. In this patch only the existing functionality has been implemented. The renderer's name is the same as in SDAG: selectAddr. Apparently the type of GIComplexOperandMatcher doesn't matter, as RISCV also uses s32 for both 64 and 32 bit pointers.
* X86SelectAddress is now used for both pattern matching and manual selection. As a result it accumulates all the code that was previously distributed among different selection functions.
* Replace getLoadStoreOp with getPtrLoadStoreOp in the Load/Store selector, as the GlobalISel matcher or emitter can't map the pointer type onto the i32/i64 types used in SDAG patterns for pointers. So the load and store selection of pointers is still manual. getLoadStoreOp is still present because it is used in G_FCONSTANT lowering, which requires extra effort to select using SDAG patterns.
* Since truncating stores are not supported, we custom legalize them by matching the types of the store and its MMO.
* Introduce a constant pool flag in X86AddressMode because otherwise we would need to introduce a GlobalISel copy of X86ISelAddressMode.
* Also please notice in the tests that GlobalISel prefers to fold memory operands immediately compared to SDAG. The reason is that GlobalISel doesn't have target hooks in GIM_CheckIsSafeToFold. Or maybe another check on profitability is required along with the safety check that is currently not present.