-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU/GlobalISel: add RegBankLegalize rules for extends and trunc #132383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,6 +133,43 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, | |
MI.eraseFromParent(); | ||
} | ||
|
||
// Lowers the extend instruction MI into a select on its source operand | ||
// (operand 1) between a "true" constant and 0, writing the result to the | ||
// destination (operand 0), and then erases MI. | ||
// Destination types S16, S32 and S64 are handled; any other type is | ||
// unreachable. In the S64 path only G_SEXT, G_ZEXT and G_ANYEXT are accepted. | ||
// NOTE(review): the rules that request VccExtToSel map an S1 source to Vcc; | ||
// this function itself does not check the source type or bank. | ||
void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) { | ||
Register Dst = MI.getOperand(0).getReg(); | ||
LLT Ty = MRI.getType(Dst); | ||
Register Src = MI.getOperand(1).getReg(); | ||
unsigned Opc = MI.getOpcode(); | ||
// Value selected when Src is true: -1 for G_SEXT, 1 for every other opcode. | ||
int TrueExtCst = Opc == G_SEXT ? -1 : 1; | ||
if (Ty == S32 || Ty == S16) { | ||
// S16/S32: a single select on Ty-typed constants writes Dst directly. | ||
auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst); | ||
auto False = B.buildConstant({VgprRB, Ty}, 0); | ||
B.buildSelect(Dst, Src, True, False); | ||
} else if (Ty == S64) { | ||
// S64: select the low 32-bit half, choose the high half per opcode, then | ||
// merge both halves into Dst. | ||
auto True = B.buildConstant({VgprRB_S32}, TrueExtCst); | ||
auto False = B.buildConstant({VgprRB_S32}, 0); | ||
auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False); | ||
MachineInstrBuilder Hi; | ||
switch (Opc) { | ||
case G_SEXT: | ||
// Lo is either 0 or -1, so reusing it as the high half replicates the sign. | ||
Hi = Lo; | ||
break; | ||
case G_ZEXT: | ||
// High half reuses the zero constant. | ||
Hi = False; | ||
break; | ||
case G_ANYEXT: | ||
// High half is left undefined. | ||
Hi = B.buildUndef({VgprRB_S32}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. poison, but not sure the poison PR got merged yet |
||
break; | ||
default: | ||
llvm_unreachable("Opcode not supported"); | ||
}
|
||
B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)}); | ||
} else { | ||
llvm_unreachable("Type not supported"); | ||
}
|
||
// The original extend has been fully replaced by the instructions built above. | ||
MI.eraseFromParent(); | ||
} | ||
|
||
static bool isSignedBFE(MachineInstr &MI) { | ||
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI)) | ||
return (GI->is(Intrinsic::amdgcn_sbfe)); | ||
|
@@ -256,26 +293,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, | |
switch (Mapping.LoweringMethod) { | ||
case DoNotLower: | ||
return; | ||
case VccExtToSel: { | ||
LLT Ty = MRI.getType(MI.getOperand(0).getReg()); | ||
Register Src = MI.getOperand(1).getReg(); | ||
unsigned Opc = MI.getOpcode(); | ||
if (Ty == S32 || Ty == S16) { | ||
auto True = B.buildConstant({VgprRB, Ty}, Opc == G_SEXT ? -1 : 1); | ||
auto False = B.buildConstant({VgprRB, Ty}, 0); | ||
B.buildSelect(MI.getOperand(0).getReg(), Src, True, False); | ||
} | ||
if (Ty == S64) { | ||
auto True = B.buildConstant({VgprRB, S32}, Opc == G_SEXT ? -1 : 1); | ||
auto False = B.buildConstant({VgprRB, S32}, 0); | ||
auto Sel = B.buildSelect({VgprRB, S32}, Src, True, False); | ||
B.buildMergeValues( | ||
MI.getOperand(0).getReg(), | ||
{Sel.getReg(0), Opc == G_SEXT ? Sel.getReg(0) : False.getReg(0)}); | ||
} | ||
MI.eraseFromParent(); | ||
return; | ||
} | ||
case VccExtToSel: | ||
return lowerVccExtToSel(MI); | ||
case UniExtToSel: { | ||
LLT Ty = MRI.getType(MI.getOperand(0).getReg()); | ||
auto True = B.buildConstant({SgprRB, Ty}, | ||
|
@@ -292,13 +311,23 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, | |
case Ext32To64: { | ||
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg()); | ||
MachineInstrBuilder Hi; | ||
|
||
if (MI.getOpcode() == AMDGPU::G_ZEXT) { | ||
switch (MI.getOpcode()) { | ||
case AMDGPU::G_ZEXT: { | ||
Hi = B.buildConstant({RB, S32}, 0); | ||
} else { | ||
break; | ||
} | ||
case AMDGPU::G_SEXT: { | ||
// Replicate sign bit from 32-bit extended part. | ||
auto ShiftAmt = B.buildConstant({RB, S32}, 31); | ||
Hi = B.buildAShr({RB, S32}, MI.getOperand(1).getReg(), ShiftAmt); | ||
break; | ||
} | ||
case AMDGPU::G_ANYEXT: { | ||
Hi = B.buildUndef({RB, S32}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. poison, but not sure the poison PR got merged yet There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this one #127825 ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
break; | ||
} | ||
default: | ||
llvm_unreachable("Unsuported Opcode in Ext32To64"); | ||
} | ||
|
||
B.buildMergeLikeInstr(MI.getOperand(0).getReg(), | ||
|
@@ -321,7 +350,7 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, | |
// compares all bits in register. | ||
Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty}); | ||
if (Ty == S64) { | ||
auto Src64 = B.buildUnmerge({VgprRB, Ty}, Src); | ||
auto Src64 = B.buildUnmerge(VgprRB_S32, Src); | ||
auto One = B.buildConstant(VgprRB_S32, 1); | ||
auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One); | ||
auto Zero = B.buildConstant(VgprRB_S32, 0); | ||
|
@@ -409,8 +438,11 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { | |
case Sgpr32AExt: | ||
case Sgpr32AExtBoolInReg: | ||
case Sgpr32SExt: | ||
case Sgpr32ZExt: | ||
case UniInVgprS32: | ||
case Vgpr32: | ||
case Vgpr32SExt: | ||
case Vgpr32ZExt: | ||
return LLT::scalar(32); | ||
case Sgpr64: | ||
case Vgpr64: | ||
|
@@ -521,6 +553,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { | |
case Sgpr32AExt: | ||
case Sgpr32AExtBoolInReg: | ||
case Sgpr32SExt: | ||
case Sgpr32ZExt: | ||
return SgprRB; | ||
case Vgpr16: | ||
case Vgpr32: | ||
|
@@ -537,6 +570,8 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { | |
case VgprB128: | ||
case VgprB256: | ||
case VgprB512: | ||
case Vgpr32SExt: | ||
case Vgpr32ZExt: | ||
return VgprRB; | ||
default: | ||
return nullptr; | ||
|
@@ -742,8 +777,8 @@ void RegBankLegalizeHelper::applyMappingSrc( | |
assert(Ty.getSizeInBits() == 1); | ||
assert(RB == SgprRB); | ||
auto Aext = B.buildAnyExt(SgprRB_S32, Reg); | ||
// Zext SgprS1 is not legal, this instruction is most of times meant to be | ||
// combined away in RB combiner, so do not make AND with 1. | ||
// Zext SgprS1 is not legal, so make AND with 1 instead. This instruction is | ||
// most of the time meant to be combined away in AMDGPURegBankCombiner. | ||
auto Cst1 = B.buildConstant(SgprRB_S32, 1); | ||
auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1); | ||
Op.setReg(BoolInReg.getReg(0)); | ||
|
@@ -756,6 +791,29 @@ void RegBankLegalizeHelper::applyMappingSrc( | |
Op.setReg(Sext.getReg(0)); | ||
break; | ||
} | ||
case Sgpr32ZExt: { | ||
assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32); | ||
assert(RB == SgprRB); | ||
auto Zext = B.buildZExt({SgprRB, S32}, Reg); | ||
Op.setReg(Zext.getReg(0)); | ||
break; | ||
} | ||
case Vgpr32SExt: { | ||
// Note this ext allows S1, and it is meant to be combined away. | ||
assert(Ty.getSizeInBits() < 32); | ||
assert(RB == VgprRB); | ||
auto Sext = B.buildSExt({VgprRB, S32}, Reg); | ||
Op.setReg(Sext.getReg(0)); | ||
break; | ||
} | ||
case Vgpr32ZExt: { | ||
// Note this ext allows S1, and it is meant to be combined away. | ||
assert(Ty.getSizeInBits() < 32); | ||
assert(RB == VgprRB); | ||
auto Zext = B.buildZExt({VgprRB, S32}, Reg); | ||
Op.setReg(Zext.getReg(0)); | ||
break; | ||
} | ||
default: | ||
llvm_unreachable("ID not supported"); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -489,22 +489,61 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, | |
.Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}}); | ||
|
||
addRulesForGOpcs({G_ANYEXT}) | ||
.Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away | ||
.Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away | ||
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}); | ||
.Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away | ||
.Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}}) | ||
.Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}}) | ||
.Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}}) | ||
.Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}}) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. unrelated to the patch: These should be better documented, otherwise it's very hard to read what's actually happening here. I had to go find 2 different struct signatures before getting an idea of what these lines do. A small comment on top would help. I also feel like we could eliminate one or even two sets of braces by just making them arguments, further helping readability. It could just be an overload that's preferred when manually writing the rules, and keep the current signature if we're pushing rules using a loop or something? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably could improve this one a bit. Originally I wanted to keep rules as one-liners. There are Uni and Div that are specialized and have fewer braces, and I think that almost all remaining opcodes are using them. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 on the need for documentation: It's hard to follow which of the parts serve, e.g., as patterns, replacements, or asserts. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 
Quick explanation for now, using .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}}) as the example: {Vgpr32} is the list of which register bank to apply on dst registers (check RegBankLegalizeHelper for details); {Vcc} is the list of which register bank to apply on source registers; VccExtToSel is the ID of a more complicated lowering method, for example this one is transforming G_ANYEXT to G_SELECT. There is a shorter, faster version when checking just the dst operand (for example, the specialized Uni and Div forms). In the first list ({DivS32, S1}) you don't have to check all operands, just check enough to decide what to do; the second two lists (for destination and source operands) need to cover all operands |
||
.Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}}) | ||
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}) | ||
.Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}}); | ||
|
||
// In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY. | ||
// It is up to user to deal with truncated bits. | ||
addRulesForGOpcs({G_TRUNC}) | ||
.Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away | ||
.Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away | ||
.Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away | ||
.Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}) | ||
.Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}}) | ||
.Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}}) | ||
.Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}}) | ||
// This is non-trivial. VgprToVccCopy is done using compare instruction. | ||
.Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}}); | ||
.Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}) | ||
.Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}}) | ||
.Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}}); | ||
|
||
addRulesForGOpcs({G_ZEXT, G_SEXT}) | ||
addRulesForGOpcs({G_ZEXT}) | ||
.Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}}) | ||
.Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}}) | ||
.Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}}) | ||
.Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}}) | ||
.Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}}) | ||
.Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}}) | ||
.Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}}) | ||
.Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}}) | ||
// not extending S16 to S32 is questionable. | ||
.Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}}) | ||
.Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}}) | ||
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}) | ||
.Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}}); | ||
|
||
addRulesForGOpcs({G_SEXT}) | ||
.Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}}) | ||
.Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}}) | ||
.Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}}) | ||
.Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}}) | ||
.Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}}) | ||
.Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}}) | ||
.Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}}) | ||
.Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}}); | ||
.Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}}) | ||
// not extending S16 to S32 is questionable. | ||
.Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}}) | ||
.Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}}) | ||
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}) | ||
.Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}}); | ||
|
||
bool hasUnalignedLoads = ST->getGeneration() >= AMDGPUSubtarget::GFX12; | ||
bool hasSMRDSmall = ST->hasScalarSubwordLoads(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about pointers and vectors?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is used for S1 {S|Z|A}EXT to S16/S32/S64, iirc pointers are not legal here, and vectors types are scalarized in legalizer