Skip to content

Commit 45f51f9

Browse files
authored
[AArch64][GlobalISel] Select UMULL instruction (llvm#65469)
GlobalISel now selects `UMULL` and `UMULL2` instructions. A G_MUL instruction whose input operands both come from `ZEXT` operations is turned into a UMULL (and, likewise, one whose operands both come from `SEXT` operations is turned into an SMULL). G_MUL instructions with a v2s64 result type are always scalarised, except when they have the form `mul ( unmerge( ext ), unmerge( ext ))`. In that case the extend can be unmerged, and the unmerge in the middle folds away: `mul ( unmerge( ext ), unmerge( ext ))` => `mul ( unmerge( merge( ext( unmerge ))), unmerge( merge( ext( unmerge ))))` => `mul ( ext( unmerge ), ext( unmerge ))`
1 parent 75a71c2 commit 45f51f9

9 files changed

+1536
-441
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,13 @@ def mul_const : GICombineRule<
156156
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
157157
>;
158158

159+
// Combine rule rooted at a G_MUL: matchExtMulToMULL decides whether the
// multiply qualifies, and applyExtMulToMULL performs the rewrite.
def lower_mull : GICombineRule<
160+
(defs root:$root),
161+
(match (wip_match_opcode G_MUL):$root,
162+
[{ return matchExtMulToMULL(*${root}, MRI); }]),
163+
(apply [{ applyExtMulToMULL(*${root}, MRI, B, Observer); }])
164+
>;
165+
159166
def build_vector_to_dup : GICombineRule<
160167
(defs root:$root),
161168
(match (wip_match_opcode G_BUILD_VECTOR):$root,
@@ -232,7 +239,7 @@ def AArch64PostLegalizerLowering
232239
icmp_lowering, build_vector_lowering,
233240
lower_vector_fcmp, form_truncstore,
234241
vector_sext_inreg_to_shift,
235-
unmerge_ext_to_unmerge]> {
242+
unmerge_ext_to_unmerge, lower_mull]> {
236243
}
237244

238245
// Post-legalization combines which are primarily optimizations.

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,18 @@ def G_PREFETCH : AArch64GenericInstruction {
215215
let hasSideEffects = 1;
216216
}
217217

218+
// Generic instruction with one result and two source operands and no side
// effects; it is tied to the AArch64umull node by a GINodeEquiv in this file.
// NOTE(review): $dst, $src1 and $src2 all use type0 here, but
// applyExtMulToMULL builds this opcode from the un-extended sources of the
// original multiply -- confirm the shared type index is intended.
def G_UMULL : AArch64GenericInstruction {
219+
let OutOperandList = (outs type0:$dst);
220+
let InOperandList = (ins type0:$src1, type0:$src2);
221+
let hasSideEffects = 0;
222+
}
223+
224+
// Generic instruction with one result and two source operands and no side
// effects; it is tied to the AArch64smull node by a GINodeEquiv in this file.
// NOTE(review): $dst, $src1 and $src2 all use type0 here, but
// applyExtMulToMULL builds this opcode from the un-extended sources of the
// original multiply -- confirm the shared type index is intended.
def G_SMULL : AArch64GenericInstruction {
225+
let OutOperandList = (outs type0:$dst);
226+
let InOperandList = (ins type0:$src1, type0:$src2);
227+
let hasSideEffects = 0;
228+
}
229+
218230
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
219231
// expands into BSL/BIT/BIF after register allocation.
220232
def G_BSP : AArch64GenericInstruction {
@@ -255,6 +267,9 @@ def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
255267

256268
def : GINodeEquiv<G_BSP, AArch64bsp>;
257269

270+
def : GINodeEquiv<G_UMULL, AArch64umull>;
271+
def : GINodeEquiv<G_SMULL, AArch64smull>;
272+
258273
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
259274

260275
def : GINodeEquiv<G_PREFETCH, AArch64Prefetch>;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
119119
.clampScalar(0, s32, s64);
120120

121121
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
122-
.legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
123-
.scalarizeIf(
124-
[=](const LegalityQuery &Query) {
125-
return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
126-
},
127-
0)
128-
.legalFor({v2s64})
122+
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
129123
.widenScalarToNextPow2(0)
130124
.clampScalar(0, s32, s64)
131125
.clampMaxNumElements(0, s8, 16)

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,75 @@ void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
11101110
Observer.changedInstr(MI);
11111111
}
11121112

1113+
// Match a vector G_MUL of the form mul({z/s}ext, {z/s}ext) => {u/s}mull, OR
1114+
// match any v2s64 G_MUL, which applyExtMulToMULL will then scalarise.
1115+
// Both matches are done in one function to ensure that the order of matching
1116+
// will always be the same: the extend check comes first, so lowering MUL to
1117+
// MULL is tried before scalarising. A non-vector result never matches.
1118+
bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
1119+
// Look up the instruction defining each source operand (via getDefIgnoringCopies)
1120+
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1121+
MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1122+
MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1123+
1124+
if (DstTy.isVector()) {
1125+
// {U/S}MULL can be used if both sources are G_ZEXT (or both G_SEXT) and each extend exactly doubles the scalar width
1126+
unsigned I1Opc = I1->getOpcode();
1127+
unsigned I2Opc = I2->getOpcode();
1128+
if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1129+
(I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1130+
(MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1131+
MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1132+
(MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1133+
MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1134+
return true;
1135+
}
1136+
// Otherwise a v2s64 result still matches, so that the instruction can be scalarised
1137+
else if (DstTy == LLT::fixed_vector(2, 64)) {
1138+
return true;
1139+
}
1140+
}
1141+
return false;
1142+
}
1143+
1144+
// Rewrite a G_MUL for the lower_mull combine (paired with matchExtMulToMULL).
// If both operands are defined by G_ZEXT (or both by G_SEXT) and each extend
// doubles the scalar width, build a G_UMULL (or G_SMULL) from the un-extended
// sources into MI's destination register and erase MI. Otherwise, if the
// result type is v2s64, hand MI to LegalizerHelper::fewerElementsVector with
// the element count halved. Any other G_MUL is left untouched.
void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
1145+
MachineIRBuilder &B, GISelChangeObserver &Observer) {
1146+
assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1147+
"Expected a G_MUL instruction");
1148+
1149+
// Look up the instruction defining each source operand (via getDefIgnoringCopies)
1150+
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1151+
MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1152+
MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1153+
1154+
// Same extend test as matchExtMulToMULL: both G_ZEXT or both G_SEXT, each doubling the scalar width
1155+
unsigned I1Opc = I1->getOpcode();
1156+
unsigned I2Opc = I2->getOpcode();
1157+
if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1158+
(I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1159+
(MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1160+
MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1161+
(MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1162+
MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1163+
1164+
B.setInstrAndDebugLoc(MI);
1165+
B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
1166+
: AArch64::G_SMULL,
1167+
{MI.getOperand(0).getReg()},
1168+
{I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
1169+
MI.eraseFromParent();
1170+
}
1171+
// Otherwise, if the result type is v2s64, scalarise the instruction through LegalizerHelper
1172+
else if (DstTy == LLT::fixed_vector(2, 64)) {
1173+
LegalizerHelper Helper(*MI.getMF(), Observer, B);
1174+
B.setInstrAndDebugLoc(MI);
1175+
Helper.fewerElementsVector(
1176+
MI, 0,
1177+
DstTy.changeElementCount(
1178+
DstTy.getElementCount().divideCoefficientBy(2)));
1179+
}
1180+
}
1181+
11131182
class AArch64PostLegalizerLoweringImpl : public Combiner {
11141183
protected:
11151184
// TODO: Make CombinerHelper methods const.

llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,8 @@ body: |
175175
; CHECK-NEXT: {{ $}}
176176
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
177177
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
178-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
179-
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
180-
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[UV]], [[UV2]]
181-
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[UV1]], [[UV3]]
182-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL1]](s64)
183-
; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<2 x s64>)
178+
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY]], [[COPY1]]
179+
; CHECK-NEXT: $q0 = COPY [[MUL]](<2 x s64>)
184180
; CHECK-NEXT: RET_ReallyLR implicit $q0
185181
%0:_(<2 x s64>) = COPY $q0
186182
%1:_(<2 x s64>) = COPY $q1

llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,9 @@ body: |
203203
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
204204
; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]]
205205
; CHECK-NEXT: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]]
206-
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
207-
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SDIV]], [[UV4]]
208-
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SDIV1]], [[UV5]]
209-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL1]](s64)
210-
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s64>) = G_SUB [[COPY]], [[BUILD_VECTOR]]
206+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64)
207+
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR]], [[COPY1]]
208+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s64>) = G_SUB [[COPY]], [[MUL]]
211209
; CHECK-NEXT: $q0 = COPY [[SUB]](<2 x s64>)
212210
%0:_(<2 x s64>) = COPY $q0
213211
%1:_(<2 x s64>) = COPY $q1

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
2828
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
2929
#
30-
# DEBUG-NEXT: G_SDIV (opcode {{[0-9]+}}): 1 type index
30+
# DEBUG-NEXT: G_SDIV (opcode {{[0-9]+}}): 1 type index, 0 imm indices
3131
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
3232
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
3333
#

0 commit comments

Comments
 (0)