Skip to content

Commit d88d983

Browse files
authored
[AArch64][GlobalISel] Support more types for TRUNC (#66927)
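A vector G_TRUNC is now lowered into trunc(merge(trunc(unmerge), trunc(unmerge))) when the source is wider than 128 bits or when the destination element is less than half the width of the source element (i.e. the truncation discards more than half of each element's bits). The lowering now mirrors the ZEXT/SEXT code more closely for vector types.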
1 parent 28bb219 commit d88d983

File tree

7 files changed

+1100
-86
lines changed

7 files changed

+1100
-86
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ class LegalizerHelper {
384384
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
385385
LegalizeResult lowerFunnelShift(MachineInstr &MI);
386386
LegalizeResult lowerEXT(MachineInstr &MI);
387+
LegalizeResult lowerTRUNC(MachineInstr &MI);
387388
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
388389
LegalizeResult lowerRotate(MachineInstr &MI);
389390

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3766,6 +3766,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
37663766
case G_SEXT:
37673767
case G_ANYEXT:
37683768
return lowerEXT(MI);
3769+
case G_TRUNC:
3770+
return lowerTRUNC(MI);
37693771
GISEL_VECREDUCE_CASES_NONSEQ
37703772
return lowerVectorReduction(MI);
37713773
}
@@ -5110,13 +5112,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
51105112
MI.eraseFromParent();
51115113
return Legalized;
51125114
}
5113-
case TargetOpcode::G_TRUNC: {
5114-
Observer.changingInstr(MI);
5115-
moreElementsVectorSrc(MI, MoreTy, 1);
5116-
moreElementsVectorDst(MI, MoreTy, 0);
5117-
Observer.changedInstr(MI);
5118-
return Legalized;
5119-
}
5115+
case TargetOpcode::G_TRUNC:
51205116
case TargetOpcode::G_FPTRUNC:
51215117
case TargetOpcode::G_FPEXT: {
51225118
if (TypeIdx != 0)
@@ -6165,6 +6161,63 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
61656161
return UnableToLegalize;
61666162
}
61676163

6164+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6165+
// MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6166+
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
6167+
// Similar to how operand splitting is done in SelectionDAG, we can handle
6168+
// %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6169+
// %inlo(<4 x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6170+
// %lo16(<4 x s16>) = G_TRUNC %inlo
6171+
// %hi16(<4 x s16>) = G_TRUNC %inhi
6172+
// %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6173+
// %res(<8 x s8>) = G_TRUNC %in16
6174+
6175+
assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6176+
6177+
Register DstReg = MI.getOperand(0).getReg();
6178+
Register SrcReg = MI.getOperand(1).getReg();
6179+
LLT DstTy = MRI.getType(DstReg);
6180+
LLT SrcTy = MRI.getType(SrcReg);
6181+
6182+
if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6183+
isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6184+
isPowerOf2_32(SrcTy.getNumElements()) &&
6185+
isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6186+
// Split input type.
6187+
LLT SplitSrcTy = SrcTy.changeElementCount(
6188+
SrcTy.getElementCount().divideCoefficientBy(2));
6189+
6190+
// First, split the source into two smaller vectors.
6191+
SmallVector<Register, 2> SplitSrcs;
6192+
extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
6193+
6194+
// Truncate the splits into intermediate narrower elements.
6195+
LLT InterTy;
6196+
if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6197+
InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6198+
else
6199+
InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6200+
for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6201+
SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6202+
}
6203+
6204+
// Combine the new truncates into one vector
6205+
auto Merge = MIRBuilder.buildMergeLikeInstr(
6206+
DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6207+
6208+
// Truncate the new vector to the final result type
6209+
if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6210+
MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6211+
else
6212+
MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6213+
6214+
MI.eraseFromParent();
6215+
6216+
return Legalized;
6217+
}
6218+
return UnableToLegalize;
6219+
}
6220+
61686221
LegalizerHelper::LegalizeResult
61696222
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
61706223
auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 13 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -536,14 +536,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
536536
});
537537

538538
getActionDefinitionsBuilder(G_TRUNC)
539+
.legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
540+
.moreElementsToNextPow2(0)
541+
.clampMaxNumElements(0, s8, 8)
542+
.clampMaxNumElements(0, s16, 4)
543+
.clampMaxNumElements(0, s32, 2)
539544
.minScalarOrEltIf(
540545
[=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
541546
0, s8)
542-
.customIf([=](const LegalityQuery &Query) {
547+
.lowerIf([=](const LegalityQuery &Query) {
543548
LLT DstTy = Query.Types[0];
544549
LLT SrcTy = Query.Types[1];
545-
return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
550+
return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 ||
551+
(DstTy.getScalarSizeInBits() * 2 <
552+
SrcTy.getScalarSizeInBits()));
546553
})
554+
547555
.alwaysLegal();
548556

549557
getActionDefinitionsBuilder(G_SEXT_INREG)
@@ -1002,8 +1010,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
10021010
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
10031011
case TargetOpcode::G_GLOBAL_VALUE:
10041012
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1005-
case TargetOpcode::G_TRUNC:
1006-
return legalizeVectorTrunc(MI, Helper);
10071013
case TargetOpcode::G_SBFX:
10081014
case TargetOpcode::G_UBFX:
10091015
return legalizeBitfieldExtract(MI, MRI, Helper);
@@ -1102,54 +1108,6 @@ bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
11021108
return true;
11031109
}
11041110

1105-
static void extractParts(Register Reg, MachineRegisterInfo &MRI,
1106-
MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
1107-
SmallVectorImpl<Register> &VRegs) {
1108-
for (int I = 0; I < NumParts; ++I)
1109-
VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
1110-
MIRBuilder.buildUnmerge(VRegs, Reg);
1111-
}
1112-
1113-
bool AArch64LegalizerInfo::legalizeVectorTrunc(
1114-
MachineInstr &MI, LegalizerHelper &Helper) const {
1115-
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1116-
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1117-
// Similar to how operand splitting is done in SelectiondDAG, we can handle
1118-
// %res(v8s8) = G_TRUNC %in(v8s32) by generating:
1119-
// %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
1120-
// %lo16(<4 x s16>) = G_TRUNC %inlo
1121-
// %hi16(<4 x s16>) = G_TRUNC %inhi
1122-
// %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
1123-
// %res(<8 x s8>) = G_TRUNC %in16
1124-
1125-
Register DstReg = MI.getOperand(0).getReg();
1126-
Register SrcReg = MI.getOperand(1).getReg();
1127-
LLT DstTy = MRI.getType(DstReg);
1128-
LLT SrcTy = MRI.getType(SrcReg);
1129-
assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
1130-
llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
1131-
1132-
// Split input type.
1133-
LLT SplitSrcTy =
1134-
SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
1135-
// First, split the source into two smaller vectors.
1136-
SmallVector<Register, 2> SplitSrcs;
1137-
extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
1138-
1139-
// Truncate the splits into intermediate narrower elements.
1140-
LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
1141-
for (unsigned I = 0; I < SplitSrcs.size(); ++I)
1142-
SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
1143-
1144-
auto Concat = MIRBuilder.buildConcatVectors(
1145-
DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
1146-
1147-
Helper.Observer.changingInstr(MI);
1148-
MI.getOperand(1).setReg(Concat.getReg(0));
1149-
Helper.Observer.changedInstr(MI);
1150-
return true;
1151-
}
1152-
11531111
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
11541112
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
11551113
GISelChangeObserver &Observer) const {
@@ -1319,6 +1277,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
13191277

13201278
return true;
13211279
}
1280+
case Intrinsic::experimental_vector_reverse:
1281+
// TODO: Add support for vector_reverse
1282+
return false;
13221283
}
13231284

13241285
return true;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
4646
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
4747
MachineIRBuilder &MIRBuilder,
4848
GISelChangeObserver &Observer) const;
49-
bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
5049
bool legalizeBitfieldExtract(MachineInstr &MI, MachineRegisterInfo &MRI,
5150
LegalizerHelper &Helper) const;
5251
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,

0 commit comments

Comments
 (0)