Skip to content

Commit a356e6c

Browse files
authored
[SelectionDAG] Expand fixed point multiplication into libcall (#79352)
32-bit ARMv6 with Thumb doesn't support MULHS/MUL_LOHI as legal/custom nodes during expansion, which causes fixed-point multiplication of _Accum types to fail. Prior to this, fixed-point multiplication only worked because it happened to be used on platforms that support MULHS/MUL_LOHI. This patch checks whether the multiplication can instead be done via libcalls, which are provided by the ARM runtime. The same libcall attempts were already made elsewhere, so this patch refactors that libcall logic into its own functions, and the fixed-point expansion now calls and reuses that logic.
1 parent 6485600 commit a356e6c

File tree

7 files changed

+2038
-104
lines changed

7 files changed

+2038
-104
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5287,6 +5287,24 @@ class TargetLowering : public TargetLoweringBase {
52875287
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
52885288
SelectionDAG &DAG) const;
52895289

5290+
/// forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or
5291+
/// brute force via a wide multiplication. The expansion works by
5292+
/// attempting to do a multiplication on a wider type twice the size of the
5293+
/// original operands. LL and LH represent the lower and upper halves of the
5294+
/// first operand. RL and RH represent the lower and upper halves of the
5295+
/// second operand. The lower and upper halves of the result are stored in Lo
5296+
/// and Hi, respectively.
/// WideVT selects which MUL libcall is tried (i16, i32, i64 or i128); when no
/// such libcall is available the brute-force expansion is used instead.
/// Signed is forwarded to the libcall as its sign-extension option.
5297+
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5298+
EVT WideVT, const SDValue LL, const SDValue LH,
5299+
const SDValue RL, const SDValue RH, SDValue &Lo,
5300+
SDValue &Hi) const;
5301+
5302+
/// Same as above, but creates the upper halves of each operand by
5303+
/// sign/zero-extending the operands.
/// LHS and RHS must have the same type; they are used as the lower halves of
/// a multiplication performed on an integer type twice their width. When
/// Signed is true the upper halves are the sign bits of LHS/RHS, otherwise
/// they are zero. Lo and Hi receive the lower and upper halves of the result.
5304+
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5305+
const SDValue LHS, const SDValue RHS, SDValue &Lo,
5306+
SDValue &Hi) const;
5307+
52905308
/// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
52915309
/// only the first Count elements of the vector are used.
52925310
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 14 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4008,47 +4008,15 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
40084008
LC = RTLIB::MUL_I128;
40094009

40104010
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
4011-
// We'll expand the multiplication by brute force because we have no other
4012-
// options. This is a trivially-generalized version of the code from
4013-
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
4014-
// 4.3.1).
4015-
unsigned Bits = NVT.getSizeInBits();
4016-
unsigned HalfBits = Bits >> 1;
4017-
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
4018-
NVT);
4019-
SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
4020-
SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
4021-
4022-
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
4023-
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
4024-
4025-
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
4026-
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
4027-
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
4028-
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
4029-
4030-
SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
4031-
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
4032-
SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
4033-
SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
4034-
4035-
SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
4036-
DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
4037-
SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
4038-
4039-
SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
4040-
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
4041-
DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
4042-
Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
4043-
DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
4044-
4045-
Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
4046-
DAG.getNode(ISD::ADD, dl, NVT,
4047-
DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
4048-
DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
4011+
// Perform a wide multiplication where the wide type is the original VT and
4012+
// the 4 parts are the split arguments.
4013+
TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, VT, LL, LH, RL, RH, Lo,
4014+
Hi);
40494015
return;
40504016
}
40514017

4018+
// Note that we don't need to do a wide MUL here since we don't care about the
4019+
// upper half of the result if it exceeds VT.
40524020
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
40534021
TargetLowering::MakeLibCallOptions CallOptions;
40544022
CallOptions.setSExt(true);
@@ -4146,9 +4114,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
41464114
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
41474115
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
41484116
LL, LH, RL, RH)) {
4149-
report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
4150-
return;
4117+
Result.clear();
4118+
Result.resize(4);
4119+
4120+
SDValue LoTmp, HiTmp;
4121+
TLI.forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, LoTmp, HiTmp);
4122+
SplitInteger(LoTmp, Result[0], Result[1]);
4123+
SplitInteger(HiTmp, Result[2], Result[3]);
41514124
}
4125+
assert(Result.size() == 4 && "Unexpected number of partlets in the result");
41524126

41534127
unsigned NVTSize = NVT.getScalarSizeInBits();
41544128
assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 118 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -10149,6 +10149,122 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
1014910149
return DAG.getSelect(dl, VT, Cond, SatVal, Result);
1015010150
}
1015110151

10152+
// Multiply two wide values that are supplied as (low, high) halves: LL/LH for
// the first operand and RL/RH for the second. The low half of the product is
// written to Lo and the high half to Hi. If a MUL libcall exists for WideVT it
// is used; otherwise the product is built by brute force from operations on
// the halves' own type.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10153+
bool Signed, EVT WideVT,
10154+
const SDValue LL, const SDValue LH,
10155+
const SDValue RL, const SDValue RH,
10156+
SDValue &Lo, SDValue &Hi) const {
10157+
// We can fall back to a libcall with an illegal type for the MUL if we
10158+
// have a libcall big enough.
10159+
// Also, we can fall back to a division in some cases, but that's a big
10160+
// performance hit in the general case.
10161+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10162+
if (WideVT == MVT::i16)
10163+
LC = RTLIB::MUL_I16;
10164+
else if (WideVT == MVT::i32)
10165+
LC = RTLIB::MUL_I32;
10166+
else if (WideVT == MVT::i64)
10167+
LC = RTLIB::MUL_I64;
10168+
else if (WideVT == MVT::i128)
10169+
LC = RTLIB::MUL_I128;
10170+
10171+
// Brute force is used both when WideVT has no matching libcall enum and when
// the target provides no name for that libcall.
if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10172+
// We'll expand the multiplication by brute force because we have no other
10173+
// options. This is a trivially-generalized version of the code from
10174+
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10175+
// 4.3.1).
// Note: Signed is not consulted on this path; only the libcall path reads it.
10176+
EVT VT = LL.getValueType();
10177+
unsigned Bits = VT.getSizeInBits();
10178+
unsigned HalfBits = Bits >> 1;
10179+
// Mask keeps the low HalfBits bits of a VT-sized value.
SDValue Mask =
10180+
DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10181+
// LLL / RLL: low HalfBits of the low halves LL / RL.
SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10182+
SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10183+
10184+
// T = LLL * RLL, split into its low (TL) and high (TH) HalfBits.
SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10185+
SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10186+
10187+
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10188+
SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10189+
// LLH / RLH: high HalfBits of the low halves LL / RL.
SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10190+
SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10191+
10192+
// U = LLH * RLL + TH, split into UL / UH.
SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10193+
DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10194+
SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10195+
SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10196+
10197+
// V = LLL * RLH + UL; VH is its high HalfBits.
SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10198+
DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10199+
SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10200+
10201+
// W = LLH * RLH + UH + VH: the part of LL * RL that lands in the high half.
SDValue W =
10202+
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10203+
DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10204+
// Low half of the result: TL + (V << HalfBits).
Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10205+
DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10206+
10207+
// High half of the result: W plus the cross products RH * LL and RL * LH.
Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10208+
DAG.getNode(ISD::ADD, dl, VT,
10209+
DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10210+
DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10211+
} else {
10212+
// Attempt a libcall.
10213+
SDValue Ret;
10214+
TargetLowering::MakeLibCallOptions CallOptions;
10215+
CallOptions.setSExt(Signed);
10216+
CallOptions.setIsPostTypeLegalization(true);
10217+
// Each wide operand is passed as two separate half arguments.
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10218+
// Halves of WideVT are packed into registers in different order
10219+
// depending on platform endianness. This is usually handled by
10220+
// the C calling convention, but we can't defer to it in
10221+
// the legalizer.
10222+
SDValue Args[] = {LL, LH, RL, RH};
10223+
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10224+
} else {
10225+
SDValue Args[] = {LH, LL, RH, RL};
10226+
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10227+
}
10228+
// The two result halves are read back as operands 0 and 1 of Ret below.
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10229+
"Ret value is a collection of constituent nodes holding result.");
10230+
if (DAG.getDataLayout().isLittleEndian()) {
10231+
// Same as above.
10232+
Lo = Ret.getOperand(0);
10233+
Hi = Ret.getOperand(1);
10234+
} else {
10235+
Lo = Ret.getOperand(1);
10236+
Hi = Ret.getOperand(0);
10237+
}
10238+
}
10239+
}
10240+
10241+
// Convenience overload: multiplies LHS by RHS on an integer type twice their
// width. LHS and RHS serve as the low halves; the high halves are synthesized
// here (sign bits when Signed, zero otherwise) before delegating to the
// four-half overload. Lo and Hi receive the result halves from that overload.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10242+
bool Signed, const SDValue LHS,
10243+
const SDValue RHS, SDValue &Lo,
10244+
SDValue &Hi) const {
10245+
EVT VT = LHS.getValueType();
10246+
assert(RHS.getValueType() == VT && "Mismatching operand types");
10247+
10248+
SDValue HiLHS;
10249+
SDValue HiRHS;
10250+
if (Signed) {
10251+
// The high part is obtained by SRA'ing all but one of the bits of low
10252+
// part.
// NOTE(review): the shift amount constant is created with the pointer type,
// whereas the brute-force path of the four-half overload uses
// DAG.getShiftAmountConstant — confirm whether the shift-amount type
// should be used here as well.
10253+
unsigned LoSize = VT.getFixedSizeInBits();
10254+
HiLHS = DAG.getNode(
10255+
ISD::SRA, dl, VT, LHS,
10256+
DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10257+
HiRHS = DAG.getNode(
10258+
ISD::SRA, dl, VT, RHS,
10259+
DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10260+
} else {
10261+
// Unsigned: the zero-extended high halves are simply 0.
HiLHS = DAG.getConstant(0, dl, VT);
10262+
HiRHS = DAG.getConstant(0, dl, VT);
10263+
}
10264+
// The wide type is the integer type with twice as many bits as VT.
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10265+
forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10266+
}
10267+
1015210268
SDValue
1015310269
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
1015410270
assert((Node->getOpcode() == ISD::SMULFIX ||
@@ -10223,7 +10339,7 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
1022310339
} else if (VT.isVector()) {
1022410340
return SDValue();
1022510341
} else {
10226-
report_fatal_error("Unable to expand fixed point multiplication.");
10342+
forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
1022710343
}
1022810344

1022910345
if (Scale == VTSize)
@@ -10522,69 +10638,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
1052210638
if (VT.isVector())
1052310639
return false;
1052410640

10525-
// We can fall back to a libcall with an illegal type for the MUL if we
10526-
// have a libcall big enough.
10527-
// Also, we can fall back to a division in some cases, but that's a big
10528-
// performance hit in the general case.
10529-
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10530-
if (WideVT == MVT::i16)
10531-
LC = RTLIB::MUL_I16;
10532-
else if (WideVT == MVT::i32)
10533-
LC = RTLIB::MUL_I32;
10534-
else if (WideVT == MVT::i64)
10535-
LC = RTLIB::MUL_I64;
10536-
else if (WideVT == MVT::i128)
10537-
LC = RTLIB::MUL_I128;
10538-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
10539-
10540-
SDValue HiLHS;
10541-
SDValue HiRHS;
10542-
if (isSigned) {
10543-
// The high part is obtained by SRA'ing all but one of the bits of low
10544-
// part.
10545-
unsigned LoSize = VT.getFixedSizeInBits();
10546-
HiLHS =
10547-
DAG.getNode(ISD::SRA, dl, VT, LHS,
10548-
DAG.getConstant(LoSize - 1, dl,
10549-
getPointerTy(DAG.getDataLayout())));
10550-
HiRHS =
10551-
DAG.getNode(ISD::SRA, dl, VT, RHS,
10552-
DAG.getConstant(LoSize - 1, dl,
10553-
getPointerTy(DAG.getDataLayout())));
10554-
} else {
10555-
HiLHS = DAG.getConstant(0, dl, VT);
10556-
HiRHS = DAG.getConstant(0, dl, VT);
10557-
}
10558-
10559-
// Here we're passing the 2 arguments explicitly as 4 arguments that are
10560-
// pre-lowered to the correct types. This all depends upon WideVT not
10561-
// being a legal type for the architecture and thus has to be split to
10562-
// two arguments.
10563-
SDValue Ret;
10564-
TargetLowering::MakeLibCallOptions CallOptions;
10565-
CallOptions.setSExt(isSigned);
10566-
CallOptions.setIsPostTypeLegalization(true);
10567-
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10568-
// Halves of WideVT are packed into registers in different order
10569-
// depending on platform endianness. This is usually handled by
10570-
// the C calling convention, but we can't defer to it in
10571-
// the legalizer.
10572-
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
10573-
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10574-
} else {
10575-
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
10576-
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10577-
}
10578-
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10579-
"Ret value is a collection of constituent nodes holding result.");
10580-
if (DAG.getDataLayout().isLittleEndian()) {
10581-
// Same as above.
10582-
BottomHalf = Ret.getOperand(0);
10583-
TopHalf = Ret.getOperand(1);
10584-
} else {
10585-
BottomHalf = Ret.getOperand(1);
10586-
TopHalf = Ret.getOperand(0);
10587-
}
10641+
forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
1058810642
}
1058910643

1059010644
Result = BottomHalf;

0 commit comments

Comments
 (0)