@@ -22901,6 +22901,10 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
22901
22901
SelectionDAG &DAG) {
22902
22902
SDLoc dl(Op);
22903
22903
MVT VT = Op.getSimpleValueType();
22904
+ bool IsSigned = Op->getOpcode() == ISD::MULHS;
22905
+ unsigned NumElts = VT.getVectorNumElements();
22906
+ SDValue A = Op.getOperand(0);
22907
+ SDValue B = Op.getOperand(1);
22904
22908
22905
22909
// Decompose 256-bit ops into smaller 128-bit ops.
22906
22910
if (VT.is256BitVector() && !Subtarget.hasInt256())
@@ -22910,9 +22914,6 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
22910
22914
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
22911
22915
(VT == MVT::v8i32 && Subtarget.hasInt256()) ||
22912
22916
(VT == MVT::v16i32 && Subtarget.hasAVX512()));
22913
- SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
22914
-
22915
- int NumElts = VT.getVectorNumElements();
22916
22917
22917
22918
// PMULxD operations multiply each even value (starting at 0) of LHS with
22918
22919
// the related value of RHS and produce a widened result.
@@ -22929,23 +22930,22 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
22929
22930
const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
22930
22931
9, -1, 11, -1, 13, -1, 15, -1};
22931
22932
// <a|b|c|d> => <b|undef|d|undef>
22932
- SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0 ,
22933
+ SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A ,
22933
22934
makeArrayRef(&Mask[0], NumElts));
22934
22935
// <e|f|g|h> => <f|undef|h|undef>
22935
- SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1 ,
22936
+ SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B ,
22936
22937
makeArrayRef(&Mask[0], NumElts));
22937
22938
22938
22939
// Emit two multiplies, one for the even-indexed ints and one for the
22939
22940
// odd-indexed ints.
22940
22941
MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
22941
- bool IsSigned = Op->getOpcode() == ISD::MULHS;
22942
22942
unsigned Opcode =
22943
- (! IsSigned || ! Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ ;
22943
+ (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ ;
22944
22944
// PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
22945
22945
// => <2 x i64> <ae|cg>
22946
22946
SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
22947
- DAG.getBitcast(MulVT, Op0 ),
22948
- DAG.getBitcast(MulVT, Op1 )));
22947
+ DAG.getBitcast(MulVT, A ),
22948
+ DAG.getBitcast(MulVT, B )));
22949
22949
// PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
22950
22950
// => <2 x i64> <bf|dh>
22951
22951
SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
@@ -22954,7 +22954,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
22954
22954
22955
22955
// Shuffle it back into the right order.
22956
22956
SmallVector<int, 16> ShufMask(NumElts);
22957
- for (int i = 0; i != NumElts; ++i)
22957
+ for (int i = 0; i != (int) NumElts; ++i)
22958
22958
ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
22959
22959
22960
22960
SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);
@@ -22964,9 +22964,9 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
22964
22964
if (IsSigned && !Subtarget.hasSSE41()) {
22965
22965
SDValue ShAmt = DAG.getConstant(31, dl, VT);
22966
22966
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
22967
- DAG.getNode(ISD::SRA, dl, VT, Op0 , ShAmt), Op1 );
22967
+ DAG.getNode(ISD::SRA, dl, VT, A , ShAmt), B );
22968
22968
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
22969
- DAG.getNode(ISD::SRA, dl, VT, Op1 , ShAmt), Op0 );
22969
+ DAG.getNode(ISD::SRA, dl, VT, B , ShAmt), A );
22970
22970
22971
22971
SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
22972
22972
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
@@ -22982,14 +22982,11 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
22982
22982
22983
22983
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
22984
22984
// logical shift down the upper half and pack back to i8.
22985
- SDValue A = Op.getOperand(0);
22986
- SDValue B = Op.getOperand(1);
22987
22985
22988
22986
// With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
22989
22987
// and then ashr/lshr the upper bits down to the lower bits before multiply.
22990
- unsigned Opcode = Op.getOpcode();
22991
- unsigned ExShift = (ISD::MULHU == Opcode ? X86ISD::VSRLI : X86ISD::VSRAI);
22992
- unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
22988
+ unsigned ExShift = IsSigned ? X86ISD::VSRAI : X86ISD::VSRLI;
22989
+ unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
22993
22990
22994
22991
// For 512-bit vectors, split into 256-bit vectors to allow the
22995
22992
// sign-extension to occur.
@@ -22998,9 +22995,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
22998
22995
22999
22996
// AVX2 implementations - extend xmm subvectors to ymm.
23000
22997
if (Subtarget.hasInt256()) {
23001
- unsigned NumElems = VT.getVectorNumElements();
23002
22998
SDValue Lo = DAG.getIntPtrConstant(0, dl);
23003
- SDValue Hi = DAG.getIntPtrConstant(NumElems / 2, dl);
22999
+ SDValue Hi = DAG.getIntPtrConstant(NumElts / 2, dl);
23004
23000
23005
23001
if (VT == MVT::v32i8) {
23006
23002
if (Subtarget.canExtendTo512BW()) {
@@ -23014,8 +23010,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
23014
23010
MVT ExVT = MVT::v16i16;
23015
23011
SDValue ALo = extract128BitVector(A, 0, DAG, dl);
23016
23012
SDValue BLo = extract128BitVector(B, 0, DAG, dl);
23017
- SDValue AHi = extract128BitVector(A, NumElems / 2, DAG, dl);
23018
- SDValue BHi = extract128BitVector(B, NumElems / 2, DAG, dl);
23013
+ SDValue AHi = extract128BitVector(A, NumElts / 2, DAG, dl);
23014
+ SDValue BHi = extract128BitVector(B, NumElts / 2, DAG, dl);
23019
23015
ALo = DAG.getNode(ExAVX, dl, ExVT, ALo);
23020
23016
BLo = DAG.getNode(ExAVX, dl, ExVT, BLo);
23021
23017
AHi = DAG.getNode(ExAVX, dl, ExVT, AHi);
@@ -23054,8 +23050,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
23054
23050
assert(VT == MVT::v16i8 &&
23055
23051
"Pre-AVX2 support only supports v16i8 multiplication");
23056
23052
MVT ExVT = MVT::v8i16;
23057
- unsigned ExSSE41 = ISD::MULHU == Opcode ? ISD::ZERO_EXTEND_VECTOR_INREG
23058
- : ISD::SIGN_EXTEND_VECTOR_INREG ;
23053
+ unsigned ExSSE41 = IsSigned ? ISD::SIGN_EXTEND_VECTOR_INREG
23054
+ : ISD::ZERO_EXTEND_VECTOR_INREG ;
23059
23055
23060
23056
// Extract the lo parts and zero/sign extend to i16.
23061
23057
SDValue ALo, BLo;
@@ -23076,7 +23072,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
23076
23072
// Extract the hi parts and zero/sign extend to i16.
23077
23073
SDValue AHi, BHi;
23078
23074
if (Subtarget.hasSSE41()) {
23079
- const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
23075
+ const int ShufMask[] = { 8, 9, 10, 11, 12, 13, 14, 15,
23080
23076
-1, -1, -1, -1, -1, -1, -1, -1};
23081
23077
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
23082
23078
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
0 commit comments