Skip to content

Commit 725a8a5

Browse files
committed
[X86] Custom lower AVX masked loads to masked load and vselect instead of selecting a maskmov+vblend during isel.
AVX masked loads only support 0 as the value for masked-off elements, so an extra blend is needed to support any other passthru value. Previously, isel patterns expanded such a masked load into two instructions (a maskmov followed by a vblend). With this patch, the vselect is instead inserted during lowering and is then selected separately as a blend. llvm-svn: 364718
1 parent 4d0feb2 commit 725a8a5

File tree

2 files changed

+29
-16
lines changed

2 files changed

+29
-16
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,7 +1266,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
12661266

12671267
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
12681268
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1269-
setOperationAction(ISD::MLOAD, VT, Legal);
1269+
setOperationAction(ISD::MLOAD, VT, Custom);
12701270
setOperationAction(ISD::MSTORE, VT, Legal);
12711271
}
12721272

@@ -1412,15 +1412,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
14121412
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
14131413
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
14141414

1415-
if (!Subtarget.hasVLX()) {
1416-
// With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1417-
// to 512-bit rather than use the AVX2 instructions so that we can use
1418-
// k-masks.
1419-
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1420-
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1421-
setOperationAction(ISD::MLOAD, VT, Custom);
1422-
setOperationAction(ISD::MSTORE, VT, Custom);
1423-
}
1415+
// With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1416+
// to 512-bit rather than use the AVX2 instructions so that we can use
1417+
// k-masks.
1418+
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1419+
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1420+
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1421+
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
14241422
}
14251423

14261424
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
@@ -26914,8 +26912,28 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
2691426912
MVT VT = Op.getSimpleValueType();
2691526913
MVT ScalarVT = VT.getScalarType();
2691626914
SDValue Mask = N->getMask();
26915+
MVT MaskVT = Mask.getSimpleValueType();
26916+
SDValue PassThru = N->getPassThru();
2691726917
SDLoc dl(Op);
2691826918

26919+
// Handle AVX masked loads which don't support passthru other than 0.
26920+
if (MaskVT.getVectorElementType() != MVT::i1) {
26921+
// We also allow undef in the isel pattern.
26922+
if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
26923+
return Op;
26924+
26925+
SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(),
26926+
N->getBasePtr(), Mask,
26927+
getZeroVector(VT, Subtarget, DAG, dl),
26928+
N->getMemoryVT(), N->getMemOperand(),
26929+
N->getExtensionType(),
26930+
N->isExpandingLoad());
26931+
// Emit a blend.
26932+
SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad,
26933+
PassThru);
26934+
return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
26935+
}
26936+
2691926937
assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
2692026938
"Expanding masked load is supported on AVX-512 target only!");
2692126939

@@ -26934,7 +26952,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
2693426952
// VLX the vector should be widened to 512 bit
2693526953
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
2693626954
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
26937-
SDValue PassThru = ExtendToType(N->getPassThru(), WideDataVT, DAG);
26955+
PassThru = ExtendToType(PassThru, WideDataVT, DAG);
2693826956

2693926957
// Mask element has to be i1.
2694026958
assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7757,11 +7757,6 @@ multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
77577757
def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
77587758
(VT immAllZerosV))),
77597759
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
7760-
def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
7761-
(!cast<Instruction>(BlendStr#"rr")
7762-
RC:$src0,
7763-
(VT (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)),
7764-
RC:$mask)>;
77657760
}
77667761
let Predicates = [HasAVX] in {
77677762
defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>;

0 commit comments

Comments
 (0)