Skip to content

Commit c1cb733

Browse files
committed
[X86] Improve lowering of v16i8->v16i1 truncate under prefer-vector-width=256.
1 parent 42bf075 commit c1cb733

File tree

2 files changed

+17
-14
lines changed

2 files changed

+17
-14
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+13-8
Original file line number | Diff line number | Diff line change
@@ -20382,17 +20382,22 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
2038220382
// trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors
2038320383
// we need to split into two 8 element vectors which we can extend to v8i32,
2038420384
// truncate and concat the results. There's an additional complication if
20385-
// the original type is v16i8. In that case we can't split the v16i8 so
20386-
// first we pre-extend it to v16i16 which we can split to v8i16, then extend
20387-
// to v8i32, truncate that to v8i1 and concat the two halves.
20385+
// the original type is v16i8. In that case we can't split the v16i8
20386+
// directly, so we need to shuffle high elements to low and use
20387+
// sign_extend_vector_inreg.
2038820388
if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) {
20389+
SDValue Lo, Hi;
2038920390
if (InVT == MVT::v16i8) {
20390-
// First we need to sign extend up to 256-bits so we can split that.
20391-
InVT = MVT::v16i16;
20392-
In = DAG.getNode(ISD::SIGN_EXTEND, DL, InVT, In);
20391+
Lo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, In);
20392+
Hi = DAG.getVectorShuffle(
20393+
InVT, DL, In, In,
20394+
{8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
20395+
Hi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, Hi);
20396+
} else {
20397+
assert(InVT == MVT::v16i16 && "Unexpected VT!");
20398+
Lo = extract128BitVector(In, 0, DAG, DL);
20399+
Hi = extract128BitVector(In, 8, DAG, DL);
2039320400
}
20394-
SDValue Lo = extract128BitVector(In, 0, DAG, DL);
20395-
SDValue Hi = extract128BitVector(In, 8, DAG, DL);
2039620401
// We're split now, just emit two truncates and a concat. The two
2039720402
// truncates will trigger legalization to come back to this function.
2039820403
Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo);

llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll

+4-6
Original file line number | Diff line number | Diff line change
@@ -133,14 +133,12 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
133133
; AVX256VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
134134
; AVX256VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
135135
; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
136-
; AVX256VL-NEXT: vpmovsxbw %xmm1, %xmm1
137-
; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
136+
; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
138137
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1
139-
; AVX256VL-NEXT: vpmovsxbw %xmm0, %ymm0
140-
; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
141-
; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
138+
; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
139+
; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
142140
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k2
143-
; AVX256VL-NEXT: vpmovsxwd %xmm0, %ymm0
141+
; AVX256VL-NEXT: vpmovsxbd %xmm0, %ymm0
144142
; AVX256VL-NEXT: vptestmd %ymm0, %ymm0, %k3
145143
; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
146144
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k3} {z}

0 commit comments

Comments
 (0)