Skip to content

Commit 6e574a4

Browse files
authored
[X86] lowerVECTOR_SHUFFLE - canonicalize zeros/ones/fp splat constants to ensure no undefs (#141214)
Make splat/element-equivalent detection easier by ensuring constant splats contain no undefs. Integer constants are limited to rematerializable zeros/ones values to avoid unnecessary scalar_to_vector(int) -> load conversions; we can relax this later if useful.
1 parent 896ea58 commit 6e574a4

File tree

5 files changed

+26
-11
lines changed

5 files changed

+26
-11
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18322,6 +18322,25 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
1832218322
"canonicalizeShuffleMaskWithHorizOp "
1832318323
"shouldn't alter the shuffle mask size");
1832418324

18325+
// Canonicalize zeros/ones/fp splat constants to ensure no undefs.
18326+
// These will be materialized uniformly anyway, so make splat matching easier.
18327+
// TODO: Allow all int constants?
18328+
// Helper: returns V unchanged unless it is a BUILD_VECTOR whose splat value is
// zero, all-ones, or a floating-point constant and whose Undefs mask is
// non-empty. In that case it returns a freshly built splat of the same value,
// created in the build vector's own type and bitcast to VT (captured from the
// enclosing function).
auto CanonicalizeConstant = [VT, &DL, &DAG](SDValue V) {
18329+
// Only build-vector nodes are candidates; anything else falls through.
if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
18330+
// Filled in by getSplatValue; presumably one bit per undef element
// (per the canonicalization comment above) - confirm against the API.
BitVector Undefs;
18331+
// A null SDValue here means no splat value was found; skip the rewrite.
if (SDValue Splat = BV->getSplatValue(&Undefs)) {
18332+
// Rewrite only when something is actually flagged in Undefs, and only for
// the constant kinds listed here (zero, all-ones, any FP constant).
if (Undefs.any() &&
18333+
(isNullConstant(Splat) || isAllOnesConstant(Splat) ||
18334+
isa<ConstantFPSDNode>(Splat))) {
18335+
// Rebuild the splat in BV's own value type, then bitcast the result to VT.
V = DAG.getBitcast(VT, DAG.getSplat(BV->getValueType(0), DL, Splat));
18336+
}
18337+
}
18338+
}
18339+
// Either the original value or its canonicalized replacement.
return V;
18340+
};
18341+
V1 = CanonicalizeConstant(V1);
18342+
V2 = CanonicalizeConstant(V2);
18343+
1832518344
// Commute the shuffle if it will improve canonicalization.
1832618345
if (canonicalizeShuffleMaskWithCommute(Mask)) {
1832718346
ShuffleVectorSDNode::commuteMask(Mask);

llvm/test/CodeGen/X86/pr34592.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
2424
; CHECK-O0-NEXT: vmovaps 48(%rbp), %ymm11
2525
; CHECK-O0-NEXT: vmovaps 16(%rbp), %ymm11
2626
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3,4,5],ymm0[6,7]
27+
; CHECK-O0-NEXT: vxorps %xmm3, %xmm3, %xmm3
2728
; CHECK-O0-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[2],ymm1[2]
2829
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
2930
; CHECK-O0-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,1]
3031
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5],ymm0[6,7]
3132
; CHECK-O0-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm7[2,3],ymm6[0,1]
32-
; CHECK-O0-NEXT: vxorps %xmm3, %xmm3, %xmm3
3333
; CHECK-O0-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5,6,7]
3434
; CHECK-O0-NEXT: vmovaps %xmm1, %xmm3
3535
; CHECK-O0-NEXT: vmovaps %xmm7, %xmm1
@@ -55,12 +55,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
5555
; CHECK-O3-NEXT: vmovdqa 208(%rbp), %ymm3
5656
; CHECK-O3-NEXT: vmovdqa 144(%rbp), %ymm0
5757
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm1 = ymm6[0,1,2,3,4,5],ymm2[6,7]
58+
; CHECK-O3-NEXT: vpxor %xmm2, %xmm2, %xmm2
5859
; CHECK-O3-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
5960
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
6061
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,1]
6162
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
6263
; CHECK-O3-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm7[2,3],ymm6[0,1]
63-
; CHECK-O3-NEXT: vpxor %xmm2, %xmm2, %xmm2
6464
; CHECK-O3-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1],ymm2[2,3],ymm1[4,5,6,7]
6565
; CHECK-O3-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm7[0],ymm5[0],ymm7[2],ymm5[2]
6666
; CHECK-O3-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,2,3]

llvm/test/CodeGen/X86/pr38639.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,8 @@ define <8 x double> @test(<4 x double> %a, <4 x double> %b) {
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [8.2071743224100002E-1,8.2071743224100002E-1,8.2071743224100002E-1,8.2071743224100002E-1]
88
; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
9-
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
10-
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm2[1],ymm1[3],ymm2[3]
11-
; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [8.2071743224100002E-1,8.2071743224100002E-1]
12-
; CHECK-NEXT: # xmm2 = mem[0,0]
13-
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
9+
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
10+
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
1411
; CHECK-NEXT: retq
1512
%1 = shufflevector <4 x double> %a, <4 x double> <double undef, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C>, <8 x i32> <i32 6, i32 5, i32 2, i32 3, i32 5, i32 1, i32 3, i32 7>
1613
ret <8 x double> %1

llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2402,7 +2402,7 @@ define <4 x float> @shuffle_mem_pmovzx_v4f32(ptr %p0, ptr %p1) {
24022402
; AVX1: # %bb.0:
24032403
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
24042404
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2405-
; AVX1-NEXT: vunpckhps {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2405+
; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
24062406
; AVX1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
24072407
; AVX1-NEXT: vmovaps %xmm1, (%rsi)
24082408
; AVX1-NEXT: retq
@@ -2411,7 +2411,7 @@ define <4 x float> @shuffle_mem_pmovzx_v4f32(ptr %p0, ptr %p1) {
24112411
; AVX2OR512VL: # %bb.0:
24122412
; AVX2OR512VL-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
24132413
; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
2414-
; AVX2OR512VL-NEXT: vunpckhps {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2414+
; AVX2OR512VL-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
24152415
; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
24162416
; AVX2OR512VL-NEXT: vmovaps %xmm1, (%rsi)
24172417
; AVX2OR512VL-NEXT: retq

llvm/test/CodeGen/X86/vector-shuffle-avx512.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -640,8 +640,7 @@ define <32 x float> @PR47534(<8 x float> %tmp) {
640640
; CHECK: # %bb.0:
641641
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
642642
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
643-
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [7,25,26,27,7,29,30,31,7,25,26,27,7,29,30,31]
644-
; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
643+
; CHECK-NEXT: vpmovsxbd {{.*#+}} zmm1 = [7,17,18,19,7,21,22,23,0,25,26,27,0,29,30,31]
645644
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
646645
; CHECK-NEXT: ret{{[l|q]}}
647646
%tmp1 = shufflevector <8 x float> %tmp, <8 x float> undef, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>

0 commit comments

Comments
 (0)