[X86] lowerVECTOR_SHUFFLE - canonicalize zeros/ones/fp splat constants to ensure no undefs #141214

Merged: 1 commit from RKSimon:x86-shuffle-uniform-splat-constant into llvm:main, May 23, 2025

Conversation

@RKSimon (Collaborator) commented May 23, 2025

Make splat/element-equivalent detection easier by ensuring constant splats contain no undefs.

Integer constants are limited to rematerializable zeros/ones values to avoid unnecessary scalar_to_vector(int) -> load conversions; we can relax this later if useful.
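
In SelectionDAG terms, the patch asks each BuildVectorSDNode shuffle operand for its splat value while collecting which lanes are undef, and rebuilds the vector as a fully-defined splat when the value is cheap to rematerialize. A minimal sketch of the detection step, assuming the usual SelectionDAG includes (isUndefConstantSplat is a hypothetical helper name for illustration):

    // Returns true if V is a constant splat that still contains undef lanes --
    // the case this patch rewrites into a fully-defined splat.
    static bool isUndefConstantSplat(SDValue V) {
      auto *BV = dyn_cast<BuildVectorSDNode>(V);
      if (!BV)
        return false;
      BitVector Undefs;
      SDValue Splat = BV->getSplatValue(&Undefs); // reports undef lanes in Undefs
      return Splat && Undefs.any() &&
             (isNullConstant(Splat) || isAllOnesConstant(Splat) ||
              isa<ConstantFPSDNode>(Splat));
    }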

@llvmbot (Member) commented May 23, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Make splat/element-equivalent detection easier by ensuring constant splats contain no undefs.

Integer constants are limited to rematerializable zeros/ones values to avoid unnecessary scalar_to_vector(int) -> load conversions; we can relax this later if useful.


Full diff: https://github.com/llvm/llvm-project/pull/141214.diff

5 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+19)
  • (modified) llvm/test/CodeGen/X86/pr34592.ll (+2-2)
  • (modified) llvm/test/CodeGen/X86/pr38639.ll (+2-5)
  • (modified) llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll (+2-2)
  • (modified) llvm/test/CodeGen/X86/vector-shuffle-avx512.ll (+1-2)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2ce4fa51692b3..92e980574a187 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18322,6 +18322,25 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
          "canonicalizeShuffleMaskWithHorizOp "
          "shouldn't alter the shuffle mask size");
 
+  // Canonicalize zeros/ones/fp splat constants to ensure no undefs.
+  // These will be materialized uniformly anyway, so make splat matching easier.
+  // TODO: Allow all int constants?
+  auto CanonicalizeConstant = [VT, &DL, &DAG](SDValue V) {
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      BitVector Undefs;
+      if (SDValue Splat = BV->getSplatValue(&Undefs)) {
+        if (Undefs.any() &&
+            (isNullConstant(Splat) || isAllOnesConstant(Splat) ||
+             isa<ConstantFPSDNode>(Splat))) {
+          V = DAG.getBitcast(VT, DAG.getSplat(BV->getValueType(0), DL, Splat));
+        }
+      }
+    }
+    return V;
+  };
+  V1 = CanonicalizeConstant(V1);
+  V2 = CanonicalizeConstant(V2);
+
   // Commute the shuffle if it will improve canonicalization.
   if (canonicalizeShuffleMaskWithCommute(Mask)) {
     ShuffleVectorSDNode::commuteMask(Mask);
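
For fixed-width vectors, DAG.getSplat produces a BUILD_VECTOR with every lane set to the scalar, which is what removes the undefs. Roughly, under the simplifying assumption that the type is fixed-width (the real SelectionDAG::getSplat also handles scalable vectors via SPLAT_VECTOR):

    // Replicate the scalar into every lane so no element is left undef.
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Splat);
    SDValue FullSplat = DAG.getBuildVector(VT, DL, Ops);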
diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll
index aed5ea3ed217b..7cbdb39ddf860 100644
--- a/llvm/test/CodeGen/X86/pr34592.ll
+++ b/llvm/test/CodeGen/X86/pr34592.ll
@@ -24,12 +24,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
 ; CHECK-O0-NEXT:    vmovaps 48(%rbp), %ymm11
 ; CHECK-O0-NEXT:    vmovaps 16(%rbp), %ymm11
 ; CHECK-O0-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3,4,5],ymm0[6,7]
+; CHECK-O0-NEXT:    vxorps %xmm3, %xmm3, %xmm3
 ; CHECK-O0-NEXT:    vpunpcklqdq {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[2],ymm1[2]
 ; CHECK-O0-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
 ; CHECK-O0-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,2,1]
 ; CHECK-O0-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5],ymm0[6,7]
 ; CHECK-O0-NEXT:    vperm2i128 {{.*#+}} ymm2 = ymm7[2,3],ymm6[0,1]
-; CHECK-O0-NEXT:    vxorps %xmm3, %xmm3, %xmm3
 ; CHECK-O0-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5,6,7]
 ; CHECK-O0-NEXT:    vmovaps %xmm1, %xmm3
 ; CHECK-O0-NEXT:    vmovaps %xmm7, %xmm1
@@ -55,12 +55,12 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
 ; CHECK-O3-NEXT:    vmovdqa 208(%rbp), %ymm3
 ; CHECK-O3-NEXT:    vmovdqa 144(%rbp), %ymm0
 ; CHECK-O3-NEXT:    vpblendd {{.*#+}} ymm1 = ymm6[0,1,2,3,4,5],ymm2[6,7]
+; CHECK-O3-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; CHECK-O3-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
 ; CHECK-O3-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; CHECK-O3-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,1,2,1]
 ; CHECK-O3-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
 ; CHECK-O3-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm7[2,3],ymm6[0,1]
-; CHECK-O3-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; CHECK-O3-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1],ymm2[2,3],ymm1[4,5,6,7]
 ; CHECK-O3-NEXT:    vpunpcklqdq {{.*#+}} ymm1 = ymm7[0],ymm5[0],ymm7[2],ymm5[2]
 ; CHECK-O3-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,1,2,3]
diff --git a/llvm/test/CodeGen/X86/pr38639.ll b/llvm/test/CodeGen/X86/pr38639.ll
index 15cc7581454aa..8eb3da1190285 100644
--- a/llvm/test/CodeGen/X86/pr38639.ll
+++ b/llvm/test/CodeGen/X86/pr38639.ll
@@ -6,11 +6,8 @@ define <8 x double> @test(<4 x double> %a, <4 x double> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [8.2071743224100002E-1,8.2071743224100002E-1,8.2071743224100002E-1,8.2071743224100002E-1]
 ; CHECK-NEXT:    vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm2[1],ymm1[3],ymm2[3]
-; CHECK-NEXT:    vmovddup {{.*#+}} xmm2 = [8.2071743224100002E-1,8.2071743224100002E-1]
-; CHECK-NEXT:    # xmm2 = mem[0,0]
-; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
 ; CHECK-NEXT:    retq
   %1 = shufflevector <4 x double> %a, <4 x double> <double undef, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C>, <8 x i32> <i32 6, i32 5, i32 2, i32 3, i32 5, i32 1, i32 3, i32 7>
   ret <8 x double> %1
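
This pr38639.ll test is the motivating pattern: the second shuffle operand is an FP splat whose lane 0 is undef. Once canonicalized, every use sees the same fully-defined splat, so the single vbroadcastsd already in ymm1 covers all uses and the extra vmovddup constant-pool load disappears. Schematically, in simplified SDAG notation:

    // Before: BUILD_VECTOR undef, 0x3FEA435134576E1C, 0x3FEA435134576E1C, 0x3FEA435134576E1C
    //         (uniform splat value, but lane 0 is undef, defeating splat matching)
    // After:  BUILD_VECTOR of 0x3FEA435134576E1C in all four lanes
    //         (no undefs, so one broadcast serves every use)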
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 62f59e918f00c..0eb72c8bc0be4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2402,7 +2402,7 @@ define <4 x float> @shuffle_mem_pmovzx_v4f32(ptr %p0, ptr %p1) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; AVX1-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
 ; AVX1-NEXT:    vmovaps %xmm1, (%rsi)
 ; AVX1-NEXT:    retq
@@ -2411,7 +2411,7 @@ define <4 x float> @shuffle_mem_pmovzx_v4f32(ptr %p0, ptr %p1) {
 ; AVX2OR512VL:       # %bb.0:
 ; AVX2OR512VL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX2OR512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX2OR512VL-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %xmm0
 ; AVX2OR512VL-NEXT:    vmovaps %xmm1, (%rsi)
 ; AVX2OR512VL-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 545a9d3e314a2..07498c1233b5d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -640,8 +640,7 @@ define <32 x float> @PR47534(<8 x float> %tmp) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm1 = [7,25,26,27,7,29,30,31,7,25,26,27,7,29,30,31]
-; CHECK-NEXT:    # zmm1 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [7,17,18,19,7,21,22,23,0,25,26,27,0,29,30,31]
 ; CHECK-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1
 ; CHECK-NEXT:    ret{{[l|q]}}
   %tmp1 = shufflevector <8 x float> %tmp, <8 x float> undef, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>

RKSimon merged commit 6e574a4 into llvm:main on May 23, 2025 (13 checks passed).
RKSimon deleted the x86-shuffle-uniform-splat-constant branch on May 23, 2025 at 10:02.