Skip to content

[X86] lowerV4F64Shuffle - prefer BLEND before UNPCK shuffle matching #141073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 22, 2025

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented May 22, 2025

Same matching order as other 128/256-bit shuffles

Fixes regression identified in #139741

Same matching order as other 128/256-bit shuffles

Fixes regression identified in llvm#139741
@llvmbot
Copy link
Member

llvmbot commented May 22, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Same matching order as other 128/256-bit shuffles

Fixes regression identified in #139741


Full diff: https://github.com/llvm/llvm-project/pull/141073.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+4-5)
  • (modified) llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll (+1-1)
  • (modified) llvm/test/CodeGen/X86/subvector-broadcast.ll (+38-11)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7abc854454348..38be3a82af658 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16444,15 +16444,14 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                DAG, Subtarget);
   }
 
-  // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
-    return V;
-
   if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Blend;
 
-  // Check if the blend happens to exactly fit that of SHUFPD.
+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
+    return V;
+
   if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Op;
diff --git a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
index 45df725d7a78c..0cdc5458e71ca 100644
--- a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
+++ b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
@@ -7,7 +7,7 @@ define void @foo(<2 x float> %0) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
 ; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
 ; CHECK-NEXT:    vmovlps %xmm0, 0
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 76183ac5f8fa3..75333bf835f89 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1662,19 +1662,46 @@ define <4 x double> @broadcast_v4f64_v2f64_4u61(ptr %vp, <4 x double> %default)
   ret <4 x double> %res
 }
 
+; TODO: prefer vblend vs vunpckh on AVX1 targets
 define <8 x float> @broadcast_v8f32_v2f32_u1uu0uEu(ptr %vp, <8 x float> %default) {
-; X86-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vbroadcastsd (%eax), %ymm1
-; X86-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X86-NEXT:    retl
+; X86-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X86-AVX1-NEXT:    retl
 ;
-; X64-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X64:       # %bb.0:
-; X64-NEXT:    vbroadcastsd (%rdi), %ymm1
-; X64-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X64-NEXT:    retq
+; X86-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX2-NEXT:    retl
+;
+; X86-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX512-NEXT:    retq
   %vec = load <2 x float>, ptr %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 0, i32 2, i32 3, i32 undef>
   %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default

@RKSimon RKSimon merged commit 9a77af3 into llvm:main May 22, 2025
11 of 13 checks passed
@RKSimon RKSimon deleted the x86-v4f64-prefer-blend-vs-unpck branch May 22, 2025 15:51
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants