Skip to content

Commit 7c3bbfd

Browse files
committed
[X86] lowerShuffleAsLanePermuteAndPermute - simplify lane crossing mask based on demanded elts
Don't demand every element of each demanded sublane; instead, set the undemanded mask elements to UNDEF so that the cross-lane shuffle can be simplified (usually to a VBROADCAST). Fixes #66150.
1 parent 6ad1dd3 commit 7c3bbfd

File tree

5 files changed

+23
-31
lines changed

5 files changed

+23
-31
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14907,6 +14907,7 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
1490714907
SmallVector<int, 16> InLaneMask(NumElts, SM_SentinelUndef);
1490814908
// CrossLaneMask but one entry == one sublane.
1490914909
SmallVector<int, 16> CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef);
14910+
APInt DemandedCrossLane = APInt::getZero(NumElts);
1491014911

1491114912
for (int i = 0; i != NumElts; ++i) {
1491214913
int M = Mask[i];
@@ -14929,6 +14930,7 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
1492914930
CrossLaneMaskLarge[DstSublane] = SrcSublane;
1493014931
int DstSublaneOffset = DstSublane * NumEltsPerSublane;
1493114932
InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
14933+
DemandedCrossLane.setBit(InLaneMask[i]);
1493214934
break;
1493314935
}
1493414936
if (!Found)
@@ -14963,6 +14965,12 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
1496314965
if (CrossLaneMask == Mask || InLaneMask == Mask)
1496414966
return SDValue();
1496514967

14968+
// Simplify CrossLaneMask based on the actual demanded elements.
14969+
if (V1.hasOneUse())
14970+
for (int i = 0; i != NumElts; ++i)
14971+
if (!DemandedCrossLane[i])
14972+
CrossLaneMask[i] = SM_SentinelUndef;
14973+
1496614974
SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
1496714975
return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),
1496814976
InLaneMask);

llvm/test/CodeGen/X86/pr40730.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
2020
; CHECK-NEXT: .quad 0x0000000e0000000d
2121
; CHECK-NEXT: .quad 0x0000000e0000000d
2222
; CHECK-NEXT: .quad 0x0000001000000000
23-
; CHECK-NEXT: .quad 0x0000000e0000000d
23+
; CHECK-NEXT: .zero 8
2424

2525
define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
2626
; CHECK-LABEL: shuffle_v8i32_0dcd3f14_constant:

llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3303,7 +3303,7 @@ define void @mask_replication_factor5_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
33033303
; AVX512BW-ONLY: # %bb.0:
33043304
; AVX512BW-ONLY-NEXT: kmovw (%rdi), %k0
33053305
; AVX512BW-ONLY-NEXT: vpmovm2b %k0, %zmm0
3306-
; AVX512BW-ONLY-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
3306+
; AVX512BW-ONLY-NEXT: vpbroadcastq %xmm0, %zmm0
33073307
; AVX512BW-ONLY-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,19,19,19,19,20,20,20,20,20,21,21,21,21,21,22,22,38,38,38,39,39,39,39,39,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
33083308
; AVX512BW-ONLY-NEXT: vpxor %xmm1, %xmm1, %xmm1
33093309
; AVX512BW-ONLY-NEXT: movabsq $1099511627775, %rax # imm = 0xFFFFFFFFFF
@@ -9332,7 +9332,7 @@ define void @mask_replication_factor7_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
93329332
; AVX512BW-ONLY: # %bb.0:
93339333
; AVX512BW-ONLY-NEXT: kmovw (%rdi), %k0
93349334
; AVX512BW-ONLY-NEXT: vpmovm2b %k0, %zmm0
9335-
; AVX512BW-ONLY-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
9335+
; AVX512BW-ONLY-NEXT: vpbroadcastq %xmm0, %zmm0
93369336
; AVX512BW-ONLY-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,18,18,18,18,18,19,19,19,19,19,19,19,20,20,20,20,36,36,36,37,37,37,37,37,37,37,38,38,38,38,38,38,54,55,55,55,55,55,55,55,u,u,u,u,u,u,u,u]
93379337
; AVX512BW-ONLY-NEXT: vpxor %xmm1, %xmm1, %xmm1
93389338
; AVX512BW-ONLY-NEXT: movabsq $72057594037927935, %rax # imm = 0xFFFFFFFFFFFFFF
@@ -12935,7 +12935,7 @@ define void @mask_replication_factor8_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
1293512935
; AVX512BW-ONLY: # %bb.0:
1293612936
; AVX512BW-ONLY-NEXT: kmovq (%rdi), %k0
1293712937
; AVX512BW-ONLY-NEXT: vpmovm2b %k0, %zmm0
12938-
; AVX512BW-ONLY-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
12938+
; AVX512BW-ONLY-NEXT: vpbroadcastq %xmm0, %zmm0
1293912939
; AVX512BW-ONLY-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,54,54,54,54,54,54,54,54,55,55,55,55,55,55,55,55]
1294012940
; AVX512BW-ONLY-NEXT: vpmovb2m %zmm0, %k1
1294112941
; AVX512BW-ONLY-NEXT: kshiftrd $16, %k1, %k2

llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3086,7 +3086,7 @@ define <16 x i16> @shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_z
30863086
;
30873087
; AVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
30883088
; AVX2: # %bb.0:
3089-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3089+
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
30903090
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[28,29],zero,zero,zero,zero,zero,zero,ymm0[30,31],zero,zero,zero,zero,zero,zero
30913091
; AVX2-NEXT: retq
30923092
;
@@ -3110,7 +3110,7 @@ define <16 x i16> @shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_z
31103110
;
31113111
; XOPAVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
31123112
; XOPAVX2: # %bb.0:
3113-
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
3113+
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
31143114
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[28,29],zero,zero,zero,zero,zero,zero,ymm0[30,31],zero,zero,zero,zero,zero,zero
31153115
; XOPAVX2-NEXT: retq
31163116
%shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 28, i32 0, i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 30, i32 0, i32 0, i32 0, i32 31, i32 0, i32 0, i32 0>

llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4092,13 +4092,13 @@ define <32 x i8> @shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz
40924092
;
40934093
; AVX2-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
40944094
; AVX2: # %bb.0:
4095-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4095+
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
40964096
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
40974097
; AVX2-NEXT: retq
40984098
;
40994099
; AVX512VLBW-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
41004100
; AVX512VLBW: # %bb.0:
4101-
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4101+
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
41024102
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
41034103
; AVX512VLBW-NEXT: retq
41044104
;
@@ -4122,7 +4122,7 @@ define <32 x i8> @shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz
41224122
;
41234123
; XOPAVX2-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
41244124
; XOPAVX2: # %bb.0:
4125-
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4125+
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
41264126
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
41274127
; XOPAVX2-NEXT: retq
41284128
%shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 56, i32 1, i32 2, i32 3, i32 57, i32 5, i32 6, i32 7, i32 58, i32 9, i32 10, i32 11, i32 59, i32 13, i32 14, i32 15, i32 60, i32 17, i32 18, i32 19, i32 61, i32 21, i32 22, i32 23, i32 62, i32 25, i32 26, i32 27, i32 63, i32 29, i32 30, i32 31>
@@ -5141,26 +5141,11 @@ define <4 x i64> @PR66150(ptr %b) {
51415141
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
51425142
; AVX1-NEXT: retq
51435143
;
5144-
; AVX2-LABEL: PR66150:
5145-
; AVX2: # %bb.0:
5146-
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5147-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5148-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
5149-
; AVX2-NEXT: retq
5150-
;
5151-
; AVX512VLBW-LABEL: PR66150:
5152-
; AVX512VLBW: # %bb.0:
5153-
; AVX512VLBW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5154-
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5155-
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
5156-
; AVX512VLBW-NEXT: retq
5157-
;
5158-
; AVX512VLVBMI-LABEL: PR66150:
5159-
; AVX512VLVBMI: # %bb.0:
5160-
; AVX512VLVBMI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5161-
; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3]
5162-
; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
5163-
; AVX512VLVBMI-NEXT: retq
5144+
; AVX2OR512VL-LABEL: PR66150:
5145+
; AVX2OR512VL: # %bb.0:
5146+
; AVX2OR512VL-NEXT: vpbroadcastd (%rdi), %ymm0
5147+
; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
5148+
; AVX2OR512VL-NEXT: retq
51645149
;
51655150
; XOPAVX1-LABEL: PR66150:
51665151
; XOPAVX1: # %bb.0:
@@ -5174,8 +5159,7 @@ define <4 x i64> @PR66150(ptr %b) {
51745159
;
51755160
; XOPAVX2-LABEL: PR66150:
51765161
; XOPAVX2: # %bb.0:
5177-
; XOPAVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5178-
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5162+
; XOPAVX2-NEXT: vpbroadcastd (%rdi), %ymm0
51795163
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
51805164
; XOPAVX2-NEXT: retq
51815165
%tmp1 = load i32, ptr %b, align 4

0 commit comments

Comments
 (0)