Skip to content

Commit c4eec9e

Browse files
authored
[X86] combineConcatVectorOps - add concatenation handling for consecutive extracts of upper subvectors (#132389)
We already fold concat(extract_subvector(x,0), extract_subvector(x,numsubelts)) -> (wider lower half) extract_subvector(x,0). This patch extends that handling to the concat(extract_subvector(x,c), extract_subvector(x,c+numsubelts)) -> (wider upper half) extract_subvector(x,c) case as well.
1 parent 857a04c commit c4eec9e

File tree

3 files changed

+13
-14
lines changed

3 files changed

+13
-14
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57955,13 +57955,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5795557955
DAG.getBitcast(VT, Src1.getOperand(0)),
5795657956
DAG.getTargetConstant(0x31, DL, MVT::i8));
5795757957
}
57958-
// concat(extract_subvector(x,lo), extract_subvector(x,hi)) -> x.
57958+
// Widen extract_subvector
57959+
// concat(extract_subvector(x,lo), extract_subvector(x,hi))
57960+
// --> extract_subvector(x,lo)
57961+
unsigned NumSubElts0 = Src0.getValueType().getVectorNumElements();
5795957962
if (Src0.getOperand(0) == Src1.getOperand(0) &&
57960-
Src0.getConstantOperandAPInt(1) == 0 &&
57963+
(Src0.getConstantOperandAPInt(1) == 0 ||
57964+
Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2)) &&
5796157965
Src1.getConstantOperandAPInt(1) ==
57962-
Src0.getValueType().getVectorNumElements()) {
57963-
return DAG.getBitcast(VT, extractSubVector(Src0.getOperand(0), 0, DAG,
57964-
DL, VT.getSizeInBits()));
57966+
(Src0.getConstantOperandAPInt(1) + NumSubElts0)) {
57967+
return DAG.getBitcast(VT,
57968+
extractSubVector(Src0.getOperand(0),
57969+
Src0.getConstantOperandVal(1),
57970+
DAG, DL, VT.getSizeInBits()));
5796557971
}
5796657972
}
5796757973
}

llvm/test/CodeGen/X86/matrix-multiply.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,10 +1036,6 @@ define <16 x float> @test_mul4x4_f32(<16 x float> %a0, <16 x float> %a1) nounwin
10361036
;
10371037
; AVX512-LABEL: test_mul4x4_f32:
10381038
; AVX512: # %bb.0: # %entry
1039-
; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
1040-
; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm3
1041-
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1042-
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
10431039
; AVX512-NEXT: vshufps {{.*#+}} zmm2 = zmm1[1,1,1,1,5,5,5,5,9,9,9,9,13,13,13,13]
10441040
; AVX512-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[2,3,2,3,2,3,2,3]
10451041
; AVX512-NEXT: vmulps %zmm2, %zmm3, %zmm2

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -997,15 +997,12 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
997997
;
998998
; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
999999
; X64: # %bb.0:
1000+
; X64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
10001001
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1001-
; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm5
1002-
; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm6
1003-
; X64-NEXT: vpermilpd %xmm5, %xmm2, %xmm2
1004-
; X64-NEXT: vpermilpd %xmm6, %xmm3, %xmm3
10051002
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
10061003
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1007-
; X64-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
10081004
; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
1005+
; X64-NEXT: vpermilpd %zmm4, %zmm0, %zmm0
10091006
; X64-NEXT: retq
10101007
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
10111008
%m1 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 2, i32 3>

0 commit comments

Comments
 (0)