Skip to content

Commit f30f7a0

Browse files
committed
[X86] canonicalizeShuffleWithOp - initial support for shuffle(cvt(x),cvt(y)) -> cvt(shuffle(x,y))
Initial support is just for UNPCKL(CVTPH2PS(X),CVTPH2PS(Y)) -> CVTPH2PS(UNPCKL(X,Y)). Making this more general for other shuffles/conversions will have to be done carefully, as we have to handle changes in src/dst element width, so I just handled the CVTPH2PS regression case. Fixes #83414.
1 parent 2eb40aa commit f30f7a0

File tree

2 files changed

+25
-20
lines changed

2 files changed

+25
-20
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41500,6 +41500,21 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4150041500
ShuffleVT,
4150141501
DAG.getNode(SrcOpcode, DL, OpVT, DAG.getBitcast(OpVT, Res)));
4150241502
}
41503+
// TODO: We can generalize this for other shuffles/conversions.
41504+
if (Opc == X86ISD::UNPCKL && SrcOpcode == X86ISD::CVTPH2PS &&
41505+
N1.getOpcode() == SrcOpcode &&
41506+
N0.getValueType() == N1.getValueType() &&
41507+
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
41508+
ShuffleVT.getScalarSizeInBits() == N0.getScalarValueSizeInBits() &&
41509+
IsSafeToMoveShuffle(N0, SrcOpcode) &&
41510+
IsSafeToMoveShuffle(N1, SrcOpcode)) {
41511+
EVT OpSrcVT = N0.getOperand(0).getValueType();
41512+
EVT OpDstVT = N0.getValueType();
41513+
SDValue Res =
41514+
DAG.getNode(Opc, DL, OpSrcVT, N0.getOperand(0), N1.getOperand(0));
41515+
return DAG.getBitcast(ShuffleVT,
41516+
DAG.getNode(SrcOpcode, DL, OpDstVT, Res));
41517+
}
4150341518
}
4150441519
break;
4150541520
}

llvm/test/CodeGen/X86/vector-half-conversions.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4966,22 +4966,18 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
49664966
;
49674967
; F16C-LABEL: fptosi_2f16_to_4i32:
49684968
; F16C: # %bb.0:
4969-
; F16C-NEXT: vpsrld $16, %xmm0, %xmm1
4970-
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
4971-
; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
4969+
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
4970+
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
49724971
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
4973-
; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
49744972
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
49754973
; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
49764974
; F16C-NEXT: retq
49774975
;
49784976
; AVX512-LABEL: fptosi_2f16_to_4i32:
49794977
; AVX512: # %bb.0:
4980-
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
4981-
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
4982-
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
4978+
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
4979+
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
49834980
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
4984-
; AVX512-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
49854981
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
49864982
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
49874983
; AVX512-NEXT: retq
@@ -5084,11 +5080,9 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
50845080
;
50855081
; F16C-LABEL: fptoui_2f16_to_4i32:
50865082
; F16C: # %bb.0:
5087-
; F16C-NEXT: vpsrld $16, %xmm0, %xmm1
5088-
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
5089-
; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5083+
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
5084+
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
50905085
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
5091-
; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
50925086
; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
50935087
; F16C-NEXT: vpsrad $31, %xmm1, %xmm2
50945088
; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -5100,23 +5094,19 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
51005094
;
51015095
; AVX512F-LABEL: fptoui_2f16_to_4i32:
51025096
; AVX512F: # %bb.0:
5103-
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
5104-
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
5105-
; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5097+
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
5098+
; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
51065099
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
5107-
; AVX512F-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
51085100
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
51095101
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
51105102
; AVX512F-NEXT: vzeroupper
51115103
; AVX512F-NEXT: retq
51125104
;
51135105
; AVX512-FASTLANE-LABEL: fptoui_2f16_to_4i32:
51145106
; AVX512-FASTLANE: # %bb.0:
5115-
; AVX512-FASTLANE-NEXT: vpsrld $16, %xmm0, %xmm1
5116-
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm1, %xmm1
5117-
; AVX512-FASTLANE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5107+
; AVX512-FASTLANE-NEXT: vxorps %xmm1, %xmm1, %xmm1
5108+
; AVX512-FASTLANE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
51185109
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
5119-
; AVX512-FASTLANE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
51205110
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
51215111
; AVX512-FASTLANE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
51225112
; AVX512-FASTLANE-NEXT: retq

0 commit comments

Comments
 (0)