Skip to content

Commit d13cd76

Browse files
committed
[X86] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86 conversion nodes when upper elements are not demanded.
Fixes #83402
1 parent 0ea9cdb commit d13cd76

File tree

2 files changed

+27
-11
lines changed

2 files changed

+27
-11
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42518,6 +42518,25 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4251842518
SDValue Insert =
4251942519
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
4252042520
return TLO.CombineTo(Op, Insert);
42521+
}
42522+
// Conversions.
42523+
case X86ISD::CVTTP2SI:
42524+
case X86ISD::CVTTP2UI:
42525+
case X86ISD::CVTPH2PS: {
42526+
SDLoc DL(Op);
42527+
unsigned Scale = SizeInBits / ExtSizeInBits;
42528+
SDValue SrcOp = Op.getOperand(0);
42529+
MVT SrcVT = SrcOp.getSimpleValueType();
42530+
unsigned SrcExtSize =
42531+
std::max<unsigned>(SrcVT.getSizeInBits() / Scale, 128);
42532+
MVT ExtVT = MVT::getVectorVT(VT.getSimpleVT().getScalarType(),
42533+
ExtSizeInBits / VT.getScalarSizeInBits());
42534+
SDValue ExtOp = TLO.DAG.getNode(
42535+
Opc, DL, ExtVT, extractSubVector(SrcOp, 0, TLO.DAG, DL, SrcExtSize));
42536+
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
42537+
SDValue Insert =
42538+
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
42539+
return TLO.CombineTo(Op, Insert);
4252142540
}
4252242541
// Zero upper elements.
4252342542
case X86ISD::VZEXT_MOVL:

llvm/test/CodeGen/X86/vector-half-conversions.ll

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4990,6 +4990,7 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
49904990
ret <4 x i32> %ext
49914991
}
49924992

4993+
; PR83402
49934994
define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
49944995
; AVX-LABEL: fptosi_4f16_to_4i32:
49954996
; AVX: # %bb.0:
@@ -5024,16 +5025,14 @@ define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
50245025
;
50255026
; F16C-LABEL: fptosi_4f16_to_4i32:
50265027
; F16C: # %bb.0:
5027-
; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
5028+
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
50285029
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
5029-
; F16C-NEXT: vzeroupper
50305030
; F16C-NEXT: retq
50315031
;
50325032
; AVX512-LABEL: fptosi_4f16_to_4i32:
50335033
; AVX512: # %bb.0:
5034-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
5034+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
50355035
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
5036-
; AVX512-NEXT: vzeroupper
50375036
; AVX512-NEXT: retq
50385037
%cvt = fptosi <4 x half> %a to <4 x i32>
50395038
ret <4 x i32> %cvt
@@ -5213,13 +5212,12 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
52135212
;
52145213
; F16C-LABEL: fptoui_4f16_to_4i32:
52155214
; F16C: # %bb.0:
5216-
; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
5217-
; F16C-NEXT: vcvttps2dq %ymm0, %ymm1
5218-
; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
5219-
; F16C-NEXT: vcvttps2dq %ymm0, %ymm0
5215+
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
5216+
; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
5217+
; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5218+
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
52205219
; F16C-NEXT: vorps %xmm0, %xmm1, %xmm0
52215220
; F16C-NEXT: vblendvps %xmm1, %xmm0, %xmm1, %xmm0
5222-
; F16C-NEXT: vzeroupper
52235221
; F16C-NEXT: retq
52245222
;
52255223
; AVX512F-LABEL: fptoui_4f16_to_4i32:
@@ -5232,9 +5230,8 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
52325230
;
52335231
; AVX512-FASTLANE-LABEL: fptoui_4f16_to_4i32:
52345232
; AVX512-FASTLANE: # %bb.0:
5235-
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %ymm0
5233+
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
52365234
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
5237-
; AVX512-FASTLANE-NEXT: vzeroupper
52385235
; AVX512-FASTLANE-NEXT: retq
52395236
%cvt = fptoui <4 x half> %a to <4 x i32>
52405237
ret <4 x i32> %cvt

0 commit comments

Comments
 (0)