Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 197ca31

Browse files
committed
[AVX-512] Lower AVX cvtpd2ps intrinsic to ISD::FP_ROUND so it can use EVEX instruction when available.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286057 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c67db1e commit 197ca31

File tree

4 files changed: 16 additions and 13 deletions.

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18062,7 +18062,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
1806218062
Src1, Src2, Src3, Src4),
1806318063
Mask, PassThru, Subtarget, DAG);
1806418064
}
18065-
case CVTPD2PS: {
18065+
case CVTPD2PS:
18066+
// ISD::FP_ROUND has a second argument that indicates if the truncation
18067+
// does not change the value. Set it to 0 since it can change.
18068+
return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
18069+
DAG.getIntPtrConstant(0, dl));
18070+
case CVTPD2PS_MASK: {
1806618071
SDValue Src = Op.getOperand(1);
1806718072
SDValue PassThru = Op.getOperand(2);
1806818073
SDValue Mask = Op.getOperand(3);

lib/Target/X86/X86InstrSSE.td

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2228,16 +2228,16 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
22282228
[], IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
22292229

22302230
// YMM only
2231+
let Predicates = [HasAVX, NoVLX] in {
22312232
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
22322233
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
2233-
[(set VR128:$dst,
2234-
(int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
2234+
[(set VR128:$dst, (fpround VR256:$src))],
22352235
IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
22362236
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
22372237
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
2238-
[(set VR128:$dst,
2239-
(int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
2238+
[(set VR128:$dst, (fpround (loadv4f64 addr:$src)))],
22402239
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
2240+
}
22412241
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
22422242
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
22432243

@@ -2261,10 +2261,6 @@ let Predicates = [HasAVX, NoVLX] in {
22612261
(VCVTPD2PSrr VR128:$src)>;
22622262
def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
22632263
(VCVTPD2PSXrm addr:$src)>;
2264-
def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
2265-
(VCVTPD2PSYrr VR256:$src)>;
2266-
def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
2267-
(VCVTPD2PSYrm addr:$src)>;
22682264

22692265
def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
22702266
(VCVTPS2PDrr VR128:$src)>;

lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ enum IntrinsicType : uint16_t {
2323
INTR_NO_TYPE,
2424
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS,
2525
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
26-
CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, CVTPD2PS,
26+
CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
27+
CVTPD2PS, CVTPD2PS_MASK,
2728
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
2829
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
2930
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
@@ -251,6 +252,7 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) {
251252
* the alphabetical order.
252253
*/
253254
static const IntrinsicData IntrinsicsWithoutChain[] = {
255+
X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
254256
X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
255257
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
256258
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
@@ -503,9 +505,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
503505
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
504506
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
505507
X86ISD::VFPROUND, 0),
506-
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS,
508+
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS_MASK,
507509
ISD::FP_ROUND, 0),
508-
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS,
510+
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK,
509511
ISD::FP_ROUND, X86ISD::VFPROUND_RND),
510512
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
511513
X86ISD::CVTP2SI, 0),

test/CodeGen/X86/avx-intrinsics-x86.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2824,7 +2824,7 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
28242824
;
28252825
; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256:
28262826
; AVX512VL: ## BB#0:
2827-
; AVX512VL-NEXT: vcvtpd2psy %ymm0, %xmm0 ## encoding: [0xc5,0xfd,0x5a,0xc0]
2827+
; AVX512VL-NEXT: vcvtpd2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5a,0xc0]
28282828
; AVX512VL-NEXT: retl ## encoding: [0xc3]
28292829
%res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
28302830
ret <4 x float> %res

0 commit comments

Comments
 (0)