Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit c67db1e

Browse files
committed
[AVX-512] Lower SSE/AVX cvtdq2ps intrinsics directly to ISD::SINT_TO_FP so they can use EVEX instructions when available.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286056 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 04b2a60 commit c67db1e

File tree

4 files changed

+29
-30
lines changed

4 files changed

+29
-30
lines changed

lib/Target/X86/X86InstrSSE.td

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2031,13 +2031,6 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
20312031
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
20322032
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
20332033

2034-
let Predicates = [HasAVX] in {
2035-
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
2036-
(VCVTDQ2PSrr VR128:$src)>;
2037-
def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
2038-
(VCVTDQ2PSrm addr:$src)>;
2039-
}
2040-
20412034
let Predicates = [HasAVX, NoVLX] in {
20422035
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
20432036
(VCVTDQ2PSrr VR128:$src)>;
@@ -2066,11 +2059,6 @@ let Predicates = [UseSSE2] in {
20662059
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
20672060
(CVTDQ2PSrm addr:$src)>;
20682061

2069-
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
2070-
(CVTDQ2PSrr VR128:$src)>;
2071-
def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
2072-
(CVTDQ2PSrm addr:$src)>;
2073-
20742062
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
20752063
(CVTTPS2DQrr VR128:$src)>;
20762064
def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
@@ -2263,12 +2251,6 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
22632251
// AVX 256-bit register conversion intrinsics
22642252
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
22652253
// whenever possible to avoid declaring two versions of each one.
2266-
let Predicates = [HasAVX] in {
2267-
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
2268-
(VCVTDQ2PSYrr VR256:$src)>;
2269-
def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
2270-
(VCVTDQ2PSYrm addr:$src)>;
2271-
}
22722254

22732255
let Predicates = [HasAVX, NoVLX] in {
22742256
// Match fpround and fpextend for 128/256-bit conversions

lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,7 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) {
251251
* the alphabetical order.
252252
*/
253253
static const IntrinsicData IntrinsicsWithoutChain[] = {
254+
X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
254255
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
255256
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
256257
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
@@ -1749,6 +1750,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
17491750
X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE),
17501751
X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT),
17511752
X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
1753+
X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
17521754
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
17531755
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
17541756
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),

test/CodeGen/X86/avx-intrinsics-x86.ll

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,15 @@ declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readn
230230

231231

232232
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
233-
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
234-
; CHECK: ## BB#0:
235-
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
236-
; CHECK-NEXT: retl ## encoding: [0xc3]
233+
; AVX-LABEL: test_x86_sse2_cvtdq2ps:
234+
; AVX: ## BB#0:
235+
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
236+
; AVX-NEXT: retl ## encoding: [0xc3]
237+
;
238+
; AVX512VL-LABEL: test_x86_sse2_cvtdq2ps:
239+
; AVX512VL: ## BB#0:
240+
; AVX512VL-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0]
241+
; AVX512VL-NEXT: retl ## encoding: [0xc3]
237242
%res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
238243
ret <4 x float> %res
239244
}
@@ -2856,10 +2861,15 @@ declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
28562861

28572862

28582863
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
2859-
; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
2860-
; CHECK: ## BB#0:
2861-
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0x5b,0xc0]
2862-
; CHECK-NEXT: retl ## encoding: [0xc3]
2864+
; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
2865+
; AVX: ## BB#0:
2866+
; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0x5b,0xc0]
2867+
; AVX-NEXT: retl ## encoding: [0xc3]
2868+
;
2869+
; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
2870+
; AVX512VL: ## BB#0:
2871+
; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5b,0xc0]
2872+
; AVX512VL-NEXT: retl ## encoding: [0xc3]
28632873
%res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
28642874
ret <8 x float> %res
28652875
}

test/CodeGen/X86/sse2-intrinsics-x86.ll

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -231,10 +231,15 @@ define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
231231
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0]
232232
; SSE-NEXT: retl ## encoding: [0xc3]
233233
;
234-
; VCHECK-LABEL: test_x86_sse2_cvtdq2ps:
235-
; VCHECK: ## BB#0:
236-
; VCHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
237-
; VCHECK-NEXT: retl ## encoding: [0xc3]
234+
; AVX2-LABEL: test_x86_sse2_cvtdq2ps:
235+
; AVX2: ## BB#0:
236+
; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
237+
; AVX2-NEXT: retl ## encoding: [0xc3]
238+
;
239+
; SKX-LABEL: test_x86_sse2_cvtdq2ps:
240+
; SKX: ## BB#0:
241+
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0]
242+
; SKX-NEXT: retl ## encoding: [0xc3]
238243
%res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
239244
ret <4 x float> %res
240245
}

0 commit comments

Comments
 (0)