Skip to content

Commit a409ebc

Browse files
authored
[X86][AVX10.2] Support AVX10.2-SATCVT-DS new instructions. (#102592)
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
1 parent ef7a847 commit a409ebc

31 files changed

+10963
-1
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2122,6 +2122,36 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniin
21222122
TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
21232123
TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
21242124

2125+
// AVX10.2 SATCVT-DS
// Scalar sd/ss entries (suffix "32") come first, followed by the packed
// pd/ps -> dqs/udqs/qqs/uqqs entries. 128- and 256-bit entries are gated on
// "avx10.2-256"; 512-bit entries are gated on "avx10.2-512". Among the packed
// entries only the 256- and 512-bit ones carry the "_round" infix and a
// trailing "Ii" in their signature string.
2126+
TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis32, "iV2dIi", "ncV:128:", "avx10.2-256")
2127+
TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis32, "UiV2dIi", "ncV:128:", "avx10.2-256")
2128+
TARGET_BUILTIN(__builtin_ia32_vcvttss2sis32, "iV4fIi", "ncV:128:", "avx10.2-256")
2129+
TARGET_BUILTIN(__builtin_ia32_vcvttss2usis32, "UiV4fIi", "ncV:128:", "avx10.2-256")
2130+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
2131+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
2132+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
2133+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
2134+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
2135+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
2136+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
2137+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
2138+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
2139+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
2140+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
2141+
TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
2142+
TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
2143+
TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
2144+
TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
2145+
TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
2146+
TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
2147+
TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
2148+
TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
2149+
TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
2150+
TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
2151+
TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
2152+
TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
2153+
TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
2154+
21252155
// AVX-NE-CONVERT
21262156
TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert")
21272157
TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert")

clang/include/clang/Basic/BuiltinsX86_64.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
9999
TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
100100
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
101101

102+
// AVX10.2 SATCVT-DS
// Scalar sd/ss entries with the "64" suffix. Each is gated on "avx10.2-256"
// and its signature string starts with "Oi" or "UOi" (presumably the 64-bit
// signed/unsigned result type -- confirm against the builtin type encoding).
103+
TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis64, "OiV2dIi", "ncV:128:", "avx10.2-256")
104+
TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis64, "UOiV2dIi", "ncV:128:", "avx10.2-256")
105+
TARGET_BUILTIN(__builtin_ia32_vcvttss2sis64, "OiV4fIi", "ncV:128:", "avx10.2-256")
106+
TARGET_BUILTIN(__builtin_ia32_vcvttss2usis64, "UOiV4fIi", "ncV:128:", "avx10.2-256")
107+
102108
// UINTR
103109
TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr")
104110
TARGET_BUILTIN(__builtin_ia32_stui, "v", "n", "uintr")

clang/lib/Headers/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,13 @@ set(x86_files
151151
avx10_2_512convertintrin.h
152152
avx10_2_512minmaxintrin.h
153153
avx10_2_512niintrin.h
154+
avx10_2_512satcvtdsintrin.h
154155
avx10_2_512satcvtintrin.h
155156
avx10_2bf16intrin.h
156157
avx10_2convertintrin.h
157158
avx10_2minmaxintrin.h
158159
avx10_2niintrin.h
160+
avx10_2satcvtdsintrin.h
159161
avx10_2satcvtintrin.h
160162
avx2intrin.h
161163
avx512bf16intrin.h
clang/lib/Headers/avx10_2_512satcvtdsintrin.h

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
/*===----- avx10_2_512satcvtdsintrin.h - AVX10_2_512SATCVTDS intrinsics ----===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*===-----------------------------------------------------------------------===
8+
*/
9+
#ifndef __IMMINTRIN_H
10+
#error \
11+
"Never use <avx10_2_512satcvtdsintrin.h> directly; include <immintrin.h> instead."
12+
#endif
13+
14+
#ifndef __AVX10_2_512SATCVTDSINTRIN_H
15+
#define __AVX10_2_512SATCVTDSINTRIN_H
16+
17+
/* Define the default attributes for the functions in this file: always
 * inlined, no debug info, compiled for the "avx10.2-512" target, and a
 * minimum vector width of 512. */
18+
#define __DEFAULT_FN_ATTRS \
19+
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
20+
__min_vector_width__(512)))
21+
22+
// 512-bit: double -> signed 32-bit int (__m512d -> __m256i)
23+
// Unmasked form: calls __builtin_ia32_vcvttpd2dqs512_round_mask on __A (cast
// to __v8df) with an undefined second operand, an all-ones __mmask8 and
// _MM_FROUND_CUR_DIRECTION, and returns the result as __m256i.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
24+
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
25+
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
26+
_MM_FROUND_CUR_DIRECTION));
27+
}
28+
29+
// Masked form: calls __builtin_ia32_vcvttpd2dqs512_round_mask on __A (cast to
// __v8df) with __W as the second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m256i __DEFAULT_FN_ATTRS
30+
_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
31+
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
32+
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
33+
}
34+
35+
// Zero-masked form: calls __builtin_ia32_vcvttpd2dqs512_round_mask on __A
// (cast to __v8df) with an all-zero second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m256i __DEFAULT_FN_ATTRS
36+
_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
37+
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
38+
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
39+
_MM_FROUND_CUR_DIRECTION));
40+
}
41+
42+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2dqs512_round_mask with an undefined second operand,
// an all-ones __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundpd_epi32(__A, __R) \
43+
((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
44+
(__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \
45+
(__mmask8) - 1, (const int)(__R)))
46+
47+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2dqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \
48+
((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
49+
(__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \
50+
(const int)(__R)))
51+
52+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2dqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundpd_epi32(__U, __A, __R) \
53+
((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
54+
(__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
55+
(const int)(__R)))
56+
57+
// 512-bit: double -> unsigned 32-bit int (__m512d -> __m256i)
58+
// Unmasked form: calls __builtin_ia32_vcvttpd2udqs512_round_mask on __A (cast
// to __v8df) with an undefined second operand, an all-ones __mmask8 and
// _MM_FROUND_CUR_DIRECTION, and returns the result as __m256i.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
59+
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
60+
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
61+
_MM_FROUND_CUR_DIRECTION));
62+
}
63+
64+
// Masked form: calls __builtin_ia32_vcvttpd2udqs512_round_mask on __A (cast to
// __v8df) with __W as the second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m256i __DEFAULT_FN_ATTRS
65+
_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
66+
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
67+
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
68+
}
69+
70+
// Zero-masked form: calls __builtin_ia32_vcvttpd2udqs512_round_mask on __A
// (cast to __v8df) with an all-zero second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m256i __DEFAULT_FN_ATTRS
71+
_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
72+
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
73+
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
74+
_MM_FROUND_CUR_DIRECTION));
75+
}
76+
77+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2udqs512_round_mask with an undefined second operand,
// an all-ones __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundpd_epu32(__A, __R) \
78+
((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
79+
(__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \
80+
(__mmask8) - 1, (const int)(__R)))
81+
82+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2udqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \
83+
((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
84+
(__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \
85+
(const int)(__R)))
86+
87+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2udqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundpd_epu32(__U, __A, __R) \
88+
((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
89+
(__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
90+
(const int)(__R)))
91+
92+
// 512-bit: double -> signed 64-bit int (__m512d -> __m512i)
93+
94+
// Unmasked form: calls __builtin_ia32_vcvttpd2qqs512_round_mask on __A (cast
// to __v8df) with an undefined second operand, an all-ones __mmask8 and
// _MM_FROUND_CUR_DIRECTION, and returns the result as __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
95+
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
96+
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
97+
_MM_FROUND_CUR_DIRECTION));
98+
}
99+
// Masked form: calls __builtin_ia32_vcvttpd2qqs512_round_mask on __A (cast to
// __v8df) with __W as the second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
100+
_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
101+
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
102+
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
103+
}
104+
// Zero-masked form: calls __builtin_ia32_vcvttpd2qqs512_round_mask on __A
// (cast to __v8df) with an all-zero second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
105+
_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
106+
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
107+
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
108+
_MM_FROUND_CUR_DIRECTION));
109+
}
110+
111+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2qqs512_round_mask with an undefined second operand,
// an all-ones __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundpd_epi64(__A, __R) \
112+
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
113+
(__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \
114+
(__mmask8) - 1, (const int)(__R)))
115+
116+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2qqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \
117+
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
118+
(__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
119+
(const int)(__R)))
120+
121+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2qqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundpd_epi64(__U, __A, __R) \
122+
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
123+
(__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
124+
(const int)(__R)))
125+
126+
// 512-bit: double -> unsigned 64-bit int (__m512d -> __m512i)
127+
128+
// Unmasked form: calls __builtin_ia32_vcvttpd2uqqs512_round_mask on __A (cast
// to __v8df) with an undefined second operand, an all-ones __mmask8 and
// _MM_FROUND_CUR_DIRECTION, and returns the result as __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
129+
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
130+
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
131+
_MM_FROUND_CUR_DIRECTION));
132+
}
133+
134+
// Masked form: calls __builtin_ia32_vcvttpd2uqqs512_round_mask on __A (cast to
// __v8df) with __W as the second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
135+
_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
136+
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
137+
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
138+
}
139+
140+
// Zero-masked form: calls __builtin_ia32_vcvttpd2uqqs512_round_mask on __A
// (cast to __v8df) with an all-zero second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
141+
_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
142+
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
143+
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
144+
_MM_FROUND_CUR_DIRECTION));
145+
}
146+
147+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2uqqs512_round_mask with an undefined second operand,
// an all-ones __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundpd_epu64(__A, __R) \
148+
((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
149+
(__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \
150+
(__mmask8) - 1, (const int)(__R)))
151+
152+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2uqqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \
153+
((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
154+
(__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
155+
(const int)(__R)))
156+
157+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttpd2uqqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundpd_epu64(__U, __A, __R) \
158+
((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
159+
(__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
160+
(const int)(__R)))
161+
162+
// 512-bit: float -> signed 32-bit int (__m512 -> __m512i)
163+
// Unmasked form: calls __builtin_ia32_vcvttps2dqs512_round_mask on __A (cast
// to __v16sf) with an undefined second operand, an all-ones __mmask16 and
// _MM_FROUND_CUR_DIRECTION, and returns the result as __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
164+
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
165+
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
166+
_MM_FROUND_CUR_DIRECTION));
167+
}
168+
169+
// Masked form: calls __builtin_ia32_vcvttps2dqs512_round_mask on __A (cast to
// __v16sf) with __W as the second operand, __U as the 16-bit mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
170+
_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
171+
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
172+
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
173+
}
174+
175+
// Zero-masked form: calls __builtin_ia32_vcvttps2dqs512_round_mask on __A
// (cast to __v16sf) with an all-zero second operand, __U as the 16-bit mask
// and _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
176+
_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
177+
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
178+
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
179+
_MM_FROUND_CUR_DIRECTION));
180+
}
181+
182+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2dqs512_round_mask with an undefined second operand,
// an all-ones __mmask16, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundps_epi32(__A, __R) \
183+
((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
184+
(__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \
185+
(__mmask16) - 1, (const int)(__R)))
186+
187+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2dqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask16, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \
188+
((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
189+
(__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \
190+
(const int)(__R)))
191+
192+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2dqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask16, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundps_epi32(__U, __A, __R) \
193+
((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
194+
(__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \
195+
(__mmask16)(__U), (const int)(__R)))
196+
197+
// 512-bit: float -> unsigned 32-bit int (__m512 -> __m512i)
198+
// Unmasked form: calls __builtin_ia32_vcvttps2udqs512_round_mask on __A (cast
// to __v16sf) with an undefined second operand, an all-ones __mmask16 and
// _MM_FROUND_CUR_DIRECTION, and returns the result as __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
199+
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
200+
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
201+
_MM_FROUND_CUR_DIRECTION));
202+
}
203+
204+
// Masked form: calls __builtin_ia32_vcvttps2udqs512_round_mask on __A (cast to
// __v16sf) with __W as the second operand, __U as the 16-bit mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
205+
_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
206+
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
207+
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
208+
}
209+
210+
// Zero-masked form: calls __builtin_ia32_vcvttps2udqs512_round_mask on __A
// (cast to __v16sf) with an all-zero second operand, __U as the 16-bit mask
// and _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
211+
_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
212+
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
213+
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
214+
_MM_FROUND_CUR_DIRECTION));
215+
}
216+
217+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2udqs512_round_mask with an undefined second operand,
// an all-ones __mmask16, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundps_epu32(__A, __R) \
218+
((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
219+
(__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \
220+
(__mmask16) - 1, (const int)(__R)))
221+
222+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2udqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask16, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \
223+
((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
224+
(__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \
225+
(const int)(__R)))
226+
227+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2udqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask16, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundps_epu32(__U, __A, __R) \
228+
((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
229+
(__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \
230+
(__mmask16)(__U), (const int)(__R)))
231+
232+
// 512-bit: float -> signed 64-bit int (__m256 -> __m512i)
233+
// Unmasked form: calls __builtin_ia32_vcvttps2qqs512_round_mask on the 256-bit
// __A (cast to __v8sf) with an undefined second operand, an all-ones __mmask8
// and _MM_FROUND_CUR_DIRECTION, and returns the result as __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
234+
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
235+
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
236+
_MM_FROUND_CUR_DIRECTION));
237+
}
238+
239+
// Masked form: calls __builtin_ia32_vcvttps2qqs512_round_mask on the 256-bit
// __A (cast to __v8sf) with __W as the second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
240+
_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
241+
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
242+
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
243+
}
244+
245+
// Zero-masked form: calls __builtin_ia32_vcvttps2qqs512_round_mask on the
// 256-bit __A (cast to __v8sf) with an all-zero second operand, __U as the
// mask and _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
246+
_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
247+
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
248+
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
249+
_MM_FROUND_CUR_DIRECTION));
250+
}
251+
252+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2qqs512_round_mask with an undefined second operand,
// an all-ones __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundps_epi64(__A, __R) \
253+
((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
254+
(__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
255+
(const int)(__R)))
256+
257+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2qqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \
258+
((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
259+
(__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
260+
(const int)(__R)))
261+
262+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2qqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundps_epi64(__U, __A, __R) \
263+
((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
264+
(__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
265+
(const int)(__R)))
266+
267+
// 512-bit: float -> unsigned 64-bit int (__m256 -> __m512i)
268+
// Unmasked form: calls __builtin_ia32_vcvttps2uqqs512_round_mask on the
// 256-bit __A (cast to __v8sf) with an undefined second operand, an all-ones
// __mmask8 and _MM_FROUND_CUR_DIRECTION, and returns the result as __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
269+
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
270+
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
271+
_MM_FROUND_CUR_DIRECTION));
272+
}
273+
274+
// Masked form: calls __builtin_ia32_vcvttps2uqqs512_round_mask on the 256-bit
// __A (cast to __v8sf) with __W as the second operand, __U as the mask and
// _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear keep __W's value --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
275+
_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
276+
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
277+
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
278+
}
279+
280+
// Zero-masked form: calls __builtin_ia32_vcvttps2uqqs512_round_mask on the
// 256-bit __A (cast to __v8sf) with an all-zero second operand, __U as the
// mask and _MM_FROUND_CUR_DIRECTION.
// NOTE(review): presumably lanes whose mask bit is clear become zero --
// confirm against the builtin's definition.
static __inline__ __m512i __DEFAULT_FN_ATTRS
281+
_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
282+
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
283+
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
284+
_MM_FROUND_CUR_DIRECTION));
285+
}
286+
287+
// Unmasked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2uqqs512_round_mask with an undefined second operand,
// an all-ones __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_cvtts_roundps_epu64(__A, __R) \
288+
((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
289+
(__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
290+
(const int)(__R)))
291+
292+
// Masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2uqqs512_round_mask with (__W) as the second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \
293+
((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
294+
(__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
295+
(const int)(__R)))
296+
297+
// Zero-masked form with a caller-supplied __R: expands to
// __builtin_ia32_vcvttps2uqqs512_round_mask with an all-zero second operand,
// (__U) as the __mmask8, and (__R) cast to const int as the last operand.
#define _mm512_maskz_cvtts_roundps_epu64(__U, __A, __R) \
298+
((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
299+
(__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
300+
(const int)(__R)))
301+
302+
#undef __DEFAULT_FN_ATTRS
303+
#endif // __AVX10_2_512SATCVTDSINTRIN_H

0 commit comments

Comments
 (0)