Skip to content

Commit 73c9ad2

Browse files
authored
[clang][x86] Add constexpr support for some basic SSE1 intrinsics (#111001)
This is an initial patch to enable constexpr support on the more basic SSE1 intrinsics - such as initialization, arithmetic, logic and fixed shuffles. The plan is to incrementally extend this for SSE2/AVX etc. - initially for the equivalent basic intrinsics, but we can add support for some of the ia32 builtins as well as the need arises.
1 parent 5dc7a5e commit 73c9ad2

File tree

2 files changed

+141
-72
lines changed

2 files changed

+141
-72
lines changed

clang/lib/Headers/xmmintrin.h

Lines changed: 58 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
4848
__min_vector_width__(128)))
4949
#endif
5050

51+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
52+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
53+
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
54+
#else
55+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
56+
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
57+
#endif
58+
5159
#define __trunc64(x) \
5260
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
5361
#define __zext128(x) \
@@ -75,9 +83,8 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
7583
/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum
7684
/// of the lower 32 bits of both operands. The upper 96 bits are copied from
7785
/// the upper 96 bits of the first source operand.
78-
static __inline__ __m128 __DEFAULT_FN_ATTRS
79-
_mm_add_ss(__m128 __a, __m128 __b)
80-
{
86+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
87+
_mm_add_ss(__m128 __a, __m128 __b) {
8188
__a[0] += __b[0];
8289
return __a;
8390
}
@@ -95,9 +102,8 @@ _mm_add_ss(__m128 __a, __m128 __b)
95102
/// A 128-bit vector of [4 x float] containing one of the source operands.
96103
/// \returns A 128-bit vector of [4 x float] containing the sums of both
97104
/// operands.
98-
static __inline__ __m128 __DEFAULT_FN_ATTRS
99-
_mm_add_ps(__m128 __a, __m128 __b)
100-
{
105+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
106+
_mm_add_ps(__m128 __a, __m128 __b) {
101107
return (__m128)((__v4sf)__a + (__v4sf)__b);
102108
}
103109

@@ -117,9 +123,8 @@ _mm_add_ps(__m128 __a, __m128 __b)
117123
/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
118124
/// difference of the lower 32 bits of both operands. The upper 96 bits are
119125
/// copied from the upper 96 bits of the first source operand.
120-
static __inline__ __m128 __DEFAULT_FN_ATTRS
121-
_mm_sub_ss(__m128 __a, __m128 __b)
122-
{
126+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
127+
_mm_sub_ss(__m128 __a, __m128 __b) {
123128
__a[0] -= __b[0];
124129
return __a;
125130
}
@@ -138,9 +143,8 @@ _mm_sub_ss(__m128 __a, __m128 __b)
138143
/// A 128-bit vector of [4 x float] containing the subtrahend.
139144
/// \returns A 128-bit vector of [4 x float] containing the differences between
140145
/// both operands.
141-
static __inline__ __m128 __DEFAULT_FN_ATTRS
142-
_mm_sub_ps(__m128 __a, __m128 __b)
143-
{
146+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
147+
_mm_sub_ps(__m128 __a, __m128 __b) {
144148
return (__m128)((__v4sf)__a - (__v4sf)__b);
145149
}
146150

@@ -160,9 +164,8 @@ _mm_sub_ps(__m128 __a, __m128 __b)
160164
/// \returns A 128-bit vector of [4 x float] containing the product of the lower
161165
/// 32 bits of both operands. The upper 96 bits are copied from the upper 96
162166
/// bits of the first source operand.
163-
static __inline__ __m128 __DEFAULT_FN_ATTRS
164-
_mm_mul_ss(__m128 __a, __m128 __b)
165-
{
167+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
168+
_mm_mul_ss(__m128 __a, __m128 __b) {
166169
__a[0] *= __b[0];
167170
return __a;
168171
}
@@ -180,9 +183,8 @@ _mm_mul_ss(__m128 __a, __m128 __b)
180183
/// A 128-bit vector of [4 x float] containing one of the source operands.
181184
/// \returns A 128-bit vector of [4 x float] containing the products of both
182185
/// operands.
183-
static __inline__ __m128 __DEFAULT_FN_ATTRS
184-
_mm_mul_ps(__m128 __a, __m128 __b)
185-
{
186+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
187+
_mm_mul_ps(__m128 __a, __m128 __b) {
186188
return (__m128)((__v4sf)__a * (__v4sf)__b);
187189
}
188190

@@ -202,9 +204,8 @@ _mm_mul_ps(__m128 __a, __m128 __b)
202204
/// \returns A 128-bit vector of [4 x float] containing the quotients of the
203205
/// lower 32 bits of both operands. The upper 96 bits are copied from the
204206
/// upper 96 bits of the first source operand.
205-
static __inline__ __m128 __DEFAULT_FN_ATTRS
206-
_mm_div_ss(__m128 __a, __m128 __b)
207-
{
207+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
208+
_mm_div_ss(__m128 __a, __m128 __b) {
208209
__a[0] /= __b[0];
209210
return __a;
210211
}
@@ -221,9 +222,8 @@ _mm_div_ss(__m128 __a, __m128 __b)
221222
/// A 128-bit vector of [4 x float] containing the divisor.
222223
/// \returns A 128-bit vector of [4 x float] containing the quotients of both
223224
/// operands.
224-
static __inline__ __m128 __DEFAULT_FN_ATTRS
225-
_mm_div_ps(__m128 __a, __m128 __b)
226-
{
225+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
226+
_mm_div_ps(__m128 __a, __m128 __b) {
227227
return (__m128)((__v4sf)__a / (__v4sf)__b);
228228
}
229229

@@ -437,9 +437,8 @@ _mm_max_ps(__m128 __a, __m128 __b)
437437
/// A 128-bit vector containing one of the source operands.
438438
/// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
439439
/// values between both operands.
440-
static __inline__ __m128 __DEFAULT_FN_ATTRS
441-
_mm_and_ps(__m128 __a, __m128 __b)
442-
{
440+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
441+
_mm_and_ps(__m128 __a, __m128 __b) {
443442
return (__m128)((__v4su)__a & (__v4su)__b);
444443
}
445444

@@ -459,9 +458,8 @@ _mm_and_ps(__m128 __a, __m128 __b)
459458
/// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
460459
/// one's complement of the first operand and the values in the second
461460
/// operand.
462-
static __inline__ __m128 __DEFAULT_FN_ATTRS
463-
_mm_andnot_ps(__m128 __a, __m128 __b)
464-
{
461+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
462+
_mm_andnot_ps(__m128 __a, __m128 __b) {
465463
return (__m128)(~(__v4su)__a & (__v4su)__b);
466464
}
467465

@@ -477,9 +475,8 @@ _mm_andnot_ps(__m128 __a, __m128 __b)
477475
/// A 128-bit vector of [4 x float] containing one of the source operands.
478476
/// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the
479477
/// values between both operands.
480-
static __inline__ __m128 __DEFAULT_FN_ATTRS
481-
_mm_or_ps(__m128 __a, __m128 __b)
482-
{
478+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
479+
_mm_or_ps(__m128 __a, __m128 __b) {
483480
return (__m128)((__v4su)__a | (__v4su)__b);
484481
}
485482

@@ -496,9 +493,8 @@ _mm_or_ps(__m128 __a, __m128 __b)
496493
/// A 128-bit vector of [4 x float] containing one of the source operands.
497494
/// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR
498495
/// of the values between both operands.
499-
static __inline__ __m128 __DEFAULT_FN_ATTRS
500-
_mm_xor_ps(__m128 __a, __m128 __b)
501-
{
496+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
497+
_mm_xor_ps(__m128 __a, __m128 __b) {
502498
return (__m128)((__v4su)__a ^ (__v4su)__b);
503499
}
504500

@@ -1738,9 +1734,8 @@ _mm_cvt_pi2ps(__m128 __a, __m64 __b)
17381734
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
17391735
/// used in the extraction.
17401736
/// \returns A 32-bit float containing the extracted value.
1741-
static __inline__ float __DEFAULT_FN_ATTRS
1742-
_mm_cvtss_f32(__m128 __a)
1743-
{
1737+
static __inline__ float __DEFAULT_FN_ATTRS_CONSTEXPR
1738+
_mm_cvtss_f32(__m128 __a) {
17441739
return __a[0];
17451740
}
17461741

@@ -1931,9 +1926,8 @@ _mm_undefined_ps(void)
19311926
/// \returns An initialized 128-bit floating-point vector of [4 x float]. The
19321927
/// lower 32 bits contain the value provided in the source operand. The
19331928
/// upper 96 bits are set to zero.
1934-
static __inline__ __m128 __DEFAULT_FN_ATTRS
1935-
_mm_set_ss(float __w)
1936-
{
1929+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1930+
_mm_set_ss(float __w) {
19371931
return __extension__ (__m128){ __w, 0.0f, 0.0f, 0.0f };
19381932
}
19391933

@@ -1949,9 +1943,8 @@ _mm_set_ss(float __w)
19491943
/// A single-precision floating-point value used to initialize each vector
19501944
/// element of the result.
19511945
/// \returns An initialized 128-bit floating-point vector of [4 x float].
1952-
static __inline__ __m128 __DEFAULT_FN_ATTRS
1953-
_mm_set1_ps(float __w)
1954-
{
1946+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1947+
_mm_set1_ps(float __w) {
19551948
return __extension__ (__m128){ __w, __w, __w, __w };
19561949
}
19571950

@@ -1968,9 +1961,8 @@ _mm_set1_ps(float __w)
19681961
/// A single-precision floating-point value used to initialize each vector
19691962
/// element of the result.
19701963
/// \returns An initialized 128-bit floating-point vector of [4 x float].
1971-
static __inline__ __m128 __DEFAULT_FN_ATTRS
1972-
_mm_set_ps1(float __w)
1973-
{
1964+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1965+
_mm_set_ps1(float __w) {
19741966
return _mm_set1_ps(__w);
19751967
}
19761968

@@ -1995,9 +1987,8 @@ _mm_set_ps1(float __w)
19951987
/// A single-precision floating-point value used to initialize bits [31:0]
19961988
/// of the result.
19971989
/// \returns An initialized 128-bit floating-point vector of [4 x float].
1998-
static __inline__ __m128 __DEFAULT_FN_ATTRS
1999-
_mm_set_ps(float __z, float __y, float __x, float __w)
2000-
{
1990+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1991+
_mm_set_ps(float __z, float __y, float __x, float __w) {
20011992
return __extension__ (__m128){ __w, __x, __y, __z };
20021993
}
20031994

@@ -2023,9 +2014,8 @@ _mm_set_ps(float __z, float __y, float __x, float __w)
20232014
/// A single-precision floating-point value used to initialize bits [127:96]
20242015
/// of the result.
20252016
/// \returns An initialized 128-bit floating-point vector of [4 x float].
2026-
static __inline__ __m128 __DEFAULT_FN_ATTRS
2027-
_mm_setr_ps(float __z, float __y, float __x, float __w)
2028-
{
2017+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2018+
_mm_setr_ps(float __z, float __y, float __x, float __w) {
20292019
return __extension__ (__m128){ __z, __y, __x, __w };
20302020
}
20312021

@@ -2038,9 +2028,8 @@ _mm_setr_ps(float __z, float __y, float __x, float __w)
20382028
///
20392029
/// \returns An initialized 128-bit floating-point vector of [4 x float] with
20402030
/// all elements set to zero.
2041-
static __inline__ __m128 __DEFAULT_FN_ATTRS
2042-
_mm_setzero_ps(void)
2043-
{
2031+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2032+
_mm_setzero_ps(void) {
20442033
return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
20452034
}
20462035

@@ -2786,9 +2775,8 @@ void _mm_setcsr(unsigned int __i);
27862775
/// Bits [95:64] are written to bits [63:32] of the destination. \n
27872776
/// Bits [127:96] are written to bits [127:96] of the destination.
27882777
/// \returns A 128-bit vector of [4 x float] containing the interleaved values.
2789-
static __inline__ __m128 __DEFAULT_FN_ATTRS
2790-
_mm_unpackhi_ps(__m128 __a, __m128 __b)
2791-
{
2778+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2779+
_mm_unpackhi_ps(__m128 __a, __m128 __b) {
27922780
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);
27932781
}
27942782

@@ -2808,9 +2796,8 @@ _mm_unpackhi_ps(__m128 __a, __m128 __b)
28082796
/// Bits [31:0] are written to bits [63:32] of the destination. \n
28092797
/// Bits [63:32] are written to bits [127:96] of the destination.
28102798
/// \returns A 128-bit vector of [4 x float] containing the interleaved values.
2811-
static __inline__ __m128 __DEFAULT_FN_ATTRS
2812-
_mm_unpacklo_ps(__m128 __a, __m128 __b)
2813-
{
2799+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2800+
_mm_unpacklo_ps(__m128 __a, __m128 __b) {
28142801
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);
28152802
}
28162803

@@ -2830,9 +2817,8 @@ _mm_unpacklo_ps(__m128 __a, __m128 __b)
28302817
/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are
28312818
/// written to the lower 32 bits of the result.
28322819
/// \returns A 128-bit floating-point vector of [4 x float].
2833-
static __inline__ __m128 __DEFAULT_FN_ATTRS
2834-
_mm_move_ss(__m128 __a, __m128 __b)
2835-
{
2820+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2821+
_mm_move_ss(__m128 __a, __m128 __b) {
28362822
__a[0] = __b[0];
28372823
return __a;
28382824
}
@@ -2852,9 +2838,8 @@ _mm_move_ss(__m128 __a, __m128 __b)
28522838
/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
28532839
/// written to the lower 64 bits of the result.
28542840
/// \returns A 128-bit floating-point vector of [4 x float].
2855-
static __inline__ __m128 __DEFAULT_FN_ATTRS
2856-
_mm_movehl_ps(__m128 __a, __m128 __b)
2857-
{
2841+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2842+
_mm_movehl_ps(__m128 __a, __m128 __b) {
28582843
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);
28592844
}
28602845

@@ -2873,9 +2858,8 @@ _mm_movehl_ps(__m128 __a, __m128 __b)
28732858
/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
28742859
/// written to the upper 64 bits of the result.
28752860
/// \returns A 128-bit floating-point vector of [4 x float].
2876-
static __inline__ __m128 __DEFAULT_FN_ATTRS
2877-
_mm_movelh_ps(__m128 __a, __m128 __b)
2878-
{
2861+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2862+
_mm_movelh_ps(__m128 __a, __m128 __b) {
28792863
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);
28802864
}
28812865

@@ -3207,7 +3191,9 @@ do { \
32073191
#undef __anyext128
32083192
#undef __zeroupper64
32093193
#undef __DEFAULT_FN_ATTRS
3194+
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
32103195
#undef __DEFAULT_FN_ATTRS_SSE2
3196+
#undef __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
32113197

32123198
/* Ugly hack for backwards-compatibility (compatible with gcc) */
32133199
#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)

0 commit comments

Comments
 (0)