Skip to content

[X86] Change target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2 #67410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse")

TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2")
Expand All @@ -250,6 +252,8 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
Expand Down Expand Up @@ -469,12 +473,8 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx")
TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")
Expand Down
272 changes: 0 additions & 272 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1569,160 +1569,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), (int)(mask)))

/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */
#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
#define _CMP_ORD_S 0x17 /* Ordered (signaling) */
#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */

/// Compares each of the corresponding double-precision values of two
/// 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
///
/// Returns a [2 x double] vector consisting of two doubles corresponding to
/// the two comparison results: zero if the comparison is false, and all 1's
/// if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.
///
/// \param a
/// A 128-bit vector of [2 x double].
/// \param b
/// A 128-bit vector of [2 x double].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// 0x08: Equal (unordered, non-signaling) \n
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
/// 0x0A: Not-greater-than (unordered, signaling) \n
/// 0x0B: False (ordered, non-signaling) \n
/// 0x0C: Not-equal (ordered, non-signaling) \n
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
/// 0x0E: Greater-than (ordered, signaling) \n
/// 0x0F: True (unordered, non-signaling) \n
/// 0x10: Equal (ordered, signaling) \n
/// 0x11: Less-than (ordered, non-signaling) \n
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
/// 0x13: Unordered (signaling) \n
/// 0x14: Not-equal (unordered, signaling) \n
/// 0x15: Not-less-than (unordered, non-signaling) \n
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
/// 0x17: Ordered (signaling) \n
/// 0x18: Equal (unordered, signaling) \n
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
/// 0x1B: False (ordered, signaling) \n
/// 0x1C: Not-equal (ordered, signaling) \n
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) \
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)))

/// Compares each of the corresponding values of two 128-bit vectors of
/// [4 x float], using the operation specified by the immediate integer
/// operand.
///
/// Returns a [4 x float] vector consisting of four floats corresponding to
/// the four comparison results: zero if the comparison is false, and all 1's
/// if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.
///
/// \param a
/// A 128-bit vector of [4 x float].
/// \param b
/// A 128-bit vector of [4 x float].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// 0x08: Equal (unordered, non-signaling) \n
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
/// 0x0A: Not-greater-than (unordered, signaling) \n
/// 0x0B: False (ordered, non-signaling) \n
/// 0x0C: Not-equal (ordered, non-signaling) \n
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
/// 0x0E: Greater-than (ordered, signaling) \n
/// 0x0F: True (unordered, non-signaling) \n
/// 0x10: Equal (ordered, signaling) \n
/// 0x11: Less-than (ordered, non-signaling) \n
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
/// 0x13: Unordered (signaling) \n
/// 0x14: Not-equal (unordered, signaling) \n
/// 0x15: Not-less-than (unordered, non-signaling) \n
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
/// 0x17: Ordered (signaling) \n
/// 0x18: Equal (unordered, signaling) \n
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
/// 0x1B: False (ordered, signaling) \n
/// 0x1C: Not-equal (ordered, signaling) \n
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) \
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)))

/// Compares each of the corresponding double-precision values of two
/// 256-bit vectors of [4 x double], using the operation specified by the
/// immediate integer operand.
Expand Down Expand Up @@ -1843,124 +1689,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (c)))

/// Compares each of the corresponding scalar double-precision values of
/// two 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
///
/// If the result is true, all 64 bits of the destination vector are set;
/// otherwise they are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.
///
/// \param a
/// A 128-bit vector of [2 x double].
/// \param b
/// A 128-bit vector of [2 x double].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// 0x08: Equal (unordered, non-signaling) \n
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
/// 0x0A: Not-greater-than (unordered, signaling) \n
/// 0x0B: False (ordered, non-signaling) \n
/// 0x0C: Not-equal (ordered, non-signaling) \n
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
/// 0x0E: Greater-than (ordered, signaling) \n
/// 0x0F: True (unordered, non-signaling) \n
/// 0x10: Equal (ordered, signaling) \n
/// 0x11: Less-than (ordered, non-signaling) \n
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
/// 0x13: Unordered (signaling) \n
/// 0x14: Not-equal (unordered, signaling) \n
/// 0x15: Not-less-than (unordered, non-signaling) \n
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
/// 0x17: Ordered (signaling) \n
/// 0x18: Equal (unordered, signaling) \n
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
/// 0x1B: False (ordered, signaling) \n
/// 0x1C: Not-equal (ordered, signaling) \n
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) \
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)))

/// Compares each of the corresponding scalar values of two 128-bit
/// vectors of [4 x float], using the operation specified by the immediate
/// integer operand.
///
/// If the result is true, all 32 bits of the destination vector are set;
/// otherwise they are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.
///
/// \param a
/// A 128-bit vector of [4 x float].
/// \param b
/// A 128-bit vector of [4 x float].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// 0x08: Equal (unordered, non-signaling) \n
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
/// 0x0A: Not-greater-than (unordered, signaling) \n
/// 0x0B: False (ordered, non-signaling) \n
/// 0x0C: Not-equal (ordered, non-signaling) \n
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
/// 0x0E: Greater-than (ordered, signaling) \n
/// 0x0F: True (unordered, non-signaling) \n
/// 0x10: Equal (ordered, signaling) \n
/// 0x11: Less-than (ordered, non-signaling) \n
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
/// 0x13: Unordered (signaling) \n
/// 0x14: Not-equal (unordered, signaling) \n
/// 0x15: Not-less-than (unordered, non-signaling) \n
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
/// 0x17: Ordered (signaling) \n
/// 0x18: Equal (unordered, signaling) \n
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
/// 0x1B: False (ordered, signaling) \n
/// 0x1C: Not-equal (ordered, signaling) \n
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
/// 0x1E: Greater-than (ordered, non-signaling) \n
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) \
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)))

/// Takes a [8 x i32] vector and returns the vector element value
/// indexed by the immediate constant operand.
///
Expand Down
Loading