Skip to content

[X86][AVX10.2] Support AVX10.2-MINMAX new instructions. #101598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 5, 2024

Conversation

FreddyLeaf
Copy link
Contributor

@FreddyLeaf FreddyLeaf commented Aug 2, 2024

Copy link

github-actions bot commented Aug 2, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 22c06aa5e94e30fb1333ecaf46ce33c65d148634 822611314c4cf8e985628765cc806a0b718ebdd3 --extensions h,cpp,c,inc -- clang/lib/Headers/avx10_2_512minmaxintrin.h clang/lib/Headers/avx10_2minmaxintrin.h clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c clang/test/CodeGen/X86/avx10_2_512minmax-error.c clang/test/CodeGen/X86/avx10_2minmax-builtins.c clang/lib/Headers/immintrin.h clang/lib/Sema/SemaX86.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.h llvm/lib/Target/X86/X86IntrinsicsInfo.h llvm/test/TableGen/x86-fold-tables.inc
View the diff from clang-format here.
diff --git a/clang/lib/Headers/avx10_2_512minmaxintrin.h b/clang/lib/Headers/avx10_2_512minmaxintrin.h
index e175365d11..877faf37f5 100644
--- a/clang/lib/Headers/avx10_2_512minmaxintrin.h
+++ b/clang/lib/Headers/avx10_2_512minmaxintrin.h
@@ -35,7 +35,7 @@
 #define _mm512_minmax_pd(A, B, C)                                              \
   ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
       (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
-      (__v8df)_mm512_undefined_pd(), (__mmask8)-1,                             \
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1,                           \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_minmax_pd(W, U, A, B, C)                                   \
@@ -51,7 +51,7 @@
 #define _mm512_minmax_round_pd(A, B, C, R)                                     \
   ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
       (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
-      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R)))
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1, (int)(R)))
 
 #define _mm512_mask_minmax_round_pd(W, U, A, B, C, R)                          \
   ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
@@ -66,7 +66,7 @@
 #define _mm512_minmax_ph(A, B, C)                                              \
   ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
       (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
-      (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,                           \
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1,                         \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_minmax_ph(W, U, A, B, C)                                   \
@@ -82,7 +82,7 @@
 #define _mm512_minmax_round_ph(A, B, C, R)                                     \
   ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
       (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
-      (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1, (int)(R)))
 
 #define _mm512_mask_minmax_round_ph(W, U, A, B, C, R)                          \
   ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
@@ -97,7 +97,7 @@
 #define _mm512_minmax_ps(A, B, C)                                              \
   ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
       (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
-      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1,                           \
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1,                         \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_minmax_ps(W, U, A, B, C)                                   \
@@ -113,7 +113,7 @@
 #define _mm512_minmax_round_ps(A, B, C, R)                                     \
   ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
       (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
-      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)))
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1, (int)(R)))
 
 #define _mm512_mask_minmax_round_ps(W, U, A, B, C, R)                          \
   ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
index a9367e7424..c350a9e669 100644
--- a/clang/lib/Headers/avx10_2minmaxintrin.h
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -53,7 +53,7 @@
 #define _mm_minmax_pd(A, B, C)                                                 \
   ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
-      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
+      (__v2df)_mm_setzero_pd(), (__mmask8) - 1))
 
 #define _mm_mask_minmax_pd(W, U, A, B, C)                                      \
   ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
@@ -68,7 +68,7 @@
 #define _mm256_minmax_pd(A, B, C)                                              \
   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
-      (__v4df)_mm256_setzero_pd(), (__mmask8)-1, _MM_FROUND_NO_EXC))
+      (__v4df)_mm256_setzero_pd(), (__mmask8) - 1, _MM_FROUND_NO_EXC))
 
 #define _mm256_mask_minmax_pd(W, U, A, B, C)                                   \
   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
@@ -83,7 +83,7 @@
 #define _mm256_minmax_round_pd(A, B, C, R)                                     \
   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
-      (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R)))
+      (__v4df)_mm256_undefined_pd(), (__mmask8) - 1, (int)(R)))
 
 #define _mm256_mask_minmax_round_pd(W, U, A, B, C, R)                          \
   ((__m256d)__builtin_ia32_vminmaxpd256_round_mask(                            \
@@ -98,12 +98,12 @@
 #define _mm_minmax_ph(A, B, C)                                                 \
   ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
-      (__v8hf)_mm_setzero_ph(), (__mmask8)-1))
+      (__v8hf)_mm_setzero_ph(), (__mmask8) - 1))
 
 #define _mm_mask_minmax_ph(W, U, A, B, C)                                      \
   ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
-      (__v8hf)(__m128h)(W), (__mmask16)-1))
+      (__v8hf)(__m128h)(W), (__mmask16) - 1))
 
 #define _mm_maskz_minmax_ph(U, A, B, C)                                        \
   ((__m128h)__builtin_ia32_vminmaxph128_mask(                                  \
@@ -113,7 +113,7 @@
 #define _mm256_minmax_ph(A, B, C)                                              \
   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
-      (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_NO_EXC))
+      (__v16hf)_mm256_setzero_ph(), (__mmask16) - 1, _MM_FROUND_NO_EXC))
 
 #define _mm256_mask_minmax_ph(W, U, A, B, C)                                   \
   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
@@ -128,7 +128,7 @@
 #define _mm256_minmax_round_ph(A, B, C, R)                                     \
   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
       (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C),                  \
-      (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R)))
+      (__v16hf)_mm256_undefined_ph(), (__mmask16) - 1, (int)(R)))
 
 #define _mm256_mask_minmax_round_ph(W, U, A, B, C, R)                          \
   ((__m256h)__builtin_ia32_vminmaxph256_round_mask(                            \
@@ -143,7 +143,7 @@
 #define _mm_minmax_ps(A, B, C)                                                 \
   ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
-      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
+      (__v4sf)_mm_setzero_ps(), (__mmask8) - 1))
 
 #define _mm_mask_minmax_ps(W, U, A, B, C)                                      \
   ((__m128)__builtin_ia32_vminmaxps128_mask(                                   \
@@ -158,7 +158,7 @@
 #define _mm256_minmax_ps(A, B, C)                                              \
   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
-      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_NO_EXC))
+      (__v8sf)_mm256_setzero_ps(), (__mmask8) - 1, _MM_FROUND_NO_EXC))
 
 #define _mm256_mask_minmax_ps(W, U, A, B, C)                                   \
   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
@@ -173,7 +173,7 @@
 #define _mm256_minmax_round_ps(A, B, C, R)                                     \
   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
-      (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R)))
+      (__v8sf)_mm256_undefined_ps(), (__mmask8) - 1, (int)(R)))
 
 #define _mm256_mask_minmax_round_ps(W, U, A, B, C, R)                          \
   ((__m256)__builtin_ia32_vminmaxps256_round_mask(                             \
@@ -188,7 +188,7 @@
 #define _mm_minmax_sd(A, B, C)                                                 \
   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
-      (__v2df)_mm_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_mask_minmax_sd(W, U, A, B, C)                                      \
   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
@@ -203,7 +203,7 @@
 #define _mm_minmax_round_sd(A, B, C, R)                                        \
   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
-      (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R)))
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, (int)(R)))
 
 #define _mm_mask_minmax_round_sd(W, U, A, B, C, R)                             \
   ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
@@ -218,7 +218,7 @@
 #define _mm_minmax_sh(A, B, C)                                                 \
   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
-      (__v8hf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_mask_minmax_sh(W, U, A, B, C)                                      \
   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
@@ -233,7 +233,7 @@
 #define _mm_minmax_round_sh(A, B, C, R)                                        \
   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
-      (__v8hf)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, (int)(R)))
 
 #define _mm_mask_minmax_round_sh(W, U, A, B, C, R)                             \
   ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
@@ -248,7 +248,7 @@
 #define _mm_minmax_ss(A, B, C)                                                 \
   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
-      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_mask_minmax_ss(W, U, A, B, C)                                      \
   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
@@ -263,7 +263,7 @@
 #define _mm_minmax_round_ss(A, B, C, R)                                        \
   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
-      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, (int)(R)))
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, (int)(R)))
 
 #define _mm_mask_minmax_round_ss(W, U, A, B, C, R)                             \
   ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \

@FreddyLeaf FreddyLeaf marked this pull request as ready for review August 3, 2024 06:06
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics mc Machine (object) code llvm:ir labels Aug 3, 2024
@llvmbot
Copy link
Member

llvmbot commented Aug 3, 2024

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Freddy Ye (FreddyLeaf)

Changes

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965


Patch is 307.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101598.diff

25 Files Affected:

  • (modified) clang/docs/ReleaseNotes.rst (+2)
  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+16)
  • (modified) clang/lib/Headers/CMakeLists.txt (+2)
  • (added) clang/lib/Headers/avx10_2_512minmaxintrin.h (+219)
  • (added) clang/lib/Headers/avx10_2minmaxintrin.h (+188)
  • (modified) clang/lib/Headers/immintrin.h (+2)
  • (modified) clang/lib/Sema/SemaX86.cpp (+24)
  • (added) clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c (+244)
  • (added) clang/test/CodeGen/X86/avx10_2_512minmax-error.c (+137)
  • (added) clang/test/CodeGen/X86/avx10_2minmax-builtins.c (+210)
  • (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+66)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+4)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.h (+5)
  • (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+112)
  • (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+10)
  • (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+15)
  • (added) llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll (+648)
  • (added) llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll (+558)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2minmax-32.txt (+579)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2minmax-64.txt (+579)
  • (added) llvm/test/MC/X86/avx10.2minmax-32-att.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-32-intel.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-64-att.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-64-intel.s (+578)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (+81)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4c7bd099420ab..c54627586a240 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -220,6 +220,8 @@ X86 Support
   found in the file ``clang/www/builtins.py``.
 
 - Support ISA of ``AVX10.2``.
+  * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
+    ``*_(mask(z)))_minmax_s[s|d|h]``.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index f028711a807c0..3200e0112adce 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2022,6 +2022,22 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 
+// AVX10-MINMAX
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16512, "V32yV32yV32yIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd128_mask, "V2dV2dV2dIiV2dUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd256_round_mask, "V4dV4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd512_round_mask, "V8dV8dV8dIiV8dUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph128_mask, "V8xV8xV8xIiV8xUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph256_round_mask, "V16xV16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph512_round_mask, "V32xV32xV32xIiV32xUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps128_mask, "V4fV4fV4fIiV4fUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps256_round_mask, "V8fV8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index b17ab24d625a0..f3d19e38f8f2b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -147,7 +147,9 @@ set(x86_files
   amxcomplexintrin.h
   amxfp16intrin.h
   amxintrin.h
+  avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
+  avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx2intrin.h
   avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512minmaxintrin.h b/clang/lib/Headers/avx10_2_512minmaxintrin.h
new file mode 100644
index 0000000000000..ee486cb24f3d9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512minmaxintrin.h
@@ -0,0 +1,219 @@
+/*===--------------- avx10_2_512minmaxintrin.h - AVX10_2_512MINMAX intrinsics
+ *-----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2_512MINMAXINTRIN_H
+#define __AVX10_2_512MINMAXINTRIN_H
+
+#define _mm512_minmaxne_pbh(A, B, C)                                           \
+  ((__m512bh)__builtin_ia32_vminmaxnepbf16512(                                 \
+      (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(A), (int)(C)))
+
+#define _mm512_mask_minmaxne_pbh(W, U, A, B, C)                                \
+  ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
+      (__mmask32)(U),                                                          \
+      (__v32bf)__builtin_ia32_vminmaxnepbf16512(                               \
+          (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(B), (int)(C)),           \
+      (__v32bf)(__m512bh)(W)))
+
+#define _mm512_maskz_minmaxne_pbh(U, A, B, C)                                  \
+  ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
+      (__mmask32)(U),                                                          \
+      (__v32bf)__builtin_ia32_vminmaxnepbf16512(                               \
+          (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(B), (int)(C)),           \
+      (__v32bf) __builtin_bit_cast(__m512bh, _mm512_setzero_ps())))
+
+#define _mm512_minmax_pd(A, B, C)                                              \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1,                           \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C)                                   \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C)                                     \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_pd(A, B, C, R)                                     \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R)                          \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R)                            \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm512_minmax_ph(A, B, C)                                              \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1,                         \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C)                                   \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)(__m512h)(W), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C)                                     \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ph(A, B, C, R)                                     \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R)                          \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R)                            \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+#define _mm512_minmax_ps(A, B, C)                                              \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1,                         \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C)                                   \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W),      \
+      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C)                                     \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ps(A, B, C, R)                                     \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R)                          \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W),      \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R)                            \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
+
+#define _mm_minmax_sd(A, B, C)                                                 \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sd(W, U, A, B, C)                                      \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sd(U, A, B, C)                                        \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_sd(A, B, C, R)                                        \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_sd(W, U, A, B, C, R)                             \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_sd(U, A, B, C, R)                               \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm_minmax_sh(A, B, C)                                                 \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sh(W, U, A, B, C)                                      \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sh(U, A, B, C)                                        \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_sh(A, B, C, R)                                        \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_sh(W, U, A, B, C, R)                             \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_sh(U, A, B, C, R)                               \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm_minmax_ss(A, B, C)                                                 \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_ss(W, U, A, B, C)                                      \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
+      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_ss(U, A, B, C)                                        \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_ss(A, B, C, R)                                        \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_ss(W, U, A, B, C, R)                             \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_ss(U, A, B, C, R)                               \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
+
+#endif // __AVX10_2_512MINMAXINTRIN_H
diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
new file mode 100644
index 0000000000000..48539dd65b5b9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -0,0 +1,188 @@
+/*===--------------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics
+ *-----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2MINMAXINTRIN_H
+#define __AVX10_2MINMAXINTRIN_H
+
+#define _mm_minmaxne_pbh(A, B, C)                                              \
+  ((__m128bh)__builtin_ia32_vminmaxnepbf16128(                                 \
+      (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)))
+
+#define _mm_mask_minmaxne_pbh(W, U, A, B, C)                                   \
+  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
+      (__mmask8)(U),                                                           \
+      (__v8bf)__builtin_ia32_vminmaxnepbf16128(                                \
+          (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)),             \
+      (__v8bf)(W)))
+
+#define _mm_maskz_minmaxne_pbh(U, A, B, C)                                     \
+  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
+      (__mmask8)(U),                                                           \
+      (__v8bf)__builtin_ia32_vminmaxnepbf16128(                                \
+          (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)),             \
+      (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps())))
+
+#define _mm256_minmaxne_pbh(A, B, C)                                           \
+  ((__m256bh)__builtin_ia32_vminmaxnepbf16256(                                 \
+      (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)))
+
+#define _mm256_mask_minmaxne_pbh(W, U, A, B, C)                                \
+  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
+      (__mmask16)(U),                                                          \
+      (__v16bf)__builtin_ia32_vminmaxnepbf16256(                               \
+          (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)),           \
+      (__v16bf)(W)))
+
+#define _mm256_maskz_minmaxne_pbh(U, A, B, C)                                  \
+  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
+      (__mmask16)(U),                                                          \
+      (__v16bf)__builtin_ia32_vminmaxnepbf16256(                               \
+          (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)),           \
+      (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps())))
+
+#define _mm_minmax_pd(A, B, C)                                                 \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(-1)))
+
+#define _mm_mask_minmax_pd(W, U, A, B, C)                                      \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_minmax_pd(U, A, B, C)                                        \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m1...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Aug 3, 2024

@llvm/pr-subscribers-mc

Author: Freddy Ye (FreddyLeaf)

Changes

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965


Patch is 307.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101598.diff

25 Files Affected:

  • (modified) clang/docs/ReleaseNotes.rst (+2)
  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+16)
  • (modified) clang/lib/Headers/CMakeLists.txt (+2)
  • (added) clang/lib/Headers/avx10_2_512minmaxintrin.h (+219)
  • (added) clang/lib/Headers/avx10_2minmaxintrin.h (+188)
  • (modified) clang/lib/Headers/immintrin.h (+2)
  • (modified) clang/lib/Sema/SemaX86.cpp (+24)
  • (added) clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c (+244)
  • (added) clang/test/CodeGen/X86/avx10_2_512minmax-error.c (+137)
  • (added) clang/test/CodeGen/X86/avx10_2minmax-builtins.c (+210)
  • (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+66)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+4)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.h (+5)
  • (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+112)
  • (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+10)
  • (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+15)
  • (added) llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll (+648)
  • (added) llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll (+558)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2minmax-32.txt (+579)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2minmax-64.txt (+579)
  • (added) llvm/test/MC/X86/avx10.2minmax-32-att.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-32-intel.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-64-att.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-64-intel.s (+578)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (+81)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4c7bd099420ab..c54627586a240 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -220,6 +220,8 @@ X86 Support
   found in the file ``clang/www/builtins.py``.
 
 - Support ISA of ``AVX10.2``.
+  * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
+    ``*_(mask(z)))_minmax_s[s|d|h]``.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index f028711a807c0..3200e0112adce 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2022,6 +2022,22 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 
+// AVX10-MINMAX
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16512, "V32yV32yV32yIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd128_mask, "V2dV2dV2dIiV2dUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd256_round_mask, "V4dV4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd512_round_mask, "V8dV8dV8dIiV8dUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph128_mask, "V8xV8xV8xIiV8xUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph256_round_mask, "V16xV16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph512_round_mask, "V32xV32xV32xIiV32xUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps128_mask, "V4fV4fV4fIiV4fUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps256_round_mask, "V8fV8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index b17ab24d625a0..f3d19e38f8f2b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -147,7 +147,9 @@ set(x86_files
   amxcomplexintrin.h
   amxfp16intrin.h
   amxintrin.h
+  avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
+  avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx2intrin.h
   avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512minmaxintrin.h b/clang/lib/Headers/avx10_2_512minmaxintrin.h
new file mode 100644
index 0000000000000..ee486cb24f3d9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512minmaxintrin.h
@@ -0,0 +1,219 @@
+/*===--------------- avx10_2_512minmaxintrin.h - AVX10_2_512MINMAX intrinsics
+ *-----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2_512MINMAXINTRIN_H
+#define __AVX10_2_512MINMAXINTRIN_H
+
+#define _mm512_minmaxne_pbh(A, B, C)                                           \
+  ((__m512bh)__builtin_ia32_vminmaxnepbf16512(                                 \
+      (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(A), (int)(C)))
+
+#define _mm512_mask_minmaxne_pbh(W, U, A, B, C)                                \
+  ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
+      (__mmask32)(U),                                                          \
+      (__v32bf)__builtin_ia32_vminmaxnepbf16512(                               \
+          (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(B), (int)(C)),           \
+      (__v32bf)(__m512bh)(W)))
+
+#define _mm512_maskz_minmaxne_pbh(U, A, B, C)                                  \
+  ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
+      (__mmask32)(U),                                                          \
+      (__v32bf)__builtin_ia32_vminmaxnepbf16512(                               \
+          (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(B), (int)(C)),           \
+      (__v32bf) __builtin_bit_cast(__m512bh, _mm512_setzero_ps())))
+
+#define _mm512_minmax_pd(A, B, C)                                              \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1,                           \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C)                                   \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C)                                     \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_pd(A, B, C, R)                                     \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R)                          \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R)                            \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm512_minmax_ph(A, B, C)                                              \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1,                         \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C)                                   \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)(__m512h)(W), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C)                                     \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ph(A, B, C, R)                                     \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R)                          \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R)                            \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+#define _mm512_minmax_ps(A, B, C)                                              \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1,                         \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C)                                   \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W),      \
+      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C)                                     \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ps(A, B, C, R)                                     \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R)                          \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W),      \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R)                            \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
+
+#define _mm_minmax_sd(A, B, C)                                                 \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sd(W, U, A, B, C)                                      \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sd(U, A, B, C)                                        \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_sd(A, B, C, R)                                        \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_sd(W, U, A, B, C, R)                             \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_sd(U, A, B, C, R)                               \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm_minmax_sh(A, B, C)                                                 \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sh(W, U, A, B, C)                                      \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sh(U, A, B, C)                                        \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_sh(A, B, C, R)                                        \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_sh(W, U, A, B, C, R)                             \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_sh(U, A, B, C, R)                               \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm_minmax_ss(A, B, C)                                                 \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_ss(W, U, A, B, C)                                      \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
+      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_ss(U, A, B, C)                                        \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_ss(A, B, C, R)                                        \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_ss(W, U, A, B, C, R)                             \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_ss(U, A, B, C, R)                               \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
+
+#endif // __AVX10_2_512MINMAXINTRIN_H
diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
new file mode 100644
index 0000000000000..48539dd65b5b9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -0,0 +1,188 @@
+/*===--------------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics
+ *-----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2MINMAXINTRIN_H
+#define __AVX10_2MINMAXINTRIN_H
+
+#define _mm_minmaxne_pbh(A, B, C)                                              \
+  ((__m128bh)__builtin_ia32_vminmaxnepbf16128(                                 \
+      (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)))
+
+#define _mm_mask_minmaxne_pbh(W, U, A, B, C)                                   \
+  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
+      (__mmask8)(U),                                                           \
+      (__v8bf)__builtin_ia32_vminmaxnepbf16128(                                \
+          (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)),             \
+      (__v8bf)(W)))
+
+#define _mm_maskz_minmaxne_pbh(U, A, B, C)                                     \
+  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
+      (__mmask8)(U),                                                           \
+      (__v8bf)__builtin_ia32_vminmaxnepbf16128(                                \
+          (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)),             \
+      (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps())))
+
+#define _mm256_minmaxne_pbh(A, B, C)                                           \
+  ((__m256bh)__builtin_ia32_vminmaxnepbf16256(                                 \
+      (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)))
+
+#define _mm256_mask_minmaxne_pbh(W, U, A, B, C)                                \
+  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
+      (__mmask16)(U),                                                          \
+      (__v16bf)__builtin_ia32_vminmaxnepbf16256(                               \
+          (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)),           \
+      (__v16bf)(W)))
+
+#define _mm256_maskz_minmaxne_pbh(U, A, B, C)                                  \
+  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
+      (__mmask16)(U),                                                          \
+      (__v16bf)__builtin_ia32_vminmaxnepbf16256(                               \
+          (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)),           \
+      (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps())))
+
+#define _mm_minmax_pd(A, B, C)                                                 \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(-1)))
+
+#define _mm_mask_minmax_pd(W, U, A, B, C)                                      \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_minmax_pd(U, A, B, C)                                        \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m1...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Aug 3, 2024

@llvm/pr-subscribers-llvm-ir

Author: Freddy Ye (FreddyLeaf)

Changes

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965


Patch is 307.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101598.diff

25 Files Affected:

  • (modified) clang/docs/ReleaseNotes.rst (+2)
  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+16)
  • (modified) clang/lib/Headers/CMakeLists.txt (+2)
  • (added) clang/lib/Headers/avx10_2_512minmaxintrin.h (+219)
  • (added) clang/lib/Headers/avx10_2minmaxintrin.h (+188)
  • (modified) clang/lib/Headers/immintrin.h (+2)
  • (modified) clang/lib/Sema/SemaX86.cpp (+24)
  • (added) clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c (+244)
  • (added) clang/test/CodeGen/X86/avx10_2_512minmax-error.c (+137)
  • (added) clang/test/CodeGen/X86/avx10_2minmax-builtins.c (+210)
  • (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+66)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+4)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.h (+5)
  • (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+112)
  • (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+10)
  • (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+15)
  • (added) llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll (+648)
  • (added) llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll (+558)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2minmax-32.txt (+579)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2minmax-64.txt (+579)
  • (added) llvm/test/MC/X86/avx10.2minmax-32-att.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-32-intel.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-64-att.s (+578)
  • (added) llvm/test/MC/X86/avx10.2minmax-64-intel.s (+578)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (+81)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4c7bd099420ab..c54627586a240 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -220,6 +220,8 @@ X86 Support
   found in the file ``clang/www/builtins.py``.
 
 - Support ISA of ``AVX10.2``.
+  * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
+    ``*_(mask(z)))_minmax_s[s|d|h]``.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index f028711a807c0..3200e0112adce 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2022,6 +2022,22 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 
+// AVX10-MINMAX
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16512, "V32yV32yV32yIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd128_mask, "V2dV2dV2dIiV2dUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd256_round_mask, "V4dV4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxpd512_round_mask, "V8dV8dV8dIiV8dUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph128_mask, "V8xV8xV8xIiV8xUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph256_round_mask, "V16xV16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxph512_round_mask, "V32xV32xV32xIiV32xUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps128_mask, "V4fV4fV4fIiV4fUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps256_round_mask, "V8fV8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index b17ab24d625a0..f3d19e38f8f2b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -147,7 +147,9 @@ set(x86_files
   amxcomplexintrin.h
   amxfp16intrin.h
   amxintrin.h
+  avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
+  avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx2intrin.h
   avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512minmaxintrin.h b/clang/lib/Headers/avx10_2_512minmaxintrin.h
new file mode 100644
index 0000000000000..ee486cb24f3d9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512minmaxintrin.h
@@ -0,0 +1,219 @@
+/*===--------------- avx10_2_512minmaxintrin.h - AVX10_2_512MINMAX intrinsics
+ *-----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2_512MINMAXINTRIN_H
+#define __AVX10_2_512MINMAXINTRIN_H
+
+#define _mm512_minmaxne_pbh(A, B, C)                                           \
+  ((__m512bh)__builtin_ia32_vminmaxnepbf16512(                                 \
+      (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(A), (int)(C)))
+
+#define _mm512_mask_minmaxne_pbh(W, U, A, B, C)                                \
+  ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
+      (__mmask32)(U),                                                          \
+      (__v32bf)__builtin_ia32_vminmaxnepbf16512(                               \
+          (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(B), (int)(C)),           \
+      (__v32bf)(__m512bh)(W)))
+
+#define _mm512_maskz_minmaxne_pbh(U, A, B, C)                                  \
+  ((__m512bh)__builtin_ia32_selectpbf_512(                                     \
+      (__mmask32)(U),                                                          \
+      (__v32bf)__builtin_ia32_vminmaxnepbf16512(                               \
+          (__v32bf)(__m512bh)(A), (__v32bf)(__m512bh)(B), (int)(C)),           \
+      (__v32bf) __builtin_bit_cast(__m512bh, _mm512_setzero_ps())))
+
+#define _mm512_minmax_pd(A, B, C)                                              \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1,                           \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C)                                   \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C)                                     \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_pd(A, B, C, R)                                     \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_undefined_pd(), (__mmask8) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R)                          \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R)                            \
+  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask(                            \
+      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C),                    \
+      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm512_minmax_ph(A, B, C)                                              \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1,                         \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C)                                   \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)(__m512h)(W), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C)                                     \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ph(A, B, C, R)                                     \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_undefined_ph(), (__mmask32) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R)                          \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R)                            \
+  ((__m512h)__builtin_ia32_vminmaxph512_round_mask(                            \
+      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C),                  \
+      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+#define _mm512_minmax_ps(A, B, C)                                              \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1,                         \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C)                                   \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W),      \
+      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C)                                     \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ps(A, B, C, R)                                     \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_undefined_ps(), (__mmask16) - 1, (int)(R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R)                          \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W),      \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R)                            \
+  ((__m512)__builtin_ia32_vminmaxps512_round_mask(                             \
+      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C),                    \
+      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
+
+#define _mm_minmax_sd(A, B, C)                                                 \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sd(W, U, A, B, C)                                      \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sd(U, A, B, C)                                        \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_sd(A, B, C, R)                                        \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_undefined_pd(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_sd(W, U, A, B, C, R)                             \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_sd(U, A, B, C, R)                               \
+  ((__m128d)__builtin_ia32_vminmaxsd_round_mask(                               \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm_minmax_sh(A, B, C)                                                 \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sh(W, U, A, B, C)                                      \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sh(U, A, B, C)                                        \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_sh(A, B, C, R)                                        \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_undefined_ph(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_sh(W, U, A, B, C, R)                             \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_sh(U, A, B, C, R)                               \
+  ((__m128h)__builtin_ia32_vminmaxsh_round_mask(                               \
+      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C),                    \
+      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm_minmax_ss(A, B, C)                                                 \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_ss(W, U, A, B, C)                                      \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
+      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_ss(U, A, B, C)                                        \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_round_ss(A, B, C, R)                                        \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_undefined_ps(), (__mmask8) - 1, (int)(R)))
+
+#define _mm_mask_minmax_round_ss(W, U, A, B, C, R)                             \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W),         \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_minmax_round_ss(U, A, B, C, R)                               \
+  ((__m128)__builtin_ia32_vminmaxss_round_mask(                                \
+      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C),                      \
+      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
+
+#endif // __AVX10_2_512MINMAXINTRIN_H
diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
new file mode 100644
index 0000000000000..48539dd65b5b9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -0,0 +1,188 @@
+/*===--------------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics
+ *-----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2MINMAXINTRIN_H
+#define __AVX10_2MINMAXINTRIN_H
+
+#define _mm_minmaxne_pbh(A, B, C)                                              \
+  ((__m128bh)__builtin_ia32_vminmaxnepbf16128(                                 \
+      (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)))
+
+#define _mm_mask_minmaxne_pbh(W, U, A, B, C)                                   \
+  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
+      (__mmask8)(U),                                                           \
+      (__v8bf)__builtin_ia32_vminmaxnepbf16128(                                \
+          (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)),             \
+      (__v8bf)(W)))
+
+#define _mm_maskz_minmaxne_pbh(U, A, B, C)                                     \
+  ((__m128bh)__builtin_ia32_selectpbf_128(                                     \
+      (__mmask8)(U),                                                           \
+      (__v8bf)__builtin_ia32_vminmaxnepbf16128(                                \
+          (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)),             \
+      (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps())))
+
+#define _mm256_minmaxne_pbh(A, B, C)                                           \
+  ((__m256bh)__builtin_ia32_vminmaxnepbf16256(                                 \
+      (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)))
+
+#define _mm256_mask_minmaxne_pbh(W, U, A, B, C)                                \
+  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
+      (__mmask16)(U),                                                          \
+      (__v16bf)__builtin_ia32_vminmaxnepbf16256(                               \
+          (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)),           \
+      (__v16bf)(W)))
+
+#define _mm256_maskz_minmaxne_pbh(U, A, B, C)                                  \
+  ((__m256bh)__builtin_ia32_selectpbf_256(                                     \
+      (__mmask16)(U),                                                          \
+      (__v16bf)__builtin_ia32_vminmaxnepbf16256(                               \
+          (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)),           \
+      (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps())))
+
+#define _mm_minmax_pd(A, B, C)                                                 \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)_mm_setzero_pd(), (__mmask8)(-1)))
+
+#define _mm_mask_minmax_pd(W, U, A, B, C)                                      \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C),                    \
+      (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_minmax_pd(U, A, B, C)                                        \
+  ((__m128d)__builtin_ia32_vminmaxpd128_mask(                                  \
+      (__v2df)(__m1...
[truncated]

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@FreddyLeaf FreddyLeaf merged commit 3d5cc7e into llvm:main Aug 5, 2024
7 of 8 checks passed
@FreddyLeaf FreddyLeaf deleted the avx10-minmax branch August 5, 2024 03:06
Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category llvm:ir mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants