-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[X86][AVX10.2] Remove YMM rounding from VMINMAXP[H,S,D] #132405
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang). Changes — Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343. Patch is 55.76 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/132405.diff — 15 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ea0d6df4a33c2..adb174a9fc62d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4823,7 +4823,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vminmaxpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char, _Constant int)">;
+ def vminmaxpd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4835,7 +4835,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vminmaxph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+ def vminmaxph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4847,7 +4847,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vminmaxps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char, _Constant int)">;
+ def vminmaxps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
index 8164d49d89f1f..809a01b04f13e 100644
--- a/clang/lib/Headers/avx10_2minmaxintrin.h
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -66,34 +66,19 @@
(__v2df)_mm_setzero_pd(), (__mmask8)(U)))
#define _mm256_minmax_pd(A, B, C) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
+ ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), (__mmask8)-1, _MM_FROUND_NO_EXC))
+ (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
#define _mm256_mask_minmax_pd(W, U, A, B, C) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
+ ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)(__m256d)(W), (__mmask8)(U), _MM_FROUND_NO_EXC))
+ (__v4df)(__m256d)(W), (__mmask8)(U)))
#define _mm256_maskz_minmax_pd(U, A, B, C) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
+ ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), (__mmask8)(U), _MM_FROUND_NO_EXC))
-
-#define _mm256_minmax_round_pd(A, B, C, R) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
- (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R)))
-
-#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
- (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
-
-#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
- (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R)))
+ (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
#define _mm_minmax_ph(A, B, C) \
((__m128h)__builtin_ia32_vminmaxph128_mask( \
@@ -111,34 +96,19 @@
(__v8hf)_mm_setzero_ph(), (__mmask8)(U)))
#define _mm256_minmax_ph(A, B, C) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
+ ((__m256h)__builtin_ia32_vminmaxph256_mask( \
(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_NO_EXC))
+ (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
#define _mm256_mask_minmax_ph(W, U, A, B, C) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
+ ((__m256h)__builtin_ia32_vminmaxph256_mask( \
(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)(__m256h)(W), (__mmask16)(U), _MM_FROUND_NO_EXC))
+ (__v16hf)(__m256h)(W), (__mmask16)(U)))
#define _mm256_maskz_minmax_ph(U, A, B, C) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
- (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), _MM_FROUND_NO_EXC))
-
-#define _mm256_minmax_round_ph(A, B, C, R) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
+ ((__m256h)__builtin_ia32_vminmaxph256_mask( \
(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R)))
-
-#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
- (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (C), \
- (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R)))
-
-#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
- (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+ (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
#define _mm_minmax_ps(A, B, C) \
((__m128)__builtin_ia32_vminmaxps128_mask( \
@@ -156,34 +126,19 @@
(__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
#define _mm256_minmax_ps(A, B, C) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
+ ((__m256)__builtin_ia32_vminmaxps256_mask( \
(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_NO_EXC))
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
#define _mm256_mask_minmax_ps(W, U, A, B, C) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
+ ((__m256)__builtin_ia32_vminmaxps256_mask( \
(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
- (__mmask8)(U), _MM_FROUND_NO_EXC))
+ (__mmask8)(U)))
#define _mm256_maskz_minmax_ps(U, A, B, C) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
- (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), _MM_FROUND_NO_EXC))
-
-#define _mm256_minmax_round_ps(A, B, C, R) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
- (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R)))
-
-#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
- (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
- (__mmask8)(U), (int)(R)))
-
-#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
+ ((__m256)__builtin_ia32_vminmaxps256_mask( \
(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R)))
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
#define _mm_minmax_sd(A, B, C) \
((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index e54a278225f1c..27454e2f03b5a 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -147,9 +147,6 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_rndscalesd_round_mask:
case X86::BI__builtin_ia32_rndscaless_round_mask:
case X86::BI__builtin_ia32_rndscalesh_round_mask:
- case X86::BI__builtin_ia32_vminmaxpd256_round_mask:
- case X86::BI__builtin_ia32_vminmaxps256_round_mask:
- case X86::BI__builtin_ia32_vminmaxph256_round_mask:
case X86::BI__builtin_ia32_vminmaxpd512_round_mask:
case X86::BI__builtin_ia32_vminmaxps512_round_mask:
case X86::BI__builtin_ia32_vminmaxph512_round_mask:
@@ -910,11 +907,11 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_vminmaxbf16256:
case X86::BI__builtin_ia32_vminmaxbf16512:
case X86::BI__builtin_ia32_vminmaxpd128_mask:
- case X86::BI__builtin_ia32_vminmaxpd256_round_mask:
+ case X86::BI__builtin_ia32_vminmaxpd256_mask:
case X86::BI__builtin_ia32_vminmaxph128_mask:
- case X86::BI__builtin_ia32_vminmaxph256_round_mask:
+ case X86::BI__builtin_ia32_vminmaxph256_mask:
case X86::BI__builtin_ia32_vminmaxps128_mask:
- case X86::BI__builtin_ia32_vminmaxps256_round_mask:
+ case X86::BI__builtin_ia32_vminmaxps256_mask:
case X86::BI__builtin_ia32_vminmaxpd512_round_mask:
case X86::BI__builtin_ia32_vminmaxps512_round_mask:
case X86::BI__builtin_ia32_vminmaxph512_round_mask:
diff --git a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
index 6db7801eb0040..2ee496d317a5a 100644
--- a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
+++ b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
@@ -113,17 +113,6 @@ __m512 test_mm512_minmax_round_ps(__m512 __A, __m512 __B) {
return _mm512_minmax_round_ps(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
}
-__m256d test_mm256_minmax_round_pd(__m256d __A, __m256d __B) {
- return _mm256_minmax_round_pd(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
-}
-
-__m256h test_mm256_minmax_round_ph(__m256h __A, __m256h __B) {
- return _mm256_minmax_round_ph(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
-}
-
-__m256 test_mm256_minmax_round_ps(__m256 __A, __m256 __B) {
- return _mm256_minmax_round_ps(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
-}
__m128d test_mm_minmax_round_sd(__m128d __A, __m128d __B) {
return _mm_minmax_round_sd(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
}
diff --git a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
index 7e21858c71834..7dad153a15c37 100644
--- a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
@@ -67,40 +67,22 @@ __m128d test_mm_maskz_minmax_pd(__mmask8 __A, __m128d __B, __m128d __C) {
__m256d test_mm256_minmax_pd(__m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_minmax_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
+ // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256(
return _mm256_minmax_pd(__A, __B, 127);
}
__m256d test_mm256_mask_minmax_pd(__m256d __A, __mmask8 __B, __m256d __C, __m256d __D) {
// CHECK-LABEL: @test_mm256_mask_minmax_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
+ // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256(
return _mm256_mask_minmax_pd(__A, __B, __C, __D, 127);
}
__m256d test_mm256_maskz_minmax_pd(__mmask8 __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: @test_mm256_maskz_minmax_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
+ // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256(
return _mm256_maskz_minmax_pd(__A, __B, __C, 127);
}
-__m256d test_mm256_minmax_round_pd(__m256d __A, __m256d __B) {
- // CHECK-LABEL: @test_mm256_minmax_round_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
- return _mm256_minmax_round_pd(__A, __B, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256d test_mm256_mask_minmax_round_pd(__m256d __A, __mmask8 __B, __m256d __C, __m256d __D) {
- // CHECK-LABEL: @test_mm256_mask_minmax_round_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
- return _mm256_mask_minmax_round_pd(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256d test_mm256_maskz_minmax_round_pd(__mmask8 __A, __m256d __B, __m256d __C) {
- // CHECK-LABEL: @test_mm256_maskz_minmax_round_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
- return _mm256_maskz_minmax_round_pd(__A, __B, __C, 127, _MM_FROUND_NO_EXC);
-}
-
__m128h test_mm_minmax_ph(__m128h __A, __m128h __B) {
// CHECK-LABEL: @test_mm_minmax_ph(
// CHECK: call <8 x half> @llvm.x86.avx10.mask.vminmaxph128(
@@ -121,40 +103,22 @@ __m128h test_mm_maskz_minmax_ph(__mmask8 __A, __m128h __B, __m128h __C) {
__m256h test_mm256_minmax_ph(__m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_minmax_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
+ // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256(
return _mm256_minmax_ph(__A, __B, 127);
}
__m256h test_mm256_mask_minmax_ph(__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) {
// CHECK-LABEL: @test_mm256_mask_minmax_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
+ // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256(
return _mm256_mask_minmax_ph(__A, __B, __C, __D, 127);
}
__m256h test_mm256_maskz_minmax_ph(__mmask16 __A, __m256h __B, __m256h __C) {
// CHECK-LABEL: @test_mm256_maskz_minmax_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
+ // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256(
return _mm256_maskz_minmax_ph(__A, __B, __C, 127);
}
-__m256h test_mm256_minmax_round_ph(__m256h __A, __m256h __B) {
- // CHECK-LABEL: @test_mm256_minmax_round_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
- return _mm256_minmax_round_ph(__A, __B, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_mask_minmax_round_ph(__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) {
- // CHECK-LABEL: @test_mm256_mask_minmax_round_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
- return _mm256_mask_minmax_round_ph(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_maskz_minmax_round_ph(__mmask16 __A, __m256h __B, __m256h __C) {
- // CHECK-LABEL: @test_mm256_maskz_minmax_round_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
- return _mm256_maskz_minmax_round_ph(__A, __B, __C, 127, _MM_FROUND_NO_EXC);
-}
-
__m128 test_mm_minmax_ps(__m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_minmax_ps(
// CHECK: call <4 x float> @llvm.x86.avx10.mask.vminmaxps128(
@@ -175,40 +139,22 @@ __m128 test_mm_maskz_minmax_ps(__mmask8 __A, __m128 __B, __m128 __C) {
__m256 test_mm256_minmax_ps(__m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_minmax_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
+ // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256(
return _mm256_minmax_ps(__A, __B, 127);
}
__m256 test_mm256_mask_minmax_ps(__m256 __A, __mmask8 __B, __m256 __C, __m256 __D) {
// CHECK-LABEL: @test_mm256_mask_minmax_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
+ // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256(
return _mm256_mask_minmax_ps(__A, __B, __C, __D, 127);
}
__m256 test_mm256_maskz_minmax_ps(__mmask8 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: @test_mm256_maskz_minmax_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
+ // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256(
return _mm256_maskz_minmax_ps(__A, __B, __C, 127);
}
-__m256 test_mm256_minmax_round_ps(__m256 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm256_minmax_round_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
- return _mm256_minmax_round_ps(__A, __B, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256 test_mm256_mask_minmax_round_ps(__m256 __A, __mmask8 __B, __m256 __C, __m256 __D) {
- // CHECK-LABEL: @test_mm256_mask_minmax_round_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
- return _mm256_mask_minmax_round_ps(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256 test_mm256_maskz_minmax_round_ps(__mmask8 __A, __m256 __B, __m256 __C) {
- // CHECK-LABEL: @test_mm256_maskz_minmax_round_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
- return _mm256_maskz_minmax_round_ps(__A, __B, __C, 127, _MM_FROUND_NO_EXC);
-}
-
__m128d test_mm_minmax_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_minmax_sd(
// CHECK: call <2 x double> @llvm.x86.avx10.mask.vminmaxsd.round(
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 4fcf2ff8f38df..e66d07dd5b628 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6861,9 +6861,9 @@ def int_x86_avx10_mask_vminmaxpd128 : ClangBuiltin<"__builtin_ia32_vminmaxpd128_
def int_x86_avx10_vminmaxpd256 : ClangBuiltin<"__builtin_ia32_vminmaxpd256">,
DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx10_mask_vminmaxpd256_round : ClangBuiltin<"__builtin_ia32_vminmaxpd256_round_mask">,
- DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
+def int_x86_avx10_mask_vminmaxpd256 : ClangBuiltin<"__builtin_ia32_vminmaxpd256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_avx10_mask_vminmaxpd_round : ClangBuiltin<"__builtin_ia32_vminmaxpd512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
@@ -6876,8 +6876,8 @@ def int_x86_avx10_mask_vminmaxph128 : ClangBuiltin<"__builtin_ia32_vminmaxph128_
def int_x86_avx10_vminmaxph256 : ClangBuiltin<"__builtin_ia32_vminmaxp...
[truncated]
|
@llvm/pr-subscribers-clang Author: Phoebe Wang (phoebewang). Changes — Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343. Patch is 55.76 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/132405.diff — 15 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ea0d6df4a33c2..adb174a9fc62d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4823,7 +4823,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vminmaxpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char, _Constant int)">;
+ def vminmaxpd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4835,7 +4835,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vminmaxph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+ def vminmaxph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4847,7 +4847,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vminmaxps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char, _Constant int)">;
+ def vminmaxps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
index 8164d49d89f1f..809a01b04f13e 100644
--- a/clang/lib/Headers/avx10_2minmaxintrin.h
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -66,34 +66,19 @@
(__v2df)_mm_setzero_pd(), (__mmask8)(U)))
#define _mm256_minmax_pd(A, B, C) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
+ ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), (__mmask8)-1, _MM_FROUND_NO_EXC))
+ (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
#define _mm256_mask_minmax_pd(W, U, A, B, C) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
+ ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)(__m256d)(W), (__mmask8)(U), _MM_FROUND_NO_EXC))
+ (__v4df)(__m256d)(W), (__mmask8)(U)))
#define _mm256_maskz_minmax_pd(U, A, B, C) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
+ ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), (__mmask8)(U), _MM_FROUND_NO_EXC))
-
-#define _mm256_minmax_round_pd(A, B, C, R) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
- (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R)))
-
-#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
- (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
-
-#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \
- ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
- (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R)))
+ (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
#define _mm_minmax_ph(A, B, C) \
((__m128h)__builtin_ia32_vminmaxph128_mask( \
@@ -111,34 +96,19 @@
(__v8hf)_mm_setzero_ph(), (__mmask8)(U)))
#define _mm256_minmax_ph(A, B, C) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
+ ((__m256h)__builtin_ia32_vminmaxph256_mask( \
(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_NO_EXC))
+ (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
#define _mm256_mask_minmax_ph(W, U, A, B, C) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
+ ((__m256h)__builtin_ia32_vminmaxph256_mask( \
(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)(__m256h)(W), (__mmask16)(U), _MM_FROUND_NO_EXC))
+ (__v16hf)(__m256h)(W), (__mmask16)(U)))
#define _mm256_maskz_minmax_ph(U, A, B, C) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
- (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), _MM_FROUND_NO_EXC))
-
-#define _mm256_minmax_round_ph(A, B, C, R) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
+ ((__m256h)__builtin_ia32_vminmaxph256_mask( \
(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R)))
-
-#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
- (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (C), \
- (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R)))
-
-#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \
- ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \
- (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+ (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
#define _mm_minmax_ps(A, B, C) \
((__m128)__builtin_ia32_vminmaxps128_mask( \
@@ -156,34 +126,19 @@
(__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
#define _mm256_minmax_ps(A, B, C) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
+ ((__m256)__builtin_ia32_vminmaxps256_mask( \
(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_NO_EXC))
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
#define _mm256_mask_minmax_ps(W, U, A, B, C) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
+ ((__m256)__builtin_ia32_vminmaxps256_mask( \
(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
- (__mmask8)(U), _MM_FROUND_NO_EXC))
+ (__mmask8)(U)))
#define _mm256_maskz_minmax_ps(U, A, B, C) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
- (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), _MM_FROUND_NO_EXC))
-
-#define _mm256_minmax_round_ps(A, B, C, R) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
- (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R)))
-
-#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
- (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
- (__mmask8)(U), (int)(R)))
-
-#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \
- ((__m256)__builtin_ia32_vminmaxps256_round_mask( \
+ ((__m256)__builtin_ia32_vminmaxps256_mask( \
(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R)))
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
#define _mm_minmax_sd(A, B, C) \
((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index e54a278225f1c..27454e2f03b5a 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -147,9 +147,6 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_rndscalesd_round_mask:
case X86::BI__builtin_ia32_rndscaless_round_mask:
case X86::BI__builtin_ia32_rndscalesh_round_mask:
- case X86::BI__builtin_ia32_vminmaxpd256_round_mask:
- case X86::BI__builtin_ia32_vminmaxps256_round_mask:
- case X86::BI__builtin_ia32_vminmaxph256_round_mask:
case X86::BI__builtin_ia32_vminmaxpd512_round_mask:
case X86::BI__builtin_ia32_vminmaxps512_round_mask:
case X86::BI__builtin_ia32_vminmaxph512_round_mask:
@@ -910,11 +907,11 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_vminmaxbf16256:
case X86::BI__builtin_ia32_vminmaxbf16512:
case X86::BI__builtin_ia32_vminmaxpd128_mask:
- case X86::BI__builtin_ia32_vminmaxpd256_round_mask:
+ case X86::BI__builtin_ia32_vminmaxpd256_mask:
case X86::BI__builtin_ia32_vminmaxph128_mask:
- case X86::BI__builtin_ia32_vminmaxph256_round_mask:
+ case X86::BI__builtin_ia32_vminmaxph256_mask:
case X86::BI__builtin_ia32_vminmaxps128_mask:
- case X86::BI__builtin_ia32_vminmaxps256_round_mask:
+ case X86::BI__builtin_ia32_vminmaxps256_mask:
case X86::BI__builtin_ia32_vminmaxpd512_round_mask:
case X86::BI__builtin_ia32_vminmaxps512_round_mask:
case X86::BI__builtin_ia32_vminmaxph512_round_mask:
diff --git a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
index 6db7801eb0040..2ee496d317a5a 100644
--- a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
+++ b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c
@@ -113,17 +113,6 @@ __m512 test_mm512_minmax_round_ps(__m512 __A, __m512 __B) {
return _mm512_minmax_round_ps(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
}
-__m256d test_mm256_minmax_round_pd(__m256d __A, __m256d __B) {
- return _mm256_minmax_round_pd(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
-}
-
-__m256h test_mm256_minmax_round_ph(__m256h __A, __m256h __B) {
- return _mm256_minmax_round_ph(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
-}
-
-__m256 test_mm256_minmax_round_ps(__m256 __A, __m256 __B) {
- return _mm256_minmax_round_ps(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
-}
__m128d test_mm_minmax_round_sd(__m128d __A, __m128d __B) {
return _mm_minmax_round_sd(__A, __B, 127, 11); // expected-error {{invalid rounding argument}}
}
diff --git a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
index 7e21858c71834..7dad153a15c37 100644
--- a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c
@@ -67,40 +67,22 @@ __m128d test_mm_maskz_minmax_pd(__mmask8 __A, __m128d __B, __m128d __C) {
__m256d test_mm256_minmax_pd(__m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_minmax_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
+ // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256(
return _mm256_minmax_pd(__A, __B, 127);
}
__m256d test_mm256_mask_minmax_pd(__m256d __A, __mmask8 __B, __m256d __C, __m256d __D) {
// CHECK-LABEL: @test_mm256_mask_minmax_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
+ // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256(
return _mm256_mask_minmax_pd(__A, __B, __C, __D, 127);
}
__m256d test_mm256_maskz_minmax_pd(__mmask8 __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: @test_mm256_maskz_minmax_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
+ // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256(
return _mm256_maskz_minmax_pd(__A, __B, __C, 127);
}
-__m256d test_mm256_minmax_round_pd(__m256d __A, __m256d __B) {
- // CHECK-LABEL: @test_mm256_minmax_round_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
- return _mm256_minmax_round_pd(__A, __B, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256d test_mm256_mask_minmax_round_pd(__m256d __A, __mmask8 __B, __m256d __C, __m256d __D) {
- // CHECK-LABEL: @test_mm256_mask_minmax_round_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
- return _mm256_mask_minmax_round_pd(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256d test_mm256_maskz_minmax_round_pd(__mmask8 __A, __m256d __B, __m256d __C) {
- // CHECK-LABEL: @test_mm256_maskz_minmax_round_pd(
- // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round(
- return _mm256_maskz_minmax_round_pd(__A, __B, __C, 127, _MM_FROUND_NO_EXC);
-}
-
__m128h test_mm_minmax_ph(__m128h __A, __m128h __B) {
// CHECK-LABEL: @test_mm_minmax_ph(
// CHECK: call <8 x half> @llvm.x86.avx10.mask.vminmaxph128(
@@ -121,40 +103,22 @@ __m128h test_mm_maskz_minmax_ph(__mmask8 __A, __m128h __B, __m128h __C) {
__m256h test_mm256_minmax_ph(__m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_minmax_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
+ // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256(
return _mm256_minmax_ph(__A, __B, 127);
}
__m256h test_mm256_mask_minmax_ph(__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) {
// CHECK-LABEL: @test_mm256_mask_minmax_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
+ // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256(
return _mm256_mask_minmax_ph(__A, __B, __C, __D, 127);
}
__m256h test_mm256_maskz_minmax_ph(__mmask16 __A, __m256h __B, __m256h __C) {
// CHECK-LABEL: @test_mm256_maskz_minmax_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
+ // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256(
return _mm256_maskz_minmax_ph(__A, __B, __C, 127);
}
-__m256h test_mm256_minmax_round_ph(__m256h __A, __m256h __B) {
- // CHECK-LABEL: @test_mm256_minmax_round_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
- return _mm256_minmax_round_ph(__A, __B, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_mask_minmax_round_ph(__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) {
- // CHECK-LABEL: @test_mm256_mask_minmax_round_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
- return _mm256_mask_minmax_round_ph(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_maskz_minmax_round_ph(__mmask16 __A, __m256h __B, __m256h __C) {
- // CHECK-LABEL: @test_mm256_maskz_minmax_round_ph(
- // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round(
- return _mm256_maskz_minmax_round_ph(__A, __B, __C, 127, _MM_FROUND_NO_EXC);
-}
-
__m128 test_mm_minmax_ps(__m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_minmax_ps(
// CHECK: call <4 x float> @llvm.x86.avx10.mask.vminmaxps128(
@@ -175,40 +139,22 @@ __m128 test_mm_maskz_minmax_ps(__mmask8 __A, __m128 __B, __m128 __C) {
__m256 test_mm256_minmax_ps(__m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_minmax_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
+ // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256(
return _mm256_minmax_ps(__A, __B, 127);
}
__m256 test_mm256_mask_minmax_ps(__m256 __A, __mmask8 __B, __m256 __C, __m256 __D) {
// CHECK-LABEL: @test_mm256_mask_minmax_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
+ // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256(
return _mm256_mask_minmax_ps(__A, __B, __C, __D, 127);
}
__m256 test_mm256_maskz_minmax_ps(__mmask8 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: @test_mm256_maskz_minmax_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
+ // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256(
return _mm256_maskz_minmax_ps(__A, __B, __C, 127);
}
-__m256 test_mm256_minmax_round_ps(__m256 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm256_minmax_round_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
- return _mm256_minmax_round_ps(__A, __B, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256 test_mm256_mask_minmax_round_ps(__m256 __A, __mmask8 __B, __m256 __C, __m256 __D) {
- // CHECK-LABEL: @test_mm256_mask_minmax_round_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
- return _mm256_mask_minmax_round_ps(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC);
-}
-
-__m256 test_mm256_maskz_minmax_round_ps(__mmask8 __A, __m256 __B, __m256 __C) {
- // CHECK-LABEL: @test_mm256_maskz_minmax_round_ps(
- // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round(
- return _mm256_maskz_minmax_round_ps(__A, __B, __C, 127, _MM_FROUND_NO_EXC);
-}
-
__m128d test_mm_minmax_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_minmax_sd(
// CHECK: call <2 x double> @llvm.x86.avx10.mask.vminmaxsd.round(
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 4fcf2ff8f38df..e66d07dd5b628 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6861,9 +6861,9 @@ def int_x86_avx10_mask_vminmaxpd128 : ClangBuiltin<"__builtin_ia32_vminmaxpd128_
def int_x86_avx10_vminmaxpd256 : ClangBuiltin<"__builtin_ia32_vminmaxpd256">,
DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx10_mask_vminmaxpd256_round : ClangBuiltin<"__builtin_ia32_vminmaxpd256_round_mask">,
- DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
+def int_x86_avx10_mask_vminmaxpd256 : ClangBuiltin<"__builtin_ia32_vminmaxpd256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_avx10_mask_vminmaxpd_round : ClangBuiltin<"__builtin_ia32_vminmaxpd512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
@@ -6876,8 +6876,8 @@ def int_x86_avx10_mask_vminmaxph128 : ClangBuiltin<"__builtin_ia32_vminmaxph128_
def int_x86_avx10_vminmaxph256 : ClangBuiltin<"__builtin_ia32_vminmaxp...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
You can test this locally with the following command:

git-clang-format --diff 09feaa9261448e72ce21394613e0a12f84c1b8d9 45cafe1b3f82e418d8043deae32eb164f7138654 --extensions c,cpp,h -- clang/lib/Headers/avx10_2minmaxintrin.h clang/lib/Sema/SemaX86.cpp clang/test/CodeGen/X86/avx10_2_512minmax-error.c clang/test/CodeGen/X86/avx10_2minmax-builtins.c llvm/lib/Target/X86/X86IntrinsicsInfo.h

View the diff from clang-format here.

diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h
index 809a01b04f..dc414cedb9 100644
--- a/clang/lib/Headers/avx10_2minmaxintrin.h
+++ b/clang/lib/Headers/avx10_2minmaxintrin.h
@@ -68,7 +68,7 @@
#define _mm256_minmax_pd(A, B, C) \
((__m256d)__builtin_ia32_vminmaxpd256_mask( \
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
+ (__v4df)_mm256_setzero_pd(), (__mmask8) - 1))
#define _mm256_mask_minmax_pd(W, U, A, B, C) \
((__m256d)__builtin_ia32_vminmaxpd256_mask( \
@@ -98,7 +98,7 @@
#define _mm256_minmax_ph(A, B, C) \
((__m256h)__builtin_ia32_vminmaxph256_mask( \
(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
- (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
+ (__v16hf)_mm256_setzero_ph(), (__mmask16) - 1))
#define _mm256_mask_minmax_ph(W, U, A, B, C) \
((__m256h)__builtin_ia32_vminmaxph256_mask( \
@@ -128,7 +128,7 @@
#define _mm256_minmax_ps(A, B, C) \
((__m256)__builtin_ia32_vminmaxps256_mask( \
(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
+ (__v8sf)_mm256_setzero_ps(), (__mmask8) - 1))
#define _mm256_mask_minmax_ps(W, U, A, B, C) \
((__m256)__builtin_ia32_vminmaxps256_mask( \
|
✅ With the latest revision this PR passed the undef deprecator. |
Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343