Commit 924c7ea

[X86][AVX10.2] Remove YMM rounding from VCVT2PS2PHX (#132397)
Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343
1 parent 0ea4fb9 commit 924c7ea

14 files changed: +11 additions, -260 deletions
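
The user-visible effect on the 256-bit intrinsics: the _mm256_cvtx_round2ps_ph family is removed and only the current-rounding-mode form remains. A minimal usage sketch of the surviving intrinsic follows (assumptions: a clang with AVX10.2 support built with -mavx10.2-256; convert_pair is an illustrative wrapper, not part of the header):

// Build sketch (assumed): clang -mavx10.2-256 -c example.c
#include <immintrin.h>

// The low 8 fp16 lanes of the result come from b, the high 8 from a,
// converted using the current rounding direction (no explicit rounding operand).
static __m256h convert_pair(__m256 a, __m256 b) {
  return _mm256_cvtx2ps_ph(a, b);
}

Callers that previously passed _MM_FROUND_CUR_DIRECTION to the rounding variants can switch to this form; per the rest of this commit, explicit rounding for VCVT2PS2PHX now remains only on the 512-bit intrinsics.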

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 1 addition & 1 deletion
@@ -5006,7 +5006,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vcvt2ps2phx256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<8, float>, _Vector<8, float>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vcvt2ps2phx256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<8, float>, _Vector<8, float>, _Vector<16, _Float16>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {

clang/lib/Headers/avx10_2convertintrin.h

Lines changed: 3 additions & 138 deletions
@@ -178,8 +178,7 @@ _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) {
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
                                                                   __m256 __B) {
   return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
-      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1),
-      _MM_FROUND_CUR_DIRECTION);
+      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1));
 }
 
 /// Convert two 256-bit vectors, \a __A and \a __B, containing packed
@@ -223,8 +222,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
   return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
-      (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U,
-      _MM_FROUND_CUR_DIRECTION);
+      (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U);
 }
 
 /// Convert two 256-bit vectors, \a __A and \a __B, containing packed
@@ -266,142 +264,9 @@ _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
   return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
-      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
-      _MM_FROUND_CUR_DIRECTION);
+      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
 }
 
-/// Convert two 256-bit vectors, \a __A and \a __B, containing packed
-/// single-precision (32-bit) floating-point elements to a 256-bit vector
-/// containing FP16 elements. Rounding mode \a __R needs to be provided.
-///
-/// \code{.operation}
-/// FOR i := 0 to 15
-///   IF i < 8
-///     dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
-///   ELSE
-///     dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8])
-///   FI
-/// ENDFOR
-///
-/// dst[MAX:256] := 0
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
-///
-/// \param __A
-///    A 256-bit vector of [8 x float].
-/// \param __B
-///    A 256-bit vector of [8 x float].
-/// \param __R
-///    Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or
-///    result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following:
-///    _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF,
-///    _MM_FROUND_TO_ZERO.
-/// \returns
-///    A 256-bit vector of [16 x fp16]. Lower elements correspond to the
-///    (converted) elements from \a __B; higher order elements correspond to the
-///    (converted) elements from \a __A.
-#define _mm256_cvtx_round2ps_ph(__A, __B, __R) \
-  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \
-      (__v8sf)(__A), (__v8sf)(__B), (__v16hf)_mm256_undefined_ph(), \
-      (__mmask16)(-1), (const int)(__R)))
-
-/// Convert two 256-bit vectors, \a __A and \a __B, containing packed
-/// single-precision (32-bit) floating-point elements to a 256-bit vector
-/// containing FP16 elements. Merging mask \a __U is used to determine if given
-/// element should be taken from \a __W instead. Rounding mode \a __R needs to
-/// be provided.
-///
-/// \code{.operation}
-/// FOR i := 0 to 15
-///   IF __U[i]
-///     IF i < 8
-///       dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
-///     ELSE
-///       dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8])
-///     FI
-///   ELSE
-///     dst.fp16[i] := __W.fp16[i]
-///   FI
-/// ENDFOR
-///
-/// dst[MAX:256] := 0
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
-///
-/// \param __W
-///    A 256-bit vector of [16 x fp16].
-/// \param __U
-///    A 16-bit merging mask.
-/// \param __A
-///    A 256-bit vector of [8 x float].
-/// \param __B
-///    A 256-bit vector of [8 x float].
-/// \param __R
-///    Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or
-///    result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following:
-///    _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF,
-///    _MM_FROUND_TO_ZERO.
-/// \returns
-///    A 256-bit vector of [16 x fp16]. Lower elements correspond to the
-///    (converted) elements from \a __B; higher order elements correspond to the
-///    (converted) elements from \a __A. If corresponding mask bit is not set, then
-///    element from \a __W is taken instead.
-#define _mm256_mask_cvtx_round2ps_ph(__W, __U, __A, __B, __R) \
-  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \
-      (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(__W), (__mmask16)(__U), (const int)(__R)))
-
-/// Convert two 256-bit vectors, \a __A and \a __B, containing packed
-/// single-precision (32-bit) floating-point elements to a 256-bit vector
-/// containing FP16 elements. Zeroing mask \a __U is used to determine if given
-/// element should be zeroed instead. Rounding mode \a __R needs to be provided.
-///
-/// \code{.operation}
-/// FOR i := 0 to 15
-///   IF __U[i]
-///     IF i < 8
-///       dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
-///     ELSE
-///       dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8])
-///     FI
-///   ELSE
-///     dst.fp16[i] := 0
-///   FI
-/// ENDFOR
-///
-/// dst[MAX:256] := 0
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
-///
-/// \param __U
-///    A 16-bit zeroing mask.
-/// \param __A
-///    A 256-bit vector of [8 x float].
-/// \param __B
-///    A 256-bit vector of [8 x float].
-/// \param __R
-///    Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or
-///    result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following:
-///    _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF,
-///    _MM_FROUND_TO_ZERO.
-/// \returns
-///    A 256-bit vector of [16 x fp16]. Lower elements correspond to the
-///    (converted) elements from \a __B; higher order elements correspond to the
-///    (converted) elements from \a __A. If corresponding mask bit is not set,
-///    then zero is taken instead.
-#define _mm256_maskz_cvtx_round2ps_ph(__U, __A, __B, __R) \
-  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \
-      (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(_mm256_setzero_ph()), \
-      (__mmask16)(__U), (const int)(__R)))
-
 /// Convert 128-bit vector \a __B containing packed FP16 floating-point elements
 /// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each
 /// 16-bit integer stored in \a __B.
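
The merging- and zero-masked 256-bit entry points survive this change, losing only the rounding argument in their builtin call. A short sketch of how they are still used (same assumptions as above; blend_convert and select_convert are illustrative names, not part of the header):

#include <immintrin.h>

// Lanes whose bit in u is clear keep the corresponding element of w.
static __m256h blend_convert(__m256h w, __mmask16 u, __m256 a, __m256 b) {
  return _mm256_mask_cvtx2ps_ph(w, u, a, b);
}

// Lanes whose bit in u is clear are zeroed in the result.
static __m256h select_convert(__mmask16 u, __m256 a, __m256 b) {
  return _mm256_maskz_cvtx2ps_ph(u, a, b);
}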

clang/lib/Sema/SemaX86.cpp

Lines changed: 0 additions & 1 deletion
@@ -314,7 +314,6 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vfmulcph512_mask:
   case X86::BI__builtin_ia32_vfcmulcsh_mask:
   case X86::BI__builtin_ia32_vfcmulcph512_mask:
-  case X86::BI__builtin_ia32_vcvt2ps2phx256_mask:
   case X86::BI__builtin_ia32_vcvt2ps2phx512_mask:
     ArgNum = 4;
     HasRC = true;

clang/test/CodeGen/X86/avx10_2convert-builtins.c

Lines changed: 0 additions & 18 deletions
@@ -41,24 +41,6 @@ __m256h test_mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
   return _mm256_maskz_cvtx2ps_ph(__U, __A, __B);
 }
 
-__m256h test_mm256_cvtx_round2ps_ph(__m256 __A, __m256 __B) {
-  // CHECK-LABEL: @test_mm256_cvtx_round2ps_ph(
-  // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(
-  return _mm256_cvtx_round2ps_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_mask_cvtx_round2ps_ph(__m256h __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  // CHECK-LABEL: @test_mm256_mask_cvtx_round2ps_ph(
-  // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(
-  return _mm256_mask_cvtx_round2ps_ph(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_maskz_cvtx_round2ps_ph(__mmask8 __U, __m256 __A, __m256 __B) {
-  // CHECK-LABEL: @test_mm256_maskz_cvtx_round2ps_ph(
-  // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(
-  return _mm256_maskz_cvtx_round2ps_ph(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
-}
-
 __m128i test_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) {
   // CHECK-LABEL: @test_mm_cvtbiasph_bf8(
   // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8128(

llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 2 additions & 2 deletions
@@ -7026,8 +7026,8 @@ def int_x86_avx10_mask_vcvt2ps2phx_128 : ClangBuiltin<"__builtin_ia32_vcvt2ps2ph
     DefaultAttrsIntrinsic<[llvm_v8f16_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty],
                           [IntrNoMem]>;
 def int_x86_avx10_mask_vcvt2ps2phx_256 : ClangBuiltin<"__builtin_ia32_vcvt2ps2phx256_mask">,
-    DefaultAttrsIntrinsic<[llvm_v16f16_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty],
-                          [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+    DefaultAttrsIntrinsic<[llvm_v16f16_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v16f16_ty, llvm_i16_ty],
+                          [IntrNoMem]>;
 def int_x86_avx10_mask_vcvt2ps2phx_512 : ClangBuiltin<"__builtin_ia32_vcvt2ps2phx512_mask">,
     DefaultAttrsIntrinsic<[llvm_v32f16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty],
                           [IntrNoMem, ImmArg<ArgIndex<4>>]>;

llvm/lib/Target/X86/X86InstrAVX10.td

Lines changed: 0 additions & 6 deletions
@@ -771,12 +771,6 @@ multiclass avx10_cvt2ps2ph<bits<8> opc, string OpcodeStr,
                      _SrcVTInfo.info128>,
                      EVEX_V128, EVEX_CD8<32, CD8VF>;
   }
-
-  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in {
-    defm Z256 : avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.YMM,
-                                   _SrcVTInfo.info256, _DstVTInfo.info256,
-                                   OpNodeRnd>;
-  }
 }
 
 defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx",

llvm/lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 1 addition & 1 deletion
@@ -440,7 +440,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
     X86_INTRINSIC_DATA(avx10_mask_vcvt2ps2phx_128, INTR_TYPE_2OP_MASK,
                        X86ISD::VFPROUND2, 0),
     X86_INTRINSIC_DATA(avx10_mask_vcvt2ps2phx_256, INTR_TYPE_2OP_MASK,
-                       X86ISD::VFPROUND2, X86ISD::VFPROUND2_RND),
+                       X86ISD::VFPROUND2, 0),
     X86_INTRINSIC_DATA(avx10_mask_vcvt2ps2phx_512, INTR_TYPE_2OP_MASK,
                        X86ISD::VFPROUND2, X86ISD::VFPROUND2_RND),
     X86_INTRINSIC_DATA(avx10_mask_vcvtbiasph2bf8128, TRUNCATE2_TO_REG,

llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll

Lines changed: 4 additions & 45 deletions
@@ -50,7 +50,7 @@ define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256(<8 x float> %A, <8 x float
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vcvt2ps2phx %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x67,0xc1]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 -1, i32 4)
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 -1)
   ret <16 x half> %ret
 }
 
@@ -66,7 +66,7 @@ define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_mask(<16 x half> %W, i16 %
 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vcvt2ps2phx %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x67,0xc2]
 ; X86-NEXT: retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> %W, i16 %U, i32 4)
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> %W, i16 %U)
   ret <16 x half> %ret
 }
 
@@ -82,52 +82,11 @@ define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_maskz(<16 x half> %W, i16
 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vcvt2ps2phx %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x67,0xc2]
 ; X86-NEXT: retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 %U, i32 4)
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 %U)
   ret <16 x half> %ret
 }
 
-define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_round(<8 x float> %A, <8 x float> %B) {
-; CHECK-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcvt2ps2phx {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x79,0x78,0x67,0xc1]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 -1, i32 11)
-  ret <16 x half> %ret
-}
-
-define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_round_mask(<16 x half> %W, i16 %U, <8 x float> %A, <8 x float> %B) {
-; X64-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vcvt2ps2phx {rz-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x71,0x79,0x67,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
-;
-; X86-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_mask:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vcvt2ps2phx {rz-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x71,0x79,0x67,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> %W, i16 %U, i32 11)
-  ret <16 x half> %ret
-}
-
-define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_round_maskz(i16 %U, <8 x float> %A, <8 x float> %B) {
-; X64-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vcvt2ps2phx {rz-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0xf9,0x67,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
-;
-; X86-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_maskz:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vcvt2ps2phx {rz-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0xf9,0x67,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 %U, i32 11)
-  ret <16 x half> %ret
-}
-
-declare <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float>, <8 x float>, <16 x half>, i16, i32)
+declare <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float>, <8 x float>, <16 x half>, i16)
 
 define <16 x i8> @test_int_x86_avx10_vcvtbiasph2bf8128(<16 x i8> %A, <8 x half> %B) nounwind {
 ; CHECK-LABEL: test_int_x86_avx10_vcvtbiasph2bf8128:

llvm/test/MC/Disassembler/X86/avx10.2convert-32.txt

Lines changed: 0 additions & 8 deletions
@@ -5,18 +5,10 @@
 # INTEL: vcvt2ps2phx ymm2, ymm3, ymm4
 0x62,0xf2,0x65,0x28,0x67,0xd4
 
-# ATT: vcvt2ps2phx {rn-sae}, %ymm4, %ymm3, %ymm2
-# INTEL: vcvt2ps2phx ymm2, ymm3, ymm4, {rn-sae}
-0x62,0xf2,0x61,0x18,0x67,0xd4
-
 # ATT: vcvt2ps2phx %ymm4, %ymm3, %ymm2 {%k7}
 # INTEL: vcvt2ps2phx ymm2 {k7}, ymm3, ymm4
 0x62,0xf2,0x65,0x2f,0x67,0xd4
 
-# ATT: vcvt2ps2phx {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
-# INTEL: vcvt2ps2phx ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
-0x62,0xf2,0x61,0xff,0x67,0xd4
-
 # ATT: vcvt2ps2phx %zmm4, %zmm3, %zmm2
 # INTEL: vcvt2ps2phx zmm2, zmm3, zmm4
 0x62,0xf2,0x65,0x48,0x67,0xd4

llvm/test/MC/Disassembler/X86/avx10.2convert-64.txt

Lines changed: 0 additions & 8 deletions
@@ -5,18 +5,10 @@
 # INTEL: vcvt2ps2phx ymm22, ymm23, ymm24
 0x62,0x82,0x45,0x20,0x67,0xf0
 
-# ATT: vcvt2ps2phx {rn-sae}, %ymm24, %ymm23, %ymm22
-# INTEL: vcvt2ps2phx ymm22, ymm23, ymm24, {rn-sae}
-0x62,0x82,0x41,0x10,0x67,0xf0
-
 # ATT: vcvt2ps2phx %ymm24, %ymm23, %ymm22 {%k7}
 # INTEL: vcvt2ps2phx ymm22 {k7}, ymm23, ymm24
 0x62,0x82,0x45,0x27,0x67,0xf0
 
-# ATT: vcvt2ps2phx {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
-# INTEL: vcvt2ps2phx ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
-0x62,0x82,0x41,0xf7,0x67,0xf0
-
 # ATT: vcvt2ps2phx %zmm24, %zmm23, %zmm22
 # INTEL: vcvt2ps2phx zmm22, zmm23, zmm24
 0x62,0x82,0x45,0x40,0x67,0xf0

llvm/test/MC/X86/avx10.2convert-32-att.s

Lines changed: 0 additions & 8 deletions
@@ -4,18 +4,10 @@
 // CHECK: encoding: [0x62,0xf2,0x65,0x28,0x67,0xd4]
           vcvt2ps2phx %ymm4, %ymm3, %ymm2
 
-// CHECK: vcvt2ps2phx {rn-sae}, %ymm4, %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x67,0xd4]
-          vcvt2ps2phx {rn-sae}, %ymm4, %ymm3, %ymm2
-
 // CHECK: vcvt2ps2phx %ymm4, %ymm3, %ymm2 {%k7}
 // CHECK: encoding: [0x62,0xf2,0x65,0x2f,0x67,0xd4]
           vcvt2ps2phx %ymm4, %ymm3, %ymm2 {%k7}
 
-// CHECK: vcvt2ps2phx {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
-// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x67,0xd4]
-          vcvt2ps2phx {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
-
 // CHECK: vcvt2ps2phx %zmm4, %zmm3, %zmm2
 // CHECK: encoding: [0x62,0xf2,0x65,0x48,0x67,0xd4]
           vcvt2ps2phx %zmm4, %zmm3, %zmm2

llvm/test/MC/X86/avx10.2convert-32-intel.s

Lines changed: 0 additions & 8 deletions
@@ -4,18 +4,10 @@
 // CHECK: encoding: [0x62,0xf2,0x65,0x28,0x67,0xd4]
           vcvt2ps2phx ymm2, ymm3, ymm4
 
-// CHECK: vcvt2ps2phx ymm2, ymm3, ymm4, {rn-sae}
-// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x67,0xd4]
-          vcvt2ps2phx ymm2, ymm3, ymm4, {rn-sae}
-
 // CHECK: vcvt2ps2phx ymm2 {k7}, ymm3, ymm4
 // CHECK: encoding: [0x62,0xf2,0x65,0x2f,0x67,0xd4]
           vcvt2ps2phx ymm2 {k7}, ymm3, ymm4
 
-// CHECK: vcvt2ps2phx ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
-// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x67,0xd4]
-          vcvt2ps2phx ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
-
 // CHECK: vcvt2ps2phx zmm2, zmm3, zmm4
 // CHECK: encoding: [0x62,0xf2,0x65,0x48,0x67,0xd4]
           vcvt2ps2phx zmm2, zmm3, zmm4
