Skip to content

Commit af223bc

Browse files
authored
[X86] Avoid zero extend i16 when inserting fp16 (#126194)
1 parent 4c1dc85 commit af223bc

12 files changed

+130
-129
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22044,15 +22044,20 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
2204422044
}
2204522045

2204622046
In = DAG.getBitcast(MVT::i16, In);
22047-
In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
22048-
getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
22049-
DAG.getVectorIdxConstant(0, DL));
2205022047
SDValue Res;
2205122048
if (IsStrict) {
22049+
In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
22050+
getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
22051+
DAG.getVectorIdxConstant(0, DL));
2205222052
Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, DL, {MVT::v4f32, MVT::Other},
2205322053
{Chain, In});
2205422054
Chain = Res.getValue(1);
2205522055
} else {
22056+
In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
22057+
In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
22058+
DAG.getUNDEF(MVT::v4f32), In,
22059+
DAG.getVectorIdxConstant(0, DL));
22060+
In = DAG.getBitcast(MVT::v8i16, In);
2205622061
Res = DAG.getNode(X86ISD::CVTPH2PS, DL, MVT::v4f32, In,
2205722062
DAG.getTargetConstant(4, DL, MVT::i32));
2205822063
}

llvm/test/CodeGen/X86/avx512-insert-extract.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2164,7 +2164,7 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
21642164
; KNL-NEXT: setb %al
21652165
; KNL-NEXT: andl $1, %eax
21662166
; KNL-NEXT: kmovw %eax, %k0
2167-
; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2167+
; KNL-NEXT: vpsrld $16, %xmm0, %xmm0
21682168
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
21692169
; KNL-NEXT: vucomiss %xmm2, %xmm0
21702170
; KNL-NEXT: setb %al

llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1443,8 +1443,7 @@ define void @half_vec_compare(ptr %x, ptr %y) {
14431443
; KNL: ## %bb.0: ## %entry
14441444
; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
14451445
; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
1446-
; KNL-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
1447-
; KNL-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
1446+
; KNL-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
14481447
; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
14491448
; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
14501449
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
@@ -1470,8 +1469,7 @@ define void @half_vec_compare(ptr %x, ptr %y) {
14701469
; AVX512BW: ## %bb.0: ## %entry
14711470
; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
14721471
; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
1473-
; AVX512BW-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
1474-
; AVX512BW-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
1472+
; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
14751473
; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
14761474
; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
14771475
; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]

llvm/test/CodeGen/X86/fminimum-fmaximum.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1854,9 +1854,9 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
18541854
; AVX512-NEXT: cmovpl %ecx, %r8d
18551855
; AVX512-NEXT: movl $0, %r11d
18561856
; AVX512-NEXT: cmoval %ecx, %r11d
1857-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7]
1857+
; AVX512-NEXT: vpsrlq $48, %xmm1, %xmm2
18581858
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1859-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
1859+
; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm3
18601860
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
18611861
; AVX512-NEXT: vucomiss %xmm2, %xmm3
18621862
; AVX512-NEXT: movl $0, %r10d
@@ -1872,9 +1872,9 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
18721872
; AVX512-NEXT: cmovpl %ecx, %ebx
18731873
; AVX512-NEXT: movl $0, %r14d
18741874
; AVX512-NEXT: cmoval %ecx, %r14d
1875-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7]
1875+
; AVX512-NEXT: vpsrld $16, %xmm1, %xmm2
18761876
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1877-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
1877+
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm3
18781878
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
18791879
; AVX512-NEXT: vucomiss %xmm2, %xmm3
18801880
; AVX512-NEXT: movl $0, %r15d
@@ -1916,7 +1916,7 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
19161916
; AVX512-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3
19171917
; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
19181918
; AVX512-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
1919-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
1919+
; AVX512-NEXT: vpsrld $16, %xmm2, %xmm3
19201920
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
19211921
; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
19221922
; AVX512-NEXT: vucomiss %xmm4, %xmm3
@@ -1930,21 +1930,21 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
19301930
; AVX512-NEXT: cmovpl %eax, %esi
19311931
; AVX512-NEXT: vmovd %esi, %xmm3
19321932
; AVX512-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
1933-
; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,1,1]
1933+
; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
19341934
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
19351935
; AVX512-NEXT: vucomiss %xmm4, %xmm5
19361936
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
19371937
; AVX512-NEXT: cmovnel %eax, %edx
19381938
; AVX512-NEXT: cmovpl %eax, %edx
19391939
; AVX512-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
1940-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[3,3,3,3,4,5,6,7]
1940+
; AVX512-NEXT: vpsrlq $48, %xmm2, %xmm5
19411941
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
19421942
; AVX512-NEXT: vucomiss %xmm4, %xmm5
19431943
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
19441944
; AVX512-NEXT: cmovnel %eax, %edx
19451945
; AVX512-NEXT: cmovpl %eax, %edx
19461946
; AVX512-NEXT: vpinsrw $3, %edx, %xmm3, %xmm3
1947-
; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
1947+
; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
19481948
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
19491949
; AVX512-NEXT: vucomiss %xmm4, %xmm5
19501950
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF

llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1938,12 +1938,12 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
19381938
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
19391939
; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
19401940
; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1941-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm0 = xmm4[3,3,3,3,4,5,6,7]
1941+
; AVX512-NEXT: vpsrlq $48, %xmm4, %xmm0
19421942
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
19431943
; AVX512-NEXT: vucomiss %xmm0, %xmm0
19441944
; AVX512-NEXT: setp %al
19451945
; AVX512-NEXT: kmovw %eax, %k1
1946-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm1 = xmm8[3,3,3,3,4,5,6,7]
1946+
; AVX512-NEXT: vpsrlq $48, %xmm8, %xmm1
19471947
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
19481948
; AVX512-NEXT: vucomiss %xmm1, %xmm1
19491949
; AVX512-NEXT: setp %al
@@ -1996,12 +1996,12 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
19961996
; AVX512-NEXT: seta %al
19971997
; AVX512-NEXT: kmovw %eax, %k1
19981998
; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
1999-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm1 = xmm4[1,1,1,1,4,5,6,7]
1999+
; AVX512-NEXT: vpsrld $16, %xmm4, %xmm1
20002000
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
20012001
; AVX512-NEXT: vucomiss %xmm1, %xmm1
20022002
; AVX512-NEXT: setp %al
20032003
; AVX512-NEXT: kmovw %eax, %k1
2004-
; AVX512-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[1,1,1,1,4,5,6,7]
2004+
; AVX512-NEXT: vpsrld $16, %xmm8, %xmm4
20052005
; AVX512-NEXT: vcvtph2ps %xmm4, %xmm4
20062006
; AVX512-NEXT: vucomiss %xmm4, %xmm4
20072007
; AVX512-NEXT: setp %al

llvm/test/CodeGen/X86/fp-round.ll

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,9 +50,6 @@ define half @round_f16(half %h) {
5050
;
5151
; AVX512F-LABEL: round_f16:
5252
; AVX512F: # %bb.0: # %entry
53-
; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
54-
; AVX512F-NEXT: movzwl %ax, %eax
55-
; AVX512F-NEXT: vmovd %eax, %xmm0
5653
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
5754
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
5855
; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)

llvm/test/CodeGen/X86/fpclamptosat_vec.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -698,7 +698,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
698698
;
699699
; AVX2-LABEL: stest_f16i32:
700700
; AVX2: # %bb.0: # %entry
701-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
701+
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
702702
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
703703
; AVX2-NEXT: vcvttss2si %xmm1, %rax
704704
; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@@ -709,7 +709,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
709709
; AVX2-NEXT: vcvttss2si %xmm1, %rax
710710
; AVX2-NEXT: vmovq %rcx, %xmm1
711711
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
712-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
712+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
713713
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
714714
; AVX2-NEXT: vmovq %rax, %xmm2
715715
; AVX2-NEXT: vcvttss2si %xmm0, %rax
@@ -836,7 +836,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
836836
;
837837
; AVX2-LABEL: utesth_f16i32:
838838
; AVX2: # %bb.0: # %entry
839-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
839+
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
840840
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
841841
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
842842
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -866,7 +866,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
866866
; AVX2-NEXT: sarq $63, %rdx
867867
; AVX2-NEXT: andq %rax, %rdx
868868
; AVX2-NEXT: orq %rcx, %rdx
869-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
869+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
870870
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
871871
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
872872
; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -999,7 +999,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
999999
;
10001000
; AVX2-LABEL: ustest_f16i32:
10011001
; AVX2: # %bb.0: # %entry
1002-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
1002+
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
10031003
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
10041004
; AVX2-NEXT: vcvttss2si %xmm1, %rax
10051005
; AVX2-NEXT: vmovq %rax, %xmm1
@@ -1011,7 +1011,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
10111011
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
10121012
; AVX2-NEXT: vcvttss2si %xmm2, %rax
10131013
; AVX2-NEXT: vmovq %rax, %xmm2
1014-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1014+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
10151015
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
10161016
; AVX2-NEXT: vcvttss2si %xmm0, %rax
10171017
; AVX2-NEXT: vmovq %rax, %xmm0
@@ -3310,7 +3310,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
33103310
;
33113311
; AVX2-LABEL: stest_f16i32_mm:
33123312
; AVX2: # %bb.0: # %entry
3313-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
3313+
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
33143314
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
33153315
; AVX2-NEXT: vcvttss2si %xmm1, %rax
33163316
; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@@ -3321,7 +3321,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
33213321
; AVX2-NEXT: vcvttss2si %xmm1, %rax
33223322
; AVX2-NEXT: vmovq %rcx, %xmm1
33233323
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3324-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3324+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
33253325
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
33263326
; AVX2-NEXT: vmovq %rax, %xmm2
33273327
; AVX2-NEXT: vcvttss2si %xmm0, %rax
@@ -3446,7 +3446,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
34463446
;
34473447
; AVX2-LABEL: utesth_f16i32_mm:
34483448
; AVX2: # %bb.0: # %entry
3449-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
3449+
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
34503450
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
34513451
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
34523452
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -3476,7 +3476,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
34763476
; AVX2-NEXT: sarq $63, %rdx
34773477
; AVX2-NEXT: andq %rax, %rdx
34783478
; AVX2-NEXT: orq %rcx, %rdx
3479-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3479+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
34803480
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
34813481
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
34823482
; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -3608,7 +3608,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
36083608
;
36093609
; AVX2-LABEL: ustest_f16i32_mm:
36103610
; AVX2: # %bb.0: # %entry
3611-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
3611+
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
36123612
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
36133613
; AVX2-NEXT: vcvttss2si %xmm1, %rax
36143614
; AVX2-NEXT: vmovq %rax, %xmm1
@@ -3620,7 +3620,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
36203620
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
36213621
; AVX2-NEXT: vcvttss2si %xmm2, %rax
36223622
; AVX2-NEXT: vmovq %rax, %xmm2
3623-
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3623+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
36243624
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
36253625
; AVX2-NEXT: vcvttss2si %xmm0, %rax
36263626
; AVX2-NEXT: vmovq %rax, %xmm0

llvm/test/CodeGen/X86/half.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1593,9 +1593,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
15931593
; BWON-F16C-NEXT: # %bb.7:
15941594
; BWON-F16C-NEXT: vmovaps %xmm5, %xmm6
15951595
; BWON-F16C-NEXT: .LBB26_8:
1596-
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[3,3,3,3,4,5,6,7]
1596+
; BWON-F16C-NEXT: vpsrlq $48, %xmm1, %xmm5
15971597
; BWON-F16C-NEXT: vcvtph2ps %xmm5, %xmm7
1598-
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm5 = xmm0[3,3,3,3,4,5,6,7]
1598+
; BWON-F16C-NEXT: vpsrlq $48, %xmm0, %xmm5
15991599
; BWON-F16C-NEXT: vcvtph2ps %xmm5, %xmm5
16001600
; BWON-F16C-NEXT: vucomiss %xmm7, %xmm5
16011601
; BWON-F16C-NEXT: ja .LBB26_10
@@ -1629,9 +1629,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
16291629
; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
16301630
; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
16311631
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm4
1632-
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
1632+
; BWON-F16C-NEXT: vpsrld $16, %xmm1, %xmm1
16331633
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
1634-
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1634+
; BWON-F16C-NEXT: vpsrld $16, %xmm0, %xmm0
16351635
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
16361636
; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
16371637
; BWON-F16C-NEXT: ja .LBB26_16

llvm/test/CodeGen/X86/pr116153.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
define void @_test_func(<16 x half> %0) #0 {
55
; CHECK-LABEL: _test_func:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
7+
; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm1
88
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
99
; CHECK-NEXT: xorl %eax, %eax
1010
; CHECK-NEXT: vucomiss %xmm1, %xmm1
@@ -16,7 +16,7 @@ define void @_test_func(<16 x half> %0) #0 {
1616
; CHECK-NEXT: vucomiss %xmm1, %xmm1
1717
; CHECK-NEXT: movl $0, %esi
1818
; CHECK-NEXT: cmovnpl %ecx, %esi
19-
; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
19+
; CHECK-NEXT: vpsrld $16, %xmm0, %xmm1
2020
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
2121
; CHECK-NEXT: vucomiss %xmm1, %xmm1
2222
; CHECK-NEXT: movl $0, %edi

llvm/test/CodeGen/X86/pr91005.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,8 @@ define void @PR91005(ptr %0) minsize {
77
; CHECK-NEXT: testb %al, %al
88
; CHECK-NEXT: je .LBB0_2
99
; CHECK-NEXT: # %bb.1:
10-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [31744,31744,31744,31744]
10+
; CHECK-NEXT: movl $31744, %eax # imm = 0x7C00
11+
; CHECK-NEXT: vmovd %eax, %xmm0
1112
; CHECK-NEXT: vpcmpeqw %xmm0, %xmm0, %xmm0
1213
; CHECK-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1314
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0

0 commit comments

Comments
 (0)