Skip to content

Commit dfc89f8

Browse files
phoebewang authored and tstellar committed
[X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125)
AVX doesn't provide a 16-bit BROADCAST instruction. Fixes #91005
1 parent 047cd91 commit dfc89f8

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -7295,7 +7295,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
72957295
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
72967296
if (ScalarSize == 32 ||
72977297
(ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
7298-
CVT == MVT::f16 ||
7298+
(CVT == MVT::f16 && Subtarget.hasAVX2()) ||
72997299
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
73007300
const Constant *C = nullptr;
73017301
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))

llvm/test/CodeGen/X86/pr91005.ll

+40
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s
3+
4+
define void @PR91005(ptr %0) minsize {
5+
; CHECK-LABEL: PR91005:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: xorl %eax, %eax
8+
; CHECK-NEXT: testb %al, %al
9+
; CHECK-NEXT: je .LBB0_2
10+
; CHECK-NEXT: # %bb.1:
11+
; CHECK-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
12+
; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
13+
; CHECK-NEXT: vpextrw $0, %xmm0, %eax
14+
; CHECK-NEXT: movzwl %ax, %eax
15+
; CHECK-NEXT: vmovd %eax, %xmm0
16+
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
17+
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
18+
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
19+
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
20+
; CHECK-NEXT: vmovd %xmm0, %eax
21+
; CHECK-NEXT: movw %ax, (%rdi)
22+
; CHECK-NEXT: .LBB0_2: # %common.ret
23+
; CHECK-NEXT: retq
24+
%2 = bitcast <2 x half> poison to <2 x i16>
25+
%3 = icmp eq <2 x i16> %2, <i16 31744, i16 31744>
26+
br i1 poison, label %4, label %common.ret
27+
28+
common.ret: ; preds = %4, %1
29+
ret void
30+
31+
4: ; preds = %1
32+
%5 = select <2 x i1> %3, <2 x half> <half 0xH3C00, half 0xH3C00>, <2 x half> zeroinitializer
33+
%6 = fmul <2 x half> %5, zeroinitializer
34+
%7 = fsub <2 x half> %6, zeroinitializer
35+
%8 = extractelement <2 x half> %7, i64 0
36+
store half %8, ptr %0, align 2
37+
br label %common.ret
38+
}
39+
40+
declare <2 x half> @llvm.fabs.v2f16(<2 x half>)

0 commit comments

Comments
 (0)