Skip to content

Commit 7210a98

Browse files
committed
[X86] Add missing subvector_subreg_lowering for BF16 (llvm#83720)
1 parent 6009708 commit 7210a98

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

llvm/lib/Target/X86/X86InstrVecCompiler.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
8383
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
8484
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
8585
defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>;
86+
defm : subvector_subreg_lowering<VR128, v8bf16, VR256, v16bf16, sub_xmm>;
8687

8788
// A 128-bit subvector extract from the first 512-bit vector position is a
8889
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -95,6 +96,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
9596
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
9697
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
9798
defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>;
99+
defm : subvector_subreg_lowering<VR128, v8bf16, VR512, v32bf16, sub_xmm>;
98100

99101
// A 256-bit subvector extract from the first 512-bit vector position is a
100102
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
@@ -107,6 +109,7 @@ defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
107109
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
108110
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
109111
defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>;
112+
defm : subvector_subreg_lowering<VR256, v16bf16, VR512, v32bf16, sub_ymm>;
110113

111114

112115
// If we're inserting into an all zeros vector, just use a plain move which

llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,25 @@ entry:
402402
%1 = shufflevector <8 x bfloat> %0, <8 x bfloat> undef, <16 x i32> zeroinitializer
403403
ret <16 x bfloat> %1
404404
}
405+
406+
define <16 x i32> @pr83358() {
407+
; X86-LABEL: pr83358:
408+
; X86: # %bb.0:
409+
; X86-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A]
410+
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
411+
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
412+
; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
413+
; X86-NEXT: retl # encoding: [0xc3]
414+
;
415+
; X64-LABEL: pr83358:
416+
; X64: # %bb.0:
417+
; X64-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A]
418+
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
419+
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
420+
; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
421+
; X64-NEXT: retq # encoding: [0xc3]
422+
%1 = call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>)
423+
%2 = bitcast <8 x bfloat> %1 to <4 x i32>
424+
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
425+
ret <16 x i32> %3
426+
}

0 commit comments

Comments
 (0)