Skip to content

Commit 4bd5ac2

Browse files
committed
[X86] Add missing subvector_subreg_lowering for BF16
Fixes: #83358
1 parent eb3b063 commit 4bd5ac2

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

llvm/lib/Target/X86/X86InstrVecCompiler.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
8383
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
8484
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
8585
defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>;
86+
defm : subvector_subreg_lowering<VR128, v8bf16, VR256, v16bf16, sub_xmm>;
8687

8788
// A 128-bit subvector extract from the first 512-bit vector position is a
8889
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -95,6 +96,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
9596
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
9697
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
9798
defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>;
99+
defm : subvector_subreg_lowering<VR128, v8bf16, VR512, v32bf16, sub_xmm>;
98100

99101
// A 256-bit subvector extract from the first 512-bit vector position is a
100102
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
@@ -107,6 +109,7 @@ defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
107109
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
108110
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
109111
defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>;
112+
defm : subvector_subreg_lowering<VR256, v16bf16, VR512, v32bf16, sub_ymm>;
110113

111114

112115
// If we're inserting into an all zeros vector, just use a plain move which

llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,25 @@ entry:
381381
%1 = shufflevector <8 x bfloat> %0, <8 x bfloat> undef, <16 x i32> zeroinitializer
382382
ret <16 x bfloat> %1
383383
}
384+
385+
; Regression test for issue #83358.
; Converts a constant <8 x float> to <8 x bfloat> with the
; llvm.x86.avx512bf16.cvtneps2bf16.256 intrinsic, bitcasts the result to
; <4 x i32>, and repeats those four elements four times to form the
; <16 x i32> return value. The CHECK lines below expect a vcvtneps2bf16y from
; the constant pool followed by a single vshufi64x2 $0 on zmm0.
; NOTE(review): presumably this exercises the bf16 subvector_subreg_lowering
; patterns added by the same commit -- confirm against the issue.
define <16 x i32> @pr83358() {
386+
; X86-LABEL: pr83358:
387+
; X86: # %bb.0:
388+
; X86-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A]
389+
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
390+
; X86-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00]
391+
; X86-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1]
392+
; X86-NEXT: retl # encoding: [0xc3]
393+
;
394+
; X64-LABEL: pr83358:
395+
; X64: # %bb.0:
396+
; X64-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A]
397+
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
398+
; X64-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00]
399+
; X64-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1]
400+
; X64-NEXT: retq # encoding: [0xc3]
401+
%1 = call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>)
402+
%2 = bitcast <8 x bfloat> %1 to <4 x i32>
403+
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
404+
ret <16 x i32> %3
405+
}

0 commit comments

Comments
 (0)