Skip to content

Commit 520d91d

Browse files
committed
AMDGPU: Fix buffer intrinsic store of bfloat
1 parent d83b782 commit 520d91d

File tree

2 files changed

+34 -7 lines changed

2 files changed

+34 -7 lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -874,7 +874,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
874874
{MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v2bf16,
875875
MVT::v3i16, MVT::v3f16, MVT::v4f16, MVT::v4i16,
876876
MVT::v4bf16, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
877-
MVT::f16, MVT::i16, MVT::i8, MVT::i128},
877+
MVT::f16, MVT::i16, MVT::bf16, MVT::i8, MVT::i128},
878878
Custom);
879879

880880
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
@@ -9973,7 +9973,7 @@ SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
99739973
EVT VDataType, SDLoc DL,
99749974
SDValue Ops[],
99759975
MemSDNode *M) const {
9976-
if (VDataType == MVT::f16)
9976+
if (VDataType == MVT::f16 || VDataType == MVT::bf16)
99779977
Ops[1] = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Ops[1]);
99789978

99799979
SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.ll

Lines changed: 32 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,38 @@
55
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
66
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11 %s
77

8-
; FIXME
9-
; define amdgpu_ps void @buffer_store_bf16(ptr addrspace(8) inreg %rsrc, bfloat %data, i32 %offset) {
10-
; call void @llvm.amdgcn.raw.ptr.buffer.store.bf16(bfloat %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
11-
; ret void
12-
; }
8+
define amdgpu_ps void @buffer_store_bf16(ptr addrspace(8) inreg %rsrc, bfloat %data, i32 %offset) {
9+
; GFX7-LABEL: buffer_store_bf16:
10+
; GFX7: ; %bb.0:
11+
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
12+
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
13+
; GFX7-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen
14+
; GFX7-NEXT: s_endpgm
15+
;
16+
; GFX8-LABEL: buffer_store_bf16:
17+
; GFX8: ; %bb.0:
18+
; GFX8-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen
19+
; GFX8-NEXT: s_endpgm
20+
;
21+
; GFX9-LABEL: buffer_store_bf16:
22+
; GFX9: ; %bb.0:
23+
; GFX9-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen
24+
; GFX9-NEXT: s_endpgm
25+
;
26+
; GFX10-LABEL: buffer_store_bf16:
27+
; GFX10: ; %bb.0:
28+
; GFX10-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen
29+
; GFX10-NEXT: s_endpgm
30+
;
31+
; GFX11-LABEL: buffer_store_bf16:
32+
; GFX11: ; %bb.0:
33+
; GFX11-NEXT: buffer_store_b16 v0, v1, s[0:3], 0 offen
34+
; GFX11-NEXT: s_nop 0
35+
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
36+
; GFX11-NEXT: s_endpgm
37+
call void @llvm.amdgcn.raw.ptr.buffer.store.bf16(bfloat %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
38+
ret void
39+
}
1340

1441
define amdgpu_ps void @buffer_store_v2bf16(ptr addrspace(8) inreg %rsrc, <2 x bfloat> %data, i32 %offset) {
1542
; GFX7-LABEL: buffer_store_v2bf16:

0 commit comments

Comments
 (0)