Skip to content

Commit f34e20f

Browse files
arsenm and srpande
authored and committed
AMDGPU: Support buffer_atomic_pk_add_bf16 for gfx950 (llvm#117599)
Co-authored-by: Sirish Pande <[email protected]>
1 parent 5450c2d commit f34e20f

File tree

10 files changed

+224
-10
lines changed

10 files changed

+224
-10
lines changed

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
9090
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
9191
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
92-
// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
92+
// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
9393
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9494
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9595
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
@@ -109,8 +109,8 @@
109109
// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
110110
// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
111111
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
112-
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
113-
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
112+
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
113+
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
114114

115115
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
116116

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,7 +1416,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
14161416
// gfx908 intrinsic
14171417
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
14181418

1419-
// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+.
1419+
// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+.
14201420
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
14211421

14221422
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
@@ -1491,7 +1491,7 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
14911491
ImmArg<ArgIndex<6>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
14921492
AMDGPURsrcIntrinsic<2, 0>;
14931493

1494-
// gfx908 intrinsic
1494+
// gfx908 intrinsic. Supports v2bf16 on gfx12+ and gfx950
14951495
def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
14961496
def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
14971497

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1595,7 +1595,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
15951595
FeatureFP4ConversionScaleInsts,
15961596
FeatureFP6BF6ConversionScaleInsts,
15971597
FeatureDot12Insts,
1598-
FeatureDot13Insts
1598+
FeatureDot13Insts,
1599+
FeatureAtomicBufferPkAddBF16Inst
15991600
])>;
16001601

16011602
def FeatureISAVersion9_4_0 : FeatureSet<

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3291,6 +3291,8 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_vi <0x3f>;
32913291

32923292

32933293
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
3294+
defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Real_Atomic_vi <0x52>;
3295+
32943296
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;
32953297

32963298
let SubtargetPredicate = isGFX90APlus in {

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
379379
Features["atomic-ds-pk-add-16-insts"] = true;
380380
Features["atomic-flat-pk-add-16-insts"] = true;
381381
Features["atomic-buffer-global-pk-add-f16-insts"] = true;
382+
Features["atomic-buffer-pk-add-bf16-inst"] = true;
382383
Features["atomic-global-pk-add-bf16-inst"] = true;
383384
Features["16-bit-insts"] = true;
384385
Features["dpp"] = true;
@@ -479,6 +480,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
479480
Features["ashr-pk-insts"] = true;
480481
Features["dot12-insts"] = true;
481482
Features["dot13-insts"] = true;
483+
Features["atomic-buffer-pk-add-bf16-inst"] = true;
482484
Features["gfx950-insts"] = true;
483485
[[fallthrough]];
484486
case GK_GFX942:
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-SDAG
3+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-GISEL
4+
5+
declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
6+
declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32)
7+
8+
define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
9+
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret:
10+
; GFX950-SDAG: ; %bb.0:
11+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
12+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
13+
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen sc0
14+
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0
15+
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
16+
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0
17+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
18+
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
19+
; GFX950-SDAG-NEXT: ; return to shader part epilog
20+
;
21+
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret:
22+
; GFX950-GISEL: ; %bb.0:
23+
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
24+
; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2
25+
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen sc0
26+
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0
27+
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
28+
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0
29+
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
30+
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
31+
; GFX950-GISEL-NEXT: ; return to shader part epilog
32+
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
33+
store <2 x bfloat> %orig, ptr null
34+
ret float 1.0
35+
}
36+
37+
define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
38+
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret:
39+
; GFX950-SDAG: ; %bb.0:
40+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
41+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
42+
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen
43+
; GFX950-SDAG-NEXT: s_endpgm
44+
;
45+
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret:
46+
; GFX950-GISEL: ; %bb.0:
47+
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
48+
; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2
49+
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen
50+
; GFX950-GISEL-NEXT: s_endpgm
51+
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
52+
ret void
53+
}
54+
55+
define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
56+
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16:
57+
; GFX950-SDAG: ; %bb.0:
58+
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
59+
; GFX950-SDAG-NEXT: s_endpgm
60+
;
61+
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16:
62+
; GFX950-GISEL: ; %bb.0:
63+
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
64+
; GFX950-GISEL-NEXT: s_endpgm
65+
%ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
66+
ret void
67+
}
68+
69+
define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
70+
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret:
71+
; GFX950-SDAG: ; %bb.0:
72+
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0
73+
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0
74+
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
75+
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0
76+
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
77+
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
78+
; GFX950-SDAG-NEXT: ; return to shader part epilog
79+
;
80+
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret:
81+
; GFX950-GISEL: ; %bb.0:
82+
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0
83+
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0
84+
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
85+
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0
86+
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
87+
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88+
; GFX950-GISEL-NEXT: ; return to shader part epilog
89+
%orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
90+
store <2 x bfloat> %orig, ptr null
91+
ret float 1.0
92+
}

llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2636,16 +2636,16 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:8388607 th:TH_ATOMIC_CASCA
26362636
// GFX12: encoding: [0x03,0x80,0x16,0xc4,0x05,0x10,0xe8,0x00,0x00,0xff,0xff,0x7f]
26372637

26382638
buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:8388607
2639-
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
2639+
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
26402640

26412641
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:8388607
2642-
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
2642+
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
26432643

26442644
buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:8388607
2645-
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
2645+
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
26462646

26472647
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:8388607
2648-
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
2648+
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
26492649

26502650
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:8388607 glc
26512651
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,3 +1089,63 @@ v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], 22
10891089
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
10901090
// GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02]
10911091
v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11
1092+
1093+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1094+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03]
1095+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095
1096+
1097+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1098+
// GFX950: buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03]
1099+
buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095
1100+
1101+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1102+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03]
1103+
buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095
1104+
1105+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1106+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03]
1107+
buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095
1108+
1109+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1110+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65]
1111+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095
1112+
1113+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1114+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c]
1115+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095
1116+
1117+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1118+
// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03]
1119+
buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095
1120+
1121+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1122+
// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03]
1123+
buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095
1124+
1125+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1126+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03]
1127+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3
1128+
1129+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1130+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03]
1131+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3
1132+
1133+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1134+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03]
1135+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7
1136+
1137+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1138+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80]
1139+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095
1140+
1141+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1142+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1]
1143+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095
1144+
1145+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1146+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0]
1147+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095
1148+
1149+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1150+
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7]
1151+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,3 +341,15 @@ v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 div:2
341341

342342
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
343343
v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 clamp div:2
344+
345+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
346+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc
347+
348+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
349+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 slc
350+
351+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
352+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc
353+
354+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
355+
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc

0 commit comments

Comments
 (0)