Skip to content

Commit 3033f0c

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (llvm#116681)
Enforcing this limit in the clang builtin will come later.
1 parent 1669b16 commit 3033f0c

9 files changed

+485
-12
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,7 +1716,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
17161716
[],
17171717
[llvm_v4i32_ty, // rsrc(SGPR)
17181718
LLVMQualPointerType<3>, // LDS base offset
1719-
llvm_i32_ty, // Data byte size: 1/2/4
1719+
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
17201720
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
17211721
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
17221722
llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
@@ -1735,7 +1735,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic <
17351735
[],
17361736
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
17371737
LLVMQualPointerType<3>, // LDS base offset
1738-
llvm_i32_ty, // Data byte size: 1/2/4
1738+
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
17391739
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
17401740
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
17411741
llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
@@ -1757,7 +1757,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
17571757
[],
17581758
[llvm_v4i32_ty, // rsrc(SGPR)
17591759
LLVMQualPointerType<3>, // LDS base offset
1760-
llvm_i32_ty, // Data byte size: 1/2/4
1760+
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
17611761
llvm_i32_ty, // vindex(VGPR)
17621762
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
17631763
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
@@ -1777,7 +1777,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
17771777
[],
17781778
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
17791779
LLVMQualPointerType<3>, // LDS base offset
1780-
llvm_i32_ty, // Data byte size: 1/2/4
1780+
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
17811781
llvm_i32_ty, // vindex(VGPR)
17821782
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
17831783
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3256,6 +3256,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
32563256
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
32573257
: AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
32583258
break;
3259+
case 12:
3260+
if (!Subtarget->hasLDSLoadB96_B128())
3261+
return false;
3262+
3263+
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3264+
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3265+
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3266+
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3267+
break;
3268+
case 16:
3269+
if (!Subtarget->hasLDSLoadB96_B128())
3270+
return false;
3271+
3272+
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3273+
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3274+
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3275+
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3276+
break;
32593277
}
32603278

32613279
MachineBasicBlock *MBB = MI.getParent();

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -571,9 +571,17 @@ multiclass MUBUF_Pseudo_Loads<string opName, ValueType load_vt = i32,
571571
}
572572
}
573573

574-
multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> {
574+
multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32, Predicate LDSPred = TruePredicate> {
575575
defm NAME : MUBUF_Pseudo_Loads<opName, load_vt>;
576-
defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
576+
577+
if !ne(LDSPred, TruePredicate) then {
578+
let SubtargetPredicate = LDSPred in {
579+
defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
580+
}
581+
} else {
582+
defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
583+
}
584+
577585
}
578586

579587
multiclass MUBUF_Pseudo_Loads_LDSOpc<string opName,
@@ -954,11 +962,11 @@ defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds <
954962
defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads <
955963
"buffer_load_dwordx2", v2i32
956964
>;
957-
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads <
958-
"buffer_load_dwordx3", v3i32
965+
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads_Lds <
966+
"buffer_load_dwordx3", v3i32, /*LDSPred=*/HasGFX950Insts
959967
>;
960-
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
961-
"buffer_load_dwordx4", v4i32
968+
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads_Lds <
969+
"buffer_load_dwordx4", v4i32, /*LDSPred=*/HasGFX950Insts
962970
>;
963971

964972
defm BUFFER_LOAD_LDS_B32 : MUBUF_Pseudo_Loads_LDSOpc <
@@ -3225,8 +3233,8 @@ defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_vi <0x12>;
32253233
defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_vi <0x13>;
32263234
defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_vi <0x14>;
32273235
defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>;
3228-
defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_vi <0x16>;
3229-
defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>;
3236+
defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_Lds_vi <0x16>;
3237+
defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_Lds_vi <0x17>;
32303238
defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>;
32313239
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x19>;
32323240
defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9980,6 +9980,22 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
99809980
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
99819981
: AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
99829982
break;
9983+
case 12:
9984+
if (!Subtarget->hasLDSLoadB96_B128())
9985+
return SDValue();
9986+
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
9987+
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
9988+
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
9989+
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
9990+
break;
9991+
case 16:
9992+
if (!Subtarget->hasLDSLoadB96_B128())
9993+
return SDValue();
9994+
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
9995+
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
9996+
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
9997+
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
9998+
break;
99839999
}
998410000

998510001
SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
33
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
44

5+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
6+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
7+
8+
; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.lds
9+
10+
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.global.load.lds),
11+
12+
513
declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
614

715
;---------------------------------------------------------------------
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
4+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
5+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
6+
7+
; FIXME: Not a great error
8+
; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
9+
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.ptr.buffer.load.lds),
10+
11+
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
12+
13+
;---------------------------------------------------------------------
14+
; dwordx3
15+
;---------------------------------------------------------------------
16+
17+
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
18+
; GFX950-LABEL: buffer_load_lds_dwordx3:
19+
; GFX950: ; %bb.0: ; %main_body
20+
; GFX950-NEXT: s_mov_b32 m0, s4
21+
; GFX950-NEXT: s_nop 0
22+
; GFX950-NEXT: buffer_load_dword off, s[0:3], 0 lds
23+
; GFX950-NEXT: buffer_load_dword off, s[0:3], 0 offset:4 sc0 lds
24+
; GFX950-NEXT: buffer_load_dword off, s[0:3], 0 offset:8 nt lds
25+
; GFX950-NEXT: v_mov_b32_e32 v0, s4
26+
; GFX950-NEXT: s_waitcnt vmcnt(0)
27+
; GFX950-NEXT: ds_read_b32 v0, v0
28+
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
29+
; GFX950-NEXT: ; return to shader part epilog
30+
main_body:
31+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
32+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
33+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
34+
%res = load float, ptr addrspace(3) %lds
35+
ret float %res
36+
}
37+
38+
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
39+
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_voffset:
40+
; GFX950: ; %bb.0:
41+
; GFX950-NEXT: v_mov_b32_e32 v0, 0x800
42+
; GFX950-NEXT: s_mov_b32 m0, s4
43+
; GFX950-NEXT: s_nop 0
44+
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 offen lds
45+
; GFX950-NEXT: s_endpgm
46+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 2048, i32 0, i32 0, i32 0)
47+
ret void
48+
}
49+
50+
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
51+
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
52+
; GFX950: ; %bb.0:
53+
; GFX950-NEXT: s_mov_b32 m0, s4
54+
; GFX950-NEXT: s_nop 0
55+
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 offen lds
56+
; GFX950-NEXT: s_endpgm
57+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 0, i32 0, i32 0)
58+
ret void
59+
}
60+
61+
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
62+
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
63+
; GFX950: ; %bb.0:
64+
; GFX950-NEXT: s_mov_b32 m0, s4
65+
; GFX950-NEXT: s_nop 0
66+
; GFX950-NEXT: buffer_load_dwordx3 off, s[0:3], s5 lds
67+
; GFX950-NEXT: s_endpgm
68+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 %soffset, i32 0, i32 0)
69+
ret void
70+
}
71+
72+
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
73+
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
74+
; GFX950: ; %bb.0:
75+
; GFX950-NEXT: s_mov_b32 m0, s4
76+
; GFX950-NEXT: s_nop 0
77+
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], s5 offen lds
78+
; GFX950-NEXT: s_endpgm
79+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 0, i32 0)
80+
ret void
81+
}
82+
83+
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
84+
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
85+
; GFX950: ; %bb.0:
86+
; GFX950-NEXT: s_mov_b32 m0, s4
87+
; GFX950-NEXT: s_nop 0
88+
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], s5 offen offset:2048 lds
89+
; GFX950-NEXT: s_endpgm
90+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 2048, i32 0)
91+
ret void
92+
}
93+
94+
;---------------------------------------------------------------------
95+
; dwordx4
96+
;---------------------------------------------------------------------
97+
98+
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
99+
; GFX950-LABEL: buffer_load_lds_dwordx4:
100+
; GFX950: ; %bb.0: ; %main_body
101+
; GFX950-NEXT: s_mov_b32 m0, s4
102+
; GFX950-NEXT: s_nop 0
103+
; GFX950-NEXT: buffer_load_dword off, s[0:3], 0 lds
104+
; GFX950-NEXT: buffer_load_dword off, s[0:3], 0 offset:4 sc0 lds
105+
; GFX950-NEXT: buffer_load_dword off, s[0:3], 0 offset:8 nt lds
106+
; GFX950-NEXT: v_mov_b32_e32 v0, s4
107+
; GFX950-NEXT: s_waitcnt vmcnt(0)
108+
; GFX950-NEXT: ds_read_b32 v0, v0
109+
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
110+
; GFX950-NEXT: ; return to shader part epilog
111+
main_body:
112+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
113+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
114+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
115+
%res = load float, ptr addrspace(3) %lds
116+
ret float %res
117+
}
118+
119+
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
120+
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_voffset:
121+
; GFX950: ; %bb.0:
122+
; GFX950-NEXT: v_mov_b32_e32 v0, 0x800
123+
; GFX950-NEXT: s_mov_b32 m0, s4
124+
; GFX950-NEXT: s_nop 0
125+
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 offen lds
126+
; GFX950-NEXT: s_endpgm
127+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 2048, i32 0, i32 0, i32 0)
128+
ret void
129+
}
130+
131+
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
132+
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
133+
; GFX950: ; %bb.0:
134+
; GFX950-NEXT: s_mov_b32 m0, s4
135+
; GFX950-NEXT: s_nop 0
136+
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 offen lds
137+
; GFX950-NEXT: s_endpgm
138+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 0, i32 0, i32 0)
139+
ret void
140+
}
141+
142+
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
143+
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
144+
; GFX950: ; %bb.0:
145+
; GFX950-NEXT: s_mov_b32 m0, s4
146+
; GFX950-NEXT: s_nop 0
147+
; GFX950-NEXT: buffer_load_dwordx4 off, s[0:3], s5 lds
148+
; GFX950-NEXT: s_endpgm
149+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 %soffset, i32 0, i32 0)
150+
ret void
151+
}
152+
153+
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
154+
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
155+
; GFX950: ; %bb.0:
156+
; GFX950-NEXT: s_mov_b32 m0, s4
157+
; GFX950-NEXT: s_nop 0
158+
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], s5 offen lds
159+
; GFX950-NEXT: s_endpgm
160+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 0, i32 0)
161+
ret void
162+
}
163+
164+
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
165+
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
166+
; GFX950: ; %bb.0:
167+
; GFX950-NEXT: s_mov_b32 m0, s4
168+
; GFX950-NEXT: s_nop 0
169+
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], s5 offen offset:2048 lds
170+
; GFX950-NEXT: s_endpgm
171+
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 2048, i32 0)
172+
ret void
173+
}
174+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
175+
; GFX950-GISEL: {{.*}}
176+
; GFX950-SDAG: {{.*}}

0 commit comments

Comments
 (0)