Skip to content

Commit ace87ec

Browse files
authored
[AMDGPU][AMDGPURegBankInfo] Map S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX (#117574)
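In one test, code generation diverged between GISEL and DAG. For example, the intrinsic call `%ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)` would be lowered into one of these two instructions, depending on the selector: * `buffer_load_u8 v2, v2, s[0:3], null offen` (DAG) * `buffer_load_b32 v2, v2, s[0:3], null offen` (GISEL) This patch fixes the issue by mapping each S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX, so both selectors now emit the same instruction.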
1 parent e0fb3ac commit ace87ec

File tree

2 files changed

+52
-64
lines changed

2 files changed

+52
-64
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,24 @@ unsigned AMDGPURegisterBankInfo::setBufferOffsets(
13301330
return 0;
13311331
}
13321332

1333+
/// Translate a G_AMDGPU_S_BUFFER_LOAD* opcode into the G_AMDGPU_BUFFER_LOAD*
/// opcode carrying the same suffix (none, UBYTE, SBYTE, USHORT or SSHORT).
///
/// \param Opc one of the five G_AMDGPU_S_BUFFER_LOAD* opcodes listed in the
///            switch below.
/// \returns the matching G_AMDGPU_BUFFER_LOAD* opcode. Any other opcode falls
///          through to llvm_unreachable.
static unsigned getSBufferLoadCorrespondingBufferLoadOpcode(unsigned Opc) {
1334+
switch (Opc) {
1335+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
1336+
return AMDGPU::G_AMDGPU_BUFFER_LOAD;
1337+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
1338+
return AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
1339+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
1340+
return AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
1341+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
1342+
return AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
1343+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
1344+
return AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
1345+
default:
1346+
break;
1347+
}
1348+
llvm_unreachable("Unexpected s_buffer_load opcode");
1349+
}
1350+
13331351
bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
13341352
MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
13351353
MachineInstr &MI = OpdMapper.getMI();
@@ -1406,16 +1424,16 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
14061424
if (i != 0)
14071425
BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset + 16 * i, MemSize);
14081426

1409-
B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
1410-
.addDef(LoadParts[i]) // vdata
1411-
.addUse(RSrc) // rsrc
1412-
.addUse(VIndex) // vindex
1413-
.addUse(VOffset) // voffset
1414-
.addUse(SOffset) // soffset
1415-
.addImm(ImmOffset + 16 * i) // offset(imm)
1416-
.addImm(0) // cachepolicy, swizzled buffer(imm)
1417-
.addImm(0) // idxen(imm)
1418-
.addMemOperand(MMO);
1427+
B.buildInstr(getSBufferLoadCorrespondingBufferLoadOpcode(MI.getOpcode()))
1428+
.addDef(LoadParts[i]) // vdata
1429+
.addUse(RSrc) // rsrc
1430+
.addUse(VIndex) // vindex
1431+
.addUse(VOffset) // voffset
1432+
.addUse(SOffset) // soffset
1433+
.addImm(ImmOffset + 16 * i) // offset(imm)
1434+
.addImm(0) // cachepolicy, swizzled buffer(imm)
1435+
.addImm(0) // idxen(imm)
1436+
.addMemOperand(MMO);
14191437
}
14201438

14211439
// TODO: If only the resource is a VGPR, it may be better to execute the

llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll

Lines changed: 24 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -465,19 +465,12 @@ main_body:
465465
}
466466

467467
define amdgpu_ps void @s_buffer_load_byte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
468-
; DAG-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
469-
; DAG: ; %bb.0: ; %main_body
470-
; DAG-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
471-
; DAG-NEXT: s_wait_loadcnt 0x0
472-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
473-
; DAG-NEXT: s_endpgm
474-
;
475-
; GISEL-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
476-
; GISEL: ; %bb.0: ; %main_body
477-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
478-
; GISEL-NEXT: s_wait_loadcnt 0x0
479-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
480-
; GISEL-NEXT: s_endpgm
468+
; GCN-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
469+
; GCN: ; %bb.0: ; %main_body
470+
; GCN-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
471+
; GCN-NEXT: s_wait_loadcnt 0x0
472+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
473+
; GCN-NEXT: s_endpgm
481474
main_body:
482475
%ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %offset, i32 0)
483476
%sext = sext i8 %ld to i32
@@ -538,20 +531,12 @@ main_body:
538531
}
539532

540533
define amdgpu_ps void @s_buffer_load_ubyte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
541-
; DAG-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
542-
; DAG: ; %bb.0: ; %main_body
543-
; DAG-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
544-
; DAG-NEXT: s_wait_loadcnt 0x0
545-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
546-
; DAG-NEXT: s_endpgm
547-
;
548-
; GISEL-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
549-
; GISEL: ; %bb.0: ; %main_body
550-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
551-
; GISEL-NEXT: s_wait_loadcnt 0x0
552-
; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
553-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
554-
; GISEL-NEXT: s_endpgm
534+
; GCN-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
535+
; GCN: ; %bb.0: ; %main_body
536+
; GCN-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
537+
; GCN-NEXT: s_wait_loadcnt 0x0
538+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
539+
; GCN-NEXT: s_endpgm
555540
main_body:
556541
%ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
557542
%zext = zext i8 %ld to i32
@@ -606,19 +591,12 @@ main_body:
606591
}
607592

608593
define amdgpu_ps void @s_buffer_load_short_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
609-
; DAG-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
610-
; DAG: ; %bb.0: ; %main_body
611-
; DAG-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
612-
; DAG-NEXT: s_wait_loadcnt 0x0
613-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
614-
; DAG-NEXT: s_endpgm
615-
;
616-
; GISEL-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
617-
; GISEL: ; %bb.0: ; %main_body
618-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
619-
; GISEL-NEXT: s_wait_loadcnt 0x0
620-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
621-
; GISEL-NEXT: s_endpgm
594+
; GCN-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
595+
; GCN: ; %bb.0: ; %main_body
596+
; GCN-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
597+
; GCN-NEXT: s_wait_loadcnt 0x0
598+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
599+
; GCN-NEXT: s_endpgm
622600
main_body:
623601
%ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %offset, i32 0)
624602
%sext = sext i16 %ld to i32
@@ -679,20 +657,12 @@ main_body:
679657
}
680658

681659
define amdgpu_ps void @s_buffer_load_ushort_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
682-
; DAG-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
683-
; DAG: ; %bb.0: ; %main_body
684-
; DAG-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
685-
; DAG-NEXT: s_wait_loadcnt 0x0
686-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
687-
; DAG-NEXT: s_endpgm
688-
;
689-
; GISEL-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
690-
; GISEL: ; %bb.0: ; %main_body
691-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
692-
; GISEL-NEXT: s_wait_loadcnt 0x0
693-
; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
694-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
695-
; GISEL-NEXT: s_endpgm
660+
; GCN-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
661+
; GCN: ; %bb.0: ; %main_body
662+
; GCN-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
663+
; GCN-NEXT: s_wait_loadcnt 0x0
664+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
665+
; GCN-NEXT: s_endpgm
696666
main_body:
697667
%ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
698668
%zext = zext i16 %ld to i32

0 commit comments

Comments
 (0)