Skip to content

Commit c8bbbaa

Browse files
authored
[SelectionDAG][AMDGPU] Negative offset when selecting scratch sv offsets (#122251)
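Constructing an APInt from a negative offset will fail unless the value is flagged as signed. SelectScratchSVAddr utilizes the function changed here (checkFlatScratchSVSSwizzleBug) and can be given a negative offset as well, so this change constructs the APInt with isSigned=true (the original description said "APSInt", but the diff below keeps APInt and passes /*isSigned=*/true).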
1 parent e33f456 commit c8bbbaa

File tree

2 files changed

+80
-1
lines changed

2 files changed

+80
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1926,7 +1926,8 @@ bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
19261926
KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
19271927
KnownBits SKnown =
19281928
KnownBits::add(CurDAG->computeKnownBits(SAddr),
1929-
KnownBits::makeConstant(APInt(32, ImmOffset)));
1929+
KnownBits::makeConstant(APInt(32, ImmOffset,
1930+
/*isSigned=*/true)));
19301931
uint64_t VMax = VKnown.getMaxValue().getZExtValue();
19311932
uint64_t SMax = SKnown.getMaxValue().getZExtValue();
19321933
return (VMax & 3) + (SMax & 3) >= 4;

llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,3 +1243,81 @@ bb:
12431243
store volatile i8 4, ptr addrspace(5) %p4
12441244
ret void
12451245
}
1246+
1247+
define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
1248+
; GFX940-SDAG-LABEL: soff1_voff1_negative:
1249+
; GFX940-SDAG: ; %bb.0: ; %bb
1250+
; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
1251+
; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1252+
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
1253+
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1254+
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0
1255+
; GFX940-SDAG-NEXT: v_add_u32_e32 v0, -1, v0
1256+
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
1257+
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
1258+
; GFX940-SDAG-NEXT: s_endpgm
1259+
;
1260+
; GFX940-GISEL-LABEL: soff1_voff1_negative:
1261+
; GFX940-GISEL: ; %bb.0: ; %bb
1262+
; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
1263+
; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1264+
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
1265+
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1266+
; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
1267+
; GFX940-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1
1268+
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
1269+
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
1270+
; GFX940-GISEL-NEXT: s_endpgm
1271+
;
1272+
; GFX11-SDAG-LABEL: soff1_voff1_negative:
1273+
; GFX11-SDAG: ; %bb.0: ; %bb
1274+
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
1275+
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1276+
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1277+
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1278+
; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
1279+
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
1280+
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
1281+
; GFX11-SDAG-NEXT: s_endpgm
1282+
;
1283+
; GFX11-GISEL-LABEL: soff1_voff1_negative:
1284+
; GFX11-GISEL: ; %bb.0: ; %bb
1285+
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
1286+
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1287+
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1288+
; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
1289+
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1290+
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
1291+
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
1292+
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
1293+
; GFX11-GISEL-NEXT: s_endpgm
1294+
;
1295+
; GFX12-SDAG-LABEL: soff1_voff1_negative:
1296+
; GFX12-SDAG: ; %bb.0: ; %bb
1297+
; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
1298+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1299+
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1300+
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:-1 scope:SCOPE_SYS
1301+
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
1302+
; GFX12-SDAG-NEXT: s_endpgm
1303+
;
1304+
; GFX12-GISEL-LABEL: soff1_voff1_negative:
1305+
; GFX12-GISEL: ; %bb.0: ; %bb
1306+
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
1307+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1308+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1309+
; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
1310+
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1311+
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
1312+
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 scope:SCOPE_SYS
1313+
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
1314+
; GFX12-GISEL-NEXT: s_endpgm
1315+
bb:
1316+
%a = alloca [64 x i8], align 4, addrspace(5)
1317+
%as = getelementptr i8, ptr addrspace(5) %a, i32 %soff
1318+
%voff = call i32 @llvm.amdgcn.workitem.id.x()
1319+
%asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff
1320+
%p1 = getelementptr i8, ptr addrspace(5) %asv, i32 -1
1321+
store volatile i8 1, ptr addrspace(5) %p1
1322+
ret void
1323+
}

0 commit comments

Comments
 (0)