Skip to content

Commit 6f7b7bc

Browse files
committed
[AMDGPU] Change SGPR layout to striped caller/callee saved (llvm#127353)
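This PR updates the SGPR layout to a striped caller/callee-saved design, similar to the VGPR layout. To ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame pointer), and s34 (base pointer) remain callee-saved, the striped layout starts from s40, with a stripe width of 8. The last stripe is 10 wide instead of 8 to avoid ending with a 2-wide stripe. Concretely, s30-s39 stay callee-saved; from s40 onward the stripes alternate, starting with a caller-saved stripe (s40-s47 caller-saved, s48-s55 callee-saved, s56-s63 caller-saved, s64-s71 callee-saved, s72-s79 caller-saved, s80-s87 callee-saved, s88-s95 caller-saved), and the final callee-saved stripe is s96-s105. Fixes llvm#113782. (cherry picked from commit a779af3)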
1 parent ef0f844 commit 6f7b7bc

File tree

62 files changed

+46714
-8069
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+46714
-8069
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,15 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
155155
>;
156156

157157
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
158-
(sequence "SGPR%u", 30, 105)
158+
// Ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame pointer),
159+
// and s34 (base pointer) are callee-saved. The striped layout starts from s40,
160+
// with a stripe width of 8. The last stripe is 10 wide instead of 8, to avoid
161+
// ending with a 2-wide stripe.
162+
(add (sequence "SGPR%u", 30, 39),
163+
(sequence "SGPR%u", 48, 55),
164+
(sequence "SGPR%u", 64, 71),
165+
(sequence "SGPR%u", 80, 87),
166+
(sequence "SGPR%u", 96, 105))
159167
>;
160168

161169
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<

llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir

Lines changed: 776 additions & 8 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 651 additions & 3 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-spill-cfi-saved-regs -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,WAVE64 %s
22
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -amdgpu-spill-cfi-saved-regs -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s
3+
; XFAIL: *
34

45
; CHECK-LABEL: kern:
56
; CHECK: .cfi_startproc
@@ -78,7 +79,7 @@ define void @empty_func() {
7879
; WAVE64-NEXT: v_mov_b32_e32 v0, exec_hi
7980
; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
8081
; WAVE64-NEXT: .cfi_offset 17, 0
81-
82+
8283
define void @no_vgprs_to_spill_into() #1 {
8384
call void asm sideeffect "",
8485
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}

llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,38 @@ body: |
459459
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27
460460
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28
461461
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29
462+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40
463+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41
464+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42
465+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43
466+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44
467+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45
468+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46
469+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47
470+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56
471+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57
472+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58
473+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59
474+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60
475+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61
476+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62
477+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63
478+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72
479+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73
480+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74
481+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75
482+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76
483+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77
484+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78
485+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79
486+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88
487+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89
488+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90
489+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91
490+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92
491+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93
492+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94
493+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95
462494
; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
463495
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
464496
; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 4352

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 1165 additions & 283 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,42 +9,42 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
99
; CHECK-NEXT: s_mov_b32 s32, 0
1010
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
1111
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
12-
; CHECK-NEXT: s_load_dwordx8 s[36:43], s[6:7], 0x0
12+
; CHECK-NEXT: s_load_dwordx8 s[48:55], s[6:7], 0x0
1313
; CHECK-NEXT: s_add_u32 s0, s0, s15
1414
; CHECK-NEXT: s_addc_u32 s1, s1, 0
1515
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
1616
; CHECK-NEXT: s_mov_b32 s8, 0
1717
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
18-
; CHECK-NEXT: s_cmp_lg_u32 s40, 0
18+
; CHECK-NEXT: s_cmp_lg_u32 s52, 0
1919
; CHECK-NEXT: s_cbranch_scc1 .LBB0_8
2020
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
21-
; CHECK-NEXT: s_cmp_eq_u32 s42, 0
21+
; CHECK-NEXT: s_cmp_eq_u32 s54, 0
2222
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
2323
; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i
24-
; CHECK-NEXT: s_cmp_lg_u32 s43, 0
24+
; CHECK-NEXT: s_cmp_lg_u32 s55, 0
2525
; CHECK-NEXT: s_mov_b32 s15, 0
2626
; CHECK-NEXT: s_cselect_b32 s8, -1, 0
2727
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s8
2828
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
2929
; CHECK-NEXT: ; %bb.3:
30-
; CHECK-NEXT: s_mov_b32 s36, 0
30+
; CHECK-NEXT: s_mov_b32 s48, 0
3131
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s8
3232
; CHECK-NEXT: s_cbranch_vccz .LBB0_6
3333
; CHECK-NEXT: s_branch .LBB0_7
3434
; CHECK-NEXT: .LBB0_4:
3535
; CHECK-NEXT: s_mov_b32 s10, s8
3636
; CHECK-NEXT: s_mov_b32 s11, s8
3737
; CHECK-NEXT: s_mov_b32 s9, s8
38-
; CHECK-NEXT: s_mov_b64 s[38:39], s[10:11]
39-
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
38+
; CHECK-NEXT: s_mov_b64 s[50:51], s[10:11]
39+
; CHECK-NEXT: s_mov_b64 s[48:49], s[8:9]
4040
; CHECK-NEXT: s_branch .LBB0_7
4141
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
42-
; CHECK-NEXT: v_cmp_lt_f32_e64 s8, s41, 0
43-
; CHECK-NEXT: s_mov_b32 s36, 1.0
42+
; CHECK-NEXT: v_cmp_lt_f32_e64 s8, s53, 0
43+
; CHECK-NEXT: s_mov_b32 s48, 1.0
4444
; CHECK-NEXT: s_mov_b32 s15, 0x7fc00000
45-
; CHECK-NEXT: s_mov_b32 s37, s36
46-
; CHECK-NEXT: s_mov_b32 s38, s36
47-
; CHECK-NEXT: s_mov_b32 s39, s36
45+
; CHECK-NEXT: s_mov_b32 s49, s48
46+
; CHECK-NEXT: s_mov_b32 s50, s48
47+
; CHECK-NEXT: s_mov_b32 s51, s48
4848
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s8
4949
; CHECK-NEXT: s_cbranch_vccnz .LBB0_7
5050
; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i
@@ -56,8 +56,8 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
5656
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
5757
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
5858
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
59-
; CHECK-NEXT: v_add_f32_e64 v1, s15, s36
60-
; CHECK-NEXT: s_mov_b32 s36, 0
59+
; CHECK-NEXT: v_add_f32_e64 v1, s15, s48
60+
; CHECK-NEXT: s_mov_b32 s48, 0
6161
; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
6262
; CHECK-NEXT: v_or3_b32 v31, v0, v3, v2
6363
; CHECK-NEXT: v_mov_b32_e32 v0, v1
@@ -66,9 +66,9 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
6666
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
6767
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
6868
; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
69-
; CHECK-NEXT: s_mov_b32 s37, s36
70-
; CHECK-NEXT: s_mov_b32 s38, s36
71-
; CHECK-NEXT: s_mov_b32 s39, s36
69+
; CHECK-NEXT: s_mov_b32 s49, s48
70+
; CHECK-NEXT: s_mov_b32 s50, s48
71+
; CHECK-NEXT: s_mov_b32 s51, s48
7272
; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
7373
; CHECK-NEXT: v_mov_b32_e32 v0, 0
7474
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
@@ -77,11 +77,11 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
7777
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
7878
; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
7979
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x20
80-
; CHECK-NEXT: v_mov_b32_e32 v0, s36
80+
; CHECK-NEXT: v_mov_b32_e32 v0, s48
8181
; CHECK-NEXT: v_mov_b32_e32 v4, 0
82-
; CHECK-NEXT: v_mov_b32_e32 v1, s37
83-
; CHECK-NEXT: v_mov_b32_e32 v2, s38
84-
; CHECK-NEXT: v_mov_b32_e32 v3, s39
82+
; CHECK-NEXT: v_mov_b32_e32 v1, s49
83+
; CHECK-NEXT: v_mov_b32_e32 v2, s50
84+
; CHECK-NEXT: v_mov_b32_e32 v3, s51
8585
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
8686
; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
8787
; CHECK-NEXT: s_endpgm

0 commit comments

Comments
 (0)