Skip to content

Commit 5478409

Browse files
committed
Start the partition from s40
1 parent 50bdd13 commit 5478409

File tree

54 files changed

+7102
-7178
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+7102
-7178
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,15 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
9191
>;
9292

9393
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
94-
(add (sequence "SGPR%u", 30, 37),
95-
(sequence "SGPR%u", 46, 53),
96-
(sequence "SGPR%u", 62, 69),
97-
(sequence "SGPR%u", 78, 85),
98-
(sequence "SGPR%u", 94, 105))
94+
// Ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame pointer),
95+
// and s34 (base pointer) are callee-saved. The striped layout starts from s40,
96+
// with a stripe width of 8 (s40-s47 is not callee-saved, s48-s55 is, etc.).
97+
// The last stripe (s96-s105) is 10 wide instead of 8, to avoid a 2-wide tail.
98+
(add (sequence "SGPR%u", 30, 39),
99+
(sequence "SGPR%u", 48, 55),
100+
(sequence "SGPR%u", 64, 71),
101+
(sequence "SGPR%u", 80, 87),
102+
(sequence "SGPR%u", 96, 105))
99103
>;
100104

101105
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
125125
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
126126
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
127127
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
128-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
129-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
128+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
129+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
130130
; CHECK-NEXT: s_addk_i32 s32, 0x800
131131
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
132132
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -153,7 +153,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
153153
; CHECK-NEXT: s_mov_b32 s53, s12
154154
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
155155
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
156-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
156+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
157157
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
158158
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
159159
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -163,7 +163,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
163163
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
164164
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
165165
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
166-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
166+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
167167
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
168168
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
169169
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -185,8 +185,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
185185
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
186186
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
187187
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
188-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
189-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
188+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
189+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
190190
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
191191
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
192192
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -266,8 +266,8 @@ define double @test_powr_fast_f64(double %x, double %y) {
266266
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
267267
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
268268
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
269-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
270-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
269+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
270+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
271271
; CHECK-NEXT: s_addk_i32 s32, 0x800
272272
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
273273
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -293,7 +293,7 @@ define double @test_powr_fast_f64(double %x, double %y) {
293293
; CHECK-NEXT: s_mov_b32 s53, s12
294294
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
295295
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
296-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
296+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
297297
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
298298
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
299299
; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1]
@@ -302,7 +302,7 @@ define double @test_powr_fast_f64(double %x, double %y) {
302302
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
303303
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
304304
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
305-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
305+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
306306
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
307307
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
308308
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -321,8 +321,8 @@ define double @test_powr_fast_f64(double %x, double %y) {
321321
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
322322
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
323323
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
324-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
325-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
324+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
325+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
326326
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
327327
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
328328
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -409,8 +409,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
409409
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
410410
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
411411
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
412-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
413-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
412+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
413+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
414414
; CHECK-NEXT: s_addk_i32 s32, 0x800
415415
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
416416
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -437,7 +437,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
437437
; CHECK-NEXT: s_mov_b32 s53, s12
438438
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
439439
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
440-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
440+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
441441
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
442442
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
443443
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -447,7 +447,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
447447
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
448448
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
449449
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
450-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
450+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
451451
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
452452
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
453453
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -469,8 +469,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
469469
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
470470
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
471471
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
472-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
473-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
472+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
473+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
474474
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
475475
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
476476
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -552,8 +552,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
552552
; CHECK-NEXT: v_writelane_b32 v42, s35, 3
553553
; CHECK-NEXT: v_writelane_b32 v42, s36, 4
554554
; CHECK-NEXT: v_writelane_b32 v42, s37, 5
555-
; CHECK-NEXT: v_writelane_b32 v42, s46, 6
556-
; CHECK-NEXT: v_writelane_b32 v42, s47, 7
555+
; CHECK-NEXT: v_writelane_b32 v42, s38, 6
556+
; CHECK-NEXT: v_writelane_b32 v42, s39, 7
557557
; CHECK-NEXT: s_addk_i32 s32, 0x400
558558
; CHECK-NEXT: v_writelane_b32 v42, s48, 8
559559
; CHECK-NEXT: v_writelane_b32 v42, s49, 9
@@ -577,7 +577,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
577577
; CHECK-NEXT: s_mov_b32 s53, s12
578578
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
579579
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
580-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
580+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
581581
; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2
582582
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
583583
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -588,7 +588,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
588588
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
589589
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
590590
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
591-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
591+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
592592
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
593593
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
594594
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -606,8 +606,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
606606
; CHECK-NEXT: v_readlane_b32 s50, v42, 10
607607
; CHECK-NEXT: v_readlane_b32 s49, v42, 9
608608
; CHECK-NEXT: v_readlane_b32 s48, v42, 8
609-
; CHECK-NEXT: v_readlane_b32 s47, v42, 7
610-
; CHECK-NEXT: v_readlane_b32 s46, v42, 6
609+
; CHECK-NEXT: v_readlane_b32 s39, v42, 7
610+
; CHECK-NEXT: v_readlane_b32 s38, v42, 6
611611
; CHECK-NEXT: v_readlane_b32 s37, v42, 5
612612
; CHECK-NEXT: v_readlane_b32 s36, v42, 4
613613
; CHECK-NEXT: v_readlane_b32 s35, v42, 3
@@ -694,8 +694,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
694694
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
695695
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
696696
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
697-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
698-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
697+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
698+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
699699
; CHECK-NEXT: s_addk_i32 s32, 0x800
700700
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
701701
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -721,7 +721,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
721721
; CHECK-NEXT: s_mov_b32 s53, s12
722722
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
723723
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
724-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
724+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
725725
; CHECK-NEXT: v_or_b32_e32 v42, 1, v2
726726
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
727727
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -732,7 +732,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
732732
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
733733
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
734734
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
735-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
735+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
736736
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
737737
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
738738
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -753,8 +753,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
753753
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
754754
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
755755
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
756-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
757-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
756+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
757+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
758758
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
759759
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
760760
; CHECK-NEXT: v_readlane_b32 s35, v43, 3

0 commit comments

Comments
 (0)