Skip to content

Commit 3c297a2

Browse files
madhur13490 authored and committed
Make fixed-abi default for AMD HSA OS
fixed-abi uses pre-defined and predictable SGPRs/VGPRs for passing arguments. This patch makes this scheme the default when the HSA OS is specified in the triple. Reviewed By: arsenm. Differential Revision: https://reviews.llvm.org/D96340
1 parent a1c34a9 commit 3c297a2

32 files changed

+1518
-1203
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,10 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
383383
else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
384384
MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
385385
}
386+
// Set -fixed-function-abi to true if not provided.
387+
if (TT.getOS() == Triple::AMDHSA &&
388+
EnableAMDGPUFixedFunctionABIOpt.getNumOccurrences() == 0)
389+
EnableFixedFunctionABI = true;
386390
}
387391

388392
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ define i32 @asm_vgpr_early_clobber() {
5353
; CHECK: bb.1 (%ir-block.0):
5454
; CHECK: liveins: $sgpr30_sgpr31
5555
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
56-
; CHECK: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %2, !0
57-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
58-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
56+
; CHECK: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0
57+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
58+
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
5959
; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY2]]
6060
; CHECK: $vgpr0 = COPY [[ADD]](s32)
6161
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
@@ -87,8 +87,8 @@ define i32 @test_single_vgpr_output() nounwind {
8787
; CHECK: bb.1.entry:
8888
; CHECK: liveins: $sgpr30_sgpr31
8989
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
90-
; CHECK: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1
91-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
90+
; CHECK: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8
91+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
9292
; CHECK: $vgpr0 = COPY [[COPY1]](s32)
9393
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
9494
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
@@ -102,8 +102,8 @@ define i32 @test_single_sgpr_output_s32() nounwind {
102102
; CHECK: bb.1.entry:
103103
; CHECK: liveins: $sgpr30_sgpr31
104104
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
105-
; CHECK: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
106-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
105+
; CHECK: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
106+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
107107
; CHECK: $vgpr0 = COPY [[COPY1]](s32)
108108
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
109109
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
@@ -118,9 +118,9 @@ define float @test_multiple_register_outputs_same() #0 {
118118
; CHECK: bb.1 (%ir-block.0):
119119
; CHECK: liveins: $sgpr30_sgpr31
120120
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
121-
; CHECK: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835018 /* regdef:VGPR_32 */, def %2
122-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
123-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
121+
; CHECK: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 1835018 /* regdef:VGPR_32 */, def %9
122+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
123+
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
124124
; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[COPY2]]
125125
; CHECK: $vgpr0 = COPY [[FADD]](s32)
126126
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
@@ -138,9 +138,9 @@ define double @test_multiple_register_outputs_mixed() #0 {
138138
; CHECK: bb.1 (%ir-block.0):
139139
; CHECK: liveins: $sgpr30_sgpr31
140140
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
141-
; CHECK: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2883594 /* regdef:VReg_64 */, def %2
142-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
143-
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %2
141+
; CHECK: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 2883594 /* regdef:VReg_64 */, def %9
142+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
143+
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %9
144144
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
145145
; CHECK: $vgpr0 = COPY [[UV]](s32)
146146
; CHECK: $vgpr1 = COPY [[UV1]](s32)
@@ -209,8 +209,8 @@ define float @test_input_vgpr(i32 %src) nounwind {
209209
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
210210
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
211211
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
212-
; CHECK: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 1835017 /* reguse:VGPR_32 */, [[COPY2]]
213-
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %2
212+
; CHECK: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 1835017 /* reguse:VGPR_32 */, [[COPY2]]
213+
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %9
214214
; CHECK: $vgpr0 = COPY [[COPY3]](s32)
215215
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
216216
; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
@@ -225,8 +225,8 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind {
225225
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
226226
; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
227227
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
228-
; CHECK: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 196622 /* mem:m */, [[COPY]](p3)
229-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
228+
; CHECK: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 196622 /* mem:m */, [[COPY]](p3)
229+
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
230230
; CHECK: $vgpr0 = COPY [[COPY2]](s32)
231231
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
232232
; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
@@ -243,8 +243,8 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
243243
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
244244
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
245245
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
246-
; CHECK: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
247-
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %4
246+
; CHECK: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
247+
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %11
248248
; CHECK: $vgpr0 = COPY [[COPY3]](s32)
249249
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
250250
; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
@@ -258,14 +258,14 @@ define i32 @test_sgpr_matching_constraint() nounwind {
258258
; CHECK: bb.1.entry:
259259
; CHECK: liveins: $sgpr30_sgpr31
260260
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
261-
; CHECK: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
262-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
263-
; CHECK: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3
264-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %3
261+
; CHECK: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
262+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
263+
; CHECK: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %10
264+
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
265265
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
266266
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32)
267-
; CHECK: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3)
268-
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY %5
267+
; CHECK: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %12, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3)
268+
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY %12
269269
; CHECK: $vgpr0 = COPY [[COPY5]](s32)
270270
; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
271271
; CHECK: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
@@ -288,10 +288,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
288288
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
289289
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
290290
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
291-
; CHECK: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5)
292-
; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY %4
293-
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY %5
294-
; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY %6
291+
; CHECK: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 1835018 /* regdef:VGPR_32 */, def %12, 1835018 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5)
292+
; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY %11
293+
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY %12
294+
; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY %13
295295
; CHECK: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
296296
; CHECK: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
297297
; CHECK: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
@@ -312,11 +312,11 @@ define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
312312
; CHECK: bb.1.entry:
313313
; CHECK: liveins: $sgpr30_sgpr31
314314
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
315-
; CHECK: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
316-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
315+
; CHECK: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8
316+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
317317
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
318-
; CHECK: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
319-
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %3
318+
; CHECK: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
319+
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %10
320320
; CHECK: $vgpr0 = COPY [[COPY3]](s32)
321321
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
322322
; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ define void @func_use_lds_global() {
1414
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1515
; GFX8-NEXT: v_mov_b32_e32 v0, 0
1616
; GFX8-NEXT: s_mov_b32 m0, -1
17-
; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5]
17+
; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7]
1818
; GFX8-NEXT: s_trap 2
1919
; GFX8-NEXT: ds_write_b32 v0, v0
2020
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -24,7 +24,7 @@ define void @func_use_lds_global() {
2424
; GFX9: ; %bb.0:
2525
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2626
; GFX9-NEXT: v_mov_b32_e32 v0, 0
27-
; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5]
27+
; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
2828
; GFX9-NEXT: s_trap 2
2929
; GFX9-NEXT: ds_write_b32 v0, v0
3030
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -38,7 +38,7 @@ define void @func_use_lds_global_constexpr_cast() {
3838
; GFX8-LABEL: func_use_lds_global_constexpr_cast:
3939
; GFX8: ; %bb.0:
4040
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41-
; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5]
41+
; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7]
4242
; GFX8-NEXT: s_trap 2
4343
; GFX8-NEXT: flat_store_dword v[0:1], v0
4444
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -47,7 +47,7 @@ define void @func_use_lds_global_constexpr_cast() {
4747
; GFX9-LABEL: func_use_lds_global_constexpr_cast:
4848
; GFX9: ; %bb.0:
4949
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50-
; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5]
50+
; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
5151
; GFX9-NEXT: s_trap 2
5252
; GFX9-NEXT: global_store_dword v[0:1], v0, off
5353
; GFX9-NEXT: s_waitcnt vmcnt(0)

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s
2-
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s
1+
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2 %s
2+
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2 %s
33
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA %s
44
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA %s
55
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s
@@ -90,25 +90,26 @@ bb2:
9090

9191
; ALL-LABEL: {{^}}test_workitem_id_x_func:
9292
; ALL: s_waitcnt
93-
; ALL-NEXT: v_and_b32_e32 v2, 0x3ff, v2
93+
; HSA-NEXT: v_and_b32_e32 v2, 0x3ff, v31
94+
; MESA-NEXT: v_and_b32_e32 v2, 0x3ff, v2
9495
define void @test_workitem_id_x_func(i32 addrspace(1)* %out) #1 {
9596
%id = call i32 @llvm.amdgcn.workitem.id.x()
9697
store i32 %id, i32 addrspace(1)* %out
9798
ret void
9899
}
99100

100101
; ALL-LABEL: {{^}}test_workitem_id_y_func:
101-
; ALL: v_lshrrev_b32_e32 v2, 10, v2
102-
; ALL-NEXT: v_and_b32_e32 v2, 0x3ff, v2
102+
; HSA: v_lshrrev_b32_e32 v2, 10, v31
103+
; MESA: v_lshrrev_b32_e32 v2, 10, v2
103104
define void @test_workitem_id_y_func(i32 addrspace(1)* %out) #1 {
104105
%id = call i32 @llvm.amdgcn.workitem.id.y()
105106
store i32 %id, i32 addrspace(1)* %out
106107
ret void
107108
}
108109

109110
; ALL-LABEL: {{^}}test_workitem_id_z_func:
110-
; ALL: v_lshrrev_b32_e32 v2, 20, v2
111-
; ALL-NEXT: v_and_b32_e32 v2, 0x3ff, v2
111+
; HSA: v_lshrrev_b32_e32 v2, 20, v31
112+
; MESA: v_lshrrev_b32_e32 v2, 20, v2
112113
define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 {
113114
%id = call i32 @llvm.amdgcn.workitem.id.z()
114115
store i32 %id, i32 addrspace(1)* %out

llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
187187
; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v4
188188
; GCN-NEXT: v_add_u32_e32 v2, s6, v2
189189
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
190-
; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v5
190+
; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v31
191191
; GCN-NEXT: s_waitcnt vmcnt(0)
192192
; GCN-NEXT: v_add_u32_e32 v2, v2, v3
193193
; GCN-NEXT: global_store_dword v[0:1], v2, off
@@ -243,15 +243,15 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
243243
; GCN-NEXT: s_and_b32 s6, s6, 0xfffff000
244244
; GCN-NEXT: s_add_u32 s7, s6, 4
245245
; GCN-NEXT: v_mov_b32_e32 v2, 0
246-
; GCN-NEXT: v_mov_b32_e32 v5, s6
247-
; GCN-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
246+
; GCN-NEXT: v_mov_b32_e32 v4, s6
247+
; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
248248
; GCN-NEXT: v_mov_b32_e32 v2, 1
249-
; GCN-NEXT: v_mov_b32_e32 v5, s7
250-
; GCN-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
249+
; GCN-NEXT: v_mov_b32_e32 v4, s7
250+
; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
251251
; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v3
252252
; GCN-NEXT: v_add_u32_e32 v2, s6, v2
253253
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
254-
; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v4
254+
; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v31
255255
; GCN-NEXT: s_waitcnt vmcnt(0)
256256
; GCN-NEXT: v_add_u32_e32 v2, v2, v3
257257
; GCN-NEXT: global_store_dword v[0:1], v2, off

llvm/test/CodeGen/AMDGPU/addrspacecast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt
4242

4343
; Test handling inside a non-kernel
4444
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast_func:
45-
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
45+
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x10{{$}}
4646
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
4747
; CI-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
4848
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc

llvm/test/CodeGen/AMDGPU/agpr-register-count.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -122,29 +122,29 @@ bb:
122122

123123
; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
124124
; GFX908: .amdhsa_next_free_vgpr 32
125-
; GFX90A: .amdhsa_accum_offset 12
126-
; GCN: NumVgprs: 9
125+
; GFX90A: .amdhsa_accum_offset 32
126+
; GCN: NumVgprs: 32
127127
; GCN: NumAgprs: 32
128128
; GFX908: TotalNumVgprs: 32
129-
; GFX90A: TotalNumVgprs: 44
129+
; GFX90A: TotalNumVgprs: 64
130130
; GFX908: VGPRBlocks: 7
131-
; GFX90A: VGPRBlocks: 5
131+
; GFX90A: VGPRBlocks: 7
132132
; GFX908: NumVGPRsForWavesPerEU: 32
133-
; GFX90A: NumVGPRsForWavesPerEU: 44
134-
; GFX90A: AccumOffset: 12
133+
; GFX90A: NumVGPRsForWavesPerEU: 64
134+
; GFX90A: AccumOffset: 32
135135
; GCN: Occupancy: 8
136-
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
136+
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 7
137137
define amdgpu_kernel void @kernel_call_func_32_agprs() #0 {
138138
bb:
139139
call void @func_32_agprs() #0
140140
ret void
141141
}
142142

143143
; GCN-LABEL: {{^}}func_call_func_32_agprs:
144-
; GCN: NumVgprs: 9
144+
; GCN: NumVgprs: 32
145145
; GCN: NumAgprs: 32
146146
; GFX908: TotalNumVgprs: 32
147-
; GFX90A: TotalNumVgprs: 44
147+
; GFX90A: TotalNumVgprs: 64
148148
define void @func_call_func_32_agprs() #0 {
149149
bb:
150150
call void @func_32_agprs() #0
@@ -154,21 +154,21 @@ bb:
154154
declare void @undef_func()
155155

156156
; GCN-LABEL: {{^}}kernel_call_undef_func:
157-
; GFX908: .amdhsa_next_free_vgpr 24
158-
; GFX90A: .amdhsa_next_free_vgpr 48
159-
; GFX90A: .amdhsa_accum_offset 24
160-
; GCN: NumVgprs: 24
157+
; GFX908: .amdhsa_next_free_vgpr 32
158+
; GFX90A: .amdhsa_next_free_vgpr 56
159+
; GFX90A: .amdhsa_accum_offset 32
160+
; GCN: NumVgprs: 32
161161
; GCN: NumAgprs: 24
162-
; GFX908: TotalNumVgprs: 24
163-
; GFX90A: TotalNumVgprs: 48
164-
; GFX908: VGPRBlocks: 5
165-
; GFX90A: VGPRBlocks: 5
166-
; GFX908: NumVGPRsForWavesPerEU: 24
167-
; GFX90A: NumVGPRsForWavesPerEU: 48
168-
; GFX90A: AccumOffset: 24
169-
; GFX908: Occupancy: 10
162+
; GFX908: TotalNumVgprs: 32
163+
; GFX90A: TotalNumVgprs: 56
164+
; GFX908: VGPRBlocks: 7
165+
; GFX90A: VGPRBlocks: 6
166+
; GFX908: NumVGPRsForWavesPerEU: 32
167+
; GFX90A: NumVGPRsForWavesPerEU: 56
168+
; GFX90A: AccumOffset: 32
169+
; GFX908: Occupancy: 8
170170
; GFX90A: Occupancy: 8
171-
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 5
171+
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 7
172172
define amdgpu_kernel void @kernel_call_undef_func() #0 {
173173
bb:
174174
call void @undef_func()

0 commit comments

Comments
 (0)