Skip to content

Commit 046e8a3

Browse files
committed
Minor change based on review.
1 parent 08f213f commit 046e8a3

File tree

4 files changed: 27 additions and 28 deletions.

llvm/lib/Target/AMDGPU/GCNSubtarget.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -628,9 +628,9 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
628628

629629
if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
630630
(IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
631-
// FlatScratchInit cannot be true for graphics CC.
631+
// FlatScratchInit cannot be true for graphics CC if enableFlatScratch() is false.
632632
(ST.enableFlatScratch() ||
633-
(!IsNoFlatScratchInitSet && !AMDGPU::isGraphics(CC))) &&
633+
(!AMDGPU::isGraphics(CC) && !F.hasFnAttribute("amdgpu-no-flat-scratch-init"))) &&
634634
!ST.flatScratchIsArchitected()) {
635635
FlatScratchInit = true;
636636
}

llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,61 +5,60 @@
55
; Make sure we only use one 128-bit register instead of 2 for i128 asm
66
; constraints
77

8-
define amdgpu_kernel void @s_input_output_i128() #0 {
8+
define amdgpu_kernel void @s_input_output_i128() {
99
; GFX908-LABEL: name: s_input_output_i128
1010
; GFX908: bb.0 (%ir-block.0):
11-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7471114 /* regdef:SGPR_128 */, def %12
12-
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %12
13-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7471113 /* reguse:SGPR_128 */, [[COPY]]
11+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %13
12+
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %13
13+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, %14
1414
; GFX908-NEXT: S_ENDPGM 0
1515
;
1616
; GFX90A-LABEL: name: s_input_output_i128
1717
; GFX90A: bb.0 (%ir-block.0):
18-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7471114 /* regdef:SGPR_128 */, def %10
19-
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %10
20-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7471113 /* reguse:SGPR_128 */, [[COPY]]
18+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %11
19+
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %11
20+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, %12
2121
; GFX90A-NEXT: S_ENDPGM 0
2222
%val = tail call i128 asm sideeffect "; def $0", "=s"()
2323
call void asm sideeffect "; use $0", "s"(i128 %val)
2424
ret void
2525
}
2626

27-
define amdgpu_kernel void @v_input_output_i128() #0 {
27+
define amdgpu_kernel void @v_input_output_i128() {
2828
; GFX908-LABEL: name: v_input_output_i128
2929
; GFX908: bb.0 (%ir-block.0):
30-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %12
31-
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %12
32-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:VReg_128 */, [[COPY]]
30+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %13
31+
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %13
32+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128 */, %14
3333
; GFX908-NEXT: S_ENDPGM 0
3434
;
3535
; GFX90A-LABEL: name: v_input_output_i128
3636
; GFX90A: bb.0 (%ir-block.0):
37-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %10
38-
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %10
39-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6619145 /* reguse:VReg_128_Align2 */, [[COPY]]
37+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %11
38+
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %11
39+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:VReg_128_Align2 */, %12
4040
; GFX90A-NEXT: S_ENDPGM 0
4141
%val = tail call i128 asm sideeffect "; def $0", "=v"()
4242
call void asm sideeffect "; use $0", "v"(i128 %val)
4343
ret void
4444
}
4545

46-
define amdgpu_kernel void @a_input_output_i128() #0 {
46+
define amdgpu_kernel void @a_input_output_i128() {
4747
; GFX908-LABEL: name: a_input_output_i128
4848
; GFX908: bb.0 (%ir-block.0):
49-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:AReg_128 */, def %12
50-
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %12
51-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY]]
49+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128 */, def %13
50+
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %13
51+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128 */, %14
52+
5253
; GFX908-NEXT: S_ENDPGM 0
5354
;
5455
; GFX90A-LABEL: name: a_input_output_i128
5556
; GFX90A: bb.0 (%ir-block.0):
56-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6488074 /* regdef:AReg_128_Align2 */, def %10
57-
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %10
58-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY]]
57+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:AReg_128_Align2 */, def %11
58+
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %11
59+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %12
5960
; GFX90A-NEXT: S_ENDPGM 0
6061
%val = call i128 asm sideeffect "; def $0", "=a"()
6162
call void asm sideeffect "; use $0", "a"(i128 %val)
6263
ret void
6364
}
64-
65-
attributes #0 = { "amdgpu-no-flat-scratch-init" }

llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
1111
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
1212
; REGALLOC-GFX908-NEXT: {{ $}}
1313
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %6:agpr_32
14-
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %7
14+
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %7
1515
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %8
1616

1717
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
@@ -57,7 +57,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
5757
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
5858
; REGALLOC-GFX90A-NEXT: {{ $}}
5959
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %6:agpr_32
60-
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %7
60+
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %7
6161
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %8
6262
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
6363
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)

llvm/test/CodeGen/AMDGPU/udiv.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,8 @@ define amdgpu_kernel void @s_udiv_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
246246
; GCN: ; %bb.0:
247247
; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
248248
; GCN-NEXT: s_add_i32 s12, s12, s17
249-
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
250249
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
250+
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
251251
; GCN-NEXT: s_waitcnt lgkmcnt(0)
252252
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s3
253253
; GCN-NEXT: s_sub_i32 s4, 0, s3

0 commit comments

Comments
 (0)