Skip to content

Commit f2165b9

Browse files
committed
Revert "[AMDGPU] Add flag to prevent reruns of LowerModuleLDS (#129520)"
This reverts commit aa9f859 because it made some assumptions that may not be valid.
1 parent d08833f commit f2165b9

15 files changed

+194
-196
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1038,12 +1038,6 @@ class AMDGPULowerModuleLDS {
10381038
}
10391039

10401040
bool runOnModule(Module &M) {
1041-
// Check if we've already lowered this module. The pass may run more
1042-
// than once in the LTO pipeline, and multiple runs aren't supported.
1043-
if (M.getModuleFlag("amdgpu.lowered_lds"))
1044-
return false;
1045-
M.addModuleFlag(Module::ModFlagBehavior::Error, "amdgpu.lowered_lds", 1);
1046-
10471041
CallGraph CG = CallGraph(M);
10481042
bool Changed = superAlignLDSGlobals(M);
10491043

llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll

Lines changed: 41 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -9,46 +9,46 @@ define amdgpu_kernel void @call_debug_loc() {
99
; CHECK: bb.1.entry:
1010
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
1111
; CHECK-NEXT: {{ $}}
12-
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !8
13-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !8
14-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !8
15-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !8
16-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !8
17-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !8
18-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !8
19-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !8
20-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !8
12+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !7
13+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !7
14+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !7
15+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !7
16+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !7
17+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !7
18+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !7
19+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !7
20+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !7
2121
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9
22-
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !8
23-
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !8
24-
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !8
25-
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !8
26-
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !8
27-
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !8
28-
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !8
29-
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !8
30-
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !8
31-
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !8
32-
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !8
33-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !8
34-
; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !8
35-
; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !8
36-
; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !8
37-
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !8
38-
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !8
39-
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !8
40-
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !8
41-
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !8
42-
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !8
43-
; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !8
44-
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !8
45-
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !8
46-
; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !8
47-
; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !8
48-
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !8
49-
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !8 :: (dereferenceable invariant load (p0) from got, addrspace 4)
50-
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !8
51-
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !8
22+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !7
23+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !7
24+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !7
25+
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !7
26+
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !7
27+
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !7
28+
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !7
29+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !7
30+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !7
31+
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !7
32+
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !7
33+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !7
34+
; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !7
35+
; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !7
36+
; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !7
37+
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !7
38+
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !7
39+
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !7
40+
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !7
41+
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !7
42+
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !7
43+
; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !7
44+
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !7
45+
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !7
46+
; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !7
47+
; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !7
48+
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !7
49+
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !7 :: (dereferenceable invariant load (p0) from got, addrspace 4)
50+
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !7
51+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !7
5252
; CHECK-NEXT: S_ENDPGM 0
5353
entry:
5454
call void @callee(), !dbg !6
@@ -60,11 +60,11 @@ define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) {
6060
; CHECK: bb.1.entry:
6161
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
6262
; CHECK-NEXT: {{ $}}
63-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !8
63+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !7
6464
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
6565
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6666
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
67-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !8
67+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !7
6868
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1)
6969
; CHECK-NEXT: SI_RETURN
7070
entry:

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ define amdgpu_kernel void @asm_convergent() convergent{
77
; CHECK-NEXT: liveins: $sgpr8_sgpr9
88
; CHECK-NEXT: {{ $}}
99
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
10-
; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !2
10+
; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !1
1111
; CHECK-NEXT: S_ENDPGM 0
1212
call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0
1313
ret void
@@ -19,8 +19,8 @@ define amdgpu_kernel void @asm_simple_memory_clobber() {
1919
; CHECK-NEXT: liveins: $sgpr8_sgpr9
2020
; CHECK-NEXT: {{ $}}
2121
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
22-
; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !2
23-
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !2
22+
; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !1
23+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !1
2424
; CHECK-NEXT: S_ENDPGM 0
2525
call void asm sideeffect "", "~{memory}"(), !srcloc !0
2626
call void asm sideeffect "", ""(), !srcloc !0
@@ -33,7 +33,7 @@ define amdgpu_kernel void @asm_simple_vgpr_clobber() {
3333
; CHECK-NEXT: liveins: $sgpr8_sgpr9
3434
; CHECK-NEXT: {{ $}}
3535
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
36-
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !2
36+
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !1
3737
; CHECK-NEXT: S_ENDPGM 0
3838
call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0
3939
ret void
@@ -45,7 +45,7 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() {
4545
; CHECK-NEXT: liveins: $sgpr8_sgpr9
4646
; CHECK-NEXT: {{ $}}
4747
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
48-
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !2
48+
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !1
4949
; CHECK-NEXT: S_ENDPGM 0
5050
call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0
5151
ret void
@@ -57,7 +57,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
5757
; CHECK-NEXT: liveins: $sgpr8_sgpr9
5858
; CHECK-NEXT: {{ $}}
5959
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
60-
; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !2
60+
; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !1
6161
; CHECK-NEXT: S_ENDPGM 0
6262
call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0
6363
ret void
@@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
6666
define i32 @asm_vgpr_early_clobber() {
6767
; CHECK-LABEL: name: asm_vgpr_early_clobber
6868
; CHECK: bb.1 (%ir-block.0):
69-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !2
69+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
7070
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
7171
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
7272
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@
55
define i32 @reloc_constant() {
66
; CHECK-LABEL: name: reloc_constant
77
; CHECK: bb.1 (%ir-block.0):
8-
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !1
8+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0
9+
; We cannot have any specific metadata check here as ConstantAsMetadata is printed as <raw_ptr_val>
910
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}>
1011
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[INT1]]
1112
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ define <2 x i64> @global_load_v2i64_align16__rangemd(ptr addrspace(1) %ptr) {
7777
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
7878
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
7979
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
80-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !3, addrspace 1)
80+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !2, addrspace 1)
8181
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
8282
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
8383
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -119,7 +119,7 @@ define i32 @global_sextload_i8_align1__rangemd(ptr addrspace(1) %ptr) {
119119
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
120120
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
121121
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
122-
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !1, addrspace 1)
122+
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !0, addrspace 1)
123123
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
124124
; CHECK-NEXT: SI_RETURN implicit $vgpr0
125125
%load = load i8, ptr addrspace(1) %ptr, align 1, !range !0, !noundef !1
@@ -135,7 +135,7 @@ define i32 @global_zextload_i8_align1__rangemd(ptr addrspace(1) %ptr) {
135135
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
136136
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
137137
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
138-
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !5, addrspace 1)
138+
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !4, addrspace 1)
139139
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
140140
; CHECK-NEXT: SI_RETURN implicit $vgpr0
141141
%load = load i8, ptr addrspace(1) %ptr, align 1, !range !4, !noundef !1

llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,11 @@ define void @fence_loads(ptr %ptr) {
1212
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1313
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1414
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
15-
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !1
16-
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr, align 4)
15+
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0
16+
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4)
1717
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
1818
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
19-
; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (store release (s8) into %ir.ptr, align 4)
19+
; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
2020
; CHECK-NEXT: SI_RETURN
2121
fence release, !mmra !0
2222
%ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2

0 commit comments

Comments
 (0)