Skip to content

File tree

562 files changed

+91243
-87510
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

562 files changed

+91243
-87510
lines changed

clang/test/CodeGenHIP/default-attributes.hip

Lines changed: 16 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,68 +8,49 @@
88
#define __device__ __attribute__((device))
99
#define __global__ __attribute__((global))
1010

11-
//.
12-
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
13-
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
14-
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
15-
//.
16-
// OPT: @__hip_cuid_ = addrspace(1) global i8 0
17-
// OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
18-
// OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
19-
//.
20-
__device__ void extern_func();
21-
2211
// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
2312
// OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv
2413
// OPTNONE-SAME: () #[[ATTR0:[0-9]+]] {
2514
// OPTNONE-NEXT: entry:
26-
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
2715
// OPTNONE-NEXT: ret void
2816
//
29-
// OPT: Function Attrs: convergent mustprogress nounwind
17+
// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
3018
// OPT-LABEL: define {{[^@]+}}@_Z4funcv
3119
// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
3220
// OPT-NEXT: entry:
33-
// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
3421
// OPT-NEXT: ret void
3522
//
3623
__device__ void func() {
37-
extern_func();
24+
3825
}
3926

4027
// OPTNONE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
4128
// OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv
42-
// OPTNONE-SAME: () #[[ATTR2:[0-9]+]] {
29+
// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] {
4330
// OPTNONE-NEXT: entry:
44-
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
4531
// OPTNONE-NEXT: ret void
4632
//
47-
// OPT: Function Attrs: convergent mustprogress norecurse nounwind
33+
// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
4834
// OPT-LABEL: define {{[^@]+}}@_Z6kernelv
49-
// OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
35+
// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
5036
// OPT-NEXT: entry:
51-
// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]]
5237
// OPT-NEXT: ret void
5338
//
5439
__global__ void kernel() {
55-
extern_func();
40+
5641
}
5742
//.
58-
// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
59-
// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
60-
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
61-
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
43+
// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
44+
// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
6245
//.
63-
// OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
64-
// OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
65-
// OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
66-
// OPT: attributes #[[ATTR3]] = { convergent nounwind }
46+
// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
47+
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
6748
//.
68-
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
69-
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
70-
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
49+
// OPTNONE: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
50+
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
51+
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
7152
//.
72-
// OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
73-
// OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
74-
// OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
53+
// OPT: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
54+
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
55+
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
7556
//.

llvm/docs/ReleaseNotes.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,6 @@ Changes to the AMDGPU Backend
139139
:ref:`atomicrmw <i_atomicrmw>` instruction with `fadd`, `fmin` and
140140
`fmax` with addrspace(3) instead.
141141

142-
* AMDGPUAttributor is no longer run as part of the codegen pass
143-
pipeline. It is expected to run as part of the middle end
144-
optimizations.
145-
146142
Changes to the ARM Backend
147143
--------------------------
148144

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -731,14 +731,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
731731
PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
732732
});
733733

734-
// FIXME: Why is AMDGPUAttributor not in CGSCC?
735-
PB.registerOptimizerLastEPCallback(
736-
[this](ModulePassManager &MPM, OptimizationLevel Level) {
737-
if (Level != OptimizationLevel::O0) {
738-
MPM.addPass(AMDGPUAttributorPass(*this));
739-
}
740-
});
741-
742734
PB.registerFullLinkTimeOptimizationLastEPCallback(
743735
[this](ModulePassManager &PM, OptimizationLevel Level) {
744736
// We want to support the -lto-partitions=N option as "best effort".
@@ -1045,6 +1037,11 @@ void AMDGPUPassConfig::addIRPasses() {
10451037
addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
10461038
}
10471039

1040+
// AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
1041+
// after their introduction
1042+
if (TM.getOptLevel() > CodeGenOptLevel::None)
1043+
addPass(createAMDGPUAttributorLegacyPass());
1044+
10481045
if (TM.getOptLevel() > CodeGenOptLevel::None)
10491046
addPass(createInferAddressSpacesPass());
10501047

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -679,12 +679,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
679679
break;
680680
}
681681
}
682-
683-
// FIXME: We can spill incoming arguments and restore at the end of the
684-
// prolog.
685-
if (!ScratchWaveOffsetReg)
686-
report_fatal_error(
687-
"could not find temporary scratch offset register in prolog");
688682
} else {
689683
ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
690684
}

llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
66
; GFX11-LABEL: s_add_u64:
77
; GFX11: ; %bb.0: ; %entry
88
; GFX11-NEXT: s_clause 0x1
9-
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
10-
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
9+
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
10+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
1111
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1212
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1313
; GFX11-NEXT: s_add_u32 s0, s6, s0
@@ -22,8 +22,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
2222
; GFX12-LABEL: s_add_u64:
2323
; GFX12: ; %bb.0: ; %entry
2424
; GFX12-NEXT: s_clause 0x1
25-
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
26-
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
25+
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
26+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
2727
; GFX12-NEXT: v_mov_b32_e32 v2, 0
2828
; GFX12-NEXT: s_wait_kmcnt 0x0
2929
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1]
@@ -58,8 +58,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
5858
; GFX11-LABEL: s_sub_u64:
5959
; GFX11: ; %bb.0: ; %entry
6060
; GFX11-NEXT: s_clause 0x1
61-
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
62-
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
61+
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
62+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
6363
; GFX11-NEXT: v_mov_b32_e32 v2, 0
6464
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6565
; GFX11-NEXT: s_sub_u32 s0, s6, s0
@@ -74,8 +74,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
7474
; GFX12-LABEL: s_sub_u64:
7575
; GFX12: ; %bb.0: ; %entry
7676
; GFX12-NEXT: s_clause 0x1
77-
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
78-
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
77+
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
78+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
7979
; GFX12-NEXT: v_mov_b32_e32 v2, 0
8080
; GFX12-NEXT: s_wait_kmcnt 0x0
8181
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1]

0 commit comments

Comments
 (0)