Skip to content

Commit 5f8e407

Browse files
committed
[AMDGPU][Attributor] Fix AAAMDWavesPerEU deduction
This should replace llvm#114357. The problem this resolves is that AAAMDWavesPerEU used "assumed" information from AAAMDFlatWorkGroupSize to derive "known" information. This is generally not valid. The canonical way is to use "assumed"/"known" information to improve "assumed"/"known" information, respectively. What happened before was that the new "known" information (derived from "assumed") was manifested even though it should not have been. That is how we ended up with external functions that had "amdgpu-waves-per-eu" set. This won't happen if we only set the assumed information, as the invalidation caused by unknown callers will fall back to the "unset" known information. In addition to the above, we will identify invalid waves-per-eu values, emit a warning, and replace the invalid values with valid ones. This also happens for kernels. See `llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll`. This should hopefully fix llvm#123092.
1 parent d5b7b97 commit 5f8e407

File tree

38 files changed

+461
-450
lines changed

38 files changed

+461
-450
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,9 @@ struct AAAMDSizeRangeAttribute
831831
const std::string getAsStr(Attributor *) const override {
832832
std::string Str;
833833
raw_string_ostream OS(Str);
834-
OS << getName() << '[';
834+
OS << getName() << " Known[";
835+
OS << getKnown().getLower() << ',' << getKnown().getUpper() - 1;
836+
OS << "] Assumed[";
835837
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
836838
OS << ']';
837839
return OS.str();
@@ -1044,60 +1046,40 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
10441046
AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
10451047
: AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
10461048

1047-
bool isValidState() const override {
1048-
return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
1049-
}
1050-
10511049
void initialize(Attributor &A) override {
10521050
Function *F = getAssociatedFunction();
10531051
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
10541052

1055-
if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1056-
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
1057-
AssumedGroupSize->isValidState()) {
1053+
// We allow consistent WavesPerEU for all functions here but for non-entry
1054+
// points we will verify consistency in the end.
1055+
unsigned ImpliedMin, ImpliedMax;
1056+
std::tie(ImpliedMin, ImpliedMax) =
1057+
InfoCache.getWavesPerEU(*F, InfoCache.getFlatWorkGroupSizes(*F));
10581058

1059-
unsigned Min, Max;
1060-
std::tie(Min, Max) = InfoCache.getWavesPerEU(
1061-
*F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1062-
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
1063-
1064-
ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1065-
intersectKnown(Range);
1066-
}
1059+
ConstantRange Range(APInt(32, ImpliedMin), APInt(32, ImpliedMax + 1));
1060+
intersectKnown(Range);
10671061

1068-
if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1062+
// For entries we cannot derive anything better.
1063+
if (AMDGPU::isEntryFunctionCC(getAssociatedFunction()->getCallingConv()))
10691064
indicatePessimisticFixpoint();
10701065
}
10711066

10721067
ChangeStatus updateImpl(Attributor &A) override {
1073-
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
10741068
ChangeStatus Change = ChangeStatus::UNCHANGED;
10751069

10761070
auto CheckCallSite = [&](AbstractCallSite CS) {
10771071
Function *Caller = CS.getInstruction()->getFunction();
1078-
Function *Func = getAssociatedFunction();
1072+
[[maybe_unused]] Function *Func = getAssociatedFunction();
10791073
LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
10801074
<< "->" << Func->getName() << '\n');
10811075

10821076
const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
10831077
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1084-
const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1085-
*this, IRPosition::function(*Func), DepClassTy::REQUIRED);
1086-
if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
1087-
!AssumedGroupSize->isValidState())
1078+
if (!CallerInfo || !CallerInfo->isValidState())
10881079
return false;
10891080

1090-
unsigned Min, Max;
1091-
std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
1092-
*Caller,
1093-
{CallerInfo->getAssumed().getLower().getZExtValue(),
1094-
CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
1095-
{AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1096-
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
1097-
ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
1098-
IntegerRangeState CallerRangeState(CallerRange);
1099-
Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
1100-
1081+
Change |=
1082+
clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
11011083
return true;
11021084
};
11031085

@@ -1113,8 +1095,28 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11131095
Attributor &A);
11141096

11151097
ChangeStatus manifest(Attributor &A) override {
1098+
unsigned ImpliedMin = getAssumed().getLower().getZExtValue();
1099+
unsigned ImpliedMax = getAssumed().getUpper().getZExtValue() - 1;
1100+
11161101
Function *F = getAssociatedFunction();
11171102
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1103+
1104+
// Make non-kernel functions locally consistent.
1105+
if (!AMDGPU::isEntryFunctionCC(getAssociatedFunction()->getCallingConv())) {
1106+
const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1107+
*this, getIRPosition(), DepClassTy::OPTIONAL);
1108+
std::pair<unsigned, unsigned> FlatWorkGroupSize;
1109+
if (!AssumedGroupSize || !AssumedGroupSize->isValidState())
1110+
FlatWorkGroupSize = InfoCache.getFlatWorkGroupSizes(*F);
1111+
else
1112+
FlatWorkGroupSize = {
1113+
AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1114+
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1};
1115+
1116+
std::tie(ImpliedMin, ImpliedMax) = InfoCache.getEffectiveWavesPerEU(
1117+
*F, {ImpliedMin, ImpliedMax}, FlatWorkGroupSize);
1118+
}
1119+
11181120
unsigned Max = InfoCache.getMaxWavesPerEU(*F);
11191121
return emitAttributeIfNotDefault(A, 1, Max);
11201122
}
@@ -1295,10 +1297,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
12951297
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
12961298
A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
12971299
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
1300+
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
12981301
CallingConv::ID CC = F->getCallingConv();
12991302
if (!AMDGPU::isEntryFunctionCC(CC)) {
13001303
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1301-
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
13021304
} else if (CC == CallingConv::AMDGPU_KERNEL) {
13031305
addPreloadKernArgHint(*F, TM);
13041306
}

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
217217
; AKF_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
218218
;
219219
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
220-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] {
220+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
221221
; ATTRIBUTOR_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
222222
;
223223
ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
@@ -233,9 +233,8 @@ attributes #1 = { nounwind }
233233
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
234234
;.
235235
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
236-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238-
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
236+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
237+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
239238
;.
240239
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
241240
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
7373

7474
define void @func_uses_asm_virtreg_agpr() {
7575
; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
76-
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
76+
; CHECK-SAME: ) #[[ATTR0]] {
7777
; CHECK-NEXT: call void asm sideeffect "
7878
; CHECK-NEXT: ret void
7979
;
@@ -83,7 +83,7 @@ define void @func_uses_asm_virtreg_agpr() {
8383

8484
define void @func_uses_asm_physreg_agpr() {
8585
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
86-
; CHECK-SAME: ) #[[ATTR2]] {
86+
; CHECK-SAME: ) #[[ATTR0]] {
8787
; CHECK-NEXT: call void asm sideeffect "
8888
; CHECK-NEXT: ret void
8989
;
@@ -93,7 +93,7 @@ define void @func_uses_asm_physreg_agpr() {
9393

9494
define void @func_uses_asm_physreg_agpr_tuple() {
9595
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
96-
; CHECK-SAME: ) #[[ATTR2]] {
96+
; CHECK-SAME: ) #[[ATTR0]] {
9797
; CHECK-NEXT: call void asm sideeffect "
9898
; CHECK-NEXT: ret void
9999
;
@@ -105,7 +105,7 @@ declare void @unknown()
105105

106106
define amdgpu_kernel void @kernel_calls_extern() {
107107
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
108-
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
108+
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
109109
; CHECK-NEXT: call void @unknown()
110110
; CHECK-NEXT: ret void
111111
;
@@ -115,8 +115,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
115115

116116
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
117117
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
118-
; CHECK-SAME: ) #[[ATTR4]] {
119-
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
118+
; CHECK-SAME: ) #[[ATTR2]] {
119+
; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
120120
; CHECK-NEXT: ret void
121121
;
122122
call void @unknown() #0
@@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
125125

126126
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
127127
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
128-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
128+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
129129
; CHECK-NEXT: call void [[INDIRECT]]()
130130
; CHECK-NEXT: ret void
131131
;
@@ -135,8 +135,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
135135

136136
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
137137
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
138-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
139-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
138+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
139+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
140140
; CHECK-NEXT: ret void
141141
;
142142
call void %indirect() #0
@@ -155,15 +155,15 @@ define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
155155

156156
define void @empty() {
157157
; CHECK-LABEL: define void @empty(
158-
; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
158+
; CHECK-SAME: ) #[[ATTR1]] {
159159
; CHECK-NEXT: ret void
160160
;
161161
ret void
162162
}
163163

164164
define void @also_empty() {
165165
; CHECK-LABEL: define void @also_empty(
166-
; CHECK-SAME: ) #[[ATTR5]] {
166+
; CHECK-SAME: ) #[[ATTR1]] {
167167
; CHECK-NEXT: ret void
168168
;
169169
ret void
@@ -254,14 +254,11 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
254254

255255
attributes #0 = { "amdgpu-no-agpr" }
256256
;.
257-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
261-
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
262-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
263-
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
264-
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
265-
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
266-
; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }
257+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
261+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
262+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
263+
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-agpr" }
267264
;.

llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define void @use_variables() sanitize_address {
4444

4545
define amdgpu_kernel void @k0() sanitize_address {
4646
; CHECK-LABEL: define amdgpu_kernel void @k0(
47-
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
47+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
4848
; CHECK-NEXT: WId:
4949
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
5050
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
@@ -138,5 +138,6 @@ define amdgpu_kernel void @k0() sanitize_address {
138138
;.
139139
; CHECK: [[META0]] = !{i32 0, i32 1}
140140
; CHECK: [[META1]] = !{i32 8, i32 9}
141-
; CHECK: [[META2]] = !{i32 0}
141+
; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
142+
; CHECK: [[META3]] = !{i32 0}
142143
;.

llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ define amdgpu_kernel void @k0() sanitize_address {
6666
; CHECK-NEXT: [[TMP38:%.*]] = and i1 [[TMP34]], [[TMP37]]
6767
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP38]])
6868
; CHECK-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP39]], 0
69-
; CHECK-NEXT: br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF2:![0-9]+]]
69+
; CHECK-NEXT: br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF3:![0-9]+]]
7070
; CHECK: asan.report:
7171
; CHECK-NEXT: br i1 [[TMP38]], label [[TMP41:%.*]], label [[CONDFREE:%.*]]
7272
; CHECK: 41:
@@ -109,5 +109,6 @@ define amdgpu_kernel void @k0() sanitize_address {
109109
;.
110110
; CHECK: [[META0]] = !{i32 0, i32 1}
111111
; CHECK: [[META1]] = !{i32 8, i32 9}
112-
; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
112+
; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
113+
; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575}
113114
;.

llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,5 @@ define amdgpu_kernel void @k0() sanitize_address {
8484
;.
8585
; CHECK: [[META0]] = !{i32 0, i32 1}
8686
; CHECK: [[META1]] = !{i32 8, i32 9}
87+
; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
8788
;.

0 commit comments

Comments
 (0)