Skip to content

Commit 79f8883

Browse files
committed
[WIP][AMDGPU][Attributor] Make AAAMDWavesPerEU honor existing attribute
1 parent b6b851f commit 79f8883

8 files changed

+113
-93
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,19 @@ class AMDGPUInformationCache : public InformationCache {
201201
return ST.getWavesPerEU(F, FlatWorkGroupSize);
202202
}
203203

204+
std::optional<std::pair<unsigned, unsigned>>
205+
getWavesPerEUAttr(const Function &F) {
206+
auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
207+
/*OnlyFirstRequired=*/true);
208+
if (!Val)
209+
return std::nullopt;
210+
if (!Val->second) {
211+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
212+
Val->second = ST.getMaxWavesPerEU();
213+
}
214+
return std::make_pair(Val->first, *(Val->second));
215+
}
216+
204217
std::pair<unsigned, unsigned>
205218
getEffectiveWavesPerEU(const Function &F,
206219
std::pair<unsigned, unsigned> WavesPerEU,
@@ -771,22 +784,6 @@ struct AAAMDSizeRangeAttribute
771784
/*ForceReplace=*/true);
772785
}
773786

774-
ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
775-
unsigned Max) {
776-
// Don't add the attribute if it's the implied default.
777-
if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
778-
return ChangeStatus::UNCHANGED;
779-
780-
Function *F = getAssociatedFunction();
781-
LLVMContext &Ctx = F->getContext();
782-
SmallString<10> Buffer;
783-
raw_svector_ostream OS(Buffer);
784-
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
785-
return A.manifestAttrs(getIRPosition(),
786-
{Attribute::get(Ctx, AttrName, OS.str())},
787-
/*ForceReplace=*/true);
788-
}
789-
790787
const std::string getAsStr(Attributor *) const override {
791788
std::string Str;
792789
raw_string_ostream OS(Str);
@@ -883,29 +880,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
883880
AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
884881
: AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
885882

886-
bool isValidState() const override {
887-
return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
888-
}
889-
890883
void initialize(Attributor &A) override {
891884
Function *F = getAssociatedFunction();
892885
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
893886

894-
if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
895-
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
896-
AssumedGroupSize->isValidState()) {
887+
auto TakeRange = [&](std::pair<unsigned, unsigned> R) {
888+
auto [Min, Max] = R;
889+
ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
890+
IntegerRangeState RangeState(Range);
891+
clampStateAndIndicateChange(this->getState(), RangeState);
892+
indicateOptimisticFixpoint();
893+
};
897894

898-
unsigned Min, Max;
899-
std::tie(Min, Max) = InfoCache.getWavesPerEU(
900-
*F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
901-
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
895+
std::pair<unsigned, unsigned> MaxWavesPerEURange{
896+
1U, InfoCache.getMaxWavesPerEU(*F)};
902897

903-
ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
904-
intersectKnown(Range);
898+
// If the attribute exists, we will honor it if it is not the default.
899+
if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
900+
if (*Attr != MaxWavesPerEURange) {
901+
TakeRange(*Attr);
902+
return;
903+
}
905904
}
906905

907-
if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
908-
indicatePessimisticFixpoint();
906+
// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
907+
// calculation of waves per EU involves flat work group size, we can't
908+
// simply use an assumed flat work group size as a starting point, because the
909+
// update of flat work group size is in an inverse direction of waves per
910+
// EU. However, we can still do something if it is an entry function. Since
911+
// an entry function is a terminal node, and flat work group size either
912+
// from attribute or default will be used anyway, we can take that value and
913+
// calculate the waves per EU based on it. This result can't be updated by
914+
// any means, but that could still allow us to propagate it.
915+
if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
916+
std::pair<unsigned, unsigned> FlatWorkGroupSize;
917+
if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F))
918+
FlatWorkGroupSize = *Attr;
919+
else
920+
FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F);
921+
TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange,
922+
FlatWorkGroupSize));
923+
}
909924
}
910925

911926
ChangeStatus updateImpl(Attributor &A) override {
@@ -954,8 +969,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
954969
ChangeStatus manifest(Attributor &A) override {
955970
Function *F = getAssociatedFunction();
956971
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
957-
unsigned Max = InfoCache.getMaxWavesPerEU(*F);
958-
return emitAttributeIfNotDefault(A, 1, Max);
972+
return emitAttributeIfNotDefaultAfterClamp(
973+
A, {1U, InfoCache.getMaxWavesPerEU(*F)});
959974
}
960975

961976
/// See AbstractAttribute::getName()

llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ define void @func_call_asm() #3 {
688688
;
689689
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
690690
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
691-
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR24:[0-9]+]]
691+
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR26:[0-9]+]]
692692
; ATTRIBUTOR_HSA-NEXT: ret void
693693
;
694694
call void asm sideeffect "", ""() #3
@@ -717,7 +717,7 @@ define amdgpu_kernel void @func_kern_defined() #3 {
717717
; AKF_HSA-NEXT: ret void
718718
;
719719
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined
720-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
720+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
721721
; ATTRIBUTOR_HSA-NEXT: call void @defined.func()
722722
; ATTRIBUTOR_HSA-NEXT: ret void
723723
;
@@ -845,7 +845,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
845845
; AKF_HSA-NEXT: ret void
846846
;
847847
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
848-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
848+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
849849
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
850850
; ATTRIBUTOR_HSA-NEXT: ret void
851851
;
@@ -861,7 +861,7 @@ define void @func_sanitize_address() #4 {
861861
; AKF_HSA-NEXT: ret void
862862
;
863863
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
864-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
864+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
865865
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
866866
; ATTRIBUTOR_HSA-NEXT: ret void
867867
;
@@ -877,7 +877,7 @@ define void @func_indirect_sanitize_address() #3 {
877877
; AKF_HSA-NEXT: ret void
878878
;
879879
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
880-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
880+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
881881
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
882882
; ATTRIBUTOR_HSA-NEXT: ret void
883883
;
@@ -893,7 +893,7 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
893893
; AKF_HSA-NEXT: ret void
894894
;
895895
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
896-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
896+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
897897
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
898898
; ATTRIBUTOR_HSA-NEXT: ret void
899899
;
@@ -928,7 +928,7 @@ define internal void @enqueue_block_def() #6 {
928928
; AKF_HSA-NEXT: ret void
929929
;
930930
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
931-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
931+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
932932
; ATTRIBUTOR_HSA-NEXT: ret void
933933
;
934934
ret void
@@ -941,7 +941,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
941941
; AKF_HSA-NEXT: ret void
942942
;
943943
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
944-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
944+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
945945
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl()
946946
; ATTRIBUTOR_HSA-NEXT: ret void
947947
;
@@ -956,7 +956,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() {
956956
; AKF_HSA-NEXT: ret void
957957
;
958958
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
959-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
959+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] {
960960
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def()
961961
; ATTRIBUTOR_HSA-NEXT: ret void
962962
;
@@ -969,7 +969,7 @@ define void @unused_enqueue_block() {
969969
; AKF_HSA-NEXT: ret void
970970
;
971971
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
972-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
972+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
973973
; ATTRIBUTOR_HSA-NEXT: ret void
974974
;
975975
ret void
@@ -980,7 +980,7 @@ define internal void @known_func() {
980980
; AKF_HSA-NEXT: ret void
981981
;
982982
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
983-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
983+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR25]] {
984984
; ATTRIBUTOR_HSA-NEXT: ret void
985985
;
986986
ret void
@@ -994,8 +994,8 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() {
994994
; AKF_HSA-NEXT: ret void
995995
;
996996
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
997-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
998-
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR25:[0-9]+]]
997+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
998+
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR27:[0-9]+]]
999999
; ATTRIBUTOR_HSA-NEXT: ret void
10001000
;
10011001
call void @known_func() #6
@@ -1041,15 +1041,17 @@ attributes #6 = { "enqueued-block" }
10411041
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
10421042
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
10431043
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1044-
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1045-
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1046-
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
1047-
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
1048-
; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
1049-
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" }
1050-
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1051-
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind }
1052-
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" }
1044+
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
1045+
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1046+
; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1047+
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
1048+
; ATTRIBUTOR_HSA: attributes #[[ATTR21:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
1049+
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
1050+
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "uniform-work-group-size"="false" }
1051+
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
1052+
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1053+
; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { nounwind }
1054+
; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "enqueued-block" }
10531055
;.
10541056
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
10551057
;.

llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ bb5: ; preds = %bb5, %bb3
5151

5252
define amdgpu_kernel void @entry() {
5353
; CHECK-LABEL: define {{[^@]+}}@entry
54-
; CHECK-SAME: () #[[ATTR0]] {
54+
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
5555
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
5656
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
5757
; CHECK-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]])
@@ -64,4 +64,5 @@ define amdgpu_kernel void @entry() {
6464
}
6565
;.
6666
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
67+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
6768
;.

llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define internal void @direct() {
2727

2828
define amdgpu_kernel void @test_direct_indirect_call() {
2929
; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
30-
; CHECK-SAME: () #[[ATTR1]] {
30+
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
3131
; CHECK-NEXT: call void @direct()
3232
; CHECK-NEXT: ret void
3333
;
@@ -37,4 +37,5 @@ define amdgpu_kernel void @test_direct_indirect_call() {
3737
;.
3838
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
3939
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
40+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
4041
;.

0 commit comments

Comments
 (0)