-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU: Propagate amdgpu-max-num-workgroups attribute #113018
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Propagate amdgpu-max-num-workgroups attribute #113018
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesI'm not sure what the interpretation of 0 is supposed to be, Full diff: https://github.com/llvm/llvm-project/pull/113018.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 687a7339da379d..58b90ddc36605c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -179,6 +179,11 @@ class AMDGPUInformationCache : public InformationCache {
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
}
+ SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getMaxNumWorkGroups(F);
+ }
+
/// Get code object version.
unsigned getCodeObjectVersion() const {
return CodeObjectVersion;
@@ -821,6 +826,150 @@ AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
"AAAMDFlatWorkGroupSize is only valid for function position");
}
+struct TupleDecIntegerRangeState : public AbstractState {
+ DecIntegerState<uint32_t> X, Y, Z;
+
+ bool isValidState() const override {
+ return X.isValidState() && Y.isValidState() && Z.isValidState();
+ }
+
+ bool isAtFixpoint() const override {
+ return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
+ }
+
+ ChangeStatus indicateOptimisticFixpoint() override {
+ return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
+ Z.indicateOptimisticFixpoint();
+ }
+
+ ChangeStatus indicatePessimisticFixpoint() override {
+ return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
+ Z.indicatePessimisticFixpoint();
+ }
+
+ TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
+ X ^= Other.X;
+ Y ^= Other.Y;
+ Z ^= Other.Z;
+ return *this;
+ }
+
+ bool operator==(const TupleDecIntegerRangeState &Other) const {
+ return X == Other.X && Y == Other.Y && Z == Other.Z;
+ }
+
+ TupleDecIntegerRangeState &getAssumed() { return *this; }
+ const TupleDecIntegerRangeState &getAssumed() const { return *this; }
+};
+
+using AAAMDMaxNumWorkgroupsState =
+ StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
+
+/// Propagate amdgpu-max-num-workgroups attribute.
+struct AAAMDMaxNumWorkgroups
+ : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
+ using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
+
+ AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ void initialize(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+ SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
+
+ // FIXME: What is the interpretation of 0?
+ for (unsigned &Entry : MaxNumWorkgroups) {
+ if (Entry == 0)
+ Entry = std::numeric_limits<uint32_t>::max();
+ }
+
+ X.takeKnownMinimum(MaxNumWorkgroups[0]);
+ Y.takeKnownMinimum(MaxNumWorkgroups[1]);
+ Z.takeKnownMinimum(MaxNumWorkgroups[2]);
+
+ if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto CheckCallSite = [&](AbstractCallSite CS) {
+ Function *Caller = CS.getInstruction()->getFunction();
+ LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
+ << "->" << getAssociatedFunction()->getName() << '\n');
+
+ const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
+ *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+ if (!CallerInfo)
+ return false;
+
+ Change |=
+ clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
+ return true;
+ };
+
+ bool AllCallSitesKnown = true;
+ if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return Change;
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ ChangeStatus manifest(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ // TODO: Skip adding if worst case?
+ LLVMContext &Ctx = F->getContext();
+ SmallString<32> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
+
+ // TODO: Should annotate loads of the group size for this to do anything
+ // useful.
+ return A.manifestAttrs(
+ getIRPosition(),
+ {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
+ /* ForceReplace= */ true);
+ }
+
+ const std::string getName() const override { return "AAAMDMaxNumWorkgroups"; }
+
+ const std::string getAsStr(Attributor *) const override {
+ std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
+ raw_string_ostream OS(Buffer);
+ OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
+ << ']';
+ return OS.str();
+ }
+
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAMDMaxNumWorkgroups
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ void trackStatistics() const override {}
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+const char AAAMDMaxNumWorkgroups::ID = 0;
+
+AAAMDMaxNumWorkgroups &
+AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
+ if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+ return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
+ llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
+}
+
/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
@@ -1043,8 +1192,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
- &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
- &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
+ &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
+ &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
&AAInstanceInfo::ID});
@@ -1068,6 +1217,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
for (auto *F : Functions) {
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
+ A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
CallingConv::ID CC = F->getCallingConv();
if (!AMDGPU::isEntryFunctionCC(CC)) {
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
new file mode 100644
index 00000000000000..bd1aa6e6ed4701
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
+
+; External call to avoid inferring argument attributes. This makes the
+; final attribute groups easier to read
+declare void @dummy()
+
+define void @extern_callee() {
+; CHECK-LABEL: define void @extern_callee(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define internal void @callee_1_2_3() {
+; CHECK-LABEL: define internal void @callee_1_2_3(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_1_2_3() #0 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2_3(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void @callee_1_2_3()
+; CHECK-NEXT: call void @extern_callee()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @callee_1_2_3()
+ call void @extern_callee()
+ call void @dummy()
+ ret void
+}
+
+attributes #0 = {"amdgpu-max-num-workgroups"="1,2,3"}
+
+; -> 100,10,99
+define internal void @callee_merge_100_8_32__16_10_99() {
+; CHECK-LABEL: define internal void @callee_merge_100_8_32__16_10_99(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_100_8_32() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_100_8_32(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99()
+; CHECK-NEXT: ret void
+;
+ call void @callee_merge_100_8_32__16_10_99()
+ ret void
+}
+
+attributes #1 = {"amdgpu-max-num-workgroups"="100,8,32"}
+
+define amdgpu_kernel void @kernel_16_10_99() #2 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_16_10_99(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @callee_merge_100_8_32__16_10_99()
+ call void @dummy()
+ ret void
+}
+
+attributes #2 = {"amdgpu-max-num-workgroups"="16,10,99"}
+
+define internal void @merge_to_worst_case() {
+; CHECK-LABEL: define internal void @merge_to_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define internal void @callee_x_worst_case() {
+; CHECK-LABEL: define internal void @callee_x_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_x_maximum() #3 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_x_maximum(
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT: call void @merge_to_worst_case()
+; CHECK-NEXT: call void @callee_x_worst_case()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @merge_to_worst_case()
+ call void @callee_x_worst_case()
+ call void @dummy()
+ ret void
+}
+
+attributes #3 = {"amdgpu-max-num-workgroups"="4294967295,1,1"}
+
+define amdgpu_kernel void @kernel_y_maximum() #4 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_y_maximum(
+; CHECK-SAME: ) #[[ATTR6:[0-9]+]] {
+; CHECK-NEXT: call void @merge_to_worst_case()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @merge_to_worst_case()
+ call void @dummy()
+ ret void
+}
+
+attributes #4 = {"amdgpu-max-num-workgroups"="1,4294967295,1"}
+
+define amdgpu_kernel void @kernel_z_maximum() #5 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_z_maximum(
+; CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
+; CHECK-NEXT: call void @merge_to_worst_case()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @merge_to_worst_case()
+ call void @dummy()
+ ret void
+}
+
+attributes #5 = {"amdgpu-max-num-workgroups"="1,1,4294967295"}
+
+; Make sure the attribute isn't lost from the callee.
+define internal void @annotated_callee_from_unannotated_kernel() #6 {
+; CHECK-LABEL: define internal void @annotated_callee_from_unannotated_kernel(
+; CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+attributes #6 = {"amdgpu-max-num-workgroups"="42,99,123"}
+
+define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee() {
+; CHECK-LABEL: define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @annotated_callee_from_unannotated_kernel()
+; CHECK-NEXT: ret void
+;
+ call void @annotated_callee_from_unannotated_kernel()
+ ret void
+}
+
+
+define internal void @annotated_callee_merge_caller() #7 {
+; CHECK-LABEL: define internal void @annotated_callee_merge_caller(
+; CHECK-SAME: ) #[[ATTR9:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+attributes #7 = {"amdgpu-max-num-workgroups"="512,256,1024"}
+
+define amdgpu_kernel void @call_annotated_callee_merge_caller() #8 {
+; CHECK-LABEL: define amdgpu_kernel void @call_annotated_callee_merge_caller(
+; CHECK-SAME: ) #[[ATTR10:[0-9]+]] {
+; CHECK-NEXT: call void @annotated_callee_merge_caller()
+; CHECK-NEXT: ret void
+;
+ call void @annotated_callee_merge_caller()
+ ret void
+}
+
+attributes #8 = {"amdgpu-max-num-workgroups"="256,128,2048"}
+
+define internal void @called_by_explicit_worst_case() {
+; CHECK-LABEL: define internal void @called_by_explicit_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_explicit_worst_case() #9 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_explicit_worst_case(
+; CHECK-SAME: ) #[[ATTR11:[0-9]+]] {
+; CHECK-NEXT: call void @called_by_explicit_worst_case()
+; CHECK-NEXT: ret void
+;
+ call void @called_by_explicit_worst_case()
+ ret void
+}
+
+attributes #9 = {"amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295"}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="1,2,3" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="100,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="100,8,32" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-max-num-workgroups"="16,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-max-num-workgroups"="4294967295,1,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-max-num-workgroups"="1,4294967295,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-max-num-workgroups"="1,1,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-max-num-workgroups"="42,99,123" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-max-num-workgroups"="256,128,1024" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-max-num-workgroups"="256,128,2048" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.
|
2005b26
to
58a330c
Compare
0c10781
to
7b7992f
Compare
58a330c
to
b0cdf6f
Compare
7b7992f
to
ae52e0d
Compare
b0cdf6f
to
d26f42b
Compare
ae52e0d
to
fada8ca
Compare
8272a93
to
b5edcd1
Compare
Z.takeKnownMinimum(MaxNumWorkgroups[2]); | ||
|
||
if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) | ||
indicatePessimisticFixpoint(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be an optimistic fix point
We want the pessimistic state to be also the invalid state, such that its manifest will not be called
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Optimistic fix point is 0,0,0 which isn't even valid. It doesn't make sense to ever set that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's why we want to update assumed instead of known, as I mentioned in another comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can't touch the entry point, how could it ever be optimistic
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
entry point is the starting point of the propagation, so whatever value it has, no matter user specified value or default value, it is the best it has.
|
||
void initialize(Attributor &A) override { | ||
Function *F = getAssociatedFunction(); | ||
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's all this parse is
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but here it doesn't stop if an attribute is not a default one
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This case isn't actually like the others. It's only useful for known-bits style optimizations, and isn't a first class restriction like amdgpu-waves-per-eu
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure how this attribute is gonna be used in the future, but it can get convoluted and go south if the moving direction of this AA is different from its depending AA (if there is any in the future).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#113019 is about the extent of what it can do
|
||
SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F); | ||
|
||
X.takeKnownMinimum(MaxNumWorkgroups[0]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update assumed here instead of known, only if they are not default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's effectively what happens? The default is the maximum
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, takeKnownMinimum will only update known, not assumed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still think this should be takeKnownMinimum. This has more in common with AAAlign than amdgpu-waves-per-eu. It is an informative, not a prescriptive, attribute.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the key part here missing (if you update assumed value) is, MaxNumWorkgroups[0]
needs to be checked when it is taken. If it is ~0U
, then its value should not be taken; otherwise it effectively pushes to the worst state directly.
Apparently getMaxNumWorkGroups
is not just checking attribute. It only returns a default value, which is all ~0U
(worst state).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getMaxNumWorkGroups does check the attribute, otherwise nothing would work.
I see no change by special case not adding the worst case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For some reason, I made a typo in the last paragraph, which changed its meaning entirely. Lol.
What I meant to say is that getMaxNumWorkGroups
doesn’t just check the attribute; it also returns a default value if the attribute doesn’t exist. In this specific case, it might be fine because the default value is ~0U
. By "fine" I mean that if the value returned by getMaxNumWorkGroups
is not ~0U
, it must have come from the attribute.
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED); | ||
if (!CallerInfo || !CallerInfo->isValidState()) | ||
return false; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
skip the update if it at initial state, like what we did in #114726
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've tried this and it seems to never fire
ChangeStatus manifest(Attributor &A) override { | ||
Function *F = getAssociatedFunction(); | ||
// TODO: Skip adding if worst case? | ||
LLVMContext &Ctx = F->getContext(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
skil manifest if it is still in initial state
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would expect manifest to not get called in the first place if the worst state is considered invalid
Basically if I do anything you've suggested, this patch goes from working to not working |
Half-baked values are no longer added. That is expected. If all call sites of a function are clear, then it should work. |
I'm not sure what the interpretation of 0 is supposed to be, AMDGPUUsage doesn't say.
b5edcd1
to
2ad2f35
Compare
I took your PR and did a local change, and it looks working. |
Meaning what? |
Meaning if it is done correctly, it will work as expected. If you want to use pessimistic state as invalid, you can't touch known, because invalid state only means known to be worst. As long as known is not worst, there is no invalid state anymore, and In addition, everywhere else just uses assumed, including On the other hand, if you just update assumed, then everywhere else just gets the latest state, and |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The patch works fine as is, I have no time to get back to this, and you claim to have a "local change" that fixes it. How about just shipping this and you can open a separate PR with your changes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm gonna accept it as it is and rework all related AAs
I'm not sure what the interpretation of 0 is supposed to be,
AMDGPUUsage doesn't say.