AMDGPU: Propagate amdgpu-max-num-workgroups attribute #113018

Conversation

arsenm (Contributor) commented Oct 19, 2024

I'm not sure what the interpretation of 0 is supposed to be,
AMDGPUUsage doesn't say.

llvmbot (Member) commented Oct 19, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

I'm not sure what the interpretation of 0 is supposed to be,
AMDGPUUsage doesn't say.


Full diff: https://github.com/llvm/llvm-project/pull/113018.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+152-2)
  • (added) llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll (+228)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 687a7339da379d..58b90ddc36605c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -179,6 +179,11 @@ class AMDGPUInformationCache : public InformationCache {
     return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
   }
 
+  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
+    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    return ST.getMaxNumWorkGroups(F);
+  }
+
   /// Get code object version.
   unsigned getCodeObjectVersion() const {
     return CodeObjectVersion;
@@ -821,6 +826,150 @@ AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
       "AAAMDFlatWorkGroupSize is only valid for function position");
 }
 
+struct TupleDecIntegerRangeState : public AbstractState {
+  DecIntegerState<uint32_t> X, Y, Z;
+
+  bool isValidState() const override {
+    return X.isValidState() && Y.isValidState() && Z.isValidState();
+  }
+
+  bool isAtFixpoint() const override {
+    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
+  }
+
+  ChangeStatus indicateOptimisticFixpoint() override {
+    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
+           Z.indicateOptimisticFixpoint();
+  }
+
+  ChangeStatus indicatePessimisticFixpoint() override {
+    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
+           Z.indicatePessimisticFixpoint();
+  }
+
+  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
+    X ^= Other.X;
+    Y ^= Other.Y;
+    Z ^= Other.Z;
+    return *this;
+  }
+
+  bool operator==(const TupleDecIntegerRangeState &Other) const {
+    return X == Other.X && Y == Other.Y && Z == Other.Z;
+  }
+
+  TupleDecIntegerRangeState &getAssumed() { return *this; }
+  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
+};
+
+using AAAMDMaxNumWorkgroupsState =
+    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
+
+/// Propagate amdgpu-max-num-workgroups attribute.
+struct AAAMDMaxNumWorkgroups
+    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
+  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
+
+  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  void initialize(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
+
+    // FIXME: What is the interpretation of 0?
+    for (unsigned &Entry : MaxNumWorkgroups) {
+      if (Entry == 0)
+        Entry = std::numeric_limits<uint32_t>::max();
+    }
+
+    X.takeKnownMinimum(MaxNumWorkgroups[0]);
+    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
+    Z.takeKnownMinimum(MaxNumWorkgroups[2]);
+
+    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
+      indicatePessimisticFixpoint();
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+    auto CheckCallSite = [&](AbstractCallSite CS) {
+      Function *Caller = CS.getInstruction()->getFunction();
+      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
+                        << "->" << getAssociatedFunction()->getName() << '\n');
+
+      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
+          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+      if (!CallerInfo)
+        return false;
+
+      Change |=
+          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
+      return true;
+    };
+
+    bool AllCallSitesKnown = true;
+    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+      return indicatePessimisticFixpoint();
+
+    return Change;
+  }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
+                                                  Attributor &A);
+
+  ChangeStatus manifest(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    // TODO: Skip adding if worst case?
+    LLVMContext &Ctx = F->getContext();
+    SmallString<32> Buffer;
+    raw_svector_ostream OS(Buffer);
+    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
+
+    // TODO: Should annotate loads of the group size for this to do anything
+    // useful.
+    return A.manifestAttrs(
+        getIRPosition(),
+        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
+        /* ForceReplace= */ true);
+  }
+
+  const std::string getName() const override { return "AAAMDMaxNumWorkgroups"; }
+
+  const std::string getAsStr(Attributor *) const override {
+    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
+    raw_string_ostream OS(Buffer);
+    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
+       << ']';
+    return OS.str();
+  }
+
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDMaxNumWorkgroups
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  void trackStatistics() const override {}
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
+const char AAAMDMaxNumWorkgroups::ID = 0;
+
+AAAMDMaxNumWorkgroups &
+AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
+  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
+  llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
+}
+
 /// Propagate amdgpu-waves-per-eu attribute.
 struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
   AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
@@ -1043,8 +1192,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
   DenseSet<const char *> Allowed(
       {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
        &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
-       &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
-       &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
+       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
+       &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
        &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
        &AAInstanceInfo::ID});
 
@@ -1068,6 +1217,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
   for (auto *F : Functions) {
     A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
     A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
+    A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
     A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
     CallingConv::ID CC = F->getCallingConv();
     if (!AMDGPU::isEntryFunctionCC(CC)) {
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
new file mode 100644
index 00000000000000..bd1aa6e6ed4701
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
+
+; External call to avoid inferring argument attributes. This makes the
+; final attribute groups easier to read
+declare void @dummy()
+
+define void @extern_callee() {
+; CHECK-LABEL: define void @extern_callee(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+define internal void @callee_1_2_3() {
+; CHECK-LABEL: define internal void @callee_1_2_3(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_1_2_3() #0 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2_3(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void @callee_1_2_3()
+; CHECK-NEXT:    call void @extern_callee()
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @callee_1_2_3()
+  call void @extern_callee()
+  call void @dummy()
+  ret void
+}
+
+attributes #0 = {"amdgpu-max-num-workgroups"="1,2,3"}
+
+; -> 100,10,99
+define internal void @callee_merge_100_8_32__16_10_99() {
+; CHECK-LABEL: define internal void @callee_merge_100_8_32__16_10_99(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_100_8_32() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_100_8_32(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    call void @callee_merge_100_8_32__16_10_99()
+; CHECK-NEXT:    ret void
+;
+  call void @callee_merge_100_8_32__16_10_99()
+  ret void
+}
+
+attributes #1 = {"amdgpu-max-num-workgroups"="100,8,32"}
+
+define amdgpu_kernel void @kernel_16_10_99() #2 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_16_10_99(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    call void @callee_merge_100_8_32__16_10_99()
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @callee_merge_100_8_32__16_10_99()
+  call void @dummy()
+  ret void
+}
+
+attributes #2 = {"amdgpu-max-num-workgroups"="16,10,99"}
+
+define internal void @merge_to_worst_case() {
+; CHECK-LABEL: define internal void @merge_to_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+define internal void @callee_x_worst_case() {
+; CHECK-LABEL: define internal void @callee_x_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_x_maximum() #3 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_x_maximum(
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT:    call void @merge_to_worst_case()
+; CHECK-NEXT:    call void @callee_x_worst_case()
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @merge_to_worst_case()
+  call void @callee_x_worst_case()
+  call void @dummy()
+  ret void
+}
+
+attributes #3 = {"amdgpu-max-num-workgroups"="4294967295,1,1"}
+
+define amdgpu_kernel void @kernel_y_maximum() #4 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_y_maximum(
+; CHECK-SAME: ) #[[ATTR6:[0-9]+]] {
+; CHECK-NEXT:    call void @merge_to_worst_case()
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @merge_to_worst_case()
+  call void @dummy()
+  ret void
+}
+
+attributes #4 = {"amdgpu-max-num-workgroups"="1,4294967295,1"}
+
+define amdgpu_kernel void @kernel_z_maximum() #5 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_z_maximum(
+; CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
+; CHECK-NEXT:    call void @merge_to_worst_case()
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @merge_to_worst_case()
+  call void @dummy()
+  ret void
+}
+
+attributes #5 = {"amdgpu-max-num-workgroups"="1,1,4294967295"}
+
+; Make sure the attribute isn't lost from the callee.
+define internal void @annotated_callee_from_unannotated_kernel() #6 {
+; CHECK-LABEL: define internal void @annotated_callee_from_unannotated_kernel(
+; CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+attributes #6 = {"amdgpu-max-num-workgroups"="42,99,123"}
+
+define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee()  {
+; CHECK-LABEL: define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @annotated_callee_from_unannotated_kernel()
+; CHECK-NEXT:    ret void
+;
+  call void @annotated_callee_from_unannotated_kernel()
+  ret void
+}
+
+
+define internal void @annotated_callee_merge_caller() #7 {
+; CHECK-LABEL: define internal void @annotated_callee_merge_caller(
+; CHECK-SAME: ) #[[ATTR9:[0-9]+]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+attributes #7 = {"amdgpu-max-num-workgroups"="512,256,1024"}
+
+define amdgpu_kernel void @call_annotated_callee_merge_caller() #8 {
+; CHECK-LABEL: define amdgpu_kernel void @call_annotated_callee_merge_caller(
+; CHECK-SAME: ) #[[ATTR10:[0-9]+]] {
+; CHECK-NEXT:    call void @annotated_callee_merge_caller()
+; CHECK-NEXT:    ret void
+;
+  call void @annotated_callee_merge_caller()
+  ret void
+}
+
+attributes #8 = {"amdgpu-max-num-workgroups"="256,128,2048"}
+
+define internal void @called_by_explicit_worst_case() {
+; CHECK-LABEL: define internal void @called_by_explicit_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @dummy()
+; CHECK-NEXT:    ret void
+;
+  call void @dummy()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_explicit_worst_case() #9 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_explicit_worst_case(
+; CHECK-SAME: ) #[[ATTR11:[0-9]+]] {
+; CHECK-NEXT:    call void @called_by_explicit_worst_case()
+; CHECK-NEXT:    ret void
+;
+  call void @called_by_explicit_worst_case()
+  ret void
+}
+
+attributes #9 = {"amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295"}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="1,2,3" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="100,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="100,8,32" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-max-num-workgroups"="16,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-max-num-workgroups"="4294967295,1,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-max-num-workgroups"="1,4294967295,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-max-num-workgroups"="1,1,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-max-num-workgroups"="42,99,123" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-max-num-workgroups"="256,128,1024" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-max-num-workgroups"="256,128,2048" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.

@arsenm arsenm marked this pull request as ready for review on October 19, 2024 03:44
@arsenm arsenm force-pushed the users/arsenm/amdgpu-attributor-propagate-amdgpu-max-num-workgroups branch from 2005b26 to 58a330c on October 26, 2024 04:13
@arsenm arsenm changed the base branch from main to users/arsenm/amdgpu-default-val-amdgpu-max-num-workgroups on October 26, 2024 04:14
@arsenm arsenm force-pushed the users/arsenm/amdgpu-default-val-amdgpu-max-num-workgroups branch from 0c10781 to 7b7992f on October 28, 2024 22:19
@arsenm arsenm force-pushed the users/arsenm/amdgpu-attributor-propagate-amdgpu-max-num-workgroups branch from 58a330c to b0cdf6f on October 28, 2024 22:19
@arsenm arsenm force-pushed the users/arsenm/amdgpu-default-val-amdgpu-max-num-workgroups branch from 7b7992f to ae52e0d on November 5, 2024 16:06
@arsenm arsenm force-pushed the users/arsenm/amdgpu-attributor-propagate-amdgpu-max-num-workgroups branch from b0cdf6f to d26f42b on November 5, 2024 16:06
@arsenm arsenm force-pushed the users/arsenm/amdgpu-default-val-amdgpu-max-num-workgroups branch from ae52e0d to fada8ca on November 5, 2024 20:47
Base automatically changed from users/arsenm/amdgpu-default-val-amdgpu-max-num-workgroups to main on November 5, 2024 20:50
@arsenm arsenm force-pushed the users/arsenm/amdgpu-attributor-propagate-amdgpu-max-num-workgroups branch 2 times, most recently from 8272a93 to b5edcd1 on November 5, 2024 21:10
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
Contributor:

This should be an optimistic fixpoint.

We want the pessimistic state to also be the invalid state, so that manifest will not be called for it.

Contributor (author):

The optimistic fixpoint is 0,0,0, which isn't even valid. It doesn't make sense to ever set that?

Contributor:

That's why we want to update assumed instead of known, as I mentioned in another comment.

Contributor (author):

You can't touch the entry point, so how could it ever be optimistic?

Contributor:

The entry point is the starting point of the propagation, so whatever value it has, whether user specified or the default, is the best it will ever have.
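As a rough standalone model of that propagation direction (hypothetical code, not the Attributor framework, and the function/kernel names are made up): kernels seed the bound from their attribute, and each callee's effective bound is the component-wise maximum over its callers' bounds, since it may be reached from any of them. The sketch below reproduces the 100,8,32 / 16,10,99 -> 100,10,99 merge exercised by the new test.

// Toy model of the propagation discussed in this thread; not the LLVM
// Attributor API. A real implementation iterates to a fixpoint over the
// call graph; one pass suffices here because the callers are the seeds.
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Bound = std::array<uint32_t, 3>; // max number of workgroups in X, Y, Z

int main() {
  // Kernels are the entry points; their attribute values seed the analysis.
  std::map<std::string, Bound> Result = {{"k0", {100, 8, 32}},
                                         {"k1", {16, 10, 99}}};
  // Call edges (caller -> callee): both kernels call the same helper.
  std::vector<std::pair<std::string, std::string>> Calls = {{"k0", "helper"},
                                                            {"k1", "helper"}};

  // A callee may be reached from any of its callers, so its bound has to
  // cover all of them: take the component-wise maximum of the callers' bounds.
  for (const auto &[Caller, Callee] : Calls) {
    Bound &B = Result.try_emplace(Callee, Bound{0, 0, 0}).first->second;
    for (int I = 0; I < 3; ++I)
      B[I] = std::max(B[I], Result[Caller][I]);
  }

  const Bound &H = Result["helper"];
  std::printf("helper: %u,%u,%u\n", H[0], H[1], H[2]); // prints helper: 100,10,99
}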


  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
Contributor:

We probably want to honor the user's attribute if it is not the default one, like what we did in #114357 and #114438.

Contributor (author):

That's all this parse is.

Contributor:

But here it doesn't stop if the attribute is not the default one.

Contributor (author):

This case isn't actually like the others. It's only useful for known-bits style optimizations, and isn't a first-class restriction like amdgpu-waves-per-eu.

Contributor:

I'm not sure how this attribute is going to be used in the future, but it could get convoluted and go wrong if this AA moves in a different direction from an AA that depends on it (if there ever is one).

Contributor (author):

#113019 is about the extent of what it can do.


    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
Contributor:

Update assumed here instead of known, and only if the values are not the defaults.

Contributor (author):

That's effectively what happens? The default is the maximum.

Contributor:

No, takeKnownMinimum will only update known, not assumed.

Contributor (author):

I still think this should be takeKnownMinimum. This has more in common with AAAlign than amdgpu-waves-per-eu. It is an informative, not a prescriptive, attribute.

shiltian (Contributor) commented Nov 7, 2024

I think the key part missing here (if you update the assumed value) is that MaxNumWorkgroups[0] needs to be checked when it is taken. If it is ~0U, then its value should not be taken; otherwise it effectively pushes the state to the worst case directly.

Apparently getMaxNumWorkGroups is not just checking attribute. It only returns a default value, which is all ~0U (worst state).

Contributor (author):

getMaxNumWorkGroups does check the attribute; otherwise nothing would work.

I see no change from special-casing to avoid adding the worst case.

Contributor:

For some reason, I made a typo in the last paragraph, which changed its meaning entirely. Lol.

What I meant to say is that getMaxNumWorkGroups doesn’t just check the attribute; it also returns a default value if the attribute doesn’t exist. In this specific case, it might be fine because the default value is ~0U. By "fine" I mean that if the value returned by getMaxNumWorkGroups is not ~0U, it must have come from the attribute.
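A minimal sketch of the check being described here, under the assumption that the all-ones value is the "no attribute" default (the names below are hypothetical; the actual patch works on Attributor states):

// Sketch only: fold a parsed per-function value into an assumed bound, but
// skip the ~0U default so an unannotated function contributes no information.
#include <algorithm>
#include <cstdint>
#include <limits>

constexpr uint32_t Unbounded = std::numeric_limits<uint32_t>::max();

inline void foldAttrValue(uint32_t AttrValue, uint32_t &AssumedBound) {
  if (AttrValue == Unbounded)
    return; // default value: carries no information, leave the bound alone
  AssumedBound = std::min(AssumedBound, AttrValue);
}

int main() {
  uint32_t Assumed = Unbounded;
  foldAttrValue(Unbounded, Assumed); // unannotated function: ignored
  foldAttrValue(42, Assumed);        // attribute value of 42: tightens the bound
  return Assumed == 42 ? 0 : 1;      // Assumed is now 42
}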

          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

Contributor:

Skip the update if it is at the initial state, like what we did in #114726.

Contributor (author):

I've tried this and it seems to never fire.

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // TODO: Skip adding if worst case?
    LLVMContext &Ctx = F->getContext();
Contributor:

Skip manifest if it is still in the initial state.

Contributor (author):

I would expect manifest not to get called in the first place if the worst state is considered invalid.

arsenm (Contributor) commented Nov 6, 2024

Basically, if I do anything you've suggested, this patch goes from working to not working.

shiltian (Contributor) commented Nov 6, 2024

Half-baked values are no longer added. That is expected. If all call sites of a function are clear, then it should work.

@arsenm arsenm force-pushed the users/arsenm/amdgpu-attributor-propagate-amdgpu-max-num-workgroups branch from b5edcd1 to 2ad2f35 on November 6, 2024 23:43
shiltian (Contributor) commented Nov 6, 2024

I took your PR and made a local change, and it looks like it works.

arsenm (Contributor) commented Nov 8, 2024

I took your PR and made a local change, and it looks like it works.

Meaning what?

Contributor:

Meaning that if it is done correctly, it will work as expected.

If you want to use the pessimistic state as the invalid state, you can't touch known, because the invalid state means only that known is the worst value. As long as known is not the worst, the state is never invalid, and manifest will always be called.

In addition, everywhere else just uses assumed, including manifest. Updating known in initialize will not give you much, unless the AA reaches the pessimistic state, at which point known is propagated to assumed.

On the other hand, if you only update assumed, then everywhere else just sees the latest state, and manifest will not be called when the state is invalid (i.e., pessimistic). This requires checking those values in initialize before updating assumed, because you don't want to push the state to the worst case at the very beginning.
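A simplified, self-contained model of the known/assumed split described above (illustrative only; this is not LLVM's IntegerStateBase, and the real classes differ in details): the state is invalid exactly when nothing better than the worst case is even assumed, which is what lets the framework skip manifest.

// Simplified model of an Attributor-style state for a "maximum" quantity,
// where smaller values are stronger information. Illustrative only.
#include <algorithm>
#include <cstdint>
#include <cstdio>

struct MaxBoundState {
  static constexpr uint32_t Worst = UINT32_MAX; // no useful bound at all
  uint32_t Known = Worst;   // facts that hold unconditionally
  uint32_t Assumed = Worst; // optimistic working value

  // "Invalid" (fully pessimistic) state: nothing better than the worst case
  // is even assumed, so there is nothing worth writing back in manifest.
  bool isValidState() const { return Assumed != Worst; }

  // Update only the assumed value; known stays where it is until a fixpoint
  // commits or discards the assumption.
  void assumeBoundAtMost(uint32_t V) { Assumed = std::min(Assumed, V); }

  // Optimistic fixpoint: the assumption becomes a fact.
  void indicateOptimisticFixpoint() { Known = Assumed; }
  // Pessimistic fixpoint: drop the assumption, keep only what is known.
  void indicatePessimisticFixpoint() { Assumed = Known; }
};

int main() {
  MaxBoundState S;
  std::printf("valid=%d\n", S.isValidState()); // 0: nothing assumed yet
  S.assumeBoundAtMost(1024);                   // e.g. taken from an attribute
  std::printf("assumed=%u valid=%d\n", S.Assumed, S.isValidState()); // 1024, 1
  S.indicatePessimisticFixpoint();             // give up: back to what is known
  std::printf("valid=%d\n", S.isValidState()); // 0: manifest would be skipped
}

In this model, an update that also lowers Known (a takeKnownMinimum-style update) would keep the state valid even after a pessimistic fixpoint, which is exactly the behavior being debated above.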

arsenm (Contributor) left a comment

The patch works fine as is, I have no time to get back to this, and you claim to have a "local change" that fixes it. How about just shipping this, and you can open a separate PR with your changes?

shiltian (Contributor) left a comment

I'm going to accept it as it is and rework all related AAs.

@arsenm arsenm merged commit 664a226 into main Dec 9, 2024
8 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu-attributor-propagate-amdgpu-max-num-workgroups branch December 9, 2024 15:57