@@ -144,6 +144,20 @@ static bool funcRequiresHostcallPtr(const Function &F) {
144
144
}
145
145
146
146
namespace {
147
+
148
+ std::optional<std::pair<unsigned , unsigned >> static parseRangeAttribute (
149
+ StringRef Attr, bool OnlyFirstRequired = false ) {
150
+ std::pair<unsigned , unsigned > Val;
151
+ std::pair<StringRef, StringRef> Strs = Attr.split (' ,' );
152
+ if (Strs.first .trim ().getAsInteger (0 , Val.first ))
153
+ return std::nullopt;
154
+ if (Strs.second .trim ().getAsInteger (0 , Val.second )) {
155
+ if (!OnlyFirstRequired || !Strs.second .trim ().empty ())
156
+ return std::nullopt;
157
+ }
158
+ return Val;
159
+ }
160
+
147
161
class AMDGPUInformationCache : public InformationCache {
148
162
public:
149
163
AMDGPUInformationCache (const Module &M, AnalysisGetter &AG,
@@ -168,9 +182,18 @@ class AMDGPUInformationCache : public InformationCache {
168
182
return ST.supportsGetDoorbellID ();
169
183
}
170
184
171
- std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) {
185
+ std::optional<std::pair<unsigned , unsigned >>
186
+ getFlatWorkGroupSizeAttr (const Function &F) const {
187
+ Attribute Attr = F.getFnAttribute (" amdgpu-flat-work-group-size" );
188
+ if (!Attr.isStringAttribute ())
189
+ return std::nullopt;
190
+ return parseRangeAttribute (Attr.getValueAsString ());
191
+ }
192
+
193
+ std::pair<unsigned , unsigned >
194
+ getDefaultFlatWorkGroupSize (const Function &F) const {
172
195
const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
173
- return ST.getFlatWorkGroupSizes (F );
196
+ return ST.getDefaultFlatWorkGroupSize (F. getCallingConv () );
174
197
}
175
198
176
199
std::pair<unsigned , unsigned >
@@ -733,6 +756,35 @@ struct AAAMDSizeRangeAttribute
733
756
return Change;
734
757
}
735
758
759
+ // / Clamp the assumed range to the default value ([Min, Max]) and emit the
760
+ // / attribute if it is not same as default.
761
+ ChangeStatus
762
+ emitAttributeIfNotDefaultAfterClamp (Attributor &A,
763
+ std::pair<unsigned , unsigned > Default) {
764
+ auto [Min, Max] = Default;
765
+ unsigned Lower = getAssumed ().getLower ().getZExtValue ();
766
+ unsigned Upper = getAssumed ().getUpper ().getZExtValue ();
767
+
768
+ // Clamp the range to the default value.
769
+ if (Lower < Min)
770
+ Lower = Min;
771
+ if (Upper > Max + 1 )
772
+ Upper = Max + 1 ;
773
+
774
+ // No manifest if the value is invalid or same as default after clamp.
775
+ if ((Lower == Min && Upper == Max + 1 ) || (Upper < Lower))
776
+ return ChangeStatus::UNCHANGED;
777
+
778
+ Function *F = getAssociatedFunction ();
779
+ LLVMContext &Ctx = F->getContext ();
780
+ SmallString<10 > Buffer;
781
+ raw_svector_ostream OS (Buffer);
782
+ OS << Lower << ' ,' << Upper - 1 ;
783
+ return A.manifestAttrs (getIRPosition (),
784
+ {Attribute::get (Ctx, AttrName, OS.str ())},
785
+ /* ForceReplace=*/ true );
786
+ }
787
+
736
788
ChangeStatus emitAttributeIfNotDefault (Attributor &A, unsigned Min,
737
789
unsigned Max) {
738
790
// Don't add the attribute if it's the implied default.
@@ -767,13 +819,21 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
767
819
/// Seed the state from the function's explicit flat work-group size attribute
/// when present, falling back to the default bounds otherwise. The state is
/// fixed optimistically right away for functions that carry the attribute or
/// use an entry-function calling convention.
void initialize(Attributor &A) override {
  Function *F = getAssociatedFunction();
  auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

  // Start from the default bounds; an explicit attribute overrides them.
  auto [Min, Max] = InfoCache.getDefaultFlatWorkGroupSize(*F);
  const auto ExplicitRange = InfoCache.getFlatWorkGroupSizeAttr(*F);
  if (ExplicitRange)
    std::tie(Min, Max) = *ExplicitRange;

  // [Min, Max] is inclusive, hence Max + 1 as the range's upper bound.
  IntegerRangeState RangeState(
      ConstantRange(APInt(32, Min), APInt(32, Max + 1)));
  clampStateAndIndicateChange(this->getState(), RangeState);

  if (ExplicitRange.has_value() ||
      AMDGPU::isEntryFunctionCC(F->getCallingConv()))
    indicateOptimisticFixpoint();
}
778
838
779
839
ChangeStatus updateImpl (Attributor &A) override {
@@ -787,9 +847,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
787
847
/// Manifest the deduced range: clamp it against the maximum flat work-group
/// range reported for the associated function and emit the attribute only if
/// the clamped result warrants it.
ChangeStatus manifest(Attributor &A) override {
  Function *F = getAssociatedFunction();
  auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
  const auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
  return emitAttributeIfNotDefaultAfterClamp(A, MaxRange);
}
794
853
795
854
/// See AbstractAttribute::getName()
0 commit comments