Skip to content

Commit f89193e

Browse files
committed
resolve review comments
1 parent 34c4d8e commit f89193e

File tree

1 file changed

+13
-16
lines changed

1 file changed

+13
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1140,7 +1140,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11401140
if (!CallerAA || !CallerAA->isValidState())
11411141
return false;
11421142

1143-
auto Assumed = this->getAssumed();
1143+
ConstantRange Assumed = this->getAssumed();
11441144
unsigned Min = std::max(Assumed.getLower().getZExtValue(),
11451145
CallerAA->getAssumed().getLower().getZExtValue());
11461146
unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
@@ -1308,37 +1308,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
13081308
}
13091309
}
13101310

1311-
static void checkWavesPerEU(Module &M, TargetMachine &TM) {
1311+
/// The final check and update of the attribute 'amdgpu-waves-per-eu' based on
1312+
/// the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1313+
/// during the attributor run because the two attributes grow in opposite
1314+
/// directions, so we should not use any intermediate value to calculate waves
1315+
/// per EU until we have a determined flat workgroup size.
1316+
static void updateWavesPerEU(Module &M, TargetMachine &TM) {
13121317
for (Function &F : M) {
13131318
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
13141319

13151320
auto FlatWgrpSizeAttr =
13161321
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
1317-
auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute(
1318-
F, "amdgpu-waves-per-eu", /*OnlyFirstRequired=*/true);
13191322

13201323
unsigned MinWavesPerEU = ST.getMinWavesPerEU();
13211324
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU();
13221325

1323-
unsigned MinFlatWgrpSize = 1U;
1324-
unsigned MaxFlatWgrpSize = 1024U;
1326+
unsigned MinFlatWgrpSize = ST.getMinFlatWorkGroupSize();
1327+
unsigned MaxFlatWgrpSize = ST.getMaxFlatWorkGroupSize();
13251328
if (FlatWgrpSizeAttr.has_value()) {
13261329
MinFlatWgrpSize = FlatWgrpSizeAttr->first;
13271330
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second);
13281331
}
13291332

13301333
// Start with the max range.
13311334
unsigned Min = MinWavesPerEU;
1332-
unsigned Max = MaxWavesPerEU;
1335+
unsigned Max = MinWavesPerEU;
13331336

1334-
// If the attribute exists, set them to the value from the attribute.
1335-
if (WavesPerEUAttr.has_value()) {
1336-
Min = WavesPerEUAttr->first;
1337-
if (WavesPerEUAttr->second.has_value())
1338-
Max = *(WavesPerEUAttr->second);
1339-
}
1340-
1341-
// Compute the range from flat workgroup size.
1337+
// Compute the range from flat workgroup size. `getWavesPerEU` will also
1338+
// account for the 'amdgpu-waves-per-eu' attribute.
13421339
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
13431340
ST.getWavesPerEU(F, std::make_pair(MinFlatWgrpSize, MaxFlatWgrpSize));
13441341

@@ -1441,7 +1438,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14411438
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
14421439
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
14431440
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1444-
checkWavesPerEU(M, TM);
1441+
updateWavesPerEU(M, TM);
14451442

14461443
return Changed;
14471444
}

0 commit comments

Comments
 (0)