Skip to content

Commit 1a376e2

Browse files
committed
resolve review comments
1 parent e7f9353 commit 1a376e2

File tree

1 file changed

+13
-16
lines changed

1 file changed

+13
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,7 +1143,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11431143
if (!CallerAA || !CallerAA->isValidState())
11441144
return false;
11451145

1146-
auto Assumed = this->getAssumed();
1146+
ConstantRange Assumed = this->getAssumed();
11471147
unsigned Min = std::max(Assumed.getLower().getZExtValue(),
11481148
CallerAA->getAssumed().getLower().getZExtValue());
11491149
unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
@@ -1311,37 +1311,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
13111311
}
13121312
}
13131313

1314-
static void checkWavesPerEU(Module &M, TargetMachine &TM) {
1314+
/// The final check and update of the attribute 'amdgpu-waves-per-eu' based on
1315+
/// the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1316+
/// during attributor run because the two attributes grow in opposite direction,
1317+
/// we should not use any intermediate value to calculate waves per eu until we
1318+
/// have a determined flat workgroup size.
1319+
static void updateWavesPerEU(Module &M, TargetMachine &TM) {
13151320
for (Function &F : M) {
13161321
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
13171322

13181323
auto FlatWgrpSizeAttr =
13191324
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
1320-
auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute(
1321-
F, "amdgpu-waves-per-eu", /*OnlyFirstRequired=*/true);
13221325

13231326
unsigned MinWavesPerEU = ST.getMinWavesPerEU();
13241327
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU();
13251328

1326-
unsigned MinFlatWgrpSize = 1U;
1327-
unsigned MaxFlatWgrpSize = 1024U;
1329+
unsigned MinFlatWgrpSize = ST.getMinFlatWorkGroupSize();
1330+
unsigned MaxFlatWgrpSize = ST.getMaxFlatWorkGroupSize();
13281331
if (FlatWgrpSizeAttr.has_value()) {
13291332
MinFlatWgrpSize = FlatWgrpSizeAttr->first;
13301333
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second);
13311334
}
13321335

13331336
// Start with the max range.
13341337
unsigned Min = MinWavesPerEU;
1335-
unsigned Max = MaxWavesPerEU;
1338+
unsigned Max = MinWavesPerEU;
13361339

1337-
// If the attribute exists, set them to the value from the attribute.
1338-
if (WavesPerEUAttr.has_value()) {
1339-
Min = WavesPerEUAttr->first;
1340-
if (WavesPerEUAttr->second.has_value())
1341-
Max = *(WavesPerEUAttr->second);
1342-
}
1343-
1344-
// Compute the range from flat workgroup size.
1340+
// Compute the range from flat workgroup size. `getWavesPerEU` will also
1341+
// account for the 'amdgpu-waves-er-eu' attribute.
13451342
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
13461343
ST.getWavesPerEU(F, std::make_pair(MinFlatWgrpSize, MaxFlatWgrpSize));
13471344

@@ -1458,7 +1455,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14581455
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
14591456
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
14601457
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1461-
checkWavesPerEU(M, TM);
1458+
updateWavesPerEU(M, TM);
14621459

14631460
return Changed;
14641461
}

0 commit comments

Comments
 (0)