@@ -1143,7 +1143,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1143
1143
if (!CallerAA || !CallerAA->isValidState ())
1144
1144
return false ;
1145
1145
1146
- auto Assumed = this ->getAssumed ();
1146
+ ConstantRange Assumed = this ->getAssumed ();
1147
1147
unsigned Min = std::max (Assumed.getLower ().getZExtValue (),
1148
1148
CallerAA->getAssumed ().getLower ().getZExtValue ());
1149
1149
unsigned Max = std::max (Assumed.getUpper ().getZExtValue (),
@@ -1311,37 +1311,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
1311
1311
}
1312
1312
}
1313
1313
1314
- static void checkWavesPerEU (Module &M, TargetMachine &TM) {
1314
+ /// The final check and update of the attribute 'amdgpu-waves-per-eu' based on
1315
+ /// the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1316
+ /// during the attributor run because the two attributes grow in opposite directions;
1317
+ /// we should not use any intermediate value to calculate waves per eu until we
1318
+ /// have a determined flat workgroup size.
1319
+ static void updateWavesPerEU (Module &M, TargetMachine &TM) {
1315
1320
for (Function &F : M) {
1316
1321
const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
1317
1322
1318
1323
auto FlatWgrpSizeAttr =
1319
1324
AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
1320
- auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute (
1321
- F, " amdgpu-waves-per-eu" , /* OnlyFirstRequired=*/ true );
1322
1325
1323
1326
unsigned MinWavesPerEU = ST.getMinWavesPerEU ();
1324
1327
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU ();
1325
1328
1326
- unsigned MinFlatWgrpSize = 1U ;
1327
- unsigned MaxFlatWgrpSize = 1024U ;
1329
+ unsigned MinFlatWgrpSize = ST. getMinFlatWorkGroupSize () ;
1330
+ unsigned MaxFlatWgrpSize = ST. getMaxFlatWorkGroupSize () ;
1328
1331
if (FlatWgrpSizeAttr.has_value ()) {
1329
1332
MinFlatWgrpSize = FlatWgrpSizeAttr->first ;
1330
1333
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second );
1331
1334
}
1332
1335
1333
1336
// Start with the max range.
1334
1337
unsigned Min = MinWavesPerEU;
1335
- unsigned Max = MaxWavesPerEU ;
1338
+ unsigned Max = MinWavesPerEU ;
1336
1339
1337
- // If the attribute exists, set them to the value from the attribute.
1338
- if (WavesPerEUAttr.has_value ()) {
1339
- Min = WavesPerEUAttr->first ;
1340
- if (WavesPerEUAttr->second .has_value ())
1341
- Max = *(WavesPerEUAttr->second );
1342
- }
1343
-
1344
- // Compute the range from flat workgroup size.
1340
+ // Compute the range from flat workgroup size. `getWavesPerEU` will also
1341
+ // account for the 'amdgpu-waves-per-eu' attribute.
1345
1342
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
1346
1343
ST.getWavesPerEU (F, std::make_pair (MinFlatWgrpSize, MaxFlatWgrpSize));
1347
1344
@@ -1458,7 +1455,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1458
1455
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
1459
1456
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1460
1457
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1461
- checkWavesPerEU (M, TM);
1458
+ updateWavesPerEU (M, TM);
1462
1459
1463
1460
return Changed;
1464
1461
}
0 commit comments