@@ -1140,7 +1140,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1140
1140
if (!CallerAA || !CallerAA->isValidState ())
1141
1141
return false ;
1142
1142
1143
- auto Assumed = this ->getAssumed ();
1143
+ ConstantRange Assumed = this ->getAssumed ();
1144
1144
unsigned Min = std::max (Assumed.getLower ().getZExtValue (),
1145
1145
CallerAA->getAssumed ().getLower ().getZExtValue ());
1146
1146
unsigned Max = std::max (Assumed.getUpper ().getZExtValue (),
@@ -1308,37 +1308,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
1308
1308
}
1309
1309
}
1310
1310
1311
- static void checkWavesPerEU (Module &M, TargetMachine &TM) {
1311
+ /// The final check and update of the attribute 'amdgpu-waves-per-eu' based on
1312
+ /// the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1313
+ /// during the attributor run because the two attributes grow in opposite
1314
+ /// directions, so we should not use any intermediate value to calculate waves
1315
+ /// per EU until we have a determined flat workgroup size.
1316
+ static void updateWavesPerEU (Module &M, TargetMachine &TM) {
1312
1317
for (Function &F : M) {
1313
1318
const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
1314
1319
1315
1320
auto FlatWgrpSizeAttr =
1316
1321
AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
1317
- auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute (
1318
- F, " amdgpu-waves-per-eu" , /* OnlyFirstRequired=*/ true );
1319
1322
1320
1323
unsigned MinWavesPerEU = ST.getMinWavesPerEU ();
1321
1324
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU ();
1322
1325
1323
- unsigned MinFlatWgrpSize = 1U ;
1324
- unsigned MaxFlatWgrpSize = 1024U ;
1326
+ unsigned MinFlatWgrpSize = ST. getMinFlatWorkGroupSize () ;
1327
+ unsigned MaxFlatWgrpSize = ST. getMaxFlatWorkGroupSize () ;
1325
1328
if (FlatWgrpSizeAttr.has_value ()) {
1326
1329
MinFlatWgrpSize = FlatWgrpSizeAttr->first ;
1327
1330
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second );
1328
1331
}
1329
1332
1330
1333
// Start with the max range.
1331
1334
unsigned Min = MinWavesPerEU;
1332
- unsigned Max = MaxWavesPerEU ;
1335
+ unsigned Max = MinWavesPerEU ;
1333
1336
1334
- // If the attribute exists, set them to the value from the attribute.
1335
- if (WavesPerEUAttr.has_value ()) {
1336
- Min = WavesPerEUAttr->first ;
1337
- if (WavesPerEUAttr->second .has_value ())
1338
- Max = *(WavesPerEUAttr->second );
1339
- }
1340
-
1341
- // Compute the range from flat workgroup size.
1337
+ // Compute the range from flat workgroup size. `getWavesPerEU` will also
1338
+ // account for the 'amdgpu-waves-per-eu' attribute.
1342
1339
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
1343
1340
ST.getWavesPerEU (F, std::make_pair (MinFlatWgrpSize, MaxFlatWgrpSize));
1344
1341
@@ -1441,7 +1438,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1441
1438
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
1442
1439
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1443
1440
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1444
- checkWavesPerEU (M, TM);
1441
+ updateWavesPerEU (M, TM);
1445
1442
1446
1443
return Changed;
1447
1444
}
0 commit comments