@@ -356,26 +356,25 @@ class LoopInterchangeLegality {
356
356
SmallVector<PHINode *, 8 > InnerLoopInductions;
357
357
};
358
358
359
+ using CostMapTy = DenseMap<const Loop *, std::pair<unsigned , CacheCostTy>>;
360
+
359
361
// / LoopInterchangeProfitability checks if it is profitable to interchange the
360
362
// / loop.
361
363
class LoopInterchangeProfitability {
362
364
public:
363
365
LoopInterchangeProfitability (Loop *Outer, Loop *Inner, ScalarEvolution *SE,
364
- OptimizationRemarkEmitter *ORE)
365
- : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
366
+ OptimizationRemarkEmitter *ORE,
367
+ const std::optional<CostMapTy> &CM)
368
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE), CostMap(CM) {}
366
369
367
370
// / Check if the loop interchange is profitable.
368
371
bool isProfitable (const Loop *InnerLoop, const Loop *OuterLoop,
369
372
unsigned InnerLoopId, unsigned OuterLoopId,
370
- CharMatrix &DepMatrix,
371
- const DenseMap<const Loop *, unsigned > &CostMap,
372
- std::unique_ptr<CacheCost> &CC);
373
+ CharMatrix &DepMatrix);
373
374
374
375
private:
375
376
int getInstrOrderCost ();
376
- std::optional<bool > isProfitablePerLoopCacheAnalysis (
377
- const DenseMap<const Loop *, unsigned > &CostMap,
378
- std::unique_ptr<CacheCost> &CC);
377
+ std::optional<bool > isProfitablePerLoopCacheAnalysis ();
379
378
std::optional<bool > isProfitablePerInstrOrderCost ();
380
379
std::optional<bool > isProfitableForVectorization (unsigned InnerLoopId,
381
380
unsigned OuterLoopId,
@@ -388,6 +387,8 @@ class LoopInterchangeProfitability {
388
387
389
388
// / Interface to emit optimization remarks.
390
389
OptimizationRemarkEmitter *ORE;
390
+
391
+ const std::optional<CostMapTy> &CostMap;
391
392
};
392
393
393
394
// / LoopInterchangeTransform interchanges the loop.
@@ -497,11 +498,13 @@ struct LoopInterchange {
497
498
// indicates the loop should be placed as the innermost loop.
498
499
//
499
500
// For the old pass manager CacheCost would be null.
500
- DenseMap< const Loop *, unsigned > CostMap;
501
+ std::optional<CostMapTy > CostMap = std::nullopt ;
501
502
if (CC != nullptr ) {
503
+ CostMap = CostMapTy ();
502
504
const auto &LoopCosts = CC->getLoopCosts ();
503
505
for (unsigned i = 0 ; i < LoopCosts.size (); i++) {
504
- CostMap[LoopCosts[i].first ] = i;
506
+ const auto &Cost = LoopCosts[i];
507
+ (*CostMap)[Cost.first ] = std::make_pair (i, Cost.second );
505
508
}
506
509
}
507
510
// We try to achieve the globally optimal memory access for the loopnest,
@@ -537,7 +540,7 @@ struct LoopInterchange {
537
540
bool processLoop (Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
538
541
unsigned OuterLoopId,
539
542
std::vector<std::vector<char >> &DependencyMatrix,
540
- const DenseMap< const Loop *, unsigned > &CostMap) {
543
+ const std::optional<CostMapTy > &CostMap) {
541
544
LLVM_DEBUG (dbgs () << " Processing InnerLoopId = " << InnerLoopId
542
545
<< " and OuterLoopId = " << OuterLoopId << " \n " );
543
546
LoopInterchangeLegality LIL (OuterLoop, InnerLoop, SE, ORE);
@@ -546,9 +549,9 @@ struct LoopInterchange {
546
549
return false ;
547
550
}
548
551
LLVM_DEBUG (dbgs () << " Loops are legal to interchange\n " );
549
- LoopInterchangeProfitability LIP (OuterLoop, InnerLoop, SE, ORE);
552
+ LoopInterchangeProfitability LIP (OuterLoop, InnerLoop, SE, ORE, CostMap );
550
553
if (!LIP.isProfitable (InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
551
- DependencyMatrix, CostMap, CC )) {
554
+ DependencyMatrix)) {
552
555
LLVM_DEBUG (dbgs () << " Interchanging loops not profitable.\n " );
553
556
return false ;
554
557
}
@@ -1127,29 +1130,60 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
1127
1130
}
1128
1131
1129
1132
std::optional<bool >
1130
- LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis (
1131
- const DenseMap<const Loop *, unsigned > &CostMap,
1132
- std::unique_ptr<CacheCost> &CC) {
1133
+ LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis () {
1133
1134
// This is the new cost model returned from loop cache analysis.
1134
1135
// A smaller index means the loop should be placed as an outer loop, and vice
1135
1136
// versa.
1136
- if (CostMap.contains (InnerLoop) && CostMap.contains (OuterLoop)) {
1137
- unsigned InnerIndex = 0 , OuterIndex = 0 ;
1138
- InnerIndex = CostMap.find (InnerLoop)->second ;
1139
- OuterIndex = CostMap.find (OuterLoop)->second ;
1140
- LLVM_DEBUG (dbgs () << " InnerIndex = " << InnerIndex
1141
- << " , OuterIndex = " << OuterIndex << " \n " );
1142
- if (InnerIndex < OuterIndex)
1143
- return std::optional<bool >(true );
1144
- assert (InnerIndex != OuterIndex && " CostMap should assign unique "
1145
- " numbers to each loop" );
1146
- if (CC->getLoopCost (*OuterLoop) == CC->getLoopCost (*InnerLoop))
1147
- return std::nullopt;
1148
- return std::optional<bool >(false );
1149
- }
1150
- return std::nullopt;
1137
+ if (!CostMap.has_value ())
1138
+ return std::nullopt;
1139
+
1140
+ auto InnerIte = CostMap->find (InnerLoop);
1141
+ auto OuterIte = CostMap->find (OuterLoop);
1142
+ if (InnerIte == CostMap->end () || OuterIte == CostMap->end ())
1143
+ return std::nullopt;
1144
+
1145
+ const auto &[InnerIndex, InnerCost] = InnerIte->second ;
1146
+ const auto &[OuterIndex, OuterCost] = OuterIte->second ;
1147
+ LLVM_DEBUG (dbgs () << " InnerIndex = " << InnerIndex
1148
+ << " , OuterIndex = " << OuterIndex << " \n " );
1149
+ assert (InnerIndex != OuterIndex && " CostMap should assign unique "
1150
+ " numbers to each loop" );
1151
+
1152
+ if (InnerCost == OuterCost)
1153
+ return std::nullopt;
1154
+
1155
+ return InnerIndex < OuterIndex;
1151
1156
}
1152
1157
1158
+ // This function doesn't satisfy transitivity. Consider the following case.
1159
+ //
1160
+ // ```
1161
+ // for (int k = 0; k < N; k++) {
1162
+ // for (int j = 0; j < N; j++) {
1163
+ // for (int i = 0; i < N; i++) {
1164
+ // dst0[i][j][k] += aa[i][j] + bb[i][j] + cc[j][k];
1165
+ // dst1[k][j][i] += dd[i][j] + ee[i][j] + ff[j][k];
1166
+ // }
1167
+ // }
1168
+ // }
1169
+ //
1170
+ // ```
1171
+ //
1172
+ // getInstrOrderCost will return the following values.
1173
+ //
1174
+ // Outer | Inner | Cost
1175
+ // -------+-------+------
1176
+ // k | j | -2
1177
+ // j | i | -4
1178
+ // k | i | 0
1179
+ //
1180
+ // This means that this function says interchanging (k, j) loops and (j, i)
1181
+ // loops are profitable, but not (k, i). The root cause of this is that the
1182
+ // getInstrOrderCost only sees the loops we are checking. We can resolve this if
1183
+ // we also consider the order going through other inductions. As for the above
1184
+ // case, we can infer that interchanging `k` and `i` is profitable (it is
1185
+ // better to move the `k` loop to inner position) by `bb[i][j]` and `cc[j][k]`.
1186
+ // However, such an accurate calculation is expensive, so we don't do it.
1153
1187
std::optional<bool >
1154
1188
LoopInterchangeProfitability::isProfitablePerInstrOrderCost () {
1155
1189
// Legacy cost model: this is rough cost estimation algorithm. It counts the
@@ -1184,11 +1218,27 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1184
1218
return std::optional<bool >(!DepMatrix.empty ());
1185
1219
}
1186
1220
1187
- bool LoopInterchangeProfitability::isProfitable (
1188
- const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1189
- unsigned OuterLoopId, CharMatrix &DepMatrix,
1190
- const DenseMap<const Loop *, unsigned > &CostMap,
1191
- std::unique_ptr<CacheCost> &CC) {
1221
+ // The bubble-sort fashion algorithm is adopted to sort the loop nest, so the
1222
+ // comparison function should ideally induce a strict weak ordering required by
1223
+ // some standard C++ libraries. In particular, isProfitable should hold the
1224
+ // following properties.
1225
+ //
1226
+ // Asymmetry: If isProfitable(a, b) is true then isProfitable(b, a) is false.
1227
+ // Transitivity: If both isProfitable(a, b) and isProfitable(b, c) are true then
1228
+ // isProfitable(a, c) is true.
1229
+ //
1230
+ // The most important thing is not to make unprofitable interchange. From this
1231
+ // point of view, asymmetry is important. This is because if both
1232
+ // isProfitable(a, b) and isProfitable(b, a) are true, then an unprofitable
1233
+ // transformation (one of them) will be performed. On the other hand, a lack of
1234
+ // transitivity might cause some optimization opportunities to be lost, but
1235
+ // won't trigger an unprofitable one. Moreover, guaranteeing transitivity is
1236
+ // expensive. Therefore, isProfitable only guarantees asymmetry.
1237
+ bool LoopInterchangeProfitability::isProfitable (const Loop *InnerLoop,
1238
+ const Loop *OuterLoop,
1239
+ unsigned InnerLoopId,
1240
+ unsigned OuterLoopId,
1241
+ CharMatrix &DepMatrix) {
1192
1242
// isProfitable() is structured to avoid endless loop interchange.
1193
1243
// If loop cache analysis could decide the profitability then,
1194
1244
// profitability check will stop and return the analysis result.
@@ -1197,15 +1247,14 @@ bool LoopInterchangeProfitability::isProfitable(
1197
1247
// profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1198
1248
// analyze the profitability then only, isProfitableForVectorization
1199
1249
// will decide.
1200
- std::optional<bool > shouldInterchange =
1201
- isProfitablePerLoopCacheAnalysis (CostMap, CC);
1202
- if (!shouldInterchange.has_value ()) {
1203
- shouldInterchange = isProfitablePerInstrOrderCost ();
1204
- if (!shouldInterchange.has_value ())
1205
- shouldInterchange =
1250
+ std::optional<bool > ShouldInterchange = isProfitablePerLoopCacheAnalysis ();
1251
+ if (!ShouldInterchange.has_value ()) {
1252
+ ShouldInterchange = isProfitablePerInstrOrderCost ();
1253
+ if (!ShouldInterchange.has_value ())
1254
+ ShouldInterchange =
1206
1255
isProfitableForVectorization (InnerLoopId, OuterLoopId, DepMatrix);
1207
1256
}
1208
- if (!shouldInterchange .has_value ()) {
1257
+ if (!ShouldInterchange .has_value ()) {
1209
1258
ORE->emit ([&]() {
1210
1259
return OptimizationRemarkMissed (DEBUG_TYPE, " InterchangeNotProfitable" ,
1211
1260
InnerLoop->getStartLoc (),
@@ -1214,7 +1263,8 @@ bool LoopInterchangeProfitability::isProfitable(
1214
1263
" interchange." ;
1215
1264
});
1216
1265
return false ;
1217
- } else if (!shouldInterchange.value ()) {
1266
+ }
1267
+ if (!ShouldInterchange.value ()) {
1218
1268
ORE->emit ([&]() {
1219
1269
return OptimizationRemarkMissed (DEBUG_TYPE, " InterchangeNotProfitable" ,
1220
1270
InnerLoop->getStartLoc (),
0 commit comments