@@ -532,6 +532,11 @@ template <typename Config> class SizeClassAllocator64 {
532
532
uptr BytesInFreeListAtLastCheckpoint;
533
533
uptr RangesReleased;
534
534
uptr LastReleasedBytes;
535
+ // The minimum size of pushed blocks to trigger page release.
536
+ uptr TryReleaseThreshold;
537
+ // The number of bytes not triggering `releaseToOSMaybe()` because of
538
+ // the length of release interval.
539
+ uptr PendingPushedBytesDelta;
535
540
u64 LastReleaseAtNs;
536
541
};
537
542
@@ -560,8 +565,6 @@ template <typename Config> class SizeClassAllocator64 {
560
565
u32 RandState GUARDED_BY (MMLock) = 0;
561
566
BlocksInfo FreeListInfo GUARDED_BY (FLLock);
562
567
PagesInfo MemMapInfo GUARDED_BY (MMLock);
563
- // The minimum size of pushed blocks to trigger page release.
564
- uptr TryReleaseThreshold GUARDED_BY (MMLock) = 0;
565
568
ReleaseToOsInfo ReleaseInfo GUARDED_BY (MMLock) = {};
566
569
bool Exhausted GUARDED_BY (MMLock) = false;
567
570
bool isPopulatingFreeList GUARDED_BY (FLLock) = false;
@@ -610,9 +613,8 @@ template <typename Config> class SizeClassAllocator64 {
610
613
return BlockSize < PageSize / 16U ;
611
614
}
612
615
613
- ALWAYS_INLINE static bool isLargeBlock (uptr BlockSize) {
614
- const uptr PageSize = getPageSizeCached ();
615
- return BlockSize > PageSize;
616
+ ALWAYS_INLINE uptr getMinReleaseAttemptSize (uptr BlockSize) {
617
+ return roundUp (BlockSize, getPageSizeCached ());
616
618
}
617
619
618
620
ALWAYS_INLINE void initRegion (RegionInfo *Region, uptr ClassId,
@@ -631,12 +633,16 @@ template <typename Config> class SizeClassAllocator64 {
631
633
(getRandomModN (&Region->RandState , 16 ) + 1 ) * PageSize;
632
634
}
633
635
636
+ const uptr BlockSize = getSizeByClassId (ClassId);
634
637
// Releasing small blocks is expensive, set a higher threshold to avoid
635
638
// frequent page releases.
636
- if (isSmallBlock (getSizeByClassId (ClassId)))
637
- Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta;
638
- else
639
- Region->TryReleaseThreshold = PageSize;
639
+ if (isSmallBlock (BlockSize)) {
640
+ Region->ReleaseInfo .TryReleaseThreshold =
641
+ PageSize * SmallerBlockReleasePageDelta;
642
+ } else {
643
+ Region->ReleaseInfo .TryReleaseThreshold =
644
+ getMinReleaseAttemptSize (BlockSize);
645
+ }
640
646
}
641
647
642
648
void pushBatchClassBlocks (RegionInfo *Region, CompactPtrT *Array, u32 Size )
@@ -1245,6 +1251,7 @@ template <typename Config> class SizeClassAllocator64 {
1245
1251
uptr BytesInFreeList;
1246
1252
const uptr AllocatedUserEnd =
1247
1253
Region->MemMapInfo .AllocatedUser + Region->RegionBeg ;
1254
+ uptr RegionPushedBytesDelta = 0 ;
1248
1255
SinglyLinkedList<BatchGroupT> GroupsToRelease;
1249
1256
1250
1257
{
@@ -1267,6 +1274,12 @@ template <typename Config> class SizeClassAllocator64 {
1267
1274
return 0 ;
1268
1275
}
1269
1276
1277
+ // Given that we will unlock the freelist for block operations, cache the
1278
+ // value here so that when we are adapting the `TryReleaseThreshold`
1279
+ // later, we are using the right metric.
1280
+ RegionPushedBytesDelta =
1281
+ BytesInFreeList - Region->ReleaseInfo .BytesInFreeListAtLastCheckpoint ;
1282
+
1270
1283
// ==================================================================== //
1271
1284
// 2. Determine which groups can release the pages. Use a heuristic to
1272
1285
// gather groups that are candidates for doing a release.
@@ -1310,12 +1323,45 @@ template <typename Config> class SizeClassAllocator64 {
1310
1323
auto SkipRegion = [](UNUSED uptr RegionIndex) { return false ; };
1311
1324
releaseFreeMemoryToOS (Context, Recorder, SkipRegion);
1312
1325
if (Recorder.getReleasedRangesCount () > 0 ) {
1326
+ // This is the case that we didn't hit the release threshold but it has
1327
+ // been past a certain period of time. Thus we try to release some pages
1328
+ // and if it does release some additional pages, it's a hint that we are
1329
+ // able to lower the threshold. Currently, this case happens when the
1330
+ // `RegionPushedBytesDelta` is over half of the `TryReleaseThreshold`. As
1331
+ // a result, we shrink the threshold to half accordingly.
1332
+ // TODO(chiahungduan): Apply the same adjustment strategy to small blocks.
1333
+ if (!isSmallBlock (BlockSize)) {
1334
+ if (RegionPushedBytesDelta < Region->ReleaseInfo .TryReleaseThreshold &&
1335
+ Recorder.getReleasedBytes () >
1336
+ Region->ReleaseInfo .LastReleasedBytes +
1337
+ getMinReleaseAttemptSize (BlockSize)) {
1338
+ Region->ReleaseInfo .TryReleaseThreshold =
1339
+ Max (Region->ReleaseInfo .TryReleaseThreshold / 2 ,
1340
+ getMinReleaseAttemptSize (BlockSize));
1341
+ }
1342
+ }
1343
+
1313
1344
Region->ReleaseInfo .BytesInFreeListAtLastCheckpoint = BytesInFreeList;
1314
1345
Region->ReleaseInfo .RangesReleased += Recorder.getReleasedRangesCount ();
1315
1346
Region->ReleaseInfo .LastReleasedBytes = Recorder.getReleasedBytes ();
1316
1347
}
1317
1348
Region->ReleaseInfo .LastReleaseAtNs = getMonotonicTimeFast ();
1318
1349
1350
+ if (Region->ReleaseInfo .PendingPushedBytesDelta > 0 ) {
1351
+ // Instead of increasing the threshold by the amount of
1352
+ // `PendingPushedBytesDelta`, we only increase half of the amount so that
1353
+ // it won't be a leap (which may lead to higher memory pressure) because
1354
+ // of certain memory usage bursts which don't happen frequently.
1355
+ Region->ReleaseInfo .TryReleaseThreshold +=
1356
+ Region->ReleaseInfo .PendingPushedBytesDelta / 2 ;
1357
+ // This is another guard of avoiding the growth of threshold indefinitely.
1358
+ // Note that we may consider to make this configurable if we have a better
1359
+ // way to model this.
1360
+ Region->ReleaseInfo .TryReleaseThreshold = Min<uptr>(
1361
+ Region->ReleaseInfo .TryReleaseThreshold , (1UL << GroupSizeLog) / 2 );
1362
+ Region->ReleaseInfo .PendingPushedBytesDelta = 0 ;
1363
+ }
1364
+
1319
1365
// ====================================================================== //
1320
1366
// 5. Merge the `GroupsToRelease` back to the freelist.
1321
1367
// ====================================================================== //
@@ -1329,8 +1375,6 @@ template <typename Config> class SizeClassAllocator64 {
1329
1375
REQUIRES(Region->MMLock, Region->FLLock) {
1330
1376
DCHECK_GE (Region->FreeListInfo .PoppedBlocks ,
1331
1377
Region->FreeListInfo .PushedBlocks );
1332
- const uptr PageSize = getPageSizeCached ();
1333
-
1334
1378
// Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
1335
1379
// so that we won't underestimate the releasable pages. For example, the
1336
1380
// following is the region usage,
@@ -1354,34 +1398,45 @@ template <typename Config> class SizeClassAllocator64 {
1354
1398
1355
1399
const uptr RegionPushedBytesDelta =
1356
1400
BytesInFreeList - Region->ReleaseInfo .BytesInFreeListAtLastCheckpoint ;
1357
- if (RegionPushedBytesDelta < PageSize)
1358
- return false ;
1359
-
1360
- // Releasing smaller blocks is expensive, so we want to make sure that a
1361
- // significant amount of bytes are free, and that there has been a good
1362
- // amount of batches pushed to the freelist before attempting to release.
1363
- if (isSmallBlock (BlockSize) && ReleaseType == ReleaseToOS::Normal)
1364
- if (RegionPushedBytesDelta < Region->TryReleaseThreshold )
1365
- return false ;
1366
1401
1367
1402
if (ReleaseType == ReleaseToOS::Normal) {
1368
- const s32 IntervalMs = atomic_load_relaxed (&ReleaseToOsIntervalMs);
1369
- if (IntervalMs < 0 )
1403
+ if (RegionPushedBytesDelta < Region->ReleaseInfo .TryReleaseThreshold / 2 )
1404
+ return false ;
1405
+
1406
+ const u64 IntervalNs =
1407
+ static_cast <u64>(atomic_load_relaxed (&ReleaseToOsIntervalMs)) *
1408
+ 1000000 ;
1409
+ if (IntervalNs < 0 )
1370
1410
return false ;
1371
1411
1372
- // The constant 8 here is selected from profiling some apps and the number
1373
- // of unreleased pages in the large size classes is around 16 pages or
1374
- // more. Choose half of it as a heuristic and which also avoids page
1375
- // release every time for every pushBlocks() attempt by large blocks.
1376
- const bool ByPassReleaseInterval =
1377
- isLargeBlock (BlockSize) && RegionPushedBytesDelta > 8 * PageSize;
1378
- if (!ByPassReleaseInterval) {
1379
- if (Region->ReleaseInfo .LastReleaseAtNs +
1380
- static_cast <u64>(IntervalMs) * 1000000 >
1381
- getMonotonicTimeFast ()) {
1382
- // Memory was returned recently.
1412
+ const u64 CurTimeNs = getMonotonicTimeFast ();
1413
+ const u64 DiffSinceLastReleaseNs =
1414
+ CurTimeNs - Region->ReleaseInfo .LastReleaseAtNs ;
1415
+
1416
+ // At here, `RegionPushedBytesDelta` is more than half of
1417
+ // `TryReleaseThreshold`. If the last release happened 2 release interval
1418
+ // before, we will still try to see if there's any chance to release some
1419
+ // memory even if it doesn't exceed the threshold.
1420
+ if (RegionPushedBytesDelta < Region->ReleaseInfo .TryReleaseThreshold ) {
1421
+ // We want the threshold to have a shorter response time to the variant
1422
+ // memory usage patterns. According to data collected during experiments
1423
+ // (which were done with 1, 2, 4, 8 intervals), `2` strikes the better
1424
+ // balance between the memory usage and number of page release attempts.
1425
+ if (DiffSinceLastReleaseNs < 2 * IntervalNs)
1383
1426
return false ;
1384
- }
1427
+ } else if (DiffSinceLastReleaseNs < IntervalNs) {
1428
+ // In this case, we are over the threshold but we just did some page
1429
+ // release in the same release interval. This is a hint that we may want
1430
+ // a higher threshold so that we can release more memory at once.
1431
+ // `TryReleaseThreshold` will be adjusted according to how many bytes
1432
+ // are not released, i.e., the `PendingPushedBytesdelta` here.
1433
+ // TODO(chiahungduan): Apply the same adjustment strategy to small
1434
+ // blocks.
1435
+ if (!isSmallBlock (BlockSize))
1436
+ Region->ReleaseInfo .PendingPushedBytesDelta = RegionPushedBytesDelta;
1437
+
1438
+ // Memory was returned recently.
1439
+ return false ;
1385
1440
}
1386
1441
} // if (ReleaseType == ReleaseToOS::Normal)
1387
1442
@@ -1397,10 +1452,10 @@ template <typename Config> class SizeClassAllocator64 {
1397
1452
SinglyLinkedList<BatchGroupT> GroupsToRelease;
1398
1453
1399
1454
// We are examining each group and will take the minimum distance to the
1400
- // release threshold as the next Region:: TryReleaseThreshold() . Note that if
1401
- // the size of free blocks has reached the release threshold, the distance
1402
- // to the next release will be PageSize * SmallerBlockReleasePageDelta. See
1403
- // the comment on `SmallerBlockReleasePageDelta` for more details.
1455
+ // release threshold as the next ` TryReleaseThreshold` . Note that if the
1456
+ // size of free blocks has reached the release threshold, the distance to
1457
+ // the next release will be PageSize * SmallerBlockReleasePageDelta. See the
1458
+ // comment on `SmallerBlockReleasePageDelta` for more details.
1404
1459
uptr MinDistToThreshold = GroupSize;
1405
1460
1406
1461
for (BatchGroupT *BG = Region->FreeListInfo .BlockList .front (),
@@ -1548,7 +1603,7 @@ template <typename Config> class SizeClassAllocator64 {
1548
1603
// back to normal.
1549
1604
if (MinDistToThreshold == GroupSize)
1550
1605
MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta;
1551
- Region->TryReleaseThreshold = MinDistToThreshold;
1606
+ Region->ReleaseInfo . TryReleaseThreshold = MinDistToThreshold;
1552
1607
}
1553
1608
1554
1609
return GroupsToRelease;
0 commit comments