
Commit 36dff0d

[scudo] Reduce unsuccessful attempts of page releasing (#110583)
We introduce a new strategy to track how many bytes are not released because of the constraint of the release interval. This adapts the `TryReleaseThreshold` so that we avoid releasing the same pages multiple times (and wasting time when there are no pages to release). On Android, the number of release attempts decreases by 33% (572 to 382) and the worst case drops from 251 to 33, while RSS usage stays almost the same (with some improvements as well). Note that in this CL, the strategy is only applied to non-small blocks; we will bring it to all size classes later.
1 parent 65e69f7 commit 36dff0d
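To make the adaptive scheme easier to follow, here is a minimal, self-contained C++ sketch of the idea described above. It is not scudo's implementation: the field names mirror the patch (`TryReleaseThreshold`, `PendingPushedBytesDelta`), but the helper functions, timestamps, and the constants in main() are illustrative assumptions only.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Hypothetical, simplified model of the per-region release state; the real
// scudo code keeps these fields in ReleaseToOsInfo and guards them with locks.
struct ReleaseState {
  uint64_t TryReleaseThreshold;     // Minimum pushed bytes before trying a release.
  uint64_t PendingPushedBytesDelta; // Bytes held back by the release interval.
  uint64_t LastReleaseNs;           // Time of the last release attempt.
};

// Decide whether to attempt a page release. `PushedBytesDelta` is the number
// of bytes freed since the last checkpoint; `NowNs`/`IntervalNs` model the
// release-interval check.
bool shouldAttemptRelease(ReleaseState &S, uint64_t PushedBytesDelta,
                          uint64_t NowNs, uint64_t IntervalNs) {
  if (PushedBytesDelta < S.TryReleaseThreshold / 2)
    return false;
  const uint64_t SinceLast = NowNs - S.LastReleaseNs;
  if (PushedBytesDelta < S.TryReleaseThreshold) {
    // Below the threshold: only try if two full intervals have already passed.
    return SinceLast >= 2 * IntervalNs;
  }
  if (SinceLast < IntervalNs) {
    // Over the threshold but throttled by the interval: remember how much was
    // not released so the threshold can grow at the next adaptation.
    S.PendingPushedBytesDelta = PushedBytesDelta;
    return false;
  }
  return true;
}

// Adapt the threshold after an attempt: halve it when a below-threshold
// attempt still released a meaningful amount, grow it by half of the pending
// bytes when the interval held a release back, and cap it so it cannot grow
// indefinitely.
void adaptThreshold(ReleaseState &S, uint64_t PushedBytesDelta,
                    uint64_t ReleasedBytes, uint64_t LastReleasedBytes,
                    uint64_t MinAttemptSize, uint64_t MaxThreshold) {
  if (PushedBytesDelta < S.TryReleaseThreshold &&
      ReleasedBytes > LastReleasedBytes + MinAttemptSize)
    S.TryReleaseThreshold = std::max(S.TryReleaseThreshold / 2, MinAttemptSize);
  if (S.PendingPushedBytesDelta > 0) {
    S.TryReleaseThreshold =
        std::min(S.TryReleaseThreshold + S.PendingPushedBytesDelta / 2,
                 MaxThreshold);
    S.PendingPushedBytesDelta = 0;
  }
}

int main() {
  ReleaseState S{/*TryReleaseThreshold=*/64 << 10,
                 /*PendingPushedBytesDelta=*/0,
                 /*LastReleaseNs=*/0};
  const uint64_t IntervalNs = 1000;

  // 48 KiB pushed, 1.5 intervals since the last release: below the threshold
  // and less than two intervals have passed, so no attempt is made.
  std::printf("%d\n", shouldAttemptRelease(S, 48 << 10, /*NowNs=*/1500, IntervalNs));

  // A release just happened; 80 KiB are pushed half an interval later: the
  // attempt is deferred and the delta is recorded for the next adaptation.
  S.LastReleaseNs = 2000;
  std::printf("%d\n", shouldAttemptRelease(S, 80 << 10, /*NowNs=*/2500, IntervalNs));

  // The threshold grows by half of the recorded delta: 65536 + 81920 / 2 = 106496.
  adaptThreshold(S, 80 << 10, /*ReleasedBytes=*/0, /*LastReleasedBytes=*/0,
                 /*MinAttemptSize=*/4 << 10, /*MaxThreshold=*/256 << 10);
  std::printf("threshold=%llu\n", (unsigned long long)S.TryReleaseThreshold);
  return 0;
}

The actual patch additionally special-cases small blocks and forced releases, and caps the threshold at half of the group size ((1UL << GroupSizeLog) / 2), as shown in the diff below.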

File tree

1 file changed: +94 -39 lines changed

compiler-rt/lib/scudo/standalone/primary64.h

Lines changed: 94 additions & 39 deletions
@@ -532,6 +532,11 @@ template <typename Config> class SizeClassAllocator64 {
     uptr BytesInFreeListAtLastCheckpoint;
     uptr RangesReleased;
     uptr LastReleasedBytes;
+    // The minimum size of pushed blocks to trigger page release.
+    uptr TryReleaseThreshold;
+    // The number of bytes not triggering `releaseToOSMaybe()` because of
+    // the length of release interval.
+    uptr PendingPushedBytesDelta;
     u64 LastReleaseAtNs;
   };
 
@@ -560,8 +565,6 @@ template <typename Config> class SizeClassAllocator64 {
     u32 RandState GUARDED_BY(MMLock) = 0;
     BlocksInfo FreeListInfo GUARDED_BY(FLLock);
     PagesInfo MemMapInfo GUARDED_BY(MMLock);
-    // The minimum size of pushed blocks to trigger page release.
-    uptr TryReleaseThreshold GUARDED_BY(MMLock) = 0;
     ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {};
     bool Exhausted GUARDED_BY(MMLock) = false;
     bool isPopulatingFreeList GUARDED_BY(FLLock) = false;
@@ -610,9 +613,8 @@ template <typename Config> class SizeClassAllocator64 {
     return BlockSize < PageSize / 16U;
   }
 
-  ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) {
-    const uptr PageSize = getPageSizeCached();
-    return BlockSize > PageSize;
+  ALWAYS_INLINE uptr getMinReleaseAttemptSize(uptr BlockSize) {
+    return roundUp(BlockSize, getPageSizeCached());
   }
 
   ALWAYS_INLINE void initRegion(RegionInfo *Region, uptr ClassId,
@@ -631,12 +633,16 @@ template <typename Config> class SizeClassAllocator64 {
           (getRandomModN(&Region->RandState, 16) + 1) * PageSize;
     }
 
+    const uptr BlockSize = getSizeByClassId(ClassId);
     // Releasing small blocks is expensive, set a higher threshold to avoid
     // frequent page releases.
-    if (isSmallBlock(getSizeByClassId(ClassId)))
-      Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta;
-    else
-      Region->TryReleaseThreshold = PageSize;
+    if (isSmallBlock(BlockSize)) {
+      Region->ReleaseInfo.TryReleaseThreshold =
+          PageSize * SmallerBlockReleasePageDelta;
+    } else {
+      Region->ReleaseInfo.TryReleaseThreshold =
+          getMinReleaseAttemptSize(BlockSize);
+    }
   }
 
   void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size)
@@ -1245,6 +1251,7 @@ template <typename Config> class SizeClassAllocator64 {
     uptr BytesInFreeList;
     const uptr AllocatedUserEnd =
        Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
+    uptr RegionPushedBytesDelta = 0;
     SinglyLinkedList<BatchGroupT> GroupsToRelease;
 
     {
@@ -1267,6 +1274,12 @@ template <typename Config> class SizeClassAllocator64 {
         return 0;
       }
 
+      // Given that we will unlock the freelist for block operations, cache the
+      // value here so that when we are adapting the `TryReleaseThreshold`
+      // later, we are using the right metric.
+      RegionPushedBytesDelta =
+          BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
+
       // ==================================================================== //
       // 2. Determine which groups can release the pages. Use a heuristic to
       //    gather groups that are candidates for doing a release.
@@ -1310,12 +1323,45 @@ template <typename Config> class SizeClassAllocator64 {
     auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
     if (Recorder.getReleasedRangesCount() > 0) {
+      // This is the case that we didn't hit the release threshold but it has
+      // been past a certain period of time. Thus we try to release some pages
+      // and if it does release some additional pages, it's hint that we are
+      // able to lower the threshold. Currently, this case happens when the
+      // `RegionPushedBytesDelta` is over half of the `TryReleaseThreshold`. As
+      // a result, we shrink the threshold to half accordingly.
+      // TODO(chiahungduan): Apply the same adjustment strategy to small blocks.
+      if (!isSmallBlock(BlockSize)) {
+        if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold &&
+            Recorder.getReleasedBytes() >
+                Region->ReleaseInfo.LastReleasedBytes +
+                    getMinReleaseAttemptSize(BlockSize)) {
+          Region->ReleaseInfo.TryReleaseThreshold =
+              Max(Region->ReleaseInfo.TryReleaseThreshold / 2,
+                  getMinReleaseAttemptSize(BlockSize));
+        }
+      }
+
       Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
       Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
       Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
     }
     Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast();
 
+    if (Region->ReleaseInfo.PendingPushedBytesDelta > 0) {
+      // Instead of increasing the threshold by the amount of
+      // `PendingPushedBytesDelta`, we only increase half of the amount so that
+      // it won't be a leap (which may lead to higher memory pressure) because
+      // of certain memory usage bursts which don't happen frequently.
+      Region->ReleaseInfo.TryReleaseThreshold +=
+          Region->ReleaseInfo.PendingPushedBytesDelta / 2;
+      // This is another guard of avoiding the growth of threshold indefinitely.
+      // Note that we may consider to make this configurable if we have a better
+      // way to model this.
+      Region->ReleaseInfo.TryReleaseThreshold = Min<uptr>(
+          Region->ReleaseInfo.TryReleaseThreshold, (1UL << GroupSizeLog) / 2);
+      Region->ReleaseInfo.PendingPushedBytesDelta = 0;
+    }
+
     // ====================================================================== //
     // 5. Merge the `GroupsToRelease` back to the freelist.
     // ====================================================================== //
@@ -1329,8 +1375,6 @@ template <typename Config> class SizeClassAllocator64 {
       REQUIRES(Region->MMLock, Region->FLLock) {
     DCHECK_GE(Region->FreeListInfo.PoppedBlocks,
               Region->FreeListInfo.PushedBlocks);
-    const uptr PageSize = getPageSizeCached();
-
     // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
     // so that we won't underestimate the releasable pages. For example, the
    // following is the region usage,
@@ -1354,34 +1398,45 @@ template <typename Config> class SizeClassAllocator64 {
 
     const uptr RegionPushedBytesDelta =
         BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
-    if (RegionPushedBytesDelta < PageSize)
-      return false;
-
-    // Releasing smaller blocks is expensive, so we want to make sure that a
-    // significant amount of bytes are free, and that there has been a good
-    // amount of batches pushed to the freelist before attempting to release.
-    if (isSmallBlock(BlockSize) && ReleaseType == ReleaseToOS::Normal)
-      if (RegionPushedBytesDelta < Region->TryReleaseThreshold)
-        return false;
 
     if (ReleaseType == ReleaseToOS::Normal) {
-      const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
-      if (IntervalMs < 0)
+      if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold / 2)
+        return false;
+
+      const u64 IntervalNs =
+          static_cast<u64>(atomic_load_relaxed(&ReleaseToOsIntervalMs)) *
+          1000000;
+      if (IntervalNs < 0)
        return false;
 
-      // The constant 8 here is selected from profiling some apps and the number
-      // of unreleased pages in the large size classes is around 16 pages or
-      // more. Choose half of it as a heuristic and which also avoids page
-      // release every time for every pushBlocks() attempt by large blocks.
-      const bool ByPassReleaseInterval =
-          isLargeBlock(BlockSize) && RegionPushedBytesDelta > 8 * PageSize;
-      if (!ByPassReleaseInterval) {
-        if (Region->ReleaseInfo.LastReleaseAtNs +
-                static_cast<u64>(IntervalMs) * 1000000 >
-            getMonotonicTimeFast()) {
-          // Memory was returned recently.
+      const u64 CurTimeNs = getMonotonicTimeFast();
+      const u64 DiffSinceLastReleaseNs =
+          CurTimeNs - Region->ReleaseInfo.LastReleaseAtNs;
+
+      // At here, `RegionPushedBytesDelta` is more than half of
+      // `TryReleaseThreshold`. If the last release happened 2 release interval
+      // before, we will still try to see if there's any chance to release some
+      // memory even it doesn't exceed the threshold.
+      if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold) {
+        // We want the threshold to have a shorter response time to the variant
+        // memory usage patterns. According to data collected during experiments
+        // (which were done with 1, 2, 4, 8 intervals), `2` strikes the better
+        // balance between the memory usage and number of page release attempts.
+        if (DiffSinceLastReleaseNs < 2 * IntervalNs)
          return false;
-        }
+      } else if (DiffSinceLastReleaseNs < IntervalNs) {
+        // In this case, we are over the threshold but we just did some page
+        // release in the same release interval. This is a hint that we may want
+        // a higher threshold so that we can release more memory at once.
+        // `TryReleaseThreshold` will be adjusted according to how many bytes
+        // are not released, i.e., the `PendingPushedBytesdelta` here.
+        // TODO(chiahungduan): Apply the same adjustment strategy to small
+        // blocks.
+        if (!isSmallBlock(BlockSize))
+          Region->ReleaseInfo.PendingPushedBytesDelta = RegionPushedBytesDelta;
+
+        // Memory was returned recently.
+        return false;
      }
    } // if (ReleaseType == ReleaseToOS::Normal)
 
@@ -1397,10 +1452,10 @@ template <typename Config> class SizeClassAllocator64 {
     SinglyLinkedList<BatchGroupT> GroupsToRelease;
 
     // We are examining each group and will take the minimum distance to the
-    // release threshold as the next Region::TryReleaseThreshold(). Note that if
-    // the size of free blocks has reached the release threshold, the distance
-    // to the next release will be PageSize * SmallerBlockReleasePageDelta. See
-    // the comment on `SmallerBlockReleasePageDelta` for more details.
+    // release threshold as the next `TryReleaseThreshold`. Note that if the
+    // size of free blocks has reached the release threshold, the distance to
+    // the next release will be PageSize * SmallerBlockReleasePageDelta. See the
+    // comment on `SmallerBlockReleasePageDelta` for more details.
     uptr MinDistToThreshold = GroupSize;
 
     for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(),
@@ -1548,7 +1603,7 @@ template <typename Config> class SizeClassAllocator64 {
       // back to normal.
      if (MinDistToThreshold == GroupSize)
        MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta;
-      Region->TryReleaseThreshold = MinDistToThreshold;
+      Region->ReleaseInfo.TryReleaseThreshold = MinDistToThreshold;
     }
 
     return GroupsToRelease;
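For reference, the new `getMinReleaseAttemptSize()` rounds the block size up to page granularity, effectively replacing both the old fixed `PageSize` trigger and the removed `isLargeBlock()` interval bypass. A tiny sketch of that rounding follows; the `roundUpTo` helper and the 4 KiB page size are illustrative assumptions, with scudo's `roundUp()` assumed to behave the same way for power-of-two boundaries.

#include <cstdint>
#include <cstdio>

// Illustrative stand-in for scudo's roundUp(); only valid for power-of-two
// boundaries such as the page size.
static uint64_t roundUpTo(uint64_t Size, uint64_t Boundary) {
  return (Size + Boundary - 1) & ~(Boundary - 1);
}

int main() {
  const uint64_t PageSize = 4096; // Assumed page size for this example.
  // A 1 KiB block class: the minimum release attempt size is one full page.
  std::printf("%llu\n", (unsigned long long)roundUpTo(1024, PageSize)); // 4096
  // A 5000-byte block class: one block already spans two pages, so the
  // minimum release attempt size becomes two pages.
  std::printf("%llu\n", (unsigned long long)roundUpTo(5000, PageSize)); // 8192
  return 0;
}

Roughly speaking, a release attempt for a non-small size class is only considered worthwhile once at least one block's worth of whole pages could be freed.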

0 commit comments
