Skip to content

Commit 5c245ff

Browse files
authored
gh-132917: Check resident set size (RSS) before GC trigger. (gh-133399)
For the free-threaded build, check the process resident set size (RSS) increase before triggering a full automatic garbage collection. If the RSS has not increased by 10% since the last collection, the collection is deferred.
1 parent 8e08ac9 commit 5c245ff

File tree

4 files changed

+230
-5
lines changed

4 files changed

+230
-5
lines changed

Doc/library/gc.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ The :mod:`gc` module provides the following functions:
128128
starts. For each collection, all the objects in the young generation and some
129129
fraction of the old generation is collected.
130130

131+
In the free-threaded build, the increase in process resident set size (RSS)
132+
is also checked before running the collector. If the RSS has not increased
133+
by 10% since the last collection and the net number of object allocations
134+
has not exceeded 40 times *threshold0*, the collection is not run.
135+
131136
The fraction of the old generation that is collected is **inversely** proportional
132137
to *threshold1*. The larger *threshold1* is, the slower objects in the old generation
133138
are collected.

Include/internal/pycore_interp_structs.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,16 @@ struct _gc_runtime_state {
245245

246246
/* True if gc.freeze() has been used. */
247247
int freeze_active;
248+
249+
/* Resident set size (RSS) of the process after last GC. */
250+
Py_ssize_t last_rss;
251+
252+
/* This accumulates the new object count whenever collection is deferred
253+
due to the RSS increase condition not being met. Reset on collection. */
254+
Py_ssize_t deferred_count;
255+
256+
/* Mutex held for gc_should_collect_rss(). */
257+
PyMutex mutex;
248258
#endif
249259
};
250260

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
For the free-threaded build, check the process resident set size (RSS)
2+
increase before triggering a full automatic garbage collection. If the RSS
3+
has not increased by 10% since the last collection, the collection is deferred.

Python/gc_free_threading.c

Lines changed: 212 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,29 @@
1717

1818
#include "pydtrace.h"
1919

20+
// Platform-specific includes for get_current_rss().
21+
#ifdef _WIN32
22+
#include <windows.h>
23+
#include <psapi.h> // For GetProcessMemoryInfo
24+
#elif defined(__linux__)
25+
#include <unistd.h> // For sysconf, getpid
26+
#elif defined(__APPLE__)
27+
#include <mach/mach.h>
28+
#include <unistd.h> // For sysconf, getpid
29+
#elif defined(__FreeBSD__)
30+
#include <sys/types.h>
31+
#include <sys/sysctl.h>
32+
#include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
33+
#include <kvm.h>
34+
#include <unistd.h> // For sysconf, getpid
35+
#include <fcntl.h> // For O_RDONLY
36+
#include <limits.h> // For _POSIX2_LINE_MAX
37+
#elif defined(__OpenBSD__)
38+
#include <sys/types.h>
39+
#include <sys/sysctl.h>
40+
#include <sys/user.h> // For kinfo_proc
41+
#include <unistd.h> // For sysconf, getpid
42+
#endif
2043

2144
// enable the "mark alive" pass of GC
2245
#define GC_ENABLE_MARK_ALIVE 1
@@ -1878,6 +1901,180 @@ cleanup_worklist(struct worklist *worklist)
18781901
}
18791902
}
18801903

1904+
// Return the current resident set size (RSS) of the process, in units of KB.
1905+
// Returns -1 if this operation is not supported or on failure.
1906+
static Py_ssize_t
1907+
get_current_rss(void)
1908+
{
1909+
#ifdef _WIN32
1910+
// Windows implementation using GetProcessMemoryInfo
1911+
PROCESS_MEMORY_COUNTERS pmc;
1912+
HANDLE hProcess = GetCurrentProcess();
1913+
if (NULL == hProcess) {
1914+
// Should not happen for the current process
1915+
return -1;
1916+
}
1917+
1918+
// GetProcessMemoryInfo returns non-zero on success
1919+
if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc))) {
1920+
// pmc.WorkingSetSize is in bytes. Convert to KB.
1921+
return (Py_ssize_t)(pmc.WorkingSetSize / 1024);
1922+
}
1923+
else {
1924+
return -1;
1925+
}
1926+
1927+
#elif __linux__
1928+
// Linux implementation using /proc/self/statm
1929+
long page_size_bytes = sysconf(_SC_PAGE_SIZE);
1930+
if (page_size_bytes <= 0) {
1931+
return -1;
1932+
}
1933+
1934+
FILE *fp = fopen("/proc/self/statm", "r");
1935+
if (fp == NULL) {
1936+
return -1;
1937+
}
1938+
1939+
// Second number is resident size in pages
1940+
long rss_pages;
1941+
if (fscanf(fp, "%*d %ld", &rss_pages) != 1) {
1942+
fclose(fp);
1943+
return -1;
1944+
}
1945+
fclose(fp);
1946+
1947+
// Sanity check
1948+
if (rss_pages < 0 || rss_pages > 1000000000) {
1949+
return -1;
1950+
}
1951+
1952+
// Convert unit to KB
1953+
return (Py_ssize_t)rss_pages * (page_size_bytes / 1024);
1954+
1955+
#elif defined(__APPLE__)
1956+
// --- MacOS (Darwin) ---
1957+
mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
1958+
mach_task_basic_info_data_t info;
1959+
kern_return_t kerr;
1960+
1961+
kerr = task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &count);
1962+
if (kerr != KERN_SUCCESS) {
1963+
return -1;
1964+
}
1965+
// info.resident_size is in bytes. Convert to KB.
1966+
return (Py_ssize_t)(info.resident_size / 1024);
1967+
1968+
#elif defined(__FreeBSD__)
1969+
long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
1970+
if (page_size_kb <= 0) {
1971+
return -1;
1972+
}
1973+
1974+
// Using /dev/null for vmcore avoids needing dump file.
1975+
// NULL for kernel file uses running kernel.
1976+
char errbuf[_POSIX2_LINE_MAX]; // For kvm error messages
1977+
kvm_t *kd = kvm_openfiles(NULL, "/dev/null", NULL, O_RDONLY, errbuf);
1978+
if (kd == NULL) {
1979+
return -1;
1980+
}
1981+
1982+
// KERN_PROC_PID filters for the specific process ID
1983+
// n_procs will contain the number of processes returned (should be 1 or 0)
1984+
pid_t pid = getpid();
1985+
int n_procs;
1986+
struct kinfo_proc *kp = kvm_getprocs(kd, KERN_PROC_PID, pid, &n_procs);
1987+
if (kp == NULL) {
1988+
kvm_close(kd);
1989+
return -1;
1990+
}
1991+
1992+
Py_ssize_t rss_kb = -1;
1993+
if (n_procs > 0) {
1994+
// kp[0] contains the info for our process
1995+
// ki_rssize is in pages. Convert to KB.
1996+
rss_kb = (Py_ssize_t)kp->ki_rssize * page_size_kb;
1997+
}
1998+
else {
1999+
// Process with PID not found, shouldn't happen for self.
2000+
rss_kb = -1;
2001+
}
2002+
2003+
kvm_close(kd);
2004+
return rss_kb;
2005+
2006+
#elif defined(__OpenBSD__)
2007+
long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
2008+
if (page_size_kb <= 0) {
2009+
return -1;
2010+
}
2011+
2012+
struct kinfo_proc kp;
2013+
pid_t pid = getpid();
2014+
int mib[6];
2015+
size_t len = sizeof(kp);
2016+
2017+
mib[0] = CTL_KERN;
2018+
mib[1] = KERN_PROC;
2019+
mib[2] = KERN_PROC_PID;
2020+
mib[3] = pid;
2021+
mib[4] = sizeof(struct kinfo_proc); // size of the structure we want
2022+
mib[5] = 1; // want 1 structure back
2023+
if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1) {
2024+
return -1;
2025+
}
2026+
2027+
if (len > 0) {
2028+
// p_vm_rssize is in pages on OpenBSD. Convert to KB.
2029+
return (Py_ssize_t)kp.p_vm_rssize * page_size_kb;
2030+
}
2031+
else {
2032+
// Process info not returned
2033+
return -1;
2034+
}
2035+
#else
2036+
// Unsupported platform
2037+
return -1;
2038+
#endif
2039+
}
2040+
2041+
static bool
2042+
gc_should_collect_rss(GCState *gcstate)
2043+
{
2044+
Py_ssize_t rss = get_current_rss();
2045+
if (rss < 0) {
2046+
// Reading RSS is not support or failed.
2047+
return true;
2048+
}
2049+
int threshold = gcstate->young.threshold;
2050+
Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed(&gcstate->deferred_count);
2051+
if (deferred > threshold * 40) {
2052+
// Too many new container objects since last GC, even though RSS
2053+
// might not have increased much. This is intended to avoid resource
2054+
// exhaustion if some objects consume resources but don't result in a
2055+
// RSS increase. We use 40x as the factor here because older versions
2056+
// of Python would do full collections after roughly every 70,000 new
2057+
// container objects.
2058+
return true;
2059+
}
2060+
Py_ssize_t last_rss = gcstate->last_rss;
2061+
Py_ssize_t rss_threshold = Py_MAX(last_rss / 10, 128);
2062+
if ((rss - last_rss) > rss_threshold) {
2063+
// The RSS has increased too much, do a collection.
2064+
return true;
2065+
}
2066+
else {
2067+
// The RSS has not increased enough, defer the collection and clear
2068+
// the young object count so we don't check RSS again on the next call
2069+
// to gc_should_collect().
2070+
PyMutex_Lock(&gcstate->mutex);
2071+
gcstate->deferred_count += gcstate->young.count;
2072+
gcstate->young.count = 0;
2073+
PyMutex_Unlock(&gcstate->mutex);
2074+
return false;
2075+
}
2076+
}
2077+
18812078
static bool
18822079
gc_should_collect(GCState *gcstate)
18832080
{
@@ -1887,11 +2084,17 @@ gc_should_collect(GCState *gcstate)
18872084
if (count <= threshold || threshold == 0 || !gc_enabled) {
18882085
return false;
18892086
}
1890-
// Avoid quadratic behavior by scaling threshold to the number of live
1891-
// objects. A few tests rely on immediate scheduling of the GC so we ignore
1892-
// the scaled threshold if generations[1].threshold is set to zero.
1893-
return (count > gcstate->long_lived_total / 4 ||
1894-
gcstate->old[0].threshold == 0);
2087+
if (gcstate->old[0].threshold == 0) {
2088+
// A few tests rely on immediate scheduling of the GC so we ignore the
2089+
// extra conditions if generations[1].threshold is set to zero.
2090+
return true;
2091+
}
2092+
if (count < gcstate->long_lived_total / 4) {
2093+
// Avoid quadratic behavior by scaling threshold to the number of live
2094+
// objects.
2095+
return false;
2096+
}
2097+
return gc_should_collect_rss(gcstate);
18952098
}
18962099

18972100
static void
@@ -1940,6 +2143,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
19402143
}
19412144

19422145
state->gcstate->young.count = 0;
2146+
state->gcstate->deferred_count = 0;
19432147
for (int i = 1; i <= generation; ++i) {
19442148
state->gcstate->old[i-1].count = 0;
19452149
}
@@ -2033,6 +2237,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
20332237
// to be freed.
20342238
delete_garbage(state);
20352239

2240+
// Store the current RSS, possibly smaller now that we deleted garbage.
2241+
state->gcstate->last_rss = get_current_rss();
2242+
20362243
// Append objects with legacy finalizers to the "gc.garbage" list.
20372244
handle_legacy_finalizers(state);
20382245
}

0 commit comments

Comments
 (0)