Skip to content

Commit 5df1577

Browse files
mattweingartenAlexisPerry
authored andcommitted
[Memprof] Adds the option to collect AccessCountHistograms for memprof. (llvm#94264)
Adds compile time flag -mllvm -memprof-histogram and runtime flag histogram=true|false to turn Histogram collection on and off. The -memprof-histogram flag relies on -memprof-use-callbacks=true to work. Updates shadow mapping logic in histogram mode from having one 8 byte counter for 64 bytes, to 1 byte for 8 bytes, capped at 255. Only supports this granularity as of now. Updates the RawMemprofReader and serializing MemoryInfoBlocks to binary format, including changing to a new version of the raw binary format from version 3 to version 4. Updates creating MemoryInfoBlocks with and without Histograms. When two MemoryInfoBlocks are merged, AccessCounts are summed up and the shorter Histogram is removed. Adds a memprof_histogram test case. Initial commit for adding AccessCountHistograms up until RawProfile for memprof
1 parent 599b085 commit 5df1577

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1097
-75
lines changed

compiler-rt/include/profile/MIBEntryDef.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
5151
MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
5252
MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
5353
MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
54+
MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
55+
MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)

compiler-rt/include/profile/MemProfData.inc

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@
3333
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
3434

3535
// The version number of the raw binary format.
36-
#define MEMPROF_RAW_VERSION 3ULL
36+
#define MEMPROF_RAW_VERSION 4ULL
37+
38+
// Currently supported versions.
39+
#define MEMPROF_RAW_SUPPORTED_VERSIONS \
40+
{ 3ULL, 4ULL }
3741

3842
#define MEMPROF_BUILDID_MAX_SIZE 32ULL
3943

@@ -119,7 +123,8 @@ MemInfoBlock() {
119123
}
120124

121125
MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
122-
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
126+
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
127+
uintptr_t Histogram, uint32_t HistogramSize)
123128
: MemInfoBlock() {
124129
AllocCount = 1U;
125130
TotalAccessCount = AccessCount;
@@ -149,6 +154,8 @@ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
149154
AllocCpuId = AllocCpu;
150155
DeallocCpuId = DeallocCpu;
151156
NumMigratedCpu = AllocCpuId != DeallocCpuId;
157+
AccessHistogramSize = HistogramSize;
158+
AccessHistogram = Histogram;
152159
}
153160

154161
void Merge(const MemInfoBlock &newMIB) {
@@ -194,6 +201,24 @@ void Merge(const MemInfoBlock &newMIB) {
194201
NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
195202
AllocCpuId = newMIB.AllocCpuId;
196203
DeallocCpuId = newMIB.DeallocCpuId;
204+
205+
// For merging histograms, we always keep the longer histogram, and add
206+
// values of shorter histogram to larger one.
207+
uintptr_t ShorterHistogram;
208+
uint32_t ShorterHistogramSize;
209+
if (newMIB.AccessHistogramSize > AccessHistogramSize) {
210+
ShorterHistogram = AccessHistogram;
211+
ShorterHistogramSize = AccessHistogramSize;
212+
// Swap histogram of current to larger histogram
213+
AccessHistogram = newMIB.AccessHistogram;
214+
AccessHistogramSize = newMIB.AccessHistogramSize;
215+
} else {
216+
ShorterHistogram = newMIB.AccessHistogram;
217+
ShorterHistogramSize = newMIB.AccessHistogramSize;
218+
}
219+
for (size_t i = 0; i < ShorterHistogramSize; ++i) {
220+
((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
221+
}
197222
}
198223

199224
#ifdef _MSC_VER

compiler-rt/lib/memprof/memprof_allocator.cpp

Lines changed: 73 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
#include <sched.h>
3535
#include <time.h>
3636

37+
#define MAX_HISTOGRAM_PRINT_SIZE 32U
38+
39+
extern bool __memprof_histogram;
40+
3741
namespace __memprof {
3842
namespace {
3943
using ::llvm::memprof::MemInfoBlock;
@@ -68,6 +72,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
6872
"cpu: %u, num same dealloc_cpu: %u\n",
6973
M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
7074
M.NumSameDeallocCpu);
75+
Printf("AccessCountHistogram[%u]: ", M.AccessHistogramSize);
76+
uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
77+
? MAX_HISTOGRAM_PRINT_SIZE
78+
: M.AccessHistogramSize;
79+
for (size_t i = 0; i < PrintSize; ++i) {
80+
Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
81+
}
82+
Printf("\n");
7183
}
7284
}
7385
} // namespace
@@ -216,15 +228,34 @@ u64 GetShadowCount(uptr p, u32 size) {
216228
return count;
217229
}
218230

231+
// Accumulates the access count from the shadow for the given pointer and size.
232+
// See memprof_mapping.h for an overview on histogram counters.
233+
u64 GetShadowCountHistogram(uptr p, u32 size) {
234+
u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
235+
u8 *shadow_end = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
236+
u64 count = 0;
237+
for (; shadow <= shadow_end; shadow++)
238+
count += *shadow;
239+
return count;
240+
}
241+
219242
// Clears the shadow counters (when memory is allocated).
220243
void ClearShadow(uptr addr, uptr size) {
221244
CHECK(AddrIsAlignedByGranularity(addr));
222245
CHECK(AddrIsInMem(addr));
223246
CHECK(AddrIsAlignedByGranularity(addr + size));
224247
CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
225248
CHECK(REAL(memset));
226-
uptr shadow_beg = MEM_TO_SHADOW(addr);
227-
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
249+
uptr shadow_beg;
250+
uptr shadow_end;
251+
if (__memprof_histogram) {
252+
shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr);
253+
shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size);
254+
} else {
255+
shadow_beg = MEM_TO_SHADOW(addr);
256+
shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
257+
}
258+
228259
if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
229260
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
230261
} else {
@@ -279,6 +310,44 @@ struct Allocator {
279310
Print(Value->mib, Key, bool(Arg));
280311
}
281312

313+
// See memprof_mapping.h for an overview on histogram counters.
314+
static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
315+
if (__memprof_histogram) {
316+
return CreateNewMIBWithHistogram(p, m, user_size);
317+
} else {
318+
return CreateNewMIBWithoutHistogram(p, m, user_size);
319+
}
320+
}
321+
322+
// Builds the MemInfoBlock for chunk `m`, whose user memory starts at `p` and
// spans `user_size` bytes, and attaches a per-granule access histogram.
//
// The histogram has one uint64_t entry per HISTOGRAM_GRANULARITY bytes of user
// memory (rounded up), each initialised from the matching one-byte shadow
// counter. The buffer comes from InternalAlloc and is passed to the block as an
// integer; it is released later with InternalFree by code that checks
// AccessHistogramSize > 0, so no buffer is allocated for an empty histogram.
static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
                                              u64 user_size) {
  u64 c = GetShadowCountHistogram(p, user_size);
  long curtime = GetTimestamp();
  uint32_t HistogramSize =
      RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
  // A zero-length buffer would never be freed (all frees are guarded by a
  // non-zero size), so represent "no entries" the same way the
  // histogram-less path does: null buffer, zero length.
  uintptr_t Histogram = 0;
  if (HistogramSize > 0) {
    Histogram = (uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
    // Every slot is assigned below, so no separate zero-fill is needed.
    for (size_t i = 0; i < HistogramSize; ++i) {
      u8 Counter =
          *((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
      ((uint64_t *)Histogram)[i] = (uint64_t)Counter;
    }
  }
  MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
                      GetCpuId(), Histogram, HistogramSize);
  return newMIB;
}
341+
342+
// Builds the MemInfoBlock for chunk `m` from the regular shadow counters.
// No histogram is attached: the block gets a null buffer and a length of 0.
static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
                                                 u64 user_size) {
  const u64 access_count = GetShadowCount(p, user_size);
  const long dealloc_ts = GetTimestamp();
  return MemInfoBlock(user_size, access_count, m->timestamp_ms, dealloc_ts,
                      m->cpu_id, GetCpuId(), /*Histogram=*/0,
                      /*HistogramSize=*/0);
}
350+
282351
void FinishAndWrite() {
283352
if (print_text && common_flags()->print_module_map)
284353
DumpProcessMap();
@@ -319,10 +388,7 @@ struct Allocator {
319388
if (!m)
320389
return;
321390
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
322-
u64 c = GetShadowCount(user_beg, user_requested_size);
323-
long curtime = GetTimestamp();
324-
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
325-
m->cpu_id, GetCpuId());
391+
MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
326392
InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
327393
},
328394
this);
@@ -451,11 +517,7 @@ struct Allocator {
451517
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
452518
if (memprof_inited && atomic_load_relaxed(&constructed) &&
453519
!atomic_load_relaxed(&destructing)) {
454-
u64 c = GetShadowCount(p, user_requested_size);
455-
long curtime = GetTimestamp();
456-
457-
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
458-
m->cpu_id, GetCpuId());
520+
MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
459521
InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
460522
}
461523

compiler-rt/lib/memprof/memprof_flags.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
3838
MEMPROF_FLAG(bool, print_text, false,
3939
"If set, prints the heap profile in text format. Else use the raw binary serialization format.")
4040
MEMPROF_FLAG(bool, print_terse, false,
41-
"If set, prints memory profile in a terse format. Only applicable if print_text = true.")
41+
"If set, prints memory profile in a terse format. Only applicable if print_text = true.")

compiler-rt/lib/memprof/memprof_mapping.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ static const u64 kDefaultShadowScale = 3;
2222

2323
#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
2424
#define MEMPROF_ALIGNMENT 32
25-
2625
namespace __memprof {
2726

2827
extern uptr kHighMemEnd; // Initialized in __memprof_init.
@@ -37,6 +36,34 @@ extern uptr kHighMemEnd; // Initialized in __memprof_init.
3736
#define MEM_TO_SHADOW(mem) \
3837
((((mem) & SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
3938

39+
// Histogram shadow memory is laid out differently from the standard
// configuration:
//
//             8 bytes
//         +---+---+---+ +---+---+---+ +---+---+---+
// Memory  |     a     | |     b     | |     c     |
//         +---+---+---+ +---+---+---+ +---+---+---+
//
//             +---+         +---+         +---+
// Shadow      | a |         | b |         | c |
//             +---+         +---+         +---+
//             1 byte
//
// Where we have a 1 byte counter for each 8 bytes. HISTOGRAM_MEM_TO_SHADOW
// translates a memory address to the address of its corresponding shadow
// counter. The same data is still provided in the MIB whether histograms are
// used or not. Total access counts per allocation are computed by summing up
// all individual 1 byte counters. Because each counter is capped at
// HISTOGRAM_MAX_COUNTER, this can incur an accuracy penalty.

// Number of application bytes covered by one histogram counter.
#define HISTOGRAM_GRANULARITY 8U

// Largest value a one-byte histogram counter can hold.
#define HISTOGRAM_MAX_COUNTER 255U

// Clears the low bits that select a byte within a granule. The mask is built
// at 64-bit width: a 32-bit mask would be zero-extended when combined with a
// 64-bit address and would wipe the upper half of the address.
#define HISTOGRAM_SHADOW_MASK (~(HISTOGRAM_GRANULARITY - 1ULL))

#define HISTOGRAM_MEM_TO_SHADOW(mem)                                           \
  ((((mem) & HISTOGRAM_SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
66+
4067
#define SHADOW_ENTRY_SIZE (MEM_GRANULARITY >> SHADOW_SCALE)
4168

4269
#define kLowMemBeg 0
@@ -108,6 +135,14 @@ inline void RecordAccess(uptr a) {
108135
(*shadow_address)++;
109136
}
110137

138+
// Records one access to address `a` in histogram mode by incrementing the
// one-byte shadow counter that HISTOGRAM_MEM_TO_SHADOW maps it to. The counter
// stops at HISTOGRAM_MAX_COUNTER rather than wrapping around.
inline void RecordAccessHistogram(uptr a) {
  CHECK_EQ(SHADOW_ENTRY_SIZE, 8);
  u8 *counter = (u8 *)HISTOGRAM_MEM_TO_SHADOW(a);
  if (*counter >= HISTOGRAM_MAX_COUNTER)
    return; // Already saturated.
  ++*counter;
}
145+
111146
} // namespace __memprof
112147

113148
#endif // MEMPROF_MAPPING_H

compiler-rt/lib/memprof/memprof_mibmap.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,18 @@ void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
3030
} else {
3131
LockedMemInfoBlock *lmib = *h;
3232
SpinMutexLock lock(&lmib->mutex);
33+
uintptr_t ShorterHistogram;
34+
if (Block.AccessHistogramSize > lmib->mib.AccessHistogramSize)
35+
ShorterHistogram = lmib->mib.AccessHistogram;
36+
else
37+
ShorterHistogram = Block.AccessHistogram;
38+
3339
lmib->mib.Merge(Block);
40+
// The larger histogram is kept and the shorter histogram is discarded after
41+
// adding the counters to the larger histogram. Free only the shorter
42+
// Histogram
43+
if (Block.AccessHistogramSize > 0 || lmib->mib.AccessHistogramSize > 0)
44+
InternalFree((void *)ShorterHistogram);
3445
}
3546
}
3647

compiler-rt/lib/memprof/memprof_rawprofile.cpp

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,24 +146,38 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds,
146146
// ---------- MIB Entry 0
147147
// Alloc Count
148148
// ...
149+
// ---- AccessHistogram Entry 0
150+
// ...
151+
// ---- AccessHistogram Entry AccessHistogramSize - 1
149152
// ---------- MIB Entry 1
150153
// Alloc Count
151154
// ...
155+
// ---- AccessHistogram Entry 0
156+
// ...
157+
// ---- AccessHistogram Entry AccessHistogramSize - 1
152158
// ----------
153159
// Writes the MIB section into Buffer: the entry count, then for every stack id
// its key, the raw MemInfoBlock and that block's access histogram entries.
// Each entry is removed from MIBMap as it is written, and a non-empty
// histogram buffer is released once its entries have been copied out.
// ExpectedNumBytes is an upper bound on the bytes written and is CHECKed.
void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
                              const u64 ExpectedNumBytes, char *&Buffer) {
  char *Ptr = Buffer;
  const u64 NumEntries = StackIds.Size();
  Ptr = WriteBytes(NumEntries, Ptr);
  for (u64 i = 0; i < NumEntries; i++) {
    const u64 Key = StackIds[i];
    MIBMapTy::Handle h(&MIBMap, Key, /*remove=*/true, /*create=*/false);
    CHECK(h.exists());
    Ptr = WriteBytes(Key, Ptr);

    const auto &Block = (*h)->mib;
    // FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a
    // serialization schema will fix this issue. See also FIXME in
    // deserialization.
    Ptr = WriteBytes(Block, Ptr);

    const u64 NumHistogramEntries = Block.AccessHistogramSize;
    if (NumHistogramEntries == 0)
      continue; // Nothing to append and nothing to free.

    const u64 *HistogramEntries = (const u64 *)Block.AccessHistogram;
    for (u64 j = 0; j < NumHistogramEntries; ++j)
      Ptr = WriteBytes(HistogramEntries[j], Ptr);
    InternalFree((void *)Block.AccessHistogram);
  }
  CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
        "Expected num bytes != actual bytes written");
}
@@ -192,7 +206,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
192206
// ---------- MIB Entry
193207
// Alloc Count
194208
// ...
195-
// ----------
209+
// ---- AccessHistogram Entry 0
210+
// ...
211+
// ---- AccessHistogram Entry AccessHistogramSize - 1
212+
// ---------- MIB Entry 1
213+
// Alloc Count
214+
// ...
215+
// ---- AccessHistogram Entry 0
216+
// ...
217+
// ---- AccessHistogram Entry AccessHistogramSize - 1
196218
// Optional Padding Bytes
197219
// ---------- Stack Info
198220
// Num Entries
@@ -218,13 +240,26 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
218240
const u64 NumMIBInfoBytes = RoundUpTo(
219241
sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8);
220242

243+
// Get Number of AccessHistogram entries in total
244+
u64 TotalAccessHistogramEntries = 0;
245+
MIBMap.ForEach(
246+
[](const uptr Key, UNUSED LockedMemInfoBlock *const &MIB, void *Arg) {
247+
u64 *TotalAccessHistogramEntries = (u64 *)Arg;
248+
*TotalAccessHistogramEntries += MIB->mib.AccessHistogramSize;
249+
},
250+
reinterpret_cast<void *>(&TotalAccessHistogramEntries));
251+
const u64 NumHistogramBytes =
252+
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8);
253+
221254
const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
222255

223256
// Ensure that the profile is 8b aligned. We allow for some optional padding
224257
// at the end so that any subsequent profile serialized to the same file does
225258
// not incur unaligned accesses.
226-
const u64 TotalSizeBytes = RoundUpTo(
227-
sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8);
259+
const u64 TotalSizeBytes =
260+
RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes +
261+
NumMIBInfoBytes + NumHistogramBytes,
262+
8);
228263

229264
// Allocate the memory for the entire buffer incl. info blocks.
230265
Buffer = (char *)InternalAlloc(TotalSizeBytes);
@@ -235,14 +270,16 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
235270
static_cast<u64>(TotalSizeBytes),
236271
sizeof(Header),
237272
sizeof(Header) + NumSegmentBytes,
238-
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
273+
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes +
274+
NumHistogramBytes};
239275
Ptr = WriteBytes(header, Ptr);
240276

241277
SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
242278
Ptr += NumSegmentBytes;
243279

244-
SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);
245-
Ptr += NumMIBInfoBytes;
280+
SerializeMIBInfoToBuffer(MIBMap, StackIds,
281+
NumMIBInfoBytes + NumHistogramBytes, Ptr);
282+
Ptr += NumMIBInfoBytes + NumHistogramBytes;
246283

247284
SerializeStackToBuffer(StackIds, NumStackBytes, Ptr);
248285

0 commit comments

Comments
 (0)