Skip to content

Commit 92481af

Browse files
authored
Merge pull request #257 from P403n1x87/chore/small-cleanups
chore: small cleanups
2 parents aafca01 + 4ee62ac commit 92481af

File tree

3 files changed

+72
-73
lines changed

3 files changed

+72
-73
lines changed

src/linux/analyze_elf.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ _get_base_64(Elf64_Ehdr* ehdr, void* elf_map) {
1010
} /* _get_base_64 */
1111

1212
static int
13-
_py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
13+
_py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base, proc_vm_map_block_t* bss) {
1414
register int symbols = 0;
1515

1616
Elf64_Ehdr* ehdr = elf_map;
@@ -29,7 +29,7 @@ _py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
2929
size_t bss_size = 0;
3030

3131
if (base != UINT64_MAX) {
32-
log_d("Base @ %p", base);
32+
log_d("ELF base @ %p", base);
3333

3434
for (Elf64_Off sh_off = ehdr->e_shoff; sh_off < elf_map_size; sh_off += ehdr->e_shentsize) {
3535
p_shdr = (Elf64_Shdr*)(elf_map + sh_off);
@@ -45,7 +45,7 @@ _py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
4545
}
4646
}
4747

48-
if (p_dynsym != NULL) {
48+
if (isvalid(p_dynsym)) {
4949
if (p_dynsym->sh_offset != 0) {
5050
Elf64_Shdr* p_strtabsh = (Elf64_Shdr*)(elf_map + ELF_SH_OFF(ehdr, p_dynsym->sh_link));
5151

@@ -71,9 +71,9 @@ _py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
7171
}
7272

7373
// Communicate BSS data back to the caller
74-
self->map.bss.base = bss_base;
75-
self->map.bss.size = bss_size;
76-
log_d("BSS @ %p (size %x, offset %x)", self->map.bss.base, self->map.bss.size, self->map.bss.base - elf_base);
74+
bss->base = bss_base;
75+
bss->size = bss_size;
76+
log_d("BSS @ %p (size %x, offset %x)", bss_base, bss_size, bss_base - elf_base);
7777

7878
SUCCESS;
7979
} /* _py_proc__analyze_elf64 */

src/linux/py_proc.h

+58-53
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ _get_base_64(Elf64_Ehdr* ehdr, void* elf_map) {
101101
} /* _get_base_64 */
102102

103103
static int
104-
_py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
104+
_py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base, proc_vm_map_block_t* bss) {
105105
register int symbols = 0;
106106

107107
Elf64_Ehdr* ehdr = elf_map;
@@ -120,7 +120,7 @@ _py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
120120
size_t bss_size = 0;
121121

122122
if (base != UINT64_MAX) {
123-
log_d("Base @ %p", base);
123+
log_d("ELF base @ %p", base);
124124

125125
for (Elf64_Off sh_off = ehdr->e_shoff; sh_off < elf_map_size; sh_off += ehdr->e_shentsize) {
126126
p_shdr = (Elf64_Shdr*)(elf_map + sh_off);
@@ -136,7 +136,7 @@ _py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
136136
}
137137
}
138138

139-
if (p_dynsym != NULL) {
139+
if (isvalid(p_dynsym)) {
140140
if (p_dynsym->sh_offset != 0) {
141141
Elf64_Shdr* p_strtabsh = (Elf64_Shdr*)(elf_map + ELF_SH_OFF(ehdr, p_dynsym->sh_link));
142142

@@ -162,9 +162,9 @@ _py_proc__analyze_elf64(py_proc_t* self, void* elf_map, void* elf_base) {
162162
}
163163

164164
// Communicate BSS data back to the caller
165-
self->map.bss.base = bss_base;
166-
self->map.bss.size = bss_size;
167-
log_d("BSS @ %p (size %x, offset %x)", self->map.bss.base, self->map.bss.size, self->map.bss.base - elf_base);
165+
bss->base = bss_base;
166+
bss->size = bss_size;
167+
log_d("BSS @ %p (size %x, offset %x)", bss_base, bss_size, bss_base - elf_base);
168168

169169
SUCCESS;
170170
} /* _py_proc__analyze_elf64 */
@@ -181,7 +181,7 @@ _get_base_32(Elf32_Ehdr* ehdr, void* elf_map) {
181181
} /* _get_base_32 */
182182

183183
static int
184-
_py_proc__analyze_elf32(py_proc_t* self, void* elf_map, void* elf_base) {
184+
_py_proc__analyze_elf32(py_proc_t* self, void* elf_map, void* elf_base, proc_vm_map_block_t* bss) {
185185
register int symbols = 0;
186186

187187
Elf32_Ehdr* ehdr = elf_map;
@@ -200,7 +200,7 @@ _py_proc__analyze_elf32(py_proc_t* self, void* elf_map, void* elf_base) {
200200
size_t bss_size = 0;
201201

202202
if (base != UINT32_MAX) {
203-
log_d("Base @ %p", base);
203+
log_d("ELF base @ %p", base);
204204

205205
for (Elf32_Off sh_off = ehdr->e_shoff; sh_off < elf_map_size; sh_off += ehdr->e_shentsize) {
206206
p_shdr = (Elf32_Shdr*)(elf_map + sh_off);
@@ -216,7 +216,7 @@ _py_proc__analyze_elf32(py_proc_t* self, void* elf_map, void* elf_base) {
216216
}
217217
}
218218

219-
if (p_dynsym != NULL) {
219+
if (isvalid(p_dynsym)) {
220220
if (p_dynsym->sh_offset != 0) {
221221
Elf32_Shdr* p_strtabsh = (Elf32_Shdr*)(elf_map + ELF_SH_OFF(ehdr, p_dynsym->sh_link));
222222

@@ -242,9 +242,9 @@ _py_proc__analyze_elf32(py_proc_t* self, void* elf_map, void* elf_base) {
242242
}
243243

244244
// Communicate BSS data back to the caller
245-
self->map.bss.base = bss_base;
246-
self->map.bss.size = bss_size;
247-
log_d("BSS @ %p (size %x, offset %x)", self->map.bss.base, self->map.bss.size, self->map.bss.base - elf_base);
245+
bss->base = bss_base;
246+
bss->size = bss_size;
247+
log_d("BSS @ %p (size %x, offset %x)", bss_base, bss_size, bss_base - elf_base);
248248

249249
SUCCESS;
250250
} /* _py_proc__analyze_elf32 */
@@ -259,7 +259,7 @@ _elf_check(Elf64_Ehdr* ehdr) {
259259

260260
// ----------------------------------------------------------------------------
261261
static int
262-
_py_proc__analyze_elf(py_proc_t* self, char* path, void* elf_base) {
262+
_py_proc__analyze_elf(py_proc_t* self, char* path, void* elf_base, proc_vm_map_block_t* bss) {
263263
cu_fd fd = open(path, O_RDONLY);
264264
if (fd == -1) {
265265
log_e("Cannot open binary file %s", path);
@@ -299,11 +299,11 @@ _py_proc__analyze_elf(py_proc_t* self, char* path, void* elf_base) {
299299
switch (ehdr->e_ident[EI_CLASS]) {
300300
case ELFCLASS64:
301301
log_d("%s is 64-bit ELF", path);
302-
return _py_proc__analyze_elf64(self, binary_map->addr, elf_base);
302+
return _py_proc__analyze_elf64(self, binary_map->addr, elf_base, bss);
303303

304304
case ELFCLASS32:
305305
log_d("%s is 32-bit ELF", path);
306-
return _py_proc__analyze_elf32(self, binary_map->addr, elf_base);
306+
return _py_proc__analyze_elf32(self, binary_map->addr, elf_base, bss);
307307

308308
default:
309309
log_e("%s has invalid ELF class", path);
@@ -314,9 +314,10 @@ _py_proc__analyze_elf(py_proc_t* self, char* path, void* elf_base) {
314314

315315
// ----------------------------------------------------------------------------
316316
static int
317-
_py_proc__parse_maps_file(py_proc_t* self) {
318-
int maps_flag = 0;
319-
struct vm_map* map = NULL;
317+
_py_proc__inspect_vm_maps(py_proc_t* self) {
318+
int maps_flag = 0;
319+
struct vm_map* map = NULL;
320+
proc_vm_map_block_t bss;
320321

321322
cu_proc_map_t* proc_maps = proc_map_new(self->pid);
322323
if (!isvalid(proc_maps)) {
@@ -376,10 +377,10 @@ _py_proc__parse_maps_file(py_proc_t* self) {
376377
map->file_size = _file_size(map->path);
377378
map->base = first_binary_map->address;
378379
map->size = first_binary_map->size;
379-
map->has_symbols = success(_py_proc__analyze_elf(self, map->path, map->base));
380+
map->has_symbols = success(_py_proc__analyze_elf(self, map->path, map->base, &bss));
380381
if (map->has_symbols) {
381-
map->bss_base = self->map.bss.base;
382-
map->bss_size = self->map.bss.size;
382+
map->bss_base = bss.base;
383+
map->bss_size = bss.size;
383384
}
384385
log_d("Binary path: %s (symbols: %d)", map->path, map->has_symbols);
385386

@@ -413,7 +414,7 @@ _py_proc__parse_maps_file(py_proc_t* self) {
413414

414415
proc_map_t* first_lib_map = proc_map__first_submatch(proc_maps, LIB_NEEDLE);
415416
if (isvalid(first_lib_map)) {
416-
if (success(_py_proc__analyze_elf(self, first_lib_map->pathname, first_lib_map->address))) {
417+
if (success(_py_proc__analyze_elf(self, first_lib_map->pathname, first_lib_map->address, &bss))) {
417418
// The library binary has symbols
418419
map = &(pd->maps[MAP_LIBSYM]);
419420

@@ -427,8 +428,8 @@ _py_proc__parse_maps_file(py_proc_t* self) {
427428
map->base = first_lib_map->address;
428429
map->size = first_lib_map->size;
429430
map->has_symbols = TRUE;
430-
map->bss_base = self->map.bss.base;
431-
map->bss_size = self->map.bss.size;
431+
map->bss_base = bss.base;
432+
map->bss_size = bss.size;
432433

433434
log_d("Library path: %s (with symbols)", map->path);
434435
} else {
@@ -512,7 +513,7 @@ _py_proc__parse_maps_file(py_proc_t* self) {
512513
log_d("VM maps parsing result: bin=%s lib=%s flags=%d", self->bin_path, self->lib_path, maps_flag);
513514

514515
SUCCESS;
515-
} /* _py_proc__parse_maps_file */
516+
} /* _py_proc__inspect_vm_maps */
516517

517518
// ----------------------------------------------------------------------------
518519
static ssize_t
@@ -536,21 +537,21 @@ _py_proc__get_resident_memory(py_proc_t* self) {
536537

537538
#ifdef NATIVE
538539
// ----------------------------------------------------------------------------
540+
#define RANGES_MAX 256
541+
539542
char pathname[1024];
540543
char prevpathname[1024];
541-
vm_range_t* ranges[256];
544+
vm_range_t* ranges[RANGES_MAX];
542545

543546
static int
544547
_py_proc__get_vm_maps(py_proc_t* self) {
545-
cu_FILE* fp = NULL;
546-
cu_char* line = NULL;
547-
size_t len = 0;
548548
vm_range_tree_t* tree = NULL;
549549
hash_table_t* table = NULL;
550+
cu_proc_map_t* maps = NULL;
550551

551552
if (pargs.where) {
552553
tree = vm_range_tree_new();
553-
table = hash_table_new(256);
554+
table = hash_table_new(RANGES_MAX);
554555

555556
vm_range_tree__destroy(self->maps_tree);
556557
hash_table__destroy(self->base_table);
@@ -559,36 +560,40 @@ _py_proc__get_vm_maps(py_proc_t* self) {
559560
self->base_table = table;
560561
}
561562

562-
fp = _procfs(self->pid, "maps");
563-
if (!isvalid(fp)) {
563+
maps = proc_map_new(self->pid);
564+
if (!isvalid(maps)) {
564565
set_error(EPROC);
565566
FAIL;
566567
}
567568

568569
log_d("Rebuilding vm ranges tree");
569570

570571
int nrange = 0;
571-
while (getline(&line, &len, fp) != -1 && nrange < 256) {
572-
ssize_t lower, upper;
573-
574-
if (sscanf(
575-
line, ADDR_FMT "-" ADDR_FMT " %*s %*x %*x:%*x %*x %s\n", &lower, &upper, // Map bounds
576-
pathname // Binary path
577-
) == 3
578-
&& pathname[0] != '[') {
579-
if (pargs.where) {
580-
if (strcmp(pathname, prevpathname)) {
581-
ranges[nrange++] = vm_range_new(lower, upper, strdup(pathname));
582-
key_dt key = string__hash(pathname);
583-
if (!isvalid(hash_table__get(table, key)))
584-
hash_table__set(table, key, (value_t)lower);
585-
strcpy(prevpathname, pathname);
586-
} else
587-
ranges[nrange - 1]->hi = upper;
572+
PROC_MAP_ITER(maps, m) {
573+
if (nrange >= RANGES_MAX) {
574+
log_e("Too many ranges");
575+
break;
576+
}
577+
578+
if (!isvalid(m->pathname))
579+
continue;
580+
581+
if (pargs.where) {
582+
if (strcmp(m->pathname, prevpathname)) {
583+
ranges[nrange++]
584+
= vm_range_new((addr_t)m->address, ((addr_t)m->address) + m->size, strdup(m->pathname));
585+
key_dt key = string__hash(m->pathname);
586+
if (!isvalid(hash_table__get(table, key)))
587+
hash_table__set(table, key, (value_t)m->address);
588+
strcpy(prevpathname, m->pathname);
588589
} else
589-
// We print the maps instead so that we can resolve them later and use
590-
// the CPU more efficiently to collect samples.
591-
emit_metadata("map", ADDR_FMT "-" ADDR_FMT " %s", lower, upper, pathname);
590+
ranges[nrange - 1]->hi = ((addr_t)m->address) + m->size;
591+
} else {
592+
// We print the maps instead so that we can resolve them later and use
593+
// the CPU more efficiently to collect samples.
594+
emit_metadata(
595+
"map", ADDR_FMT "-" ADDR_FMT " %s", (addr_t)m->address, ((addr_t)m->address) + m->size, m->pathname
596+
);
592597
}
593598
}
594599

@@ -602,7 +607,7 @@ _py_proc__get_vm_maps(py_proc_t* self) {
602607
// ----------------------------------------------------------------------------
603608
static int
604609
_py_proc__init(py_proc_t* self) {
605-
if (!isvalid(self) || fail(_py_proc__parse_maps_file(self))) {
610+
if (!isvalid(self) || fail(_py_proc__inspect_vm_maps(self))) {
606611
set_error(EPROC);
607612
FAIL;
608613
}

src/py_proc.c

+8-14
Original file line numberDiff line numberDiff line change
@@ -221,12 +221,6 @@ _find_version_in_binary(char* path, int* version) {
221221
} /* _find_version_in_binary */
222222
#endif
223223

224-
#if defined PL_LINUX
225-
#define LIB_NEEDLE "libpython"
226-
#else
227-
#define LIB_NEEDLE "python"
228-
#endif
229-
230224
static int
231225
_py_proc__infer_python_version(py_proc_t* self) {
232226
if (!isvalid(self)) {
@@ -418,14 +412,14 @@ _py_proc__check_interp_state(py_proc_t* self, void* raddr) {
418412
static int
419413
_py_proc__scan_bss(py_proc_t* self) {
420414
// Starting with Python 3.11, BSS scans fail because it seems that the
421-
// interpreter state is stored in the data section. In this case, we shift our
422-
// data queries into the data section. We then take steps of 64KB backwards
423-
// and try to find the interpreter state. This is a bit of a hack for now, but
424-
// it seems to work with decent performance. Note that if we fail the first
425-
// scan, we then look for actual interpreter states rather than pointers to
426-
// it. This make the search a little slower, since we now have to check every
427-
// value in the range. However, the step size we chose seems to get us close
428-
// enough in a few attempts.
415+
// interpreter state is stored in the data section. In this case, we shift
416+
// our data queries into the data section. We then take steps of 64KB
417+
// backwards and try to find the interpreter state. This is a bit of a hack
418+
// for now, but it seems to work with decent performance. Note that if we
419+
// fail the first scan, we then look for actual interpreter states rather
420+
// than pointers to it. This makes the search a little slower, since we now
421+
// have to check every value in the range. However, the step size we chose
422+
// seems to get us close enough in a few attempts.
429423
if (!isvalid(self) || !isvalid(self->map.bss.base)) {
430424
set_error(EPROC);
431425
FAIL;

0 commit comments

Comments
 (0)