Skip to content

Commit fcad4b9

Browse files
committed
Allow for querying of build_id from objects (#53943)
For GPUCompiler we would like to support a native on-disk cache of LLVM IR. One of the longstanding issues has been the cache invalidation of such an on-disk cache. With #52233 we now have an integrated cache for the inference results and we can rely on `CodeInstance` to be stable across sessions. Due to #52119 we can also rely on the `objectid` to be stable. My initial thought was to key the native disk cache in GPUCompiler on the objectid of the corresponding CodeInstance (+ some compilation parameters). While discussing this with @rayegun yesterday we noted that having a CodeInstance with the same objectid might not be enough provenance. E.g., we are not guaranteed that the CodeInstance is from the same build artifact and the same precise source code. For the package images we are tracking this during loading and validate all contents at once, and we explicitly keep track of the provenance chain. This PR adds a lookup table where we map from "external_blobs", e.g. loaded images, to the corresponding top module of each image, and uses this to determine the build_id of the package image. (cherry picked from commit d47cbf6)
1 parent 814a04a commit fcad4b9

File tree

6 files changed

+53
-2
lines changed

6 files changed

+53
-2
lines changed

base/loading.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3008,6 +3008,14 @@ function module_build_id(m::Module)
30083008
return (UInt128(hi) << 64) | lo
30093009
end
30103010

3011+
# Return the build id of the image that owns `obj`, or `nothing` when the
# runtime reports no owning top module for it.
function object_build_id(obj)
3012+
# Ask the runtime which top module is recorded for the image containing `obj`.
mod = ccall(:jl_object_top_module, Any, (Any,), obj)
3013+
if mod === nothing
3014+
# No image owns `obj` (the runtime treats it as runtime allocated).
return nothing
3015+
end
3016+
# The build id of the owning top module identifies the image.
return module_build_id(mod::Module)
3017+
end
3018+
30113019
function isvalid_cache_header(f::IOStream)
30123020
pkgimage = Ref{UInt8}()
30133021
checksum = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid}, Ptr{UInt8}, Ptr{Int64}, Ptr{Int64}), f.ios, pkgimage, Ref{Int64}(), Ref{Int64}()) # returns checksum id or zero

src/init.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
826826

827827
arraylist_new(&jl_linkage_blobs, 0);
828828
arraylist_new(&jl_image_relocs, 0);
829+
arraylist_new(&jl_top_mods, 0);
829830
arraylist_new(&eytzinger_image_tree, 0);
830831
arraylist_new(&eytzinger_idxs, 0);
831832
arraylist_push(&eytzinger_idxs, (void*)0);

src/julia.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2049,6 +2049,7 @@ JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t e
20492049
JL_DLLEXPORT void jl_restore_system_image(const char *fname);
20502050
JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
20512051
JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage);
2052+
JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT;
20522053

20532054
JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
20542055
JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci);

src/julia_internal.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
349349
extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
350350
JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages
351351
JL_DLLEXPORT extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages
352+
JL_DLLEXPORT extern arraylist_t jl_top_mods; // external linkage: sysimg/pkgimages
352353
extern arraylist_t eytzinger_image_tree;
353354
extern arraylist_t eytzinger_idxs;
354355

@@ -1011,7 +1012,8 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
10111012

10121013
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;
10131014

1014-
uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
1015+
// Query if this object is perm-allocated in an image.
1016+
JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
10151017

10161018
// the first argument to jl_idtable_rehash is used to return a value
10171019
// make sure it is rooted if it is used after the function returns

src/staticdata.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,8 @@ static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_
336336
// jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i] (0-offset indexing)
337337
arraylist_t jl_linkage_blobs;
338338
arraylist_t jl_image_relocs;
339+
// Keep track of which image corresponds to which top module.
340+
arraylist_t jl_top_mods;
339341

340342
// Eytzinger tree of images. Used for very fast jl_object_in_image queries
341343
// See https://algorithmica.org/en/eytzinger
@@ -450,11 +452,23 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
450452
return idx;
451453
}
452454

453-
uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
455+
// Query if this object is perm-allocated in an image.
// Thin exported wrapper that forwards to eyt_obj_in_img.
JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
454456
{
455457
return eyt_obj_in_img(obj);
456458
}
457459

460+
// Map an object to its "owning" top module.
// Returns the entry of jl_top_mods at the object's external blob index, or
// jl_nothing when that index is not below n_linkage_blobs().
461+
JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT
462+
{
463+
// Index of the image blob that contains v, as computed by external_blob_index.
size_t idx = external_blob_index(v);
464+
size_t lbids = n_linkage_blobs();
465+
if (idx < lbids) {
466+
// jl_top_mods is indexed by the same blob index.
return (jl_value_t*)jl_top_mods.items[idx];
467+
}
468+
// The object is runtime allocated
469+
return (jl_value_t*)jl_nothing;
470+
}
471+
458472
// hash of definitions for predefined function pointers
459473
static htable_t fptr_to_id;
460474
void *native_functions; // opaque jl_native_code_desc_t blob used for fetching data from LLVM
@@ -3554,6 +3568,15 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
35543568
arraylist_push(&jl_linkage_blobs, (void*)image_base);
35553569
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg));
35563570
arraylist_push(&jl_image_relocs, (void*)relocs_base);
3571+
if (restored == NULL) {
3572+
arraylist_push(&jl_top_mods, (void*)jl_top_module);
3573+
} else {
3574+
size_t len = jl_array_nrows(*restored);
3575+
assert(len > 0);
3576+
jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(*restored, len-1);
3577+
assert(jl_is_module(topmod));
3578+
arraylist_push(&jl_top_mods, (void*)topmod);
3579+
}
35573580
jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
35583581
rebuild_image_blob_tree();
35593582

test/precompile.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ function precompile_test_harness(@nospecialize(f), separate::Bool)
4747
nothing
4848
end
4949

50+
# Basic checks for Base.object_build_id outside of any precompiled package.
@testset "object_build_id" begin
51+
# A freshly constructed array is expected to have no build id.
@test Base.object_build_id([1]) === nothing
52+
# For the Base module object, the build id must match its own module build id.
@test Base.object_build_id(Base) == Base.module_build_id(Base)
53+
end
54+
5055
# method root provenance
5156

5257
rootid(m::Module) = Base.module_build_id(Base.parentmodule(m)) % UInt64
@@ -382,6 +387,9 @@ precompile_test_harness(false) do dir
382387
@test objectid(Foo.a_vec_int) === Foo.oid_vec_int
383388
@test objectid(Foo.a_mat_int) === Foo.oid_mat_int
384389
@test Foo.oid_vec_int !== Foo.oid_mat_int
390+
@test Base.object_build_id(Foo.a_vec_int) == Base.object_build_id(Foo.a_mat_int)
391+
@test Base.object_build_id(Foo) == Base.module_build_id(Foo)
392+
@test Base.object_build_id(Foo.a_vec_int) == Base.module_build_id(Foo)
385393
end
386394

387395
@eval begin function ccallable_test()
@@ -1751,10 +1759,14 @@ let newinterp_path = abspath("compiler/newinterp.jl")
17511759
@test isdefined(ci, :next)
17521760
@test ci.owner === nothing
17531761
@test ci.max_world == typemax(UInt)
1762+
@test Base.module_build_id(CustomAbstractInterpreterCaching) ==
1763+
Base.object_build_id(ci)
17541764
ci = ci.next
17551765
@test !isdefined(ci, :next)
17561766
@test ci.owner === cache_owner
17571767
@test ci.max_world == typemax(UInt)
1768+
@test Base.module_build_id(CustomAbstractInterpreterCaching) ==
1769+
Base.object_build_id(ci)
17581770
end
17591771
let m = only(methods(sum, (Vector{Float64},)))
17601772
found = false
@@ -1764,10 +1776,14 @@ let newinterp_path = abspath("compiler/newinterp.jl")
17641776
@test isdefined(ci, :next)
17651777
@test ci.owner === cache_owner
17661778
@test ci.max_world == typemax(UInt)
1779+
@test Base.module_build_id(CustomAbstractInterpreterCaching) ==
1780+
Base.object_build_id(ci)
17671781
ci = ci.next
17681782
@test !isdefined(ci, :next)
17691783
@test ci.owner === nothing
17701784
@test ci.max_world == typemax(UInt)
1785+
@test Base.module_build_id(CustomAbstractInterpreterCaching) ==
1786+
Base.object_build_id(ci)
17711787
found = true
17721788
break
17731789
end

0 commit comments

Comments
 (0)