Skip to content

More aggressive CGU merging #112648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions compiler/rustc_middle/src/mir/mono.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,10 +335,6 @@ impl<'tcx> CodegenUnit<'tcx> {
.expect("create_size_estimate must be called before getting a size_estimate")
}

/// Increases this codegen unit's stored size estimate by `delta`.
///
/// # Panics
///
/// Panics if no size estimate is currently stored (the `Option` is `None`).
/// NOTE(review): the estimate is presumably populated by
/// `create_size_estimate` — confirm against the rest of the `impl`.
pub fn modify_size_estimate(&mut self, delta: usize) {
    // Borrow the stored estimate mutably; `unwrap` enforces that it exists.
    let estimate = self.size_estimate.as_mut().unwrap();
    *estimate += delta;
}

/// Returns `true` if `item` is present as a key in this codegen unit's
/// item map, and `false` otherwise.
pub fn contains_item(&self, item: &MonoItem<'tcx>) -> bool {
    let items = self.items();
    items.contains_key(item)
}
Expand Down
94 changes: 67 additions & 27 deletions compiler/rustc_monomorphize/src/partitioning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,19 +166,9 @@ where
placed
};

// Merge until we have at most `max_cgu_count` codegen units.
// `merge_codegen_units` is responsible for updating the CGU size
// estimates.
{
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
merge_codegen_units(cx, &mut codegen_units);
debug_dump(tcx, "MERGE", &codegen_units, unique_inlined_stats);
}

// In the next step, we use the inlining map to determine which additional
// monomorphizations have to go into each codegen unit. These additional
// monomorphizations can be drop-glue, functions from external crates, and
// local functions the definition of which is marked with `#[inline]`.
// Use the usage map to put additional mono items in each codegen unit:
// drop-glue, functions from external crates, and local functions the
// definition of which is marked with `#[inline]`.
{
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_place_inline_items");
place_inlined_mono_items(cx, &mut codegen_units);
Expand All @@ -190,8 +180,17 @@ where
debug_dump(tcx, "INLINE", &codegen_units, unique_inlined_stats);
}

// Next we try to make as many symbols "internal" as possible, so LLVM has
// more freedom to optimize.
// Merge until we have at most `max_cgu_count` codegen units.
// `merge_codegen_units` is responsible for updating the CGU size
// estimates.
{
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
merge_codegen_units(cx, &mut codegen_units);
debug_dump(tcx, "MERGE", &codegen_units, unique_inlined_stats);
}

// Make as many symbols "internal" as possible, so LLVM has more freedom to
// optimize.
if !tcx.sess.link_dead_code() {
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_internalize_symbols");
internalize_symbols(cx, &mut codegen_units, internalization_candidates);
Expand Down Expand Up @@ -314,35 +313,76 @@ fn merge_codegen_units<'tcx>(
// worse generated code. So we don't allow CGUs smaller than this (unless
// there is just one CGU, of course). Note that CGU sizes of 100,000+ are
// common in larger programs, so this isn't all that large.
const NON_INCR_MIN_CGU_SIZE: usize = 1000;
const NON_INCR_MIN_CGU_SIZE: usize = 2000;

// Repeatedly merge the two smallest codegen units as long as:
// - we have more CGUs than the upper limit, or
// - (Non-incremental builds only) the user didn't specify a CGU count, and
// there are multiple CGUs, and some are below the minimum size.
// - njn: update this comment
//
// The "didn't specify a CGU count" condition is because when an explicit
// count is requested we observe it as closely as possible. For example,
// the `compiler_builtins` crate sets `codegen-units = 10000` and it's
// critical they aren't merged. Also, some tests use explicit small values
// and likewise won't work if small CGUs are merged.
while codegen_units.len() > cx.tcx.sess.codegen_units().as_usize()
|| (cx.tcx.sess.opts.incremental.is_none()
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
&& codegen_units.len() > 1
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE))
{
//eprintln!("-----");
loop {
// njn: where to put this?
// Sort small cgus to the back.
codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));

//eprintln!("cgus: {:?}", codegen_units.iter().map(|cgu| cgu.size_estimate()).collect::<Vec<_>>());

let merge1 = codegen_units.len() > cx.tcx.sess.codegen_units().as_usize();

let merge2 = cx.tcx.sess.opts.incremental.is_none()
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
&& codegen_units.len() >= 2
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE);

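// njn: adding the two size estimates is an imperfect measure: the two
// CGUs may contain overlapping (duplicate inlined) items, in which case
// the merged CGU would be smaller than the sum.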
let merge3 = cx.tcx.sess.opts.incremental.is_none()
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
&& codegen_units.len() >= 3
&& {
// eprintln!(
// "sz: {} >= {} + {}?",
// codegen_units[0].size_estimate(),
// codegen_units[codegen_units.len() - 2].size_estimate(),
// codegen_units[codegen_units.len() - 1].size_estimate());

(codegen_units[0].size_estimate() as f64 * 0.8)
>= (codegen_units[codegen_units.len() - 2].size_estimate()
+ codegen_units[codegen_units.len() - 1].size_estimate())
as f64
};

if !(merge1 || merge2 || merge3) {
break;
}

let mut smallest = codegen_units.pop().unwrap();
let second_smallest = codegen_units.last_mut().unwrap();

// Move the mono-items from `smallest` to `second_smallest`
second_smallest.modify_size_estimate(smallest.size_estimate());
for (k, v) in smallest.items_mut().drain() {
second_smallest.items_mut().insert(k, v);
}
// let sm_size = smallest.size_estimate();
// let sec_sm_size = second_smallest.size_estimate();

// Move the items from `smallest` to `second_smallest`. Some of them
// may be duplicate inlined items, in which case the destination CGU is
// unaffected. Recalculate size estimates afterwards.
second_smallest.items_mut().extend(smallest.items_mut().drain());
second_smallest.create_size_estimate(cx.tcx);

// eprintln!(
// "merge: {} {} {}: {} + {} -> {}",
// merge1,
// merge2,
// merge3,
// sec_sm_size,
// sm_size,
// second_smallest.size_estimate()
// );

// Record that `second_smallest` now contains all the stuff that was
// in `smallest` before.
Expand Down