Skip to content

Commit e60b0f8

Browse files
committed
Refactor CodegenUnit size estimates
1 parent 8447f4f commit e60b0f8

File tree

3 files changed

+50
-44
lines changed

3 files changed

+50
-44
lines changed

src/librustc/mir/mono.rs

+39-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
use syntax::ast::NodeId;
1212
use syntax::symbol::InternedString;
13-
use ty::Instance;
13+
use ty::{Instance, TyCtxt};
1414
use util::nodemap::FxHashMap;
1515
use rustc_data_structures::base_n;
1616
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
@@ -25,6 +25,22 @@ pub enum MonoItem<'tcx> {
2525
GlobalAsm(NodeId),
2626
}
2727

28+
impl<'tcx> MonoItem<'tcx> {
29+
pub fn size_estimate<'a>(&self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) -> usize {
30+
match *self {
31+
MonoItem::Fn(instance) => {
32+
// Estimate the size of a function based on how many statements
33+
// it contains.
34+
let mir = tcx.instance_mir(instance.def);
35+
mir.basic_blocks().iter().map(|bb| bb.statements.len()).sum()
36+
},
37+
// Conservatively estimate the size of a static declaration
38+
// or assembly to be 1.
39+
MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1,
40+
}
41+
}
42+
}
43+
2844
impl<'tcx> HashStable<StableHashingContext<'tcx>> for MonoItem<'tcx> {
2945
fn hash_stable<W: StableHasherResult>(&self,
3046
hcx: &mut StableHashingContext<'tcx>,
@@ -52,6 +68,7 @@ pub struct CodegenUnit<'tcx> {
5268
/// as well as the crate name and disambiguator.
5369
name: InternedString,
5470
items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>,
71+
size_estimate: Option<usize>,
5572
}
5673

5774
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
@@ -101,6 +118,7 @@ impl<'tcx> CodegenUnit<'tcx> {
101118
CodegenUnit {
102119
name: name,
103120
items: FxHashMap(),
121+
size_estimate: None,
104122
}
105123
}
106124

@@ -131,6 +149,25 @@ impl<'tcx> CodegenUnit<'tcx> {
131149
let hash = hash & ((1u128 << 80) - 1);
132150
base_n::encode(hash, base_n::CASE_INSENSITIVE)
133151
}
152+
153+
pub fn estimate_size<'a>(&mut self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) {
154+
// Estimate the size of a codegen unit as (approximately) the number of MIR
155+
// statements it corresponds to.
156+
self.size_estimate = Some(self.items.keys().map(|mi| mi.size_estimate(tcx)).sum());
157+
}
158+
159+
pub fn size_estimate(&self) -> usize {
160+
// Should only be called if `estimate_size` has previously been called.
161+
assert!(self.size_estimate.is_some());
162+
self.size_estimate.unwrap()
163+
}
164+
165+
pub fn modify_size_estimate(&mut self, delta: usize) {
166+
assert!(self.size_estimate.is_some());
167+
if let Some(size_estimate) = self.size_estimate {
168+
self.size_estimate = Some(size_estimate + delta);
169+
}
170+
}
134171
}
135172

136173
impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {
@@ -140,6 +177,7 @@ impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {
140177
let CodegenUnit {
141178
ref items,
142179
name,
180+
..
143181
} = *self;
144182

145183
name.hash_stable(hcx, hasher);

src/librustc_mir/monomorphize/partitioning.rs

+9-41
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ use rustc::mir::mono::{Linkage, Visibility};
110110
use rustc::ty::{self, TyCtxt, InstanceDef};
111111
use rustc::ty::item_path::characteristic_def_id_of_type;
112112
use rustc::util::nodemap::{FxHashMap, FxHashSet};
113-
use std::collections::hash_map::{HashMap, Entry};
113+
use std::collections::hash_map::Entry;
114114
use syntax::ast::NodeId;
115115
use syntax::symbol::{Symbol, InternedString};
116116
use rustc::mir::mono::MonoItem;
@@ -225,12 +225,14 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
225225
let mut initial_partitioning = place_root_translation_items(tcx,
226226
trans_items);
227227

228+
initial_partitioning.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx));
229+
228230
debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter());
229231

230232
// If the partitioning should produce a fixed count of codegen units, merge
231233
// until that count is reached.
232234
if let PartitioningStrategy::FixedUnitCount(count) = strategy {
233-
merge_codegen_units(tcx, &mut initial_partitioning, count, &tcx.crate_name.as_str());
235+
merge_codegen_units(&mut initial_partitioning, count, &tcx.crate_name.as_str());
234236

235237
debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter());
236238
}
@@ -242,6 +244,8 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
242244
let mut post_inlining = place_inlined_translation_items(initial_partitioning,
243245
inlining_map);
244246

247+
post_inlining.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx));
248+
245249
debug_dump(tcx, "POST INLINING:", post_inlining.codegen_units.iter());
246250

247251
// Next we try to make as many symbols "internal" as possible, so LLVM has
@@ -405,8 +409,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
405409
}
406410
}
407411

408-
fn merge_codegen_units<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
409-
initial_partitioning: &mut PreInliningPartitioning<'tcx>,
412+
fn merge_codegen_units<'tcx>(initial_partitioning: &mut PreInliningPartitioning<'tcx>,
410413
target_cgu_count: usize,
411414
crate_name: &str) {
412415
assert!(target_cgu_count >= 1);
@@ -423,51 +426,16 @@ fn merge_codegen_units<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
423426
// the stable sort below will keep everything nice and deterministic.
424427
codegen_units.sort_by_key(|cgu| cgu.name().clone());
425428

426-
// Estimate the size of a codegen unit as (approximately) the number of MIR
427-
// statements it corresponds to.
428-
fn codegen_unit_size_estimate<'a, 'tcx>(cgu: &CodegenUnit<'tcx>,
429-
mono_item_sizes: &HashMap<MonoItem, usize>)
430-
-> usize {
431-
cgu.items().keys().map(|mi| mono_item_sizes.get(mi).unwrap()).sum()
432-
}
433-
434-
fn mono_item_size_estimate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
435-
item: &MonoItem<'tcx>)
436-
-> usize {
437-
match item {
438-
MonoItem::Fn(instance) => {
439-
// Estimate the size of a function based on how many statements
440-
// it contains.
441-
let mir = tcx.instance_mir(instance.def);
442-
mir.basic_blocks().iter().map(|bb| bb.statements.len()).sum()
443-
},
444-
// Conservatively estimate the size of a static declaration
445-
// or assembly to be 1.
446-
MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1,
447-
}
448-
}
449-
450-
// Since `sort_by_key` currently recomputes the keys for each comparison,
451-
// we can save unnecessary recomputations by storing size estimates for
452-
// each `MonoItem`. Storing estimates for `CodegenUnit` might be preferable,
453-
// but its structure makes it awkward to use as a key and additionally their
454-
// sizes change as the merging occurs, requiring the map to be updated.
455-
let mut sizes: HashMap<MonoItem, usize> = HashMap::new();
456-
for mis in codegen_units.iter().map(|cgu| cgu.items().keys()) {
457-
mis.for_each(|mi| {
458-
sizes.entry(*mi).or_insert_with(|| mono_item_size_estimate(tcx, mi));
459-
});
460-
}
461-
462429
// Merge the two smallest codegen units until the target size is reached.
463430
// Note that "size" is estimated here only approximately, as the number of
464431
// MIR statements in a given unit. This could be improved on.
465432
while codegen_units.len() > target_cgu_count {
466433
// Sort small cgus to the back
467-
codegen_units.sort_by_key(|cgu| usize::MAX - codegen_unit_size_estimate(cgu, &sizes));
434+
codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate());
468435
let mut smallest = codegen_units.pop().unwrap();
469436
let second_smallest = codegen_units.last_mut().unwrap();
470437

438+
second_smallest.modify_size_estimate(smallest.size_estimate());
471439
for (k, v) in smallest.items_mut().drain() {
472440
second_smallest.items_mut().insert(k, v);
473441
}

src/librustc_trans/base.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ use std::ffi::CString;
7979
use std::str;
8080
use std::sync::Arc;
8181
use std::time::{Instant, Duration};
82-
use std::i32;
82+
use std::{i32, usize};
8383
use std::iter;
8484
use std::sync::mpsc;
8585
use syntax_pos::Span;
@@ -829,7 +829,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
829829
// account the size of each TransItem.
830830
let codegen_units = {
831831
let mut codegen_units = codegen_units;
832-
codegen_units.sort_by_key(|cgu| -(cgu.items().len() as isize));
832+
codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate());
833833
codegen_units
834834
};
835835

0 commit comments

Comments
 (0)