Skip to content

Commit 6023350

Browse files
committed
auto merge of #11017 : alexcrichton/rust/faster-read, r=thestinger
We were previously reading metadata via `ar p`, but as learned from rustdoc a while back, spawning a process to do something is pretty slow. It turns out LLVM has an Archive class to read archives, but it cannot write archives. This commit adds bindings to the read-only version of the LLVM archive class (with a new type that only has a read() method), and then uses this class when reading the metadata out of rlibs. When you combine this with not compressing the metadata, reading the metadata is 4x faster than it used to be. The timings I got for reading metadata from the respective libraries were: libstd-04ff901e-0.9-pre.dylib => 100ms, libstd-04ff901e-0.9-pre.rlib => 23ms; librustuv-7945354c-0.9-pre.dylib => 4ms, librustuv-7945354c-0.9-pre.rlib => 1ms; librustc-5b94a16f-0.9-pre.dylib => 87ms, librustc-5b94a16f-0.9-pre.rlib => 35ms; libextra-a6ebb16f-0.9-pre.dylib => 63ms, libextra-a6ebb16f-0.9-pre.rlib => 15ms; libsyntax-2e4c0458-0.9-pre.dylib => 86ms, libsyntax-2e4c0458-0.9-pre.rlib => 22ms. In order to always take advantage of these faster metadata read times, I sort the files in filesearch based on whether they have an rlib extension or not (preferring all rlib files first). Overall, this halved the compile time for a `fn main() {}` crate from 0.185s to 0.095s on my system (when preferring dynamic linking). Reading metadata is still the slowest pass of the compiler at 0.035s, but it's getting pretty close to linking at 0.021s! The next best optimization is to just not copy the metadata from LLVM, because that's the most expensive part of reading metadata right now.
2 parents bb02d14 + 64faafb commit 6023350

File tree

8 files changed

+186
-17
lines changed

8 files changed

+186
-17
lines changed

src/librustc/back/archive.rs

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,29 @@
1212
1313
use driver::session::Session;
1414
use metadata::filesearch;
15+
use lib::llvm::{ArchiveRef, llvm};
1516

17+
use std::cast;
1618
use std::io::fs;
19+
use std::libc;
1720
use std::os;
1821
use std::run::{ProcessOptions, Process, ProcessOutput};
1922
use std::str;
23+
use std::unstable::raw;
2024
use extra::tempfile::TempDir;
2125
use syntax::abi;
2226

23-
pub static METADATA_FILENAME: &'static str = "metadata";
27+
pub static METADATA_FILENAME: &'static str = "rust.metadata.bin";
2428

2529
pub struct Archive {
2630
priv sess: Session,
2731
priv dst: Path,
2832
}
2933

34+
pub struct ArchiveRO {
35+
priv ptr: ArchiveRef,
36+
}
37+
3038
fn run_ar(sess: Session, args: &str, cwd: Option<&Path>,
3139
paths: &[&Path]) -> ProcessOutput {
3240
let ar = sess.opts.ar.clone().unwrap_or_else(|| ~"ar");
@@ -193,3 +201,50 @@ impl Archive {
193201
perhaps an -L flag is missing?", name));
194202
}
195203
}
204+
205+
impl ArchiveRO {
206+
/// Opens a static archive for read-only purposes. This is more optimized
207+
/// than the `open` method because it uses LLVM's internal `Archive` class
208+
/// rather than shelling out to `ar` for everything.
209+
///
210+
/// If this archive is used with a mutable method, then an error will be
211+
/// raised.
212+
pub fn open(dst: &Path) -> Option<ArchiveRO> {
213+
unsafe {
214+
let ar = dst.with_c_str(|dst| {
215+
llvm::LLVMRustOpenArchive(dst)
216+
});
217+
if ar.is_null() {
218+
None
219+
} else {
220+
Some(ArchiveRO { ptr: ar })
221+
}
222+
}
223+
}
224+
225+
/// Read a file in the archive
226+
pub fn read<'a>(&'a self, file: &str) -> Option<&'a [u8]> {
227+
unsafe {
228+
let mut size = 0 as libc::size_t;
229+
let ptr = file.with_c_str(|file| {
230+
llvm::LLVMRustArchiveReadSection(self.ptr, file, &mut size)
231+
});
232+
if ptr.is_null() {
233+
None
234+
} else {
235+
Some(cast::transmute(raw::Slice {
236+
data: ptr,
237+
len: size as uint,
238+
}))
239+
}
240+
}
241+
}
242+
}
243+
244+
impl Drop for ArchiveRO {
245+
fn drop(&mut self) {
246+
unsafe {
247+
llvm::LLVMRustDestroyArchive(self.ptr);
248+
}
249+
}
250+
}

src/librustc/back/lto.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use back::archive::Archive;
11+
use back::archive::ArchiveRO;
1212
use back::link;
1313
use driver::session;
1414
use lib::llvm::{ModuleRef, TargetMachineRef, llvm, True, False};
@@ -43,10 +43,11 @@ pub fn run(sess: session::Session, llmod: ModuleRef,
4343
}
4444
};
4545

46-
let archive = Archive::open(sess, path);
46+
let archive = ArchiveRO::open(&path).expect("wanted an rlib");
4747
debug!("reading {}", name);
4848
let bc = time(sess.time_passes(), format!("read {}.bc", name), (), |_|
4949
archive.read(format!("{}.bc", name)));
50+
let bc = bc.expect("missing bytecode in archive!");
5051
let ptr = bc.as_ptr();
5152
debug!("linking {}", name);
5253
time(sess.time_passes(), format!("ll link {}", name), (), |()| unsafe {

src/librustc/driver/driver.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,6 @@ pub fn early_error(emitter: @diagnostic::Emitter, msg: &str) -> ! {
11001100

11011101
pub fn list_metadata(sess: Session, path: &Path, out: @mut io::Writer) {
11021102
metadata::loader::list_file_metadata(
1103-
sess,
11041103
token::get_ident_interner(),
11051104
session::sess_os_to_meta_os(sess.targ_cfg.os), path, out);
11061105
}

src/librustc/lib/llvm.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,8 @@ pub enum Pass_opaque {}
257257
pub type PassRef = *Pass_opaque;
258258
pub enum TargetMachine_opaque {}
259259
pub type TargetMachineRef = *TargetMachine_opaque;
260+
pub enum Archive_opaque {}
261+
pub type ArchiveRef = *Archive_opaque;
260262

261263
pub mod debuginfo {
262264
use super::{ValueRef};
@@ -300,7 +302,7 @@ pub mod llvm {
300302
use super::{Bool, BuilderRef, ContextRef, MemoryBufferRef, ModuleRef};
301303
use super::{ObjectFileRef, Opcode, PassManagerRef, PassManagerBuilderRef};
302304
use super::{SectionIteratorRef, TargetDataRef, TypeKind, TypeRef, UseRef};
303-
use super::{ValueRef, TargetMachineRef, FileType};
305+
use super::{ValueRef, TargetMachineRef, FileType, ArchiveRef};
304306
use super::{CodeGenModel, RelocMode, CodeGenOptLevel};
305307
use super::debuginfo::*;
306308
use std::libc::{c_char, c_int, c_longlong, c_ushort, c_uint, c_ulonglong,
@@ -1748,6 +1750,11 @@ pub mod llvm {
17481750
syms: **c_char,
17491751
len: size_t);
17501752
pub fn LLVMRustMarkAllFunctionsNounwind(M: ModuleRef);
1753+
1754+
pub fn LLVMRustOpenArchive(path: *c_char) -> ArchiveRef;
1755+
pub fn LLVMRustArchiveReadSection(AR: ArchiveRef, name: *c_char,
1756+
out_len: *mut size_t) -> *c_char;
1757+
pub fn LLVMRustDestroyArchive(AR: ArchiveRef);
17511758
}
17521759
}
17531760

src/librustc/metadata/cstore.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
use metadata::cstore;
1717
use metadata::decoder;
18+
use metadata::loader;
1819

1920
use std::hashmap::HashMap;
2021
use extra;
@@ -29,6 +30,7 @@ pub type cnum_map = @mut HashMap<ast::CrateNum, ast::CrateNum>;
2930

3031
pub enum MetadataBlob {
3132
MetadataVec(~[u8]),
33+
MetadataArchive(loader::ArchiveMetadata),
3234
}
3335

3436
pub struct crate_metadata {
@@ -216,6 +218,7 @@ impl MetadataBlob {
216218
pub fn as_slice<'a>(&'a self) -> &'a [u8] {
217219
match *self {
218220
MetadataVec(ref vec) => vec.as_slice(),
221+
MetadataArchive(ref ar) => ar.as_slice(),
219222
}
220223
}
221224
}

src/librustc/metadata/filesearch.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,16 @@ pub fn search(filesearch: @FileSearch, pick: pick) {
123123
match io::result(|| fs::readdir(lib_search_path)) {
124124
Ok(files) => {
125125
let mut rslt = FileDoesntMatch;
126-
for path in files.iter() {
126+
let is_rlib = |p: & &Path| {
127+
p.extension_str() == Some("rlib")
128+
};
129+
// Reading metadata out of rlibs is faster, and if we find both
130+
// an rlib and a dylib we only read one of the files of
131+
// metadata, so in the name of speed, bring all rlib files to
132+
// the front of the search list.
133+
let files1 = files.iter().filter(|p| is_rlib(p));
134+
let files2 = files.iter().filter(|p| !is_rlib(p));
135+
for path in files1.chain(files2) {
127136
debug!("testing {}", path.display());
128137
let maybe_picked = pick(path);
129138
match maybe_picked {

src/librustc/metadata/loader.rs

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010

1111
//! Finds crate binaries and loads their metadata
1212
13-
use back::archive::{Archive, METADATA_FILENAME};
13+
use back::archive::{ArchiveRO, METADATA_FILENAME};
1414
use driver::session::Session;
1515
use lib::llvm::{False, llvm, ObjectFile, mk_section_iter};
16-
use metadata::cstore::{MetadataBlob, MetadataVec};
16+
use metadata::cstore::{MetadataBlob, MetadataVec, MetadataArchive};
1717
use metadata::decoder;
1818
use metadata::encoder;
1919
use metadata::filesearch::{FileMatches, FileDoesntMatch};
@@ -61,6 +61,12 @@ pub struct Library {
6161
metadata: MetadataBlob,
6262
}
6363

64+
pub struct ArchiveMetadata {
65+
priv archive: ArchiveRO,
66+
// See comments in ArchiveMetadata::new for why this is static
67+
priv data: &'static [u8],
68+
}
69+
6470
impl Context {
6571
pub fn load_library_crate(&self) -> Library {
6672
match self.find_library_crate() {
@@ -102,7 +108,7 @@ impl Context {
102108
if candidate && existing {
103109
FileMatches
104110
} else if candidate {
105-
match get_metadata_section(self.sess, self.os, path) {
111+
match get_metadata_section(self.os, path) {
106112
Some(cvec) =>
107113
if crate_matches(cvec.as_slice(), self.name,
108114
self.version, self.hash) {
@@ -248,11 +254,60 @@ fn crate_matches(crate_data: &[u8],
248254
}
249255
}
250256

251-
fn get_metadata_section(sess: Session, os: Os,
252-
filename: &Path) -> Option<MetadataBlob> {
257+
impl ArchiveMetadata {
258+
fn new(ar: ArchiveRO) -> Option<ArchiveMetadata> {
259+
let data: &'static [u8] = {
260+
let data = match ar.read(METADATA_FILENAME) {
261+
Some(data) => data,
262+
None => {
263+
debug!("didn't find '{}' in the archive", METADATA_FILENAME);
264+
return None;
265+
}
266+
};
267+
// This data is actually a pointer inside of the archive itself, but
268+
// we essentially want to cache it because the lookup inside the
269+
// archive is a fairly expensive operation (and it's queried for
270+
// *very* frequently). For this reason, we transmute it to the
271+
// static lifetime to put into the struct. Note that the buffer is
272+
// never actually handed out with a static lifetime, but rather the
273+
// buffer is loaned with the lifetime of this containing object.
274+
// Hence, we're guaranteed that the buffer will never be used after
275+
// this object is dead, so this is a safe operation to transmute and
276+
// store the data as a static buffer.
277+
unsafe { cast::transmute(data) }
278+
};
279+
Some(ArchiveMetadata {
280+
archive: ar,
281+
data: data,
282+
})
283+
}
284+
285+
pub fn as_slice<'a>(&'a self) -> &'a [u8] { self.data }
286+
}
287+
288+
// Just a small wrapper to time how long reading metadata takes.
289+
fn get_metadata_section(os: Os, filename: &Path) -> Option<MetadataBlob> {
290+
use extra::time;
291+
let start = time::precise_time_ns();
292+
let ret = get_metadata_section_imp(os, filename);
293+
info!("reading {} => {}ms", filename.filename_display(),
294+
(time::precise_time_ns() - start) / 1000000);
295+
return ret;
296+
}
297+
298+
fn get_metadata_section_imp(os: Os, filename: &Path) -> Option<MetadataBlob> {
253299
if filename.filename_str().unwrap().ends_with(".rlib") {
254-
let archive = Archive::open(sess, filename.clone());
255-
return Some(MetadataVec(archive.read(METADATA_FILENAME)));
300+
// Use ArchiveRO for speed here, it's backed by LLVM and uses mmap
301+
// internally to read the file. We also avoid even using a memcpy by
302+
// just keeping the archive along while the metadata is in use.
303+
let archive = match ArchiveRO::open(filename) {
304+
Some(ar) => ar,
305+
None => {
306+
debug!("llvm didn't like `{}`", filename.display());
307+
return None;
308+
}
309+
};
310+
return ArchiveMetadata::new(archive).map(|ar| MetadataArchive(ar));
256311
}
257312
unsafe {
258313
let mb = filename.with_c_str(|buf| {
@@ -322,13 +377,13 @@ pub fn read_meta_section_name(os: Os) -> &'static str {
322377
}
323378

324379
// A diagnostic function for dumping crate metadata to an output stream
325-
pub fn list_file_metadata(sess: Session,
326-
intr: @ident_interner,
380+
pub fn list_file_metadata(intr: @ident_interner,
327381
os: Os,
328382
path: &Path,
329383
out: @mut io::Writer) {
330-
match get_metadata_section(sess, os, path) {
331-
option::Some(bytes) => decoder::list_crate_metadata(intr, bytes.as_slice(),
384+
match get_metadata_section(os, path) {
385+
option::Some(bytes) => decoder::list_crate_metadata(intr,
386+
bytes.as_slice(),
332387
out),
333388
option::None => {
334389
write!(out, "could not find metadata in {}.\n", path.display())

src/rustllvm/RustWrapper.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// except according to those terms.
1010

1111
#include "rustllvm.h"
12+
#include "llvm/Object/Archive.h"
1213

1314
//===----------------------------------------------------------------------===
1415
//
@@ -19,6 +20,7 @@
1920

2021
using namespace llvm;
2122
using namespace llvm::sys;
23+
using namespace llvm::object;
2224

2325
const char *LLVMRustError;
2426

@@ -558,3 +560,41 @@ LLVMRustLinkInExternalBitcode(LLVMModuleRef dst, char *bc, size_t len) {
558560
}
559561
return true;
560562
}
563+
564+
extern "C" void*
565+
LLVMRustOpenArchive(char *path) {
566+
OwningPtr<MemoryBuffer> buf;
567+
error_code err = MemoryBuffer::getFile(path, buf);
568+
if (err) {
569+
LLVMRustError = err.message().c_str();
570+
return NULL;
571+
}
572+
Archive *ret = new Archive(buf.take(), err);
573+
if (err) {
574+
LLVMRustError = err.message().c_str();
575+
return NULL;
576+
}
577+
return ret;
578+
}
579+
580+
extern "C" const char*
581+
LLVMRustArchiveReadSection(Archive *ar, char *name, size_t *size) {
582+
for (Archive::child_iterator child = ar->begin_children(),
583+
end = ar->end_children();
584+
child != end; ++child) {
585+
StringRef sect_name;
586+
error_code err = child->getName(sect_name);
587+
if (err) continue;
588+
if (sect_name.trim(" ") == name) {
589+
StringRef buf = child->getBuffer();
590+
*size = buf.size();
591+
return buf.data();
592+
}
593+
}
594+
return NULL;
595+
}
596+
597+
extern "C" void
598+
LLVMRustDestroyArchive(Archive *ar) {
599+
delete ar;
600+
}

0 commit comments

Comments
 (0)