Skip to content

Fall back to the unoptimized implementation in read_binary_file if File::metadata lies #115549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions compiler/rustc_span/src/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,39 @@ impl FileLoader for RealFileLoader {

let mut bytes = Lrc::new_uninit_slice(len as usize);
let mut buf = BorrowedBuf::from(Lrc::get_mut(&mut bytes).unwrap());
file.read_buf_exact(buf.unfilled())?;
match file.read_buf_exact(buf.unfilled()) {
Ok(()) => {}
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
drop(bytes);
return fs::read(path).map(Vec::into);
}
Err(e) => return Err(e),
}
// SAFETY: If the read_buf_exact call returns Ok(()), then we have
// read len bytes and initialized the buffer.
Ok(unsafe { bytes.assume_init() })
let bytes = unsafe { bytes.assume_init() };

// At this point, we've read all the bytes that filesystem metadata reported exist.
// But we are not guaranteed to be at the end of the file, because we did not attempt to do
// a read with a non-zero-sized buffer and get Ok(0).
// So we do a small read into a fixed-size buffer. If the read returns no bytes then we're
// already done, and we just return the Lrc we built above.
// If the read returns bytes however, we just fall back to reading into a Vec then turning
// that into an Lrc, losing our nice peak memory behavior. This fallback code path should
// be rarely exercised.

let mut probe = [0u8; 32];
let n = loop {
match file.read(&mut probe) {
Ok(0) => return Ok(bytes),
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
Ok(n) => break n,
}
};
let mut bytes: Vec<u8> = bytes.iter().copied().chain(probe[..n].iter().copied()).collect();
file.read_to_end(&mut bytes)?;
Ok(bytes.into())
}
}

Expand Down
27 changes: 27 additions & 0 deletions compiler/rustc_span/src/source_map/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,3 +567,30 @@ fn test_next_point() {
assert_eq!(span.hi().0, 6);
assert!(sm.span_to_snippet(span).is_err());
}

#[cfg(target_os = "linux")]
#[test]
fn read_binary_file_handles_lying_stat() {
    // `read_binary_file` is meant to load a file into an `Lrc<[u8]>` without ever
    // holding two copies of the contents in memory, which matters for
    // `include_bytes!` on large files. Rust allocators are sensitive to alignment,
    // so that implementation cannot simply be layered on top of `std::fs::read`.
    // This test therefore checks that it still matches `std::fs::read` byte for
    // byte on files whose `fs::metadata` length is wrong.

    // Returns the length reported by stat alongside the bytes actually readable.
    let stat_len_and_contents = |path: &Path| {
        let reported = std::fs::metadata(path).unwrap().len() as usize;
        let contents = std::fs::read(path).unwrap();
        (reported, contents)
    };

    // Under-reporting case: stat always claims /proc/self/cmdline has length 0,
    // but reading it yields data.
    let cmdline_path = Path::new("/proc/self/cmdline");
    let (reported, expected) = stat_len_and_contents(cmdline_path);
    assert!(reported < expected.len());
    let loaded = RealFileLoader.read_binary_file(cmdline_path).unwrap();
    assert_eq!(&expected[..], &loaded[..]);

    // Over-reporting case: stat always claims /sys/devices/system/cpu/kernel_max
    // is the size of a block, which is more than the file really contains.
    let kernel_max_path = Path::new("/sys/devices/system/cpu/kernel_max");
    let (reported, expected) = stat_len_and_contents(kernel_max_path);
    assert!(reported > expected.len());
    let loaded = RealFileLoader.read_binary_file(kernel_max_path).unwrap();
    assert_eq!(&expected[..], &loaded[..]);
}