Skip to content

Commit c83d3c3

Browse files
authored
Rollup merge of rust-lang#63525 - matklad:centraliza-file-loading, r=petrochenkov
Make sure that all file loading happens via SourceMap. That way, callers don't need to repeat the "let's add this to sm manually for tracking dependencies" trick. It should make it easier to switch to using `FileLoader` for binary files in the future as well. cc rust-lang#62948. r? @petrochenkov
2 parents db3bae0 + 14bc998 commit c83d3c3

File tree

6 files changed

+51
-34
lines changed

6 files changed

+51
-34
lines changed

src/libsyntax/ext/expand.rs

+5-8
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ use syntax_pos::{Span, DUMMY_SP, FileName};
2525

2626
use rustc_data_structures::fx::FxHashMap;
2727
use rustc_data_structures::sync::Lrc;
28-
use std::fs;
2928
use std::io::ErrorKind;
3029
use std::{iter, mem};
3130
use std::ops::DerefMut;
@@ -1241,13 +1240,11 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> {
12411240
}
12421241

12431242
let filename = self.cx.resolve_path(&*file.as_str(), it.span());
1244-
match fs::read_to_string(&filename) {
1245-
Ok(src) => {
1246-
let src_interned = Symbol::intern(&src);
1247-
1248-
// Add this input file to the code map to make it available as
1249-
// dependency information
1250-
self.cx.source_map().new_source_file(filename.into(), src);
1243+
match self.cx.source_map().load_file(&filename) {
1244+
Ok(source_file) => {
1245+
let src = source_file.src.as_ref()
1246+
.expect("freshly loaded file should have a source");
1247+
let src_interned = Symbol::intern(src.as_str());
12511248

12521249
let include_info = vec![
12531250
ast::NestedMetaItem::MetaItem(

src/libsyntax/source_map.rs

+20
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,26 @@ impl SourceMap {
171171
Ok(self.new_source_file(filename, src))
172172
}
173173

174+
/// Loads a source file as a binary blob and returns its raw bytes.
175+
///
176+
/// Unlike `load_file`, guarantees that no normalization like BOM-removal
177+
/// takes place.
178+
pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
179+
// Ideally, this should use `self.file_loader`, but it can't
180+
// deal with binary files yet.
181+
let bytes = fs::read(path)?;
182+
183+
// We need to add the file to the `SourceMap`, so that it is present
184+
// in dep-info. There's also an edge case where the file might be both
185+
// loaded as a binary via `include_bytes!` and as a proper `SourceFile`
186+
// via `mod`, so we try to use the real file contents and not just an
187+
// empty string (the fallback used when the bytes are not valid UTF-8).
188+
let text = std::str::from_utf8(&bytes).unwrap_or("")
189+
.to_string();
190+
self.new_source_file(path.to_owned().into(), text);
191+
Ok(bytes)
192+
}
193+
174194
/// Returns a guard mapped onto the `source_files` vector stored in `self.files`.
pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
175195
LockGuard::map(self.files.borrow(), |files| &mut files.source_files)
176196
}

src/libsyntax_ext/source_util.rs

+11-26
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ use syntax::tokenstream;
99
use smallvec::SmallVec;
1010
use syntax_pos::{self, Pos, Span};
1111

12-
use std::fs;
13-
use std::io::ErrorKind;
1412
use rustc_data_structures::sync::Lrc;
1513

1614
// These macros all relate to the file system; they either return
@@ -114,20 +112,17 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
114112
None => return DummyResult::any(sp)
115113
};
116114
let file = cx.resolve_path(file, sp);
117-
match fs::read_to_string(&file) {
118-
Ok(src) => {
119-
let interned_src = Symbol::intern(&src);
120-
121-
// Add this input file to the code map to make it available as
122-
// dependency information
123-
cx.source_map().new_source_file(file.into(), src);
124-
125-
base::MacEager::expr(cx.expr_str(sp, interned_src))
115+
match cx.source_map().load_binary_file(&file) {
116+
Ok(bytes) => match std::str::from_utf8(&bytes) {
117+
Ok(src) => {
118+
let interned_src = Symbol::intern(&src);
119+
base::MacEager::expr(cx.expr_str(sp, interned_src))
120+
}
121+
Err(_) => {
122+
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
123+
DummyResult::any(sp)
124+
}
126125
},
127-
Err(ref e) if e.kind() == ErrorKind::InvalidData => {
128-
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
129-
DummyResult::any(sp)
130-
}
131126
Err(e) => {
132127
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
133128
DummyResult::any(sp)
@@ -142,18 +137,8 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::
142137
None => return DummyResult::any(sp)
143138
};
144139
let file = cx.resolve_path(file, sp);
145-
match fs::read(&file) {
140+
match cx.source_map().load_binary_file(&file) {
146141
Ok(bytes) => {
147-
// Add the contents to the source map if it contains UTF-8.
148-
let (contents, bytes) = match String::from_utf8(bytes) {
149-
Ok(s) => {
150-
let bytes = s.as_bytes().to_owned();
151-
(s, bytes)
152-
},
153-
Err(e) => (String::new(), e.into_bytes()),
154-
};
155-
cx.source_map().new_source_file(file.into(), contents);
156-
157142
base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
158143
},
159144
Err(e) => {

src/test/ui/.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
22
trailing-carriage-return-in-string.rs -text
3+
*.bin -text

src/test/ui/include-macros/data.bin

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
This file starts with BOM.
2+
Lines are separated by \r\n.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// run-pass
2+
3+
// Verifies that `include_bytes!` and `include_str!` return the contents of
// `data.bin` unmodified: the expected literals keep the leading BOM and the
// `\r\n` line endings.
fn main() {
4+
assert_eq!(
5+
&include_bytes!("data.bin")[..],
6+
&b"\xEF\xBB\xBFThis file starts with BOM.\r\nLines are separated by \\r\\n.\r\n"[..],
7+
);
8+
assert_eq!(
9+
include_str!("data.bin"),
10+
"\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n",
11+
);
12+
}

0 commit comments

Comments
 (0)