Skip to content

Commit 6153aae

Browse files
committed
auto merge of #5559 : jbclements/rust/change-to-tt-based-parsing, r=jbclements
Changes the parser to parse all streams into token-trees before hitting the parser proper, in preparation for hygiene. As an added bonus, it appears to speed up the parser (albeit by a totally imperceptible 1%). Also, many comments in the parser. Also, field renaming in token-trees (readme->forest, cur->stack).
2 parents 260d74d + f2e47cd commit 6153aae

File tree

6 files changed

+123
-34
lines changed

6 files changed

+123
-34
lines changed

src/librustc/driver/driver.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ pub fn parse_input(sess: Session, +cfg: ast::crate_cfg, input: input)
151151
-> @ast::crate {
152152
match input {
153153
file_input(ref file) => {
154-
parse::parse_crate_from_file(&(*file), cfg, sess.parse_sess)
154+
parse::parse_crate_from_file_using_tts(&(*file), cfg, sess.parse_sess)
155155
}
156156
str_input(ref src) => {
157157
// FIXME (#2319): Don't really want to box the source string

src/libsyntax/ext/tt/transcribe.rs

+29-28
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use core::vec;
2626
`~` */
2727
///an unzipping of `token_tree`s
2828
struct TtFrame {
29-
readme: @mut ~[ast::token_tree],
29+
forest: @mut ~[ast::token_tree],
3030
idx: uint,
3131
dotdotdoted: bool,
3232
sep: Option<Token>,
@@ -37,7 +37,7 @@ pub struct TtReader {
3737
sp_diag: @span_handler,
3838
interner: @ident_interner,
3939
// the unzipped tree:
40-
cur: @mut TtFrame,
40+
stack: @mut TtFrame,
4141
/* for MBE-style macro transcription */
4242
interpolations: LinearMap<ident, @named_match>,
4343
repeat_idx: ~[uint],
@@ -58,8 +58,8 @@ pub fn new_tt_reader(sp_diag: @span_handler,
5858
let r = @mut TtReader {
5959
sp_diag: sp_diag,
6060
interner: itr,
61-
cur: @mut TtFrame {
62-
readme: @mut src,
61+
stack: @mut TtFrame {
62+
forest: @mut src,
6363
idx: 0u,
6464
dotdotdoted: false,
6565
sep: None,
@@ -81,7 +81,7 @@ pub fn new_tt_reader(sp_diag: @span_handler,
8181

8282
fn dup_tt_frame(f: @mut TtFrame) -> @mut TtFrame {
8383
@mut TtFrame {
84-
readme: @mut (copy *f.readme),
84+
forest: @mut (copy *f.forest),
8585
idx: f.idx,
8686
dotdotdoted: f.dotdotdoted,
8787
sep: copy f.sep,
@@ -96,7 +96,7 @@ pub fn dup_tt_reader(r: @mut TtReader) -> @mut TtReader {
9696
@mut TtReader {
9797
sp_diag: r.sp_diag,
9898
interner: r.interner,
99-
cur: dup_tt_frame(r.cur),
99+
stack: dup_tt_frame(r.stack),
100100
interpolations: r.interpolations,
101101
repeat_idx: copy r.repeat_idx,
102102
repeat_len: copy r.repeat_len,
@@ -167,45 +167,46 @@ fn lockstep_iter_size(t: token_tree, r: &mut TtReader) -> lis {
167167
}
168168
}
169169

170-
170+
// return the next token from the TtReader.
171+
// EFFECT: advances the reader's token field
171172
pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
172173
let ret_val = TokenAndSpan {
173174
tok: copy r.cur_tok,
174175
sp: r.cur_span,
175176
};
176177
loop {
177178
{
178-
let cur = &mut *r.cur;
179-
let readme = &mut *cur.readme;
180-
if cur.idx < readme.len() {
179+
let stack = &mut *r.stack;
180+
let forest = &mut *stack.forest;
181+
if stack.idx < forest.len() {
181182
break;
182183
}
183184
}
184185

185186
/* done with this set; pop or repeat? */
186-
if ! r.cur.dotdotdoted
187+
if ! r.stack.dotdotdoted
187188
|| { *r.repeat_idx.last() == *r.repeat_len.last() - 1 } {
188189

189-
match r.cur.up {
190+
match r.stack.up {
190191
None => {
191192
r.cur_tok = EOF;
192193
return ret_val;
193194
}
194195
Some(tt_f) => {
195-
if r.cur.dotdotdoted {
196+
if r.stack.dotdotdoted {
196197
r.repeat_idx.pop();
197198
r.repeat_len.pop();
198199
}
199200

200-
r.cur = tt_f;
201-
r.cur.idx += 1u;
201+
r.stack = tt_f;
202+
r.stack.idx += 1u;
202203
}
203204
}
204205

205206
} else { /* repeat */
206-
r.cur.idx = 0u;
207+
r.stack.idx = 0u;
207208
r.repeat_idx[r.repeat_idx.len() - 1u] += 1u;
208-
match r.cur.sep {
209+
match r.stack.sep {
209210
Some(copy tk) => {
210211
r.cur_tok = tk; /* repeat same span, I guess */
211212
return ret_val;
@@ -216,21 +217,21 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
216217
}
217218
loop { /* because it's easiest, this handles `tt_delim` not starting
218219
with a `tt_tok`, even though it won't happen */
219-
match r.cur.readme[r.cur.idx] {
220+
match r.stack.forest[r.stack.idx] {
220221
tt_delim(copy tts) => {
221-
r.cur = @mut TtFrame {
222-
readme: @mut tts,
222+
r.stack = @mut TtFrame {
223+
forest: @mut tts,
223224
idx: 0u,
224225
dotdotdoted: false,
225226
sep: None,
226-
up: option::Some(r.cur)
227+
up: option::Some(r.stack)
227228
};
228229
// if this could be 0-length, we'd need to potentially recur here
229230
}
230231
tt_tok(sp, copy tok) => {
231232
r.cur_span = sp;
232233
r.cur_tok = tok;
233-
r.cur.idx += 1u;
234+
r.stack.idx += 1u;
234235
return ret_val;
235236
}
236237
tt_seq(sp, copy tts, copy sep, zerok) => {
@@ -256,17 +257,17 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
256257
once");
257258
}
258259

259-
r.cur.idx += 1u;
260+
r.stack.idx += 1u;
260261
return tt_next_token(r);
261262
} else {
262263
r.repeat_len.push(len);
263264
r.repeat_idx.push(0u);
264-
r.cur = @mut TtFrame {
265-
readme: @mut tts,
265+
r.stack = @mut TtFrame {
266+
forest: @mut tts,
266267
idx: 0u,
267268
dotdotdoted: true,
268269
sep: sep,
269-
up: Some(r.cur)
270+
up: Some(r.stack)
270271
};
271272
}
272273
}
@@ -280,13 +281,13 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
280281
(b) we actually can, since it's a token. */
281282
matched_nonterminal(nt_ident(sn,b)) => {
282283
r.cur_span = sp; r.cur_tok = IDENT(sn,b);
283-
r.cur.idx += 1u;
284+
r.stack.idx += 1u;
284285
return ret_val;
285286
}
286287
matched_nonterminal(ref other_whole_nt) => {
287288
r.cur_span = sp;
288289
r.cur_tok = INTERPOLATED(copy *other_whole_nt);
289-
r.cur.idx += 1u;
290+
r.stack.idx += 1u;
290291
return ret_val;
291292
}
292293
matched_seq(*) => {

src/libsyntax/parse/common.rs

+14-1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ pub impl Parser {
159159
}
160160
}
161161

162+
// if the given word is not a keyword, signal an error.
163+
// if the next token is the given keyword, eat it and return
164+
// true. Otherwise, return false.
162165
fn eat_keyword(&self, word: &~str) -> bool {
163166
self.require_keyword(word);
164167
let is_kw = match *self.token {
@@ -169,6 +172,9 @@ pub impl Parser {
169172
is_kw
170173
}
171174

175+
// if the given word is not a keyword, signal an error.
176+
// if the next token is not the given word, signal an error.
177+
// otherwise, eat it.
172178
fn expect_keyword(&self, word: &~str) {
173179
self.require_keyword(word);
174180
if !self.eat_keyword(word) {
@@ -182,10 +188,12 @@ pub impl Parser {
182188
}
183189
}
184190

191+
// return true if the given string is a strict keyword
185192
fn is_strict_keyword(&self, word: &~str) -> bool {
186193
self.strict_keywords.contains(word)
187194
}
188195

196+
// signal an error if the current token is a strict keyword
189197
fn check_strict_keywords(&self) {
190198
match *self.token {
191199
token::IDENT(_, false) => {
@@ -196,16 +204,19 @@ pub impl Parser {
196204
}
197205
}
198206

207+
// signal an error if the given string is a strict keyword
199208
fn check_strict_keywords_(&self, w: &~str) {
200209
if self.is_strict_keyword(w) {
201210
self.fatal(fmt!("found `%s` in ident position", *w));
202211
}
203212
}
204213

214+
// return true if this is a reserved keyword
205215
fn is_reserved_keyword(&self, word: &~str) -> bool {
206216
self.reserved_keywords.contains(word)
207217
}
208218

219+
// signal an error if the current token is a reserved keyword
209220
fn check_reserved_keywords(&self) {
210221
match *self.token {
211222
token::IDENT(_, false) => {
@@ -216,14 +227,16 @@ pub impl Parser {
216227
}
217228
}
218229

230+
// signal an error if the given string is a reserved keyword
219231
fn check_reserved_keywords_(&self, w: &~str) {
220232
if self.is_reserved_keyword(w) {
221233
self.fatal(fmt!("`%s` is a reserved keyword", *w));
222234
}
223235
}
224236

225237
// expect and consume a GT. if a >> is seen, replace it
226-
// with a single > and continue.
238+
// with a single > and continue. If a GT is not seen,
239+
// signal an error.
227240
fn expect_gt(&self) {
228241
if *self.token == token::GT {
229242
self.bump();

src/libsyntax/parse/lexer.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ pub fn new_low_level_string_reader(span_diagnostic: @span_handler,
8080
last_pos: filemap.start_pos,
8181
col: CharPos(0),
8282
curr: initial_char,
83-
filemap: filemap, interner: itr,
83+
filemap: filemap,
84+
interner: itr,
8485
/* dummy values; not read */
8586
peek_tok: token::EOF,
8687
peek_span: codemap::dummy_sp()
@@ -150,6 +151,7 @@ impl reader for TtReader {
150151
}
151152

152153
// EFFECT: advance peek_tok and peek_span to refer to the next token.
154+
// EFFECT: update the interner, maybe.
153155
fn string_advance_token(r: @mut StringReader) {
154156
match (consume_whitespace_and_comments(r)) {
155157
Some(comment) => {
@@ -539,6 +541,9 @@ fn ident_continue(c: char) -> bool {
539541
|| (c > 'z' && char::is_XID_continue(c))
540542
}
541543

544+
// return the next token from the string
545+
// EFFECT: advances the input past that token
546+
// EFFECT: updates the interner
542547
fn next_token_inner(rdr: @mut StringReader) -> token::Token {
543548
let mut accum_str = ~"";
544549
let mut c = rdr.curr;

src/libsyntax/parse/mod.rs

+49-3
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,14 @@ pub mod classify;
4545
/// Reporting obsolete syntax
4646
pub mod obsolete;
4747

48+
// info about a parsing session.
49+
// This structure and the reader both have
50+
// an interner associated with them. If they're
51+
// not the same, bad things can happen.
4852
pub struct ParseSess {
49-
cm: @codemap::CodeMap,
53+
cm: @codemap::CodeMap, // better be the same as the one in the reader!
5054
next_id: node_id,
51-
span_diagnostic: @span_handler,
55+
span_diagnostic: @span_handler, // better be the same as the one in the reader!
5256
interner: @ident_interner,
5357
}
5458

@@ -90,6 +94,19 @@ pub fn parse_crate_from_file(
9094
// why is there no p.abort_if_errors here?
9195
}
9296

97+
pub fn parse_crate_from_file_using_tts(
98+
input: &Path,
99+
cfg: ast::crate_cfg,
100+
sess: @mut ParseSess
101+
) -> @ast::crate {
102+
let p = new_parser_from_file(sess, /*bad*/ copy cfg, input);
103+
let tts = p.parse_all_token_trees();
104+
new_parser_from_tts(sess,cfg,tts).parse_crate_mod(/*bad*/ copy cfg)
105+
// why is there no p.abort_if_errors here?
106+
}
107+
108+
109+
93110
pub fn parse_crate_from_source_str(
94111
name: ~str,
95112
source: @~str,
@@ -313,17 +330,46 @@ mod test {
313330
use std;
314331
use core::io;
315332
use core::option::None;
333+
use ast;
316334

317335
#[test] fn to_json_str<E : Encodable<std::json::Encoder>>(val: @E) -> ~str {
318336
do io::with_str_writer |writer| {
319337
val.encode(~std::json::Encoder(writer));
320338
}
321339
}
322340

341+
fn string_to_crate (source_str : @~str) -> @ast::crate {
342+
parse_crate_from_source_str(
343+
~"bogofile",
344+
source_str,
345+
~[],
346+
new_parse_sess(None))
347+
}
348+
349+
fn string_to_tt_to_crate (source_str : @~str) -> @ast::crate {
350+
let tts = parse_tts_from_source_str(
351+
~"bogofile",
352+
source_str,
353+
~[],
354+
new_parse_sess(None));
355+
new_parser_from_tts(new_parse_sess(None),~[],tts)
356+
.parse_crate_mod(~[])
357+
}
358+
359+
// make sure that parsing from TTs produces the same result
360+
// as parsing from strings
361+
#[test] fn tts_produce_the_same_result () {
362+
let source_str = @~"fn foo (x : int) { x; }";
363+
assert_eq!(string_to_tt_to_crate(source_str),
364+
string_to_crate(source_str));
365+
}
366+
367+
// check the contents of the tt manually:
323368
#[test] fn alltts () {
369+
let source_str = @~"fn foo (x : int) { x; }";
324370
let tts = parse_tts_from_source_str(
325371
~"bogofile",
326-
@~"fn foo (x : int) { x; }",
372+
source_str,
327373
~[],
328374
new_parse_sess(None));
329375
assert_eq!(

Comments (0 commit comments)