Skip to content

Commit 4893450

Browse files
committed
Separate out a scan_raw_string function (similar to the raw_byte variant)
1 parent 30ab82e commit 4893450

File tree

1 file changed

+82
-77
lines changed
  • src/libsyntax/parse/lexer

1 file changed

+82
-77
lines changed

src/libsyntax/parse/lexer/mod.rs

Lines changed: 82 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,82 +1086,10 @@ impl<'a> StringReader<'a> {
10861086
Ok(TokenKind::lit(token::Str, symbol, suffix))
10871087
}
10881088
'r' => {
1089-
let start_bpos = self.pos;
1090-
self.bump();
1091-
let mut hash_count: u16 = 0;
1092-
while self.ch_is('#') {
1093-
if hash_count == 65535 {
1094-
let bpos = self.next_pos;
1095-
self.fatal_span_(start_bpos,
1096-
bpos,
1097-
"too many `#` symbols: raw strings may be \
1098-
delimited by up to 65535 `#` symbols").raise();
1099-
}
1100-
self.bump();
1101-
hash_count += 1;
1102-
}
1103-
1104-
if self.is_eof() {
1105-
self.fail_unterminated_raw_string(start_bpos, hash_count);
1106-
} else if !self.ch_is('"') {
1107-
let last_bpos = self.pos;
1108-
let curr_char = self.ch.unwrap();
1109-
self.fatal_span_char(start_bpos,
1110-
last_bpos,
1111-
"found invalid character; only `#` is allowed \
1112-
in raw string delimitation",
1113-
curr_char).raise();
1114-
}
1115-
self.bump();
1116-
let content_start_bpos = self.pos;
1117-
let mut content_end_bpos;
1118-
let mut valid = true;
1119-
'outer: loop {
1120-
if self.is_eof() {
1121-
self.fail_unterminated_raw_string(start_bpos, hash_count);
1122-
}
1123-
// if self.ch_is('"') {
1124-
// content_end_bpos = self.pos;
1125-
// for _ in 0..hash_count {
1126-
// self.bump();
1127-
// if !self.ch_is('#') {
1128-
// continue 'outer;
1129-
let c = self.ch.unwrap();
1130-
match c {
1131-
'"' => {
1132-
content_end_bpos = self.pos;
1133-
for _ in 0..hash_count {
1134-
self.bump();
1135-
if !self.ch_is('#') {
1136-
continue 'outer;
1137-
}
1138-
}
1139-
break;
1140-
}
1141-
'\r' => {
1142-
if !self.nextch_is('\n') {
1143-
let last_bpos = self.pos;
1144-
self.err_span_(start_bpos,
1145-
last_bpos,
1146-
"bare CR not allowed in raw string, use \\r \
1147-
instead");
1148-
valid = false;
1149-
}
1150-
}
1151-
_ => (),
1152-
}
1153-
self.bump();
1154-
}
1155-
1156-
self.bump();
1157-
let symbol = if valid {
1158-
self.name_from_to(content_start_bpos, content_end_bpos)
1159-
} else {
1160-
Symbol::intern("??")
1161-
};
1089+
let (kind, symbol) = self.scan_raw_string();
11621090
let suffix = self.scan_optional_raw_name();
11631091

1164-
Ok(TokenKind::lit(token::StrRaw(hash_count), symbol, suffix))
1092+
Ok(TokenKind::lit(kind, symbol, suffix))
11651093
}
11661094
'-' => {
11671095
if self.nextch_is('>') {
@@ -1315,6 +1243,83 @@ impl<'a> StringReader<'a> {
13151243
id
13161244
}
13171245

1246+
fn scan_raw_string(&mut self) -> (token::LitKind, Symbol) {
1247+
let start_bpos = self.pos;
1248+
self.bump();
1249+
let mut hash_count: u16 = 0;
1250+
while self.ch_is('#') {
1251+
if hash_count == 65535 {
1252+
let bpos = self.next_pos;
1253+
self.fatal_span_(start_bpos,
1254+
bpos,
1255+
"too many `#` symbols: raw strings may be \
1256+
delimited by up to 65535 `#` symbols").raise();
1257+
}
1258+
self.bump();
1259+
hash_count += 1;
1260+
}
1261+
1262+
if self.is_eof() {
1263+
self.fail_unterminated_raw_string(start_bpos, hash_count);
1264+
} else if !self.ch_is('"') {
1265+
let last_bpos = self.pos;
1266+
let curr_char = self.ch.unwrap();
1267+
self.fatal_span_char(start_bpos,
1268+
last_bpos,
1269+
"found invalid character; only `#` is allowed \
1270+
in raw string delimitation",
1271+
curr_char).raise();
1272+
}
1273+
self.bump();
1274+
let content_start_bpos = self.pos;
1275+
let mut content_end_bpos;
1276+
let mut valid = true;
1277+
'outer: loop {
1278+
// if self.ch_is('"') {
1279+
// content_end_bpos = self.pos;
1280+
// for _ in 0..hash_count {
1281+
// self.bump();
1282+
// if !self.ch_is('#') {
1283+
// continue 'outer;
1284+
match self.ch {
1285+
None => {
1286+
self.fail_unterminated_raw_string(start_bpos, hash_count);
1287+
}
1288+
Some('"') => {
1289+
content_end_bpos = self.pos;
1290+
for _ in 0..hash_count {
1291+
self.bump();
1292+
if !self.ch_is('#') {
1293+
continue 'outer;
1294+
}
1295+
}
1296+
break;
1297+
}
1298+
Some(c) => {
1299+
if c == '\r' && !self.nextch_is('\n') {
1300+
let last_bpos = self.pos;
1301+
self.err_span_(start_bpos,
1302+
last_bpos,
1303+
"bare CR not allowed in raw string, use \\r \
1304+
instead");
1305+
valid = false;
1306+
}
1307+
}
1308+
}
1309+
self.bump();
1310+
}
1311+
1312+
self.bump();
1313+
1314+
let symbol = if valid {
1315+
self.name_from_to(content_start_bpos, content_end_bpos)
1316+
} else {
1317+
Symbol::intern("??")
1318+
};
1319+
1320+
(token::StrRaw(hash_count), symbol)
1321+
}
1322+
13181323
fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) {
13191324
let start_bpos = self.pos;
13201325
self.bump();
@@ -1324,7 +1329,7 @@ impl<'a> StringReader<'a> {
13241329
let bpos = self.next_pos;
13251330
self.fatal_span_(start_bpos,
13261331
bpos,
1327-
"too many `#` symbols: raw byte strings may be \
1332+
"too many `#` symbols: raw strings may be \
13281333
delimited by up to 65535 `#` symbols").raise();
13291334
}
13301335
self.bump();
@@ -1334,8 +1339,8 @@ impl<'a> StringReader<'a> {
13341339
if self.is_eof() {
13351340
self.fail_unterminated_raw_string(start_bpos, hash_count);
13361341
} else if !self.ch_is('"') {
1337-
let pos = self.pos;
1338-
let ch = self.ch.unwrap();
1342+
let last_bpos = self.pos;
1343+
let curr_char = self.ch.unwrap();
13391344
self.fatal_span_char(start_bpos,
13401345
pos,
13411346
"found invalid character; only `#` is allowed in raw \

0 commit comments

Comments
 (0)