@@ -14,7 +14,7 @@ use parse::new_parser_from_source_str;
14
14
use parse:: parser:: Parser ;
15
15
use parse:: token;
16
16
use ptr:: P ;
17
- use str :: char_at ;
17
+ use std :: iter :: Peekable ;
18
18
19
19
/// Map a string to tts, using a made-up filename:
20
20
pub fn string_to_tts ( source_str : String ) -> Vec < ast:: TokenTree > {
@@ -87,57 +87,55 @@ pub fn strs_to_idents(ids: Vec<&str> ) -> Vec<ast::Ident> {
87
87
88
88
/// Does the string `a` match the pattern `b`, modulo whitespace?
///
/// Whitespace in `a` may be deleted, or replaced with a different run of
/// whitespace, and still match `b`:
///
/// * a run of whitespace in `a` facing a run of whitespace in `b` matches,
///   whatever the lengths or the exact characters of the two runs;
/// * a run of whitespace in `a` facing a non-whitespace character in `b` is
///   skipped;
/// * whitespace that appears only in `b` is *not* skipped, so `a` must then
///   contain that very character for the match to succeed;
/// * once `b` is exhausted, `a` may still carry trailing whitespace.
///
/// Whitespace is decided by `char::is_whitespace`, and the comparison walks
/// `char`s rather than bytes, so multi-byte UTF-8 sequences are handled
/// correctly. No Unicode normalization (e.g. NFKC) is performed: characters
/// are compared code point by code point.
pub fn matches_codepattern(a: &str, b: &str) -> bool {
    let mut a_iter = a.chars().peekable();
    let mut b_iter = b.chars().peekable();

    loop {
        // Copy the peeked chars out so that neither iterator stays borrowed
        // while we decide how to advance them.
        let (a, b) = match (a_iter.peek().cloned(), b_iter.peek().cloned()) {
            (None, None) => return true,
            // `a` ran out while the pattern still expects more.
            (None, Some(_)) => return false,
            // The pattern ran out: `a` may only have whitespace left.
            (Some(_), None) => return a_iter.all(|c| c.is_whitespace()),
            (Some(a), Some(b)) => (a, b),
        };

        if a.is_whitespace() {
            // Whitespace in `a` is always consumed as a whole run ...
            scan_for_non_ws_or_end(&mut a_iter);
            // ... and swallows a facing run of whitespace in `b`, if any.
            if b.is_whitespace() {
                scan_for_non_ws_or_end(&mut b_iter);
            }
        } else if a == b {
            a_iter.next();
            b_iter.next();
        } else {
            // Covers both a plain mismatch and whitespace present only in `b`,
            // which is deliberately *not* skipped.
            return false;
        }
    }
}

/// Advances the given peekable iterator past any leading whitespace, leaving
/// it positioned on the first non-whitespace character or at its end.
fn scan_for_non_ws_or_end<I: Iterator<Item = char>>(iter: &mut Peekable<I>) {
    while iter.peek().map_or(false, |c| c.is_whitespace()) {
        iter.next();
    }
}
142
140
143
141
pub fn is_whitespace ( c : char ) -> bool {
@@ -148,7 +146,8 @@ pub fn is_whitespace(c: char) -> bool {
148
146
mod tests {
149
147
use super :: * ;
150
148
151
- #[ test] fn eqmodws ( ) {
149
+ #[ test]
150
+ fn eqmodws ( ) {
152
151
assert_eq ! ( matches_codepattern( "" , "" ) , true ) ;
153
152
assert_eq ! ( matches_codepattern( "" , "a" ) , false ) ;
154
153
assert_eq ! ( matches_codepattern( "a" , "" ) , false ) ;
@@ -159,5 +158,18 @@ mod tests {
159
158
assert_eq ! ( matches_codepattern( "a b" , "a b" ) , true ) ;
160
159
assert_eq ! ( matches_codepattern( "ab" , "a b" ) , false ) ;
161
160
assert_eq ! ( matches_codepattern( "a b" , "ab" ) , true ) ;
161
+ assert_eq ! ( matches_codepattern( " a b" , "ab" ) , true ) ;
162
+ }
163
+
164
+ #[ test]
165
+ fn more_whitespace ( ) {
166
+ assert_eq ! ( matches_codepattern( "" , "\x0C " ) , false ) ;
167
+ assert_eq ! ( matches_codepattern( "a b" , "a\u{2002} b" ) , true ) ;
168
+ assert_eq ! ( matches_codepattern( "a b " , "a \u{0085} \n \t \r b" ) , true ) ;
169
+ assert_eq ! ( matches_codepattern( "a b" , "a \u{0085} \n \t \r b " ) , false ) ;
170
+ assert_eq ! ( matches_codepattern( "a b" , "a\u{2002} b" ) , true ) ;
171
+ assert_eq ! ( matches_codepattern( "ab" , "a\u{2003} b" ) , false ) ;
172
+ assert_eq ! ( matches_codepattern( "a \u{3000} b" , "ab" ) , true ) ;
173
+ assert_eq ! ( matches_codepattern( "\u{205F} a b" , "ab" ) , true ) ;
162
174
}
163
175
}
0 commit comments