@@ -186,6 +186,7 @@ pub fn push_str_no_overallocate(lhs: &mut ~str, rhs: &str) {
186
186
raw:: set_len( lhs, llen + rlen) ;
187
187
}
188
188
}
189
+
189
190
/// Appends a string slice to the back of a string
190
191
#[ inline( always) ]
191
192
pub fn push_str ( lhs : & mut ~str , rhs : & str ) {
@@ -214,7 +215,6 @@ pub fn append(lhs: ~str, rhs: &str) -> ~str {
214
215
v
215
216
}
216
217
217
-
218
218
/// Concatenate a vector of strings
219
219
pub fn concat ( v : & [ ~str ] ) -> ~str {
220
220
let mut s: ~str = ~"";
@@ -435,35 +435,32 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str {
435
435
}
436
436
437
437
/// Splits a string into substrings at each occurrence of a given character
438
- pub fn each_split_char ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
438
+ pub fn each_split_char ( s : & ' a str , sep : char , it : & fn ( & ' a str ) -> bool ) {
439
439
each_split_char_inner ( s, sep, len ( s) , true , true , it)
440
440
}
441
441
442
- /**
443
- * Like `split_char`, but a trailing empty string is omitted
444
- * (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
445
- */
446
- pub fn each_split_char_no_trailing ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
442
+ /// Like `each_split_char`, but a trailing empty string is omitted
443
+ pub fn each_split_char_no_trailing ( s : & ' a str , sep : char , it : & fn ( & ' a str ) -> bool ) {
447
444
each_split_char_inner ( s, sep, len ( s) , true , false , it)
448
445
}
449
446
450
447
/**
451
448
* Splits a string into substrings at each occurrence of a given
452
449
* character up to 'count' times.
453
450
*
454
- * The byte must be a valid UTF-8/ASCII byte
451
+ * The character must be a valid UTF-8/ASCII character
455
452
*/
456
- pub fn each_splitn_char ( s : & str , sep : char , count : uint , it : & fn ( & str ) -> bool ) {
453
+ pub fn each_splitn_char ( s : & ' a str , sep : char , count : uint , it : & fn ( & ' a str ) -> bool ) {
457
454
each_split_char_inner ( s, sep, count, true , true , it)
458
455
}
459
456
460
- /// Like `split_char `, but omits empty strings from the returned vector
461
- pub fn each_split_char_nonempty ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
457
+ /// Like `each_split_char `, but omits empty strings
458
+ pub fn each_split_char_nonempty ( s : & ' a str , sep : char , it : & fn ( & ' a str ) -> bool ) {
462
459
each_split_char_inner ( s, sep, len ( s) , false , false , it)
463
460
}
464
461
465
- fn each_split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
466
- allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
462
+ fn each_split_char_inner ( s : & ' a str , sep : char , count : uint , allow_empty : bool ,
463
+ allow_trailing_empty : bool , it : & fn ( & ' a str ) -> bool ) {
467
464
if sep < 128 u as char {
468
465
let b = sep as u8 , l = len ( s) ;
469
466
let mut done = 0 u;
@@ -478,7 +475,7 @@ fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
478
475
}
479
476
i += 1 u;
480
477
}
481
- // only push a non-empty trailing substring
478
+ // only slice a non-empty trailing substring
482
479
if allow_trailing_empty || start < l {
483
480
if !it ( unsafe { raw:: slice_bytes ( s, start, l) } ) { return ; }
484
481
}
@@ -488,33 +485,30 @@ fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
488
485
}
489
486
490
487
/// Splits a string into substrings using a character function
491
- pub fn each_split ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
488
+ pub fn each_split ( s : & ' a str , sepfn : & fn ( char ) -> bool , it : & fn ( & ' a str ) -> bool ) {
492
489
each_split_inner ( s, sepfn, len ( s) , true , true , it)
493
490
}
494
491
495
- /**
496
- * Like `split`, but a trailing empty string is omitted
497
- * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
498
- */
499
- pub fn each_split_no_trailing ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
492
+ /// Like `each_split`, but a trailing empty string is omitted
493
+ pub fn each_split_no_trailing ( s : & ' a str , sepfn : & fn ( char ) -> bool , it : & fn ( & ' a str ) -> bool ) {
500
494
each_split_inner ( s, sepfn, len ( s) , true , false , it)
501
495
}
502
496
503
497
/**
504
498
* Splits a string into substrings using a character function, cutting at
505
499
* most `count` times.
506
500
*/
507
- pub fn each_splitn ( s : & str , sepfn : & fn ( char ) -> bool , count : uint , it : & fn ( & str ) -> bool ) {
501
+ pub fn each_splitn ( s : & ' a str , sepfn : & fn ( char ) -> bool , count : uint , it : & fn ( & ' a str ) -> bool ) {
508
502
each_split_inner ( s, sepfn, count, true , true , it)
509
503
}
510
504
511
- /// Like `split `, but omits empty strings from the returned vector
512
- pub fn each_split_nonempty ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
505
+ /// Like `each_split `, but omits empty strings
506
+ pub fn each_split_nonempty ( s : & ' a str , sepfn : & fn ( char ) -> bool , it : & fn ( & ' a str ) -> bool ) {
513
507
each_split_inner ( s, sepfn, len ( s) , false , false , it)
514
508
}
515
509
516
- fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517
- allow_empty : bool , allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
510
+ fn each_split_inner ( s : & ' a str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
511
+ allow_empty : bool , allow_trailing_empty : bool , it : & fn ( & ' a str ) -> bool ) {
518
512
let l = len ( s) ;
519
513
let mut i = 0 u, start = 0 u, done = 0 u;
520
514
while i < l && done < count {
@@ -576,16 +570,18 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) {
576
570
* # Example
577
571
*
578
572
* ~~~
579
- * fail_unless!(["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", "."))
573
+ * let mut v = ~[];
574
+ * for each_split_str(".XXX.YYY.", ".") |subs| { v.push(subs); }
575
+ * fail_unless!(v == ["", "XXX", "YYY", ""]);
580
576
* ~~~
581
577
*/
582
- pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
578
+ pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & ' a str ) -> bool ) {
583
579
for iter_between_matches( s, sep) |from, to| {
584
580
if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
585
581
}
586
582
}
587
583
588
- pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
584
+ pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & ' a str ) -> bool ) {
589
585
for iter_between_matches( s, sep) |from, to| {
590
586
if to > from {
591
587
if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
@@ -628,15 +624,17 @@ pub fn levdistance(s: &str, t: &str) -> uint {
628
624
}
629
625
630
626
/**
631
- * Splits a string into a vector of the substrings separated by LF ('\n').
627
+ * Splits a string into substrings separated by LF ('\n').
632
628
*/
633
- pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char_no_trailing ( s, '\n' , it) }
629
+ pub fn each_line ( s : & ' a str , it : & fn ( & ' a str ) -> bool ) {
630
+ each_split_char_no_trailing ( s, '\n' , it)
631
+ }
634
632
635
633
/**
636
- * Splits a string into a vector of the substrings separated by LF ('\n')
634
+ * Splits a string into substrings separated by LF ('\n')
637
635
* and/or CR LF ("\r\n")
638
636
*/
639
- pub fn each_line_any ( s : & str , it : & fn ( & str ) -> bool ) {
637
+ pub fn each_line_any ( s : & ' a str , it : & fn ( & ' a str ) -> bool ) {
640
638
for each_line( s) |s| {
641
639
let l = s. len ( ) ;
642
640
if l > 0 u && s[ l - 1 u] == '\r' as u8 {
@@ -647,33 +645,46 @@ pub fn each_line_any(s: &str, it: &fn(&str) -> bool) {
647
645
}
648
646
}
649
647
650
- /// Splits a string into a vector of the substrings separated by whitespace
651
- pub fn each_word ( s : & str , it : & fn ( & str ) -> bool ) {
652
- each_split_nonempty ( s, |c| char:: is_whitespace ( c ) , it)
648
+ /// Splits a string into substrings separated by whitespace
649
+ pub fn each_word ( s : & ' a str , it : & fn ( & ' a str ) -> bool ) {
650
+ each_split_nonempty ( s, char:: is_whitespace, it)
653
651
}
654
652
655
- /** Split a string into a vector of substrings,
656
- * each of which is less bytes long than a limit
653
+ /** Splits a string into substrings with possibly internal whitespace,
654
+ * each of them at most `lim` bytes long. The substrings have leading and trailing
655
+ * whitespace removed, and are only cut at whitespace boundaries.
656
+ *
657
+ * # Failure
658
+ *
659
+ * Fails during iteration if the string contains a non-whitespace
660
+ * sequence longer than the limit.
657
661
*/
658
- pub fn each_split_within( ss : & str , lim : uint , it : & fn ( & str ) -> bool ) {
659
- // Just for fun, let's write this as an automaton
662
+ pub fn each_split_within( ss : & ' a str , lim : uint , it : & fn ( & ' a str ) -> bool ) {
663
+ // Just for fun, let's write this as a state machine:
664
+
660
665
enum SplitWithinState {
661
- A , // Leading whitespace, initial state
662
- B , // Words
663
- C , // Internal and trailing whitespace
666
+ A , // leading whitespace, initial state
667
+ B , // words
668
+ C , // internal and trailing whitespace
669
+ }
670
+ enum Whitespace {
671
+ Ws , // current char is whitespace
672
+ Cr // current char is not whitespace
673
+ }
674
+ enum LengthLimit {
675
+ UnderLim , // current char makes current substring still fit in limit
676
+ OverLim // current char makes current substring no longer fit in limit
664
677
}
665
- enum Whitespace { Ws , Cr }
666
- enum LengthLimit { UnderLim , OverLim }
667
678
668
679
let mut slice_start = 0 ;
669
680
let mut last_start = 0 ;
670
681
let mut last_end = 0 ;
671
682
let mut state = A ;
672
683
673
684
let mut cont = true ;
674
- let slice = || { cont = it ( ss . slice ( slice_start, last_end) ) } ;
685
+ let slice: & fn ( ) = || { cont = it ( slice ( ss , slice_start, last_end) ) } ;
675
686
676
- let machine = |i : uint , c : char | {
687
+ let machine: & fn ( uint , char ) -> bool = |i, c| {
677
688
let whitespace = if char:: is_whitespace ( c) { Ws } else { Cr } ;
678
689
let limit = if ( i - slice_start + 1 ) <= lim { UnderLim } else { OverLim } ;
679
690
@@ -693,12 +704,13 @@ pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) {
693
704
(C, Ws, OverLim) => { slice(); A }
694
705
(C, Ws, UnderLim) => { C }
695
706
};
707
+
696
708
cont
697
709
};
698
710
699
711
str::each_chari(ss, machine);
700
712
701
- // Let the automaton 'run out'
713
+ // Let the automaton 'run out' by supplying trailing whitespace
702
714
let mut fake_i = ss.len();
703
715
while cont && match state { B | C => true, A => false } {
704
716
machine(fake_i, ' ');
@@ -1186,8 +1198,7 @@ pub fn rfind_char_from(s: &str, c: char, start: uint) -> Option<uint> {
1186
1198
* or equal to `len(s)`. `start` must be the index of a character boundary,
1187
1199
* as defined by `is_char_boundary`.
1188
1200
*/
1189
- pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint)
1190
- -> Option<uint> {
1201
+ pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint) -> Option<uint> {
1191
1202
if c < 128u as char {
1192
1203
fail_unless!(start >= end);
1193
1204
fail_unless!(start <= len(s));
@@ -1268,11 +1279,7 @@ pub fn find_from(s: &str, start: uint, f: &fn(char)
1268
1279
* or equal to `len(s)`. `start` must be the index of a character
1269
1280
* boundary, as defined by `is_char_boundary`.
1270
1281
*/
1271
- pub fn find_between(s: &str,
1272
- start: uint,
1273
- end: uint,
1274
- f: &fn(char) -> bool)
1275
- -> Option<uint> {
1282
+ pub fn find_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option<uint> {
1276
1283
fail_unless!(start <= end);
1277
1284
fail_unless!(end <= len(s));
1278
1285
fail_unless!(is_char_boundary(s, start));
@@ -1323,8 +1330,7 @@ pub fn rfind(s: &str, f: &fn(char) -> bool) -> Option<uint> {
1323
1330
* `start` must be less than or equal to `len(s)`, `start` must be the
1324
1331
* index of a character boundary, as defined by `is_char_boundary`
1325
1332
*/
1326
- pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool)
1327
- -> Option<uint> {
1333
+ pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) -> Option<uint> {
1328
1334
rfind_between(s, start, 0u, f)
1329
1335
}
1330
1336
@@ -1350,9 +1356,7 @@ pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool)
1350
1356
* than or equal to `len(s)`. `start` must be the index of a character
1351
1357
* boundary, as defined by `is_char_boundary`
1352
1358
*/
1353
- pub fn rfind_between(s: &str, start: uint, end: uint,
1354
- f: &fn(char) -> bool)
1355
- -> Option<uint> {
1359
+ pub fn rfind_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option<uint> {
1356
1360
fail_unless!(start >= end);
1357
1361
fail_unless!(start <= len(s));
1358
1362
fail_unless!(is_char_boundary(s, start));
@@ -1408,8 +1412,7 @@ pub fn find_str(haystack: &'a str, needle: &'b str) -> Option<uint> {
1408
1412
*
1409
1413
* `start` must be less than or equal to `len(s)`
1410
1414
*/
1411
- pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint)
1412
- -> Option<uint> {
1415
+ pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) -> Option<uint> {
1413
1416
find_str_between(haystack, needle, start, len(haystack))
1414
1417
}
1415
1418
@@ -1433,9 +1436,8 @@ pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint)
1433
1436
* `start` must be less than or equal to `end` and `end` must be less than
1434
1437
* or equal to `len(s)`.
1435
1438
*/
1436
- pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint,
1437
- end:uint)
1438
- -> Option<uint> {
1439
+ pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint, end:uint)
1440
+ -> Option<uint> {
1439
1441
// See Issue #1932 for why this is a naive search
1440
1442
fail_unless!(end <= len(haystack));
1441
1443
let needle_len = len(needle);
@@ -1638,7 +1640,6 @@ pub fn utf16_chars(v: &[u16], f: &fn(char)) {
1638
1640
}
1639
1641
}
1640
1642
1641
-
1642
1643
pub fn from_utf16(v: &[u16]) -> ~str {
1643
1644
let mut buf = ~" ";
1644
1645
unsafe {
@@ -1955,7 +1956,6 @@ pub fn as_c_str<T>(s: &str, f: &fn(*libc::c_char) -> T) -> T {
1955
1956
}
1956
1957
}
1957
1958
1958
-
1959
1959
/**
1960
1960
* Work with the byte buffer and length of a slice.
1961
1961
*
0 commit comments