@@ -55,35 +55,32 @@ Section: Creating a string
55
55
*
56
56
* Raises the `not_utf8` condition if invalid UTF-8
57
57
*/
58
-
59
- pub fn from_utf8 ( vv : & [ u8 ] ) -> ~str {
58
+ pub fn from_utf8 ( v : ~[ u8 ] ) -> ~str {
// Checks the bytes with `is_utf8`. If they are not valid, the `not_utf8` condition is
// raised and whatever `cond.raise` returns becomes this function's result; otherwise the
// bytes are handed to the unchecked `raw::from_utf8`.
// NOTE(review): `first_bad_byte` is the first byte that fails `is_utf8` when tested on its
// own, which presumably means the first non-ASCII byte rather than the first byte of an
// invalid sequence. Confirm the message is accurate when valid multi-byte text precedes
// the bad byte.
60
59
use str:: not_utf8:: cond;
61
60
62
- if !is_utf8 ( vv ) {
63
- let first_bad_byte = vec:: find ( vv , |b| !is_utf8 ( [ * b] ) ) . get ( ) ;
61
+ if !is_utf8 ( v ) {
62
+ let first_bad_byte = vec:: find ( v , |b| !is_utf8 ( [ * b] ) ) . get ( ) ;
64
63
cond. raise ( fmt ! ( "from_utf8: input is not UTF-8; first bad byte is %u" ,
65
64
first_bad_byte as uint) )
66
65
}
67
66
else {
68
- return unsafe { raw:: from_utf8 ( vv ) }
67
+ return unsafe { raw:: from_utf8 ( v ) }
69
68
}
70
69
}
71
70
72
71
/**
73
72
* Convert a null-terminated vector of bytes to a UTF-8 string.
74
73
* The vector needs to be one byte longer than the string, and end with a 0 byte.
75
74
*
76
- * Compared to `from_utf8()`, this fn doesn't need to allocate a new owned str.
77
- *
78
75
* # Failure
79
76
*
80
77
* Fails if the vector is not valid UTF-8
81
78
* Fails if the last byte of the vector is not 0
* NOTE(review): an empty vector presumably also fails, at the `v[v.len() - 1]` index;
* confirm.
82
79
*/
83
- pub fn from_utf8_with_null < ' a > ( vv : & ' a [ u8 ] ) -> & ' a str {
84
- assert_eq ! ( vv [ vv . len( ) - 1 ] , 0 ) ;
85
- assert ! ( is_utf8( vv ) ) ;
86
- return unsafe { raw:: from_utf8_with_null ( vv ) } ;
80
+ pub fn from_utf8_with_null ( v : ~ [ u8 ] ) -> ~ str {
// The terminator is checked first, then the whole vector (terminator included) is
// validated with `is_utf8` before the unchecked raw conversion is called.
81
+ assert_eq ! ( v [ v . len( ) - 1 ] , 0 ) ;
82
+ assert ! ( is_utf8( v ) ) ;
83
+ unsafe { raw:: from_utf8_with_null ( v ) }
87
84
}
88
85
89
86
/**
@@ -783,16 +780,14 @@ pub mod raw {
783
780
}
784
781
785
782
/// Converts a vector of bytes to a new owned string.
///
/// No UTF-8 validation happens in this function: a 0 byte is appended to `v` and the
/// result is passed straight to `from_utf8_with_null`. Presumably the caller must
/// guarantee that `v` holds valid UTF-8 (hence `unsafe`); confirm.
786
- pub unsafe fn from_utf8 ( v : & const [ u8 ] ) -> ~str {
787
- do vec :: as_const_buf ( v ) |buf , len| {
788
- from_buf_len ( buf , len )
789
- }
783
+ pub unsafe fn from_utf8 ( mut v : ~ [ u8 ] ) -> ~str {
784
+ // Make sure the string is NULL terminated.
785
+ v . push ( 0 ) ;
786
+ from_utf8_with_null ( v )
790
787
}
791
788
792
- /// Converts a vector of bytes to a string.
793
- /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
794
- /// the string, if possible ending in a 0 byte.
795
- pub unsafe fn from_utf8_with_null < ' a > ( v : & ' a [ u8 ] ) -> & ' a str {
789
+ /// Converts a vector of bytes with a trailing null to a new owned string.
790
+ pub unsafe fn from_utf8_with_null ( v : ~[ u8 ] ) -> ~str {
796
791
// The vector is reinterpreted as a string via `transmute`. Nothing here checks for valid
// UTF-8 or for the trailing 0 byte, so both are left to the caller.
cast:: transmute ( v)
797
792
}
798
793
@@ -811,7 +806,7 @@ pub mod raw {
811
806
}
812
807
813
808
/// Converts a byte to a string.
///
/// Builds the two-byte vector `[u, 0]` (the byte followed by a 0 terminator) and passes it
/// to `raw::from_utf8_with_null`.
/// NOTE(review): `u` is not checked here; presumably callers must pass a byte that is valid
/// UTF-8 on its own. Confirm.
814
- pub unsafe fn from_byte ( u : u8 ) -> ~str { raw:: from_utf8 ( [ u ] ) }
809
+ pub unsafe fn from_byte ( u : u8 ) -> ~str { raw:: from_utf8_with_null ( ~ [ u , 0 ] ) }
815
810
816
811
/// Form a slice from a C string. Unsafe because the caller must ensure the
817
812
/// C string has the static lifetime, or else the return value may be
@@ -2247,17 +2242,19 @@ impl Zero for @str {
2247
2242
2248
2243
#[cfg(test)]
2249
2244
mod tests {
2250
- use iterator::IteratorUtil;
2245
+ use super::*;
2246
+ use char;
2247
+ use cmp::{TotalOrd, Less, Equal, Greater};
2251
2248
use container::Container;
2252
- use option::Some ;
2249
+ use iterator::IteratorUtil ;
2253
2250
use libc::c_char;
2254
2251
use libc;
2255
2252
use old_iter::BaseIter;
2253
+ use option::Some;
2256
2254
use ptr;
2257
- use str::*;
2258
- use vec;
2255
+ use uint;
2259
2256
use vec::{ImmutableVector, CopyableVector};
2260
- use cmp::{TotalOrd, Less, Equal, Greater} ;
2257
+ use vec ;
2261
2258
2262
2259
#[test]
2263
2260
fn test_eq() {
@@ -2777,15 +2774,15 @@ mod tests {
2777
2774
fn test_from_utf8() {
// Expects `from_utf8` applied to the byte vector `bb` to equal the string `ss`.
2778
2775
let ss = ~" ศไทย中华Việt Nam ";
2779
2776
let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
2780
- 0xe0_u8, 0xb9_u8, 0x84_u8,
2781
- 0xe0_u8, 0xb8_u8, 0x97_u8,
2782
- 0xe0_u8, 0xb8_u8, 0xa2_u8,
2783
- 0xe4_u8, 0xb8_u8, 0xad_u8,
2784
- 0xe5_u8, 0x8d_u8, 0x8e_u8,
2785
- 0x56_u8, 0x69_u8, 0xe1_u8,
2786
- 0xbb_u8, 0x87_u8, 0x74_u8,
2787
- 0x20_u8, 0x4e_u8, 0x61_u8,
2788
- 0x6d_u8];
2777
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
2778
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
2779
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
2780
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
2781
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
2782
+ 0x56_u8, 0x69_u8, 0xe1_u8,
2783
+ 0xbb_u8, 0x87_u8, 0x74_u8,
2784
+ 0x20_u8, 0x4e_u8, 0x61_u8,
2785
+ 0x6d_u8];
2789
2786
2790
2787
assert_eq!(ss, from_utf8(bb));
2791
2788
}
@@ -2795,48 +2792,48 @@ mod tests {
2795
2792
fn test_from_utf8_fail() {
// Calls `from_utf8` on bytes that start with 0xff inside a `not_utf8` condition trap.
// The handler checks the error message, records that it ran, and supplies a replacement
// string; the final assertion verifies that the handler was actually invoked.
2796
2793
use str::not_utf8::cond;
2797
2794
2798
- let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2799
- 0xe0_u8, 0xb9_u8, 0x84_u8,
2800
- 0xe0_u8, 0xb8_u8, 0x97_u8,
2801
- 0xe0_u8, 0xb8_u8, 0xa2_u8,
2802
- 0xe4_u8, 0xb8_u8, 0xad_u8,
2803
- 0xe5_u8, 0x8d_u8, 0x8e_u8,
2804
- 0x56_u8, 0x69_u8, 0xe1_u8,
2805
- 0xbb_u8, 0x87_u8, 0x74_u8,
2806
- 0x20_u8, 0x4e_u8, 0x61_u8,
2807
- 0x6d_u8];
2808
-
2809
2795
let mut error_happened = false;
2810
2796
let _x = do cond.trap(|err| {
2811
2797
assert_eq!(err, ~" from_utf8: input is not UTF -8 ; first bad byte is 255 ");
2812
2798
error_happened = true;
2813
2799
~" "
2814
2800
}).in {
2801
+ let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2802
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
2803
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
2804
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
2805
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
2806
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
2807
+ 0x56_u8, 0x69_u8, 0xe1_u8,
2808
+ 0xbb_u8, 0x87_u8, 0x74_u8,
2809
+ 0x20_u8, 0x4e_u8, 0x61_u8,
2810
+ 0x6d_u8];
2811
+
2815
2812
from_utf8(bb)
2816
2813
};
2817
2814
assert!(error_happened);
2818
2815
}
2819
2816
2820
2817
#[test]
2821
2818
fn test_unsafe_from_utf8_with_null() {
// Seven bytes of value 65 followed by a 0 byte go through the unchecked raw conversion;
// the result must compare equal to the expected string literal.
2822
- let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2819
+ let a = ~ [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2823
2820
let b = unsafe { raw::from_utf8_with_null(a) };
2824
- assert_eq!(b, " AAAAAAA ");
2821
+ assert_eq!(b, ~ " AAAAAAA ");
2825
2822
}
2826
2823
2827
2824
#[test]
2828
2825
fn test_from_utf8_with_null() {
// Expects the checked conversion of the 0-terminated byte vector `bb` to equal `ss`.
2829
- let ss = " ศไทย中华Việt Nam ";
2830
- let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2831
- 0xe0_u8, 0xb9_u8, 0x84_u8,
2832
- 0xe0_u8, 0xb8_u8, 0x97_u8,
2833
- 0xe0_u8, 0xb8_u8, 0xa2_u8,
2834
- 0xe4_u8, 0xb8_u8, 0xad_u8,
2835
- 0xe5_u8, 0x8d_u8, 0x8e_u8,
2836
- 0x56_u8, 0x69_u8, 0xe1_u8,
2837
- 0xbb_u8, 0x87_u8, 0x74_u8,
2838
- 0x20_u8, 0x4e_u8, 0x61_u8,
2839
- 0x6d_u8, 0x0_u8];
2826
+ let ss = ~ " ศไทย中华Việt Nam ";
2827
+ let bb = ~ [0xe0_u8, 0xb8_u8, 0xa8_u8,
2828
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
2829
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
2830
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
2831
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
2832
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
2833
+ 0x56_u8, 0x69_u8, 0xe1_u8,
2834
+ 0xbb_u8, 0x87_u8, 0x74_u8,
2835
+ 0x20_u8, 0x4e_u8, 0x61_u8,
2836
+ 0x6d_u8, 0x0_u8];
2840
2837
2841
2838
assert_eq!(ss, from_utf8_with_null(bb));
2842
2839
}
@@ -2845,16 +2842,16 @@ mod tests {
2845
2842
#[should_fail]
2846
2843
#[ignore(cfg(windows))]
2847
2844
fn test_from_utf8_with_null_fail() {
// The input ends with a 0 byte but begins with 0xff, so the call is expected to fail
// (`#[should_fail]`), presumably at the UTF-8 validity assertion.
2848
- let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2849
- 0xe0_u8, 0xb9_u8, 0x84_u8,
2850
- 0xe0_u8, 0xb8_u8, 0x97_u8,
2851
- 0xe0_u8, 0xb8_u8, 0xa2_u8,
2852
- 0xe4_u8, 0xb8_u8, 0xad_u8,
2853
- 0xe5_u8, 0x8d_u8, 0x8e_u8,
2854
- 0x56_u8, 0x69_u8, 0xe1_u8,
2855
- 0xbb_u8, 0x87_u8, 0x74_u8,
2856
- 0x20_u8, 0x4e_u8, 0x61_u8,
2857
- 0x6d_u8, 0x0_u8];
2845
+ let bb = ~ [0xff_u8, 0xb8_u8, 0xa8_u8,
2846
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
2847
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
2848
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
2849
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
2850
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
2851
+ 0x56_u8, 0x69_u8, 0xe1_u8,
2852
+ 0xbb_u8, 0x87_u8, 0x74_u8,
2853
+ 0x20_u8, 0x4e_u8, 0x61_u8,
2854
+ 0x6d_u8, 0x0_u8];
2858
2855
2859
2856
let _x = from_utf8_with_null(bb);
2860
2857
}
@@ -2863,16 +2860,16 @@ mod tests {
2863
2860
#[should_fail]
2864
2861
#[ignore(cfg(windows))]
2865
2862
fn test_from_utf8_with_null_fail_2() {
// The last byte is 0x60 rather than 0, so the call is expected to fail (`#[should_fail]`).
// NOTE(review): the first byte is 0xff as well, so this input violates both the
// terminator and the UTF-8 preconditions; a valid-UTF-8 input without the terminator
// would isolate the terminator check.
2866
- let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2867
- 0xe0_u8, 0xb9_u8, 0x84_u8,
2868
- 0xe0_u8, 0xb8_u8, 0x97_u8,
2869
- 0xe0_u8, 0xb8_u8, 0xa2_u8,
2870
- 0xe4_u8, 0xb8_u8, 0xad_u8,
2871
- 0xe5_u8, 0x8d_u8, 0x8e_u8,
2872
- 0x56_u8, 0x69_u8, 0xe1_u8,
2873
- 0xbb_u8, 0x87_u8, 0x74_u8,
2874
- 0x20_u8, 0x4e_u8, 0x61_u8,
2875
- 0x6d_u8, 0x60_u8];
2863
+ let bb = ~ [0xff_u8, 0xb8_u8, 0xa8_u8,
2864
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
2865
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
2866
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
2867
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
2868
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
2869
+ 0x56_u8, 0x69_u8, 0xe1_u8,
2870
+ 0xbb_u8, 0x87_u8, 0x74_u8,
2871
+ 0x20_u8, 0x4e_u8, 0x61_u8,
2872
+ 0x6d_u8, 0x60_u8];
2876
2873
2877
2874
let _x = from_utf8_with_null(bb);
2878
2875
}
@@ -3121,21 +3118,21 @@ mod tests {
3121
3118
3122
3119
#[test]
3123
3120
fn vec_str_conversions() {
// Round-trips a string through `as_bytes().to_owned()` and `from_utf8`, then compares
// the original and the result byte by byte.
// In the added lines both lengths are read before `from_utf8(v)` is called, since that
// call now takes the vector by value.
3124
- let s1: ~str = ~" All mimsy were the borogoves";
3121
+ let s1 = ~" All mimsy were the borogoves";
3122
+ let n1 = s1.len();
3123
+ let v = s1.as_bytes().to_owned();
3124
+ let n2 = v.len();
3125
3125
3126
- let v: ~[u8] = s1.as_bytes().to_owned();
3127
- let s2: ~str = from_utf8(v);
3128
- let mut i: uint = 0u;
3129
- let n1: uint = s1.len();
3130
- let n2: uint = v.len();
3131
3126
assert_eq!(n1, n2);
3132
- while i < n1 {
3133
- let a: u8 = s1[i];
3134
- let b: u8 = s2[i];
3127
+
3128
+ let s2 = from_utf8(v);
3129
+
3130
+ for uint::range(0, n1) |i| {
3131
+ let a = s1[i];
3132
+ let b = s2[i];
3135
3133
debug!(a);
3136
3134
debug!(b);
3137
3135
assert_eq!(a, b);
3138
- i += 1u;
3139
3136
}
3140
3137
}
3141
3138
0 commit comments