Skip to content

Commit 73b7604

Browse files
committed
std: add str::from_utf8_slice{,_with_null}
1 parent 5795bb1 commit 73b7604

File tree

9 files changed

+156
-26
lines changed

9 files changed

+156
-26
lines changed

src/librustc/metadata/decoder.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -941,7 +941,7 @@ fn read_path(d: ebml::Doc) -> (~str, uint) {
941941
do reader::with_doc_data(d) |desc| {
942942
let pos = io::u64_from_be_bytes(desc, 0u, 4u) as uint;
943943
let pathbytes = desc.slice(4u, desc.len());
944-
let path = str::from_utf8(pathbytes);
944+
let path = str::from_utf8_slice(pathbytes).to_owned();
945945

946946
(path, pos)
947947
}

src/librustc/metadata/tydecode.rs

+8-7
Original file line number | Diff line number | Diff line change
@@ -97,10 +97,10 @@ pub fn parse_ident(st: &mut PState, last: char) -> ast::ident {
9797
return parse_ident_(st, |a| is_last(last, a) );
9898
}
9999

100-
fn parse_ident_(st: &mut PState, is_last: @fn(char) -> bool) ->
101-
ast::ident {
102-
let rslt = scan(st, is_last, str::from_utf8);
103-
return st.tcx.sess.ident_of(rslt);
100+
fn parse_ident_(st: &mut PState, is_last: @fn(char) -> bool) -> ast::ident {
101+
do scan(st, is_last) |v| {
102+
st.tcx.sess.ident_of(str::from_utf8_slice(v))
103+
}
104104
}
105105

106106
pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: int,
@@ -452,9 +452,10 @@ fn parse_abi_set(st: &mut PState) -> AbiSet {
452452
assert_eq!(next(st), '[');
453453
let mut abis = AbiSet::empty();
454454
while peek(st) != ']' {
455-
// FIXME(#5422) str API should not force this copy
456-
let abi_str = scan(st, |c| c == ',', str::from_utf8);
457-
let abi = abi::lookup(abi_str).expect(abi_str);
455+
let abi = do scan(st, |c| c == ',') |v| {
456+
let abi_str = str::from_utf8_slice(v);
457+
abi::lookup(abi_str).expect(abi_str)
458+
};
458459
abis.add(abi);
459460
}
460461
assert_eq!(next(st), ']');

src/librustdoc/markdown_writer.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -116,8 +116,8 @@ fn pandoc_writer(
116116

117117
debug!("pandoc result: %i", output.status);
118118
if output.status != 0 {
119-
error!("pandoc-out: %s", str::from_bytes(output.output));
120-
error!("pandoc-err: %s", str::from_bytes(output.error));
119+
error!("pandoc-out: %s", str::from_utf8_slice(output.output));
120+
error!("pandoc-err: %s", str::from_utf8_slice(output.error));
121121
fail!("pandoc failed");
122122
}
123123
}

src/librustpkg/rustpkg.rc

+1-1
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ impl<'self> PkgScript<'self> {
163163
exe.to_str(), root.to_str(), "configs");
164164
let output = run::process_output(exe.to_str(), [root.to_str(), ~"configs"]);
165165
// Run the configs() function to get the configs
166-
let cfgs = str::from_bytes_slice(output.output).word_iter()
166+
let cfgs = str::from_utf8_slice(output.output).word_iter()
167167
.transform(|w| w.to_owned()).collect();
168168
(cfgs, output.status)
169169
}

src/librustpkg/tests.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,8 @@ fn command_line_test(args: &[~str], cwd: &Path) -> ProcessOutput {
145145
});
146146
let output = prog.finish_with_output();
147147
io::println(fmt!("Output from command %s with args %? was %s {%s}[%?]",
148-
cmd, args, str::from_bytes(output.output),
149-
str::from_bytes(output.error),
148+
cmd, args, str::from_utf8_slice(output.output),
149+
str::from_utf8_slice(output.error),
150150
output.status));
151151
/*
152152
By the way, rustpkg *won't* return a nonzero exit code if it fails --
@@ -246,7 +246,7 @@ fn assert_executable_exists(repo: &Path, short_name: &str) {
246246
fn command_line_test_output(args: &[~str]) -> ~[~str] {
247247
let mut result = ~[];
248248
let p_output = command_line_test(args, &os::getcwd());
249-
let test_output = str::from_bytes(p_output.output);
249+
let test_output = str::from_utf8_slice(p_output.output);
250250
for test_output.split_iter('\n').advance |s| {
251251
result += [s.to_owned()];
252252
}

src/librustpkg/version.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -95,15 +95,15 @@ pub fn try_getting_version(remote_path: &RemotePath) -> Option<Version> {
9595
tmp_dir.to_str()]);
9696
if outp.status == 0 {
9797
debug!("Cloned it... ( %s, %s )",
98-
str::from_bytes(outp.output),
99-
str::from_bytes(outp.error));
98+
str::from_utf8_slice(outp.output),
99+
str::from_utf8_slice(outp.error));
100100
let mut output = None;
101101
debug!("(getting version, now getting tags) executing {git --git-dir=%s tag -l}",
102102
tmp_dir.push(".git").to_str());
103103
let outp = run::process_output("git",
104104
[fmt!("--git-dir=%s", tmp_dir.push(".git").to_str()),
105105
~"tag", ~"-l"]);
106-
let output_text = str::from_bytes(outp.output);
106+
let output_text = str::from_utf8_slice(outp.output);
107107
debug!("Full output: ( %s ) [%?]", output_text, outp.status);
108108
for output_text.line_iter().advance |l| {
109109
debug!("A line of output: %s", l);

src/libstd/rt/io/flate.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ mod test {
117117
let mut out_bytes = [0, .. 100];
118118
let bytes_read = inflate_reader.read(out_bytes).get();
119119
assert_eq!(bytes_read, in_bytes.len());
120-
let out_msg = str::from_utf8(out_bytes);
120+
let out_msg = str::from_utf8_slice(out_bytes);
121121
assert!(in_msg == out_msg);
122122
}
123123
}

src/libstd/str.rs

+136-7
Original file line number | Diff line number | Diff line change
@@ -96,13 +96,26 @@ pub fn from_utf8_with_null<'a>(vv: &'a [u8]) -> &'a str {
9696
*
9797
* Fails if invalid UTF-8
9898
*/
99-
pub fn from_utf8_slice<'a>(vector: &'a [u8]) -> &'a str {
100-
unsafe {
101-
assert!(is_utf8(vector));
102-
let (ptr, len): (*u8, uint) = ::cast::transmute(vector);
103-
let string: &'a str = ::cast::transmute((ptr, len + 1));
104-
string
105-
}
99+
pub fn from_utf8_slice<'a>(v: &'a [u8]) -> &'a str {
100+
assert!(is_utf8(v));
101+
unsafe { raw::from_utf8_slice(v) }
102+
}
103+
104+
/**
105+
* Convert a vector of bytes to a UTF-8 string.
106+
* The vector needs to be one byte longer than the string, and end with a 0 byte.
107+
*
108+
* Compared to `from_utf8()`, this fn doesn't need to allocate a new owned str.
109+
*
110+
* # Failure
111+
*
112+
* Fails if not NULL terminated
113+
* Fails if invalid UTF-8
114+
*/
115+
pub fn from_utf8_slice_with_null<'a>(v: &'a [u8]) -> &'a str {
116+
assert_eq!(v[v.len() - 1], 0);
117+
assert!(is_utf8(v));
118+
unsafe { raw::from_utf8_slice_with_null(v) }
106119
}
107120

108121
/// Copy a slice into a new unique str
@@ -783,6 +796,20 @@ pub mod raw {
783796
cast::transmute(v)
784797
}
785798

799+
/// Converts a vector of bytes to a string slice.
800+
/// The byte slice needs to contain valid utf8.
801+
pub unsafe fn from_utf8_slice<'a>(v: &'a [u8]) -> &'a str {
802+
let (ptr, len): (*u8, uint) = ::cast::transmute(v);
803+
cast::transmute((ptr, len + 1))
804+
}
805+
806+
/// Converts a vector of bytes to a string.
807+
/// The byte slice needs to contain valid utf8 and needs to be one byte longer than
808+
/// the string, if possible ending in a 0 byte.
809+
pub unsafe fn from_utf8_slice_with_null<'a>(v: &'a [u8]) -> &'a str {
810+
cast::transmute(v)
811+
}
812+
786813
/// Converts a byte to a string.
787814
pub unsafe fn from_byte(u: u8) -> ~str { raw::from_utf8([u]) }
788815

@@ -2850,6 +2877,108 @@ mod tests {
28502877
let _x = from_utf8_with_null(bb);
28512878
}
28522879
2880+
#[test]
2881+
fn test_unsafe_from_utf8_slice() {
2882+
let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
2883+
let b = unsafe { raw::from_utf8_slice(a) };
2884+
assert_eq!(b, "AAAAAAA");
2885+
}
2886+
2887+
#[test]
2888+
fn test_from_utf8_slice() {
2889+
let ss = "ศไทย中华Việt Nam";
2890+
let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2891+
0xe0_u8, 0xb9_u8, 0x84_u8,
2892+
0xe0_u8, 0xb8_u8, 0x97_u8,
2893+
0xe0_u8, 0xb8_u8, 0xa2_u8,
2894+
0xe4_u8, 0xb8_u8, 0xad_u8,
2895+
0xe5_u8, 0x8d_u8, 0x8e_u8,
2896+
0x56_u8, 0x69_u8, 0xe1_u8,
2897+
0xbb_u8, 0x87_u8, 0x74_u8,
2898+
0x20_u8, 0x4e_u8, 0x61_u8,
2899+
0x6d_u8];
2900+
2901+
assert_eq!(ss, from_utf8_slice(bb));
2902+
}
2903+
2904+
#[test]
2905+
#[should_fail]
2906+
#[ignore(cfg(windows))]
2907+
fn test_from_utf8_slice_fail() {
2908+
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2909+
0xe0_u8, 0xb9_u8, 0x84_u8,
2910+
0xe0_u8, 0xb8_u8, 0x97_u8,
2911+
0xe0_u8, 0xb8_u8, 0xa2_u8,
2912+
0xe4_u8, 0xb8_u8, 0xad_u8,
2913+
0xe5_u8, 0x8d_u8, 0x8e_u8,
2914+
0x56_u8, 0x69_u8, 0xe1_u8,
2915+
0xbb_u8, 0x87_u8, 0x74_u8,
2916+
0x20_u8, 0x4e_u8, 0x61_u8,
2917+
0x6d_u8];
2918+
2919+
let _x = from_utf8_slice(bb);
2920+
}
2921+
2922+
#[test]
2923+
fn test_unsafe_from_utf8_slice_with_null() {
2924+
let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2925+
let b = unsafe { raw::from_utf8_slice_with_null(a) };
2926+
assert_eq!(b, "AAAAAAA");
2927+
}
2928+
2929+
#[test]
2930+
fn test_from_utf8_slice_with_null() {
2931+
let ss = "ศไทย中华Việt Nam";
2932+
let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2933+
0xe0_u8, 0xb9_u8, 0x84_u8,
2934+
0xe0_u8, 0xb8_u8, 0x97_u8,
2935+
0xe0_u8, 0xb8_u8, 0xa2_u8,
2936+
0xe4_u8, 0xb8_u8, 0xad_u8,
2937+
0xe5_u8, 0x8d_u8, 0x8e_u8,
2938+
0x56_u8, 0x69_u8, 0xe1_u8,
2939+
0xbb_u8, 0x87_u8, 0x74_u8,
2940+
0x20_u8, 0x4e_u8, 0x61_u8,
2941+
0x6d_u8, 0x0_u8];
2942+
2943+
assert_eq!(ss, from_utf8_slice_with_null(bb));
2944+
}
2945+
2946+
#[test]
2947+
#[should_fail]
2948+
#[ignore(cfg(windows))]
2949+
fn test_from_utf8_slice_with_null_fail() {
2950+
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2951+
0xe0_u8, 0xb9_u8, 0x84_u8,
2952+
0xe0_u8, 0xb8_u8, 0x97_u8,
2953+
0xe0_u8, 0xb8_u8, 0xa2_u8,
2954+
0xe4_u8, 0xb8_u8, 0xad_u8,
2955+
0xe5_u8, 0x8d_u8, 0x8e_u8,
2956+
0x56_u8, 0x69_u8, 0xe1_u8,
2957+
0xbb_u8, 0x87_u8, 0x74_u8,
2958+
0x20_u8, 0x4e_u8, 0x61_u8,
2959+
0x6d_u8, 0x0_u8];
2960+
2961+
let _x = from_utf8_slice_with_null(bb);
2962+
}
2963+
2964+
#[test]
2965+
#[should_fail]
2966+
#[ignore(cfg(windows))]
2967+
fn test_from_utf8_slice_with_null_fail_2() {
2968+
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2969+
0xe0_u8, 0xb9_u8, 0x84_u8,
2970+
0xe0_u8, 0xb8_u8, 0x97_u8,
2971+
0xe0_u8, 0xb8_u8, 0xa2_u8,
2972+
0xe4_u8, 0xb8_u8, 0xad_u8,
2973+
0xe5_u8, 0x8d_u8, 0x8e_u8,
2974+
0x56_u8, 0x69_u8, 0xe1_u8,
2975+
0xbb_u8, 0x87_u8, 0x74_u8,
2976+
0x20_u8, 0x4e_u8, 0x61_u8,
2977+
0x6d_u8, 0x60_u8];
2978+
2979+
let _x = from_utf8_slice_with_null(bb);
2980+
}
2981+
28532982
#[test]
28542983
fn test_from_buf() {
28552984
unsafe {

src/test/run-pass/const-str-ptr.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ static b: *u8 = c as *u8;
1616

1717
pub fn main() {
1818
let foo = &a as *u8;
19-
assert_eq!(unsafe { str::raw::from_bytes(a) }, ~"hi\x00");
19+
assert_eq!(unsafe { str::raw::from_utf8_slice(a) }, "hi\x00");
2020
assert_eq!(unsafe { str::raw::from_buf(foo) }, ~"hi");
2121
assert_eq!(unsafe { str::raw::from_buf(b) }, ~"hi");
2222
assert!(unsafe { *b == a[0] });

0 commit comments

Comments
 (0)