Skip to content

Commit 4f841ee

Browse files
committed
std: make str::from_utf16 return an Option.
The rest of the codebase is moving toward avoiding `fail!`, so we do the same here: `str::from_utf16` now returns `None` on invalid input instead of failing.
1 parent 35b1b62 commit 4f841ee

File tree

3 files changed

+48
-24
lines changed

3 files changed

+48
-24
lines changed

src/libnative/io/file.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,8 @@ pub fn readdir(p: &CString) -> IoResult<~[Path]> {
571571
else {
572572
let fp_vec = vec::from_buf(
573573
fp_buf, wcslen(fp_buf) as uint);
574-
let fp_str = str::from_utf16(fp_vec);
574+
let fp_str = str::from_utf16(fp_vec)
575+
.expect("rust_list_dir_wfd_fp_buf returned invalid UTF-16");
575576
paths.push(Path::new(fp_str));
576577
}
577578
more_files = FindNextFileW(find_handle, wfd_ptr as HANDLE);

src/libstd/os.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ pub fn getcwd() -> Path {
8888
fail!();
8989
}
9090
}
91-
Path::new(str::from_utf16(buf))
91+
Path::new(str::from_utf16(buf).expect("GetCurrentDirectoryW returned invalid UTF-16"))
9292
}
9393

9494
#[cfg(windows)]
@@ -124,7 +124,12 @@ pub mod win32 {
124124
}
125125
if k != 0 && done {
126126
let sub = buf.slice(0, k as uint);
127-
res = option::Some(str::from_utf16(sub));
127+
// We want to explicitly catch the case when the
128+
// closure returned invalid UTF-16, rather than
129+
// set `res` to None and continue.
130+
let s = str::from_utf16(sub)
131+
.expect("fill_utf16_buf_and_decode: closure created invalid UTF-16");
132+
res = option::Some(s)
128133
}
129134
}
130135
return res;
@@ -739,7 +744,7 @@ pub fn last_os_error() -> ~str {
739744
fail!("[{}] FormatMessage failure", errno());
740745
}
741746

742-
str::from_utf16(buf)
747+
str::from_utf16(buf).expect("FormatMessageW returned invalid UTF-16")
743748
}
744749
}
745750

@@ -828,8 +833,8 @@ fn real_args() -> ~[~str] {
828833
while *ptr.offset(len as int) != 0 { len += 1; }
829834

830835
// Push it onto the list.
831-
args.push(vec::raw::buf_as_slice(ptr, len,
832-
str::from_utf16));
836+
let opt_s = vec::raw::buf_as_slice(ptr, len, str::from_utf16);
837+
args.push(opt_s.expect("CommandLineToArgvW returned invalid UTF-16"));
833838
}
834839
}
835840

src/libstd/str.rs

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -920,29 +920,32 @@ pub fn utf16_items<'a>(v: &'a [u16]) -> UTF16Items<'a> {
920920
UTF16Items { iter : v.iter() }
921921
}
922922

923-
/// Decode a UTF-16 encoded vector `v` into a string.
924-
///
925-
/// # Failure
926-
///
927-
/// Fails on invalid UTF-16 data.
923+
/// Decode a UTF-16 encoded vector `v` into a string, returning `None`
924+
/// if `v` contains any invalid data.
928925
///
929926
/// # Example
930927
///
931928
/// ```rust
932929
/// use std::str;
933930
///
934931
/// // 𝄞music
935-
/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
936-
/// 0x0073, 0x0069, 0x0063];
937-
/// assert_eq!(str::from_utf16(v), ~"𝄞music");
932+
/// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
933+
/// 0x0073, 0x0069, 0x0063];
934+
/// assert_eq!(str::from_utf16(v), Some(~"𝄞music"));
935+
///
936+
/// // 𝄞mu<invalid>ic
937+
/// v[4] = 0xD800;
938+
/// assert_eq!(str::from_utf16(v), None);
938939
/// ```
939-
pub fn from_utf16(v: &[u16]) -> ~str {
940-
utf16_items(v).map(|c| {
941-
match c {
942-
ScalarValue(c) => c,
943-
LoneSurrogate(u) => fail!("from_utf16: found lone surrogate {}", u)
944-
}
945-
}).collect()
940+
pub fn from_utf16(v: &[u16]) -> Option<~str> {
941+
let mut s = with_capacity(v.len() / 2);
942+
for c in utf16_items(v) {
943+
match c {
944+
ScalarValue(c) => s.push_char(c),
945+
LoneSurrogate(_) => return None
946+
}
947+
}
948+
Some(s)
946949
}
947950

948951
/// Decode a UTF-16 encoded vector `v` into a string, replacing
@@ -3834,14 +3837,29 @@ mod tests {
38343837
assert!(is_utf16(u));
38353838
assert_eq!(s.to_utf16(), u);
38363839
3837-
assert_eq!(from_utf16(u), s);
3840+
assert_eq!(from_utf16(u).unwrap(), s);
38383841
assert_eq!(from_utf16_lossy(u), s);
38393842
3840-
assert_eq!(from_utf16(s.to_utf16()), s);
3841-
assert_eq!(from_utf16(u).to_utf16(), u);
3843+
assert_eq!(from_utf16(s.to_utf16()).unwrap(), s);
3844+
assert_eq!(from_utf16(u).unwrap().to_utf16(), u);
38423845
}
38433846
}
38443847
3848+
#[test]
3849+
fn test_utf16_invalid() {
3850+
// completely positive cases tested above.
3851+
// lead + eof
3852+
assert_eq!(from_utf16([0xD800]), None);
3853+
// lead + lead
3854+
assert_eq!(from_utf16([0xD800, 0xD800]), None);
3855+
3856+
// isolated trail
3857+
assert_eq!(from_utf16([0x0061, 0xDC00]), None);
3858+
3859+
// general
3860+
assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
3861+
}
3862+
38453863
#[test]
38463864
fn test_utf16_lossy() {
38473865
// completely positive cases tested above.

0 commit comments

Comments
 (0)