Skip to content

Commit a345c54

Browse files
committed
auto merge of #14613 : schmee/rust/utf16-iterator, r=huonw
Closes #14358. ~~The tests are not yet moved to `utf16_iter`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use `utf16_units`. This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_units()` directly, if it can be used in an iterator context. [breaking-change] cc @huonw
2 parents 94343da + 3d84b4b commit a345c54

File tree

8 files changed

+84
-27
lines changed

8 files changed

+84
-27
lines changed

src/libcollections/str.rs

+9-12
Original file line numberDiff line numberDiff line change
@@ -803,15 +803,9 @@ pub trait StrAllocating: Str {
803803
}
804804

805805
/// Converts to a vector of `u16` encoded as UTF-16.
806+
#[deprecated = "use `utf16_units` instead"]
806807
fn to_utf16(&self) -> Vec<u16> {
807-
let me = self.as_slice();
808-
let mut u = Vec::new();
809-
for ch in me.chars() {
810-
let mut buf = [0u16, ..2];
811-
let n = ch.encode_utf16(buf /* as mut slice! */);
812-
u.push_all(buf.slice_to(n));
813-
}
814-
u
808+
self.as_slice().utf16_units().collect::<Vec<u16>>()
815809
}
816810

817811
/// Given a string, make a new string with repeated copies of it.
@@ -1619,14 +1613,17 @@ mod tests {
16191613

16201614
for p in pairs.iter() {
16211615
let (s, u) = (*p).clone();
1616+
let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
1617+
let u_as_string = from_utf16(u.as_slice()).unwrap();
1618+
16221619
assert!(is_utf16(u.as_slice()));
1623-
assert_eq!(s.to_utf16(), u);
1620+
assert_eq!(s_as_utf16, u);
16241621

1625-
assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
1622+
assert_eq!(u_as_string, s);
16261623
assert_eq!(from_utf16_lossy(u.as_slice()), s);
16271624

1628-
assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
1629-
assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
1625+
assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
1626+
assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
16301627
}
16311628
}
16321629

src/libcore/str.rs

+45-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
use mem;
1818
use char;
19+
use char::Char;
1920
use clone::Clone;
2021
use cmp;
2122
use cmp::{PartialEq, Eq};
@@ -24,7 +25,7 @@ use default::Default;
2425
use iter::{Filter, Map, Iterator};
2526
use iter::{DoubleEndedIterator, ExactSize};
2627
use iter::range;
27-
use num::Saturating;
28+
use num::{CheckedMul, Saturating};
2829
use option::{None, Option, Some};
2930
use raw::Repr;
3031
use slice::ImmutableVector;
@@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
557558
}
558559
}
559560

561+
/// External iterator for a string's UTF-16 code units.
562+
/// Use with the `std::iter` module.
563+
#[deriving(Clone)]
564+
pub struct Utf16CodeUnits<'a> {
565+
chars: Chars<'a>,
566+
extra: u16
567+
}
568+
569+
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
570+
#[inline]
571+
fn next(&mut self) -> Option<u16> {
572+
if self.extra != 0 {
573+
let tmp = self.extra;
574+
self.extra = 0;
575+
return Some(tmp);
576+
}
577+
578+
let mut buf = [0u16, ..2];
579+
self.chars.next().map(|ch| {
580+
let n = ch.encode_utf16(buf /* as mut slice! */);
581+
if n == 2 { self.extra = buf[1]; }
582+
buf[0]
583+
})
584+
}
585+
586+
#[inline]
587+
fn size_hint(&self) -> (uint, Option<uint>) {
588+
let (low, high) = self.chars.size_hint();
589+
// every char gets either one u16 or two u16,
590+
// so this iterator is between 1 and 2 times as
591+
// long as the underlying iterator.
592+
(low, high.and_then(|n| n.checked_mul(&2)))
593+
}
594+
}
595+
560596
/*
561597
Section: Comparing strings
562598
*/
@@ -1609,6 +1645,9 @@ pub trait StrSlice<'a> {
16091645
/// and that it is not reallocated (e.g. by pushing to the
16101646
/// string).
16111647
fn as_ptr(&self) -> *const u8;
1648+
1649+
/// Return an iterator of `u16` over the string encoded as UTF-16.
1650+
fn utf16_units(&self) -> Utf16CodeUnits<'a>;
16121651
}
16131652

16141653
impl<'a> StrSlice<'a> for &'a str {
@@ -1957,6 +1996,11 @@ impl<'a> StrSlice<'a> for &'a str {
19571996
fn as_ptr(&self) -> *const u8 {
19581997
self.repr().data
19591998
}
1999+
2000+
#[inline]
2001+
fn utf16_units(&self) -> Utf16CodeUnits<'a> {
2002+
Utf16CodeUnits{ chars: self.chars(), extra: 0}
2003+
}
19602004
}
19612005

19622006
impl<'a> Default for &'a str {

src/libnative/io/c_win32.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ extern "system" {
7070

7171
pub mod compat {
7272
use std::intrinsics::{atomic_store_relaxed, transmute};
73+
use std::iter::Iterator;
7374
use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};
7475

7576
extern "system" {
@@ -82,7 +83,8 @@ pub mod compat {
8283
// layer (after it's loaded) shouldn't be any slower than a regular DLL
8384
// call.
8485
unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
85-
let module = module.to_utf16().append_one(0);
86+
let module: Vec<u16> = module.utf16_units().collect();
87+
let module = module.append_one(0);
8688
symbol.with_c_str(|symbol| {
8789
let handle = GetModuleHandleW(module.as_ptr());
8890
let func: uint = transmute(GetProcAddress(handle, symbol));

src/libnative/io/file_win32.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ impl Drop for Inner {
255255

256256
pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
257257
match s.as_str() {
258-
Some(s) => Ok(s.to_utf16().append_one(0)),
258+
Some(s) => Ok(s.utf16_units().collect::<Vec<u16>>().append_one(0)),
259259
None => Err(IoError {
260260
code: libc::ERROR_INVALID_NAME as uint,
261261
extra: 0,

src/libnative/io/process.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,8 @@ fn spawn_process_os(cfg: ProcessConfig,
294294
use libc::funcs::extra::msvcrt::get_osfhandle;
295295

296296
use std::mem;
297+
use std::iter::Iterator;
298+
use std::str::StrSlice;
297299

298300
if cfg.gid.is_some() || cfg.uid.is_some() {
299301
return Err(IoError {
@@ -328,7 +330,8 @@ fn spawn_process_os(cfg: ProcessConfig,
328330
lpSecurityDescriptor: ptr::mut_null(),
329331
bInheritHandle: 1,
330332
};
331-
let filename = "NUL".to_utf16().append_one(0);
333+
let filename: Vec<u16> = "NUL".utf16_units().collect();
334+
let filename = filename.append_one(0);
332335
*slot = libc::CreateFileW(filename.as_ptr(),
333336
access,
334337
libc::FILE_SHARE_READ |
@@ -371,7 +374,8 @@ fn spawn_process_os(cfg: ProcessConfig,
371374

372375
with_envp(cfg.env, |envp| {
373376
with_dirp(cfg.cwd, |dirp| {
374-
let mut cmd_str = cmd_str.to_utf16().append_one(0);
377+
let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
378+
cmd_str = cmd_str.append_one(0);
375379
let created = CreateProcessW(ptr::null(),
376380
cmd_str.as_mut_ptr(),
377381
ptr::mut_null(),
@@ -770,7 +774,7 @@ fn with_envp<T>(env: Option<&[(CString, CString)]>, cb: |*mut c_void| -> T) -> T
770774
let kv = format!("{}={}",
771775
pair.ref0().as_str().unwrap(),
772776
pair.ref1().as_str().unwrap());
773-
blk.push_all(kv.to_utf16().as_slice());
777+
blk.extend(kv.as_slice().utf16_units());
774778
blk.push(0);
775779
}
776780

@@ -788,7 +792,9 @@ fn with_dirp<T>(d: Option<&CString>, cb: |*const u16| -> T) -> T {
788792
Some(dir) => {
789793
let dir_str = dir.as_str()
790794
.expect("expected workingdirectory to be utf-8 encoded");
791-
let dir_str = dir_str.to_utf16().append_one(0);
795+
let dir_str: Vec<u16> = dir_str.utf16_units().collect();
796+
let dir_str = dir_str.append_one(0);
797+
792798
cb(dir_str.as_ptr())
793799
},
794800
None => cb(ptr::null())

src/librustdoc/flock.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ mod imp {
162162

163163
impl Lock {
164164
pub fn new(p: &Path) -> Lock {
165-
let p_16 = p.as_str().unwrap().to_utf16().append_one(0);
165+
let p_16: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
166+
let p_16 = p_16.append_one(0);
166167
let handle = unsafe {
167168
libc::CreateFileW(p_16.as_ptr(),
168169
libc::FILE_GENERIC_READ |

src/libstd/dynamic_lib.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -281,19 +281,22 @@ pub mod dl {
281281
#[cfg(target_os = "win32")]
282282
pub mod dl {
283283
use c_str::ToCStr;
284+
use iter::Iterator;
284285
use libc;
285286
use os;
286287
use ptr;
287288
use result::{Ok, Err, Result};
288-
use str::StrAllocating;
289+
use str::StrSlice;
289290
use str;
290291
use string::String;
292+
use vec::Vec;
291293

292294
pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
293295
// Windows expects Unicode data
294296
let filename_cstr = filename.to_c_str();
295297
let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
296-
let filename_str = filename_str.to_utf16().append_one(0);
298+
let filename_str: Vec<u16> = filename_str.utf16_units().collect();
299+
let filename_str = filename_str.append_one(0);
297300
LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
298301
}
299302

src/libstd/os.rs

+9-5
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,8 @@ pub fn getenv(n: &str) -> Option<String> {
365365
unsafe {
366366
with_env_lock(|| {
367367
use os::win32::{fill_utf16_buf_and_decode};
368-
let n = n.to_utf16().append_one(0);
368+
let n: Vec<u16> = n.utf16_units().collect();
369+
let n = n.append_one(0);
369370
fill_utf16_buf_and_decode(|buf, sz| {
370371
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
371372
})
@@ -411,8 +412,10 @@ pub fn setenv(n: &str, v: &str) {
411412

412413
#[cfg(windows)]
413414
fn _setenv(n: &str, v: &str) {
414-
let n = n.to_utf16().append_one(0);
415-
let v = v.to_utf16().append_one(0);
415+
let n: Vec<u16> = n.utf16_units().collect();
416+
let n = n.append_one(0);
417+
let v: Vec<u16> = v.utf16_units().collect();
418+
let v = v.append_one(0);
416419
unsafe {
417420
with_env_lock(|| {
418421
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
@@ -437,7 +440,8 @@ pub fn unsetenv(n: &str) {
437440

438441
#[cfg(windows)]
439442
fn _unsetenv(n: &str) {
440-
let n = n.to_utf16().append_one(0);
443+
let n: Vec<u16> = n.utf16_units().collect();
444+
let n = n.append_one(0);
441445
unsafe {
442446
with_env_lock(|| {
443447
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
@@ -804,7 +808,7 @@ pub fn change_dir(p: &Path) -> bool {
804808
#[cfg(windows)]
805809
fn chdir(p: &Path) -> bool {
806810
let p = match p.as_str() {
807-
Some(s) => s.to_utf16().append_one(0),
811+
Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
808812
None => return false,
809813
};
810814
unsafe {

0 commit comments

Comments
 (0)