Skip to content

Commit 89259b3

Browse files
committed
auto merge of #15085 : brson/rust/stridx, r=alexcrichton
Being able to index into the bytes of a string encourages poor UTF-8 hygiene. To get a view of `&[u8]` from either a `String` or `&str` slice, use the `as_bytes()` method. Closes #12710. [breaking-change] If the diffstat is any indication, this shouldn't have a huge impact, but it will have some. Most changes are in the `str` and `path` modules. A lot of the existing usages were in tests where ASCII is expected. There are a number of other legitimate uses where the characters are known to be ASCII.
2 parents bd893d1 + d21336e commit 89259b3

File tree

26 files changed

+101
-87
lines changed

26 files changed

+101
-87
lines changed

src/libcollections/str.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1569,8 +1569,8 @@ mod tests {
15691569
let n2: uint = v.len();
15701570
assert_eq!(n1, n2);
15711571
while i < n1 {
1572-
let a: u8 = s1.as_slice()[i];
1573-
let b: u8 = s2.as_slice()[i];
1572+
let a: u8 = s1.as_bytes()[i];
1573+
let b: u8 = s2.as_bytes()[i];
15741574
debug!("{}", a);
15751575
debug!("{}", b);
15761576
assert_eq!(a, b);

src/libcollections/string.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ impl String {
222222
return None
223223
}
224224

225-
let byte = self.as_slice()[len - 1];
225+
let byte = self.as_bytes()[len - 1];
226226
self.vec.set_len(len - 1);
227227
Some(byte)
228228
}

src/libcore/str.rs

+16-14
Original file line numberDiff line numberDiff line change
@@ -1743,7 +1743,7 @@ impl<'a> StrSlice<'a> for &'a str {
17431743
fn lines_any(&self) -> AnyLines<'a> {
17441744
self.lines().map(|line| {
17451745
let l = line.len();
1746-
if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1746+
if l > 0 && line.as_bytes()[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
17471747
else { line }
17481748
})
17491749
}
@@ -1867,26 +1867,26 @@ impl<'a> StrSlice<'a> for &'a str {
18671867
fn is_char_boundary(&self, index: uint) -> bool {
18681868
if index == self.len() { return true; }
18691869
if index > self.len() { return false; }
1870-
let b = self[index];
1870+
let b = self.as_bytes()[index];
18711871
return b < 128u8 || b >= 192u8;
18721872
}
18731873

18741874
#[inline]
18751875
fn char_range_at(&self, i: uint) -> CharRange {
1876-
if self[i] < 128u8 {
1877-
return CharRange {ch: self[i] as char, next: i + 1 };
1876+
if self.as_bytes()[i] < 128u8 {
1877+
return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
18781878
}
18791879

18801880
// Multibyte case is a fn to allow char_range_at to inline cleanly
18811881
fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1882-
let mut val = s[i] as u32;
1882+
let mut val = s.as_bytes()[i] as u32;
18831883
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
18841884
assert!((w != 0));
18851885

18861886
val = utf8_first_byte!(val, w);
1887-
val = utf8_acc_cont_byte!(val, s[i + 1]);
1888-
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1889-
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1887+
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
1888+
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
1889+
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
18901890

18911891
return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
18921892
}
@@ -1899,23 +1899,25 @@ impl<'a> StrSlice<'a> for &'a str {
18991899
let mut prev = start;
19001900

19011901
prev = prev.saturating_sub(1);
1902-
if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
1902+
if self.as_bytes()[prev] < 128 {
1903+
return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
1904+
}
19031905

19041906
// Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
19051907
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
19061908
// while there is a previous byte == 10......
1907-
while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
1909+
while i > 0 && s.as_bytes()[i] & 192u8 == TAG_CONT_U8 {
19081910
i -= 1u;
19091911
}
19101912

1911-
let mut val = s[i] as u32;
1913+
let mut val = s.as_bytes()[i] as u32;
19121914
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
19131915
assert!((w != 0));
19141916

19151917
val = utf8_first_byte!(val, w);
1916-
val = utf8_acc_cont_byte!(val, s[i + 1]);
1917-
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1918-
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1918+
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
1919+
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
1920+
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
19191921

19201922
return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
19211923
}

src/libgetopts/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ impl Matches {
370370
}
371371

372372
fn is_arg(arg: &str) -> bool {
373-
arg.len() > 1 && arg[0] == '-' as u8
373+
arg.len() > 1 && arg.as_bytes()[0] == '-' as u8
374374
}
375375

376376
fn find_opt(opts: &[Opt], nm: Name) -> Option<uint> {
@@ -553,7 +553,7 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
553553
} else {
554554
let mut names;
555555
let mut i_arg = None;
556-
if cur.as_slice()[1] == '-' as u8 {
556+
if cur.as_bytes()[1] == '-' as u8 {
557557
let tail = cur.as_slice().slice(2, curlen);
558558
let tail_eq: Vec<&str> = tail.split('=').collect();
559559
if tail_eq.len() <= 1 {

src/librustc/back/link.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -657,8 +657,8 @@ pub fn sanitize(s: &str) -> String {
657657

658658
// Underscore-qualify anything that didn't start as an ident.
659659
if result.len() > 0u &&
660-
result.as_slice()[0] != '_' as u8 &&
661-
! char::is_XID_start(result.as_slice()[0] as char) {
660+
result.as_bytes()[0] != '_' as u8 &&
661+
! char::is_XID_start(result.as_bytes()[0] as char) {
662662
return format!("_{}", result.as_slice());
663663
}
664664

@@ -737,9 +737,9 @@ pub fn mangle_exported_name(ccx: &CrateContext, path: PathElems,
737737
let extra2 = id % EXTRA_CHARS.len();
738738
let id = id / EXTRA_CHARS.len();
739739
let extra3 = id % EXTRA_CHARS.len();
740-
hash.push_char(EXTRA_CHARS[extra1] as char);
741-
hash.push_char(EXTRA_CHARS[extra2] as char);
742-
hash.push_char(EXTRA_CHARS[extra3] as char);
740+
hash.push_char(EXTRA_CHARS.as_bytes()[extra1] as char);
741+
hash.push_char(EXTRA_CHARS.as_bytes()[extra2] as char);
742+
hash.push_char(EXTRA_CHARS.as_bytes()[extra3] as char);
743743

744744
exported_name(path,
745745
hash.as_slice(),

src/librustc/metadata/decoder.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ fn item_sized(item: ebml::Doc) -> ast::Sized {
181181
fn item_method_sort(item: ebml::Doc) -> char {
182182
let mut ret = 'r';
183183
reader::tagged_docs(item, tag_item_trait_method_sort, |doc| {
184-
ret = doc.as_str_slice()[0] as char;
184+
ret = doc.as_str_slice().as_bytes()[0] as char;
185185
false
186186
});
187187
ret
@@ -757,13 +757,13 @@ fn get_explicit_self(item: ebml::Doc) -> ast::ExplicitSelf_ {
757757
let explicit_self_doc = reader::get_doc(item, tag_item_trait_method_explicit_self);
758758
let string = explicit_self_doc.as_str_slice();
759759

760-
let explicit_self_kind = string[0];
760+
let explicit_self_kind = string.as_bytes()[0];
761761
match explicit_self_kind as char {
762762
's' => ast::SelfStatic,
763763
'v' => ast::SelfValue,
764764
'~' => ast::SelfUniq,
765765
// FIXME(#4846) expl. region
766-
'&' => ast::SelfRegion(None, get_mutability(string[1])),
766+
'&' => ast::SelfRegion(None, get_mutability(string.as_bytes()[1])),
767767
_ => fail!("unknown self type code: `{}`", explicit_self_kind as char)
768768
}
769769
}

src/librustc/middle/dead.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ struct DeadVisitor<'a> {
399399
impl<'a> DeadVisitor<'a> {
400400
fn should_warn_about_field(&mut self, node: &ast::StructField_) -> bool {
401401
let (is_named, has_leading_underscore) = match node.ident() {
402-
Some(ref ident) => (true, token::get_ident(*ident).get()[0] == ('_' as u8)),
402+
Some(ref ident) => (true, token::get_ident(*ident).get().as_bytes()[0] == ('_' as u8)),
403403
_ => (false, false)
404404
};
405405
let field_type = ty::node_id_to_type(self.tcx, node.id);

src/librustc/middle/liveness.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1511,7 +1511,7 @@ impl<'a> Liveness<'a> {
15111511

15121512
fn should_warn(&self, var: Variable) -> Option<String> {
15131513
let name = self.ir.variable_name(var);
1514-
if name.len() == 0 || name.as_slice()[0] == ('_' as u8) {
1514+
if name.len() == 0 || name.as_bytes()[0] == ('_' as u8) {
15151515
None
15161516
} else {
15171517
Some(name)

src/librustc/middle/mem_categorization.rs

+1-6
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ pub enum FieldName {
126126
#[deriving(Clone, PartialEq, Eq, Hash)]
127127
pub enum ElementKind {
128128
VecElement,
129-
StrElement,
130129
OtherElement,
131130
}
132131

@@ -794,7 +793,7 @@ impl<'t,TYPER:Typer> MemCategorizationContext<'t,TYPER> {
794793
//! - `derefs`: the deref number to be used for
795794
//! the implicit index deref, if any (see above)
796795
797-
let element_ty = match ty::index(base_cmt.ty) {
796+
let element_ty = match ty::array_element_ty(base_cmt.ty) {
798797
Some(ref mt) => mt.ty,
799798
None => {
800799
self.tcx().sess.span_bug(
@@ -1137,9 +1136,6 @@ impl<'t,TYPER:Typer> MemCategorizationContext<'t,TYPER> {
11371136
cat_interior(_, InteriorElement(VecElement)) => {
11381137
"vec content".to_string()
11391138
}
1140-
cat_interior(_, InteriorElement(StrElement)) => {
1141-
"str content".to_string()
1142-
}
11431139
cat_interior(_, InteriorElement(OtherElement)) => {
11441140
"indexed content".to_string()
11451141
}
@@ -1320,7 +1316,6 @@ fn element_kind(t: ty::t) -> ElementKind {
13201316
ty::ty_rptr(_, ty::mt{ty:ty, ..}) |
13211317
ty::ty_uniq(ty) => match ty::get(ty).sty {
13221318
ty::ty_vec(_, None) => VecElement,
1323-
ty::ty_str => StrElement,
13241319
_ => OtherElement
13251320
},
13261321
ty::ty_vec(..) => VecElement,

src/librustc/middle/ty.rs

+15
Original file line numberDiff line numberDiff line change
@@ -2563,6 +2563,21 @@ pub fn deref(t: t, explicit: bool) -> Option<mt> {
25632563

25642564
// Returns the type of t[i]
25652565
pub fn index(t: t) -> Option<mt> {
2566+
match get(t).sty {
2567+
ty_vec(mt, Some(_)) => Some(mt),
2568+
ty_ptr(mt{ty: t, ..}) | ty_rptr(_, mt{ty: t, ..}) |
2569+
ty_box(t) | ty_uniq(t) => match get(t).sty {
2570+
ty_vec(mt, None) => Some(mt),
2571+
_ => None,
2572+
},
2573+
_ => None
2574+
}
2575+
}
2576+
2577+
// Returns the type of elements contained within an 'array-like' type.
2578+
// This is exactly the same as the above, except it supports strings,
2579+
// which can't actually be indexed.
2580+
pub fn array_element_ty(t: t) -> Option<mt> {
25662581
match get(t).sty {
25672582
ty_vec(mt, Some(_)) => Some(mt),
25682583
ty_ptr(mt{ty: t, ..}) | ty_rptr(_, mt{ty: t, ..}) |

src/libstd/io/fs.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1206,8 +1206,8 @@ mod test {
12061206
let mut cur = [0u8, .. 2];
12071207
for f in files {
12081208
let stem = f.filestem_str().unwrap();
1209-
let root = stem[0] - ('0' as u8);
1210-
let name = stem[1] - ('0' as u8);
1209+
let root = stem.as_bytes()[0] - ('0' as u8);
1210+
let name = stem.as_bytes()[1] - ('0' as u8);
12111211
assert!(cur[root as uint] < name);
12121212
cur[root as uint] = name;
12131213
}

src/libstd/path/windows.rs

+25-21
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,18 @@ impl GenericPathUnsafe for Path {
242242
fn is_vol_abs(path: &str, prefix: Option<PathPrefix>) -> bool {
243243
// assume prefix is Some(DiskPrefix)
244244
let rest = path.slice_from(prefix_len(prefix));
245-
!rest.is_empty() && rest[0].is_ascii() && is_sep(rest[0] as char)
245+
!rest.is_empty() && rest.as_bytes()[0].is_ascii() && is_sep(rest.as_bytes()[0] as char)
246246
}
247247
fn shares_volume(me: &Path, path: &str) -> bool {
248248
// path is assumed to have a prefix of Some(DiskPrefix)
249249
let repr = me.repr.as_slice();
250250
match me.prefix {
251-
Some(DiskPrefix) => repr[0] == path[0].to_ascii().to_upper().to_byte(),
252-
Some(VerbatimDiskPrefix) => repr[4] == path[0].to_ascii().to_upper().to_byte(),
251+
Some(DiskPrefix) => {
252+
repr.as_bytes()[0] == path.as_bytes()[0].to_ascii().to_upper().to_byte()
253+
}
254+
Some(VerbatimDiskPrefix) => {
255+
repr.as_bytes()[4] == path.as_bytes()[0].to_ascii().to_upper().to_byte()
256+
}
253257
_ => false
254258
}
255259
}
@@ -279,7 +283,7 @@ impl GenericPathUnsafe for Path {
279283
// if me is "C:" we don't want to add a path separator
280284
match me.prefix {
281285
Some(DiskPrefix) if me.repr.len() == plen => (),
282-
_ if !(me.repr.len() > plen && me.repr.as_slice()[me.repr.len()-1] == SEP_BYTE) => {
286+
_ if !(me.repr.len() > plen && me.repr.as_bytes()[me.repr.len()-1] == SEP_BYTE) => {
283287
s.push_char(SEP);
284288
}
285289
_ => ()
@@ -302,7 +306,7 @@ impl GenericPathUnsafe for Path {
302306
// absolute path, or cwd-relative and self is not same volume
303307
replace_path(self, path, prefix);
304308
}
305-
None if !path.is_empty() && is_sep_(self.prefix, path[0]) => {
309+
None if !path.is_empty() && is_sep_(self.prefix, path.as_bytes()[0]) => {
306310
// volume-relative path
307311
if self.prefix.is_some() {
308312
// truncate self down to the prefix, then append
@@ -478,7 +482,7 @@ impl GenericPath for Path {
478482
match self.prefix {
479483
Some(DiskPrefix) => {
480484
let rest = self.repr.as_slice().slice_from(self.prefix_len());
481-
rest.len() > 0 && rest[0] == SEP_BYTE
485+
rest.len() > 0 && rest.as_bytes()[0] == SEP_BYTE
482486
}
483487
Some(_) => true,
484488
None => false
@@ -638,11 +642,11 @@ impl Path {
638642
let s = match self.prefix {
639643
Some(_) => {
640644
let plen = self.prefix_len();
641-
if repr.len() > plen && repr[plen] == SEP_BYTE {
645+
if repr.len() > plen && repr.as_bytes()[plen] == SEP_BYTE {
642646
repr.slice_from(plen+1)
643647
} else { repr.slice_from(plen) }
644648
}
645-
None if repr[0] == SEP_BYTE => repr.slice_from(1),
649+
None if repr.as_bytes()[0] == SEP_BYTE => repr.slice_from(1),
646650
None => repr
647651
};
648652
let ret = s.split_terminator(SEP).map(Some);
@@ -665,14 +669,14 @@ impl Path {
665669
match (self.prefix, other.prefix) {
666670
(Some(DiskPrefix), Some(VerbatimDiskPrefix)) => {
667671
self.is_absolute() &&
668-
s_repr[0].to_ascii().eq_ignore_case(o_repr[4].to_ascii())
672+
s_repr.as_bytes()[0].to_ascii().eq_ignore_case(o_repr.as_bytes()[4].to_ascii())
669673
}
670674
(Some(VerbatimDiskPrefix), Some(DiskPrefix)) => {
671675
other.is_absolute() &&
672-
s_repr[4].to_ascii().eq_ignore_case(o_repr[0].to_ascii())
676+
s_repr.as_bytes()[4].to_ascii().eq_ignore_case(o_repr.as_bytes()[0].to_ascii())
673677
}
674678
(Some(VerbatimDiskPrefix), Some(VerbatimDiskPrefix)) => {
675-
s_repr[4].to_ascii().eq_ignore_case(o_repr[4].to_ascii())
679+
s_repr.as_bytes()[4].to_ascii().eq_ignore_case(o_repr.as_bytes()[4].to_ascii())
676680
}
677681
(Some(UNCPrefix(_,_)), Some(VerbatimUNCPrefix(_,_))) => {
678682
s_repr.slice(2, self.prefix_len()) == o_repr.slice(8, other.prefix_len())
@@ -718,12 +722,12 @@ impl Path {
718722
let mut comps = comps;
719723
match (comps.is_some(),prefix) {
720724
(false, Some(DiskPrefix)) => {
721-
if s[0] >= 'a' as u8 && s[0] <= 'z' as u8 {
725+
if s.as_bytes()[0] >= 'a' as u8 && s.as_bytes()[0] <= 'z' as u8 {
722726
comps = Some(vec![]);
723727
}
724728
}
725729
(false, Some(VerbatimDiskPrefix)) => {
726-
if s[4] >= 'a' as u8 && s[0] <= 'z' as u8 {
730+
if s.as_bytes()[4] >= 'a' as u8 && s.as_bytes()[0] <= 'z' as u8 {
727731
comps = Some(vec![]);
728732
}
729733
}
@@ -778,12 +782,12 @@ impl Path {
778782
let mut s = String::with_capacity(n);
779783
match prefix {
780784
Some(DiskPrefix) => {
781-
s.push_char(prefix_[0].to_ascii().to_upper().to_char());
785+
s.push_char(prefix_.as_bytes()[0].to_ascii().to_upper().to_char());
782786
s.push_char(':');
783787
}
784788
Some(VerbatimDiskPrefix) => {
785789
s.push_str(prefix_.slice_to(4));
786-
s.push_char(prefix_[4].to_ascii().to_upper().to_char());
790+
s.push_char(prefix_.as_bytes()[4].to_ascii().to_upper().to_char());
787791
s.push_str(prefix_.slice_from(5));
788792
}
789793
Some(UNCPrefix(a,b)) => {
@@ -845,7 +849,7 @@ impl Path {
845849

846850
fn has_nonsemantic_trailing_slash(&self) -> bool {
847851
is_verbatim(self) && self.repr.len() > self.prefix_len()+1 &&
848-
self.repr.as_slice()[self.repr.len()-1] == SEP_BYTE
852+
self.repr.as_bytes()[self.repr.len()-1] == SEP_BYTE
849853
}
850854

851855
fn update_normalized<S: Str>(&mut self, s: S) {
@@ -861,7 +865,7 @@ impl Path {
861865
/// but absolute within that volume.
862866
#[inline]
863867
pub fn is_vol_relative(path: &Path) -> bool {
864-
path.prefix.is_none() && is_sep_byte(&path.repr.as_slice()[0])
868+
path.prefix.is_none() && is_sep_byte(&path.repr.as_bytes()[0])
865869
}
866870

867871
/// Returns whether the path is considered "cwd-relative", which means a path
@@ -991,8 +995,8 @@ fn parse_prefix<'a>(mut path: &'a str) -> Option<PathPrefix> {
991995
} else {
992996
// \\?\path
993997
let idx = path.find('\\');
994-
if idx == Some(2) && path[1] == ':' as u8 {
995-
let c = path[0];
998+
if idx == Some(2) && path.as_bytes()[1] == ':' as u8 {
999+
let c = path.as_bytes()[0];
9961000
if c.is_ascii() && ::char::is_alphabetic(c as char) {
9971001
// \\?\C:\ path
9981002
return Some(VerbatimDiskPrefix);
@@ -1014,9 +1018,9 @@ fn parse_prefix<'a>(mut path: &'a str) -> Option<PathPrefix> {
10141018
}
10151019
_ => ()
10161020
}
1017-
} else if path.len() > 1 && path[1] == ':' as u8 {
1021+
} else if path.len() > 1 && path.as_bytes()[1] == ':' as u8 {
10181022
// C:
1019-
let c = path[0];
1023+
let c = path.as_bytes()[0];
10201024
if c.is_ascii() && ::char::is_alphabetic(c as char) {
10211025
return Some(DiskPrefix);
10221026
}

0 commit comments

Comments
 (0)