Skip to content

Commit ff5fdff

Browse files
committed
ToBase64 and ToHex perf improvements
The overhead of `str::push_char` is high enough to cripple the performance of these two functions. I've switched them to build the output in a `~[u8]` and then convert it to a string afterwards. Since we know exactly which bytes go into the vector, we can use the unsafe conversion (`str::raw::from_bytes_owned`) to avoid the `is_utf8` check. I could have optimized it further with `vec::raw::get`, but that only added ~10 MB/s, so I didn't think it was worth it. ToHex is still ~30% slower than FromHex, which is puzzling.

Before:
```
test base64::test::from_base64 ... bench: 1000 ns/iter (+/- 349) = 204 MB/s
test base64::test::to_base64 ... bench: 2390 ns/iter (+/- 1130) = 63 MB/s
...
test hex::tests::bench_from_hex ... bench: 884 ns/iter (+/- 220) = 341 MB/s
test hex::tests::bench_to_hex ... bench: 2453 ns/iter (+/- 919) = 61 MB/s
```

After:
```
test base64::test::from_base64 ... bench: 1271 ns/iter (+/- 600) = 160 MB/s
test base64::test::to_base64 ... bench: 759 ns/iter (+/- 286) = 198 MB/s
...
test hex::tests::bench_from_hex ... bench: 875 ns/iter (+/- 377) = 345 MB/s
test hex::tests::bench_to_hex ... bench: 593 ns/iter (+/- 240) = 254 MB/s
```
1 parent 463e241 commit ff5fdff

File tree

2 files changed

+38
-37
lines changed

2 files changed

+38
-37
lines changed

src/libextra/base64.rs

+30-31
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// except according to those terms.
1010

1111
//! Base64 binary-to-text encoding
12+
use std::str;
1213

1314
/// Available encoding character sets
1415
pub enum CharacterSet {
@@ -40,21 +41,13 @@ pub static URL_SAFE: Config =
4041
pub static MIME: Config =
4142
Config {char_set: Standard, pad: true, line_length: Some(76)};
4243

43-
static STANDARD_CHARS: [char, ..64] = [
44-
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
45-
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
46-
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
47-
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
48-
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
49-
];
50-
51-
static URLSAFE_CHARS: [char, ..64] = [
52-
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
53-
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
54-
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
55-
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
56-
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
57-
];
44+
static STANDARD_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
45+
"abcdefghijklmnopqrstuvwxyz",
46+
"0123456789+/");
47+
48+
static URLSAFE_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
49+
"abcdefghijklmnopqrstuvwxyz",
50+
"0123456789-_");
5851

5952
/// A trait for converting a value to base64 encoding.
6053
pub trait ToBase64 {
@@ -80,20 +73,21 @@ impl<'self> ToBase64 for &'self [u8] {
8073
* ~~~
8174
*/
8275
fn to_base64(&self, config: Config) -> ~str {
83-
let chars = match config.char_set {
76+
let bytes = match config.char_set {
8477
Standard => STANDARD_CHARS,
8578
UrlSafe => URLSAFE_CHARS
8679
};
8780

88-
let mut s = ~"";
81+
let mut v: ~[u8] = ~[];
8982
let mut i = 0;
9083
let mut cur_length = 0;
9184
let len = self.len();
9285
while i < len - (len % 3) {
9386
match config.line_length {
9487
Some(line_length) =>
9588
if cur_length >= line_length {
96-
s.push_str("\r\n");
89+
v.push('\r' as u8);
90+
v.push('\n' as u8);
9791
cur_length = 0;
9892
},
9993
None => ()
@@ -104,10 +98,10 @@ impl<'self> ToBase64 for &'self [u8] {
10498
(self[i + 2] as u32);
10599

106100
// This 24-bit number gets separated into four 6-bit numbers.
107-
s.push_char(chars[(n >> 18) & 63]);
108-
s.push_char(chars[(n >> 12) & 63]);
109-
s.push_char(chars[(n >> 6 ) & 63]);
110-
s.push_char(chars[n & 63]);
101+
v.push(bytes[(n >> 18) & 63]);
102+
v.push(bytes[(n >> 12) & 63]);
103+
v.push(bytes[(n >> 6 ) & 63]);
104+
v.push(bytes[n & 63]);
111105

112106
cur_length += 4;
113107
i += 3;
@@ -117,7 +111,8 @@ impl<'self> ToBase64 for &'self [u8] {
117111
match config.line_length {
118112
Some(line_length) =>
119113
if cur_length >= line_length {
120-
s.push_str("\r\n");
114+
v.push('\r' as u8);
115+
v.push('\n' as u8);
121116
},
122117
None => ()
123118
}
@@ -129,25 +124,29 @@ impl<'self> ToBase64 for &'self [u8] {
129124
0 => (),
130125
1 => {
131126
let n = (self[i] as u32) << 16;
132-
s.push_char(chars[(n >> 18) & 63]);
133-
s.push_char(chars[(n >> 12) & 63]);
127+
v.push(bytes[(n >> 18) & 63]);
128+
v.push(bytes[(n >> 12) & 63]);
134129
if config.pad {
135-
s.push_str("==");
130+
v.push('=' as u8);
131+
v.push('=' as u8);
136132
}
137133
}
138134
2 => {
139135
let n = (self[i] as u32) << 16 |
140136
(self[i + 1u] as u32) << 8;
141-
s.push_char(chars[(n >> 18) & 63]);
142-
s.push_char(chars[(n >> 12) & 63]);
143-
s.push_char(chars[(n >> 6 ) & 63]);
137+
v.push(bytes[(n >> 18) & 63]);
138+
v.push(bytes[(n >> 12) & 63]);
139+
v.push(bytes[(n >> 6 ) & 63]);
144140
if config.pad {
145-
s.push_char('=');
141+
v.push('=' as u8);
146142
}
147143
}
148144
_ => fail!("Algebra is broken, please alert the math police")
149145
}
150-
s
146+
147+
unsafe {
148+
str::raw::from_bytes_owned(v)
149+
}
151150
}
152151
}
153152

src/libextra/hex.rs

+8-6
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ pub trait ToHex {
1919
fn to_hex(&self) -> ~str;
2020
}
2121

22-
static CHARS: [char, ..16] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
23-
'a', 'b', 'c', 'd', 'e', 'f'];
22+
static CHARS: &'static[u8] = bytes!("0123456789abcdef");
2423

2524
impl<'self> ToHex for &'self [u8] {
2625
/**
@@ -39,13 +38,16 @@ impl<'self> ToHex for &'self [u8] {
3938
* ~~~
4039
*/
4140
fn to_hex(&self) -> ~str {
42-
let mut s = str::with_capacity(self.len() * 2);
41+
// +1 for NULL terminator
42+
let mut v = vec::with_capacity(self.len() * 2 + 1);
4343
for &byte in self.iter() {
44-
s.push_char(CHARS[byte >> 4]);
45-
s.push_char(CHARS[byte & 0xf]);
44+
v.push(CHARS[byte >> 4]);
45+
v.push(CHARS[byte & 0xf]);
4646
}
4747

48-
s
48+
unsafe {
49+
str::raw::from_bytes_owned(v)
50+
}
4951
}
5052
}
5153

0 commit comments

Comments
 (0)