Skip to content

Commit d0ad3c7

Browse files
committed
rollup merge of rust-lang#19594: Arcterus/master
It is useful to have configurable newlines in base64, as the standard leaves that for the implementation to decide. GNU `base64` apparently uses LF, which meant that in `uutils` we had to manually convert CRLF to LF, and this made the program very slow for large inputs. [breaking-change]
2 parents ae60f9c + a119ad8 commit d0ad3c7

File tree

2 files changed

+64
-32
lines changed

2 files changed

+64
-32
lines changed

src/libserialize/base64.rs

Lines changed: 63 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -28,10 +28,22 @@ pub enum CharacterSet {
2828

2929
impl Copy for CharacterSet {}
3030

31+
/// Available newline types
32+
pub enum Newline {
33+
/// A linefeed (i.e. Unix-style newline)
34+
LF,
35+
/// A carriage return and a linefeed (i.e. Windows-style newline)
36+
CRLF
37+
}
38+
39+
impl Copy for Newline {}
40+
3141
/// Contains configuration parameters for `to_base64`.
3242
pub struct Config {
3343
/// Character set to use
3444
pub char_set: CharacterSet,
45+
/// Newline to use
46+
pub newline: Newline,
3547
/// True to pad output with `=` characters
3648
pub pad: bool,
3749
/// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
@@ -42,15 +54,15 @@ impl Copy for Config {}
4254

4355
/// Configuration for RFC 4648 standard base64 encoding
4456
pub static STANDARD: Config =
45-
Config {char_set: Standard, pad: true, line_length: None};
57+
Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: None};
4658

4759
/// Configuration for RFC 4648 base64url encoding
4860
pub static URL_SAFE: Config =
49-
Config {char_set: UrlSafe, pad: false, line_length: None};
61+
Config {char_set: UrlSafe, newline: Newline::CRLF, pad: false, line_length: None};
5062

5163
/// Configuration for RFC 2045 MIME base64 encoding
5264
pub static MIME: Config =
53-
Config {char_set: Standard, pad: true, line_length: Some(76)};
65+
Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: Some(76)};
5466

5567
static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
5668
abcdefghijklmnopqrstuvwxyz\
@@ -87,24 +99,30 @@ impl ToBase64 for [u8] {
8799
UrlSafe => URLSAFE_CHARS
88100
};
89101

90-
let mut v = Vec::new();
102+
// In general, this Vec only needs (4/3) * self.len() memory, but
103+
// addition is faster than multiplication and division.
104+
let mut v = Vec::with_capacity(self.len() + self.len());
91105
let mut i = 0;
92106
let mut cur_length = 0;
93107
let len = self.len();
94-
while i < len - (len % 3) {
95-
match config.line_length {
96-
Some(line_length) =>
97-
if cur_length >= line_length {
98-
v.push(b'\r');
99-
v.push(b'\n');
100-
cur_length = 0;
101-
},
102-
None => ()
108+
let mod_len = len % 3;
109+
let cond_len = len - mod_len;
110+
let newline = match config.newline {
111+
Newline::LF => b"\n",
112+
Newline::CRLF => b"\r\n"
113+
};
114+
while i < cond_len {
115+
let (first, second, third) = (self[i], self[i + 1], self[i + 2]);
116+
if let Some(line_length) = config.line_length {
117+
if cur_length >= line_length {
118+
v.push_all(newline);
119+
cur_length = 0;
120+
}
103121
}
104122

105-
let n = (self[i] as u32) << 16 |
106-
(self[i + 1] as u32) << 8 |
107-
(self[i + 2] as u32);
123+
let n = (first as u32) << 16 |
124+
(second as u32) << 8 |
125+
(third as u32);
108126

109127
// This 24-bit number gets separated into four 6-bit numbers.
110128
v.push(bytes[((n >> 18) & 63) as uint]);
@@ -116,20 +134,17 @@ impl ToBase64 for [u8] {
116134
i += 3;
117135
}
118136

119-
if len % 3 != 0 {
120-
match config.line_length {
121-
Some(line_length) =>
122-
if cur_length >= line_length {
123-
v.push(b'\r');
124-
v.push(b'\n');
125-
},
126-
None => ()
137+
if mod_len != 0 {
138+
if let Some(line_length) = config.line_length {
139+
if cur_length >= line_length {
140+
v.push_all(newline);
141+
}
127142
}
128143
}
129144

130145
// Heh, would be cool if we knew this was exhaustive
131146
// (the dream of bounded integer types)
132-
match len % 3 {
147+
match mod_len {
133148
0 => (),
134149
1 => {
135150
let n = (self[i] as u32) << 16;
@@ -232,7 +247,7 @@ impl FromBase64 for str {
232247

233248
impl FromBase64 for [u8] {
234249
fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
235-
let mut r = Vec::new();
250+
let mut r = Vec::with_capacity(self.len());
236251
let mut buf: u32 = 0;
237252
let mut modulus = 0i;
238253

@@ -288,7 +303,7 @@ impl FromBase64 for [u8] {
288303
mod tests {
289304
extern crate test;
290305
use self::test::Bencher;
291-
use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE};
306+
use base64::{Config, Newline, FromBase64, ToBase64, STANDARD, URL_SAFE};
292307

293308
#[test]
294309
fn test_to_base64_basic() {
@@ -302,14 +317,27 @@ mod tests {
302317
}
303318

304319
#[test]
305-
fn test_to_base64_line_break() {
320+
fn test_to_base64_crlf_line_break() {
306321
assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD})
307322
.contains("\r\n"));
308-
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
309-
..STANDARD}),
323+
assert_eq!(b"foobar".to_base64(Config {line_length: Some(4),
324+
..STANDARD}),
310325
"Zm9v\r\nYmFy");
311326
}
312327

328+
#[test]
329+
fn test_to_base64_lf_line_break() {
330+
assert!(![0u8, ..1000].to_base64(Config {line_length: None,
331+
newline: Newline::LF,
332+
..STANDARD})
333+
.as_slice()
334+
.contains("\n"));
335+
assert_eq!(b"foobar".to_base64(Config {line_length: Some(4),
336+
newline: Newline::LF,
337+
..STANDARD}),
338+
"Zm9v\nYmFy");
339+
}
340+
313341
#[test]
314342
fn test_to_base64_padding() {
315343
assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg");
@@ -344,6 +372,10 @@ mod tests {
344372
b"foobar");
345373
assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
346374
b"foob");
375+
assert_eq!("Zm9v\nYmFy".from_base64().unwrap(),
376+
b"foobar");
377+
assert_eq!("Zm9vYg==\n".from_base64().unwrap(),
378+
b"foob");
347379
}
348380

349381
#[test]

src/libserialize/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ Core encoding and decoding interfaces.
2323
html_root_url = "http://doc.rust-lang.org/nightly/",
2424
html_playground_url = "http://play.rust-lang.org/")]
2525
#![allow(unknown_features)]
26-
#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs)]
26+
#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs, if_let)]
2727

2828
// test harness access
2929
#[cfg(test)]

0 commit comments

Comments
 (0)