Skip to content

Commit 553ab27

Browse files
committed
serialize: base64: allow LF in addition to CRLF and optimize slightly
It is useful to have configurable newlines in base64, as the standard leaves that for the implementation to decide. GNU `base64` apparently uses LF, which meant that in `uutils` we had to manually convert CRLF to LF. This made the program very slow for large inputs. [breaking-change]
1 parent cafe296 commit 553ab27

File tree

2 files changed

+64
-30
lines changed

2 files changed

+64
-30
lines changed

src/libserialize/base64.rs

+63-29
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -14,6 +14,7 @@
1414
1515
pub use self::FromBase64Error::*;
1616
pub use self::CharacterSet::*;
17+
pub use self::Newline::*;
1718

1819
use std::fmt;
1920
use std::error;
@@ -28,10 +29,22 @@ pub enum CharacterSet {
2829

2930
impl Copy for CharacterSet {}
3031

32+
/// Available newline types
33+
pub enum Newline {
34+
/// A linefeed (i.e. Unix-style newline)
35+
LF,
36+
/// A carriage return and a linefeed (i.e. Windows-style newline)
37+
CRLF
38+
}
39+
40+
impl Copy for Newline {}
41+
3142
/// Contains configuration parameters for `to_base64`.
3243
pub struct Config {
3344
/// Character set to use
3445
pub char_set: CharacterSet,
46+
/// Newline to use
47+
pub newline: Newline,
3548
/// True to pad output with `=` characters
3649
pub pad: bool,
3750
/// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
@@ -42,15 +55,15 @@ impl Copy for Config {}
4255

4356
/// Configuration for RFC 4648 standard base64 encoding
4457
pub static STANDARD: Config =
45-
Config {char_set: Standard, pad: true, line_length: None};
58+
Config {char_set: Standard, newline: CRLF, pad: true, line_length: None};
4659

4760
/// Configuration for RFC 4648 base64url encoding
4861
pub static URL_SAFE: Config =
49-
Config {char_set: UrlSafe, pad: false, line_length: None};
62+
Config {char_set: UrlSafe, newline: CRLF, pad: false, line_length: None};
5063

5164
/// Configuration for RFC 2045 MIME base64 encoding
5265
pub static MIME: Config =
53-
Config {char_set: Standard, pad: true, line_length: Some(76)};
66+
Config {char_set: Standard, newline: CRLF, pad: true, line_length: Some(76)};
5467

5568
static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
5669
abcdefghijklmnopqrstuvwxyz\
@@ -87,24 +100,29 @@ impl ToBase64 for [u8] {
87100
UrlSafe => URLSAFE_CHARS
88101
};
89102

90-
let mut v = Vec::new();
103+
// In general, this Vec only needs about (4/3) * self.len() bytes (plus any
104+
// padding and line breaks), but addition is faster than multiplication and division, so twice the input length is reserved.
105+
let mut v = Vec::with_capacity(self.len() + self.len());
91106
let mut i = 0;
92107
let mut cur_length = 0;
93108
let len = self.len();
94-
while i < len - (len % 3) {
95-
match config.line_length {
96-
Some(line_length) =>
97-
if cur_length >= line_length {
98-
v.push(b'\r');
99-
v.push(b'\n');
100-
cur_length = 0;
101-
},
102-
None => ()
109+
let mod_len = len % 3;
110+
let cond_len = len - mod_len;
111+
while i < cond_len {
112+
let (first, second, third) = (self[i], self[i + 1], self[i + 2]);
113+
if let Some(line_length) = config.line_length {
114+
if cur_length >= line_length {
115+
v.push_all(match config.newline {
116+
LF => b"\n",
117+
CRLF => b"\r\n"
118+
});
119+
cur_length = 0;
120+
}
103121
}
104122

105-
let n = (self[i] as u32) << 16 |
106-
(self[i + 1] as u32) << 8 |
107-
(self[i + 2] as u32);
123+
let n = (first as u32) << 16 |
124+
(second as u32) << 8 |
125+
(third as u32);
108126

109127
// This 24-bit number gets separated into four 6-bit numbers.
110128
v.push(bytes[((n >> 18) & 63) as uint]);
@@ -116,20 +134,20 @@ impl ToBase64 for [u8] {
116134
i += 3;
117135
}
118136

119-
if len % 3 != 0 {
120-
match config.line_length {
121-
Some(line_length) =>
122-
if cur_length >= line_length {
123-
v.push(b'\r');
124-
v.push(b'\n');
125-
},
126-
None => ()
137+
if mod_len != 0 {
138+
if let Some(line_length) = config.line_length {
139+
if cur_length >= line_length {
140+
v.push_all(match config.newline {
141+
LF => b"\n",
142+
CRLF => b"\r\n"
143+
});
144+
}
127145
}
128146
}
129147

130148
// Heh, would be cool if we knew this was exhaustive
131149
// (the dream of bounded integer types)
132-
match len % 3 {
150+
match mod_len {
133151
0 => (),
134152
1 => {
135153
let n = (self[i] as u32) << 16;
@@ -232,7 +250,7 @@ impl FromBase64 for str {
232250

233251
impl FromBase64 for [u8] {
234252
fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
235-
let mut r = Vec::new();
253+
let mut r = Vec::with_capacity(self.len());
236254
let mut buf: u32 = 0;
237255
let mut modulus = 0i;
238256

@@ -288,7 +306,7 @@ impl FromBase64 for [u8] {
288306
mod tests {
289307
extern crate test;
290308
use self::test::Bencher;
291-
use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE};
309+
use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE, LF};
292310

293311
#[test]
294312
fn test_to_base64_basic() {
@@ -302,14 +320,26 @@ mod tests {
302320
}
303321

304322
#[test]
305-
fn test_to_base64_line_break() {
323+
fn test_to_base64_crlf_line_break() {
306324
assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD})
307325
.contains("\r\n"));
308326
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
309327
..STANDARD}),
310328
"Zm9v\r\nYmFy");
311329
}
312330

331+
#[test]
332+
fn test_to_base64_lf_line_break() {
333+
assert!(![0u8, ..1000].to_base64(Config {line_length: None, newline: LF,
334+
..STANDARD})
335+
.as_slice()
336+
.contains("\n"));
337+
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
338+
newline: LF,
339+
..STANDARD}),
340+
"Zm9v\nYmFy".to_string());
341+
}
342+
313343
#[test]
314344
fn test_to_base64_padding() {
315345
assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg");
@@ -344,6 +374,10 @@ mod tests {
344374
b"foobar");
345375
assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
346376
b"foob");
377+
assert_eq!("Zm9v\nYmFy".from_base64().unwrap(),
378+
b"foobar");
379+
assert_eq!("Zm9vYg==\n".from_base64().unwrap(),
380+
b"foob");
347381
}
348382

349383
#[test]

src/libserialize/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Core encoding and decoding interfaces.
2323
html_root_url = "http://doc.rust-lang.org/nightly/",
2424
html_playground_url = "http://play.rust-lang.org/")]
2525
#![allow(unknown_features)]
26-
#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs)]
26+
#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs, if_let)]
2727

2828
// test harness access
2929
#[cfg(test)]

0 commit comments

Comments
 (0)