1
Fork 0

serialize: base64: allow LF in addition to CRLF and optimize slightly

It is useful to make the newline sequence configurable in base64 output, since
the standard leaves that choice to the implementation.  GNU `base64` apparently
uses LF, which meant that in `uutils` we had to manually convert every CRLF to
LF.  That conversion made the program very slow for large inputs.

[breaking-change]
This commit is contained in:
Arcterus 2014-12-06 02:35:26 -08:00
parent cafe296677
commit 553ab271a3
2 changed files with 64 additions and 30 deletions

View file

@ -1,4 +1,4 @@
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at // file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT. // http://rust-lang.org/COPYRIGHT.
// //
@ -14,6 +14,7 @@
pub use self::FromBase64Error::*; pub use self::FromBase64Error::*;
pub use self::CharacterSet::*; pub use self::CharacterSet::*;
pub use self::Newline::*;
use std::fmt; use std::fmt;
use std::error; use std::error;
@ -28,10 +29,22 @@ pub enum CharacterSet {
impl Copy for CharacterSet {} impl Copy for CharacterSet {}
/// Available newline types
pub enum Newline {
/// A linefeed (i.e. Unix-style newline)
LF,
/// A carriage return and a linefeed (i.e. Windows-style newline)
CRLF
}
impl Copy for Newline {}
/// Contains configuration parameters for `to_base64`. /// Contains configuration parameters for `to_base64`.
pub struct Config { pub struct Config {
/// Character set to use /// Character set to use
pub char_set: CharacterSet, pub char_set: CharacterSet,
/// Newline to use
pub newline: Newline,
/// True to pad output with `=` characters /// True to pad output with `=` characters
pub pad: bool, pub pad: bool,
/// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
@ -42,15 +55,15 @@ impl Copy for Config {}
/// Configuration for RFC 4648 standard base64 encoding /// Configuration for RFC 4648 standard base64 encoding
pub static STANDARD: Config = pub static STANDARD: Config =
Config {char_set: Standard, pad: true, line_length: None}; Config {char_set: Standard, newline: CRLF, pad: true, line_length: None};
/// Configuration for RFC 4648 base64url encoding /// Configuration for RFC 4648 base64url encoding
pub static URL_SAFE: Config = pub static URL_SAFE: Config =
Config {char_set: UrlSafe, pad: false, line_length: None}; Config {char_set: UrlSafe, newline: CRLF, pad: false, line_length: None};
/// Configuration for RFC 2045 MIME base64 encoding /// Configuration for RFC 2045 MIME base64 encoding
pub static MIME: Config = pub static MIME: Config =
Config {char_set: Standard, pad: true, line_length: Some(76)}; Config {char_set: Standard, newline: CRLF, pad: true, line_length: Some(76)};
static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
abcdefghijklmnopqrstuvwxyz\ abcdefghijklmnopqrstuvwxyz\
@ -87,24 +100,29 @@ impl ToBase64 for [u8] {
UrlSafe => URLSAFE_CHARS UrlSafe => URLSAFE_CHARS
}; };
let mut v = Vec::new(); // In general, this Vec needs roughly (4/3) * self.len() bytes (plus
// any padding and line breaks), but addition is faster than multiplication
// and division, so twice the input length is reserved instead.
let mut v = Vec::with_capacity(self.len() + self.len());
let mut i = 0; let mut i = 0;
let mut cur_length = 0; let mut cur_length = 0;
let len = self.len(); let len = self.len();
while i < len - (len % 3) { let mod_len = len % 3;
match config.line_length { let cond_len = len - mod_len;
Some(line_length) => while i < cond_len {
if cur_length >= line_length { let (first, second, third) = (self[i], self[i + 1], self[i + 2]);
v.push(b'\r'); if let Some(line_length) = config.line_length {
v.push(b'\n'); if cur_length >= line_length {
cur_length = 0; v.push_all(match config.newline {
}, LF => b"\n",
None => () CRLF => b"\r\n"
});
cur_length = 0;
}
} }
let n = (self[i] as u32) << 16 | let n = (first as u32) << 16 |
(self[i + 1] as u32) << 8 | (second as u32) << 8 |
(self[i + 2] as u32); (third as u32);
// This 24-bit number gets separated into four 6-bit numbers. // This 24-bit number gets separated into four 6-bit numbers.
v.push(bytes[((n >> 18) & 63) as uint]); v.push(bytes[((n >> 18) & 63) as uint]);
@ -116,20 +134,20 @@ impl ToBase64 for [u8] {
i += 3; i += 3;
} }
if len % 3 != 0 { if mod_len != 0 {
match config.line_length { if let Some(line_length) = config.line_length {
Some(line_length) => if cur_length >= line_length {
if cur_length >= line_length { v.push_all(match config.newline {
v.push(b'\r'); LF => b"\n",
v.push(b'\n'); CRLF => b"\r\n"
}, });
None => () }
} }
} }
// Heh, would be cool if we knew this was exhaustive // Heh, would be cool if we knew this was exhaustive
// (the dream of bounded integer types) // (the dream of bounded integer types)
match len % 3 { match mod_len {
0 => (), 0 => (),
1 => { 1 => {
let n = (self[i] as u32) << 16; let n = (self[i] as u32) << 16;
@ -232,7 +250,7 @@ impl FromBase64 for str {
impl FromBase64 for [u8] { impl FromBase64 for [u8] {
fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> { fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
let mut r = Vec::new(); let mut r = Vec::with_capacity(self.len());
let mut buf: u32 = 0; let mut buf: u32 = 0;
let mut modulus = 0i; let mut modulus = 0i;
@ -288,7 +306,7 @@ impl FromBase64 for [u8] {
mod tests { mod tests {
extern crate test; extern crate test;
use self::test::Bencher; use self::test::Bencher;
use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE}; use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE, LF};
#[test] #[test]
fn test_to_base64_basic() { fn test_to_base64_basic() {
@ -302,7 +320,7 @@ mod tests {
} }
#[test] #[test]
fn test_to_base64_line_break() { fn test_to_base64_crlf_line_break() {
assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD}) assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD})
.contains("\r\n")); .contains("\r\n"));
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4), assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
@ -310,6 +328,18 @@ mod tests {
"Zm9v\r\nYmFy"); "Zm9v\r\nYmFy");
} }
#[test]
fn test_to_base64_lf_line_break() {
assert!(![0u8, ..1000].to_base64(Config {line_length: None, newline: LF,
..STANDARD})
.as_slice()
.contains("\n"));
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
newline: LF,
..STANDARD}),
"Zm9v\nYmFy".to_string());
}
#[test] #[test]
fn test_to_base64_padding() { fn test_to_base64_padding() {
assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg"); assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg");
@ -344,6 +374,10 @@ mod tests {
b"foobar"); b"foobar");
assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(), assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
b"foob"); b"foob");
assert_eq!("Zm9v\nYmFy".from_base64().unwrap(),
b"foobar");
assert_eq!("Zm9vYg==\n".from_base64().unwrap(),
b"foob");
} }
#[test] #[test]

View file

@ -23,7 +23,7 @@ Core encoding and decoding interfaces.
html_root_url = "http://doc.rust-lang.org/nightly/", html_root_url = "http://doc.rust-lang.org/nightly/",
html_playground_url = "http://play.rust-lang.org/")] html_playground_url = "http://play.rust-lang.org/")]
#![allow(unknown_features)] #![allow(unknown_features)]
#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs)] #![feature(macro_rules, default_type_params, phase, slicing_syntax, globs, if_let)]
// test harness access // test harness access
#[cfg(test)] #[cfg(test)]