serialize: base64: allow LF in addition to CRLF and optimize slightly
It is useful to have configurable newlines in base64, as the standard leaves that choice to the implementation. GNU `base64` apparently uses LF, which meant that in `uutils` we had to manually convert the CRLF to LF. This made the program very slow for large inputs. [breaking-change]
This commit is contained in:
parent
cafe296677
commit
553ab271a3
2 changed files with 64 additions and 30 deletions
|
@ -1,4 +1,4 @@
|
||||||
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
|
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
|
||||||
// file at the top-level directory of this distribution and at
|
// file at the top-level directory of this distribution and at
|
||||||
// http://rust-lang.org/COPYRIGHT.
|
// http://rust-lang.org/COPYRIGHT.
|
||||||
//
|
//
|
||||||
|
@ -14,6 +14,7 @@
|
||||||
|
|
||||||
pub use self::FromBase64Error::*;
|
pub use self::FromBase64Error::*;
|
||||||
pub use self::CharacterSet::*;
|
pub use self::CharacterSet::*;
|
||||||
|
pub use self::Newline::*;
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::error;
|
use std::error;
|
||||||
|
@ -28,10 +29,22 @@ pub enum CharacterSet {
|
||||||
|
|
||||||
impl Copy for CharacterSet {}
|
impl Copy for CharacterSet {}
|
||||||
|
|
||||||
|
/// Available newline types
|
||||||
|
pub enum Newline {
|
||||||
|
/// A linefeed (i.e. Unix-style newline)
|
||||||
|
LF,
|
||||||
|
/// A carriage return and a linefeed (i.e. Windows-style newline)
|
||||||
|
CRLF
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Copy for Newline {}
|
||||||
|
|
||||||
/// Contains configuration parameters for `to_base64`.
|
/// Contains configuration parameters for `to_base64`.
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
/// Character set to use
|
/// Character set to use
|
||||||
pub char_set: CharacterSet,
|
pub char_set: CharacterSet,
|
||||||
|
/// Newline to use
|
||||||
|
pub newline: Newline,
|
||||||
/// True to pad output with `=` characters
|
/// True to pad output with `=` characters
|
||||||
pub pad: bool,
|
pub pad: bool,
|
||||||
/// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
|
/// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
|
||||||
|
@ -42,15 +55,15 @@ impl Copy for Config {}
|
||||||
|
|
||||||
/// Configuration for RFC 4648 standard base64 encoding
|
/// Configuration for RFC 4648 standard base64 encoding
|
||||||
pub static STANDARD: Config =
|
pub static STANDARD: Config =
|
||||||
Config {char_set: Standard, pad: true, line_length: None};
|
Config {char_set: Standard, newline: CRLF, pad: true, line_length: None};
|
||||||
|
|
||||||
/// Configuration for RFC 4648 base64url encoding
|
/// Configuration for RFC 4648 base64url encoding
|
||||||
pub static URL_SAFE: Config =
|
pub static URL_SAFE: Config =
|
||||||
Config {char_set: UrlSafe, pad: false, line_length: None};
|
Config {char_set: UrlSafe, newline: CRLF, pad: false, line_length: None};
|
||||||
|
|
||||||
/// Configuration for RFC 2045 MIME base64 encoding
|
/// Configuration for RFC 2045 MIME base64 encoding
|
||||||
pub static MIME: Config =
|
pub static MIME: Config =
|
||||||
Config {char_set: Standard, pad: true, line_length: Some(76)};
|
Config {char_set: Standard, newline: CRLF, pad: true, line_length: Some(76)};
|
||||||
|
|
||||||
static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
|
static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
|
||||||
abcdefghijklmnopqrstuvwxyz\
|
abcdefghijklmnopqrstuvwxyz\
|
||||||
|
@ -87,24 +100,29 @@ impl ToBase64 for [u8] {
|
||||||
UrlSafe => URLSAFE_CHARS
|
UrlSafe => URLSAFE_CHARS
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut v = Vec::new();
|
// In general, this Vec only needs (4/3) * self.len() memory, but
|
||||||
|
// addition is faster than multiplication and division.
|
||||||
|
let mut v = Vec::with_capacity(self.len() + self.len());
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
let mut cur_length = 0;
|
let mut cur_length = 0;
|
||||||
let len = self.len();
|
let len = self.len();
|
||||||
while i < len - (len % 3) {
|
let mod_len = len % 3;
|
||||||
match config.line_length {
|
let cond_len = len - mod_len;
|
||||||
Some(line_length) =>
|
while i < cond_len {
|
||||||
if cur_length >= line_length {
|
let (first, second, third) = (self[i], self[i + 1], self[i + 2]);
|
||||||
v.push(b'\r');
|
if let Some(line_length) = config.line_length {
|
||||||
v.push(b'\n');
|
if cur_length >= line_length {
|
||||||
cur_length = 0;
|
v.push_all(match config.newline {
|
||||||
},
|
LF => b"\n",
|
||||||
None => ()
|
CRLF => b"\r\n"
|
||||||
|
});
|
||||||
|
cur_length = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let n = (self[i] as u32) << 16 |
|
let n = (first as u32) << 16 |
|
||||||
(self[i + 1] as u32) << 8 |
|
(second as u32) << 8 |
|
||||||
(self[i + 2] as u32);
|
(third as u32);
|
||||||
|
|
||||||
// This 24-bit number gets separated into four 6-bit numbers.
|
// This 24-bit number gets separated into four 6-bit numbers.
|
||||||
v.push(bytes[((n >> 18) & 63) as uint]);
|
v.push(bytes[((n >> 18) & 63) as uint]);
|
||||||
|
@ -116,20 +134,20 @@ impl ToBase64 for [u8] {
|
||||||
i += 3;
|
i += 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
if len % 3 != 0 {
|
if mod_len != 0 {
|
||||||
match config.line_length {
|
if let Some(line_length) = config.line_length {
|
||||||
Some(line_length) =>
|
if cur_length >= line_length {
|
||||||
if cur_length >= line_length {
|
v.push_all(match config.newline {
|
||||||
v.push(b'\r');
|
LF => b"\n",
|
||||||
v.push(b'\n');
|
CRLF => b"\r\n"
|
||||||
},
|
});
|
||||||
None => ()
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Heh, would be cool if we knew this was exhaustive
|
// Heh, would be cool if we knew this was exhaustive
|
||||||
// (the dream of bounded integer types)
|
// (the dream of bounded integer types)
|
||||||
match len % 3 {
|
match mod_len {
|
||||||
0 => (),
|
0 => (),
|
||||||
1 => {
|
1 => {
|
||||||
let n = (self[i] as u32) << 16;
|
let n = (self[i] as u32) << 16;
|
||||||
|
@ -232,7 +250,7 @@ impl FromBase64 for str {
|
||||||
|
|
||||||
impl FromBase64 for [u8] {
|
impl FromBase64 for [u8] {
|
||||||
fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
|
fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
|
||||||
let mut r = Vec::new();
|
let mut r = Vec::with_capacity(self.len());
|
||||||
let mut buf: u32 = 0;
|
let mut buf: u32 = 0;
|
||||||
let mut modulus = 0i;
|
let mut modulus = 0i;
|
||||||
|
|
||||||
|
@ -288,7 +306,7 @@ impl FromBase64 for [u8] {
|
||||||
mod tests {
|
mod tests {
|
||||||
extern crate test;
|
extern crate test;
|
||||||
use self::test::Bencher;
|
use self::test::Bencher;
|
||||||
use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE};
|
use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE, LF};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_base64_basic() {
|
fn test_to_base64_basic() {
|
||||||
|
@ -302,7 +320,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_base64_line_break() {
|
fn test_to_base64_crlf_line_break() {
|
||||||
assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD})
|
assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD})
|
||||||
.contains("\r\n"));
|
.contains("\r\n"));
|
||||||
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
|
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
|
||||||
|
@ -310,6 +328,18 @@ mod tests {
|
||||||
"Zm9v\r\nYmFy");
|
"Zm9v\r\nYmFy");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_to_base64_lf_line_break() {
|
||||||
|
assert!(![0u8, ..1000].to_base64(Config {line_length: None, newline: LF,
|
||||||
|
..STANDARD})
|
||||||
|
.as_slice()
|
||||||
|
.contains("\n"));
|
||||||
|
assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
|
||||||
|
newline: LF,
|
||||||
|
..STANDARD}),
|
||||||
|
"Zm9v\nYmFy".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_base64_padding() {
|
fn test_to_base64_padding() {
|
||||||
assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg");
|
assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg");
|
||||||
|
@ -344,6 +374,10 @@ mod tests {
|
||||||
b"foobar");
|
b"foobar");
|
||||||
assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
|
assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
|
||||||
b"foob");
|
b"foob");
|
||||||
|
assert_eq!("Zm9v\nYmFy".from_base64().unwrap(),
|
||||||
|
b"foobar");
|
||||||
|
assert_eq!("Zm9vYg==\n".from_base64().unwrap(),
|
||||||
|
b"foob");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -23,7 +23,7 @@ Core encoding and decoding interfaces.
|
||||||
html_root_url = "http://doc.rust-lang.org/nightly/",
|
html_root_url = "http://doc.rust-lang.org/nightly/",
|
||||||
html_playground_url = "http://play.rust-lang.org/")]
|
html_playground_url = "http://play.rust-lang.org/")]
|
||||||
#![allow(unknown_features)]
|
#![allow(unknown_features)]
|
||||||
#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs)]
|
#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs, if_let)]
|
||||||
|
|
||||||
// test harness access
|
// test harness access
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue