Add changelog; Fork from `backspace`; Add gitignore; Add documentation; Add tests; License under LGPL-3; Configure lints; Add readme;

This commit is contained in:
Gabriel Bjørnager Jensen 2024-06-08 10:32:46 +02:00
commit da41f1df23
14 changed files with 773 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/Cargo.lock
/target

10
CHANGELOG.md Normal file
View file

@ -0,0 +1,10 @@
# 0.0.0
* Add changelog
* Fork from `backspace`
* Add gitignore
* Add documentation
* Add tests
* License under LGPL-3
* Configure lints
* Add readme

160
Cargo.toml Normal file
View file

@ -0,0 +1,160 @@
[package]
name = "bzipper"
version = "0.0.0"
authors = ["Gabriel Bjørnager Jensen"]
edition = "2021"
description = "Binary (de)serialiser."
documentation = "https://docs.rs/bzipper"
readme = "README.md"
homepage = "https://mandelbrot.dk/bzipper"
repository = "https://mandelbrot.dk/bzipper"
license = "LGPL-3.0-or-later"
[lints.clippy]
as_ptr_cast_mut = "forbid"
as_underscore = "warn"
assertions_on_result_states = "warn"
bool_to_int_with_if = "warn"
borrow_as_ptr = "forbid"
branches_sharing_code = "warn"
cast_lossless = "warn"
cast_possible_wrap = "warn"
cast_ptr_alignment = "forbid"
checked_conversions = "warn"
clear_with_drain = "warn"
cloned_instead_of_copied = "warn"
collection_is_never_read = "warn"
dbg_macro = "warn"
debug_assert_with_mut_call = "warn"
deref_by_slicing = "warn"
derive_partial_eq_without_eq = "deny"
empty_enum = "warn"
empty_enum_variants_with_brackets = "warn"
empty_line_after_doc_comments = "warn"
empty_line_after_outer_attr = "warn"
empty_structs_with_brackets = "warn"
enum_variant_names = "allow"
equatable_if_let = "warn"
excessive_precision = "allow"
exit = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
explicit_iter_loop = "warn"
fallible_impl_from = "deny"
flat_map_option = "warn"
float_cmp = "deny" # Fortran, is that you?
float_cmp_const = "deny"
format_push_string = "warn"
from_iter_instead_of_collect = "warn"
future_not_send = "deny"
if_not_else = "warn"
if_then_some_else_none = "warn"
ignored_unit_patterns = "warn"
impl_trait_in_params = "warn"
implicit_clone = "warn"
imprecise_flops = "deny"
inconsistent_struct_constructor = "deny"
index_refutable_slice = "warn"
inefficient_to_string = "warn"
infinite_loop = "deny"
into_iter_without_iter = "deny"
invalid_upcast_comparisons = "warn"
iter_filter_is_ok = "warn"
iter_filter_is_some = "warn"
iter_not_returning_iterator = "deny"
iter_on_empty_collections = "warn"
iter_on_single_items = "warn"
iter_with_drain = "warn"
iter_without_into_iter = "deny"
macro_use_imports = "warn"
manual_assert = "warn"
manual_c_str_literals = "warn"
manual_instant_elapsed = "warn"
manual_is_variant_and = "warn"
manual_let_else = "warn"
manual_ok_or = "warn"
manual_string_new = "warn"
map_unwrap_or = "warn"
match_bool = "warn"
match_on_vec_items = "warn"
match_same_arms = "warn"
mismatching_type_param_order = "warn"
missing_const_for_fn = "warn"
mixed_read_write_in_expression = "deny"
must_use_candidate = "deny"
mut_mut = "deny"
mutex_atomic = "deny"
mutex_integer = "deny"
needless_bitwise_bool = "deny"
needless_collect = "warn"
needless_continue = "warn"
needless_pass_by_ref_mut = "warn"
needless_pass_by_value = "deny"
needless_raw_string_hashes = "warn"
needless_raw_strings = "warn"
no_effect_underscore_binding = "deny"
no_mangle_with_rust_abi = "deny"
non_ascii_literal = "forbid"
nonstandard_macro_braces = "warn"
option_as_ref_cloned = "warn"
option_if_let_else = "warn"
option_option = "deny"
or_fun_call = "deny"
panic_in_result_fn = "deny"
path_buf_push_overwrite = "deny"
pattern_type_mismatch = "deny"
ptr_as_ptr = "forbid"
ptr_cast_constness = "forbid"
pub_underscore_fields = "deny"
pub_with_shorthand = "deny"
read_zero_byte_vec = "deny"
redundant_clone = "deny"
redundant_closure_for_method_calls = "warn"
redundant_else = "warn"
redundant_pub_crate = "warn"
redundant_type_annotations = "warn"
ref_as_ptr = "deny"
ref_binding_to_reference = "warn"
ref_option_ref = "deny"
rest_pat_in_fully_bound_structs = "warn"
return_self_not_must_use = "deny"
same_functions_in_if_condition = "deny"
same_name_method = "deny"
self_named_module_files = "deny"
semicolon_outside_block = "warn"
single_char_pattern = "warn"
str_split_at_newline = "warn"
string_lit_as_bytes = "deny"
string_lit_chars_any = "deny"
string_to_string = "deny"
suboptimal_flops = "deny"
trait_duplication_in_bounds = "deny"
transmute_ptr_to_ptr = "deny"
type_repetition_in_bounds = "deny"
uninhabited_references = "deny"
uninlined_format_args = "deny"
unnecessary_box_returns = "deny"
unnecessary_join = "deny"
unnecessary_self_imports = "deny"
unnecessary_wraps = "warn"
unneeded_field_pattern = "warn"
unnested_or_patterns = "warn"
unseparated_literal_suffix = "warn"
unused_async = "warn"
unused_peekable = "warn"
unused_rounding = "warn"
unused_self = "warn"
use_self = "deny"
used_underscore_binding = "deny"
useless_let_if_seq = "warn"
verbose_bit_mask = "warn"
verbose_file_reads = "warn"
wildcard_dependencies = "deny"
zero_sized_map_values = "deny"
[profile.release]
codegen-units = 1
lto = "fat"
opt-level = 3

8
README.md Normal file
View file

@ -0,0 +1,8 @@
[`bzipper`](https://crates.io/crates/bzipper) is a binary (de)serialiser.
Contrary to [Serde](https://crates.io/crates/serde/)/[Bincode](https://crates.io/crates/bincode/), the goal of this crate is to serialise data without inflating the resulting binary sequence.
As such, one may consider this crate to be more low-level.
Keep in mind that this project is still work-in-progress.
This crate does not require any dependencies at the moment.

View file

@ -0,0 +1,61 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
use crate::error::{Error, Result};
use std::fmt::{Debug, Formatter};
/// A byte stream for deserialisation.
///
/// This type borrows a byte slice (hence [`new`](DStream::new)), keeping track internally of the used bytes.
#[derive(Clone)]
pub struct DStream<'a> {
data: &'a [u8],
len: usize,
}
impl<'a> DStream<'a> {
/// Constructs a new byte stream.
pub fn new<T: AsRef<[u8]> + ?Sized>(buf: &'a T) -> Self { Self {
data: buf.as_ref(),
len: buf.as_ref().len(),
} }
/// Takes bytes from the stream.
///
/// # Errors
///
/// If the internal buffer doesn't hold at least the requested ammount of bytes, an [`EndOfDStream`](Error::EndOfDStream) error is returned.
pub fn take(&mut self, len: usize) -> Result<&[u8]> {
if self.len < len { return Err(Error::EndOfDStream { len: self.len, ok_len: len } ) }
let start = self.data.len() - self.len;
let stop = start + len;
self.len -= len;
Ok(&self.data[start..stop])
}
}
impl Debug for DStream<'_> {
    /// Formats the bytes that have not been taken yet as a bracketed list of hexadecimal octets.
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        let start = self.data.len() - self.len;

        write!(f, "[")?;
        // The `0x` prefix counts towards the field width, so two hex digits need a width of four.
        for v in &self.data[start..] {
            write!(f, "{v:#04X},")?;
        }
        write!(f, "]")
    }
}
impl<'a> From<&'a [u8]> for DStream<'a> {
    /// Wraps a byte slice in a stream; equivalent to [`DStream::new`].
    fn from(value: &'a [u8]) -> Self {
        Self::new(value)
    }
}
impl<'a, const N: usize> From<&'a [u8; N]> for DStream<'a> {
    /// Wraps a byte array in a stream; equivalent to [`DStream::new`].
    fn from(value: &'a [u8; N]) -> Self {
        Self::new(value)
    }
}

View file

@ -0,0 +1,162 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
use crate::deserialise::DStream;
use crate::error::Error;
use std::convert::Infallible;
use std::error::Error as StdError;
use std::mem::size_of;
use std::num::NonZero;
/// Denotes a type capable of being deserialised.
///
/// The counterpart for writing values is the `Serialise` trait in the `serialise` module.
pub trait Deserialise: Sized {
/// The error returned when deserialisation fails.
type Error;
/// Deserialises the byte stream to an object.
///
/// # Errors
///
/// Returns [`Self::Error`] if an object could not be produced from the stream.
fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error>;
}
// Implements `Deserialise` for a floating-point type by reading its big-endian byte representation.
macro_rules! impl_float {
    ($type:ty) => {
        impl Deserialise for $type {
            type Error = Error;

            fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
                let bytes = stream.take(size_of::<Self>())?;

                // `take` yields exactly the requested number of bytes, so the conversion to an array succeeds.
                let raw = bytes.try_into().unwrap();
                Ok(Self::from_be_bytes(raw))
            }
        }
    };
}
// Implements `Deserialise` for an integer type (big-endian) and for its `NonZero` wrapper.
macro_rules! impl_int {
    ($type:ty) => {
        impl Deserialise for $type {
            type Error = Error;

            fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
                let bytes = stream.take(size_of::<Self>())?;

                // `take` yields exactly the requested number of bytes, so the conversion to an array succeeds.
                let raw = bytes.try_into().unwrap();
                Ok(Self::from_be_bytes(raw))
            }
        }

        impl Deserialise for NonZero<$type> {
            type Error = Error;

            fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
                // A zero cannot be represented by `NonZero` and is reported as `NullInteger`.
                match NonZero::new(<$type>::deserialise(stream)?) {
                    Some(value) => Ok(value),
                    None => Err(Error::NullInteger),
                }
            }
        }
    };
}
impl<T: Deserialise<Error: StdError + 'static>, const N: usize> Deserialise for [T; N] {
type Error = Box<dyn StdError>;
fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
let len = usize::try_from(u64::deserialise(stream)?).unwrap();
if len != N { return Err(Box::new(Error::ArrayLengthMismatch { len, ok_len: N })) };
let mut buf = Vec::with_capacity(len);
for _ in 0x0..len { buf.push(Deserialise::deserialise(stream)?); }
// If we had used the checked unwrap, we would also
// have to require `T: Debug`.
Ok(unsafe { buf.try_into().unwrap_unchecked() })
}
}
impl Deserialise for () {
type Error = Error;
fn deserialise(_stream: &mut DStream) -> Result<Self, Self::Error> { Ok(()) }
}
impl Deserialise for bool {
type Error = Error;
fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
let value = u8::deserialise(stream)?;
match value {
0x00 => Ok(false),
0x01 => Ok(true),
_ => Err(Error::InvalidBoolean { value })
}
}
}
impl Deserialise for char {
type Error = Error;
fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
let value = u32::deserialise(stream)?;
Self::from_u32(value)
.ok_or(Error::InvalidCodePoint { value })
}
}
impl Deserialise for Infallible {
type Error = Error;
fn deserialise(_stream: &mut DStream) -> Result<Self, Self::Error> { unreachable!() }
}
impl<T: Deserialise<Error: StdError + 'static>> Deserialise for Option<T> {
    type Error = Box<dyn StdError>;

    /// Reads a boolean tag and, if it is `true`, the contained value.
    ///
    /// # Errors
    ///
    /// Errors from reading the tag or the contained value are forwarded.
    fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
        let present = bool::deserialise(stream)?;
        if !present {
            return Ok(None);
        }

        let value = T::deserialise(stream)?;
        Ok(Some(value))
    }
}
impl<T: Deserialise, E: Deserialise> Deserialise for Result<T, E>
where
<T as Deserialise>::Error: StdError + 'static,
<E as Deserialise>::Error: StdError + 'static, {
type Error = Box<dyn StdError>;
fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
let sign = bool::deserialise(stream)?;
let value = if sign {
Err(E::deserialise(stream)?)
} else {
Ok(T::deserialise(stream)?)
};
Ok(value)
}
}
// Provide `Deserialise` for the primitive numeric types.
// `impl_int` additionally covers the matching `NonZero` wrapper of each integer type.
impl_float!(f32);
impl_float!(f64);
impl_int!(i128);
impl_int!(i16);
impl_int!(i32);
impl_int!(i64);
impl_int!(i8);
impl_int!(u128);
impl_int!(u16);
impl_int!(u32);
impl_int!(u64);
impl_int!(u8);

10
src/deserialise/mod.rs Normal file
View file

@ -0,0 +1,10 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
//! Deserialisation utilities.
use crate::use_mod;
// Declare the private submodules and publicly re-export their contents from this module.
use_mod!(pub d_stream);
use_mod!(pub deserialise);
// Unit tests; only compiled for test builds.
#[cfg(test)]
mod test;

57
src/deserialise/test.rs Normal file
View file

@ -0,0 +1,57 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
use crate::deserialise::{Deserialise, DStream};
// Decodes one hand-written byte sequence front to back and checks every value read from it.
//
// NOTE(review): despite its name, this test exercises deserialisation.
#[test]
fn test_serialise() {
// Layout of `data`, in order:
// * 1 octet: `u8` 0x00
// * 1 octet: `u8` 0xFF
// * 16 octets: big-endian `u128`
// * 8 octets: array length prefix (5), followed by five 4-octet code points
// * 1 octet: `Option` tag 0x01 (`Some`); the `()` payload occupies no octets
// * 1 octet: `Option` tag 0x00 (`None`)
// * 1 octet: `Result` tag 0x00 (`Ok`); the `()` payload occupies no octets
// * 2 octets: `Result` tag 0x01 (`Err`), followed by the `i8` 0x80
let data = [
0x00, 0xFF, 0xFF, 0x0F, 0xEF, 0x1F, 0xDF, 0x2F,
0xCF, 0x3F, 0xBF, 0x4F, 0xAF, 0x5F, 0x9F, 0x6F,
0x8F, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x05, 0x00, 0x00, 0x03, 0xBB, 0x00, 0x00,
0x03, 0x91, 0x00, 0x00, 0x03, 0xBC, 0x00, 0x00,
0x03, 0x94, 0x00, 0x00, 0x03, 0xB1, 0x01, 0x00,
0x00, 0x01, 0x80,
];
let mut stream = DStream::from(&data);
assert_eq!(
u8::deserialise(&mut stream).unwrap(),
0x00,
);
assert_eq!(
u8::deserialise(&mut stream).unwrap(),
0xFF,
);
assert_eq!(
u128::deserialise(&mut stream).unwrap(),
0xFF_0F_EF_1F_DF_2F_CF_3F_BF_4F_AF_5F_9F_6F_8F_7F,
);
assert_eq!(
<[char; 0x5]>::deserialise(&mut stream).unwrap(),
['\u{03BB}', '\u{0391}', '\u{03BC}', '\u{0394}', '\u{03B1}'],
);
assert_eq!(
Option::<()>::deserialise(&mut stream).unwrap(),
Some(()),
);
assert_eq!(
Option::<()>::deserialise(&mut stream).unwrap(),
None,
);
assert_eq!(
Result::<(), i8>::deserialise(&mut stream).unwrap(),
Ok(()),
);
assert_eq!(
Result::<(), i8>::deserialise(&mut stream).unwrap(),
Err(i8::MIN),
);
}

56
src/error/mod.rs Normal file
View file

@ -0,0 +1,56 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
//! Error handling.
use std::error::Error as StdError;
use std::fmt::{Display, Formatter};
/// Mapping of [`std::result::Result`].
///
/// The error type is fixed to this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Denotes an error.
///
/// These variants are used when a deserialisation fails.
/// Serialisations are assumed infallible.
#[derive(Debug)]
pub enum Error {
    /// An array was expected to have `ok_len` elements but its length prefix was `len`.
    ArrayLengthMismatch { len: usize, ok_len: usize },
    /// `ok_len` bytes were requested from a stream that only had `len` bytes left.
    EndOfDStream { len: usize, ok_len: usize },
    /// The octet `value` was read where a boolean (`0x00` or `0x01`) was expected.
    InvalidBoolean { value: u8 },
    /// The code point `value` could not be converted to a `char`.
    InvalidCodePoint { value: u32 },
    /// A zero was read where a non-zero integer was expected.
    NullInteger,
}

impl Display for Error {
    /// Writes a human-readable description of the error.
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        match *self {
            Self::ArrayLengthMismatch { len, ok_len } => {
                write!(f, "expected array of length ({ok_len}) but got ({len}) elements")
            }
            Self::EndOfDStream { len, ok_len } => {
                write!(f, "({ok_len}) byte(s) were requested but only ({len}) byte(s) were left")
            }
            Self::InvalidBoolean { value } => {
                // The `0x` prefix counts towards the field width, so two hex digits need a width of four.
                write!(f, "expected boolean but got {value:#04X}")
            }
            Self::InvalidCodePoint { value } => {
                write!(f, "code point U+{value:04X} is not valid")
            }
            Self::NullInteger => {
                write!(f, "expected non-zero integer but got (0)")
            }
        }
    }
}

impl StdError for Error {}

22
src/lib.rs Normal file
View file

@ -0,0 +1,22 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
//! Binary (de)serialisation.
//!
//! Contrary to [Serde](https://crates.io/crates/serde/)/[Bincode](https://crates.io/crates/bincode/), the goal of `bzipper` is to serialise data without inflating the resulting binary sequence.
//! As such, one may consider this crate to be more low-level.
//!
//! Keep in mind that this project is still work-in-progress.
//!
//! This crate does not require any dependencies at the moment.
pub mod deserialise;
pub mod error;
pub mod serialise;
// Declares the private module `$name` and glob-imports its contents into the invoking module.
// `$vis` is the visibility given to that import.
macro_rules! use_mod {
($vis:vis $name:ident) => {
mod $name;
$vis use $name::*;
};
}
// Make the macro importable by path (`crate::use_mod`) within this crate.
pub(in crate) use use_mod;

10
src/serialise/mod.rs Normal file
View file

@ -0,0 +1,10 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
//! Serialisation utilities.
use crate::use_mod;
// Declare the private submodules and publicly re-export their contents from this module.
use_mod!(pub s_stream);
use_mod!(pub serialise);
// Unit tests; only compiled for test builds.
#[cfg(test)]
mod test;

View file

@ -0,0 +1,54 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
use crate::serialise::Serialise;
use std::fmt::{Debug, Formatter};
use std::mem::size_of;
/// A byte stream for serialisation.
///
/// Serialised bytes are collected in an internal, growable buffer.
#[derive(Clone, Eq, PartialEq)]
pub struct SStream(Vec<u8>);

impl SStream {
    /// Constructs a new, empty byte stream.
    #[must_use]
    pub const fn new() -> Self {
        Self(Vec::new())
    }

    /// Appends the bytes of `extra` to the end of the stream.
    pub fn append(&mut self, extra: &[u8]) {
        self.0.extend_from_slice(extra);
    }
}
impl AsRef<[u8]> for SStream {
    /// Borrows the bytes written to the stream so far.
    #[inline(always)]
    fn as_ref(&self) -> &[u8] {
        self.0.as_slice()
    }
}
impl Debug for SStream {
    /// Formats the written bytes as a bracketed list of hexadecimal octets.
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        write!(f, "[")?;
        // The `0x` prefix counts towards the field width, so two hex digits need a width of four.
        for v in &self.0 {
            write!(f, "{v:#04X},")?;
        }
        write!(f, "]")
    }
}
impl Default for SStream {
    /// Constructs an empty stream; equivalent to [`SStream::new`].
    #[inline(always)]
    fn default() -> Self {
        Self::new()
    }
}
impl<T: Serialise> From<&T> for SStream {
    /// Serialises `value` into a fresh stream.
    ///
    /// The buffer is created with a capacity of `size_of::<T>()` bytes.
    fn from(value: &T) -> Self {
        let capacity = size_of::<T>();
        let mut stream = Self(Vec::with_capacity(capacity));

        value.serialise(&mut stream);
        stream
    }
}
impl From<SStream> for Box<[u8]> {
#[inline(always)]
fn from(value: SStream) -> Self { value.0.into_boxed_slice() }
}

View file

@ -0,0 +1,116 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
use crate::serialise::SStream;
use std::convert::Infallible;
use std::mem::size_of;
use std::num::NonZero;
/// Denotes a type capable of being serialised.
pub trait Serialise: Sized {
/// Serialises `self` into a byte stream.
///
/// One may assume that the resulting stream has at most the same amount of bytes as before serialisation.
/// Therefore, not observing this rule is a logic error.
// NOTE(review): "before serialisation" presumably refers to the in-memory size of `Self`.
// The array implementation in this file also writes an 8-octet length prefix -- confirm the intended bound.
fn serialise(&self, stream: &mut SStream);
}
// Implements `Serialise` for a floating-point type by appending its big-endian byte representation.
macro_rules! impl_float {
    ($type:ty) => {
        impl Serialise for $type {
            fn serialise(&self, stream: &mut SStream) {
                let bytes = self.to_be_bytes();
                stream.append(&bytes);
            }
        }
    };
}
// Implements `Serialise` for an integer type (big-endian) and for its `NonZero` wrapper.
macro_rules! impl_int {
    ($type:ty) => {
        impl Serialise for $type {
            fn serialise(&self, stream: &mut SStream) {
                let bytes = self.to_be_bytes();
                stream.append(&bytes);
            }
        }

        impl Serialise for NonZero<$type> {
            fn serialise(&self, stream: &mut SStream) {
                // A non-zero integer is written exactly like the plain integer it wraps.
                let value: $type = self.get();
                value.serialise(stream);
            }
        }
    };
}
impl<T: Serialise, const N: usize> Serialise for [T; N] {
    /// Writes the array length as a `u64`, followed by every element in order.
    fn serialise(&self, stream: &mut SStream) {
        let len = u64::try_from(self.len()).unwrap();
        len.serialise(stream);

        self.iter().for_each(|v| v.serialise(stream));
    }
}
impl Serialise for () {
    /// Writes nothing: the unit value occupies no bytes.
    fn serialise(&self, _stream: &mut SStream) {}
}
impl Serialise for bool {
    /// Writes the boolean as a single `u8`, as converted by `u8::from`.
    fn serialise(&self, stream: &mut SStream) {
        let octet = u8::from(*self);
        octet.serialise(stream);
    }
}
impl Serialise for char {
    /// Writes the character's code point as a `u32`.
    fn serialise(&self, stream: &mut SStream) {
        let code_point = u32::from(*self);
        code_point.serialise(stream);
    }
}
impl Serialise for Infallible {
    /// Can never be called, as no value of `Infallible` exists.
    fn serialise(&self, _stream: &mut SStream) {
        // An empty match on an uninhabited type covers every (i.e. no) case.
        match *self {}
    }
}
impl<T: Serialise> Serialise for Option<T> {
fn serialise(&self, stream: &mut SStream) {
match *self {
None => {
stream.append(&[0x00]);
stream.append(&vec![0x00; size_of::<T>()]);
},
Some(ref v) => {
stream.append(&[0x01]);
v.serialise(stream);
},
};
}
}
impl<T: Serialise, E: Serialise> Serialise for Result<T, E> {
    /// Writes a tag octet (`0x00` for `Ok`, `0x01` for `Err`), followed by the contained value.
    fn serialise(&self, stream: &mut SStream) {
        let tag: u8 = match *self {
            Ok(_) => 0x00,
            Err(_) => 0x01,
        };
        stream.append(&[tag]);

        match *self {
            Ok(ref v) => v.serialise(stream),
            Err(ref e) => e.serialise(stream),
        }
    }
}
// Provide `Serialise` for the primitive numeric types.
// `impl_int` additionally covers the matching `NonZero` wrapper of each integer type.
impl_float!(f32);
impl_float!(f64);
impl_int!(i128);
impl_int!(i16);
impl_int!(i32);
impl_int!(i64);
impl_int!(i8);
impl_int!(u128);
impl_int!(u16);
impl_int!(u32);
impl_int!(u64);
impl_int!(u8);

45
src/serialise/test.rs Normal file
View file

@ -0,0 +1,45 @@
// Copyright 2022-2024 Gabriel Bjørnager Jensen.
use crate::serialise::{SStream, Serialise};
// Serialises a sequence of values into one stream and compares the result against hand-written bytes.
#[test]
fn test_serialise() {
let mut stream = SStream::new();
0x00_u8.serialise(&mut stream);
0xFF_u8.serialise(&mut stream);
0x7F_u8.serialise(&mut stream);
0x0F_7E_u16.serialise(&mut stream);
0x00_2F_87_E7_u32.serialise(&mut stream);
0xF3_37_CF_8B_DB_03_2B_39_u64.serialise(&mut stream);
0x45_A0_15_6A_36_77_17_8A_83_2E_3C_2C_84_10_58_1A_u128.serialise(&mut stream);
['\u{03B4}', '\u{0190}', '\u{03BB}', '\u{03A4}', '\u{03B1}'].serialise(&mut stream);
Result::<u16, char>::Ok(0x45_45).serialise(&mut stream);
Result::<u16, char>::Err(char::REPLACEMENT_CHARACTER).serialise(&mut stream);
Option::<()>::None.serialise(&mut stream);
Option::<()>::Some(()).serialise(&mut stream);
let data: Box<[u8]> = stream.into();
// Expected layout, in order:
// * 3 octets: the three `u8` values
// * 2, 4, 8 and 16 octets: the big-endian `u16`, `u32`, `u64` and `u128`
// * 8 octets: array length prefix (5), followed by five 4-octet code points
// * 3 octets: `Result` tag 0x00 (`Ok`) and the `u16`
// * 5 octets: `Result` tag 0x01 (`Err`) and the code point U+FFFD
// * 1 octet: `Option` tag 0x00 (`None`)
// * 1 octet: `Option` tag 0x01 (`Some`); the `()` payload occupies no octets
assert_eq!(
data.as_ref(),
[
0x00, 0xFF, 0x7F, 0x0F, 0x7E, 0x00, 0x2F, 0x87,
0xE7, 0xF3, 0x37, 0xCF, 0x8B, 0xDB, 0x03, 0x2B,
0x39, 0x45, 0xA0, 0x15, 0x6A, 0x36, 0x77, 0x17,
0x8A, 0x83, 0x2E, 0x3C, 0x2C, 0x84, 0x10, 0x58,
0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x03, 0xB4, 0x00, 0x00, 0x01,
0x90, 0x00, 0x00, 0x03, 0xBB, 0x00, 0x00, 0x03,
0xA4, 0x00, 0x00, 0x03, 0xB1, 0x00, 0x45, 0x45,
0x01, 0x00, 0x00, 0xFF, 0xFD, 0x00, 0x01,
]
);
}