diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/deserialise/d_stream/mod.rs | 61 | ||||
-rw-r--r-- | src/deserialise/deserialise/mod.rs | 162 | ||||
-rw-r--r-- | src/deserialise/mod.rs | 10 | ||||
-rw-r--r-- | src/deserialise/test.rs | 57 | ||||
-rw-r--r-- | src/error/mod.rs | 56 | ||||
-rw-r--r-- | src/lib.rs | 22 | ||||
-rw-r--r-- | src/serialise/mod.rs | 10 | ||||
-rw-r--r-- | src/serialise/s_stream/mod.rs | 54 | ||||
-rw-r--r-- | src/serialise/serialise/mod.rs | 116 | ||||
-rw-r--r-- | src/serialise/test.rs | 45 |
10 files changed, 593 insertions, 0 deletions
diff --git a/src/deserialise/d_stream/mod.rs b/src/deserialise/d_stream/mod.rs
new file mode 100644
index 0000000..f6adbd9
--- /dev/null
+++ b/src/deserialise/d_stream/mod.rs
@@ -0,0 +1,61 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+use crate::error::{Error, Result};
+
+use std::fmt::{Debug, Formatter};
+
+/// A byte stream for deserialisation.
+///
+/// This type borrows a byte slice (hence [`new`](DStream::new)), keeping track internally of the used bytes.
+#[derive(Clone)]
+pub struct DStream<'a> {
+    /// The complete borrowed buffer, including the bytes that have already been taken.
+    data: &'a [u8],
+    /// The number of bytes at the end of `data` that have not been taken yet.
+    len: usize,
+}
+
+impl<'a> DStream<'a> {
+    /// Constructs a new byte stream over all of `buf`.
+    pub fn new<T: AsRef<[u8]> + ?Sized>(buf: &'a T) -> Self {
+        let data = buf.as_ref();
+
+        Self { data, len: data.len() }
+    }
+
+    /// Takes the next `len` bytes from the stream.
+    ///
+    /// The returned slice borrows from the underlying buffer and not from the stream itself, so it may be kept around while more bytes are taken.
+    ///
+    /// # Errors
+    ///
+    /// If the internal buffer doesn't hold at least the requested amount of bytes, an [`EndOfDStream`](Error::EndOfDStream) error is returned and the stream is left untouched.
+    pub fn take(&mut self, len: usize) -> Result<&'a [u8]> {
+        if self.len < len { return Err(Error::EndOfDStream { len: self.len, ok_len: len }) }
+
+        // Neither computation can wrap: `self.len` never exceeds `self.data.len()`
+        // and `len <= self.len` was checked above.
+        let start = self.data.len() - self.len;
+        let stop = start + len;
+
+        self.len -= len;
+
+        // Copy the reference out of `self` so that the result is tied to `'a` rather than to `&mut self`.
+        let data: &'a [u8] = self.data;
+        Ok(&data[start..stop])
+    }
+}
+
+impl Debug for DStream<'_> {
+    /// Writes the bytes that have not been taken yet as a list of hexadecimal octets.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let start = self.data.len() - self.len;
+
+        write!(f, "[")?;
+
+        // With `#`, the `0x` prefix counts towards the width, so two digits need a width of four.
+        for v in &self.data[start..] { write!(f, "{v:#04X},")? }
+
+        write!(f, "]")
+    }
+}
+
+impl<'a> From<&'a [u8]> for DStream<'a> {
+    fn from(value: &'a [u8]) -> Self { Self::new(value) }
+}
+
+impl<'a, const N: usize> From<&'a [u8; N]> for DStream<'a> {
+    fn from(value: &'a [u8; N]) -> Self { Self::new(value) }
+}
diff --git a/src/deserialise/deserialise/mod.rs b/src/deserialise/deserialise/mod.rs
new file mode 100644
index 0000000..d24b290
--- /dev/null
+++ b/src/deserialise/deserialise/mod.rs
@@ -0,0 +1,162 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+use crate::deserialise::DStream;
+use crate::error::Error;
+
+use std::convert::Infallible;
+use std::error::Error as StdError;
+use std::mem::size_of;
+use std::num::NonZero;
+
+/// Denotes a type capable of being deserialised.
+pub trait Deserialise: Sized {
+    /// The error returned when deserialisation fails.
+    type Error;
+
+    /// Deserialises the byte stream to an object.
+    ///
+    /// # Errors
+    ///
+    /// An error is returned if the stream ends early or holds bytes that do not encode a value of this type.
+    fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error>;
+}
+
+// Implements `Deserialise` for a floating-point type, read as big-endian bytes.
+macro_rules! impl_float {
+    ($type:ty) => {
+        impl Deserialise for $type {
+            type Error = Error;
+
+            fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+                let data = stream
+                    .take(size_of::<Self>())?
+                    .try_into()
+                    .expect("`take` yields exactly the requested number of bytes");
+
+                Ok(Self::from_be_bytes(data))
+            }
+        }
+    };
+}
+
+// Implements `Deserialise` for an integer type (read as big-endian bytes) and for its
+// `NonZero` counterpart (which additionally rejects zero).
+macro_rules! impl_int {
+    ($type:ty) => {
+        impl Deserialise for $type {
+            type Error = Error;
+
+            fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+                let data = stream
+                    .take(size_of::<Self>())?
+                    .try_into()
+                    .expect("`take` yields exactly the requested number of bytes");
+
+                Ok(Self::from_be_bytes(data))
+            }
+        }
+
+        impl Deserialise for NonZero<$type> {
+            type Error = Error;
+
+            fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+                let value = <$type>::deserialise(stream)?;
+
+                NonZero::new(value)
+                    .ok_or(Error::NullInteger)
+            }
+        }
+    };
+}
+
+/// Arrays are read as a 64-bit length prefix, which must equal `N`, followed by the `N` elements in order.
+impl<T: Deserialise<Error: StdError + 'static>, const N: usize> Deserialise for [T; N] {
+    type Error = Box<dyn StdError>;
+
+    fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+        // The prefix comes from the input, so a value that does not fit in `usize` must be
+        // reported as a mismatch instead of panicking (it is shown saturated in the error).
+        let len = usize::try_from(u64::deserialise(stream)?);
+        if len != Ok(N) {
+            return Err(Box::new(Error::ArrayLengthMismatch { len: len.unwrap_or(usize::MAX), ok_len: N }));
+        }
+
+        let mut buf = Vec::with_capacity(N);
+        for _ in 0x0..N { buf.push(T::deserialise(stream)?); }
+
+        // Exactly `N` elements were pushed, so the conversion cannot fail.
+        // `unwrap_or_else` is used over `unwrap` as the latter would require `T: Debug`.
+        Ok(<[T; N]>::try_from(buf).unwrap_or_else(|_| unreachable!("exactly `N` elements were pushed")))
+    }
+}
+
+/// The unit type takes up no bytes in the stream.
+impl Deserialise for () {
+    type Error = Error;
+
+    fn deserialise(_stream: &mut DStream) -> Result<Self, Self::Error> { Ok(()) }
+}
+
+/// Booleans are read as a single byte that must be either `0x00` or `0x01`.
+impl Deserialise for bool {
+    type Error = Error;
+
+    fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+        let value = u8::deserialise(stream)?;
+
+        match value {
+            0x00 => Ok(false),
+            0x01 => Ok(true),
+            _ => Err(Error::InvalidBoolean { value }),
+        }
+    }
+}
+
+/// Characters are read as a 32-bit integer that must be accepted by [`char::from_u32`].
+impl Deserialise for char {
+    type Error = Error;
+
+    fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+        let value = u32::deserialise(stream)?;
+
+        Self::from_u32(value)
+            .ok_or(Error::InvalidCodePoint { value })
+    }
+}
+
+/// # Panics
+///
+/// Calling `deserialise` always panics, as no value of [`Infallible`] can be produced.
+impl Deserialise for Infallible {
+    type Error = Error;
+
+    fn deserialise(_stream: &mut DStream) -> Result<Self, Self::Error> { unreachable!() }
+}
+
+/// Options are read as a boolean tag, followed by the value if (and only if) the tag is `true`.
+impl<T: Deserialise<Error: StdError + 'static>> Deserialise for Option<T> {
+    type Error = Box<dyn StdError>;
+
+    fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+        let sign = bool::deserialise(stream)?;
+
+        if sign {
+            Ok(Some(T::deserialise(stream)?))
+        } else {
+            Ok(None)
+        }
+    }
+}
+
+/// Results are read as a boolean tag followed by the payload: `false` denotes `Ok` and `true` denotes `Err`.
+impl<T: Deserialise, E: Deserialise> Deserialise for Result<T, E>
+where
+    <T as Deserialise>::Error: StdError + 'static,
+    <E as Deserialise>::Error: StdError + 'static, {
+    type Error = Box<dyn StdError>;
+
+    fn deserialise(stream: &mut DStream) -> Result<Self, Self::Error> {
+        let sign = bool::deserialise(stream)?;
+
+        let value = if sign {
+            Err(E::deserialise(stream)?)
+        } else {
+            Ok(T::deserialise(stream)?)
+        };
+
+        Ok(value)
+    }
+}
+
+impl_float!(f32);
+impl_float!(f64);
+
+impl_int!(i128);
+impl_int!(i16);
+impl_int!(i32);
+impl_int!(i64);
+impl_int!(i8);
+impl_int!(u128);
+impl_int!(u16);
+impl_int!(u32);
+impl_int!(u64);
+impl_int!(u8);
diff --git a/src/deserialise/mod.rs b/src/deserialise/mod.rs
new file mode 100644
index 0000000..7941a98
--- /dev/null
+++ b/src/deserialise/mod.rs
@@ -0,0 +1,10 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+//! Deserialisation utilities.
+
+use crate::use_mod;
+use_mod!(pub d_stream);
+use_mod!(pub deserialise);
+
+#[cfg(test)]
+mod test;
diff --git a/src/deserialise/test.rs b/src/deserialise/test.rs
new file mode 100644
index 0000000..1844e3a
--- /dev/null
+++ b/src/deserialise/test.rs
@@ -0,0 +1,57 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+use crate::deserialise::{Deserialise, DStream};
+
+// Reads one value of every supported kind back from a hand-written byte sequence.
+#[test]
+fn test_deserialise() {
+    let data = [
+        0x00, 0xFF, 0xFF, 0x0F, 0xEF, 0x1F, 0xDF, 0x2F,
+        0xCF, 0x3F, 0xBF, 0x4F, 0xAF, 0x5F, 0x9F, 0x6F,
+        0x8F, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x05, 0x00, 0x00, 0x03, 0xBB, 0x00, 0x00,
+        0x03, 0x91, 0x00, 0x00, 0x03, 0xBC, 0x00, 0x00,
+        0x03, 0x94, 0x00, 0x00, 0x03, 0xB1, 0x01, 0x00,
+        0x00, 0x01, 0x80,
+    ];
+
+    let mut stream = DStream::from(&data);
+
+    assert_eq!(
+        u8::deserialise(&mut stream).unwrap(),
+        0x00,
+    );
+    assert_eq!(
+        u8::deserialise(&mut stream).unwrap(),
+        0xFF,
+    );
+
+    assert_eq!(
+        u128::deserialise(&mut stream).unwrap(),
+        0xFF_0F_EF_1F_DF_2F_CF_3F_BF_4F_AF_5F_9F_6F_8F_7F,
+    );
+
+    assert_eq!(
+        <[char; 0x5]>::deserialise(&mut stream).unwrap(),
+        ['\u{03BB}', '\u{0391}', '\u{03BC}', '\u{0394}', '\u{03B1}'],
+    );
+
+    assert_eq!(
+        Option::<()>::deserialise(&mut stream).unwrap(),
+        Some(()),
+    );
+
+    assert_eq!(
+        Option::<()>::deserialise(&mut stream).unwrap(),
+        None,
+    );
+
+    assert_eq!(
+        Result::<(), i8>::deserialise(&mut stream).unwrap(),
+        Ok(()),
+    );
+
+    assert_eq!(
+        Result::<(), i8>::deserialise(&mut stream).unwrap(),
+        Err(i8::MIN),
+    );
+}
diff --git a/src/error/mod.rs b/src/error/mod.rs
new file mode 100644
index 0000000..26d9b4a
--- /dev/null
+++ b/src/error/mod.rs
@@ -0,0 +1,56 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+//! Error handling.
+
+use std::error::Error as StdError;
+use std::fmt::{Display, Formatter};
+
+/// Mapping of [`std::result::Result`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Denotes an error.
+///
+/// These variants are used when a deserialisation fails.
+/// Serialisations are assumed infallible.
+#[derive(Debug)]
+pub enum Error {
+    /// An array was prefixed with the length `len` where `ok_len` was expected.
+    ArrayLengthMismatch { len: usize, ok_len: usize },
+
+    /// `ok_len` bytes were requested from a stream that only had `len` bytes left.
+    EndOfDStream { len: usize, ok_len: usize },
+
+    /// A boolean was encoded as `value`, which is neither `0x00` nor `0x01`.
+    InvalidBoolean { value: u8 },
+
+    /// A character was encoded as `value`, which [`char::from_u32`] rejects.
+    InvalidCodePoint { value: u32 },
+
+    /// A zero was read where a non-zero integer was expected.
+    NullInteger,
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        use Error::*;
+
+        match *self {
+            ArrayLengthMismatch { len, ok_len } => {
+                write!(f, "expected array of length ({ok_len}) but got ({len}) elements")
+            },
+
+            EndOfDStream { len, ok_len } => {
+                write!(f, "({ok_len}) byte(s) were requested but only ({len}) byte(s) were left")
+            },
+
+            InvalidBoolean { value } => {
+                // With `#`, the `0x` prefix counts towards the width, so two digits need a width of four.
+                write!(f, "expected boolean but got {value:#04X}")
+            },
+
+            InvalidCodePoint { value } => {
+                write!(f, "code point U+{value:04X} is not valid")
+            },
+
+            NullInteger => {
+                write!(f, "expected non-zero integer but got (0)")
+            },
+        }
+    }
+}
+
+impl StdError for Error { }
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..44c4c05
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,22 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+//! Binary (de)serialisation.
+//!
+//! Contrary to [Serde](https://crates.io/crates/serde/)/[Bincode](https://crates.io/crates/bincode/), the goal of `bzipper` is to serialise data without inflating the resulting binary sequence.
+//! As such, one may consider this crate to be more low-level.
+//!
+//! Keep in mind that this project is still work-in-progress.
+//!
+//! This crate does not require any dependencies at the moment.
+
+pub mod deserialise;
+pub mod error;
+pub mod serialise;
+
+macro_rules! use_mod {
+    ($vis:vis $name:ident) => {
+        mod $name;
+        $vis use $name::*;
+    };
+}
+pub(in crate) use use_mod;
diff --git a/src/serialise/mod.rs b/src/serialise/mod.rs
new file mode 100644
index 0000000..7dd17e3
--- /dev/null
+++ b/src/serialise/mod.rs
@@ -0,0 +1,10 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+//! Serialisation utilities.
+
+use crate::use_mod;
+use_mod!(pub s_stream);
+use_mod!(pub serialise);
+
+#[cfg(test)]
+mod test;
diff --git a/src/serialise/s_stream/mod.rs b/src/serialise/s_stream/mod.rs
new file mode 100644
index 0000000..a1ae05c
--- /dev/null
+++ b/src/serialise/s_stream/mod.rs
@@ -0,0 +1,54 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+use crate::serialise::Serialise;
+
+use std::fmt::{Debug, Formatter};
+use std::mem::size_of;
+
+/// A byte stream for serialisation.
+///
+/// This type owns a growable byte buffer that serialised values are appended to.
+#[derive(Clone, Eq, PartialEq)]
+pub struct SStream(Vec<u8>);
+
+impl SStream {
+    /// Constructs a new, empty byte stream.
+    #[must_use]
+    pub const fn new() -> Self { Self(Vec::new()) }
+
+    /// Appends the bytes of `extra` to the end of the stream.
+    pub fn append(&mut self, extra: &[u8]) {
+        self.0.extend_from_slice(extra);
+    }
+}
+
+impl AsRef<[u8]> for SStream {
+    #[inline(always)]
+    fn as_ref(&self) -> &[u8] { self.0.as_ref() }
+}
+
+impl Debug for SStream {
+    /// Writes the bytes of the stream as a list of hexadecimal octets.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "[")?;
+
+        // With `#`, the `0x` prefix counts towards the width, so two digits need a width of four.
+        for v in &self.0 { write!(f, "{v:#04X},")? }
+
+        write!(f, "]")
+    }
+}
+
+impl Default for SStream {
+    #[inline(always)]
+    fn default() -> Self { Self::new() }
+}
+
+impl<T: Serialise> From<&T> for SStream {
+    /// Serialises `value` into a fresh stream.
+    fn from(value: &T) -> Self {
+        // The in-memory size of `T` only serves as the initial capacity; the buffer grows if needed.
+        let mut stream = Self(Vec::with_capacity(size_of::<T>()));
+        value.serialise(&mut stream);
+
+        stream
+    }
+}
+
+impl From<SStream> for Box<[u8]> {
+    #[inline(always)]
+    fn from(value: SStream) -> Self { value.0.into_boxed_slice() }
+}
diff --git a/src/serialise/serialise/mod.rs b/src/serialise/serialise/mod.rs
new file mode 100644
index 0000000..be637cd
--- /dev/null
+++ b/src/serialise/serialise/mod.rs
@@ -0,0 +1,116 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+use crate::serialise::SStream;
+
+use std::convert::Infallible;
+use std::num::NonZero;
+
+/// Denotes a type capable of being serialised.
+pub trait Serialise: Sized {
+    /// Serialises `self` into a byte stream.
+    ///
+    /// One may assume that the resulting stream has at most the same amount of bytes as before serialisation.
+    /// Therefore, not observing this rule is a logic error.
+    fn serialise(&self, stream: &mut SStream);
+}
+
+// Implements `Serialise` for a floating-point type, written as big-endian bytes.
+macro_rules! impl_float {
+    ($type:ty) => {
+        impl Serialise for $type {
+            fn serialise(&self, stream: &mut SStream) {
+                stream.append(&self.to_be_bytes())
+            }
+        }
+    };
+}
+
+// Implements `Serialise` for an integer type (written as big-endian bytes) and for its
+// `NonZero` counterpart (written exactly like the plain integer).
+macro_rules! impl_int {
+    ($type:ty) => {
+        impl Serialise for $type {
+            fn serialise(&self, stream: &mut SStream) {
+                stream.append(&self.to_be_bytes())
+            }
+        }
+
+        impl Serialise for NonZero<$type> {
+            fn serialise(&self, stream: &mut SStream) {
+                self.get().serialise(stream)
+            }
+        }
+    };
+}
+
+/// Arrays are written as a 64-bit length prefix followed by the elements in order.
+impl<T: Serialise, const N: usize> Serialise for [T; N] {
+    fn serialise(&self, stream: &mut SStream) {
+        u64::try_from(self.len()).unwrap().serialise(stream);
+
+        for v in self { v.serialise(stream) }
+    }
+}
+
+/// The unit type takes up no bytes in the stream.
+impl Serialise for () {
+    fn serialise(&self, _stream: &mut SStream) { }
+}
+
+/// Booleans are written as a single byte: `0x00` for `false` and `0x01` for `true`.
+impl Serialise for bool {
+    fn serialise(&self, stream: &mut SStream) {
+        u8::from(*self).serialise(stream)
+    }
+}
+
+/// Characters are written as their 32-bit code point.
+impl Serialise for char {
+    fn serialise(&self, stream: &mut SStream) {
+        u32::from(*self).serialise(stream)
+    }
+}
+
+impl Serialise for Infallible {
+    // No value of `Infallible` exists, so this can never be called.
+    fn serialise(&self, _stream: &mut SStream) { unreachable!() }
+}
+
+/// Options are written as a one-byte tag (`0x00` for `None`, `0x01` for `Some`), followed by the value if there is one.
+impl<T: Serialise> Serialise for Option<T> {
+    fn serialise(&self, stream: &mut SStream) {
+        match *self {
+            // Only the tag is written: the deserialiser reads nothing after a `None` tag,
+            // so any padding here would be misread as the start of the next value.
+            None => stream.append(&[0x00]),
+
+            Some(ref v) => {
+                stream.append(&[0x01]);
+                v.serialise(stream);
+            },
+        };
+    }
+}
+
+/// Results are written as a one-byte tag (`0x00` for `Ok`, `0x01` for `Err`) followed by the payload.
+impl<T: Serialise, E: Serialise> Serialise for Result<T, E> {
+    fn serialise(&self, stream: &mut SStream) {
+        match *self {
+            Ok(ref v) => {
+                stream.append(&[0x00]);
+                v.serialise(stream);
+            },
+
+            Err(ref e) => {
+                stream.append(&[0x01]);
+                e.serialise(stream);
+            },
+        };
+    }
+}
+
+impl_float!(f32);
+impl_float!(f64);
+
+impl_int!(i128);
+impl_int!(i16);
+impl_int!(i32);
+impl_int!(i64);
+impl_int!(i8);
+impl_int!(u128);
+impl_int!(u16);
+impl_int!(u32);
+impl_int!(u64);
+impl_int!(u8);
diff --git a/src/serialise/test.rs b/src/serialise/test.rs
new file mode 100644
index 0000000..7ab2393
--- /dev/null
+++ b/src/serialise/test.rs
@@ -0,0 +1,45 @@
+// Copyright 2022-2024 Gabriel Bjørnager Jensen.
+
+use crate::serialise::{SStream, Serialise};
+
+// Writes one value of every supported kind into a single stream and compares the
+// result with the expected byte sequence.
+#[test]
+fn test_serialise() {
+    let mut out = SStream::new();
+
+    // Unsigned integers of every width, written big-endian.
+    0x00_u8.serialise(&mut out);
+    0xFF_u8.serialise(&mut out);
+    0x7F_u8.serialise(&mut out);
+    0x0F_7E_u16.serialise(&mut out);
+    0x00_2F_87_E7_u32.serialise(&mut out);
+    0xF3_37_CF_8B_DB_03_2B_39_u64.serialise(&mut out);
+    0x45_A0_15_6A_36_77_17_8A_83_2E_3C_2C_84_10_58_1A_u128.serialise(&mut out);
+
+    // An array: a 64-bit length prefix followed by each character.
+    ['\u{03B4}', '\u{0190}', '\u{03BB}', '\u{03A4}', '\u{03B1}'].serialise(&mut out);
+
+    // Tagged types: a one-byte tag followed by the payload (if any).
+    Result::<u16, char>::Ok(0x45_45).serialise(&mut out);
+    Result::<u16, char>::Err(char::REPLACEMENT_CHARACTER).serialise(&mut out);
+    Option::<()>::None.serialise(&mut out);
+    Option::<()>::Some(()).serialise(&mut out);
+
+    let expected = [
+        0x00, 0xFF, 0x7F, 0x0F, 0x7E, 0x00, 0x2F, 0x87,
+        0xE7, 0xF3, 0x37, 0xCF, 0x8B, 0xDB, 0x03, 0x2B,
+        0x39, 0x45, 0xA0, 0x15, 0x6A, 0x36, 0x77, 0x17,
+        0x8A, 0x83, 0x2E, 0x3C, 0x2C, 0x84, 0x10, 0x58,
+        0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x05, 0x00, 0x00, 0x03, 0xB4, 0x00, 0x00, 0x01,
+        0x90, 0x00, 0x00, 0x03, 0xBB, 0x00, 0x00, 0x03,
+        0xA4, 0x00, 0x00, 0x03, 0xB1, 0x00, 0x45, 0x45,
+        0x01, 0x00, 0x00, 0xFF, 0xFD, 0x00, 0x01,
+    ];
+
+    let bytes = Box::<[u8]>::from(out);
+
+    assert_eq!(&*bytes, &expected[..]);
+}
|