Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions src/derive/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ impl<'a, T: Decode<'a>> Decoder<'a, VecDeque<T>> for VecDecoder<'a, T> {
}

#[cfg(test)]
mod test {
mod tests {
use alloc::collections::*;
use alloc::vec::Vec;

Expand All @@ -386,10 +386,25 @@ mod test {
type T = BinaryHeap<u8>;
let data: T = bench_data();
let encoded = crate::encode(&data);

let mut buffer = crate::Buffer::new();
b.iter(|| {
let decoded: T = crate::decode::<T>(&encoded).unwrap();
let decoded: T = buffer.decode::<T>(&encoded).unwrap();
debug_assert!(data.iter().eq(decoded.iter()));
decoded
})
}
}

#[cfg(test)]
mod tests2 {
    use alloc::vec::Vec;

    // Element type under benchmark: a 32x32 array of bytes (1024 bytes per element).
    type T = [[u8; 32]; 32];

    // Builds the benchmark input: every random element becomes an inner vec holding either
    // that single element or nothing, decided by the parity of the element's first byte.
    // NOTE(review): presumably consumed by `bench_encode_decode!` below — confirm against the macro.
    fn bench_data() -> Vec<Vec<T>> {
        crate::random_data(1000)
            .into_iter()
            .map(|item: T| {
                let first_byte_is_even = item[0][0] & 1 == 0;
                if first_byte_is_even {
                    vec![item]
                } else {
                    vec![]
                }
            })
            .collect()
    }

    crate::bench_encode_decode!(vec_zero_or_one_large_array_vec: Vec<Vec<T>>);
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mod length;
mod nightly;
mod pack;
mod pack_ints;
mod pack_shared;
mod str;
mod u8_char;

Expand Down
50 changes: 19 additions & 31 deletions src/pack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::consume::{consume_byte, consume_byte_arrays, consume_bytes};
use crate::error::err;
use crate::fast::CowSlice;
use crate::pack_ints::{Int, SizedInt};
use crate::pack_shared::PackingTrait;
use alloc::vec::Vec;

/// Possible states per byte in descending order. Each packed byte will use `log2(states)` bits.
Expand All @@ -17,8 +18,9 @@ enum Packing {
_2,
}

impl Packing {
fn new(max: u8) -> Self {
impl PackingTrait for Packing {
fn new<T: crate::pack_ints::SizedUInt>(max: T) -> Self {
let max: u8 = bytemuck::must_cast(max);
match max {
// We could encode max 0 as nothing, but that could allocate unbounded memory when decoding.
0..=1 => Self::_2,
Expand All @@ -30,12 +32,16 @@ impl Packing {
}
}

fn write(self, out: &mut Vec<u8>, offset_by_min: bool) {
fn write<T: crate::pack_ints::SizedUInt>(self, out: &mut Vec<u8>, offset_by_min: bool) {
// pack_ints::Packing needs generics, we only use this on u8 here.
assert_eq!(core::mem::size_of::<T>(), 1);
// Encoded such that 0 is `Self::_256` and higher numbers are smaller packings.
// This also makes `Self::_256` with offset_by_min = true unrepresentable.
out.push(self as u8 * 2 - offset_by_min as u8);
}
}

impl Packing {
fn read(input: &mut &[u8]) -> Result<(Self, bool)> {
let v = consume_byte(input)?;
let p_u8 = crate::nightly::div_ceil_u8(v, 2);
Expand Down Expand Up @@ -93,44 +99,26 @@ pub fn pack_bytes<T: Byte>(bytes: &mut [T], out: &mut Vec<u8>) {
out.extend_from_slice(bytemuck::must_cast_slice(bytes));
return;
}
let (min, max) = crate::pack_ints::minmax(bytes);

// i8 packs as u8 if positive.
let basic_packing = if min >= T::default() {
Packing::new(bytemuck::must_cast(max))
} else {
Packing::_256 // Any negative i8 as u8 is > 15 and can't be packed without offset_packing.
};

// u8::wrapping_sub == i8::wrapping_sub, so we can use u8s from here onward.
let min: u8 = bytemuck::must_cast(min);
let max: u8 = bytemuck::must_cast(max);
let (basic_packing, min_max) =
crate::pack_shared::basic_packing_and_signed_min_max_cast_to_unsigned(bytes, out);
let bytes: &mut [u8] = bytemuck::must_cast_slice_mut(bytes);
pack_bytes_unsigned(bytes, out, basic_packing, min, max);

// <T as SizedInt>::Unsigned for T: Byte is always u8, but we can't prove
// that here, so we perform this cast which doesn't change the underlying type.
let min_max = min_max.map(|(min, max)| (bytemuck::must_cast(min), bytemuck::must_cast(max)));

pack_bytes_unsigned(bytes, out, basic_packing, min_max);
}

/// [`pack_bytes`] but after i8s have been cast to u8s.
fn pack_bytes_unsigned(
bytes: &mut [u8],
out: &mut Vec<u8>,
basic_packing: Packing,
min: u8,
max: u8,
min_max: Option<(u8, u8)>,
) {
// If subtracting min from all bytes results in a better packing do it, otherwise don't bother.
let offset_packing = Packing::new(max.wrapping_sub(min));
let p = if offset_packing > basic_packing && bytes.len() > 5 {
for b in bytes.iter_mut() {
*b = b.wrapping_sub(min);
}
offset_packing.write(out, true);
out.push(min);
offset_packing
} else {
basic_packing.write(out, false);
basic_packing
};

let p = crate::pack_shared::offset_packing(bytes, out, basic_packing, min_max);
match p {
Packing::_256 => out.extend_from_slice(bytes),
Packing::_16 => pack_arithmetic::<16>(bytes, out),
Expand Down
60 changes: 7 additions & 53 deletions src/pack_ints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::consume::{consume_byte, consume_byte_arrays};
use crate::error::error;
use crate::fast::CowSlice;
use crate::pack::{invalid_packing, pack_bytes, unpack_bytes};
use crate::pack_shared::PackingTrait;
use crate::Error;
use alloc::vec::Vec;
use bytemuck::Pod;
Expand All @@ -19,7 +20,7 @@ enum Packing {
_8,
}

impl Packing {
impl PackingTrait for Packing {
fn new<T: SizedUInt>(max: T) -> Self {
let max: u128 = max.try_into().unwrap_or_else(|_| unreachable!()); // From<usize> isn't implemented for u128.
#[allow(clippy::match_overlapping_arm)] // Just make sure not to reorder them.
Expand All @@ -37,7 +38,9 @@ impl Packing {
// This also makes no packing with offset_by_min = true unrepresentable.
out.push((self as u8 - Self::new(T::MAX) as u8) * 2 - offset_by_min as u8);
}
}

impl Packing {
fn read<T: SizedUInt>(input: &mut &[u8]) -> Result<(Self, bool)> {
let v = consume_byte(input)?;
let p_u8 = crate::nightly::div_ceil_u8(v, 2) + Self::new(T::MAX) as u8;
Expand Down Expand Up @@ -329,18 +332,8 @@ impl SizedUInt for u8 {
}
}

pub fn minmax<T: SizedInt>(v: &[T]) -> (T, T) {
let mut min = T::MAX;
let mut max = T::MIN;
for &v in v.iter() {
min = min.min(v);
max = max.max(v);
}
(min, max)
}

fn skip_packing<T: SizedInt>(length: usize) -> bool {
// Be careful using size_of::<T> since usize can be 4 or 8.
// T is SizedInt, so it cannot be usize/isize; therefore comparisons against its size are safe.
if core::mem::size_of::<T>() == 1 {
return true; // u8s can't be packed by pack_ints (only pack_bytes).
}
Expand Down Expand Up @@ -370,31 +363,9 @@ fn pack_ints_sized<T: SizedInt>(ints: &mut [T], out: &mut Vec<u8>) {
let (basic_packing, min_max) = if skip_packing::<T>(ints.len()) {
(Packing::new(T::Unsigned::MAX), None)
} else {
// Take a small sample to avoid wastefully scanning the whole slice.
let (sample, remaining) = ints.split_at(ints.len().min(16));
let (min, max) = minmax(sample);

// Only have to check packing(max - min) since it's always as good as packing(max).
let none = Packing::new(T::Unsigned::MAX);
if Packing::new(max.to_unsigned().wrapping_sub(min.to_unsigned())) == none {
none.write::<T::Unsigned>(out, false);
(none, None)
} else {
let (remaining_min, remaining_max) = minmax(remaining);
let min = min.min(remaining_min);
let max = max.max(remaining_max);

// Signed ints pack as unsigned ints if positive.
let basic_packing = if min >= T::default() {
Packing::new(max.to_unsigned())
} else {
none // Any negative can't be packed without offset_packing.
};
(basic_packing, Some((min, max)))
}
crate::pack_shared::basic_packing_and_signed_min_max_cast_to_unsigned(ints, out)
};
let ints = bytemuck::must_cast_slice_mut(ints);
let min_max = min_max.map(|(min, max)| (min.to_unsigned(), max.to_unsigned()));
pack_ints_sized_unsigned::<T::Unsigned>(ints, out, basic_packing, min_max);
}

Expand All @@ -405,24 +376,7 @@ fn pack_ints_sized_unsigned<T: SizedUInt>(
basic_packing: Packing,
min_max: Option<(T, T)>,
) {
let p = if let Some((min, max)) = min_max {
// If subtracting min from all ints results in a better packing do it, otherwise don't bother.
let offset_packing = Packing::new(max.wrapping_sub(min));
if offset_packing > basic_packing && ints.len() > 5 {
for b in ints.iter_mut() {
*b = b.wrapping_sub(min);
}
offset_packing.write::<T>(out, true);
T::write(min, out);
offset_packing
} else {
basic_packing.write::<T>(out, false);
basic_packing
}
} else {
basic_packing
};

let p = crate::pack_shared::offset_packing(ints, out, basic_packing, min_max);
match p {
Packing::_128 => T::pack128(ints, out),
Packing::_64 => T::pack64(ints, out),
Expand Down
81 changes: 81 additions & 0 deletions src/pack_shared.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use crate::pack_ints::{SizedInt, SizedUInt};
use alloc::vec::Vec;

/// Interface shared by the byte-level and int-level `Packing` enums so the helpers in this
/// module can be generic over either one.
///
/// `PartialOrd` is required because `offset_packing` compares two packings with `>` and
/// treats the greater one as the better (smaller) encoding. `Copy` lets a packing be both
/// written and returned by value.
pub trait PackingTrait: Copy + PartialOrd {
/// Returns the packing to use when the largest value to encode is `max`.
/// `Self::new(T::MAX)` is what the helpers in this module treat as "no packing".
fn new<T: SizedUInt>(max: T) -> Self;

/// Appends this packing's encoded tag to `out`. `offset_by_min` records whether the
/// values had their minimum subtracted before packing.
fn write<T: SizedUInt>(self, out: &mut Vec<u8>, offset_by_min: bool);
}

/// Computes the smallest and largest element of `v` in a single pass, returned as `(min, max)`.
///
/// An empty slice yields `(T::MAX, T::MIN)`, the starting value of the fold.
fn minmax<T: SizedInt>(v: &[T]) -> (T, T) {
    v.iter()
        .fold((T::MAX, T::MIN), |(lo, hi), &x| (lo.min(x), hi.max(x)))
}

/// Picks the packing usable without subtracting a minimum and reports the slice's
/// `(min, max)` reinterpreted as unsigned.
///
/// Returns `(packing, Some((min, max)))` when packing may be worthwhile; nothing is written
/// to `out` in that case. Returns `(no_packing, None)` when the leading sample already
/// rules packing out.
///
/// Writes a packing to `out` iff the returned min/max is `None`.
pub fn basic_packing_and_signed_min_max_cast_to_unsigned<T: SizedInt, P: PackingTrait>(
    ints: &[T],
    out: &mut Vec<u8>,
) -> (P, Option<(T::Unsigned, T::Unsigned)>) {
    // Inspect a small prefix first to avoid wastefully scanning the whole slice.
    // Note: the prefix is purely an optimization and has no impact on the encoded result,
    // because it is only used to bail out of the full scan when the first 16-32 elements
    // cannot be packed.
    let sample_len = (32 / core::mem::size_of::<T>()).max(16);
    let (head, tail) = ints.split_at(ints.len().min(sample_len));
    let (head_min, head_max) = minmax(head);

    // Checking packing(max - min) is enough since it's always as good as packing(max).
    let unpacked = P::new(T::Unsigned::MAX);
    let head_range = head_max.to_unsigned().wrapping_sub(head_min.to_unsigned());
    if P::new(head_range) == unpacked {
        unpacked.write::<T::Unsigned>(out, false);
        return (unpacked, None);
    }

    // The prefix looked packable, so fold in the rest of the slice.
    let (tail_min, tail_max) = minmax(tail);
    let min = head_min.min(tail_min);
    let max = head_max.max(tail_max);

    // Signed ints pack as unsigned ints if all are non-negative; any negative value
    // can't be packed without offset packing.
    let basic_packing = if min >= T::default() {
        P::new(max.to_unsigned())
    } else {
        unpacked
    };

    let unsigned_bounds = (min.to_unsigned(), max.to_unsigned());
    (basic_packing, Some(unsigned_bounds))
}

/// Decides between `basic_packing` and packing the values after subtracting their minimum,
/// and returns whichever packing was chosen.
///
/// When offset packing is chosen, every element of `ints` has `min` subtracted in place
/// (wrapping), then the packing tag and `min` are appended to `out`.
///
/// Writes a packing to `out` iff `min_max` is `Some`.
pub fn offset_packing<T: SizedUInt, P: PackingTrait>(
    ints: &mut [T],
    out: &mut Vec<u8>,
    basic_packing: P,
    min_max: Option<(T, T)>,
) -> P {
    // TODO(breaking change) don't hardcode this as 5. Only perform offset_packing
    // on a few elements if the added T::write(min, out) makes it still smaller.
    const SMALL_SKIP_OFFSET_PACKING: usize = 5;

    // Without bounds there is nothing to decide and nothing to write.
    let (min, max) = match min_max {
        Some(bounds) => bounds,
        None => return basic_packing,
    };

    // Subtracting min from all ints is only worth it if it yields a better packing.
    let offset_packing = P::new(max.wrapping_sub(min));
    let worth_offsetting =
        offset_packing > basic_packing && ints.len() > SMALL_SKIP_OFFSET_PACKING;
    if !worth_offsetting {
        basic_packing.write::<T>(out, false);
        return basic_packing;
    }

    ints.iter_mut().for_each(|v| *v = v.wrapping_sub(min));
    offset_packing.write::<T>(out, true);
    T::write(min, out);
    offset_packing
}
Loading