From f070230236b942c381cd6333bf211522cdef6473 Mon Sep 17 00:00:00 2001 From: Cai Bear Date: Fri, 26 Jun 2026 22:46:10 -0700 Subject: [PATCH 1/3] Optimize encoding incompressible u8. --- src/derive/vec.rs | 18 +++++++++-- src/lib.rs | 1 + src/pack.rs | 50 +++++++++++------------------ src/pack_ints.rs | 60 ++++------------------------------ src/pack_shared.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 86 deletions(-) create mode 100644 src/pack_shared.rs diff --git a/src/derive/vec.rs b/src/derive/vec.rs index 52ab03f..737a9a2 100644 --- a/src/derive/vec.rs +++ b/src/derive/vec.rs @@ -363,7 +363,7 @@ impl<'a, T: Decode<'a>> Decoder<'a, VecDeque> for VecDecoder<'a, T> { } #[cfg(test)] -mod test { +mod tests { use alloc::collections::*; use alloc::vec::Vec; @@ -386,10 +386,24 @@ mod test { type T = BinaryHeap; let data: T = bench_data(); let encoded = crate::encode(&data); + + let mut buffer = crate::Buffer::new(); b.iter(|| { - let decoded: T = crate::decode::(&encoded).unwrap(); + let decoded: T = buffer.decode::(&encoded).unwrap(); debug_assert!(data.iter().eq(decoded.iter())); decoded }) } } + +#[cfg(test)] +mod tests2 { + type T = [[u8; 32]; 32]; + fn bench_data() -> Vec> { + crate::random_data(1000) + .into_iter() + .map(|t: T| if t[0][0] & 1 == 0 { vec![t] } else { vec![] }) + .collect() + } + crate::bench_encode_decode!(vec_zero_or_one_large_array_vec: Vec>); +} diff --git a/src/lib.rs b/src/lib.rs index e527c10..d82e0ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,7 @@ mod length; mod nightly; mod pack; mod pack_ints; +mod pack_shared; mod str; mod u8_char; diff --git a/src/pack.rs b/src/pack.rs index 6c0684b..4effe98 100644 --- a/src/pack.rs +++ b/src/pack.rs @@ -3,6 +3,7 @@ use crate::consume::{consume_byte, consume_byte_arrays, consume_bytes}; use crate::error::err; use crate::fast::CowSlice; use crate::pack_ints::{Int, SizedInt}; +use crate::pack_shared::PackingTrait; use alloc::vec::Vec; /// Possible 
states per byte in descending order. Each packed byte will use `log2(states)` bits. @@ -17,8 +18,9 @@ enum Packing { _2, } -impl Packing { - fn new(max: u8) -> Self { +impl PackingTrait for Packing { + fn new(max: T) -> Self { + let max: u8 = bytemuck::must_cast(max); match max { // We could encode max 0 as nothing, but that could allocate unbounded memory when decoding. 0..=1 => Self::_2, @@ -30,12 +32,16 @@ impl Packing { } } - fn write(self, out: &mut Vec, offset_by_min: bool) { + fn write(self, out: &mut Vec, offset_by_min: bool) { + // pack_ints::Packing needs generics, we only use this on u8 here. + assert_eq!(std::mem::size_of::(), 1); // Encoded in such a way such that 0 is `Self::_256` and higher numbers are smaller packing. // Also makes `Self::_256` with offset_by_min = true is unrepresentable. out.push(self as u8 * 2 - offset_by_min as u8); } +} +impl Packing { fn read(input: &mut &[u8]) -> Result<(Self, bool)> { let v = consume_byte(input)?; let p_u8 = crate::nightly::div_ceil_u8(v, 2); @@ -93,20 +99,16 @@ pub fn pack_bytes(bytes: &mut [T], out: &mut Vec) { out.extend_from_slice(bytemuck::must_cast_slice(bytes)); return; } - let (min, max) = crate::pack_ints::minmax(bytes); - - // i8 packs as u8 if positive. - let basic_packing = if min >= T::default() { - Packing::new(bytemuck::must_cast(max)) - } else { - Packing::_256 // Any negative i8 as u8 is > 15 and can't be packed without offset_packing. - }; - // u8::wrapping_sub == i8::wrapping_sub, so we can use u8s from here onward. - let min: u8 = bytemuck::must_cast(min); - let max: u8 = bytemuck::must_cast(max); + let (basic_packing, min_max) = + crate::pack_shared::basic_packing_and_signed_min_max_cast_to_unsigned(bytes, out); let bytes: &mut [u8] = bytemuck::must_cast_slice_mut(bytes); - pack_bytes_unsigned(bytes, out, basic_packing, min, max); + + // ::Unsigned for T: Byte is always u8, but we can't prove + // that here, so we perform this cast which doesn't change the underlying type. 
+ let min_max = min_max.map(|(min, max)| (bytemuck::must_cast(min), bytemuck::must_cast(max))); + + pack_bytes_unsigned(bytes, out, basic_packing, min_max); } /// [`pack_bytes`] but after i8s have been cast to u8s. @@ -114,23 +116,9 @@ fn pack_bytes_unsigned( bytes: &mut [u8], out: &mut Vec, basic_packing: Packing, - min: u8, - max: u8, + min_max: Option<(u8, u8)>, ) { - // If subtracting min from all bytes results in a better packing do it, otherwise don't bother. - let offset_packing = Packing::new(max.wrapping_sub(min)); - let p = if offset_packing > basic_packing && bytes.len() > 5 { - for b in bytes.iter_mut() { - *b = b.wrapping_sub(min); - } - offset_packing.write(out, true); - out.push(min); - offset_packing - } else { - basic_packing.write(out, false); - basic_packing - }; - + let p = crate::pack_shared::offset_packing(bytes, out, basic_packing, min_max); match p { Packing::_256 => out.extend_from_slice(bytes), Packing::_16 => pack_arithmetic::<16>(bytes, out), diff --git a/src/pack_ints.rs b/src/pack_ints.rs index 68b4431..d9c528f 100644 --- a/src/pack_ints.rs +++ b/src/pack_ints.rs @@ -3,6 +3,7 @@ use crate::consume::{consume_byte, consume_byte_arrays}; use crate::error::error; use crate::fast::CowSlice; use crate::pack::{invalid_packing, pack_bytes, unpack_bytes}; +use crate::pack_shared::PackingTrait; use crate::Error; use alloc::vec::Vec; use bytemuck::Pod; @@ -19,7 +20,7 @@ enum Packing { _8, } -impl Packing { +impl PackingTrait for Packing { fn new(max: T) -> Self { let max: u128 = max.try_into().unwrap_or_else(|_| unreachable!()); // From isn't implemented for u128. #[allow(clippy::match_overlapping_arm)] // Just make sure not to reorder them. @@ -37,7 +38,9 @@ impl Packing { // Also makes no packing with offset_by_min = true is unrepresentable. 
+ out.push((self as u8 - Self::new(T::MAX) as u8) * 2 - offset_by_min as u8); } +} + +impl Packing { fn read(input: &mut &[u8]) -> Result<(Self, bool)> { let v = consume_byte(input)?; let p_u8 = crate::nightly::div_ceil_u8(v, 2) + Self::new(T::MAX) as u8; @@ -329,18 +332,8 @@ impl SizedUInt for u8 { } } -pub fn minmax(v: &[T]) -> (T, T) { - let mut min = T::MAX; - let mut max = T::MIN; - for &v in v.iter() { - min = min.min(v); - max = max.max(v); - } - (min, max) -} - fn skip_packing(length: usize) -> bool { - // Be careful using size_of:: since usize can be 4 or 8. + // T is SizedInt, so it cannot be usize/isize, therefore comparisons against size are safe. if core::mem::size_of::() == 1 { return true; // u8s can't be packed by pack_ints (only pack_bytes). } @@ -370,31 +363,9 @@ fn pack_ints_sized(ints: &mut [T], out: &mut Vec) { let (basic_packing, min_max) = if skip_packing::(ints.len()) { (Packing::new(T::Unsigned::MAX), None) } else { - // Take a small sample to avoid wastefully scanning the whole slice. - let (sample, remaining) = ints.split_at(ints.len().min(16)); - let (min, max) = minmax(sample); - - // Only have to check packing(max - min) since it's always as good as packing(max). - let none = Packing::new(T::Unsigned::MAX); - if Packing::new(max.to_unsigned().wrapping_sub(min.to_unsigned())) == none { - none.write::(out, false); - (none, None) - } else { - let (remaining_min, remaining_max) = minmax(remaining); - let min = min.min(remaining_min); - let max = max.max(remaining_max); - - // Signed ints pack as unsigned ints if positive. - let basic_packing = if min >= T::default() { - Packing::new(max.to_unsigned()) - } else { - none // Any negative can't be packed without offset_packing. 
- }; - (basic_packing, Some((min, max))) - } + crate::pack_shared::basic_packing_and_signed_min_max_cast_to_unsigned(ints, out) }; let ints = bytemuck::must_cast_slice_mut(ints); - let min_max = min_max.map(|(min, max)| (min.to_unsigned(), max.to_unsigned())); pack_ints_sized_unsigned::(ints, out, basic_packing, min_max); } @@ -405,24 +376,7 @@ fn pack_ints_sized_unsigned( basic_packing: Packing, min_max: Option<(T, T)>, ) { - let p = if let Some((min, max)) = min_max { - // If subtracting min from all ints results in a better packing do it, otherwise don't bother. - let offset_packing = Packing::new(max.wrapping_sub(min)); - if offset_packing > basic_packing && ints.len() > 5 { - for b in ints.iter_mut() { - *b = b.wrapping_sub(min); - } - offset_packing.write::(out, true); - T::write(min, out); - offset_packing - } else { - basic_packing.write::(out, false); - basic_packing - } - } else { - basic_packing - }; - + let p = crate::pack_shared::offset_packing(ints, out, basic_packing, min_max); match p { Packing::_128 => T::pack128(ints, out), Packing::_64 => T::pack64(ints, out), diff --git a/src/pack_shared.rs b/src/pack_shared.rs new file mode 100644 index 0000000..be4e9b5 --- /dev/null +++ b/src/pack_shared.rs @@ -0,0 +1,80 @@ +use crate::pack_ints::{SizedInt, SizedUInt}; + +pub trait PackingTrait: Copy + PartialOrd { + fn new(max: T) -> Self; + + fn write(self, out: &mut Vec, offset_by_min: bool); +} + +fn minmax(v: &[T]) -> (T, T) { + let mut min = T::MAX; + let mut max = T::MIN; + for &v in v.iter() { + min = min.min(v); + max = max.max(v); + } + (min, max) +} + +// Writes a packing to `out` iff it returns None. +pub fn basic_packing_and_signed_min_max_cast_to_unsigned( + ints: &[T], + out: &mut Vec, +) -> (P, Option<(T::Unsigned, T::Unsigned)>) { + // Take a small sample to avoid wastefully scanning the whole slice. 
+ // Note: This small sample is purely an optimization, it has no impact on the encoded result + // because we only use it to bail from scanning the entire slice if the first 16-32 elements + // cannot be packed. + let sample_size = (32 / std::mem::size_of::()).max(16); + let (sample, remaining) = ints.split_at(ints.len().min(sample_size)); + let (min, max) = minmax(sample); + + // Only have to check packing(max - min) since it's always as good as packing(max). + let none = P::new(T::Unsigned::MAX); + if P::new(max.to_unsigned().wrapping_sub(min.to_unsigned())) == none { + none.write::(out, false); + (none, None) + } else { + let (remaining_min, remaining_max) = minmax(remaining); + let min = min.min(remaining_min); + let max = max.max(remaining_max); + + // Signed ints pack as unsigned ints if positive. + let basic_packing = if min >= T::default() { + P::new(max.to_unsigned()) + } else { + none // Any negative can't be packed without offset_packing. + }; + + (basic_packing, Some((min.to_unsigned(), max.to_unsigned()))) + } +} + +// Writes a packing to `out` iff `min_max` is Some. +pub fn offset_packing( + ints: &mut [T], + out: &mut Vec, + basic_packing: P, + min_max: Option<(T, T)>, +) -> P { + if let Some((min, max)) = min_max { + // If subtracting min from all ints results in a better packing do it, otherwise don't bother. + let offset_packing = P::new(max.wrapping_sub(min)); + // TODO(breaking change) don't hardcode this as 5. Only perform offset_packing + // on a few elements if the added T::write(min, out) makes it still smaller. 
+ let small_skip_offset_packing = 5; + if offset_packing > basic_packing && ints.len() > small_skip_offset_packing { + for b in ints.iter_mut() { + *b = b.wrapping_sub(min); + } + offset_packing.write::(out, true); + T::write(min, out); + offset_packing + } else { + basic_packing.write::(out, false); + basic_packing + } + } else { + basic_packing + } +} From d5d6220b86220affbd47b7eeb2fe1e417f423b20 Mon Sep 17 00:00:00 2001 From: Cai Bear Date: Fri, 26 Jun 2026 23:05:46 -0700 Subject: [PATCH 2/3] Fix previous no_std. --- src/pack.rs | 2 +- src/pack_shared.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pack.rs b/src/pack.rs index 4effe98..2a47746 100644 --- a/src/pack.rs +++ b/src/pack.rs @@ -34,7 +34,7 @@ impl PackingTrait for Packing { fn write(self, out: &mut Vec, offset_by_min: bool) { // pack_ints::Packing needs generics, we only use this on u8 here. - assert_eq!(std::mem::size_of::(), 1); + assert_eq!(core::mem::size_of::(), 1); // Encoded in such a way such that 0 is `Self::_256` and higher numbers are smaller packing. // Also makes `Self::_256` with offset_by_min = true is unrepresentable. out.push(self as u8 * 2 - offset_by_min as u8); diff --git a/src/pack_shared.rs b/src/pack_shared.rs index be4e9b5..1f346c6 100644 --- a/src/pack_shared.rs +++ b/src/pack_shared.rs @@ -1,4 +1,5 @@ use crate::pack_ints::{SizedInt, SizedUInt}; +use alloc::vec::Vec; pub trait PackingTrait: Copy + PartialOrd { fn new(max: T) -> Self; @@ -25,7 +26,7 @@ pub fn basic_packing_and_signed_min_max_cast_to_unsigned()).max(16); + let sample_size = (32 / core::mem::size_of::()).max(16); let (sample, remaining) = ints.split_at(ints.len().min(sample_size)); let (min, max) = minmax(sample); From 4f9c9cf7b819296926179ee16ad9035820ce71da Mon Sep 17 00:00:00 2001 From: Cai Bear Date: Fri, 26 Jun 2026 23:08:55 -0700 Subject: [PATCH 3/3] Fix previous no_std tests. 
--- src/derive/vec.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/derive/vec.rs b/src/derive/vec.rs index 737a9a2..570a6d8 100644 --- a/src/derive/vec.rs +++ b/src/derive/vec.rs @@ -398,6 +398,7 @@ mod tests { #[cfg(test)] mod tests2 { + use alloc::vec::Vec; type T = [[u8; 32]; 32]; fn bench_data() -> Vec> { crate::random_data(1000)