255 lines
9.0 KiB
Rust
255 lines
9.0 KiB
Rust
// SPDX-License-Identifier: CC0-1.0
|
|
|
|
//! Variable length integer encoding A.K.A [`CompactSize`].
|
|
//!
|
|
//! An integer can be encoded depending on the represented value to save space. Variable length
|
|
//! integers always precede an array/vector of a type of data that may vary in length.
|
|
//!
|
|
//! [`CompactSize`]: <https://en.bitcoin.it/wiki/Protocol_documentation#Variable_length_integer>
|
|
|
|
use crate::array_vec::ArrayVec;
|
|
use crate::ToU64;
|
|
|
|
/// The maximum size of a serialized object in bytes or number of elements
|
|
/// (for eg vectors) when the size is encoded as `CompactSize`.
|
|
///
|
|
/// This is `MAX_SIZE` in Bitcoin Core.
|
|
// Issue: https://github.com/rust-bitcoin/rust-bitcoin/issues/3264
|
|
pub const MAX_ENCODABLE_VALUE: u64 = 0x0200_0000;
|
|
|
|
/// The maximum length of an encoding.
|
|
const MAX_ENCODING_SIZE: usize = 9;
|
|
|
|
/// Returns the number of bytes used to encode this `CompactSize` value.
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// - 1 for 0..=0xFC
|
|
/// - 3 for 0xFD..=(2^16-1)
|
|
/// - 5 for 0x10000..=(2^32-1)
|
|
/// - 9 otherwise.
|
|
#[inline]
|
|
pub fn encoded_size(value: impl ToU64) -> usize { encoded_size_const(value.to_u64()) }
|
|
|
|
/// Returns the number of bytes used to encode this `CompactSize` value (in const context).
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// - 1 for 0..=0xFC
|
|
/// - 3 for 0xFD..=(2^16-1)
|
|
/// - 5 for 0x10000..=(2^32-1)
|
|
/// - 9 otherwise.
|
|
#[inline]
|
|
pub const fn encoded_size_const(value: u64) -> usize {
|
|
match value {
|
|
0..=0xFC => 1,
|
|
0xFD..=0xFFFF => 3,
|
|
0x10000..=0xFFFFFFFF => 5,
|
|
_ => 9,
|
|
}
|
|
}
|
|
|
|
/// Encodes `CompactSize` without allocating.
|
|
#[inline]
|
|
pub fn encode(value: impl ToU64) -> ArrayVec<u8, MAX_ENCODING_SIZE> {
|
|
let value = value.to_u64();
|
|
let mut res = ArrayVec::<u8, MAX_ENCODING_SIZE>::new();
|
|
match value {
|
|
0..=0xFC => {
|
|
res.push(value as u8); // Cast ok because of match.
|
|
}
|
|
0xFD..=0xFFFF => {
|
|
let v = value as u16; // Cast ok because of match.
|
|
res.push(0xFD);
|
|
res.extend_from_slice(&v.to_le_bytes());
|
|
}
|
|
0x10000..=0xFFFFFFFF => {
|
|
let v = value as u32; // Cast ok because of match.
|
|
res.push(0xFE);
|
|
res.extend_from_slice(&v.to_le_bytes());
|
|
}
|
|
_ => {
|
|
let v = value;
|
|
res.push(0xFF);
|
|
res.extend_from_slice(&v.to_le_bytes());
|
|
}
|
|
}
|
|
res
|
|
}
|
|
|
|
/// Gets the compact size encoded value from `slice` and moves slice past the encoding.
|
|
///
|
|
/// Caller to guarantee that the encoding is well formed. Well formed is defined as:
|
|
///
|
|
/// * Being at least long enough.
|
|
/// * Containing a minimal encoding.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// * Panics in release mode if the `slice` does not contain a valid minimal compact size encoding.
|
|
/// * Panics in debug mode if the encoding is not minimal (referred to as "non-canonical" in Core).
|
|
pub fn decode_unchecked(slice: &mut &[u8]) -> u64 {
|
|
if slice.is_empty() {
|
|
panic!("tried to decode an empty slice");
|
|
}
|
|
|
|
match slice[0] {
|
|
0xFF => {
|
|
const SIZE: usize = 9;
|
|
if slice.len() < SIZE {
|
|
panic!("slice too short, expected at least 9 bytes");
|
|
};
|
|
|
|
let mut bytes = [0_u8; SIZE - 1];
|
|
bytes.copy_from_slice(&slice[1..SIZE]);
|
|
|
|
let v = u64::from_le_bytes(bytes);
|
|
debug_assert!(v > u32::MAX.into(), "non-minimal encoding of a u64");
|
|
*slice = &slice[SIZE..];
|
|
v
|
|
}
|
|
0xFE => {
|
|
const SIZE: usize = 5;
|
|
if slice.len() < SIZE {
|
|
panic!("slice too short, expected at least 5 bytes");
|
|
};
|
|
|
|
let mut bytes = [0_u8; SIZE - 1];
|
|
bytes.copy_from_slice(&slice[1..SIZE]);
|
|
|
|
let v = u32::from_le_bytes(bytes);
|
|
debug_assert!(v > u16::MAX.into(), "non-minimal encoding of a u32");
|
|
*slice = &slice[SIZE..];
|
|
u64::from(v)
|
|
}
|
|
0xFD => {
|
|
const SIZE: usize = 3;
|
|
if slice.len() < SIZE {
|
|
panic!("slice too short, expected at least 3 bytes");
|
|
};
|
|
|
|
let mut bytes = [0_u8; SIZE - 1];
|
|
bytes.copy_from_slice(&slice[1..SIZE]);
|
|
|
|
let v = u16::from_le_bytes(bytes);
|
|
debug_assert!(v >= 0xFD, "non-minimal encoding of a u16");
|
|
*slice = &slice[SIZE..];
|
|
u64::from(v)
|
|
}
|
|
n => {
|
|
*slice = &slice[1..];
|
|
u64::from(n)
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn encoded_value_1_byte() {
|
|
// Check lower bound, upper bound (and implicitly endian-ness).
|
|
for v in [0x00, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
|
|
let v = v as u32;
|
|
assert_eq!(encoded_size(v), 1);
|
|
// Should be encoded as the value as a u8.
|
|
let want = [v as u8];
|
|
let got = encode(v);
|
|
assert_eq!(got.as_slice().len(), 1); // sanity check
|
|
assert_eq!(got.as_slice(), want);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn decode_value_1_byte() {
|
|
// Check lower bound, upper bound.
|
|
for v in [0x00, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
|
|
let raw = [v];
|
|
let mut slice = raw.as_slice();
|
|
let got = decode_unchecked(&mut slice);
|
|
assert_eq!(got, u64::from(v));
|
|
assert!(slice.is_empty());
|
|
}
|
|
}
|
|
|
|
macro_rules! check_encode {
|
|
($($test_name:ident, $size:expr, $value:expr, $want:expr);* $(;)?) => {
|
|
$(
|
|
#[test]
|
|
fn $test_name() {
|
|
let value = $value as u64; // Because default integer type is i32.
|
|
let got = encode(value);
|
|
assert_eq!(got.as_slice().len(), $size); // sanity check
|
|
assert_eq!(got.as_slice(), &$want);
|
|
}
|
|
)*
|
|
}
|
|
}
|
|
|
|
check_encode! {
|
|
// 3 byte encoding.
|
|
encoded_value_3_byte_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00]; // 0x00FD
|
|
encoded_value_3_byte_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
|
|
encoded_value_3_byte_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
|
|
// 5 byte encoding.
|
|
encoded_value_5_byte_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
|
|
encoded_value_5_byte_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
|
|
encoded_value_5_byte_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
|
|
// 9 byte encoding.
|
|
encoded_value_9_byte_lower_bound, 9, 0x0000_0001_0000_0000, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
|
|
encoded_value_9_byte_endianness, 9, 0x0123_4567_89AB_CDEF, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
|
|
encoded_value_9_byte_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
|
|
}
|
|
|
|
macro_rules! check_decode {
|
|
($($test_name:ident, $size:expr, $want:expr, $encoded:expr);* $(;)?) => {
|
|
$(
|
|
#[test]
|
|
fn $test_name() {
|
|
let mut slice = $encoded.as_slice();
|
|
let got = decode_unchecked(&mut slice);
|
|
assert_eq!(got, $want);
|
|
assert_eq!(slice.len(), $encoded.len() - $size);
|
|
}
|
|
)*
|
|
}
|
|
}
|
|
|
|
check_decode! {
|
|
// 3 byte encoding.
|
|
decode_from_3_byte_slice_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00];
|
|
decode_from_3_byte_slice_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
|
|
decode_from_3_byte_slice_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
|
|
// 5 byte encoding.
|
|
decode_from_5_byte_slice_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
|
|
decode_from_5_byte_slice_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
|
|
decode_from_5_byte_slice_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
|
|
// 9 byte encoding.
|
|
decode_from_9_byte_slice_lower_bound, 9, 0x0000_0001_0000_0000, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
|
|
decode_from_9_byte_slice_endianness, 9, 0x0123_4567_89AB_CDEF, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
|
|
decode_from_9_byte_slice_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
|
|
|
|
// Check slices that are bigger than the actual encoding.
|
|
decode_1_byte_from_bigger_slice, 1, 32, [0x20, 0xAB, 0xBC];
|
|
decode_3_byte_from_bigger_slice, 3, 0xFFFF, [0xFD, 0xFF, 0xFF, 0xAB, 0xBC];
|
|
decode_5_byte_from_bigger_slice, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xAB, 0xBC];
|
|
decode_9_byte_from_bigger_slice, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAB, 0xBC];
|
|
}
|
|
|
|
#[test]
|
|
#[should_panic]
|
|
fn decode_from_empty_slice_panics() {
|
|
let mut slice = [].as_slice();
|
|
let _ = decode_unchecked(&mut slice);
|
|
}
|
|
|
|
#[test]
|
|
#[should_panic]
|
|
// Non-minimal is referred to as non-canonical in Core (`bitcoin/src/serialize.h`).
|
|
fn decode_non_minimal_panics() {
|
|
let mut slice = [0xFE, 0xCD, 0xAB].as_slice();
|
|
let _ = decode_unchecked(&mut slice);
|
|
}
|
|
}
|