Merge rust-bitcoin/rust-bitcoin#1273: Redesign `hex::BufEncoder` to accept owned arrays

1bf885550e Redesign `hex::BufEncoder` to accept owned arrays (Martin Habovstiak)

Pull request description:

  Not being able to create an owned `BufEncoder` prevented returning it from functions which need to allocate the buffer on the stack. Such is the case in the WIP serde via consensus serialization.

  This change refactors `OutBytes` to be unsized, adds an `AsOutBytes` trait and uses that one instead of `Into` to perform the conversion.

  Closes #1270

  This is meant as a potentially mergeable demonstration. Interestingly, it was easier than I expected.

ACKs for top commit:
  tcharding:
    Except for the question about the `out_bytes` module, ACK 1bf885550e
  apoelstra:
    ACK 1bf885550e

Tree-SHA512: 39bc48e54ae0e66b988ba5ad6ea7ecbe03e5f4c71792df0f8e2b03aa2e97e2c0fac1cb03e84ecac12ec6f13649554b57e1000710c34c638d17d9bb575d0ac0a1
This commit is contained in:
Andrew Poelstra 2022-09-15 13:17:19 +00:00
commit f41ec20ee4
No known key found for this signature in database
GPG Key ID: C588D63CE41B97C1
3 changed files with 101 additions and 19 deletions

View File

@ -8,10 +8,32 @@ pub use out_bytes::OutBytes;
use super::Case;
/// Trait for types that can be soundly converted to `OutBytes`.
///
/// To protect the API from future breakage this sealed trait guards which types can be used with
/// `BufEncoder`. It is implemented for byte arrays of a fixed set of lengths, for `OutBytes`
/// itself, and for mutable references to any type that implements it.
///
/// ## Safety
///
/// This trait is not `unsafe` yet, but `as_out_bytes` and `as_mut_out_bytes` should always return
/// the same reference if the same reference is supplied. In other words, the returned memory
/// address and length should be the same whenever the input memory address and length are the
/// same.
///
/// If the trait ever becomes `unsafe` this will be required for soundness.
pub trait AsOutBytes: out_bytes::Sealed {
/// Borrows `self` as a shared `OutBytes` reference.
fn as_out_bytes(&self) -> &OutBytes;
/// Borrows `self` as a mutable `OutBytes` reference.
fn as_mut_out_bytes(&mut self) -> &mut OutBytes;
}
/// Implements `OutBytes`
///
/// This prevents the rest of the crate from accessing the field of `OutBytes`.
mod out_bytes {
use super::AsOutBytes;
/// A byte buffer that can only be written-into.
///
/// You shouldn't concern yourself with this, just call `BufEncoder::new` with your array.
@ -20,11 +42,12 @@ mod out_bytes {
/// `unsafe` until it's proven to be needed but if it does we have an easy, compatible upgrade
/// option.
///
/// We also don't bother with unsized type because the immutable version is useless and this avoids
/// `unsafe` while we don't want/need it.
pub struct OutBytes<'a>(&'a mut [u8]);
/// Warning: `repr(transparent)` is an internal implementation detail and **must not** be
/// relied on!
#[repr(transparent)]
pub struct OutBytes([u8]);
impl<'a> OutBytes<'a> {
impl OutBytes {
/// Returns the first `len` bytes as initialized.
///
/// Not `unsafe` because we don't use `unsafe` (yet).
@ -51,23 +74,83 @@ mod out_bytes {
pub(crate) fn len(&self) -> usize {
self.0.len()
}
/// Reinterprets a shared byte slice as a shared `OutBytes` reference.
///
/// The result refers to exactly the same memory (address and length) as `slice`.
fn from_bytes(slice: &[u8]) -> &Self {
    // Casting a reference to a raw pointer, and a raw pointer to another raw pointer type, is
    // safe on its own; only the dereference below needs `unsafe`.
    let raw = slice as *const [u8] as *const Self;
    // SAFETY: this mirrors the pattern used in std for unsized newtypes.
    // * Converting a reference to a pointer of the same referred type is always sound,
    //   including for unsized types.
    // * `OutBytes` is `repr(transparent)` over `[u8]`, so both types have the same layout and
    //   the pointer-to-pointer cast is sound.
    // * `raw` was just derived from a reference that is still alive, so dereferencing it is
    //   sound.
    unsafe { &*raw }
}
/// Reinterprets a mutable byte slice as a mutable `OutBytes` reference.
///
/// The result refers to exactly the same memory (address and length) as `slice`.
fn from_mut_bytes(slice: &mut [u8]) -> &mut Self {
    // Casting a reference to a raw pointer, and a raw pointer to another raw pointer type, is
    // safe on its own; only the dereference below needs `unsafe`.
    let raw = slice as *mut [u8] as *mut Self;
    // SAFETY: this mirrors the pattern used in std for unsized newtypes.
    // * Converting a reference to a pointer of the same referred type is always sound,
    //   including for unsized types.
    // * `OutBytes` is `repr(transparent)` over `[u8]`, so both types have the same layout and
    //   the pointer-to-pointer cast is sound.
    // * `raw` was just derived from a unique reference that is still alive, so dereferencing
    //   it is sound.
    unsafe { &mut *raw }
}
}
macro_rules! impl_from_array {
($($len:expr),* $(,)?) => {
$(
impl<'a> From<&'a mut [u8; $len]> for OutBytes<'a> {
fn from(value: &'a mut [u8; $len]) -> Self {
OutBytes(value)
impl AsOutBytes for [u8; $len] {
fn as_out_bytes(&self) -> &OutBytes {
OutBytes::from_bytes(self)
}
fn as_mut_out_bytes(&mut self) -> &mut OutBytes {
OutBytes::from_mut_bytes(self)
}
}
impl Sealed for [u8; $len] {}
)*
}
}
// Forwarding impl: a mutable reference to any `AsOutBytes` type is itself `AsOutBytes`, so a
// `BufEncoder` can be built over a borrowed buffer (e.g. `&mut OutBytes`) as well as an owned one.
impl<T> AsOutBytes for &'_ mut T
where
    T: AsOutBytes + ?Sized,
{
    fn as_out_bytes(&self) -> &OutBytes {
        // `self` is `&&mut T`: reborrow the pointee as `&T` and delegate to its impl.
        T::as_out_bytes(&**self)
    }

    fn as_mut_out_bytes(&mut self) -> &mut OutBytes {
        // `self` is `&mut &mut T`: reborrow the pointee as `&mut T` and delegate to its impl.
        T::as_mut_out_bytes(&mut **self)
    }
}
impl<T: AsOutBytes + ?Sized> Sealed for &'_ mut T {}
// Identity conversion: `OutBytes` is already the target type, so both methods return `self`
// unchanged.
impl AsOutBytes for OutBytes {
// Shared borrow: returns the very same reference that was passed in.
fn as_out_bytes(&self) -> &OutBytes {
self
}
// Mutable borrow: returns the very same reference that was passed in.
fn as_mut_out_bytes(&mut self) -> &mut OutBytes {
self
}
}
impl Sealed for OutBytes {}
// As a sanity check we only provide conversions for even, non-empty arrays.
// Weird lengths 66 and 130 are provided for serialized public keys.
impl_from_array!(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 64, 66, 128, 130, 256, 512, 1024, 2048, 4096, 8192);
/// Prevents outside crates from implementing the trait
pub trait Sealed {}
}
/// Hex-encodes bytes into the provided buffer.
@ -75,19 +158,18 @@ mod out_bytes {
/// This is an important building block for fast hex-encoding. Because string writing tools
/// provided by `core::fmt` involve dynamic dispatch and don't allow reserving capacity in strings
/// buffering the hex and then formatting it is significantly faster.
pub struct BufEncoder<'a> {
buf: OutBytes<'a>,
pub struct BufEncoder<T: AsOutBytes> {
buf: T,
pos: usize,
}
impl<'a> BufEncoder<'a> {
impl<T: AsOutBytes> BufEncoder<T> {
/// Creates an empty `BufEncoder`.
///
/// This is usually used with uninitialized (zeroed) byte array allocated on stack.
/// This can only be constructed with an even-length, non-empty array.
#[inline]
pub fn new<T: Into<OutBytes<'a>>>(buf: T) -> Self {
let buf = buf.into();
pub fn new(buf: T) -> Self {
BufEncoder {
buf,
pos: 0,
@ -102,7 +184,7 @@ impl<'a> BufEncoder<'a> {
#[inline]
#[cfg_attr(rust_v_1_46, track_caller)]
pub fn put_byte(&mut self, byte: u8, case: Case) {
self.buf.write(self.pos, &super::byte_to_hex(byte, case.table()));
self.buf.as_mut_out_bytes().write(self.pos, &super::byte_to_hex(byte, case.table()));
self.pos += 2;
}
@ -117,7 +199,7 @@ impl<'a> BufEncoder<'a> {
// Panic if the result wouldn't fit address space to not waste time and give the optimizer
// more opportunities.
let double_len = bytes.len().checked_mul(2).expect("overflow");
assert!(double_len <= self.buf.len() - self.pos);
assert!(double_len <= self.buf.as_out_bytes().len() - self.pos);
for byte in bytes {
self.put_byte(*byte, case);
}
@ -126,13 +208,13 @@ impl<'a> BufEncoder<'a> {
/// Returns true if no more bytes can be written into the buffer.
#[inline]
pub fn is_full(&self) -> bool {
self.pos == self.buf.len()
self.pos == self.buf.as_out_bytes().len()
}
/// Returns the written bytes as a hex `str`.
#[inline]
pub fn as_str(&self) -> &str {
core::str::from_utf8(self.buf.assume_init(self.pos)).expect("we only write ASCII")
core::str::from_utf8(self.buf.as_out_bytes().assume_init(self.pos)).expect("we only write ASCII")
}
/// Resets the buffer to become empty.

View File

@ -181,7 +181,8 @@ macro_rules! fmt_hex_exact {
const _: () = [()][($len > usize::max_value() / 2) as usize];
assert_eq!($bytes.len(), $len);
let mut buf = [0u8; $len * 2];
$crate::hex::display::fmt_hex_exact_fn($formatter, (&mut buf).into(), $bytes, $case)
let buf = $crate::hex::buf_encoder::AsOutBytes::as_mut_out_bytes(&mut buf);
$crate::hex::display::fmt_hex_exact_fn($formatter, buf, $bytes, $case)
}
}
}
@ -189,7 +190,7 @@ macro_rules! fmt_hex_exact {
// Implementation detail of `write_hex_exact` macro to de-duplicate the code
#[doc(hidden)]
#[inline]
pub fn fmt_hex_exact_fn(f: &mut fmt::Formatter, buf: OutBytes<'_>, bytes: &[u8], case: Case) -> fmt::Result {
pub fn fmt_hex_exact_fn(f: &mut fmt::Formatter, buf: &mut OutBytes, bytes: &[u8], case: Case) -> fmt::Result {
let mut encoder = BufEncoder::new(buf);
encoder.put_bytes(bytes, case);
f.pad_integral(true, "0x", encoder.as_str())

View File

@ -11,7 +11,6 @@
// Experimental features we need.
#![cfg_attr(docsrs, feature(doc_cfg))]
// Coding conventions
#![forbid(unsafe_code)]
#![deny(non_upper_case_globals)]
#![deny(non_camel_case_types)]
#![deny(non_snake_case)]