Optimize base58 on small inputs
Most base58 strings in Bitcoin are somewhat short. There was previously an "optimization" putting part of the input on the stack, which was removed in #2759 because it actually made the code slower. This appears to be mostly because of branches caused by using `iter::Chain`. Manually splitting the iteration into two loops helped bring the performance close to what #2759 achieved, but that still wasn't worth it. However, given that we know the input length in many cases (it's just a slice), we can determine upfront whether it will fit into a buffer and then just call different functions which don't have the branches in their loops. To avoid having two functions, this uses generics instead. Further, we increase the buffer length to 128 and use `ArrayVec` from `internals`, which internally avoids initializing the buffer thanks to `MaybeUninit`. In total, this increases performance by around 4% on my machine.
This commit is contained in:
parent
2320877253
commit
d05723c401
|
@ -31,9 +31,10 @@ pub mod error;
|
||||||
|
|
||||||
#[cfg(not(feature = "std"))]
|
#[cfg(not(feature = "std"))]
|
||||||
pub use alloc::{string::String, vec::Vec};
|
pub use alloc::{string::String, vec::Vec};
|
||||||
use core::{fmt, str};
|
use core::fmt;
|
||||||
#[cfg(feature = "std")]
|
#[cfg(feature = "std")]
|
||||||
pub use std::{string::String, vec::Vec};
|
pub use std::{string::String, vec::Vec};
|
||||||
|
use internals::array_vec::ArrayVec;
|
||||||
|
|
||||||
use hashes::sha256d;
|
use hashes::sha256d;
|
||||||
|
|
||||||
|
@ -117,42 +118,98 @@ pub fn decode_check(data: &str) -> Result<Vec<u8>, Error> {
|
||||||
Ok(ret)
|
Ok(ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Capacity of the fixed-size `ArrayVec` scratch buffer used for short inputs; when the estimated
/// encoded length exceeds this value, a `Vec` is used as the scratch buffer instead.
const SHORT_OPT_BUFFER_LEN: usize = 128;
|
||||||
|
|
||||||
/// Encodes `data` as a base58 string (see also `base58::encode_check()`).
|
/// Encodes `data` as a base58 string (see also `base58::encode_check()`).
|
||||||
pub fn encode(data: &[u8]) -> String { encode_iter(data.iter().cloned()) }
|
pub fn encode(data: &[u8]) -> String {
|
||||||
|
// Estimated number of base58 characters; used both as the output capacity and to pick the
// scratch buffer type below.
let reserve_len = encoded_reserve_len(data.len());
|
||||||
|
let mut res = String::with_capacity(reserve_len);
|
||||||
|
// Short outputs use a fixed-capacity `ArrayVec` as scratch space ...
if reserve_len <= SHORT_OPT_BUFFER_LEN {
|
||||||
|
format_iter(&mut res, data.iter().copied(), &mut ArrayVec::<u8, SHORT_OPT_BUFFER_LEN>::new())
|
||||||
|
// ... longer ones use a `Vec` pre-sized to the estimate.
} else {
|
||||||
|
format_iter(&mut res, data.iter().copied(), &mut Vec::with_capacity(reserve_len))
|
||||||
|
// Both branches write into a `String`, whose `fmt::Write` implementation does not fail.
}.expect("string doesn't error");
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
/// Encodes `data` as a base58 string including the checksum.
|
/// Encodes `data` as a base58 string including the checksum.
|
||||||
///
|
///
|
||||||
/// The checksum is the first four bytes of the sha256d of the data, concatenated onto the end.
|
/// The checksum is the first four bytes of the sha256d of the data, concatenated onto the end.
|
||||||
pub fn encode_check(data: &[u8]) -> String {
|
pub fn encode_check(data: &[u8]) -> String {
|
||||||
let checksum = sha256d::Hash::hash(data);
|
// Pre-size the output using the estimate for payload plus checksum.
let mut res = String::with_capacity(encoded_check_reserve_len(data.len()));
|
||||||
encode_iter(data.iter().cloned().chain(checksum[0..4].iter().cloned()))
|
// Checksum computation and encoding are shared with `encode_check_to_fmt`; writing into a
// `String` does not fail, hence the `expect`.
encode_check_to_writer(&mut res, data).expect("string doesn't fail");
|
||||||
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encodes a slice as base58, including the checksum, into a formatter.
|
/// Encodes a slice as base58, including the checksum, into a formatter.
|
||||||
///
|
///
|
||||||
/// The checksum is the first four bytes of the sha256d of the data, concatenated onto the end.
|
/// The checksum is the first four bytes of the sha256d of the data, concatenated onto the end.
|
||||||
pub fn encode_check_to_fmt(fmt: &mut fmt::Formatter, data: &[u8]) -> fmt::Result {
|
pub fn encode_check_to_fmt(fmt: &mut fmt::Formatter, data: &[u8]) -> fmt::Result {
|
||||||
|
// Thin wrapper: the writer-generic helper does the actual work so that `encode_check` can
// reuse it with a `String` as the sink.
encode_check_to_writer(fmt, data)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Encodes `data` followed by its checksum as base58 into any `fmt::Write` sink.
///
/// The checksum is the first four bytes of the sha256d hash of `data`. Any error reported by the
/// sink is returned to the caller.
fn encode_check_to_writer(fmt: &mut impl fmt::Write, data: &[u8]) -> fmt::Result {
|
||||||
let checksum = sha256d::Hash::hash(data);
|
let checksum = sha256d::Hash::hash(data);
|
||||||
let iter = data.iter().cloned().chain(checksum[0..4].iter().cloned());
|
// Payload bytes followed by the first four checksum bytes.
let iter = data.iter().cloned().chain(checksum[0..4].iter().cloned());
|
||||||
format_iter(fmt, iter)
|
// Estimated encoded length (checksum included); selects the scratch buffer type below.
let reserve_len = encoded_check_reserve_len(data.len());
|
||||||
|
// Short outputs use a fixed-capacity `ArrayVec` as scratch space ...
if reserve_len <= SHORT_OPT_BUFFER_LEN {
|
||||||
|
format_iter(fmt, iter, &mut ArrayVec::<u8, SHORT_OPT_BUFFER_LEN>::new())
|
||||||
|
// ... longer ones use a `Vec` pre-sized to the estimate.
} else {
|
||||||
|
format_iter(fmt, iter, &mut Vec::with_capacity(reserve_len))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_iter<I>(data: I) -> String
|
/// Returns the length to reserve when encoding base58 without checksum.
///
/// The result is an upper bound on the number of base58 digits produced for an input of
/// `unencoded_len` bytes, so a buffer of this capacity never has to grow during encoding.
const fn encoded_reserve_len(unencoded_len: usize) -> usize {
    // log2(256) / log2(58) ~ 1.37 = 137 / 100
    //
    // Round up instead of truncating: a truncated estimate can be one digit short. For example
    // 94 bytes starting with 0xff encode to 129 digits while 94 * 137 / 100 == 128, which would
    // route such an input to a fixed-capacity buffer that is too small to hold the output.
    (unencoded_len * 137 + 99) / 100
}
|
||||||
|
|
||||||
fn format_iter<I, W>(writer: &mut W, data: I) -> Result<(), fmt::Error>
|
/// Returns the length to reserve when encoding base58 with checksum.
///
/// `unencoded_len` is the payload length in bytes, not counting the checksum.
|
||||||
|
const fn encoded_check_reserve_len(unencoded_len: usize) -> usize {
|
||||||
|
// The four checksum bytes are appended to the payload before it is encoded.
encoded_reserve_len(unencoded_len + 4)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scratch storage for the base58 digits that `format_iter` builds up.
///
/// Abstracts over the concrete buffer type so the same encoding loop can run on either a `Vec`
/// or an `ArrayVec`.
trait Buffer: Sized {
|
||||||
|
/// Appends `val` to the end of the buffer.
fn push(&mut self, val: u8);
|
||||||
|
/// Returns the bytes pushed so far.
fn slice(&self) -> &[u8];
|
||||||
|
/// Returns the bytes pushed so far for in-place modification.
fn slice_mut(&mut self) -> &mut [u8];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Growable `Buffer` backed by a `Vec`; every method forwards to the `Vec` itself.
impl Buffer for Vec<u8> {
|
||||||
|
fn push(&mut self, val: u8) {
|
||||||
|
// Explicit path so the inherent `Vec::push` is called rather than this trait method.
Vec::push(self, val)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn slice(&self) -> &[u8] {
|
||||||
|
// `&Vec<u8>` coerces to `&[u8]`.
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn slice_mut(&mut self) -> &mut [u8] {
|
||||||
|
// `&mut Vec<u8>` coerces to `&mut [u8]`.
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fixed-capacity `Buffer` backed by an `ArrayVec` holding at most `N` bytes.
impl<const N: usize> Buffer for ArrayVec<u8, N> {
|
||||||
|
fn push(&mut self, val: u8) {
|
||||||
|
// Explicit path so the inherent `ArrayVec::push` is called rather than this trait method.
// NOTE(review): presumably panics once `N` elements are stored - confirm against
// `internals::array_vec`.
ArrayVec::push(self, val)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn slice(&self) -> &[u8] {
|
||||||
|
self.as_slice()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn slice_mut(&mut self) -> &mut [u8] {
|
||||||
|
self.as_mut_slice()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn format_iter<I, W>(writer: &mut W, data: I, buf: &mut impl Buffer) -> Result<(), fmt::Error>
|
||||||
where
|
where
|
||||||
I: Iterator<Item = u8> + Clone,
|
I: Iterator<Item = u8> + Clone,
|
||||||
W: fmt::Write,
|
W: fmt::Write,
|
||||||
{
|
{
|
||||||
let mut ret = Vec::with_capacity(128);
|
|
||||||
|
|
||||||
let mut leading_zero_count = 0;
|
let mut leading_zero_count = 0;
|
||||||
let mut leading_zeroes = true;
|
let mut leading_zeroes = true;
|
||||||
// Build string in little endian with 0-58 in place of characters...
|
// Build string in little endian with 0-58 in place of characters...
|
||||||
|
@ -164,21 +221,24 @@ where
|
||||||
leading_zeroes = false;
|
leading_zeroes = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for ch in ret.iter_mut() {
|
for ch in buf.slice_mut() {
|
||||||
let new_ch = *ch as usize * 256 + carry;
|
let new_ch = *ch as usize * 256 + carry;
|
||||||
*ch = (new_ch % 58) as u8;
|
*ch = (new_ch % 58) as u8;
|
||||||
carry = new_ch / 58;
|
carry = new_ch / 58;
|
||||||
}
|
}
|
||||||
|
|
||||||
while carry > 0 {
|
while carry > 0 {
|
||||||
ret.push((carry % 58) as u8);
|
buf.push((carry % 58) as u8);
|
||||||
carry /= 58;
|
carry /= 58;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ... then reverse it and convert to chars
|
// ... then reverse it and convert to chars
|
||||||
ret.resize(ret.len() + leading_zero_count, 0);
|
for _ in 0..leading_zero_count {
|
||||||
|
buf.push(0);
|
||||||
|
}
|
||||||
|
|
||||||
for ch in ret.iter().rev() {
|
for ch in buf.slice().iter().rev() {
|
||||||
writer.write_char(BASE58_CHARS[*ch as usize] as char)?;
|
writer.write_char(BASE58_CHARS[*ch as usize] as char)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -203,7 +263,7 @@ mod tests {
|
||||||
assert_eq!(&encode(&[0, 13, 36][..]), "1211");
|
assert_eq!(&encode(&[0, 13, 36][..]), "1211");
|
||||||
assert_eq!(&encode(&[0, 0, 0, 0, 13, 36][..]), "1111211");
|
assert_eq!(&encode(&[0, 0, 0, 0, 13, 36][..]), "1111211");
|
||||||
|
|
||||||
// Long input (>100 bytes => has to use heap)
|
// Long input (>128 bytes => has to use heap)
|
||||||
let res = encode(
|
let res = encode(
|
||||||
"BitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBit\
|
"BitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBit\
|
||||||
coinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoin"
|
coinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoinBitcoin"
|
||||||
|
|
Loading…
Reference in New Issue