Merge pull request #208 from sgeisler/hex_bytes

Use more performant hex_bytes function
2019-01-15 14:06:00 -05:00 · 2019-01-15 14:06:00 -05:00 · 2ed4b1f246
parent 282daaab69 4c29fc0e8d
commit 2ed4b1f246
4 changed files with 99 additions and 101 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,4 +1,3 @@
 [package]
 name = "bitcoin"
 version = "0.15.1"
@ -18,6 +17,7 @@ path = "src/lib.rs"
 [features]
 fuzztarget = ["secp256k1/fuzztarget"]
 serde-decimal = ["serde", "strason"]
 unstable = []
 [dependencies]
 bitcoin-bech32 = "0.8.0"
--- a/src/util/iter.rs
+++ b/src/util/iter.rs
@ -1,80 +0,0 @@
 // Rust Bitcoin Library
 // Written in 2014 by
 //     Andrew Poelstra <apoelstra@wpsoftware.net>
 //
 // To the extent possible under law, the author(s) have dedicated all
 // copyright and related and neighboring rights to this software to
 // the public domain worldwide. This software is distributed without
 // any warranty.
 //
 // You should have received a copy of the CC0 Public Domain Dedication
 // along with this software.
 // If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
 //
 //! Iterator adaptors
 //!
 //! Iterator adaptors needed by Bitcoin but not provided by the Rust
 //! standard library.
 /// Iterator adaptor that groups the items of an inner iterator two at a time
 pub struct Pair<I: Iterator> {
    /// Underlying iterator the pairs are drawn from
    iter: I,
    /// Unpaired trailing item, kept when `iter` ends after the first half of a pair
    last_elem: Option<I::Item>
 }
 impl<I: Iterator> Iterator for Pair<I> {
    type Item = (I::Item, I::Item);
    /// Pulls two items from the inner iterator and returns them as a tuple.
    ///
    /// Returns `None` once the inner iterator cannot supply a full pair. If it
    /// ends right after the first half of a pair, that item is stored in
    /// `last_elem` instead of being dropped.
    #[inline]
    fn next(&mut self) -> Option<(I::Item, I::Item)> {
        // Nothing left at all: stop without touching the inner iterator again.
        let first = match self.iter.next() {
            Some(item) => item,
            None => return None,
        };
        match self.iter.next() {
            Some(second) => Some((first, second)),
            None => {
                // Odd item out: remember it so it can be retrieved later.
                self.last_elem = Some(first);
                None
            }
        }
    }
    /// Halves both bounds reported by the inner iterator, since every
    /// yielded pair consumes two inner items.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.iter.size_hint();
        (lower / 2, upper.map(|m| m / 2))
    }
 }
 impl<I: Iterator> Pair<I> {
    /// Returns the last element of the iterator if there were an odd
    /// number of elements remaining before it was Pair-ified.
    #[inline]
    pub fn remainder(self) -> Option<I::Item> {
        self.last_elem
    }
 }
 /// Extension trait that lets an iterator be consumed two elements at a time
 pub trait Pairable: Iterator + Sized {
    /// Converts `self` into a [`Pair`] adaptor that yields the original
    /// elements grouped in twos
    fn pair(self) -> Pair<Self>;
 }
 impl<I: Iterator> Pairable for I {
    /// Wraps the iterator in a [`Pair`] adaptor that hands out its elements
    /// two at a time and yields `None` as soon as fewer than two elements
    /// are left.
    #[inline]
    fn pair(self) -> Pair<Self> {
        // No element can be left over before iteration has started.
        let last_elem = None;
        Pair { iter: self, last_elem }
    }
 }
--- a/src/util/misc.rs
+++ b/src/util/misc.rs
@ -17,29 +17,59 @@
 //! Various utility functions
 use blockdata::opcodes;
 use util::iter::Pairable;
 use consensus::encode;
 /// Helper function to convert hex nibble characters to their respective value
 #[inline]
 fn hex_val(c: u8) -> Result<u8, encode::Error> {
    let res = match c {
        b'0' ... b'9' => c - '0' as u8,
        b'a' ... b'f' => c - 'a' as u8 + 10,
        b'A' ... b'F' => c - 'A' as u8 + 10,
        _ => return Err(encode::Error::UnexpectedHexDigit(c as char)),
    };
    Ok(res)
 }
 /// Convert a hexadecimal-encoded string to its corresponding bytes
 ///
 /// # Errors
 ///
 /// Returns `encode::Error::ParseFailed` if the input has an odd length and
 /// `encode::Error::UnexpectedHexDigit` if it contains a character that is
 /// not a hexadecimal digit.
-pub fn hex_bytes(s: &str) -> Result<Vec<u8>, encode::Error> {
+pub fn hex_bytes(data: &str) -> Result<Vec<u8>, encode::Error> {
-    let mut v = vec![];
+    // This code is optimized to be as fast as possible without using unsafe or platform specific
-    let mut iter = s.chars().pair();
+    // features. If you want to refactor it please make sure you don't introduce performance
-    // Do the parsing
+    // regressions (run the benchmark with `cargo bench --features unstable`).
-    iter.by_ref().fold(Ok(()), |e, (f, s)| 
+
-        if e.is_err() { e }
+    // If the hex string has an uneven length fail early
-        else {
+    if data.len() % 2 != 0 {
-            match (f.to_digit(16), s.to_digit(16)) {
+        return Err(encode::Error::ParseFailed("hexstring of odd length"));
                (None, _) => Err(encode::Error::UnexpectedHexDigit(f)),
                (_, None) => Err(encode::Error::UnexpectedHexDigit(s)),
                (Some(f), Some(s)) => { v.push((f * 0x10 + s) as u8); Ok(()) }
            }
        }
    )?;
    // Check that there was no remainder
    match iter.remainder() {
        Some(_) => Err(encode::Error::ParseFailed("hexstring of odd length")),
        None => Ok(v)
    }
    // Preallocate the uninitialized memory for the byte array
    let mut res = Vec::with_capacity(data.len() / 2);
    let mut hex_it = data.bytes();
    loop {
        // Get most significant nibble of current byte or end iteration
        let msn = match hex_it.next() {
            None => break,
            Some(x) => x,
        };
        // Get least significant nibble of current byte
        let lsn = match hex_it.next() {
            None => unreachable!("len % 2 == 0"),
            Some(x) => x,
        };
        // Convert bytes representing characters to their represented value and combine lsn and msn.
        // The and_then and map are crucial for performance, in comparison to using ? and then
        // using the results of that for the calculation it's nearly twice as fast. Using bit
        // shifting and or instead of multiply and add on the other hand doesn't show a significant
        // increase in performance.
        match hex_val(msn).and_then(|msn_val| hex_val(lsn).map(|lsn_val| msn_val * 16 + lsn_val)) {
            Ok(x) => res.push(x),
            Err(e) => return Err(e),
        }
    }
    Ok(res)
 }
 /// Search for `needle` in the vector `haystack` and remove every
@ -77,6 +107,55 @@ pub fn script_find_and_remove(haystack: &mut Vec<u8>, needle: &[u8]) -> usize {
    n_deleted
 }
 #[cfg(all(test, feature="unstable"))]
 mod benches {
    use rand::{Rng, thread_rng};
    use super::hex_bytes;
    use test::Bencher;
    /// Concatenates all string pieces produced by `iter` into one `String`
    /// whose buffer is reserved up front with `expected_len` bytes.
    fn join<I: Iterator<Item=IT>, IT: AsRef<str>>(iter: I, expected_len: usize) -> String {
        iter.fold(String::with_capacity(expected_len), |mut joined, piece| {
            joined.push_str(piece.as_ref());
            joined
        })
    }
    /// Benchmarks `hex_bytes` on the lowercase hex encoding of `data_size`
    /// random bytes, after checking once that decoding round-trips.
    fn bench_from_hex(b: &mut Bencher, data_size: usize) {
        let expected: Vec<u8> = thread_rng().gen_iter().take(data_size).collect();
        // Every byte becomes exactly two hex characters.
        let hex_chunks = expected.iter().map(|byte| format!("{:02x}", byte));
        let hex = join(hex_chunks, data_size * 2);
        assert_eq!(hex_bytes(&hex).unwrap(), expected);
        b.iter(move || hex_bytes(&hex).unwrap());
    }
    #[bench]
    fn from_hex_16_bytes(b: &mut Bencher) {
        bench_from_hex(b, 16);
    }
    #[bench]
    fn from_hex_64_bytes(b: &mut Bencher) {
        bench_from_hex(b, 64);
    }
    #[bench]
    fn from_hex_256_bytes(b: &mut Bencher) {
        bench_from_hex(b, 256);
    }
    #[bench]
    fn from_hex_4m_bytes(b: &mut Bencher) {
        bench_from_hex(b, 4 * 1024 * 1024);
    }
 }
 #[cfg(test)]
 mod tests {
    use super::script_find_and_remove;
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@ -24,7 +24,6 @@ pub mod bip143;
 pub mod contracthash;
 pub mod decimal;
 pub mod hash;
 pub mod iter;
 pub mod misc;
 pub mod uint;