Replace PatriciaTree with HashMap for UTXO set

We get a speedup (~5%) and memory savings (~10%) on initial sync from
using a HashMap, though it's hard to tell precisely how large the savings
are because it's quite nonlinear.

I haven't tested de/serialization. Some work needs to be done there to
split up the UTXO set, since it takes forever to save and load.
This commit is contained in:
Andrew Poelstra 2014-07-20 16:52:00 -07:00
parent 54e4ea4586
commit 46969b3396
4 changed files with 86 additions and 30 deletions

View File

@ -18,6 +18,7 @@
//! index of UTXOs. //! index of UTXOs.
//! //!
use std::collections::HashMap;
use std::io::IoResult; use std::io::IoResult;
use std::mem; use std::mem;
@ -26,21 +27,16 @@ use blockdata::constants::genesis_block;
use blockdata::block::Block; use blockdata::block::Block;
use network::constants::Network; use network::constants::Network;
use network::serialize::{Serializable, SerializeIter}; use network::serialize::{Serializable, SerializeIter};
use util::hash::Sha256dHash; use util::hash::{DumbHasher, Sha256dHash};
use util::uint::Uint128; use util::uint::Uint128;
use util::patricia_tree::PatriciaTree;
use util::thinvec::ThinVec; use util::thinvec::ThinVec;
/// How much of the hash to use as a key
static KEY_LEN: uint = 128;
/// Vector of outputs; None indicates a nonexistent or already spent output /// Vector of outputs; None indicates a nonexistent or already spent output
type UtxoNode = ThinVec<Option<Box<TxOut>>>; type UtxoNode = ThinVec<Option<Box<TxOut>>>;
/// The UTXO set /// The UTXO set
pub struct UtxoSet { pub struct UtxoSet {
// We use a 128-bit indexed tree to save memory table: HashMap<Uint128, UtxoNode, DumbHasher>,
tree: PatriciaTree<UtxoNode, Uint128>,
last_hash: Sha256dHash, last_hash: Sha256dHash,
// A circular buffer of deleted utxos, grouped by block // A circular buffer of deleted utxos, grouped by block
spent_txos: Vec<Vec<Box<TxOut>>>, spent_txos: Vec<Vec<Box<TxOut>>>,
@ -49,7 +45,7 @@ pub struct UtxoSet {
n_utxos: u64 n_utxos: u64
} }
impl_serializable!(UtxoSet, last_hash, n_utxos, spent_txos, spent_idx, tree) impl_serializable!(UtxoSet, last_hash, n_utxos, spent_txos, spent_idx, table)
impl UtxoSet { impl UtxoSet {
/// Constructs a new UTXO set /// Constructs a new UTXO set
@ -59,7 +55,7 @@ impl UtxoSet {
// must follow suit, otherwise we will accept a transaction spending it // must follow suit, otherwise we will accept a transaction spending it
// while the reference client won't, causing us to fork off the network. // while the reference client won't, causing us to fork off the network.
UtxoSet { UtxoSet {
tree: PatriciaTree::new(), table: HashMap::with_hasher(DumbHasher),
last_hash: genesis_block(network).header.bitcoin_hash(), last_hash: genesis_block(network).header.bitcoin_hash(),
spent_txos: Vec::from_elem(rewind_limit, vec![]), spent_txos: Vec::from_elem(rewind_limit, vec![]),
spent_idx: 0, spent_idx: 0,
@ -77,7 +73,7 @@ impl UtxoSet {
unsafe { new_node.init(vout as uint, Some(box txo.clone())); } unsafe { new_node.init(vout as uint, Some(box txo.clone())); }
} }
// TODO: insert/lookup should return a Result which we pass along // TODO: insert/lookup should return a Result which we pass along
if self.tree.insert(&txid.as_uint128(), KEY_LEN, new_node) { if self.table.insert(txid.as_uint128(), new_node) {
self.n_utxos += tx.output.len() as u64; self.n_utxos += tx.output.len() as u64;
return true; return true;
} }
@ -89,7 +85,7 @@ impl UtxoSet {
// This whole function has awkward scoping thx to lexical borrow scoping :( // This whole function has awkward scoping thx to lexical borrow scoping :(
let (ret, should_delete) = { let (ret, should_delete) = {
// Locate the UTXO, failing if not found // Locate the UTXO, failing if not found
let node = match self.tree.lookup_mut(&txid.as_uint128(), KEY_LEN) { let node = match self.table.find_mut(&txid.as_uint128()) {
Some(node) => node, Some(node) => node,
None => return None None => return None
}; };
@ -107,7 +103,7 @@ impl UtxoSet {
// Delete the whole node if it is no longer being used // Delete the whole node if it is no longer being used
if should_delete { if should_delete {
self.tree.delete(&txid.as_uint128(), KEY_LEN); self.table.remove(&txid.as_uint128());
} }
self.n_utxos -= if ret.is_some() { 1 } else { 0 }; self.n_utxos -= if ret.is_some() { 1 } else { 0 };
@ -117,7 +113,7 @@ impl UtxoSet {
/// Get a reference to a UTXO in the set /// Get a reference to a UTXO in the set
pub fn get_utxo<'a>(&'a mut self, txid: Sha256dHash, vout: u32) -> Option<&'a Box<TxOut>> { pub fn get_utxo<'a>(&'a mut self, txid: Sha256dHash, vout: u32) -> Option<&'a Box<TxOut>> {
// Locate the UTXO, failing if not found // Locate the UTXO, failing if not found
let node = match self.tree.lookup_mut(&txid.as_uint128(), KEY_LEN) { let node = match self.table.find_mut(&txid.as_uint128()) {
Some(node) => node, Some(node) => node,
None => return None None => return None
}; };
@ -169,7 +165,7 @@ impl UtxoSet {
if blockhash == "00000000000a4d0a398161ffc163c503763b1f4360639393e0e4c8e300e0caec".to_string() || if blockhash == "00000000000a4d0a398161ffc163c503763b1f4360639393e0e4c8e300e0caec".to_string() ||
blockhash == "00000000000743f190a18c5577a3c2d2a1f610ae9601ac046a38084ccb7cd721".to_string() { blockhash == "00000000000743f190a18c5577a3c2d2a1f610ae9601ac046a38084ccb7cd721".to_string() {
// For these specific blocks, overwrite the old UTXOs. // For these specific blocks, overwrite the old UTXOs.
self.tree.delete(&tx.bitcoin_hash().as_uint128(), KEY_LEN); self.table.remove(&tx.bitcoin_hash().as_uint128());
self.add_utxos(tx); self.add_utxos(tx);
} else { } else {
// Otherwise fail the block // Otherwise fail the block
@ -215,7 +211,7 @@ impl UtxoSet {
for (txo, inp) in extract_vec.move_iter().zip(tx.input.iter()) { for (txo, inp) in extract_vec.move_iter().zip(tx.input.iter()) {
// Remove the tx's utxo list and patch the txo into place // Remove the tx's utxo list and patch the txo into place
let new_node = let new_node =
match self.tree.delete(&inp.prev_hash.as_uint128(), KEY_LEN) { match self.table.pop(&inp.prev_hash.as_uint128()) {
Some(mut thinvec) => { Some(mut thinvec) => {
let old_len = thinvec.len() as u32; let old_len = thinvec.len() as u32;
if old_len < inp.prev_index + 1 { if old_len < inp.prev_index + 1 {
@ -237,7 +233,7 @@ impl UtxoSet {
} }
}; };
// Ram it back into the tree // Ram it back into the tree
self.tree.insert(&inp.prev_hash.as_uint128(), KEY_LEN, new_node); self.table.insert(inp.prev_hash.as_uint128(), new_node);
} }
} }
skipped_genesis = true; skipped_genesis = true;
@ -259,11 +255,6 @@ impl UtxoSet {
pub fn n_utxos(&self) -> uint { pub fn n_utxos(&self) -> uint {
self.n_utxos as uint self.n_utxos as uint
} }
/// Get the number of UTXOs in the set
pub fn tree_size(&self) -> uint {
self.tree.node_count()
}
} }
#[cfg(test)] #[cfg(test)]

View File

@ -32,6 +32,7 @@
#![feature(macro_rules)] #![feature(macro_rules)]
#![feature(overloaded_calls)] #![feature(overloaded_calls)]
#![feature(unsafe_destructor)] #![feature(unsafe_destructor)]
#![feature(default_type_params)]
#![comment = "Rust Bitcoin Library"] #![comment = "Rust Bitcoin Library"]
#![license = "CC0"] #![license = "CC0"]

View File

@ -21,6 +21,9 @@
use collections::Vec; use collections::Vec;
use collections::bitv::{Bitv, from_bytes}; use collections::bitv::{Bitv, from_bytes};
use std::default::Default;
use std::hash::{Hash, Hasher};
use std::collections::HashMap;
use std::io::{IoError, IoResult, InvalidInput, OtherIoError, standard_error}; use std::io::{IoError, IoResult, InvalidInput, OtherIoError, standard_error};
use std::io::{BufferedReader, BufferedWriter, File, Truncate, Write}; use std::io::{BufferedReader, BufferedWriter, File, Truncate, Write};
use std::io::fs::rename; use std::io::fs::rename;
@ -418,6 +421,30 @@ impl<T: Serializable> Serializable for Vec<T> {
} }
} }
// Serialization for hash maps: a varint entry count followed by each
// key/value pair. Requires a hasher type that implements `Default` so that
// a fresh map can be built during deserialization.
impl <K: Serializable+Eq+Hash<u64>, T: Serializable, H: Hasher<u64>+Default> Serializable for HashMap<K, T, H> {
// Encodes the number of entries as a varint, then appends the serialized
// key followed by the serialized value for every entry, in whatever order
// the map's iterator yields them.
// NOTE(review): because the order comes from `self.iter()`, two equal maps
// may not produce identical bytes -- confirm nothing relies on that.
fn serialize(&self) -> Vec<u8> {
let n_elems = u64_to_varint(self.len() as u64);
let mut rv = n_elems.serialize();
for (key, value) in self.iter() {
rv.extend(key.serialize().move_iter());
rv.extend(value.serialize().move_iter());
}
rv
}
// Decodes a varint entry count, then reads that many key/value pairs from
// `iter` and inserts them into a new map. Any failed read is returned to
// the caller through `try!`.
fn deserialize<I: Iterator<u8>>(mut iter: I) -> IoResult<HashMap<K, T, H>> {
let mut n_elems = varint_to_u64(try!(Serializable::deserialize(iter.by_ref())));
// NOTE(review): the capacity is taken directly from the decoded count
// before any entry has been read -- confirm the input is trusted, since a
// large count would request a correspondingly large allocation up front.
let mut ret = HashMap::with_capacity_and_hasher(n_elems as uint, Default::default());
// Count down the remaining entries; a duplicate key in the input simply
// goes through `insert` again.
while n_elems > 0 {
let key: K = try!(Serializable::deserialize(iter.by_ref()));
let value: T = try!(Serializable::deserialize(iter.by_ref()));
ret.insert(key, value);
n_elems -= 1;
}
Ok(ret)
}
}
impl<T: Serializable> Serializable for ThinVec<T> { impl<T: Serializable> Serializable for ThinVec<T> {
fn serialize(&self) -> Vec<u8> { fn serialize(&self) -> Vec<u8> {
let n_elems = u64_to_varint(self.len() as u64); let n_elems = u64_to_varint(self.len() as u64);

View File

@ -18,11 +18,11 @@
use collections::bitv::{Bitv, from_bytes}; use collections::bitv::{Bitv, from_bytes};
use core::char::from_digit; use core::char::from_digit;
use core::cmp::min; use core::cmp::min;
use std::default::Default;
use std::fmt; use std::fmt;
use std::io::{IoResult, IoError, InvalidInput}; use std::io::{IoResult, IoError, InvalidInput};
use std::mem::transmute; use std::mem::transmute;
use std::hash::sip::SipState; use std::hash::{Hash, Hasher};
use std::hash::Hash;
use crypto::digest::Digest; use crypto::digest::Digest;
use crypto::sha2; use crypto::sha2;
@ -35,16 +35,51 @@ use util::uint::Uint256;
/// A Bitcoin hash, 32-bytes, computed from x as SHA256(SHA256(x)) /// A Bitcoin hash, 32-bytes, computed from x as SHA256(SHA256(x))
pub struct Sha256dHash([u8, ..32]); pub struct Sha256dHash([u8, ..32]);
/// Allow this to be used as a key for Rust's HashMap et. al. /// A "hasher" which just truncates
impl Hash for Sha256dHash { pub struct DumbHasher;
fn hash(&self, state: &mut SipState) {
let &Sha256dHash(ref data) = self; // Allow these to be used as a key for Rust's HashMap et. al.
for ch in data.iter() { impl Hash<u64> for Sha256dHash {
ch.hash(state); #[inline]
} fn hash(&self, state: &mut u64) {
use std::mem;
let myarr: [u64, ..4] = unsafe { mem::transmute(*self) };
*state = myarr[0];
} }
} }
// "Hashing" a Uint256 into a u64 state by truncation: the value is
// reinterpreted as four u64 words and the first word replaces the state.
impl Hash<u64> for Uint256 {
#[inline]
fn hash(&self, state: &mut u64) {
use std::mem;
// NOTE(review): assumes Uint256 is exactly 32 bytes with no padding;
// which bits of the number land in word 0 depends on its in-memory
// layout -- confirm against the Uint256 definition.
let myarr: [u64, ..4] = unsafe { mem::transmute(*self) };
// The previous contents of `state` are overwritten, not mixed in.
*state = myarr[0];
}
}
// "Hashing" a Uint128 into a u64 state by truncation: the value is
// reinterpreted as two u64 words and the first word replaces the state.
impl Hash<u64> for Uint128 {
#[inline]
fn hash(&self, state: &mut u64) {
use std::mem;
// NOTE(review): assumes Uint128 is exactly 16 bytes with no padding;
// which bits of the number land in word 0 depends on its in-memory
// layout -- confirm against the Uint128 definition.
let myarr: [u64, ..2] = unsafe { mem::transmute(*self) };
// The previous contents of `state` are overwritten, not mixed in.
*state = myarr[0];
}
}
// DumbHasher does no mixing of its own: it hands a zeroed u64 to the
// value's `Hash<u64>` impl and returns whatever that impl wrote into it.
impl Hasher<u64> for DumbHasher {
#[inline]
fn hash<T: Hash<u64>>(&self, value: &T) -> u64 {
// Start from zero; the result is entirely determined by `value.hash`.
let mut ret = 0u64;
value.hash(&mut ret);
ret
}
}
// DumbHasher carries no state, so its default value is just the unit
// struct itself. This lets generic code build one via `Default::default()`.
impl Default for DumbHasher {
#[inline]
fn default() -> DumbHasher { DumbHasher }
}
/// Returns the all-zeroes "hash" /// Returns the all-zeroes "hash"
pub fn zero_hash() -> Sha256dHash { Sha256dHash([0u8, ..32]) } pub fn zero_hash() -> Sha256dHash { Sha256dHash([0u8, ..32]) }
@ -118,6 +153,8 @@ impl PartialEq for Sha256dHash {
} }
} }
impl Eq for Sha256dHash {}
impl Serializable for Sha256dHash { impl Serializable for Sha256dHash {
fn serialize(&self) -> Vec<u8> { fn serialize(&self) -> Vec<u8> {
let &Sha256dHash(ref data) = self; let &Sha256dHash(ref data) = self;