Replace PatriciaTree with HashMap for UTXO set

We get a speed up (~5%) and memory savings (~10%) on initial sync from
using a HashMap, though it's hard to tell precisely how much savings
because it's quite nonlinear.

I haven't tested de/serialization. Some work needs to be done there to
split up the UTXO set since it takes forever to saveload.
This commit is contained in:
Andrew Poelstra 2014-07-20 16:52:00 -07:00
parent 54e4ea4586
commit 46969b3396
4 changed files with 86 additions and 30 deletions

View File

@ -18,6 +18,7 @@
//! index of UTXOs.
//!
use std::collections::HashMap;
use std::io::IoResult;
use std::mem;
@ -26,21 +27,16 @@ use blockdata::constants::genesis_block;
use blockdata::block::Block;
use network::constants::Network;
use network::serialize::{Serializable, SerializeIter};
use util::hash::Sha256dHash;
use util::hash::{DumbHasher, Sha256dHash};
use util::uint::Uint128;
use util::patricia_tree::PatriciaTree;
use util::thinvec::ThinVec;
/// How much of the hash to use as a key
static KEY_LEN: uint = 128;
/// Vector of outputs; None indicates a nonexistent or already spent output
type UtxoNode = ThinVec<Option<Box<TxOut>>>;
/// The UTXO set
pub struct UtxoSet {
// We use a 128-bit indexed tree to save memory
tree: PatriciaTree<UtxoNode, Uint128>,
table: HashMap<Uint128, UtxoNode, DumbHasher>,
last_hash: Sha256dHash,
// A circular buffer of deleted utxos, grouped by block
spent_txos: Vec<Vec<Box<TxOut>>>,
@ -49,7 +45,7 @@ pub struct UtxoSet {
n_utxos: u64
}
impl_serializable!(UtxoSet, last_hash, n_utxos, spent_txos, spent_idx, tree)
impl_serializable!(UtxoSet, last_hash, n_utxos, spent_txos, spent_idx, table)
impl UtxoSet {
/// Constructs a new UTXO set
@ -59,7 +55,7 @@ impl UtxoSet {
// must follow suit, otherwise we will accept a transaction spending it
// while the reference client won't, causing us to fork off the network.
UtxoSet {
tree: PatriciaTree::new(),
table: HashMap::with_hasher(DumbHasher),
last_hash: genesis_block(network).header.bitcoin_hash(),
spent_txos: Vec::from_elem(rewind_limit, vec![]),
spent_idx: 0,
@ -77,7 +73,7 @@ impl UtxoSet {
unsafe { new_node.init(vout as uint, Some(box txo.clone())); }
}
// TODO: insert/lookup should return a Result which we pass along
if self.tree.insert(&txid.as_uint128(), KEY_LEN, new_node) {
if self.table.insert(txid.as_uint128(), new_node) {
self.n_utxos += tx.output.len() as u64;
return true;
}
@ -89,7 +85,7 @@ impl UtxoSet {
// This whole function has awkward scoping thx to lexical borrow scoping :(
let (ret, should_delete) = {
// Locate the UTXO, failing if not found
let node = match self.tree.lookup_mut(&txid.as_uint128(), KEY_LEN) {
let node = match self.table.find_mut(&txid.as_uint128()) {
Some(node) => node,
None => return None
};
@ -107,7 +103,7 @@ impl UtxoSet {
// Delete the whole node if it is no longer being used
if should_delete {
self.tree.delete(&txid.as_uint128(), KEY_LEN);
self.table.remove(&txid.as_uint128());
}
self.n_utxos -= if ret.is_some() { 1 } else { 0 };
@ -117,7 +113,7 @@ impl UtxoSet {
/// Get a reference to a UTXO in the set
pub fn get_utxo<'a>(&'a mut self, txid: Sha256dHash, vout: u32) -> Option<&'a Box<TxOut>> {
// Locate the UTXO, failing if not found
let node = match self.tree.lookup_mut(&txid.as_uint128(), KEY_LEN) {
let node = match self.table.find_mut(&txid.as_uint128()) {
Some(node) => node,
None => return None
};
@ -169,7 +165,7 @@ impl UtxoSet {
if blockhash == "00000000000a4d0a398161ffc163c503763b1f4360639393e0e4c8e300e0caec".to_string() ||
blockhash == "00000000000743f190a18c5577a3c2d2a1f610ae9601ac046a38084ccb7cd721".to_string() {
// For these specific blocks, overwrite the old UTXOs.
self.tree.delete(&tx.bitcoin_hash().as_uint128(), KEY_LEN);
self.table.remove(&tx.bitcoin_hash().as_uint128());
self.add_utxos(tx);
} else {
// Otherwise fail the block
@ -215,7 +211,7 @@ impl UtxoSet {
for (txo, inp) in extract_vec.move_iter().zip(tx.input.iter()) {
// Remove the tx's utxo list and patch the txo into place
let new_node =
match self.tree.delete(&inp.prev_hash.as_uint128(), KEY_LEN) {
match self.table.pop(&inp.prev_hash.as_uint128()) {
Some(mut thinvec) => {
let old_len = thinvec.len() as u32;
if old_len < inp.prev_index + 1 {
@ -237,7 +233,7 @@ impl UtxoSet {
}
};
// Ram it back into the tree
self.tree.insert(&inp.prev_hash.as_uint128(), KEY_LEN, new_node);
self.table.insert(inp.prev_hash.as_uint128(), new_node);
}
}
skipped_genesis = true;
@ -259,11 +255,6 @@ impl UtxoSet {
pub fn n_utxos(&self) -> uint {
self.n_utxos as uint
}
/// Get the number of UTXOs in the set
pub fn tree_size(&self) -> uint {
self.tree.node_count()
}
}
#[cfg(test)]

View File

@ -32,6 +32,7 @@
#![feature(macro_rules)]
#![feature(overloaded_calls)]
#![feature(unsafe_destructor)]
#![feature(default_type_params)]
#![comment = "Rust Bitcoin Library"]
#![license = "CC0"]

View File

@ -21,6 +21,9 @@
use collections::Vec;
use collections::bitv::{Bitv, from_bytes};
use std::default::Default;
use std::hash::{Hash, Hasher};
use std::collections::HashMap;
use std::io::{IoError, IoResult, InvalidInput, OtherIoError, standard_error};
use std::io::{BufferedReader, BufferedWriter, File, Truncate, Write};
use std::io::fs::rename;
@ -418,6 +421,30 @@ impl<T: Serializable> Serializable for Vec<T> {
}
}
impl <K: Serializable+Eq+Hash<u64>, T: Serializable, H: Hasher<u64>+Default> Serializable for HashMap<K, T, H> {
fn serialize(&self) -> Vec<u8> {
let n_elems = u64_to_varint(self.len() as u64);
let mut rv = n_elems.serialize();
for (key, value) in self.iter() {
rv.extend(key.serialize().move_iter());
rv.extend(value.serialize().move_iter());
}
rv
}
fn deserialize<I: Iterator<u8>>(mut iter: I) -> IoResult<HashMap<K, T, H>> {
let mut n_elems = varint_to_u64(try!(Serializable::deserialize(iter.by_ref())));
let mut ret = HashMap::with_capacity_and_hasher(n_elems as uint, Default::default());
while n_elems > 0 {
let key: K = try!(Serializable::deserialize(iter.by_ref()));
let value: T = try!(Serializable::deserialize(iter.by_ref()));
ret.insert(key, value);
n_elems -= 1;
}
Ok(ret)
}
}
impl<T: Serializable> Serializable for ThinVec<T> {
fn serialize(&self) -> Vec<u8> {
let n_elems = u64_to_varint(self.len() as u64);

View File

@ -18,11 +18,11 @@
use collections::bitv::{Bitv, from_bytes};
use core::char::from_digit;
use core::cmp::min;
use std::default::Default;
use std::fmt;
use std::io::{IoResult, IoError, InvalidInput};
use std::mem::transmute;
use std::hash::sip::SipState;
use std::hash::Hash;
use std::hash::{Hash, Hasher};
use crypto::digest::Digest;
use crypto::sha2;
@ -35,16 +35,51 @@ use util::uint::Uint256;
/// A Bitcoin hash, 32-bytes, computed from x as SHA256(SHA256(x))
pub struct Sha256dHash([u8, ..32]);
/// Allow this to be used as a key for Rust's HashMap et. al.
impl Hash for Sha256dHash {
fn hash(&self, state: &mut SipState) {
let &Sha256dHash(ref data) = self;
for ch in data.iter() {
ch.hash(state);
/// A "hasher" which just truncates
pub struct DumbHasher;
// Allow these to be used as a key for Rust's HashMap et. al.
impl Hash<u64> for Sha256dHash {
#[inline]
fn hash(&self, state: &mut u64) {
use std::mem;
let myarr: [u64, ..4] = unsafe { mem::transmute(*self) };
*state = myarr[0];
}
}
impl Hash<u64> for Uint256 {
#[inline]
fn hash(&self, state: &mut u64) {
use std::mem;
let myarr: [u64, ..4] = unsafe { mem::transmute(*self) };
*state = myarr[0];
}
}
impl Hash<u64> for Uint128 {
#[inline]
fn hash(&self, state: &mut u64) {
use std::mem;
let myarr: [u64, ..2] = unsafe { mem::transmute(*self) };
*state = myarr[0];
}
}
impl Hasher<u64> for DumbHasher {
#[inline]
fn hash<T: Hash<u64>>(&self, value: &T) -> u64 {
let mut ret = 0u64;
value.hash(&mut ret);
ret
}
}
impl Default for DumbHasher {
#[inline]
fn default() -> DumbHasher { DumbHasher }
}
/// Returns the all-zeroes "hash"
pub fn zero_hash() -> Sha256dHash { Sha256dHash([0u8, ..32]) }
@ -118,6 +153,8 @@ impl PartialEq for Sha256dHash {
}
}
impl Eq for Sha256dHash {}
impl Serializable for Sha256dHash {
fn serialize(&self) -> Vec<u8> {
let &Sha256dHash(ref data) = self;