547 lines
19 KiB
Rust
547 lines
19 KiB
Rust
// Rust Bitcoin Library
|
|
// Written in 2014 by
|
|
// Andrew Poelstra <apoelstra@wpsoftware.net>
|
|
//
|
|
// To the extent possible under law, the author(s) have dedicated all
|
|
// copyright and related and neighboring rights to this software to
|
|
// the public domain worldwide. This software is distributed without
|
|
// any warranty.
|
|
//
|
|
// You should have received a copy of the CC0 Public Domain Dedication
|
|
// along with this software.
|
|
// If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
|
//
|
|
|
|
//! # Patricia/Radix Trie
|
|
//!
|
|
//! A Patricia trie is a trie in which nodes with only one child are
|
|
//! merged with the child, giving huge space savings for sparse tries.
|
|
//! A radix tree is more general, working with keys that are arbitrary
|
|
//! strings; a Patricia tree uses bitstrings.
|
|
//!
|
|
|
|
use core::fmt::Show;
|
|
use core::iter::ByRef;
|
|
use core::cmp;
|
|
use std::num::{Zero, One};
|
|
use std::io::{IoResult, InvalidInput, standard_error};
|
|
|
|
use network::serialize::{Serializable, SerializeIter};
|
|
use util::BitArray;
|
|
use util::misc::prepend_err;
|
|
|
|
/// A node of a Patricia tree mapping bitstring keys (stored in a `K`) to
/// values of type `T`. The tree as a whole is represented by its root node.
pub struct PatriciaTree<T, K> {
    /// Value stored at this node, or `None` if the node carries no value
    data: Option<T>,
    /// Subtree followed when the key bit after this node's prefix is 0
    child_l: Option<Box<PatriciaTree<T, K>>>,
    /// Subtree followed when the key bit after this node's prefix is 1
    child_r: Option<Box<PatriciaTree<T, K>>>,
    /// Run of key bits that a key must match to pass through this node
    skip_prefix: K,
    /// Number of significant bits in `skip_prefix`
    skip_len: u8,
}
|
|
|
|
impl<T, K:BitArray+Eq+Zero+One+BitXor<K,K>+Shl<uint,K>+Shr<uint,K>> PatriciaTree<T, K> {
|
|
/// Constructs a new Patricia tree
|
|
pub fn new() -> PatriciaTree<T, K> {
|
|
PatriciaTree {
|
|
data: None,
|
|
child_l: None,
|
|
child_r: None,
|
|
skip_prefix: Zero::zero(),
|
|
skip_len: 0
|
|
}
|
|
}
|
|
|
|
/// Lookup a value by exactly matching `key` and return a referenc
|
|
pub fn lookup_mut<'a>(&'a mut self, key: &K, key_len: uint) -> Option<&'a mut T> {
|
|
// Caution: `lookup_mut` never modifies its self parameter (in fact its
|
|
// internal recursion uses a non-mutable self, so we are OK to just
|
|
// transmute our self pointer into a mutable self before passing it in.
|
|
use std::mem::transmute;
|
|
unsafe { transmute(self.lookup(key, key_len)) }
|
|
}
|
|
|
|
/// Lookup a value by exactly matching `key` and return a mutable reference
|
|
pub fn lookup<'a>(&'a self, key: &K, key_len: uint) -> Option<&'a T> {
|
|
let mut node = self;
|
|
let mut key_idx = 0;
|
|
|
|
loop {
|
|
// If the search key is shorter than the node prefix, there is no
|
|
// way we can match, so fail.
|
|
if key_len - key_idx < node.skip_len as uint {
|
|
return None;
|
|
}
|
|
|
|
// Key fails to match prefix --- no match
|
|
if node.skip_prefix != key.bit_slice(key_idx, key_idx + node.skip_len as uint) {
|
|
return None;
|
|
}
|
|
|
|
// Key matches prefix: if they are an exact match, return the data
|
|
if node.skip_len as uint == key_len - key_idx {
|
|
return node.data.as_ref();
|
|
} else {
|
|
// Key matches prefix: search key longer than node key, recurse
|
|
key_idx += 1 + node.skip_len as uint;
|
|
let subtree = if key.bit(key_idx - 1) { &node.child_r } else { &node.child_l };
|
|
match subtree {
|
|
&Some(ref bx) => {
|
|
node = &**bx; // bx is a &Box<U> here, so &**bx gets &U
|
|
}
|
|
&None => { return None; }
|
|
}
|
|
}
|
|
} // end loop
|
|
}
|
|
|
|
/// Inserts a value with key `key`, returning true on success. If a value is already
|
|
/// stored against `key`, do nothing and return false.
|
|
#[inline]
|
|
pub fn insert(&mut self, key: &K, key_len: uint, value: T) -> bool {
|
|
self.real_insert(key, key_len, value, false)
|
|
}
|
|
|
|
/// Inserts a value with key `key`, returning true on success. If a value is already
|
|
/// stored against `key`, overwrite it and return false.
|
|
#[inline]
|
|
pub fn insert_or_update(&mut self, key: &K, key_len: uint, value: T) -> bool {
|
|
self.real_insert(key, key_len, value, true)
|
|
}
|
|
|
|
fn real_insert(&mut self, key: &K, key_len: uint, value: T, overwrite: bool) -> bool {
|
|
let mut node = self;
|
|
let mut idx = 0;
|
|
loop {
|
|
// Mask in case search key is shorter than node key
|
|
let slice_len = cmp::min(node.skip_len as uint, key_len - idx);
|
|
let masked_prefix = node.skip_prefix.mask(slice_len);
|
|
let key_slice = key.bit_slice(idx, idx + slice_len);
|
|
|
|
// Prefixes do not match: split key
|
|
if masked_prefix != key_slice {
|
|
let diff = (masked_prefix ^ key_slice).trailing_zeros();
|
|
|
|
// Remove the old node's children
|
|
let child_l = node.child_l.take();
|
|
let child_r = node.child_r.take();
|
|
let value_neighbor = node.data.take();
|
|
let tmp = node; // borrowck hack
|
|
let (insert, neighbor) = if key_slice.bit(diff)
|
|
{ (&mut tmp.child_r, &mut tmp.child_l) }
|
|
else { (&mut tmp.child_l, &mut tmp.child_r) };
|
|
*insert = Some(box PatriciaTree {
|
|
data: None,
|
|
child_l: None,
|
|
child_r: None,
|
|
skip_prefix: key.bit_slice(idx + diff + 1, key_len),
|
|
skip_len: (key_len - idx - diff - 1) as u8
|
|
});
|
|
*neighbor = Some(box PatriciaTree {
|
|
data: value_neighbor,
|
|
child_l: child_l,
|
|
child_r: child_r,
|
|
skip_prefix: tmp.skip_prefix >> (diff + 1),
|
|
skip_len: tmp.skip_len - diff as u8 - 1
|
|
});
|
|
// Chop the prefix down
|
|
tmp.skip_len = diff as u8;
|
|
tmp.skip_prefix = tmp.skip_prefix.mask(diff);
|
|
// Recurse
|
|
idx += 1 + diff;
|
|
node = &mut **insert.get_mut_ref();
|
|
}
|
|
// Prefixes match
|
|
else {
|
|
let slice_len = key_len - idx;
|
|
// Search key is shorter than skip prefix: truncate the prefix and attach
|
|
// the old data as a child
|
|
if node.skip_len as uint > slice_len {
|
|
// Remove the old node's children
|
|
let child_l = node.child_l.take();
|
|
let child_r = node.child_r.take();
|
|
let value_neighbor = node.data.take();
|
|
// Put the old data in a new child, with the remainder of the prefix
|
|
let new_child = if node.skip_prefix.bit(slice_len)
|
|
{ &mut node.child_r } else { &mut node.child_l };
|
|
*new_child = Some(box PatriciaTree {
|
|
data: value_neighbor,
|
|
child_l: child_l,
|
|
child_r: child_r,
|
|
skip_prefix: node.skip_prefix >> (slice_len + 1),
|
|
skip_len: node.skip_len - slice_len as u8 - 1
|
|
});
|
|
// Chop the prefix down and put the new data in place
|
|
node.skip_len = slice_len as u8;
|
|
node.skip_prefix = key_slice;
|
|
node.data = Some(value);
|
|
return true;
|
|
}
|
|
// If we have an exact match, great, insert it
|
|
else if node.skip_len as uint == slice_len {
|
|
if node.data.is_none() {
|
|
node.data = Some(value);
|
|
return true;
|
|
}
|
|
if overwrite {
|
|
node.data = Some(value);
|
|
}
|
|
return false;
|
|
}
|
|
// Search key longer than node key, recurse
|
|
else {
|
|
let tmp = node; // hack to appease borrowck
|
|
idx += tmp.skip_len as uint + 1;
|
|
let subtree = if key.bit(idx - 1)
|
|
{ &mut tmp.child_r } else { &mut tmp.child_l };
|
|
// Recurse, adding a new node if necessary
|
|
if subtree.is_none() {
|
|
*subtree = Some(box PatriciaTree {
|
|
data: None,
|
|
child_l: None,
|
|
child_r: None,
|
|
skip_prefix: key.bit_slice(idx, key_len),
|
|
skip_len: key_len as u8 - idx as u8
|
|
});
|
|
}
|
|
// subtree.get_mut_ref is a &mut Box<U> here, so &mut ** gets a &mut U
|
|
node = &mut **subtree.get_mut_ref();
|
|
} // end search_len vs prefix len
|
|
} // end if prefixes match
|
|
} // end loop
|
|
}
|
|
|
|
/// Deletes a value with key `key`, returning it on success. If no value with
|
|
/// the given key is found, return None
|
|
pub fn delete(&mut self, key: &K, key_len: uint) -> Option<T> {
|
|
/// Return value is (deletable, actual return value), where `deletable` is true
|
|
/// is true when the entire node can be deleted (i.e. it has no children)
|
|
fn recurse<T, K:BitArray+Eq+Zero+One+Add<K,K>+Shr<uint,K>+Shl<uint,K>>(tree: &mut PatriciaTree<T, K>, key: &K, key_len: uint) -> (bool, Option<T>) {
|
|
// If the search key is shorter than the node prefix, there is no
|
|
// way we can match, so fail.
|
|
if key_len < tree.skip_len as uint {
|
|
return (false, None);
|
|
}
|
|
|
|
// Key fails to match prefix --- no match
|
|
if tree.skip_prefix != key.mask(tree.skip_len as uint) {
|
|
return (false, None);
|
|
}
|
|
|
|
// If we are here, the key matches the prefix
|
|
if tree.skip_len as uint == key_len {
|
|
// Exact match -- delete and return
|
|
let ret = tree.data.take();
|
|
let bit = tree.child_r.is_some();
|
|
// First try to consolidate if there is only one child
|
|
if tree.child_l.is_some() && tree.child_r.is_some() {
|
|
// Two children means we cannot consolidate or delete
|
|
return (false, ret);
|
|
}
|
|
match (tree.child_l.take(), tree.child_r.take()) {
|
|
(Some(_), Some(_)) => unreachable!(),
|
|
(Some(consolidate), None) | (None, Some(consolidate)) => {
|
|
tree.data = consolidate.data;
|
|
tree.child_l = consolidate.child_l;
|
|
tree.child_r = consolidate.child_r;
|
|
let new_bit = if bit { let ret: K = One::one();
|
|
ret << (tree.skip_len as uint) }
|
|
else { Zero::zero() };
|
|
tree.skip_prefix = tree.skip_prefix +
|
|
new_bit +
|
|
(consolidate.skip_prefix << (1 + tree.skip_len as uint));
|
|
tree.skip_len += 1 + consolidate.skip_len;
|
|
return (false, ret);
|
|
}
|
|
// No children means this node is deletable
|
|
(None, None) => { return (true, ret); }
|
|
}
|
|
}
|
|
|
|
// Otherwise, the key is longer than the prefix and we need to recurse
|
|
let next_bit = key.bit(tree.skip_len as uint);
|
|
// Recursively get the return value. This awkward scope is required
|
|
// to shorten the time we mutably borrow the node's children -- we
|
|
// might want to borrow the sibling later, so the borrow needs to end.
|
|
let ret = {
|
|
let target = if next_bit { &mut tree.child_r } else { &mut tree.child_l };
|
|
|
|
// If we can't recurse, fail
|
|
if target.is_none() {
|
|
return (false, None);
|
|
}
|
|
// Otherwise, do it
|
|
let (delete_child, ret) = recurse(&mut **target.get_mut_ref(),
|
|
&key.shr(&(tree.skip_len as uint + 1)),
|
|
key_len - tree.skip_len as uint - 1);
|
|
if delete_child {
|
|
target.take();
|
|
}
|
|
ret
|
|
};
|
|
|
|
// The above block may have deleted the target. If we now have only one
|
|
// child, merge it into the parent. (If we have no children, mark this
|
|
// node for deletion.)
|
|
if tree.data.is_some() {
|
|
// First though, if this is a data node, we can neither delete nor
|
|
// consolidate it.
|
|
return (false, ret);
|
|
}
|
|
|
|
match (tree.child_r.is_some(), tree.child_l.take(), tree.child_r.take()) {
|
|
// Two children? Can't do anything, just sheepishly put them back
|
|
(_, Some(child_l), Some(child_r)) => {
|
|
tree.child_l = Some(child_l);
|
|
tree.child_r = Some(child_r);
|
|
return (false, ret);
|
|
}
|
|
// One child? Consolidate
|
|
(bit, Some(consolidate), None) | (bit, None, Some(consolidate)) => {
|
|
tree.data = consolidate.data;
|
|
tree.child_l = consolidate.child_l;
|
|
tree.child_r = consolidate.child_r;
|
|
let new_bit = if bit { let ret: K = One::one();
|
|
ret << (tree.skip_len as uint) }
|
|
else { Zero::zero() };
|
|
tree.skip_prefix = tree.skip_prefix +
|
|
new_bit +
|
|
(consolidate.skip_prefix << (1 + tree.skip_len as uint));
|
|
tree.skip_len += 1 + consolidate.skip_len;
|
|
return (false, ret);
|
|
}
|
|
// No children? Delete
|
|
(_, None, None) => {
|
|
return (true, ret);
|
|
}
|
|
}
|
|
}
|
|
let (_, ret) = recurse(self, key, key_len);
|
|
ret
|
|
}
|
|
|
|
/// Count all the nodes
|
|
pub fn node_count(&self) -> uint {
|
|
fn recurse<T, K>(node: &Option<Box<PatriciaTree<T, K>>>) -> uint {
|
|
match node {
|
|
&Some(ref node) => { 1 + recurse(&node.child_l) + recurse(&node.child_r) }
|
|
&None => 0
|
|
}
|
|
}
|
|
1 + recurse(&self.child_l) + recurse(&self.child_r)
|
|
}
|
|
}
|
|
|
|
impl<T:Show, K:BitArray> PatriciaTree<T, K> {
|
|
/// Print the entire tree
|
|
pub fn print<'a>(&'a self) {
|
|
fn recurse<'a, T:Show, K:BitArray>(tree: &'a PatriciaTree<T, K>, depth: uint) {
|
|
for i in range(0, tree.skip_len as uint) {
|
|
print!("{:}", if tree.skip_prefix.bit(i) { 1u } else { 0 });
|
|
}
|
|
println!(": {:}", tree.data);
|
|
// left gets no indentation
|
|
match tree.child_l {
|
|
Some(ref t) => {
|
|
for _ in range(0, depth + tree.skip_len as uint) {
|
|
print!("-");
|
|
}
|
|
print!("0");
|
|
recurse(&**t, depth + tree.skip_len as uint + 1);
|
|
}
|
|
None => { }
|
|
}
|
|
// right one gets indentation
|
|
match tree.child_r {
|
|
Some(ref t) => {
|
|
for _ in range(0, depth + tree.skip_len as uint) {
|
|
print!("_");
|
|
}
|
|
print!("1");
|
|
recurse(&**t, depth + tree.skip_len as uint + 1);
|
|
}
|
|
None => { }
|
|
}
|
|
}
|
|
recurse(self, 0);
|
|
}
|
|
}
|
|
|
|
impl<T:Serializable+'static, K:BitArray+Serializable+'static> Serializable for PatriciaTree<T, K> {
|
|
fn serialize(&self) -> Vec<u8> {
|
|
// Depth-first serialization
|
|
let mut ret = vec![];
|
|
// Serialize self, then children
|
|
ret.extend(self.skip_prefix.serialize().move_iter());
|
|
ret.extend(self.skip_len.serialize().move_iter());
|
|
ret.extend(self.data.serialize().move_iter());
|
|
ret.extend(self.child_l.serialize().move_iter());
|
|
ret.extend(self.child_r.serialize().move_iter());
|
|
ret
|
|
}
|
|
|
|
fn serialize_iter<'a>(&'a self) -> SerializeIter<'a> {
|
|
SerializeIter {
|
|
data_iter: None,
|
|
sub_iter_iter: box vec![ &self.skip_prefix as &Serializable,
|
|
&self.skip_len as &Serializable,
|
|
&self.data as &Serializable,
|
|
&self.child_l as &Serializable,
|
|
&self.child_r as &Serializable ].move_iter(),
|
|
sub_iter: None,
|
|
sub_started: false
|
|
}
|
|
}
|
|
|
|
fn deserialize<I: Iterator<u8>>(mut iter: I) -> IoResult<PatriciaTree<T, K>> {
|
|
// This goofy deserialization routine is to prevent an infinite
|
|
// regress of ByRef<ByRef<...<ByRef<I>>...>>, see #15188
|
|
fn recurse<T:Serializable, K:Serializable, I: Iterator<u8>>(iter: &mut ByRef<I>) -> IoResult<PatriciaTree<T, K>> {
|
|
Ok(PatriciaTree {
|
|
skip_prefix: try!(prepend_err("skip_prefix", Serializable::deserialize(iter.by_ref()))),
|
|
skip_len: try!(prepend_err("skip_len", Serializable::deserialize(iter.by_ref()))),
|
|
data: try!(prepend_err("data", Serializable::deserialize(iter.by_ref()))),
|
|
child_l: match iter.next() {
|
|
Some(1) => Some(box try!(prepend_err("child_l", recurse(iter)))),
|
|
Some(0) => None,
|
|
_ => { return Err(standard_error(InvalidInput)) }
|
|
},
|
|
child_r: match iter.next() {
|
|
Some(1) => Some(box try!(prepend_err("child_r", recurse(iter)))),
|
|
Some(0) => None,
|
|
_ => { return Err(standard_error(InvalidInput)) }
|
|
}
|
|
})
|
|
}
|
|
recurse(&mut iter.by_ref())
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use std::prelude::*;
    use std::io::IoResult;
    use std::num::Zero;

    use util::hash::Sha256dHash;
    use util::uint::Uint128;
    use util::uint::Uint256;
    use util::patricia_tree::PatriciaTree;
    use network::serialize::Serializable;

    /// Two keys where one is a prefix of the other: checks insert return
    /// values, exact-length lookups, and that deleting one key leaves the
    /// other reachable.
    #[test]
    fn patricia_single_insert_lookup_delete_test() {
        let mut key: Uint256 = FromPrimitive::from_u64(0xDEADBEEFDEADBEEF).unwrap();
        key = key + (key << 64);

        let mut tree = PatriciaTree::new();
        // Distinct values so the two entries can be told apart
        assert!(tree.insert(&key, 100, 100u32));
        assert!(tree.insert(&key, 120, 120u32));
        // A plain insert must not replace an existing entry
        assert!(!tree.insert(&key, 100, 7u32));

        assert_eq!(tree.lookup(&key, 100), Some(&100u32));
        assert_eq!(tree.lookup(&key, 120), Some(&120u32));
        assert_eq!(tree.lookup(&key, 101), None);
        assert_eq!(tree.lookup(&key, 99), None);

        // Removing the shorter key must leave the longer one in place
        assert_eq!(tree.delete(&key, 100), Some(100u32));
        assert_eq!(tree.lookup(&key, 100), None);
        assert_eq!(tree.lookup(&key, 120), Some(&120u32));

        // Removing the last key empties the tree
        assert_eq!(tree.delete(&key, 120), Some(120u32));
        assert_eq!(tree.lookup(&key, 120), None);
    }

    /// Bulk insert, delete every second key, and check what is left.
    #[test]
    fn patricia_insert_lookup_delete_test() {
        let mut tree = PatriciaTree::new();
        let mut hashes = vec![];
        for i in range(0u32, 5000) {
            let hash = Sha256dHash::from_data(&[(i / 0x100) as u8, (i % 0x100) as u8]).as_uint128();
            tree.insert(&hash, 250, i);
            hashes.push(hash);
        }

        // Check that all inserts are correct
        for (n, hash) in hashes.iter().enumerate() {
            let ii = n as u32;
            let ret = tree.lookup(hash, 250);
            assert_eq!(ret, Some(&ii));
        }

        // Delete all the odd-numbered nodes
        for (n, hash) in hashes.iter().enumerate() {
            if n % 2 == 1 {
                let ii = n as u32;
                let ret = tree.delete(hash, 250);
                assert_eq!(ret, Some(ii));
            }
        }

        // Confirm all is correct
        for (n, hash) in hashes.iter().enumerate() {
            let ii = n as u32;
            let ret = tree.lookup(hash, 250);
            if n % 2 == 0 {
                assert_eq!(ret, Some(&ii));
            } else {
                assert_eq!(ret, None);
            }
        }
    }

    #[test]
    fn patricia_insert_substring_keys() {
        // This test uses a bunch of keys that are substrings of each other
        // to make sure insertion and deletion does not lose data
        let mut tree = PatriciaTree::new();
        let mut hashes = vec![];
        // Start by inserting a bunch of chunder
        for i in range(1u32, 500) {
            let hash = Sha256dHash::from_data(&[(i / 0x100) as u8, (i % 0x100) as u8]).as_uint128();
            tree.insert(&hash, 128, i * 1000);
            hashes.push(hash);
        }
        // Do the actual test -- note that we also test insertion and deletion
        // at the root here.
        for i in range(0u32, 10) {
            tree.insert(&Zero::zero(), i as uint, i);
        }
        for i in range(0u32, 10) {
            let m = tree.lookup(&Zero::zero(), i as uint);
            assert_eq!(m, Some(&i));
        }
        for i in range(0u32, 10) {
            let m = tree.delete(&Zero::zero(), i as uint);
            assert_eq!(m, Some(i));
        }
        // Check that the chunder was unharmed
        for (n, hash) in hashes.iter().enumerate() {
            let ii = ((n + 1) * 1000) as u32;
            let ret = tree.lookup(hash, 128);
            assert_eq!(ret, Some(&ii));
        }
    }

    /// Round-trips a populated tree through both serializers and the
    /// deserializer.
    #[test]
    fn patricia_serialize_test() {
        // Build a tree
        let mut tree = PatriciaTree::new();
        let mut hashes = vec![];
        for i in range(0u32, 5000) {
            let hash = Sha256dHash::from_data(&[(i / 0x100) as u8, (i % 0x100) as u8]).as_uint128();
            tree.insert(&hash, 250, i);
            hashes.push(hash);
        }

        // Serialize it
        let serialized = tree.serialize();
        // Check iterator
        let serialized_1 = tree.serialize_iter().collect();
        assert_eq!(serialized, serialized_1);
        // Deserialize it
        let deserialized: IoResult<PatriciaTree<u32, Uint128>> = Serializable::deserialize(serialized.iter().map(|n| *n));
        assert!(deserialized.is_ok());
        let new_tree = deserialized.unwrap();

        // Check that all inserts are still there
        for (n, hash) in hashes.iter().enumerate() {
            let ii = n as u32;
            let ret = new_tree.lookup(hash, 250);
            assert_eq!(ret, Some(&ii));
        }
    }
}
|
|
|