merkle_node: rewrite algorithm

Drop recursion, reduce memory usage to be logarithmic in size of tree
rather than linear, and put it all in one function rather than three.

Also make the method a trait method on MerkleNode, which makes it
easier on type inference, by writing e.g. TxMerkleNode::calculate_root.
This commit is contained in:
Andrew Poelstra 2024-06-16 14:59:49 +00:00
parent 2bc97b22e2
commit f7ce9bbee8
No known key found for this signature in database
GPG Key ID: C588D63CE41B97C1
3 changed files with 59 additions and 92 deletions

View File

@ -17,7 +17,7 @@ use crate::blockdata::script;
use crate::blockdata::transaction::{Transaction, Wtxid};
use crate::consensus::{encode, Decodable, Encodable, Params};
use crate::internal_macros::{impl_consensus_encoding, impl_hashencode};
use crate::merkle_tree::{self, TxMerkleNode, WitnessMerkleNode};
use crate::merkle_tree::{MerkleNode as _, TxMerkleNode, WitnessMerkleNode};
use crate::pow::{CompactTarget, Target, Work};
use crate::prelude::*;
use crate::VarInt;
@ -281,7 +281,7 @@ impl Block {
/// Computes the transaction merkle root.
pub fn compute_merkle_root(&self) -> Option<TxMerkleNode> {
let hashes = self.txdata.iter().map(|obj| obj.compute_txid());
merkle_tree::calculate_root(hashes)
TxMerkleNode::calculate_root(hashes)
}
/// Computes the witness commitment for the block's transaction list.
@ -305,7 +305,7 @@ impl Block {
t.compute_wtxid()
}
});
merkle_tree::calculate_root(hashes)
WitnessMerkleNode::calculate_root(hashes)
}
/// Returns the weight of the block.

View File

@ -515,7 +515,7 @@ impl std::error::Error for MerkleBlockError {
#[cfg(test)]
mod tests {
#[cfg(feature = "rand-std")]
use {crate::merkle_tree, core::cmp, secp256k1::rand::prelude::*};
use {crate::merkle_tree::MerkleNode as _, core::cmp, secp256k1::rand::prelude::*};
use super::*;
use crate::consensus::encode;
@ -564,8 +564,7 @@ mod tests {
// Calculate the merkle root and height
let hashes = tx_ids.iter().copied();
let merkle_root_1: TxMerkleNode =
merkle_tree::calculate_root(hashes).expect("hashes is not empty");
let merkle_root_1 = TxMerkleNode::calculate_root(hashes).expect("hashes is not empty");
let mut height = 1;
let mut ntx = tx_count;
while ntx > 1 {

View File

@ -5,20 +5,17 @@
//! # Examples
//!
//! ```
//! # use bitcoin::{merkle_tree, Txid};
//! # use bitcoin::merkle_tree::TxMerkleNode;
//! # use bitcoin::Txid;
//! # use bitcoin::merkle_tree::{MerkleNode as _, TxMerkleNode};
//! # use bitcoin::hashes::Hash;
//! # let tx1 = Txid::all_zeros(); // Dummy hash values.
//! # let tx2 = Txid::all_zeros();
//! let tx_hashes = vec![tx1, tx2]; // All the hashes we wish to merkelize.
//! let root: Option<TxMerkleNode> = merkle_tree::calculate_root(tx_hashes.into_iter());
//! let root = TxMerkleNode::calculate_root(tx_hashes.into_iter());
//! ```
mod block;
use core::cmp::min;
use core::iter;
use hashes::{sha256d, HashEngine as _};
use crate::internal_macros::impl_hashencode;
@ -39,6 +36,14 @@ impl_hashencode!(TxMerkleNode);
impl_hashencode!(WitnessMerkleNode);
/// A node in a Merkle tree of transactions or witness data within a block.
///
/// This trait is used to compute the transaction Merkle root contained in
/// a block header. This is a particularly weird algorithm -- it interprets
/// the list of transactions as a balanced binary tree, duplicating branches
/// as needed to fill out the tree to a power of two size.
///
/// Other Merkle trees in Bitcoin, such as those used in Taproot commitments,
/// do not use this algorithm and cannot use this trait.
pub trait MerkleNode: Copy {
/// The hash (TXID or WTXID) of a transaction in the tree.
type Leaf;
@ -47,6 +52,43 @@ pub trait MerkleNode: Copy {
fn from_leaf(leaf: Self::Leaf) -> Self;
/// Combine two nodes to get a single node. The final node of a tree is called the "root".
fn combine(&self, other: &Self) -> Self;
/// Computes the Merkle root of the leaves produced by `iter`.
///
/// Returns `None` iff `iter` yields no leaves.
fn calculate_root<I: Iterator<Item = Self::Leaf>>(iter: I) -> Option<Self> {
    // Entries are `(height, node)` pairs, where `node` is the root of a
    // fully populated subtree of the given height.
    let mut pending: Vec<(usize, Self)> = Vec::with_capacity(32);

    // Phase 1: a standard Merkle root computation, consuming leaves from
    // left to right.
    for (index, leaf) in iter.enumerate() {
        pending.push((0, Self::from_leaf(leaf)));

        // Each trailing one-bit of the leaf index means the two topmost
        // subtrees have the same height and can be merged into their parent.
        let mut bits = index;
        while bits & 1 == 1 {
            let (right_height, right_node) = pending.pop().unwrap();
            let (left_height, left_node) = pending.pop().unwrap();
            debug_assert_eq!(left_height, right_height);
            pending.push((left_height + 1, left_node.combine(&right_node)));
            bits >>= 1;
        }
    }

    // Phase 2: finish off incomplete trees. Rather than treating incomplete
    // branches specially, Bitcoin fills out the tree by doubling-up nodes.
    // Together with its conflation of leaves with leaf hashes, this makes
    // the construction theoretically (though probably not practically)
    // vulnerable to collisions. This is consensus logic, so it has to be
    // reproduced exactly as it is.
    while pending.len() > 1 {
        let (mut right_height, mut right_node) = pending.pop().unwrap();
        let (left_height, left_node) = pending.pop().unwrap();
        while right_height != left_height {
            assert!(right_height < left_height);
            // Pair the node with itself to lift it one level.
            right_node = right_node.combine(&right_node);
            right_height += 1;
        }
        pending.push((left_height + 1, left_node.combine(&right_node)));
    }

    pending.pop().map(|(_, root)| root)
}
}
// These two impl blocks are identical. FIXME once we have nailed down
@ -76,75 +118,6 @@ impl MerkleNode for WitnessMerkleNode {
}
}
/// Computes the merkle root of a slice of *hashes*, using `hashes` itself as scratch space.
///
/// Prefer [`calculate_root`] in most cases. When `hashes` holds more than one element its
/// contents are overwritten during the computation, so the slice must be treated as garbage
/// once this function returns.
///
/// # Returns
///
/// - `None` if `hashes` is empty. The merkle root of an empty tree of hashes is undefined.
/// - `Some(hash)` if `hashes` contains one element. A single hash is by definition the merkle root.
/// - `Some(merkle_root)` if length of `hashes` is greater than one.
pub fn calculate_root_inline<T: MerkleNode>(hashes: &mut [T]) -> Option<T> {
    if hashes.len() > 1 {
        return Some(merkle_root_r(hashes));
    }
    // Zero or one element: there is nothing to combine, so the answer is
    // simply the sole element, if there is one.
    hashes.first().copied()
}
/// Computes the merkle root of the *hashes* yielded by an iterator.
///
/// # Returns
///
/// - `None` if `hashes` is empty. The merkle root of an empty tree of hashes is undefined.
/// - `Some(hash)` if `hashes` contains one element. A single hash is by definition the merkle root.
/// - `Some(merkle_root)` if length of `hashes` is greater than one.
pub fn calculate_root<T, I>(mut hashes: I) -> Option<T>
where
    T: MerkleNode,
    I: Iterator<Item = T::Leaf>,
{
    // Peel off up to two leaves so the empty and single-leaf cases can be
    // answered without allocating.
    let leading: T::Leaf = hashes.next()?;
    let following = if let Some(leaf) = hashes.next() {
        leaf
    } else {
        return Some(T::from_leaf(leading));
    };

    // Put the two peeled leaves back in front of the remaining ones.
    let mut leaves = iter::once(leading).chain(iter::once(following)).chain(hashes);

    // `merkle_root_r` works on a local buffer. Rather than copying every leaf
    // into it, the first round of pairing is done while filling the buffer,
    // so it only needs room for about half as many nodes as there are leaves.
    let (lower, upper) = leaves.size_hint();
    let capacity = upper.unwrap_or(lower) / 2 + 1;
    let mut level = Vec::with_capacity(capacity);
    loop {
        let left = match leaves.next() {
            Some(leaf) => T::from_leaf(leaf),
            None => break,
        };
        // With an odd number of leaves the last one is paired with itself.
        let right = match leaves.next() {
            Some(leaf) => T::from_leaf(leaf),
            None => left,
        };
        level.push(left.combine(&right));
    }

    Some(merkle_root_r(&mut level))
}
// Reduces `hashes` in place to its merkle root and returns that root.
//
// Each round overwrites the front of the slice with the parents of the
// current level; when a level has an odd number of nodes, the last node is
// combined with itself. The contents of `hashes` are unspecified afterwards.
//
// `hashes` must contain at least one hash. Panics if it is empty (an empty
// slice has no root to return).
fn merkle_root_r<T: MerkleNode>(hashes: &mut [T]) -> T {
    assert!(!hashes.is_empty(), "merkle_root_r requires at least one hash");

    // Number of nodes in the level currently stored at the front of `hashes`.
    let mut width = hashes.len();
    while width > 1 {
        let parents = (width + 1) / 2;
        for parent in 0..parents {
            let left = 2 * parent;
            // Clamp to the last node so an unpaired node is paired with itself.
            let right = min(left + 1, width - 1);
            // Writing slot `parent` is safe: it is never beyond `left`, so no
            // child that is still needed gets overwritten.
            hashes[parent] = hashes[left].combine(&hashes[right]);
        }
        width = parents;
    }
    hashes[0]
}
#[cfg(test)]
mod tests {
use super::*;
@ -152,21 +125,16 @@ mod tests {
use crate::consensus::encode::deserialize;
#[test]
fn both_merkle_root_functions_return_the_same_result() {
fn static_vector() {
// testnet block 000000000000045e0b1660b6445b5e5c5ab63c9a4f956be7e1e69be04fa4497b
let segwit_block = include_bytes!("../../tests/data/testnet_block_000000000000045e0b1660b6445b5e5c5ab63c9a4f956be7e1e69be04fa4497b.raw");
let block: Block = deserialize(&segwit_block[..]).expect("Failed to deserialize block");
assert!(block.check_merkle_root()); // Sanity check.
assert!(block.check_merkle_root());
// Same as `block.check_merkle_root` but do it explicitly.
let hashes_iter = block.txdata.iter().map(|obj| obj.compute_txid());
let mut hashes_array = [TxMerkleNode::all_zeros(); 15];
for (i, hash) in hashes_iter.clone().enumerate() {
hashes_array[i] = TxMerkleNode::from_leaf(hash);
}
let from_iter = calculate_root(hashes_iter);
let from_array = calculate_root_inline(&mut hashes_array);
assert_eq!(from_iter, from_array);
let from_iter = TxMerkleNode::calculate_root(hashes_iter.clone());
assert_eq!(from_iter, Some(block.header.merkle_root));
}
}