From a9108d3939680695b530eb59abbbd84168ff2087 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 30 Jan 2023 13:05:11 +1100 Subject: [PATCH] Refactor script module The `script` module is large and unwieldy. Refactor the `script` module, splitting it up into a tree of modules. Here are a few of the changes and their stated benefits: - Split the two script types out into separate files: Readers of the methods can then tell immediately from the file name which type they are reading. - Put all the impls for the two script types together: Makes parsing the API easier because one can more quickly see which traits are implemented on what i.e., all the `AsRef` impls are grouped together. - Put the impls for the two script types in order, first `Script` then `ScriptBuf`: Makes it easier for us to see if we missed something. - Put the `Builder` and `Instruction` (and associated) types in their own modules: Some devs find long files hard to navigate; so far there hasn't been too much pushback against short files. - Put tests in a separate file: This idea was recently discussed. This is only moving code and fixing import statements etc. No other changes to the code. 
--- bitcoin/src/blockdata/script.rs | 2707 ----------------- bitcoin/src/blockdata/script/builder.rs | 149 + bitcoin/src/blockdata/script/instruction.rs | 228 ++ bitcoin/src/blockdata/script/mod.rs | 328 ++ bitcoin/src/blockdata/script/tests.rs | 660 ++++ .../src/blockdata/script/types/borrowed.rs | 512 ++++ bitcoin/src/blockdata/script/types/mod.rs | 527 ++++ bitcoin/src/blockdata/script/types/owned.rs | 373 +++ 8 files changed, 2777 insertions(+), 2707 deletions(-) delete mode 100644 bitcoin/src/blockdata/script.rs create mode 100644 bitcoin/src/blockdata/script/builder.rs create mode 100644 bitcoin/src/blockdata/script/instruction.rs create mode 100644 bitcoin/src/blockdata/script/mod.rs create mode 100644 bitcoin/src/blockdata/script/tests.rs create mode 100644 bitcoin/src/blockdata/script/types/borrowed.rs create mode 100644 bitcoin/src/blockdata/script/types/mod.rs create mode 100644 bitcoin/src/blockdata/script/types/owned.rs diff --git a/bitcoin/src/blockdata/script.rs b/bitcoin/src/blockdata/script.rs deleted file mode 100644 index cef569bb..00000000 --- a/bitcoin/src/blockdata/script.rs +++ /dev/null @@ -1,2707 +0,0 @@ -// Written in 2014 by Andrew Poelstra -// SPDX-License-Identifier: CC0-1.0 - -//! Bitcoin scripts. -//! -//! *[See also the `Script` type](Script).* -//! -//! This module provides the structures and functions needed to support scripts. -//! -//!
-//! What is Bitcoin script -//! -//! Scripts define Bitcoin's digital signature scheme: a signature is formed -//! from a script (the second half of which is defined by a coin to be spent, -//! and the first half provided by the spending transaction), and is valid iff -//! the script leaves `TRUE` on the stack after being evaluated. Bitcoin's -//! script is a stack-based assembly language similar in spirit to [Forth]. -//! -//! Script is represented as a sequence of bytes on the wire, each byte representing an operation, -//! or data to be pushed on the stack. -//! -//! See [Bitcoin Wiki: Script][wiki-script] for more information. -//! -//! [Forth]: https://en.wikipedia.org/wiki/Forth_(programming_language) -//! -//! [wiki-script]: https://en.bitcoin.it/wiki/Script -//!
-//! -//! In this library we chose to keep the byte representation in memory and decode opcodes only when -//! processing the script. This is similar to Rust choosing to represent strings as UTF-8-encoded -//! bytes rather than slice of `char`s. In both cases the individual items can have different sizes -//! and forcing them to be larger would waste memory and, in case of Bitcoin script, even some -//! performance (forcing allocations). -//! -//! ## `Script` vs `ScriptBuf` vs `Builder` -//! -//! These are the most important types in this module and they are quite similar, so it may seem -//! confusing what the differences are. `Script` is an unsized type much like `str` or `Path` are -//! and `ScriptBuf` is an owned counterpart to `Script` just like `String` is an owned counterpart -//! to `str`. -//! -//! However it is common to construct an owned script and then pass it around. For this case a -//! builder API is more convenient. To support this we provide `Builder` type which is very similar -//! to `ScriptBuf` but its methods take `self` instead of `&mut self` and return `Self`. It also -//! contains a cache that may make some modifications faster. This cache is usually not needed -//! outside of creating the script. -//! -//! At the time of writing there's only one operation using the cache - `push_verify`, so the cache -//! is minimal but we may extend it in the future if needed. 
- -use crate::prelude::*; - -use alloc::rc::Rc; -use alloc::sync::Arc; -use bitcoin_internals::debug_from_display; -use crate::io; -use core::cmp::Ordering; -use core::convert::TryFrom; -use core::borrow::{Borrow, BorrowMut}; -use core::{fmt, default::Default}; -use core::ops::{Deref, DerefMut, Index, Range, RangeFull, RangeFrom, RangeTo, RangeInclusive, RangeToInclusive}; -#[cfg(rust_v_1_53)] -use core::ops::Bound; - -#[cfg(feature = "serde")] use serde; - -use crate::hash_types::{PubkeyHash, WPubkeyHash, ScriptHash, WScriptHash}; -use crate::blockdata::opcodes::{self, all::*}; -use crate::consensus::{encode, Decodable, Encodable}; -use crate::hashes::{Hash, hex}; -use crate::policy::DUST_RELAY_TX_FEE; -#[cfg(feature="bitcoinconsensus")] use bitcoinconsensus; -#[cfg(feature="bitcoinconsensus")] use core::convert::From; -use crate::OutPoint; - -use crate::key::PublicKey; -use crate::address::{WitnessVersion, WitnessProgram}; -use crate::taproot::{LeafVersion, TapNodeHash, TapLeafHash}; -use secp256k1::{Secp256k1, Verification, XOnlyPublicKey}; -use crate::schnorr::{TapTweak, TweakedPublicKey, UntweakedPublicKey}; - -/// Bitcoin script slice. -/// -/// *[See also the `bitcoin::blockdata::script` module](crate::blockdata::script).* -/// -/// `Script` is a script slice, the most primitive script type. It's usually seen in its borrowed -/// form `&Script`. It is always encoded as a series of bytes representing the opcodes and data -/// pushes. -/// -/// ## Validity -/// -/// `Script` does not have any validity invariants - it's essentially just a marked slice of -/// bytes. This is similar to [`Path`](std::path::Path) vs [`OsStr`](std::ffi::OsStr) where they -/// are trivially cast-able to each-other and `Path` doesn't guarantee being a usable FS path but -/// having a newtype still has value because of added methods, readability and basic type checking. 
-/// -/// Although at least data pushes could be checked not to overflow the script, bad scripts are -/// allowed to be in a transaction (outputs just become unspendable) and there even are such -/// transactions in the chain. Thus we must allow such scripts to be placed in the transaction. -/// -/// ## Slicing safety -/// -/// Slicing is similar to how `str` works: some ranges may be incorrect and indexing by -/// `usize` is not supported. However, as opposed to `std`, we have no way of checking -/// correctness without causing linear complexity so there are **no panics on invalid -/// ranges!** If you supply an invalid range, you'll get a garbled script. -/// -/// The range is considered valid if it's at a boundary of instruction. Care must be taken -/// especially with push operations because you could get a reference to arbitrary -/// attacker-supplied bytes that look like a valid script. -/// -/// It is recommended to use `.instructions()` method to get an iterator over script -/// instructions and work with that instead. -/// -/// ## Memory safety -/// -/// The type is `#[repr(transparent)]` for internal purposes only! -/// No consumer crate may rely on the represenation of the struct! -/// -/// ## References -/// -/// -/// ### Bitcoin Core References -/// -/// * [CScript definition](https://github.com/bitcoin/bitcoin/blob/d492dc1cdaabdc52b0766bf4cba4bd73178325d0/src/script/script.h#L410) -/// -#[derive(PartialOrd, Ord, PartialEq, Eq, Hash)] -#[repr(transparent)] -pub struct Script([u8]); - -/// An owned, growable script. -/// -/// `ScriptBuf` is the most common script type that has the ownership over the contents of the -/// script. It has a close relationship with its borrowed counterpart, [`Script`]. -/// -/// Just as other similar types, this implements [`Deref`], so [deref coercions] apply. Also note -/// that all the safety/validity restrictions that apply to [`Script`] apply to `ScriptBuf` as well. 
-/// -/// [deref coercions]: https://doc.rust-lang.org/std/ops/trait.Deref.html#more-on-deref-coercion -#[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] -pub struct ScriptBuf(Vec); - -impl ToOwned for Script { - type Owned = ScriptBuf; - - fn to_owned(&self) -> Self::Owned { - ScriptBuf(self.0.to_owned()) - } -} - -impl Script { - /// Treat byte slice as `Script` - #[inline] - pub fn from_bytes(bytes: &[u8]) -> &Script { - // SAFETY: copied from `std` - // The pointer was just created from a reference which is still alive. - // Casting slice pointer to a transparent struct wrapping that slice is sound (same - // layout). - unsafe { - &*(bytes as *const [u8] as *const Script) - } - } - - /// Treat mutable byte slice as `Script` - #[inline] - pub fn from_bytes_mut(bytes: &mut [u8]) -> &mut Script { - // SAFETY: copied from `std` - // The pointer was just created from a reference which is still alive. - // Casting slice pointer to a transparent struct wrapping that slice is sound (same - // layout). - // Function signature prevents callers from accessing `bytes` while the returned reference - // is alive. - unsafe { - &mut *(bytes as *mut [u8] as *mut Script) - } - } - - /// Returns the script data as a byte slice. - #[inline] - pub fn as_bytes(&self) -> &[u8] { - &self.0 - } - - /// Returns the script data as a mutable byte slice. - #[inline] - pub fn as_mut_bytes(&mut self) -> &mut [u8] { - &mut self.0 - } - - /// Creates a new empty script. - #[inline] - pub fn empty() -> &'static Script { Script::from_bytes(&[]) } - - /// Creates a new script builder - pub fn builder() -> Builder { - Builder::new() - } - - /// Returns 160-bit hash of the script. - #[inline] - pub fn script_hash(&self) -> ScriptHash { - ScriptHash::hash(self.as_bytes()) - } - - /// Returns 256-bit hash of the script for P2WSH outputs. - #[inline] - pub fn wscript_hash(&self) -> WScriptHash { - WScriptHash::hash(self.as_bytes()) - } - - /// Computes leaf hash of tapscript. 
- #[inline] - pub fn tapscript_leaf_hash(&self) -> TapLeafHash { - TapLeafHash::from_script(self, LeafVersion::TapScript) - } - - /// Returns the length in bytes of the script. - #[inline] - pub fn len(&self) -> usize { self.0.len() } - - /// Returns whether the script is the empty script. - #[inline] - pub fn is_empty(&self) -> bool { self.0.is_empty() } - - /// Returns a copy of the script data. - #[inline] - pub fn to_bytes(&self) -> Vec { self.0.to_owned() } - - /// Returns an iterator over script bytes. - #[inline] - pub fn bytes(&self) -> Bytes<'_> { - Bytes(self.as_bytes().iter().copied()) - } - - /// Computes the P2WSH output corresponding to this witnessScript (aka the "witness redeem - /// script"). - #[inline] - pub fn to_v0_p2wsh(&self) -> ScriptBuf { - ScriptBuf::new_v0_p2wsh(&self.wscript_hash()) - } - - /// Computes P2TR output with a given internal key and a single script spending path equal to - /// the current script, assuming that the script is a Tapscript. - #[inline] - pub fn to_v1_p2tr(&self, secp: &Secp256k1, internal_key: UntweakedPublicKey) -> ScriptBuf { - let leaf_hash = self.tapscript_leaf_hash(); - let merkle_root = TapNodeHash::from(leaf_hash); - ScriptBuf::new_v1_p2tr(secp, internal_key, Some(merkle_root)) - } - - /// Returns witness version of the script, if any, assuming the script is a `scriptPubkey`. - #[inline] - pub fn witness_version(&self) -> Option { - self.0.first().and_then(|opcode| WitnessVersion::try_from(opcodes::All::from(*opcode)).ok()) - } - - /// Checks whether a script pubkey is a P2SH output. - #[inline] - pub fn is_p2sh(&self) -> bool { - self.0.len() == 23 - && self.0[0] == OP_HASH160.to_u8() - && self.0[1] == OP_PUSHBYTES_20.to_u8() - && self.0[22] == OP_EQUAL.to_u8() - } - - /// Checks whether a script pubkey is a P2PKH output. 
- #[inline] - pub fn is_p2pkh(&self) -> bool { - self.0.len() == 25 - && self.0[0] == OP_DUP.to_u8() - && self.0[1] == OP_HASH160.to_u8() - && self.0[2] == OP_PUSHBYTES_20.to_u8() - && self.0[23] == OP_EQUALVERIFY.to_u8() - && self.0[24] == OP_CHECKSIG.to_u8() - } - - /// Checks whether a script pubkey is a P2PK output. - /// - /// You can obtain the public key, if its valid, - /// by calling [`p2pk_public_key()`](Self::p2pk_public_key) - #[inline] - pub fn is_p2pk(&self) -> bool { - self.p2pk_pubkey_bytes().is_some() - } - - /// Returns the public key if this script is P2PK with a **valid** public key. - /// - /// This may return `None` even when [`is_p2pk()`](Self::is_p2pk) returns true. - /// This happens when the public key is invalid (e.g. the point not being on the curve). - /// It also implies the script is unspendable. - #[inline] - pub fn p2pk_public_key(&self) -> Option { - PublicKey::from_slice(self.p2pk_pubkey_bytes()?).ok() - } - - /// Returns the bytes of the (possibly invalid) public key if this script is P2PK. - #[inline] - fn p2pk_pubkey_bytes(&self) -> Option<&[u8]> { - match self.len() { - 67 if self.0[0] == OP_PUSHBYTES_65.to_u8() - && self.0[66] == OP_CHECKSIG.to_u8() => { - Some(&self.0[1..66]) - } - 35 if self.0[0] == OP_PUSHBYTES_33.to_u8() - && self.0[34] == OP_CHECKSIG.to_u8() => { - Some(&self.0[1..34]) - } - _ => None - } - } - - /// Checks whether a script pubkey is a Segregated Witness (segwit) program. - #[inline] - pub fn is_witness_program(&self) -> bool { - // A scriptPubKey (or redeemScript as defined in BIP16/P2SH) that consists of a 1-byte - // push opcode (for 0 to 16) followed by a data push between 2 and 40 bytes gets a new - // special meaning. The value of the first push is called the "version byte". The following - // byte vector pushed is called the "witness program". 
- let script_len = self.0.len(); - if !(4..=42).contains(&script_len) { - return false - } - let ver_opcode = opcodes::All::from(self.0[0]); // Version 0 or PUSHNUM_1-PUSHNUM_16 - let push_opbyte = self.0[1]; // Second byte push opcode 2-40 bytes - WitnessVersion::try_from(ver_opcode).is_ok() - && push_opbyte >= OP_PUSHBYTES_2.to_u8() - && push_opbyte <= OP_PUSHBYTES_40.to_u8() - // Check that the rest of the script has the correct size - && script_len - 2 == push_opbyte as usize - } - - /// Checks whether a script pubkey is a P2WSH output. - #[inline] - pub fn is_v0_p2wsh(&self) -> bool { - self.0.len() == 34 - && self.witness_version() == Some(WitnessVersion::V0) - && self.0[1] == OP_PUSHBYTES_32.to_u8() - } - - /// Checks whether a script pubkey is a P2WPKH output. - #[inline] - pub fn is_v0_p2wpkh(&self) -> bool { - self.0.len() == 22 - && self.witness_version() == Some(WitnessVersion::V0) - && self.0[1] == OP_PUSHBYTES_20.to_u8() - } - - /// Checks whether a script pubkey is a P2TR output. - #[inline] - pub fn is_v1_p2tr(&self) -> bool { - self.0.len() == 34 - && self.witness_version() == Some(WitnessVersion::V1) - && self.0[1] == OP_PUSHBYTES_32.to_u8() - } - - /// Check if this is an OP_RETURN output. - #[inline] - pub fn is_op_return (&self) -> bool { - match self.0.first() { - Some(b) => *b == OP_RETURN.to_u8(), - None => false - } - } - - /// Checks whether a script can be proven to have no satisfying input. - #[inline] - pub fn is_provably_unspendable(&self) -> bool { - use crate::blockdata::opcodes::Class::{ReturnOp, IllegalOp}; - - match self.0.first() { - Some(b) => { - let first = opcodes::All::from(*b); - let class = first.classify(opcodes::ClassifyContext::Legacy); - - class == ReturnOp || class == IllegalOp - }, - None => false, - } - } - - /// Returns the minimum value an output with this script should have in order to be - /// broadcastable on today's Bitcoin network. 
- pub fn dust_value(&self) -> crate::Amount { - // This must never be lower than Bitcoin Core's GetDustThreshold() (as of v0.21) as it may - // otherwise allow users to create transactions which likely can never be broadcast/confirmed. - let sats = DUST_RELAY_TX_FEE as u64 / 1000 * // The default dust relay fee is 3000 satoshi/kB (i.e. 3 sat/vByte) - if self.is_op_return() { - 0 - } else if self.is_witness_program() { - 32 + 4 + 1 + (107 / 4) + 4 + // The spend cost copied from Core - 8 + // The serialized size of the TxOut's amount field - self.consensus_encode(&mut sink()).expect("sinks don't error") as u64 // The serialized size of this script_pubkey - } else { - 32 + 4 + 1 + 107 + 4 + // The spend cost copied from Core - 8 + // The serialized size of the TxOut's amount field - self.consensus_encode(&mut sink()).expect("sinks don't error") as u64 // The serialized size of this script_pubkey - }; - - crate::Amount::from_sat(sats) - } - - /// Iterates over the script instructions. - /// - /// Each returned item is a nested enum covering opcodes, datapushes and errors. - /// At most one error will be returned and then the iterator will end. To instead iterate over - /// the script as sequence of bytes call the [`bytes`](Self::bytes) method. - /// - /// To force minimal pushes, use [`instructions_minimal`](Self::instructions_minimal). - #[inline] - pub fn instructions(&self) -> Instructions { - Instructions { - data: self.0.iter(), - enforce_minimal: false, - } - } - - /// Iterates over the script instructions while enforcing minimal pushes. - /// - /// This is similar to [`instructions`](Self::instructions) but an error is returned if a push - /// is not minimal. - #[inline] - pub fn instructions_minimal(&self) -> Instructions { - Instructions { - data: self.0.iter(), - enforce_minimal: true, - } - } - - /// Iterates over the script instructions and their indices. 
- /// - /// Unless the script contains an error, the returned item consists of an index pointing to the - /// position in the script where the instruction begins and the decoded instruction - either an - /// opcode or data push. - /// - /// To force minimal pushes, use [`Self::instruction_indices_minimal`]. - #[inline] - pub fn instruction_indices(&self) -> InstructionIndices { - InstructionIndices::from_instructions(self.instructions()) - } - - /// Iterates over the script instructions and their indices while enforcing minimal pushes. - /// - /// This is similar to [`instruction_indices`](Self::instruction_indices) but an error is - /// returned if a push is not minimal. - #[inline] - pub fn instruction_indices_minimal(&self) -> InstructionIndices { - InstructionIndices::from_instructions(self.instructions_minimal()) - } - - /// Shorthand for [`Self::verify_with_flags`] with flag [bitcoinconsensus::VERIFY_ALL]. - /// - /// # Parameters - /// * `index` - The input index in spending which is spending this transaction. - /// * `amount` - The amount this script guards. - /// * `spending_tx` - The transaction that attempts to spend the output holding this script. - #[cfg(feature="bitcoinconsensus")] - #[cfg_attr(docsrs, doc(cfg(feature = "bitcoinconsensus")))] - pub fn verify (&self, index: usize, amount: crate::Amount, spending_tx: &[u8]) -> Result<(), Error> { - self.verify_with_flags(index, amount, spending_tx, bitcoinconsensus::VERIFY_ALL) - } - - /// Verifies spend of an input script. - /// - /// # Parameters - /// * `index` - The input index in spending which is spending this transaction. - /// * `amount` - The amount this script guards. - /// * `spending_tx` - The transaction that attempts to spend the output holding this script. - /// * `flags` - Verification flags, see [`bitcoinconsensus::VERIFY_ALL`] and similar. 
- #[cfg(feature="bitcoinconsensus")] - #[cfg_attr(docsrs, doc(cfg(feature = "bitcoinconsensus")))] - pub fn verify_with_flags>(&self, index: usize, amount: crate::Amount, spending_tx: &[u8], flags: F) -> Result<(), Error> { - Ok(bitcoinconsensus::verify_with_flags (&self.0[..], amount.to_sat(), spending_tx, index, flags.into())?) - } - - /// Writes the assembly decoding of the script to the formatter. - pub fn fmt_asm(&self, f: &mut dyn fmt::Write) -> fmt::Result { - bytes_to_asm_fmt(self.as_ref(), f) - } - - /// Returns the assembly decoding of the script. - pub fn to_asm_string(&self) -> String { - let mut buf = String::new(); - self.fmt_asm(&mut buf).unwrap(); - buf - } - - /// Formats the script as lower-case hex. - /// - /// This is a more convenient and performant way to write `format!("{:x}", script)`. - /// For better performance you should generally prefer displaying the script but if `String` is - /// required (this is common in tests) this method is can be used. - pub fn to_hex_string(&self) -> String { - self.as_bytes().to_lower_hex_string() - } - - /// Returns the first opcode of the script (if there is any). - pub fn first_opcode(&self) -> Option { - self.as_bytes().first().copied().map(From::from) - } - - /// Iterates the script to find the last opcode. - /// - /// Returns `None` is the instruction is data push or if the script is empty. - fn last_opcode(&self) -> Option { - match self.instructions().last() { - Some(Ok(Instruction::Op(op))) => Some(op), - _ => None, - } - } - - /// Converts a [`Box