From 0f897f80a5e2660f6e32e906850fcdd0502ab2a4 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 4 Sep 2024 09:16:59 +1000 Subject: [PATCH] Re-write (and re-name) read_uint_iter The `UintError` type (returned by `read_uint_iter`) is not that useful because one variant is unreachable. Re-write the function by doing:n - Re-write the function to reduce the error cases returned. - Re-name it to `read_push_data_len` - Move it to `internals` - Use `PushDataLenLen` enum instead of an int parameter --- bitcoin/src/blockdata/script/instruction.rs | 21 ++---- bitcoin/src/blockdata/script/mod.rs | 63 +++--------------- internals/src/lib.rs | 1 + internals/src/script.rs | 73 +++++++++++++++++++++ 4 files changed, 87 insertions(+), 71 deletions(-) create mode 100644 internals/src/script.rs diff --git a/bitcoin/src/blockdata/script/instruction.rs b/bitcoin/src/blockdata/script/instruction.rs index 8a1865a2b..23c0083d4 100644 --- a/bitcoin/src/blockdata/script/instruction.rs +++ b/bitcoin/src/blockdata/script/instruction.rs @@ -1,7 +1,9 @@ // SPDX-License-Identifier: CC0-1.0 +use internals::script::{self, PushDataLenLen}; + use super::{ - read_uint_iter, Error, PushBytes, Script, ScriptBuf, ScriptBufExtPriv as _, UintError, + Error, PushBytes, Script, ScriptBuf, ScriptBufExtPriv as _, }; use crate::opcodes::{self, Opcode}; @@ -129,13 +131,9 @@ impl<'a> Instructions<'a> { len: PushDataLenLen, min_push_len: usize, ) -> Option, Error>> { - let n = match read_uint_iter(&mut self.data, len as usize) { + let n = match script::read_push_data_len(&mut self.data, len) { Ok(n) => n, - // We do exhaustive matching to not forget to handle new variants if we extend - // `UintError` type. - // Overflow actually means early end of script (script is definitely shorter - // than `usize::MAX`) - Err(UintError::EarlyEndOfScript) | Err(UintError::NumericOverflow) => { + Err(_) => { self.kill(); return Some(Err(Error::EarlyEndOfScript)); } @@ -153,15 +151,6 @@ impl<'a> Instructions<'a> { } } -/// Allowed length of push data length. -/// -/// This makes it easier to prove correctness of `next_push_data_len`. -pub(super) enum PushDataLenLen { - One = 1, - Two = 2, - Four = 4, -} - impl<'a> Iterator for Instructions<'a> { type Item = Result, Error>; diff --git a/bitcoin/src/blockdata/script/mod.rs b/bitcoin/src/blockdata/script/mod.rs index 57a4bae9f..aee687b28 100644 --- a/bitcoin/src/blockdata/script/mod.rs +++ b/bitcoin/src/blockdata/script/mod.rs @@ -65,7 +65,8 @@ use core::fmt; use core::ops::{Deref, DerefMut}; use hashes::{hash160, sha256}; -use internals::impl_to_hex_from_lower_hex; +use internals::{impl_to_hex_from_lower_hex}; +use internals::script::{self, PushDataLenLen}; use io::{BufRead, Write}; use crate::consensus::{encode, Decodable, Encodable}; @@ -294,28 +295,6 @@ pub fn read_scriptbool(v: &[u8]) -> bool { } } -// We internally use implementation based on iterator so that it automatically advances as needed -// Errors are same as above, just different type. -fn read_uint_iter(data: &mut core::slice::Iter<'_, u8>, size: usize) -> Result { - if data.len() < size { - Err(UintError::EarlyEndOfScript) - } else if size > usize::from(u16::MAX / 8) { - // Casting to u32 would overflow - Err(UintError::NumericOverflow) - } else { - let mut ret = 0; - for (i, item) in data.take(size).enumerate() { - ret = usize::from(*item) - // Casting is safe because we checked above to not repeat the same check in a loop - .checked_shl((i * 8) as u32) - .ok_or(UintError::NumericOverflow)? - .checked_add(ret) - .ok_or(UintError::NumericOverflow)?; - } - Ok(ret) - } -} - fn opcode_to_verify(opcode: Option) -> Option { opcode.and_then(|opcode| match opcode { OP_EQUAL => Some(OP_EQUALVERIFY), @@ -661,23 +640,15 @@ impl Decodable for ScriptBuf { pub(super) fn bytes_to_asm_fmt(script: &[u8], f: &mut dyn fmt::Write) -> fmt::Result { // This has to be a macro because it needs to break the loop macro_rules! read_push_data_len { - ($iter:expr, $len:literal, $formatter:expr) => { - match read_uint_iter($iter, $len) { + ($iter:expr, $size:path, $formatter:expr) => { + match script::read_push_data_len($iter, $size) { Ok(n) => { n }, - Err(UintError::EarlyEndOfScript) => { + Err(_) => { $formatter.write_str("")?; break; } - // We got the data in a slice which implies it being shorter than `usize::MAX` - // So if we got overflow, we can confidently say the number is higher than length of - // the slice even though we don't know the exact number. This implies attempt to push - // past end. - Err(UintError::NumericOverflow) => { - $formatter.write_str("")?; - break; - } } } } @@ -698,15 +669,15 @@ pub(super) fn bytes_to_asm_fmt(script: &[u8], f: &mut dyn fmt::Write) -> fmt::Re match opcode { OP_PUSHDATA1 => { // side effects: may write and break from the loop - read_push_data_len!(&mut iter, 1, f) + read_push_data_len!(&mut iter, PushDataLenLen::One, f) } OP_PUSHDATA2 => { // side effects: may write and break from the loop - read_push_data_len!(&mut iter, 2, f) + read_push_data_len!(&mut iter, PushDataLenLen::Two, f) } OP_PUSHDATA4 => { // side effects: may write and break from the loop - read_push_data_len!(&mut iter, 4, f) + read_push_data_len!(&mut iter, PushDataLenLen::Four, f) } _ => 0, } @@ -791,24 +762,6 @@ impl std::error::Error for Error { } } -// Our internal error proves that we only return these two cases from `read_uint_iter`. -// Since it's private we don't bother with trait impls besides From. -enum UintError { - EarlyEndOfScript, - NumericOverflow, -} - -internals::impl_from_infallible!(UintError); - -impl From for Error { - fn from(error: UintError) -> Self { - match error { - UintError::EarlyEndOfScript => Error::EarlyEndOfScript, - UintError::NumericOverflow => Error::NumericOverflow, - } - } -} - /// Error while hashing a redeem script. #[derive(Debug, Clone, PartialEq, Eq)] pub struct RedeemScriptSizeError { diff --git a/internals/src/lib.rs b/internals/src/lib.rs index 4085072c8..36240abd4 100644 --- a/internals/src/lib.rs +++ b/internals/src/lib.rs @@ -39,6 +39,7 @@ pub mod const_tools; pub mod error; pub mod macros; mod parse; +pub mod script; #[cfg(feature = "serde")] #[macro_use] pub mod serde; diff --git a/internals/src/script.rs b/internals/src/script.rs new file mode 100644 index 000000000..89e2a326e --- /dev/null +++ b/internals/src/script.rs @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: CC0-1.0 + +//! Internal script related helper functions and types. + +/// Reads a `usize` from an iterator. +/// +/// A script push data instruction includes the length of the data being pushed, this function reads +/// that length from an iterator (encoded in either 1, 2, or 4 bytes). +// We internally use implementation based on iterator so that it automatically advances as needed. +pub fn read_push_data_len( + data: &mut core::slice::Iter<'_, u8>, + size: PushDataLenLen, +) -> Result { + // The `size` enum enforces that the maximum shift will be 32 and + // that we can only ever read up to 4 bytes. + let size = size as usize; + + if data.len() < size { + return Err(EarlyEndOfScriptError); + }; + + let mut ret = 0; + for (i, item) in data.take(size).enumerate() { + ret |= usize::from(*item) << (i * 8); + } + Ok(ret) +} + +/// The number of bytes used to encode an unsigned integer as the length of a push data instruction. +/// +/// This makes it easier to prove correctness of `next_push_data_len` and `read_push_data_len`. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum PushDataLenLen { + /// Unsigned integer comprising of a single byte. + One = 1, + /// Unsigned integer comprising of two bytes. + Two = 2, + /// Unsigned integer comprising of four bytes. + Four = 4, +} + +/// Indicates that we tried to read more bytes from the script than available. +#[derive(Debug)] +pub struct EarlyEndOfScriptError; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn reads_4_bytes() { + let bytes = [0x01, 0x23, 0x45, 0x67]; + let want = u32::from_le_bytes([0x01, 0x23, 0x45, 0x67]); + let got = read_push_data_len(&mut bytes.iter(), PushDataLenLen::Four).unwrap(); + assert_eq!(got, want as usize) + } + + #[test] + fn reads_2_bytes() { + let bytes = [0x01, 0x23]; + let want = u16::from_le_bytes([0x01, 0x23]); + let got = read_push_data_len(&mut bytes.iter(), PushDataLenLen::Two).unwrap(); + assert_eq!(got, want as usize) + } + + #[test] + fn reads_1_byte() { + let bytes = [0x01]; + let want = 0x01; + let got = read_push_data_len(&mut bytes.iter(), PushDataLenLen::One).unwrap(); + assert_eq!(got, want as usize) + } +}