Merge rust-bitcoin/rust-bitcoin#3293: priority: Re-write (and re-name) `read_uint_iter`

0f897f80a5 Re-write (and re-name) read_uint_iter (Tobin C. Harding) Pull request description: The `UintError` type (returned by `read_uint_iter`) is not that useful because one variant is unreachable. Re-write the function by doing:n - Re-write the function to reduce the error cases returned. - Re-name it to `read_push_data_len` - Move it to `internals` - Use `PushDataLenLen` enum instead of an int parameter ACKs for top commit: apoelstra: ACK 0f897f80a5 successfully ran local tests; lol so much better than the old code Kixunil: ACK 0f897f80a5 Tree-SHA512: 095017a32c2d5bb2268cb1a059d0022e122faf8b41295f14970e7968374dd1c35c3b95357aba5aabaa17843439aebc237000009015ea9b8bc58ab1b337e8e1bc
2024-09-09 16:37:30 +00:00 · 2024-09-09 16:37:30 +00:00 · cfe6c0a999
parent 929eaf23d4 0f897f80a5
commit cfe6c0a999
4 changed files with 87 additions and 71 deletions
--- a/bitcoin/src/blockdata/script/instruction.rs
+++ b/bitcoin/src/blockdata/script/instruction.rs
@ -1,7 +1,9 @@
 // SPDX-License-Identifier: CC0-1.0

+use internals::script::{self, PushDataLenLen};
+
 use super::{
-    read_uint_iter, Error, PushBytes, Script, ScriptBuf, ScriptBufExtPriv as _, UintError,
+    Error, PushBytes, Script, ScriptBuf, ScriptBufExtPriv as _,
 };
 use crate::opcodes::{self, Opcode};

@ -129,13 +131,9 @@ impl<'a> Instructions<'a> {
        len: PushDataLenLen,
        min_push_len: usize,
    ) -> Option<Result<Instruction<'a>, Error>> {
-        let n = match read_uint_iter(&mut self.data, len as usize) {
+        let n = match script::read_push_data_len(&mut self.data, len) {
            Ok(n) => n,
-            // We do exhaustive matching to not forget to handle new variants if we extend
-            // `UintError` type.
-            // Overflow actually means early end of script (script is definitely shorter
-            // than `usize::MAX`)
-            Err(UintError::EarlyEndOfScript) | Err(UintError::NumericOverflow) => {
+            Err(_) => {
                self.kill();
                return Some(Err(Error::EarlyEndOfScript));
            }
@ -153,15 +151,6 @@ impl<'a> Instructions<'a> {
    }
 }

-/// Allowed length of push data length.
-///
-/// This makes it easier to prove correctness of `next_push_data_len`.
-pub(super) enum PushDataLenLen {
-    One = 1,
-    Two = 2,
-    Four = 4,
-}
-
 impl<'a> Iterator for Instructions<'a> {
    type Item = Result<Instruction<'a>, Error>;

--- a/bitcoin/src/blockdata/script/mod.rs
+++ b/bitcoin/src/blockdata/script/mod.rs
@ -65,7 +65,8 @@ use core::fmt;
 use core::ops::{Deref, DerefMut};

 use hashes::{hash160, sha256};
-use internals::impl_to_hex_from_lower_hex;
+use internals::{impl_to_hex_from_lower_hex};
+use internals::script::{self, PushDataLenLen};
 use io::{BufRead, Write};

 use crate::consensus::{encode, Decodable, Encodable};
@ -294,28 +295,6 @@ pub fn read_scriptbool(v: &[u8]) -> bool {
    }
 }

-// We internally use implementation based on iterator so that it automatically advances as needed
-// Errors are same as above, just different type.
-fn read_uint_iter(data: &mut core::slice::Iter<'_, u8>, size: usize) -> Result<usize, UintError> {
-    if data.len() < size {
-        Err(UintError::EarlyEndOfScript)
-    } else if size > usize::from(u16::MAX / 8) {
-        // Casting to u32 would overflow
-        Err(UintError::NumericOverflow)
-    } else {
-        let mut ret = 0;
-        for (i, item) in data.take(size).enumerate() {
-            ret = usize::from(*item)
-                // Casting is safe because we checked above to not repeat the same check in a loop
-                .checked_shl((i * 8) as u32)
-                .ok_or(UintError::NumericOverflow)?
-                .checked_add(ret)
-                .ok_or(UintError::NumericOverflow)?;
-        }
-        Ok(ret)
-    }
-}
-
 fn opcode_to_verify(opcode: Option<Opcode>) -> Option<Opcode> {
    opcode.and_then(|opcode| match opcode {
        OP_EQUAL => Some(OP_EQUALVERIFY),
@ -661,23 +640,15 @@ impl Decodable for ScriptBuf {
 pub(super) fn bytes_to_asm_fmt(script: &[u8], f: &mut dyn fmt::Write) -> fmt::Result {
    // This has to be a macro because it needs to break the loop
    macro_rules! read_push_data_len {
-        ($iter:expr, $len:literal, $formatter:expr) => {
-            match read_uint_iter($iter, $len) {
+        ($iter:expr, $size:path, $formatter:expr) => {
+            match script::read_push_data_len($iter, $size) {
                Ok(n) => {
                    n
                },
-                Err(UintError::EarlyEndOfScript) => {
+                Err(_) => {
                    $formatter.write_str("<unexpected end>")?;
                    break;
                }
-                // We got the data in a slice which implies it being shorter than `usize::MAX`
-                // So if we got overflow, we can confidently say the number is higher than length of
-                // the slice even though we don't know the exact number. This implies attempt to push
-                // past end.
-                Err(UintError::NumericOverflow) => {
-                    $formatter.write_str("<push past end>")?;
-                    break;
-                }
            }
        }
    }
@ -698,15 +669,15 @@ pub(super) fn bytes_to_asm_fmt(script: &[u8], f: &mut dyn fmt::Write) -> fmt::Re
            match opcode {
                OP_PUSHDATA1 => {
                    // side effects: may write and break from the loop
-                    read_push_data_len!(&mut iter, 1, f)
+                    read_push_data_len!(&mut iter, PushDataLenLen::One, f)
                }
                OP_PUSHDATA2 => {
                    // side effects: may write and break from the loop
-                    read_push_data_len!(&mut iter, 2, f)
+                    read_push_data_len!(&mut iter, PushDataLenLen::Two, f)
                }
                OP_PUSHDATA4 => {
                    // side effects: may write and break from the loop
-                    read_push_data_len!(&mut iter, 4, f)
+                    read_push_data_len!(&mut iter, PushDataLenLen::Four, f)
                }
                _ => 0,
            }
@ -791,24 +762,6 @@ impl std::error::Error for Error {
    }
 }

-// Our internal error proves that we only return these two cases from `read_uint_iter`.
-// Since it's private we don't bother with trait impls besides From.
-enum UintError {
-    EarlyEndOfScript,
-    NumericOverflow,
-}
-
-internals::impl_from_infallible!(UintError);
-
-impl From<UintError> for Error {
-    fn from(error: UintError) -> Self {
-        match error {
-            UintError::EarlyEndOfScript => Error::EarlyEndOfScript,
-            UintError::NumericOverflow => Error::NumericOverflow,
-        }
-    }
-}
-
 /// Error while hashing a redeem script.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct RedeemScriptSizeError {
--- a/internals/src/lib.rs
+++ b/internals/src/lib.rs
@ -39,6 +39,7 @@ pub mod const_tools;
 pub mod error;
 pub mod macros;
 mod parse;
+pub mod script;
 #[cfg(feature = "serde")]
 #[macro_use]
 pub mod serde;
--- a/internals/src/script.rs
+++ b/internals/src/script.rs
@ -0,0 +1,73 @@
+// SPDX-License-Identifier: CC0-1.0
+
+//! Internal script related helper functions and types.
+
+/// Reads a `usize` from an iterator.
+///
+/// A script push data instruction includes the length of the data being pushed, this function reads
+/// that length from an iterator (encoded in either 1, 2, or 4 bytes).
+// We internally use implementation based on iterator so that it automatically advances as needed.
+pub fn read_push_data_len(
+    data: &mut core::slice::Iter<'_, u8>,
+    size: PushDataLenLen,
+) -> Result<usize, EarlyEndOfScriptError> {
+    // The `size` enum enforces that the maximum shift will be 32 and
+    // that we can only ever read up to 4 bytes.
+    let size = size as usize;
+
+    if data.len() < size {
+        return Err(EarlyEndOfScriptError);
+    };
+
+    let mut ret = 0;
+    for (i, item) in data.take(size).enumerate() {
+        ret |= usize::from(*item) << (i * 8);
+    }
+    Ok(ret)
+}
+
+/// The number of bytes used to encode an unsigned integer as the length of a push data instruction.
+///
+/// This makes it easier to prove correctness of `next_push_data_len` and `read_push_data_len`.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum PushDataLenLen {
+    /// Unsigned integer comprising of a single byte.
+    One = 1,
+    /// Unsigned integer comprising of two bytes.
+    Two = 2,
+    /// Unsigned integer comprising of four bytes.
+    Four = 4,
+}
+
+/// Indicates that we tried to read more bytes from the script than available.
+#[derive(Debug)]
+pub struct EarlyEndOfScriptError;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn reads_4_bytes() {
+        let bytes = [0x01, 0x23, 0x45, 0x67];
+        let want = u32::from_le_bytes([0x01, 0x23, 0x45, 0x67]);
+        let got = read_push_data_len(&mut bytes.iter(), PushDataLenLen::Four).unwrap();
+        assert_eq!(got, want as usize)
+    }
+
+    #[test]
+    fn reads_2_bytes() {
+        let bytes = [0x01, 0x23];
+        let want = u16::from_le_bytes([0x01, 0x23]);
+        let got = read_push_data_len(&mut bytes.iter(), PushDataLenLen::Two).unwrap();
+        assert_eq!(got, want as usize)
+    }
+
+    #[test]
+    fn reads_1_byte() {
+        let bytes = [0x01];
+        let want = 0x01;
+        let got = read_push_data_len(&mut bytes.iter(), PushDataLenLen::One).unwrap();
+        assert_eq!(got, want as usize)
+    }
+}