From 1f55edf718fb0fed099f2f22a6735276229408f5 Mon Sep 17 00:00:00 2001 From: Martin Habovstiak Date: Sat, 2 Oct 2021 20:27:39 +0200 Subject: [PATCH 1/5] Use iterator in `blockdata::script::Instructions` This refactors `blockdata::script::Instructions` to use `::core::slice::Iter<'a, u8>` instead of `&'a [u8]` to better express the intention and to avoid some slicing mistakes. Similarly to a previous change this uses a macro to deduplicate the common logic and the new `read_uint_iter` internal function to automatically advance the iterator. Addresses: https://github.com/rust-bitcoin/rust-bitcoin/pull/662#pullrequestreview-768320603 --- src/blockdata/script.rs | 128 +++++++++++++++------------------------- 1 file changed, 47 insertions(+), 81 deletions(-) diff --git a/src/blockdata/script.rs b/src/blockdata/script.rs index dd448fd8..57431150 100644 --- a/src/blockdata/script.rs +++ b/src/blockdata/script.rs @@ -582,7 +582,7 @@ impl Script { /// To force minimal pushes, use [`Self::instructions_minimal`]. pub fn instructions(&self) -> Instructions { Instructions { - data: &self.0[..], + data: self.0.iter(), enforce_minimal: false, } } @@ -590,7 +590,7 @@ impl Script { /// Iterates over the script in the form of `Instruction`s while enforcing minimal pushes. pub fn instructions_minimal(&self) -> Instructions { Instructions { - data: &self.0[..], + data: self.0.iter(), enforce_minimal: true, } } @@ -729,113 +729,79 @@ pub enum Instruction<'a> { /// Iterator over a script returning parsed opcodes. pub struct Instructions<'a> { - data: &'a [u8], + data: ::core::slice::Iter<'a, u8>, enforce_minimal: bool, } +impl<'a> Instructions<'a> { + fn next_push_data_len(&mut self, len: usize, max: usize) -> Option, Error>> { + let n = match read_uint_iter(&mut self.data, len) { + Ok(n) => n, + // We do exhaustive matching to not forget to handle new variants if we extend + // `UintError` type. + // Overflow actually means early end of script (script is definitely shorter + // than `usize::max_value()`) + Err(UintError::EarlyEndOfScript) | Err(UintError::NumericOverflow) => { + let data_len = self.data.len(); + self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors + return Some(Err(Error::EarlyEndOfScript)); + }, + }; + if self.enforce_minimal && n < max { + let data_len = self.data.len(); + self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors + return Some(Err(Error::NonMinimalPush)); + } + let ret = Some(Ok(Instruction::PushBytes(&self.data.as_slice()[..n]))); + self.data.nth(n.max(1) - 1); + ret + } +} + impl<'a> Iterator for Instructions<'a> { type Item = Result, Error>; fn next(&mut self) -> Option, Error>> { - if self.data.is_empty() { - return None; - } + let &byte = self.data.next()?; // classify parameter does not really matter here since we are only using // it for pushes and nums - match opcodes::All::from(self.data[0]).classify(opcodes::ClassifyContext::Legacy) { + match opcodes::All::from(byte).classify(opcodes::ClassifyContext::Legacy) { opcodes::Class::PushBytes(n) => { + // make sure safety argument holds across refactorings + let n: u32 = n; + // casting is safe because we don't support 16-bit architectures let n = n as usize; - if self.data.len() < n + 1 { - self.data = &[]; // Kill iterator so that it does not return an infinite stream of errors + + if self.data.len() < n { + let data_len = self.data.len(); + self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors return Some(Err(Error::EarlyEndOfScript)); } if self.enforce_minimal { - if n == 1 && (self.data[1] == 0x81 || (self.data[1] > 0 && self.data[1] <= 16)) { - self.data = &[]; + // index acceess is safe because we checked the lenght above + if n == 1 && (self.data.as_slice()[0] == 0x81 || (self.data.as_slice()[0] > 0 && self.data.as_slice()[0] <= 16)) { + let data_len = self.data.len(); + self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors return Some(Err(Error::NonMinimalPush)); } } - let ret = Some(Ok(Instruction::PushBytes(&self.data[1..n+1]))); - self.data = &self.data[n + 1..]; + let ret = Some(Ok(Instruction::PushBytes(&self.data.as_slice()[..n]))); + self.data.nth(n.max(1) - 1); ret } opcodes::Class::Ordinary(opcodes::Ordinary::OP_PUSHDATA1) => { - if self.data.len() < 2 { - self.data = &[]; - return Some(Err(Error::EarlyEndOfScript)); - } - let n = match read_uint(&self.data[1..], 1) { - Ok(n) => n, - Err(e) => { - self.data = &[]; - return Some(Err(e)); - } - }; - if self.data.len() < n + 2 { - self.data = &[]; - return Some(Err(Error::EarlyEndOfScript)); - } - if self.enforce_minimal && n < 76 { - self.data = &[]; - return Some(Err(Error::NonMinimalPush)); - } - let ret = Some(Ok(Instruction::PushBytes(&self.data[2..n+2]))); - self.data = &self.data[n + 2..]; - ret + self.next_push_data_len(1, 76) } opcodes::Class::Ordinary(opcodes::Ordinary::OP_PUSHDATA2) => { - if self.data.len() < 3 { - self.data = &[]; - return Some(Err(Error::EarlyEndOfScript)); - } - let n = match read_uint(&self.data[1..], 2) { - Ok(n) => n, - Err(e) => { - self.data = &[]; - return Some(Err(e)); - } - }; - if self.enforce_minimal && n < 0x100 { - self.data = &[]; - return Some(Err(Error::NonMinimalPush)); - } - if self.data.len() < n + 3 { - self.data = &[]; - return Some(Err(Error::EarlyEndOfScript)); - } - let ret = Some(Ok(Instruction::PushBytes(&self.data[3..n + 3]))); - self.data = &self.data[n + 3..]; - ret + self.next_push_data_len(2, 0x100) } opcodes::Class::Ordinary(opcodes::Ordinary::OP_PUSHDATA4) => { - if self.data.len() < 5 { - self.data = &[]; - return Some(Err(Error::EarlyEndOfScript)); - } - let n = match read_uint(&self.data[1..], 4) { - Ok(n) => n, - Err(e) => { - self.data = &[]; - return Some(Err(e)); - } - }; - if self.enforce_minimal && n < 0x10000 { - self.data = &[]; - return Some(Err(Error::NonMinimalPush)); - } - if self.data.len() < n + 5 { - self.data = &[]; - return Some(Err(Error::EarlyEndOfScript)); - } - let ret = Some(Ok(Instruction::PushBytes(&self.data[5..n + 5]))); - self.data = &self.data[n + 5..]; - ret + self.next_push_data_len(4, 0x10000) } // Everything else we can push right through _ => { - let ret = Some(Ok(Instruction::Op(opcodes::All::from(self.data[0])))); - self.data = &self.data[1..]; + let ret = Some(Ok(Instruction::Op(opcodes::All::from(byte)))); ret } } From bc763259fe7b79ed50af53d04af8198d350555aa Mon Sep 17 00:00:00 2001 From: Martin Habovstiak Date: Thu, 7 Oct 2021 17:00:14 +0200 Subject: [PATCH 2/5] Move repeated code to functions in script This creates a few primitive functions for handling iterators and uses them to avoid repeated code. As a result not only is the code simpler but also fixes a forgotten bound check. Thanks to a helper function which always does bounds check correctly this can no longer be forgotten. --- src/blockdata/script.rs | 57 ++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/src/blockdata/script.rs b/src/blockdata/script.rs index 57431150..d45faa1e 100644 --- a/src/blockdata/script.rs +++ b/src/blockdata/script.rs @@ -734,6 +734,26 @@ pub struct Instructions<'a> { } impl<'a> Instructions<'a> { + /// Set the iterator to end so that it won't iterate any longer + fn kill(&mut self) { + let len = self.data.len(); + self.data.nth(len.max(1) - 1); + } + + /// takes `len` bytes long slice from iterator and returns it advancing iterator + /// if the iterator is not long enough `None` is returned and the iterator is killed + /// to avoid returning an infinite stream of errors. + fn take_slice_or_kill(&mut self, len: usize) -> Option<&'a [u8]> { + if self.data.len() >= len { + let slice = &self.data.as_slice()[..len]; + self.data.nth(len.max(1) - 1); + Some(slice) + } else { + self.kill(); + None + } + } + fn next_push_data_len(&mut self, len: usize, max: usize) -> Option, Error>> { let n = match read_uint_iter(&mut self.data, len) { Ok(n) => n, @@ -742,19 +762,15 @@ impl<'a> Instructions<'a> { // Overflow actually means early end of script (script is definitely shorter // than `usize::max_value()`) Err(UintError::EarlyEndOfScript) | Err(UintError::NumericOverflow) => { - let data_len = self.data.len(); - self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors + self.kill(); return Some(Err(Error::EarlyEndOfScript)); }, }; if self.enforce_minimal && n < max { - let data_len = self.data.len(); - self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors + self.kill(); return Some(Err(Error::NonMinimalPush)); } - let ret = Some(Ok(Instruction::PushBytes(&self.data.as_slice()[..n]))); - self.data.nth(n.max(1) - 1); - ret + Some(self.take_slice_or_kill(n).map(Instruction::PushBytes).ok_or(Error::EarlyEndOfScript)) } } @@ -773,22 +789,21 @@ impl<'a> Iterator for Instructions<'a> { // casting is safe because we don't support 16-bit architectures let n = n as usize; - if self.data.len() < n { - let data_len = self.data.len(); - self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors - return Some(Err(Error::EarlyEndOfScript)); - } - if self.enforce_minimal { - // index acceess is safe because we checked the lenght above - if n == 1 && (self.data.as_slice()[0] == 0x81 || (self.data.as_slice()[0] > 0 && self.data.as_slice()[0] <= 16)) { - let data_len = self.data.len(); - self.data.nth(data_len); // Kill iterator so that it does not return an infinite stream of errors - return Some(Err(Error::NonMinimalPush)); + let op_byte = self.data.as_slice().first(); + match (self.enforce_minimal, op_byte, n) { + (true, Some(&op_byte), 1) if op_byte == 0x81 || (op_byte > 0 && op_byte <= 16) => { + self.kill(); + Some(Err(Error::NonMinimalPush)) + }, + (_, None, 0) => { + // the iterator is already empty, may as well use this information to avoid + // whole take_slice_or_kill function + Some(Ok(Instruction::PushBytes(&[]))) + }, + _ => { + Some(self.take_slice_or_kill(n).map(Instruction::PushBytes).ok_or(Error::EarlyEndOfScript)) } } - let ret = Some(Ok(Instruction::PushBytes(&self.data.as_slice()[..n]))); - self.data.nth(n.max(1) - 1); - ret } opcodes::Class::Ordinary(opcodes::Ordinary::OP_PUSHDATA1) => { self.next_push_data_len(1, 76) From 0ec6d96a7beece696701d0c146adfa1ad03ddd9a Mon Sep 17 00:00:00 2001 From: Martin Habovstiak Date: Wed, 9 Feb 2022 18:37:34 +0100 Subject: [PATCH 3/5] Cleanup after `Instructions` refactoring * Changes `Option` to `Result` to avoid repeated `.ok_or(...)` * Renames `max` to `min_push_len` * Removes useless variable --- src/blockdata/script.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/blockdata/script.rs b/src/blockdata/script.rs index d45faa1e..d17df327 100644 --- a/src/blockdata/script.rs +++ b/src/blockdata/script.rs @@ -743,18 +743,18 @@ impl<'a> Instructions<'a> { /// takes `len` bytes long slice from iterator and returns it advancing iterator /// if the iterator is not long enough `None` is returned and the iterator is killed /// to avoid returning an infinite stream of errors. - fn take_slice_or_kill(&mut self, len: usize) -> Option<&'a [u8]> { + fn take_slice_or_kill(&mut self, len: usize) -> Result<&'a [u8], Error> { if self.data.len() >= len { let slice = &self.data.as_slice()[..len]; self.data.nth(len.max(1) - 1); - Some(slice) + Ok(slice) } else { self.kill(); - None + Err(Error::EarlyEndOfScript) } } - fn next_push_data_len(&mut self, len: usize, max: usize) -> Option, Error>> { + fn next_push_data_len(&mut self, len: usize, min_push_len: usize) -> Option, Error>> { let n = match read_uint_iter(&mut self.data, len) { Ok(n) => n, // We do exhaustive matching to not forget to handle new variants if we extend @@ -766,11 +766,11 @@ impl<'a> Instructions<'a> { return Some(Err(Error::EarlyEndOfScript)); }, }; - if self.enforce_minimal && n < max { + if self.enforce_minimal && n < min_push_len { self.kill(); return Some(Err(Error::NonMinimalPush)); } - Some(self.take_slice_or_kill(n).map(Instruction::PushBytes).ok_or(Error::EarlyEndOfScript)) + Some(self.take_slice_or_kill(n).map(Instruction::PushBytes)) } } @@ -801,7 +801,7 @@ impl<'a> Iterator for Instructions<'a> { Some(Ok(Instruction::PushBytes(&[]))) }, _ => { - Some(self.take_slice_or_kill(n).map(Instruction::PushBytes).ok_or(Error::EarlyEndOfScript)) + Some(self.take_slice_or_kill(n).map(Instruction::PushBytes)) } } } @@ -816,8 +816,7 @@ impl<'a> Iterator for Instructions<'a> { } // Everything else we can push right through _ => { - let ret = Some(Ok(Instruction::Op(opcodes::All::from(byte)))); - ret + Some(Ok(Instruction::Op(opcodes::All::from(byte)))) } } } From e6ff754b73a2bd6f6c2755f1a2bb9e477bc4d63d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Habov=C5=A1tiak?= Date: Mon, 14 Mar 2022 21:17:38 +0100 Subject: [PATCH 4/5] Fix doc of take_slice_or_kill Co-authored-by: Dr. Maxim Orlovsky --- src/blockdata/script.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blockdata/script.rs b/src/blockdata/script.rs index d17df327..39f00312 100644 --- a/src/blockdata/script.rs +++ b/src/blockdata/script.rs @@ -741,7 +741,7 @@ impl<'a> Instructions<'a> { } /// takes `len` bytes long slice from iterator and returns it advancing iterator - /// if the iterator is not long enough `None` is returned and the iterator is killed + /// if the iterator is not long enough [`Error::EarlyEndOfScript`] is returned and the iterator is killed /// to avoid returning an infinite stream of errors. fn take_slice_or_kill(&mut self, len: usize) -> Result<&'a [u8], Error> { if self.data.len() >= len { From 2c28d3b44802d42db2e940c8e9e4c6eaaeb19c81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Habov=C5=A1tiak?= Date: Thu, 21 Apr 2022 19:14:30 +0200 Subject: [PATCH 5/5] Fix handling of empty slice in Instructions The code would've taken one element when an empty slice was requested. Co-authored-by: Tobin C. Harding --- src/blockdata/script.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/blockdata/script.rs b/src/blockdata/script.rs index 39f00312..524e48c4 100644 --- a/src/blockdata/script.rs +++ b/src/blockdata/script.rs @@ -746,7 +746,10 @@ impl<'a> Instructions<'a> { fn take_slice_or_kill(&mut self, len: usize) -> Result<&'a [u8], Error> { if self.data.len() >= len { let slice = &self.data.as_slice()[..len]; - self.data.nth(len.max(1) - 1); + if len > 0 { + self.data.nth(len - 1); + } + Ok(slice) } else { self.kill();