Refactor Script::bytes_to_asm_fmt to use iterator
This refactors `Script::bytes_to_asm_fmt`` function to use an iterator instead of index. Such change makes it easier to reason about overflows or out-of-bounds accesses. As a result this also fixes three unlikely overflows and happens to improve formatting to not output space at the beginning in some weird cases. To improve robustness even better it also moves `read_uint` implementation to internal function which returns a more specific error type which can be exhaustively matched on to guarantee correct error handling. Probably because of lack of this the code was previously checking the same condition twice, the second time being unreachable and attempting to behave differently than the first one. Finally this uses macro to deduplicate code which differs only in single number, ensuring the code stays in sync across all branches.
This commit is contained in:
parent
23ccc58d7b
commit
49bd3af449
|
@ -154,6 +154,22 @@ impl fmt::Display for Error {
|
||||||
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
|
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
|
||||||
impl ::std::error::Error for Error {}
|
impl ::std::error::Error for Error {}
|
||||||
|
|
||||||
|
// Our internal error proves that we only return these two cases from `read_uint_iter`.
|
||||||
|
// Since it's private we don't bother with trait impls besides From.
|
||||||
|
enum UintError {
|
||||||
|
EarlyEndOfScript,
|
||||||
|
NumericOverflow,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<UintError> for Error {
|
||||||
|
fn from(error: UintError) -> Self {
|
||||||
|
match error {
|
||||||
|
UintError::EarlyEndOfScript => Error::EarlyEndOfScript,
|
||||||
|
UintError::NumericOverflow => Error::NumericOverflow,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(feature="bitcoinconsensus")]
|
#[cfg(feature="bitcoinconsensus")]
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
impl From<bitcoinconsensus::Error> for Error {
|
impl From<bitcoinconsensus::Error> for Error {
|
||||||
|
@ -227,13 +243,38 @@ pub fn read_scriptbool(v: &[u8]) -> bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read a script-encoded unsigned integer
|
/// Read a script-encoded unsigned integer
|
||||||
|
///
|
||||||
|
/// ## Errors
|
||||||
|
///
|
||||||
|
/// This function returns an error in these cases:
|
||||||
|
///
|
||||||
|
/// * `data` is shorter than `size` => `EarlyEndOfScript`
|
||||||
|
/// * `size` is greater than `u16::max_value / 8` (8191) => `NumericOverflow`
|
||||||
|
/// * The number being read overflows `usize` => `NumericOverflow`
|
||||||
|
///
|
||||||
|
/// Note that this does **not** return an error for `size` between `core::size_of::<usize>()`
|
||||||
|
/// and `u16::max_value / 8` if there's no overflow.
|
||||||
pub fn read_uint(data: &[u8], size: usize) -> Result<usize, Error> {
|
pub fn read_uint(data: &[u8], size: usize) -> Result<usize, Error> {
|
||||||
|
read_uint_iter(&mut data.iter(), size).map_err(Into::into)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We internally use implementation based on iterator so that it automatically advances as needed
|
||||||
|
// Errors are same as above, just different type.
|
||||||
|
fn read_uint_iter(data: &mut ::core::slice::Iter<'_, u8>, size: usize) -> Result<usize, UintError> {
|
||||||
if data.len() < size {
|
if data.len() < size {
|
||||||
Err(Error::EarlyEndOfScript)
|
Err(UintError::EarlyEndOfScript)
|
||||||
|
} else if size > usize::from(u16::max_value() / 8) {
|
||||||
|
// Casting to u32 would overflow
|
||||||
|
Err(UintError::NumericOverflow)
|
||||||
} else {
|
} else {
|
||||||
let mut ret = 0;
|
let mut ret = 0;
|
||||||
for (i, item) in data.iter().take(size).enumerate() {
|
for (i, item) in data.take(size).enumerate() {
|
||||||
ret += (*item as usize) << (i * 8);
|
ret = usize::from(*item)
|
||||||
|
// Casting is safe because we checked above to not repeat the same check in a loop
|
||||||
|
.checked_shl((i * 8) as u32)
|
||||||
|
.ok_or(UintError::NumericOverflow)?
|
||||||
|
.checked_add(ret)
|
||||||
|
.ok_or(UintError::NumericOverflow)?;
|
||||||
}
|
}
|
||||||
Ok(ret)
|
Ok(ret)
|
||||||
}
|
}
|
||||||
|
@ -477,50 +518,62 @@ impl Script {
|
||||||
|
|
||||||
/// Write the assembly decoding of the script bytes to the formatter.
|
/// Write the assembly decoding of the script bytes to the formatter.
|
||||||
pub fn bytes_to_asm_fmt(script: &[u8], f: &mut dyn fmt::Write) -> fmt::Result {
|
pub fn bytes_to_asm_fmt(script: &[u8], f: &mut dyn fmt::Write) -> fmt::Result {
|
||||||
let mut index = 0;
|
// This has to be a macro because it needs to break the loop
|
||||||
while index < script.len() {
|
macro_rules! read_push_data_len {
|
||||||
let opcode = opcodes::All::from(script[index]);
|
($iter:expr, $len:expr, $formatter:expr) => {
|
||||||
index += 1;
|
match read_uint_iter($iter, $len) {
|
||||||
|
Ok(n) => {
|
||||||
|
n
|
||||||
|
},
|
||||||
|
Err(UintError::EarlyEndOfScript) => {
|
||||||
|
$formatter.write_str("<unexpected end>")?;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// We got the data in a slice which implies it being shorter than `usize::max_value()`
|
||||||
|
// So if we got overflow, we can confidently say the number is higher than length of
|
||||||
|
// the slice even though we don't know the exact number. This implies attempt to push
|
||||||
|
// past end.
|
||||||
|
Err(UintError::NumericOverflow) => {
|
||||||
|
$formatter.write_str("<push past end>")?;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut iter = script.iter();
|
||||||
|
// Was at least one opcode emitted?
|
||||||
|
let mut at_least_one = false;
|
||||||
|
// `iter` needs to be borrowed in `read_push_data_len`, so we have to use `while let` instead
|
||||||
|
// of `for`.
|
||||||
|
while let Some(byte) = iter.next() {
|
||||||
|
let opcode = opcodes::All::from(*byte);
|
||||||
|
|
||||||
let data_len = if let opcodes::Class::PushBytes(n) = opcode.classify(opcodes::ClassifyContext::Legacy) {
|
let data_len = if let opcodes::Class::PushBytes(n) = opcode.classify(opcodes::ClassifyContext::Legacy) {
|
||||||
n as usize
|
n as usize
|
||||||
} else {
|
} else {
|
||||||
match opcode {
|
match opcode {
|
||||||
opcodes::all::OP_PUSHDATA1 => {
|
opcodes::all::OP_PUSHDATA1 => {
|
||||||
if script.len() < index + 1 {
|
// side effects: may write and break from the loop
|
||||||
f.write_str("<unexpected end>")?;
|
read_push_data_len!(&mut iter, 1, f)
|
||||||
break;
|
|
||||||
}
|
|
||||||
match read_uint(&script[index..], 1) {
|
|
||||||
Ok(n) => { index += 1; n as usize }
|
|
||||||
Err(_) => { f.write_str("<bad length>")?; break; }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
opcodes::all::OP_PUSHDATA2 => {
|
opcodes::all::OP_PUSHDATA2 => {
|
||||||
if script.len() < index + 2 {
|
// side effects: may write and break from the loop
|
||||||
f.write_str("<unexpected end>")?;
|
read_push_data_len!(&mut iter, 2, f)
|
||||||
break;
|
|
||||||
}
|
|
||||||
match read_uint(&script[index..], 2) {
|
|
||||||
Ok(n) => { index += 2; n as usize }
|
|
||||||
Err(_) => { f.write_str("<bad length>")?; break; }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
opcodes::all::OP_PUSHDATA4 => {
|
opcodes::all::OP_PUSHDATA4 => {
|
||||||
if script.len() < index + 4 {
|
// side effects: may write and break from the loop
|
||||||
f.write_str("<unexpected end>")?;
|
read_push_data_len!(&mut iter, 4, f)
|
||||||
break;
|
|
||||||
}
|
|
||||||
match read_uint(&script[index..], 4) {
|
|
||||||
Ok(n) => { index += 4; n as usize }
|
|
||||||
Err(_) => { f.write_str("<bad length>")?; break; }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => 0
|
_ => 0
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if index > 1 { f.write_str(" ")?; }
|
if at_least_one {
|
||||||
|
f.write_str(" ")?;
|
||||||
|
} else {
|
||||||
|
at_least_one = true;
|
||||||
|
}
|
||||||
// Write the opcode
|
// Write the opcode
|
||||||
if opcode == opcodes::all::OP_PUSHBYTES_0 {
|
if opcode == opcodes::all::OP_PUSHBYTES_0 {
|
||||||
f.write_str("OP_0")?;
|
f.write_str("OP_0")?;
|
||||||
|
@ -530,17 +583,13 @@ impl Script {
|
||||||
// Write any pushdata
|
// Write any pushdata
|
||||||
if data_len > 0 {
|
if data_len > 0 {
|
||||||
f.write_str(" ")?;
|
f.write_str(" ")?;
|
||||||
match index.checked_add(data_len) {
|
if data_len <= iter.len() {
|
||||||
Some(end) if end <= script.len() => {
|
for ch in iter.by_ref().take(data_len) {
|
||||||
for ch in &script[index..end] {
|
|
||||||
write!(f, "{:02x}", ch)?;
|
write!(f, "{:02x}", ch)?;
|
||||||
}
|
}
|
||||||
index = end;
|
} else {
|
||||||
},
|
|
||||||
_ => {
|
|
||||||
f.write_str("<push past end>")?;
|
f.write_str("<push past end>")?;
|
||||||
break;
|
break;
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1167,13 +1216,13 @@ mod test {
|
||||||
assert_eq!(hex_script!("4c").asm(),
|
assert_eq!(hex_script!("4c").asm(),
|
||||||
"<unexpected end>");
|
"<unexpected end>");
|
||||||
assert_eq!(hex_script!("4c0201").asm(),
|
assert_eq!(hex_script!("4c0201").asm(),
|
||||||
" OP_PUSHDATA1 <push past end>");
|
"OP_PUSHDATA1 <push past end>");
|
||||||
assert_eq!(hex_script!("4d").asm(),
|
assert_eq!(hex_script!("4d").asm(),
|
||||||
"<unexpected end>");
|
"<unexpected end>");
|
||||||
assert_eq!(hex_script!("4dffff01").asm(),
|
assert_eq!(hex_script!("4dffff01").asm(),
|
||||||
" OP_PUSHDATA2 <push past end>");
|
"OP_PUSHDATA2 <push past end>");
|
||||||
assert_eq!(hex_script!("4effffffff01").asm(),
|
assert_eq!(hex_script!("4effffffff01").asm(),
|
||||||
" OP_PUSHDATA4 <push past end>");
|
"OP_PUSHDATA4 <push past end>");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in New Issue