From 6b90e42e78b18e8dda24983d4f6b764d717a4978 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 8 May 2025 15:12:05 +1000 Subject: [PATCH 1/2] Finalize the script hex APIs Recently we made an attempt at making the hex APIs for scripts easier to use, better documented, and shown via an example. After that work we decided it would be better if `LowerHex`/`UpperHex` did not have the prefix. We also wanted to further clarify the inherent function names to make the all explicit. See GitHub issue #4316 for the thread of discussion. Note that this PR does not require changes to the serde regression test which were non changed in the original work either. --- bitcoin/examples/script.rs | 41 +++++++++++++----------- bitcoin/src/blockdata/script/borrowed.rs | 13 ++++++-- bitcoin/src/blockdata/script/owned.rs | 10 +++++- primitives/src/script/mod.rs | 25 ++++++--------- 4 files changed, 51 insertions(+), 38 deletions(-) diff --git a/bitcoin/examples/script.rs b/bitcoin/examples/script.rs index ee8be6ee0..930b4899e 100644 --- a/bitcoin/examples/script.rs +++ b/bitcoin/examples/script.rs @@ -15,10 +15,16 @@ use bitcoin::ScriptBuf; fn main() { let pk = "b472a266d0bd89c13706a4132ccfb16f7c3b9fcb".parse::().unwrap(); - // TL;DR Use `to_hex_string` and `from_hex`. + // TL;DR Use `to_hex_string_prefixed` and `from_hex_prefixed`. let script_code = script::p2wpkh_script_code(pk); - let hex = script_code.to_hex_string(); - let decoded = ScriptBuf::from_hex(&hex).unwrap(); + let hex = script_code.to_hex_string_prefixed(); + let decoded = ScriptBuf::from_hex_prefixed(&hex).unwrap(); + assert_eq!(decoded, script_code); + + // Or if you prefer: `to_hex_string_no_length_prefix` and `from_hex_no_length_prefix`. + let script_code = script::p2wpkh_script_code(pk); + let hex = script_code.to_hex_string_no_length_prefix(); + let decoded = ScriptBuf::from_hex_no_length_prefix(&hex).unwrap(); assert_eq!(decoded, script_code); // Writes the script as human-readable eg, OP_DUP OP_HASH160 OP_PUSHBYTES_20 ... @@ -27,28 +33,25 @@ fn main() { // We do not implement parsing scripts from human-readable format. // let decoded = s.parse::().unwrap(); - // This is equivalent to consensus encoding i.e., includes the length prefix. + // This is not equivalent to consensus encoding i.e., does not include the length prefix. let hex_lower_hex_trait = format!("{script_code:x}"); println!("hex created using `LowerHex`: {hex_lower_hex_trait}"); // The `deserialize_hex` function requires the length prefix. - assert_eq!(encode::deserialize_hex::(&hex_lower_hex_trait).unwrap(), script_code); - // And so does `from_hex`. - assert_eq!(ScriptBuf::from_hex(&hex_lower_hex_trait).unwrap(), script_code); - - // And we also provide an explicit constructor that does not use the length prefix. - let other = ScriptBuf::from_hex_no_length_prefix(&hex_lower_hex_trait).unwrap(); - // Without a prefix the script parses but its not the one we meant. - assert_ne!(other, script_code); + assert!(encode::deserialize_hex::(&hex_lower_hex_trait).is_err()); + // And so does `from_hex_prefixed`. + assert!(ScriptBuf::from_hex_prefixed(&hex_lower_hex_trait).is_err()); + // But we provide an explicit constructor that does not. + assert_eq!(ScriptBuf::from_hex_no_length_prefix(&hex_lower_hex_trait).unwrap(), script_code); // This is consensus encoding i.e., includes the length prefix. - let hex_inherent = script_code.to_hex_string(); // Defined in `ScriptExt`. - println!("hex created using inherent `to_hex_string`: {hex_inherent}"); + let hex_inherent = script_code.to_hex_string_prefixed(); // Defined in `ScriptExt`. + println!("hex created using inherent `to_hex_string_prefixed`: {hex_inherent}"); - // The inverse of `to_hex_string` is `from_hex`. - let decoded = ScriptBuf::from_hex(&hex_inherent).unwrap(); // Defined in `ScriptBufExt`. + // The inverse of `to_hex_string_prefixed` is `from_hex_string_prefixed`. + let decoded = ScriptBuf::from_hex_prefixed(&hex_inherent).unwrap(); // Defined in `ScriptBufExt`. assert_eq!(decoded, script_code); - // We can also parse the output of `to_hex_string` using `deserialize_hex`. + // We can also parse the output of `to_hex_string_prefixed` using `deserialize_hex`. let decoded = encode::deserialize_hex::(&hex_inherent).unwrap(); assert_eq!(decoded, script_code); @@ -58,8 +61,10 @@ fn main() { let decoded: ScriptBuf = encode::deserialize_hex(&encoded).unwrap(); assert_eq!(decoded, script_code); + // And we can mix these to calls because both include the length prefix. - let decoded = ScriptBuf::from_hex(&encoded).unwrap(); + let encoded = encode::serialize_hex(&script_code); + let decoded = ScriptBuf::from_hex_prefixed(&encoded).unwrap(); assert_eq!(decoded, script_code); // Encode/decode using a byte vector. diff --git a/bitcoin/src/blockdata/script/borrowed.rs b/bitcoin/src/blockdata/script/borrowed.rs index cb8ded5af..f969c7c4c 100644 --- a/bitcoin/src/blockdata/script/borrowed.rs +++ b/bitcoin/src/blockdata/script/borrowed.rs @@ -376,12 +376,19 @@ crate::internal_macros::define_extension_trait! { fn to_asm_string(&self) -> String { self.to_string() } /// Consensus encodes the script as lower-case hex. - fn to_hex_string(&self) -> String { consensus::encode::serialize_hex(self) } + #[deprecated(since = "TBD", note = "use `to_hex_string_prefixed()` instead")] + fn to_hex_string(&self) -> String { self.to_hex_string_prefixed() } + + /// Consensus encodes the script as lower-case hex. + fn to_hex_string_prefixed(&self) -> String { consensus::encode::serialize_hex(self) } /// Consensus encodes the script as lower-case hex. /// - /// This is **not** consensus encoding, you likely want to use `to_hex_string`. The returned - /// hex string will not include the length prefix. + /// This is **not** consensus encoding, you likely want to use `to_hex_string_prefixed`. + /// + /// # Returns + /// + /// The returned hex string will not include the length prefix. fn to_hex_string_no_length_prefix(&self) -> String { self.as_bytes().to_lower_hex_string() } diff --git a/bitcoin/src/blockdata/script/owned.rs b/bitcoin/src/blockdata/script/owned.rs index 49357d32e..2b2bc818d 100644 --- a/bitcoin/src/blockdata/script/owned.rs +++ b/bitcoin/src/blockdata/script/owned.rs @@ -30,10 +30,18 @@ crate::internal_macros::define_extension_trait! { /// Constructs a new [`ScriptBuf`] from a hex string. /// /// The input string is expected to be consensus encoded i.e., includes the length prefix. - fn from_hex(s: &str) -> Result { + fn from_hex_prefixed(s: &str) -> Result { consensus::encode::deserialize_hex(s) } + /// Constructs a new [`ScriptBuf`] from a hex string. + /// + /// The input string is expected to be consensus encoded i.e., includes the length prefix. + #[deprecated(since = "TBD", note = "use `from_hex_string_prefixed()` instead")] + fn from_hex(s: &str) -> Result { + Self::from_hex_prefixed(s) + } + /// Constructs a new [`ScriptBuf`] from a hex string. /// /// This is **not** consensus encoding. If your hex string is a consensus encode script then diff --git a/primitives/src/script/mod.rs b/primitives/src/script/mod.rs index 473b85cb1..012b4f444 100644 --- a/primitives/src/script/mod.rs +++ b/primitives/src/script/mod.rs @@ -431,12 +431,9 @@ impl fmt::Display for ScriptBuf { #[cfg(feature = "hex")] impl fmt::LowerHex for Script { - // Currently we drop all formatter options. #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let compact = internals::compact_size::encode(self.as_bytes().len()); - write!(f, "{:x}", compact.as_slice().as_hex())?; - write!(f, "{:x}", self.as_bytes().as_hex()) + fmt::LowerHex::fmt(&self.as_bytes().as_hex(), f) } } #[cfg(feature = "alloc")] @@ -454,12 +451,9 @@ internals::impl_to_hex_from_lower_hex!(ScriptBuf, |script_buf: &Self| script_buf #[cfg(feature = "hex")] impl fmt::UpperHex for Script { - // Currently we drop all formatter options. #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let compact = internals::compact_size::encode(self.as_bytes().len()); - write!(f, "{:X}", compact.as_slice().as_hex())?; - write!(f, "{:X}", self.as_bytes().as_hex()) + fmt::UpperHex::fmt(&self.as_bytes().as_hex(), f) } } @@ -511,8 +505,7 @@ impl serde::Serialize for Script { S: serde::Serializer, { if serializer.is_human_readable() { - // Do not call LowerHex because we don't want to add the len prefix. - serializer.collect_str(&format_args!("{}", self.as_bytes().as_hex())) + serializer.collect_str(&format_args!("{:x}", self)) } else { serializer.serialize_bytes(self.as_bytes()) } @@ -803,8 +796,8 @@ mod tests { #[cfg(feature = "hex")] { - assert_eq!(format!("{:x}", script), "0300a1b2"); - assert_eq!(format!("{:X}", script), "0300A1B2"); + assert_eq!(format!("{:x}", script), "00a1b2"); + assert_eq!(format!("{:X}", script), "00A1B2"); } assert!(!format!("{:?}", script).is_empty()); } @@ -816,8 +809,8 @@ mod tests { #[cfg(feature = "hex")] { - assert_eq!(format!("{:x}", script_buf), "0300a1b2"); - assert_eq!(format!("{:X}", script_buf), "0300A1B2"); + assert_eq!(format!("{:x}", script_buf), "00a1b2"); + assert_eq!(format!("{:X}", script_buf), "00A1B2"); } assert!(!format!("{:?}", script_buf).is_empty()); } @@ -935,7 +928,7 @@ mod tests { fn script_to_hex() { let script = Script::from_bytes(&[0xa1, 0xb2, 0xc3]); let hex = script.to_hex(); - assert_eq!(hex, "03a1b2c3"); + assert_eq!(hex, "a1b2c3"); } #[test] @@ -944,6 +937,6 @@ mod tests { fn scriptbuf_to_hex() { let script = ScriptBuf::from_bytes(vec![0xa1, 0xb2, 0xc3]); let hex = script.to_hex(); - assert_eq!(hex, "03a1b2c3"); + assert_eq!(hex, "a1b2c3"); } } From 3b8164139f6ecebc97b66a299b4a87c2288d8a76 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 8 May 2025 15:25:52 +1000 Subject: [PATCH 2/2] primitives: Add docs section for script hex API In an effort to bring developer attention to the myriad of APIs for parsing and formatting scripts as hex add a section to the rustodcs of `Script` and `ScriptBuf` (same text for both). --- primitives/src/script/borrowed.rs | 8 ++++++++ primitives/src/script/owned.rs | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/primitives/src/script/borrowed.rs b/primitives/src/script/borrowed.rs index 59e4585fa..df4e2a3d1 100644 --- a/primitives/src/script/borrowed.rs +++ b/primitives/src/script/borrowed.rs @@ -49,6 +49,14 @@ internals::transparent_newtype! { /// The type is `#[repr(transparent)]` for internal purposes only! /// No consumer crate may rely on the representation of the struct! /// + /// # Hexadecimal strings + /// + /// Scripts are consensus encoded with a length prefix and as a result of this in some places in + /// the eccosystem one will encounter hex strings that include the prefix while in other places + /// the prefix is excluded. To support parsing and formatting scripts as hex we provide a bunch + /// of different APIs and trait implementations. Please see [`examples/script.rs`] for a + /// thorough example of all the APIs. + /// /// # Bitcoin Core References /// /// * [CScript definition](https://github.com/bitcoin/bitcoin/blob/d492dc1cdaabdc52b0766bf4cba4bd73178325d0/src/script/script.h#L410) diff --git a/primitives/src/script/owned.rs b/primitives/src/script/owned.rs index 1732845e0..b76d841d4 100644 --- a/primitives/src/script/owned.rs +++ b/primitives/src/script/owned.rs @@ -16,6 +16,15 @@ use crate::prelude::{Box, Vec}; /// Just as other similar types, this implements [`Deref`], so [deref coercions] apply. Also note /// that all the safety/validity restrictions that apply to [`Script`] apply to `ScriptBuf` as well. /// +/// # Hexadecimal strings +/// +/// Scripts are consensus encoded with a length prefix and as a result of this in some places in the +/// eccosystem one will encounter hex strings that include the prefix while in other places the +/// prefix is excluded. To support parsing and formatting scripts as hex we provide a bunch of +/// different APIs and trait implementations. Please see [`examples/script.rs`] for a thorough +/// example of all the APIs. +/// +/// [`examples/script.rs`]: /// [deref coercions]: https://doc.rust-lang.org/std/ops/trait.Deref.html#more-on-deref-coercion #[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] pub struct ScriptBuf(Vec);