From 98e39b438300a4fdf985bac3ad5a7072d2e0005e Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 24 Aug 2018 19:39:22 +0000 Subject: [PATCH 1/4] transaction: make 0-input de/serialization always use Segwit --- src/blockdata/transaction.rs | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/blockdata/transaction.rs b/src/blockdata/transaction.rs index 849b54ed..a0b28d29 100644 --- a/src/blockdata/transaction.rs +++ b/src/blockdata/transaction.rs @@ -128,7 +128,18 @@ impl Default for TxOut { } } -/// A Bitcoin transaction, which describes an authenticated movement of coins +/// A Bitcoin transaction, which describes an authenticated movement of coins. +/// +/// If any inputs have nonempty witnesses, the entire transaction is serialized +/// in the post-BIP141 Segwit format which includes a list of witnesses. If all +/// inputs have empty witnesses, the transaction is serialized in the pre-BIP141 +/// format. +/// +/// There is one major exception to this: to avoid deserialization ambiguity, +/// if the transaction has no inputs, it is serialized in the BIP141 style. Be +/// aware that this differs from the transaction format in PSBT, which _never_ +/// uses BIP141. (Ordinarily there is no conflict, since in PSBT transactions +/// are always unsigned and therefore their inputs have empty witnesses.) #[derive(Clone, PartialEq, Eq, Debug, Hash)] pub struct Transaction { /// The protocol version, should always be 1. @@ -357,7 +368,7 @@ impl Decodable for TxIn { impl Encodable for Transaction { fn consensus_encode(&self, s: &mut S) -> Result <(), encode::Error> { self.version.consensus_encode(s)?; - let mut have_witness = false; + let mut have_witness = self.input.is_empty(); for input in &self.input { if !input.witness.is_empty() { have_witness = true; @@ -495,7 +506,6 @@ mod tests { use super::{Transaction, TxIn}; use blockdata::script::Script; - #[cfg(all(feature = "serde", feature = "strason"))] use consensus::encode::serialize; use consensus::encode::deserialize; use util::hash::{BitcoinHash, Sha256dHash}; @@ -542,6 +552,20 @@ mod tests { assert_eq!(realtx.get_weight(), 193*4); } + #[test] + fn tx_no_input_deserialization() { + let hex_tx = hex_bytes( + "010000000001000100e1f505000000001976a9140389035a9225b3839e2bbf32d826a1e222031fd888ac00000000" + ).unwrap(); + let tx: Transaction = deserialize(&hex_tx).expect("deserialize tx"); + + assert_eq!(tx.input.len(), 0); + assert_eq!(tx.output.len(), 1); + + let reser = serialize(&tx); + assert_eq!(hex_tx, reser); + } + #[test] fn test_ntxid() { let hex_tx = hex_bytes("0100000001a15d57094aa7a21a28cb20b59aab8fc7d1149a3bdbcddba9c622e4f5f6a99ece010000006c493046022100f93bb0e7d8db7bd46e40132d1f8242026e045f03a0efe71bbb8e3f475e970d790221009337cd7f1f929f00cc6ff01f03729b069a7c21b59b1736ddfee5db5946c5da8c0121033b9b137ee87d5a812d6f506efdd37f0affa7ffc310711c06c7f3e097c9447c52ffffffff0100e1f505000000001976a9140389035a9225b3839e2bbf32d826a1e222031fd888ac00000000").unwrap(); From a181a523c6e24a3ebc738e0b9afd78bb5fe9af03 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 1 Oct 2018 20:38:05 +0000 Subject: [PATCH 2/4] remove special case for 0-input 0-output transaction deserialization This creates two ways to encode an empty transaction; we should use only the Segwit-enabled one because that's what we do for 0-input non-0-output transactions. --- src/blockdata/transaction.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/blockdata/transaction.rs b/src/blockdata/transaction.rs index a0b28d29..4df2b3bd 100644 --- a/src/blockdata/transaction.rs +++ b/src/blockdata/transaction.rs @@ -399,15 +399,6 @@ impl Decodable for Transaction { if input.is_empty() { let segwit_flag: u8 = Decodable::consensus_decode(d)?; match segwit_flag { - // Empty tx - 0 => { - Ok(Transaction { - version: version, - input: input, - output: vec![], - lock_time: Decodable::consensus_decode(d)?, - }) - } // BIP144 input witnesses 1 => { let mut input: Vec = Decodable::consensus_decode(d)?; From 7f7013db9c481f2b94185908a40bc7816d11a9e8 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 1 Oct 2018 20:38:50 +0000 Subject: [PATCH 3/4] fuzz: check that transaction deserialization roundtrips --- fuzz/fuzz_targets/deserialize_transaction.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fuzz/fuzz_targets/deserialize_transaction.rs b/fuzz/fuzz_targets/deserialize_transaction.rs index a3b251d6..7f4a5d04 100644 --- a/fuzz/fuzz_targets/deserialize_transaction.rs +++ b/fuzz/fuzz_targets/deserialize_transaction.rs @@ -5,13 +5,22 @@ fn do_test(data: &[u8]) { match tx_result { Err(_) => {}, Ok(mut tx) => { - let len = bitcoin::consensus::encode::serialize(&tx).len() as u64; + let ser = bitcoin::consensus::encode::serialize(&tx); + assert_eq!(&ser[..], data); + let len = ser.len() as u64; let calculated_weight = tx.get_weight(); for input in &mut tx.input { input.witness = vec![]; } let no_witness_len = bitcoin::consensus::encode::serialize(&tx).len() as u64; - assert_eq!(no_witness_len * 3 + len, calculated_weight); + // For 0-input transactions, `no_witness_len` will be incorrect because + // we serialize as segwit even after "stripping the witnesses". We need + // to drop two bytes (i.e. eight weight) + if tx.input.is_empty() { + assert_eq!(no_witness_len * 3 + len - 8, calculated_weight); + } else { + assert_eq!(no_witness_len * 3 + len, calculated_weight); + } }, } } @@ -58,7 +67,7 @@ mod tests { #[test] fn duplicate_crash() { let mut a = Vec::new(); - extend_vec_from_hex("00", &mut a); + extend_vec_from_hex("000700000001000000010000", &mut a); super::do_test(&a); } } From 11a27832350d49e923925fda93948742cc173029 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 10 Oct 2018 02:45:09 +0000 Subject: [PATCH 4/4] add comment expanding on the segwit ambiguity --- src/blockdata/transaction.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/blockdata/transaction.rs b/src/blockdata/transaction.rs index 4df2b3bd..c90d7a1c 100644 --- a/src/blockdata/transaction.rs +++ b/src/blockdata/transaction.rs @@ -140,6 +140,24 @@ impl Default for TxOut { /// aware that this differs from the transaction format in PSBT, which _never_ /// uses BIP141. (Ordinarily there is no conflict, since in PSBT transactions /// are always unsigned and therefore their inputs have empty witnesses.) +/// +/// The specific ambiguity is that Segwit uses the flag bytes `0001` where an old +/// serializer would read the number of transaction inputs. The old serializer +/// would interpret this as "no inputs, one output", which means the transaction +/// is invalid, and simply reject it. Segwit further specifies that this encoding +/// should *only* be used when some input has a nonempty witness; that is, +/// witness-less transactions should be encoded in the traditional format. +/// +/// However, in protocols where transactions may legitimately have 0 inputs, e.g. +/// when parties are cooperatively funding a transaction, the "00 means Segwit" +/// heuristic does not work. Since Segwit requires such a transaction be encoded +/// in the the original transaction format (since it has no inputs and therefore +/// no input witnesses), a traditionally encoded transaction may have the `0001` +/// Segwit flag in it, which confuses most Segwit parsers including the one in +/// Bitcoin Core. +/// +/// We therefore deviate from the spec by always using the Segwit witness encoding +/// for 0-input transactions, which results in unambiguously parseable transactions. #[derive(Clone, PartialEq, Eq, Debug, Hash)] pub struct Transaction { /// The protocol version, should always be 1.