Add `consensus_decode_from_finite_reader` optimization

As things are right now, memory exhaustion protection in `Decodable`
is based on checking input-decoded lengths against arbitrary limits,
and ad-hoc wrapping collection deserialization in `Take`.

The problem with that are two-fold:

* Potential consensus bugs due to incorrect limits.
* Performance degradation when decoding nested structured,
  due to recursive `Take<Take<..>>` readers.

This change introduces a systematic approach to the problem.

A concept of a "size-limited-reader" is introduced to rely on
the input data to finish at enforced limit and fail deserialization.

Memory exhaustion protection is now achived by capping allocations
to reasonable values, yet allowing the underlying collections
to grow to accomodate rare yet legitmately oversized data (with tiny
performance cost), and reliance on input data size limit.

A set of simple rules allow avoiding recursive `Take` wrappers.

Fix #997
This commit is contained in:
Dawid Ciężarkiewicz 2022-05-28 20:58:43 -07:00
parent 99ae48ab01
commit 082e185711
5 changed files with 215 additions and 86 deletions

View File

@ -23,6 +23,7 @@
//! This module provides the structures and functions needed to support scripts.
//!
use crate::consensus::encode::MAX_VEC_SIZE;
use crate::prelude::*;
use crate::io;
@ -1075,9 +1076,14 @@ impl Encodable for Script {
}
impl Decodable for Script {
#[inline]
fn consensus_decode_from_finite_reader<D: io::Read>(d: D) -> Result<Self, encode::Error> {
Ok(Script(Decodable::consensus_decode_from_finite_reader(d)?))
}
#[inline]
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
Ok(Script(Decodable::consensus_decode(d)?))
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
}
}

View File

@ -639,14 +639,20 @@ impl Encodable for TxIn {
}
}
impl Decodable for TxIn {
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
#[inline]
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
Ok(TxIn {
previous_output: Decodable::consensus_decode(&mut d)?,
script_sig: Decodable::consensus_decode(&mut d)?,
sequence: Decodable::consensus_decode(d)?,
previous_output: Decodable::consensus_decode_from_finite_reader(&mut d)?,
script_sig: Decodable::consensus_decode_from_finite_reader(&mut d)?,
sequence: Decodable::consensus_decode_from_finite_reader(d)?,
witness: Witness::default(),
})
}
#[inline]
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
}
}
impl Encodable for Transaction {
@ -680,20 +686,19 @@ impl Encodable for Transaction {
}
impl Decodable for Transaction {
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
let mut d = d.take(MAX_VEC_SIZE as u64);
let version = i32::consensus_decode(&mut d)?;
let input = Vec::<TxIn>::consensus_decode(&mut d)?;
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
let version = i32::consensus_decode_from_finite_reader(&mut d)?;
let input = Vec::<TxIn>::consensus_decode_from_finite_reader(&mut d)?;
// segwit
if input.is_empty() {
let segwit_flag = u8::consensus_decode(&mut d)?;
let segwit_flag = u8::consensus_decode_from_finite_reader(&mut d)?;
match segwit_flag {
// BIP144 input witnesses
1 => {
let mut input = Vec::<TxIn>::consensus_decode(&mut d)?;
let output = Vec::<TxOut>::consensus_decode(&mut d)?;
let mut input = Vec::<TxIn>::consensus_decode_from_finite_reader(&mut d)?;
let output = Vec::<TxOut>::consensus_decode_from_finite_reader(&mut d)?;
for txin in input.iter_mut() {
txin.witness = Decodable::consensus_decode(&mut d)?;
txin.witness = Decodable::consensus_decode_from_finite_reader(&mut d)?;
}
if !input.is_empty() && input.iter().all(|input| input.witness.is_empty()) {
Err(encode::Error::ParseFailed("witness flag set but no witnesses present"))
@ -702,7 +707,7 @@ impl Decodable for Transaction {
version,
input,
output,
lock_time: Decodable::consensus_decode(d)?,
lock_time: Decodable::consensus_decode_from_finite_reader(d)?,
})
}
}
@ -714,11 +719,15 @@ impl Decodable for Transaction {
Ok(Transaction {
version,
input,
output: Decodable::consensus_decode(&mut d)?,
lock_time: Decodable::consensus_decode(d)?,
output: Decodable::consensus_decode_from_finite_reader(&mut d)?,
lock_time: Decodable::consensus_decode_from_finite_reader(d)?,
})
}
}
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
}
}
/// This type is consensus valid but an input including it would prevent the transaction from

View File

@ -167,7 +167,7 @@ pub fn deserialize<T: Decodable>(data: &[u8]) -> Result<T, Error> {
/// doesn't consume the entire vector.
pub fn deserialize_partial<T: Decodable>(data: &[u8]) -> Result<(T, usize), Error> {
let mut decoder = Cursor::new(data);
let rv = Decodable::consensus_decode(&mut decoder)?;
let rv = Decodable::consensus_decode_from_finite_reader(&mut decoder)?;
let consumed = decoder.position() as usize;
Ok((rv, consumed))
@ -319,6 +319,47 @@ pub trait Encodable {
/// Data which can be encoded in a consensus-consistent way
pub trait Decodable: Sized {
/// Decode `Self` from a size-limited reader.
///
/// Like `consensus_decode` but relies on the reader being
/// limited in the amount of data it returns, e.g. by
/// being wrapped in [`std::io::Take`].
///
/// Failling to obide to this requirement might lead to
/// memory exhaustion caused by malicious inputs.
///
/// Users should default to `consensus_decode`, but
/// when data to be decoded is already in a byte vector
/// of a limited size, calling this function directly
/// might be marginally faster (due to avoiding
/// extra checks).
///
/// ### Rules for trait implementations
///
/// * Simple types that that have a fixed size (own and member fields),
/// don't have to overwrite this method, or be concern with it.
/// * Types that deserialize using externally provided length
/// should implement it:
/// * Make `consensus_decode` forward to `consensus_decode_bytes_from_finite_reader`
/// with the reader wrapped by `Take`. Failure to do so, without other
/// forms of memory exhaustion protection might lead to resource exhaustion
/// vulnerability.
/// * Put a max cap on things like `Vec::with_capacity` to avoid oversized
/// allocations, and rely on the reader running out of data, and collections
/// reallocating on a legitimately oversized input data, instead of trying
/// to enforce arbitrary length limits.
/// * Types that contain other types that implement custom `consensus_decode_from_finite_reader`,
/// should also implement it applying same rules, and in addition make sure to call
/// `consensus_decode_from_finite_reader` on all members, to avoid creating redundant
/// `Take` wrappers. Failure to do so might result only in a tiny performance hit.
fn consensus_decode_from_finite_reader<D: io::Read>(d: D) -> Result<Self, Error> {
// This method is always strictly less general than, `consensus_decode`,
// so it's safe and make sense to default to just calling it.
// This way most types, that don't care about protecting against
// resource exhaustion due to malicious input, can just ignore it.
Self::consensus_decode(d)
}
/// Decode an object with a well-defined format
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error>;
}
@ -555,23 +596,29 @@ macro_rules! impl_vec {
Ok(len)
}
}
impl Decodable for Vec<$type> {
#[inline]
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, Error> {
let len = VarInt::consensus_decode(&mut d)?.0;
let byte_size = (len as usize)
.checked_mul(mem::size_of::<$type>())
.ok_or(self::Error::ParseFailed("Invalid length"))?;
if byte_size > MAX_VEC_SIZE {
return Err(self::Error::OversizedVectorAllocation { requested: byte_size, max: MAX_VEC_SIZE })
}
let mut ret = Vec::with_capacity(len as usize);
let mut d = d.take(MAX_VEC_SIZE as u64);
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, Error> {
let len = VarInt::consensus_decode_from_finite_reader(&mut d)?.0;
// Do not allocate upfront more items than if the sequnce of type
// occupied roughly quarter a block. This should never be the case
// for normal data, but even if that's not true - `push` will just
// reallocate.
// Note: OOM protection relies on reader eventually running out of
// data to feed us.
let max_capacity = MAX_VEC_SIZE / 4 / mem::size_of::<$type>();
let mut ret = Vec::with_capacity(core::cmp::min(len as usize, max_capacity));
for _ in 0..len {
ret.push(Decodable::consensus_decode(&mut d)?);
ret.push(Decodable::consensus_decode_from_finite_reader(&mut d)?);
}
Ok(ret)
}
#[inline]
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
}
}
}
}
@ -597,6 +644,33 @@ pub(crate) fn consensus_encode_with_size<S: io::Write>(data: &[u8], mut s: S) ->
}
struct ReadBytesFromFiniteReaderOpts {
len: usize,
chunk_size: usize,
}
/// Read `opts.len` bytes from reader, where `opts.len` could potentially be malicious
///
/// This function relies on reader being bound in amount of data
/// it returns for OOM protection. See [`Decodable::consensus_decode_from_finite_reader`].
#[inline]
fn read_bytes_from_finite_reader<D: io::Read>(mut d: D, mut opts: ReadBytesFromFiniteReaderOpts) -> Result<Vec<u8>, Error> {
let mut ret = vec![];
assert_ne!(opts.chunk_size, 0);
while opts.len > 0 {
let chunk_start = ret.len();
let chunk_size = core::cmp::min(opts.len, opts.chunk_size);
let chunk_end = chunk_start + chunk_size;
ret.resize(chunk_end, 0u8);
d.read_slice(&mut ret[chunk_start..chunk_end])?;
opts.len -= chunk_size;
}
Ok(ret)
}
impl Encodable for Vec<u8> {
#[inline]
fn consensus_encode<S: io::Write>(&self, s: S) -> Result<usize, io::Error> {
@ -606,14 +680,15 @@ impl Encodable for Vec<u8> {
impl Decodable for Vec<u8> {
#[inline]
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, Error> {
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, Error> {
let len = VarInt::consensus_decode(&mut d)?.0 as usize;
if len > MAX_VEC_SIZE {
return Err(self::Error::OversizedVectorAllocation { requested: len, max: MAX_VEC_SIZE })
}
let mut ret = vec![0u8; len];
d.read_slice(&mut ret)?;
Ok(ret)
// most real-world vec of bytes data, wouldn't be larger than 128KiB
read_bytes_from_finite_reader(d, ReadBytesFromFiniteReaderOpts { len, chunk_size: 128 * 1024 })
}
#[inline]
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
}
}
@ -625,9 +700,14 @@ impl Encodable for Box<[u8]> {
}
impl Decodable for Box<[u8]> {
#[inline]
fn consensus_decode_from_finite_reader<D: io::Read>(d: D) -> Result<Self, Error> {
<Vec<u8>>::consensus_decode_from_finite_reader(d).map(From::from)
}
#[inline]
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
<Vec<u8>>::consensus_decode(d).map(From::from)
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
}
}
@ -651,17 +731,11 @@ impl Encodable for CheckedData {
impl Decodable for CheckedData {
#[inline]
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, Error> {
let len = u32::consensus_decode(&mut d)?;
if len > MAX_VEC_SIZE as u32 {
return Err(self::Error::OversizedVectorAllocation {
requested: len as usize,
max: MAX_VEC_SIZE
});
}
let checksum = <[u8; 4]>::consensus_decode(&mut d)?;
let mut ret = vec![0u8; len as usize];
d.read_slice(&mut ret)?;
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, Error> {
let len = u32::consensus_decode_from_finite_reader(&mut d)? as usize;
let checksum = <[u8; 4]>::consensus_decode_from_finite_reader(&mut d)?;
let ret = read_bytes_from_finite_reader(d, ReadBytesFromFiniteReaderOpts { len, chunk_size: MAX_VEC_SIZE })?;
let expected_checksum = sha2_checksum(&ret);
if expected_checksum != checksum {
Err(self::Error::InvalidChecksum {
@ -672,6 +746,10 @@ impl Decodable for CheckedData {
Ok(CheckedData(ret))
}
}
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
}
}
// References
@ -1064,5 +1142,21 @@ mod tests {
}
}
#[test]
fn test_read_bytes_from_finite_reader() {
let data : Vec<u8> = (0..10).collect();
for chunk_size in 1..20 {
assert_eq!(
read_bytes_from_finite_reader(
io::Cursor::new(&data),
ReadBytesFromFiniteReaderOpts { len: data.len(), chunk_size }
).unwrap(),
data
);
}
}
}

View File

@ -32,6 +32,16 @@ macro_rules! impl_consensus_encoding {
}
impl $crate::consensus::Decodable for $thing {
#[inline]
fn consensus_decode_from_finite_reader<D: $crate::io::Read>(
mut d: D,
) -> Result<$thing, $crate::consensus::encode::Error> {
Ok($thing {
$($field: $crate::consensus::Decodable::consensus_decode_from_finite_reader(&mut d)?),+
})
}
#[inline]
fn consensus_decode<D: $crate::io::Read>(
d: D,

View File

@ -20,7 +20,7 @@
use crate::prelude::*;
use core::{mem, fmt, iter};
use core::{fmt, iter};
use crate::io;
use crate::blockdata::block;
@ -29,7 +29,7 @@ use crate::network::address::{Address, AddrV2Message};
use crate::network::{message_network, message_bloom};
use crate::network::message_blockdata;
use crate::network::message_filter;
use crate::consensus::encode::{CheckedData, Decodable, Encodable, VarInt, MAX_VEC_SIZE};
use crate::consensus::encode::{CheckedData, Decodable, Encodable, VarInt};
use crate::consensus::{encode, serialize};
use crate::util::merkleblock::MerkleBlock;
@ -38,6 +38,10 @@ use crate::util::merkleblock::MerkleBlock;
/// This limit is not currently enforced by this implementation.
pub const MAX_INV_SIZE: usize = 50_000;
/// Maximum size, in bytes, of an encoded message
/// This by neccessity should be larger tham `MAX_VEC_SIZE`
pub const MAX_MSG_SIZE: usize = 5_000_000;
/// Serializer for command string
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct CommandString(Cow<'static, str>);
@ -341,15 +345,11 @@ struct HeaderDeserializationWrapper(Vec<block::BlockHeader>);
impl Decodable for HeaderDeserializationWrapper {
#[inline]
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
let len = VarInt::consensus_decode(&mut d)?.0;
let byte_size = (len as usize)
.checked_mul(mem::size_of::<block::BlockHeader>())
.ok_or(encode::Error::ParseFailed("Invalid length"))?;
if byte_size > MAX_VEC_SIZE {
return Err(encode::Error::OversizedVectorAllocation { requested: byte_size, max: MAX_VEC_SIZE })
}
let mut ret = Vec::with_capacity(len as usize);
// should be above usual number of items to avoid
// allocation
let mut ret = Vec::with_capacity(core::cmp::min(1024 * 16, len as usize));
for _ in 0..len {
ret.push(Decodable::consensus_decode(&mut d)?);
if u8::consensus_decode(&mut d)? != 0u8 {
@ -358,49 +358,54 @@ impl Decodable for HeaderDeserializationWrapper {
}
Ok(HeaderDeserializationWrapper(ret))
}
#[inline]
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
Self::consensus_decode_from_finite_reader(d.take(MAX_MSG_SIZE as u64))
}
}
impl Decodable for RawNetworkMessage {
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
let magic = Decodable::consensus_decode(&mut d)?;
let cmd = CommandString::consensus_decode(&mut d)?;
let raw_payload = CheckedData::consensus_decode(&mut d)?.0;
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
let magic = Decodable::consensus_decode_from_finite_reader(&mut d)?;
let cmd = CommandString::consensus_decode_from_finite_reader(&mut d)?;
let raw_payload = CheckedData::consensus_decode_from_finite_reader(&mut d)?.0;
let mut mem_d = io::Cursor::new(raw_payload);
let payload = match &cmd.0[..] {
"version" => NetworkMessage::Version(Decodable::consensus_decode(&mut mem_d)?),
"version" => NetworkMessage::Version(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"verack" => NetworkMessage::Verack,
"addr" => NetworkMessage::Addr(Decodable::consensus_decode(&mut mem_d)?),
"inv" => NetworkMessage::Inv(Decodable::consensus_decode(&mut mem_d)?),
"getdata" => NetworkMessage::GetData(Decodable::consensus_decode(&mut mem_d)?),
"notfound" => NetworkMessage::NotFound(Decodable::consensus_decode(&mut mem_d)?),
"getblocks" => NetworkMessage::GetBlocks(Decodable::consensus_decode(&mut mem_d)?),
"getheaders" => NetworkMessage::GetHeaders(Decodable::consensus_decode(&mut mem_d)?),
"addr" => NetworkMessage::Addr(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"inv" => NetworkMessage::Inv(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"getdata" => NetworkMessage::GetData(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"notfound" => NetworkMessage::NotFound(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"getblocks" => NetworkMessage::GetBlocks(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"getheaders" => NetworkMessage::GetHeaders(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"mempool" => NetworkMessage::MemPool,
"block" => NetworkMessage::Block(Decodable::consensus_decode(&mut mem_d)?),
"block" => NetworkMessage::Block(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"headers" => NetworkMessage::Headers(
HeaderDeserializationWrapper::consensus_decode(&mut mem_d)?.0
HeaderDeserializationWrapper::consensus_decode_from_finite_reader(&mut mem_d)?.0
),
"sendheaders" => NetworkMessage::SendHeaders,
"getaddr" => NetworkMessage::GetAddr,
"ping" => NetworkMessage::Ping(Decodable::consensus_decode(&mut mem_d)?),
"pong" => NetworkMessage::Pong(Decodable::consensus_decode(&mut mem_d)?),
"merkleblock" => NetworkMessage::MerkleBlock(Decodable::consensus_decode(&mut mem_d)?),
"filterload" => NetworkMessage::FilterLoad(Decodable::consensus_decode(&mut mem_d)?),
"filteradd" => NetworkMessage::FilterAdd(Decodable::consensus_decode(&mut mem_d)?),
"ping" => NetworkMessage::Ping(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"pong" => NetworkMessage::Pong(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"merkleblock" => NetworkMessage::MerkleBlock(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"filterload" => NetworkMessage::FilterLoad(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"filteradd" => NetworkMessage::FilterAdd(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"filterclear" => NetworkMessage::FilterClear,
"tx" => NetworkMessage::Tx(Decodable::consensus_decode(&mut mem_d)?),
"getcfilters" => NetworkMessage::GetCFilters(Decodable::consensus_decode(&mut mem_d)?),
"cfilter" => NetworkMessage::CFilter(Decodable::consensus_decode(&mut mem_d)?),
"getcfheaders" => NetworkMessage::GetCFHeaders(Decodable::consensus_decode(&mut mem_d)?),
"cfheaders" => NetworkMessage::CFHeaders(Decodable::consensus_decode(&mut mem_d)?),
"getcfcheckpt" => NetworkMessage::GetCFCheckpt(Decodable::consensus_decode(&mut mem_d)?),
"cfcheckpt" => NetworkMessage::CFCheckpt(Decodable::consensus_decode(&mut mem_d)?),
"reject" => NetworkMessage::Reject(Decodable::consensus_decode(&mut mem_d)?),
"alert" => NetworkMessage::Alert(Decodable::consensus_decode(&mut mem_d)?),
"feefilter" => NetworkMessage::FeeFilter(Decodable::consensus_decode(&mut mem_d)?),
"tx" => NetworkMessage::Tx(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"getcfilters" => NetworkMessage::GetCFilters(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"cfilter" => NetworkMessage::CFilter(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"getcfheaders" => NetworkMessage::GetCFHeaders(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"cfheaders" => NetworkMessage::CFHeaders(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"getcfcheckpt" => NetworkMessage::GetCFCheckpt(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"cfcheckpt" => NetworkMessage::CFCheckpt(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"reject" => NetworkMessage::Reject(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"alert" => NetworkMessage::Alert(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"feefilter" => NetworkMessage::FeeFilter(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"wtxidrelay" => NetworkMessage::WtxidRelay,
"addrv2" => NetworkMessage::AddrV2(Decodable::consensus_decode(&mut mem_d)?),
"addrv2" => NetworkMessage::AddrV2(Decodable::consensus_decode_from_finite_reader(&mut mem_d)?),
"sendaddrv2" => NetworkMessage::SendAddrV2,
_ => NetworkMessage::Unknown {
command: cmd,
@ -412,6 +417,11 @@ impl Decodable for RawNetworkMessage {
payload,
})
}
#[inline]
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
Self::consensus_decode_from_finite_reader(d.take(MAX_MSG_SIZE as u64))
}
}
#[cfg(test)]