diff --git a/Cargo-minimal.lock b/Cargo-minimal.lock index b8f1f663a..1cedc08c2 100644 --- a/Cargo-minimal.lock +++ b/Cargo-minimal.lock @@ -179,6 +179,13 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" +[[package]] +name = "chacha20-poly1305" +version = "0.1.0" +dependencies = [ + "hex-conservative", +] + [[package]] name = "getrandom" version = "0.2.0" diff --git a/Cargo-recent.lock b/Cargo-recent.lock index 1b6cb85ae..ff4b96204 100644 --- a/Cargo-recent.lock +++ b/Cargo-recent.lock @@ -181,6 +181,13 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chacha20-poly1305" +version = "0.1.0" +dependencies = [ + "hex-conservative", +] + [[package]] name = "getrandom" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 492954549..8f8a3c163 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["addresses", "base58", "bitcoin", "fuzz", "hashes", "internals", "io", "primitives", "units"] +members = ["addresses", "base58", "bitcoin", "chacha20_poly1305", "fuzz", "hashes", "internals", "io", "primitives", "units"] resolver = "2" [patch.crates-io.bitcoin-addresses] diff --git a/chacha20_poly1305/Cargo.toml b/chacha20_poly1305/Cargo.toml new file mode 100644 index 000000000..3df491b99 --- /dev/null +++ b/chacha20_poly1305/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "chacha20-poly1305" +version = "0.1.0" +license = "CC0-1.0" +repository = "https://github.com/rust-bitcoin/rust-bitcoin/" +description = "The ChaCha20 stream cipher and Poly1305 MAC based AEAD." +categories = ["cryptography"] +keywords = ["crypto", "encryption"] +readme = "README.md" +edition = "2021" +rust-version = "1.63.0" +exclude = ["tests", "contrib"] + +[features] +default = ["std"] +std = ["alloc"] +alloc = [] + +[dev-dependencies] +hex = { package = "hex-conservative", version = "0.2.0", default-features = false, features = ["alloc"] } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] diff --git a/chacha20_poly1305/README.md b/chacha20_poly1305/README.md new file mode 100644 index 000000000..0f5b35b54 --- /dev/null +++ b/chacha20_poly1305/README.md @@ -0,0 +1,7 @@ +# ChaCha20-Poly1305 + +An authenticated encryption with associated data (AEAD) algorithm implemented with the ChaCha20 stream cipher and the Poly1305 message authentication code (MAC). + +## Minimum Supported Rust Version (MSRV) + +This library should always compile with any combination of features on **Rust 1.63.0**. diff --git a/chacha20_poly1305/contrib/test_vars.sh b/chacha20_poly1305/contrib/test_vars.sh new file mode 100644 index 000000000..7aeb1eccd --- /dev/null +++ b/chacha20_poly1305/contrib/test_vars.sh @@ -0,0 +1,14 @@ +# No shebang, this file should not be executed. +# shellcheck disable=SC2148 +# +# disable verify unused vars, despite the fact that they are used when sourced +# shellcheck disable=SC2034 + +# Test these features with "std" enabled. +FEATURES_WITH_STD="" + +# Test these features without "std" enabled. +FEATURES_WITHOUT_STD="alloc" + +# Run these examples. +EXAMPLES="" diff --git a/chacha20_poly1305/src/chacha20.rs b/chacha20_poly1305/src/chacha20.rs new file mode 100644 index 000000000..5c64da40f --- /dev/null +++ b/chacha20_poly1305/src/chacha20.rs @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: CC0-1.0 + +//! The ChaCha20 stream cipher from RFC8439. + +use core::ops::BitXor; + +/// The first four words (32-bit) of the ChaCha stream cipher state are constants. +const WORD_1: u32 = 0x61707865; +const WORD_2: u32 = 0x3320646e; +const WORD_3: u32 = 0x79622d32; +const WORD_4: u32 = 0x6b206574; + +/// The cipher's block size is 64 bytes. +const CHACHA_BLOCKSIZE: usize = 64; + +/// A 256-bit secret key shared by the parties communicating. +#[derive(Clone, Copy)] +pub struct Key([u8; 32]); + +impl Key { + /// Create a new key. + pub const fn new(key: [u8; 32]) -> Self { + Key(key) + } +} + +/// A 96-bit initialization vector (IV), or nonce. +#[derive(Clone, Copy)] +pub struct Nonce([u8; 12]); + +impl Nonce { + /// Create a new nonce. + pub const fn new(nonce: [u8; 12]) -> Self { + Nonce(nonce) + } +} + +/// A SIMD-friendly structure which holds 25% of the cipher state. +/// +/// The cipher's quarter round function is the bulk of its work +/// and there are large performance gains to be had if the function +/// leverages SIMD instructions on architectures which support them. Because +/// the algorithm allows for the cipher's state to be operated on in +/// parallel (each round only touches a quarter of the state), then theoretically +/// the parallel SIMD instructions should be used. But sometimes the +/// compiler needs a few hints to ensure it recognizes a "vectorizable" function. +/// That is the goal of this type, which clearly breaks the state up into four +/// chunks and exposes functions which align with SIMD lanes. +/// +/// This type is attempting to be as close as possible to the experimental [`core::simd::u32x4`] +/// which at this time is feature gated and well beyond the project's MSRV. But ideally +/// an easy transistion can be made in the future. +/// +/// A few SIMD relevant design choices: +/// * Heavy use of inline functions to help the compiler recognize vectorizable sections. +/// * For-each loops are easy for the compiler to recognize as vectorizable. +/// * The type is a based on an array instead of tuple since the heterogeneous +/// nature of tuples can confuse the compiler into thinking it is not vectorizable. +/// * Memory alignment lines up with SIMD size. +/// +/// In the future, a "blacklist" for the alignment option might be useful to +/// disable it on architectures which definitely do not support SIMD in order to avoid +/// needless memory inefficientcies. +#[repr(align(16))] +#[derive(Clone, Copy, PartialEq)] +struct U32x4([u32; 4]); + +impl U32x4 { + #[inline(always)] + fn wrapping_add(self, rhs: Self) -> Self { + let mut result = [0u32; 4]; + (0..4).for_each(|i| { + result[i] = self.0[i].wrapping_add(rhs.0[i]); + }); + U32x4(result) + } + + #[inline(always)] + fn rotate_left(self, n: u32) -> Self { + let mut result = [0u32; 4]; + (0..4).for_each(|i| { + result[i] = self.0[i].rotate_left(n); + }); + U32x4(result) + } + + #[inline(always)] + fn rotate_elements_left(self) -> Self { + let mut result = [0u32; 4]; + (0..4).for_each(|i| { + result[i] = self.0[(i + N as usize) % 4]; + }); + U32x4(result) + } + + #[inline(always)] + fn rotate_elements_right(self) -> Self { + let mut result = [0u32; 4]; + (0..4).for_each(|i| { + result[i] = self.0[(i + 4 - N as usize) % 4]; + }); + U32x4(result) + } + + #[inline(always)] + fn to_le_bytes(self) -> [u8; 16] { + let mut bytes = [0u8; 16]; + (0..4).for_each(|i| { + bytes[i * 4..(i + 1) * 4].copy_from_slice(&self.0[i].to_le_bytes()); + }); + bytes + } +} + +impl BitXor for U32x4 { + type Output = Self; + + #[inline(always)] + fn bitxor(self, rhs: Self) -> Self { + let mut result = [0u32; 4]; + (0..4).for_each(|i| { + result[i] = self.0[i] ^ rhs.0[i]; + }); + U32x4(result) + } +} + +/// The 512-bit cipher state is chunk'd up into 16 32-bit words. +/// +/// The 16 words can be visualized as a 4x4 matrix: +/// +/// 0 1 2 3 +/// 4 5 6 7 +/// 8 9 10 11 +/// 12 13 14 15 +#[derive(Clone, Copy, PartialEq)] +struct State { + matrix: [U32x4; 4], +} + +impl State { + /// New prepared state. + const fn new(key: Key, nonce: Nonce, count: u32) -> Self { + // Hardcoding indexes to keep the function const. + let k0 = u32::from_le_bytes([key.0[0], key.0[1], key.0[2], key.0[3]]); + let k1 = u32::from_le_bytes([key.0[4], key.0[5], key.0[6], key.0[7]]); + let k2 = u32::from_le_bytes([key.0[8], key.0[9], key.0[10], key.0[11]]); + let k3 = u32::from_le_bytes([key.0[12], key.0[13], key.0[14], key.0[15]]); + let k4 = u32::from_le_bytes([key.0[16], key.0[17], key.0[18], key.0[19]]); + let k5 = u32::from_le_bytes([key.0[20], key.0[21], key.0[22], key.0[23]]); + let k6 = u32::from_le_bytes([key.0[24], key.0[25], key.0[26], key.0[27]]); + let k7 = u32::from_le_bytes([key.0[28], key.0[29], key.0[30], key.0[31]]); + + let n0 = u32::from_le_bytes([nonce.0[0], nonce.0[1], nonce.0[2], nonce.0[3]]); + let n1 = u32::from_le_bytes([nonce.0[4], nonce.0[5], nonce.0[6], nonce.0[7]]); + let n2 = u32::from_le_bytes([nonce.0[8], nonce.0[9], nonce.0[10], nonce.0[11]]); + + State { + matrix: [ + U32x4([WORD_1, WORD_2, WORD_3, WORD_4]), + U32x4([k0, k1, k2, k3]), + U32x4([k4, k5, k6, k7]), + U32x4([count, n0, n1, n2]), + ], + } + } + + /// Four quarter rounds performed on the entire state of the cipher in a vectorized SIMD friendly fashion. + #[inline(always)] + fn quarter_round(a: U32x4, b: U32x4, c: U32x4, d: U32x4) -> (U32x4, U32x4, U32x4, U32x4) { + let a = a.wrapping_add(b); + let d = d.bitxor(a).rotate_left(16); + + let c = c.wrapping_add(d); + let b = b.bitxor(c).rotate_left(12); + + let a = a.wrapping_add(b); + let d = d.bitxor(a).rotate_left(8); + + let c = c.wrapping_add(d); + let b = b.bitxor(c).rotate_left(7); + + (a, b, c, d) + } + + /// Perform a round on "columns" and then "diagonals" of the state. + /// + /// The column quarter rounds are made up of indexes: `[0,4,8,12]`, `[1,5,9,13]`, `[2,6,10,14]`, `[3,7,11,15]`. + /// The diagonals quarter rounds are made up of indexes: `[0,5,10,15]`, `[1,6,11,12]`, `[2,7,8,13]`, `[3,4,9,14]`. + /// + /// The underlying quarter_round function is vectorized using the + /// u32x4 type in order to perform 4 quarter round functions at the same time. + /// This is a little more difficult to read, but it gives the compiler + /// a strong hint to use the performant SIMD instructions. + #[inline(always)] + fn double_round(state: [U32x4; 4]) -> [U32x4; 4] { + let [mut a, mut b, mut c, mut d] = state; + + // Column round. + (a, b, c, d) = Self::quarter_round(a, b, c, d); + + // Diagonal round (with rotations). + b = b.rotate_elements_left::<1>(); + c = c.rotate_elements_left::<2>(); + d = d.rotate_elements_left::<3>(); + (a, b, c, d) = Self::quarter_round(a, b, c, d); + // Rotate the words back into their normal positions. + b = b.rotate_elements_right::<1>(); + c = c.rotate_elements_right::<2>(); + d = d.rotate_elements_right::<3>(); + + [a, b, c, d] + } + + /// Transform the state by performing the ChaCha block function. + fn chacha_block(&mut self) { + let mut working_state = self.matrix; + + for _ in 0..10 { + working_state = Self::double_round(working_state); + } + + // Add the working state to the original state. + (0..4).for_each(|i| { + self.matrix[i] = working_state[i].wrapping_add(self.matrix[i]); + }); + } + + /// Expose the 512-bit state as a byte stream. + fn keystream(&self) -> [u8; 64] { + let mut keystream = [0u8; 64]; + for i in 0..4 { + keystream[i * 16..(i + 1) * 16].copy_from_slice(&self.matrix[i].to_le_bytes()); + } + keystream + } +} + +/// The ChaCha20 stream cipher from RFC8439. +/// +/// The 20-round IETF version uses a 96-bit nonce and 32-bit block counter. This is the +/// variant used in the Bitcoin ecosystem, including BIP324. +pub struct ChaCha20 { + /// Secret key shared by the parties communicating. + key: Key, + /// A key and nonce pair should only be used once. + nonce: Nonce, + /// Internal block index of keystream. + block_count: u32, + /// Interal byte offset index of the block_count. + seek_offset_bytes: usize, +} + +impl ChaCha20 { + /// Make a new instance of ChaCha20 from an index in the keystream. + pub const fn new(key: Key, nonce: Nonce, seek: u32) -> Self { + let block_count = seek / 64; + let seek_offset_bytes = (seek % 64) as usize; + ChaCha20 { key, nonce, block_count, seek_offset_bytes } + } + + /// Make a new instance of ChaCha20 from a block in the keystream. + pub const fn new_from_block(key: Key, nonce: Nonce, block: u32) -> Self { + ChaCha20 { key, nonce, block_count: block, seek_offset_bytes: 0 } + } + + /// Apply the keystream to a buffer. + pub fn apply_keystream(&mut self, buffer: &mut [u8]) { + let num_full_blocks = buffer.len() / CHACHA_BLOCKSIZE; + for block in 0..num_full_blocks { + let keystream = + keystream_at_slice(self.key, self.nonce, self.block_count, self.seek_offset_bytes); + for (buffer_byte, keystream_byte) in buffer + [block * CHACHA_BLOCKSIZE..(block + 1) * CHACHA_BLOCKSIZE] + .iter_mut() + .zip(keystream.iter()) + { + *buffer_byte ^= *keystream_byte + } + self.block_count += 1; + } + if buffer.len() % 64 > 0 { + let keystream = + keystream_at_slice(self.key, self.nonce, self.block_count, self.seek_offset_bytes); + for (buffer_byte, keystream_byte) in + buffer[num_full_blocks * CHACHA_BLOCKSIZE..].iter_mut().zip(keystream.iter()) + { + *buffer_byte ^= *keystream_byte + } + self.block_count += 1; + } + } + + /// Get the keystream block at a specified block. + pub fn get_keystream(&mut self, block: u32) -> [u8; 64] { + self.block(block); + keystream_at_slice(self.key, self.nonce, self.block_count, self.seek_offset_bytes) + } + + /// Update the index of the keystream to the given byte. + pub fn seek(&mut self, seek: u32) { + self.block_count = seek / 64; + self.seek_offset_bytes = (seek % 64) as usize; + } + + /// Update the index of the keystream to a block. + pub fn block(&mut self, block: u32) { + self.block_count = block; + self.seek_offset_bytes = 0; + } +} + +fn keystream_at_slice(key: Key, nonce: Nonce, count: u32, seek: usize) -> [u8; 64] { + let mut keystream: [u8; 128] = [0; 128]; + let (first_half, second_half) = keystream.split_at_mut(64); + + let mut state = State::new(key, nonce, count); + state.chacha_block(); + first_half.copy_from_slice(&state.keystream()); + + let mut state = State::new(key, nonce, count + 1); + state.chacha_block(); + second_half.copy_from_slice(&state.keystream()); + + let seeked_keystream: [u8; 64] = + keystream[seek..seek + 64].try_into().expect("slicing produces 64-byte slice"); + seeked_keystream +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + use hex::prelude::*; + + #[test] + fn test_chacha_block() { + let mut state = State { + matrix: [ + U32x4([0x61707865, 0x3320646e, 0x79622d32, 0x6b206574]), + U32x4([0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c]), + U32x4([0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c]), + U32x4([0x00000001, 0x09000000, 0x4a000000, 0x00000000]), + ], + }; + state.chacha_block(); + + let expected = [ + U32x4([0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3]), + U32x4([0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3]), + U32x4([0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9]), + U32x4([0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2]), + ]; + + for (actual, expected) in state.matrix.iter().zip(expected.iter()) { + assert_eq!(actual.0, expected.0); + } + } + + #[test] + fn test_prepare_state() { + let key = + Key(Vec::from_hex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f") + .unwrap() + .try_into() + .unwrap()); + let nonce = Nonce(Vec::from_hex("000000090000004a00000000").unwrap().try_into().unwrap()); + let count = 1; + let state = State::new(key, nonce, count); + assert_eq!(state.matrix[1].0[0].to_be_bytes().to_lower_hex_string(), "03020100"); + assert_eq!(state.matrix[2].0[2].to_be_bytes().to_lower_hex_string(), "1b1a1918"); + assert_eq!(state.matrix[3].0[2].to_be_bytes().to_lower_hex_string(), "4a000000"); + assert_eq!(state.matrix[3].0[3].to_be_bytes().to_lower_hex_string(), "00000000"); + assert_eq!(state.matrix[3].0[0].to_be_bytes().to_lower_hex_string(), "00000001"); + } + + #[test] + fn test_small_plaintext() { + let key = + Key(Vec::from_hex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f") + .unwrap() + .try_into() + .unwrap()); + let nonce = Nonce(Vec::from_hex("000000090000004a00000000").unwrap().try_into().unwrap()); + let count = 1; + let mut chacha = ChaCha20::new(key, nonce, count); + let mut binding = [8; 3]; + chacha.apply_keystream(&mut binding[..]); + let mut chacha = ChaCha20::new(key, nonce, count); + chacha.apply_keystream(&mut binding[..]); + assert_eq!([8; 3], binding); + } + + #[test] + fn test_modulo_64() { + let key = + Key(Vec::from_hex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f") + .unwrap() + .try_into() + .unwrap()); + let nonce = Nonce(Vec::from_hex("000000090000004a00000000").unwrap().try_into().unwrap()); + let count = 1; + let mut chacha = ChaCha20::new(key, nonce, count); + let mut binding = [8; 64]; + chacha.apply_keystream(&mut binding[..]); + let mut chacha = ChaCha20::new(key, nonce, count); + chacha.apply_keystream(&mut binding[..]); + assert_eq!([8; 64], binding); + } + + #[test] + fn test_rfc_standard() { + let key = + Key(Vec::from_hex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f") + .unwrap() + .try_into() + .unwrap()); + let nonce = Nonce(Vec::from_hex("000000000000004a00000000").unwrap().try_into().unwrap()); + let count = 64; + let mut chacha = ChaCha20::new(key, nonce, count); + let mut binding = *b"Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + let to = binding; + chacha.apply_keystream(&mut binding[..]); + assert_eq!(binding[..], Vec::from_hex("6e2e359a2568f98041ba0728dd0d6981e97e7aec1d4360c20a27afccfd9fae0bf91b65c5524733ab8f593dabcd62b3571639d624e65152ab8f530c359f0861d807ca0dbf500d6a6156a38e088a22b65e52bc514d16ccf806818ce91ab77937365af90bbf74a35be6b40b8eedf2785e42874d").unwrap()); + let mut chacha = ChaCha20::new(key, nonce, count); + chacha.apply_keystream(&mut binding[..]); + let binding = *b"Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + assert_eq!(binding, to); + } + + #[test] + fn test_new_from_block() { + let key = + Key(Vec::from_hex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f") + .unwrap() + .try_into() + .unwrap()); + let nonce = Nonce(Vec::from_hex("000000000000004a00000000").unwrap().try_into().unwrap()); + let block: u32 = 1; + let mut chacha = ChaCha20::new_from_block(key, nonce, block); + let mut binding = *b"Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + let to = binding; + chacha.apply_keystream(&mut binding[..]); + assert_eq!(binding[..], Vec::from_hex("6e2e359a2568f98041ba0728dd0d6981e97e7aec1d4360c20a27afccfd9fae0bf91b65c5524733ab8f593dabcd62b3571639d624e65152ab8f530c359f0861d807ca0dbf500d6a6156a38e088a22b65e52bc514d16ccf806818ce91ab77937365af90bbf74a35be6b40b8eedf2785e42874d").unwrap()); + chacha.block(block); + chacha.apply_keystream(&mut binding[..]); + let binding = *b"Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + assert_eq!(binding, to); + } +} diff --git a/chacha20_poly1305/src/lib.rs b/chacha20_poly1305/src/lib.rs new file mode 100644 index 000000000..bc37ecdbc --- /dev/null +++ b/chacha20_poly1305/src/lib.rs @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: CC0-1.0 + +//! Combine the ChaCha20 stream cipher with the Poly1305 message authentication code +//! to form an authenticated encryption with additional data (AEAD) algorithm. + +pub mod chacha20; +pub mod poly1305; + +use chacha20::ChaCha20; +use poly1305::Poly1305; + +use core::fmt; + +pub use self::chacha20::{Key, Nonce}; + +/// Zero array for padding slices. +const ZEROES: [u8; 16] = [0u8; 16]; + +/// Errors encrypting and decrypting messages with ChaCha20 and Poly1305 authentication tags. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Error { + /// Additional data showing up when it is not expected. + UnauthenticatedAdditionalData, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::UnauthenticatedAdditionalData => write!(f, "Unauthenticated aad."), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::UnauthenticatedAdditionalData => None, + } + } +} + +/// Encrypt and decrypt content along with a authentication tag. +pub struct ChaCha20Poly1305 { + key: Key, + nonce: Nonce, +} + +impl ChaCha20Poly1305 { + /// Make a new instance of a ChaCha20Poly1305 AEAD. + pub const fn new(key: Key, nonce: Nonce) -> Self { + ChaCha20Poly1305 { key, nonce } + } + + /// Encrypt content in place and return the Poly1305 16-byte authentication tag. + /// + /// # Arguments + /// + /// - `content` - Plaintext to be encrypted in place. + /// - `aad` - Optional metadata covered by the authentication tag. + /// + /// # Returns + /// + /// The 16-byte authentication tag. + pub fn encrypt(self, content: &mut [u8], aad: Option<&[u8]>) -> [u8; 16] { + let mut chacha = ChaCha20::new_from_block(self.key, self.nonce, 1); + chacha.apply_keystream(content); + let keystream = chacha.get_keystream(0); + let mut poly = + Poly1305::new(keystream[..32].try_into().expect("slicing produces 32-byte slice")); + let aad = aad.unwrap_or(&[]); + // AAD and ciphertext are padded if not 16-byte aligned. + poly.input(aad); + let aad_overflow = aad.len() % 16; + if aad_overflow > 0 { + poly.input(&ZEROES[0..(16 - aad_overflow)]); + } + + poly.input(content); + let text_overflow = content.len() % 16; + if text_overflow > 0 { + poly.input(&ZEROES[0..(16 - text_overflow)]); + } + + let len_buffer = encode_lengths(aad.len() as u64, content.len() as u64); + poly.input(&len_buffer); + poly.tag() + } + + /// Decrypt the ciphertext in place if authentication tag is correct. + /// + /// # Arguments + /// + /// - `content` - Ciphertext to be decrypted in place. + /// - `tag` - 16-byte authentication tag. + /// - `aad` - Optional metadata covered by the authentication tag. + pub fn decrypt( + self, + content: &mut [u8], + tag: [u8; 16], + aad: Option<&[u8]>, + ) -> Result<(), Error> { + let mut chacha = ChaCha20::new_from_block(self.key, self.nonce, 0); + let keystream = chacha.get_keystream(0); + let mut poly = + Poly1305::new(keystream[..32].try_into().expect("slicing produces 32-byte slice")); + let aad = aad.unwrap_or(&[]); + poly.input(aad); + // AAD and ciphertext are padded if not 16-byte aligned. + let aad_overflow = aad.len() % 16; + if aad_overflow > 0 { + poly.input(&ZEROES[0..(16 - aad_overflow)]); + } + poly.input(content); + let msg_overflow = content.len() % 16; + if msg_overflow > 0 { + poly.input(&ZEROES[0..(16 - msg_overflow)]); + } + + let len_buffer = encode_lengths(aad.len() as u64, content.len() as u64); + poly.input(&len_buffer); + let derived_tag = poly.tag(); + if derived_tag == tag { + let mut chacha = ChaCha20::new_from_block(self.key, self.nonce, 1); + chacha.apply_keystream(content); + Ok(()) + } else { + Err(Error::UnauthenticatedAdditionalData) + } + } +} + +/// AAD and content lengths are each encoded in 8-bytes. +fn encode_lengths(aad_len: u64, content_len: u64) -> [u8; 16] { + let aad_len_bytes = aad_len.to_le_bytes(); + let content_len_bytes = content_len.to_le_bytes(); + let mut len_buffer = [0u8; 16]; + let (aad_len_buffer, content_len_buffer) = len_buffer.split_at_mut(8); + aad_len_buffer.copy_from_slice(&aad_len_bytes[..]); + content_len_buffer.copy_from_slice(&content_len_bytes[..]); + + len_buffer +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + use hex::prelude::*; + + #[test] + fn test_rfc7539() { + let mut message = *b"Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + let aad = Vec::from_hex("50515253c0c1c2c3c4c5c6c7").unwrap(); + let key = Key::new( + Vec::from_hex("808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9f") + .unwrap() + .try_into() + .unwrap(), + ); + let nonce = + Nonce::new(Vec::from_hex("070000004041424344454647").unwrap().try_into().unwrap()); + let cipher = ChaCha20Poly1305::new(key, nonce); + let tag = cipher.encrypt(&mut message, Some(&aad)); + + let mut buffer = [0u8; 130]; + buffer[..message.len()].copy_from_slice(&message); + buffer[message.len()..].copy_from_slice(&tag); + + assert_eq!(&buffer.to_lower_hex_string(), "d31a8d34648e60db7b86afbc53ef7ec2a4aded51296e08fea9e2b5a736ee62d63dbea45e8ca9671282fafb69da92728b1a71de0a9e060b2905d6a5b67ecd3b3692ddbd7f2d778b8c9803aee328091b58fab324e4fad675945585808b4831d7bc3ff4def08e4b7a9de576d26586cec64b61161ae10b594f09e26a7e902ecbd0600691"); + } +} diff --git a/chacha20_poly1305/src/poly1305.rs b/chacha20_poly1305/src/poly1305.rs new file mode 100644 index 000000000..0de1f8ba4 --- /dev/null +++ b/chacha20_poly1305/src/poly1305.rs @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: CC0-1.0 + +//! Poly1305 one-time message authenticator from RFC8439. +//! +//! Heavily inspired by the ["Donna"](https://github.com/floodyberry/poly1305-donna/blob/master/poly1305-donna-32.h) implementation in C +//! and Loup Vaillant's [Poly1305 design article](https://loup-vaillant.fr/tutorials/poly1305-design). + +/// 2^26 for the 26-bit limbs. +const BITMASK: u32 = 0x03ffffff; +/// Number is encoded in five 26-bit limbs. +const CARRY: u32 = 26; + +/// Poly1305 authenticator takes a 32-byte one-time key and a message and produces a 16-byte tag. +/// +/// 64-bit constant time multiplication and addition implementation. +pub struct Poly1305 { + /// r part of the secret key. + r: [u32; 5], + /// s part of the secret key. + s: [u32; 4], + /// State used to create tag. + acc: [u32; 5], + /// Leftovers between adds. + leftovers: [u8; 16], + /// Track relevant leftover bytes. + leftovers_len: usize, +} + +impl Poly1305 { + /// Initialize authenticator with a 32-byte one-time secret key. + pub const fn new(key: [u8; 32]) -> Self { + // Taken from Donna. Assigns r to a 26-bit 5-limb number while simultaneously 'clamping' r. + let r0 = u32::from_le_bytes([key[0], key[1], key[2], key[3]]) & 0x3ffffff; + let r1 = (u32::from_le_bytes([key[3], key[4], key[5], key[6]]) >> 2) & 0x03ffff03; + let r2 = (u32::from_le_bytes([key[6], key[7], key[8], key[9]]) >> 4) & 0x03ffc0ff; + let r3 = (u32::from_le_bytes([key[9], key[10], key[11], key[12]]) >> 6) & 0x03f03fff; + let r4 = (u32::from_le_bytes([key[12], key[13], key[14], key[15]]) >> 8) & 0x000fffff; + + let s0 = u32::from_le_bytes([key[16], key[17], key[18], key[19]]); + let s1 = u32::from_le_bytes([key[20], key[21], key[22], key[23]]); + let s2 = u32::from_le_bytes([key[24], key[25], key[26], key[27]]); + let s3 = u32::from_le_bytes([key[28], key[29], key[30], key[31]]); + + Poly1305 { + r: [r0, r1, r2, r3, r4], + s: [s0, s1, s2, s3], + acc: [0; 5], + leftovers: [0; 16], + leftovers_len: 0, + } + } + + /// Add message to be authenticated, can be called multiple times before creating tag. + pub fn input(&mut self, message: &[u8]) { + // Process previous leftovers if the message is long enough to fill the leftovers buffer. If + // the message is too short then it will just be added to the leftovers at the end. Now if there + // are no leftovers, but the message can fill the buffer, it will process that buffer and + // and process the rest of the message later on. + let fill = + if self.leftovers_len + message.len() >= 16 { 16 - self.leftovers_len } else { 0 }; + + if fill > 0 { + self.leftovers[self.leftovers_len..].copy_from_slice(&message[0..fill]); + + let msg_slice = prepare_padded_message_slice(&self.leftovers, false); + for (i, b) in msg_slice.iter().enumerate() { + self.acc[i] += *b; + } + self.r_times_a(); + self.leftovers_len = 0; + } + + // Remove prefix already processed in leftovers. + let remaining_message = &message[fill..]; + + // Add message to accumulator. + let mut i = 0; + while i < remaining_message.len() / 16 { + let msg_slice = + prepare_padded_message_slice(&remaining_message[i * 16..(i + 1) * 16], false); + for (i, b) in msg_slice.iter().enumerate() { + self.acc[i] += *b; + } + self.r_times_a(); + i += 1; + } + + // Save any leftovers. + if remaining_message.len() % 16 > 0 { + let message_index = remaining_message.len() - (remaining_message.len() % 16); + let new_len = self.leftovers_len + remaining_message.len() % 16; + self.leftovers[self.leftovers_len..new_len] + .copy_from_slice(&remaining_message[message_index..]); + self.leftovers_len = new_len; + } + } + + /// Generate authentication tag. + pub fn tag(mut self) -> [u8; 16] { + // Add any remaining leftovers to accumulator. + if self.leftovers_len > 0 { + let msg_slice = + prepare_padded_message_slice(&self.leftovers[..self.leftovers_len], true); + for (i, b) in msg_slice.iter().enumerate() { + self.acc[i] += *b; + } + self.r_times_a(); + self.leftovers_len = 0; + } + + // Carry and mask. + for i in 1..4 { + self.acc[i + 1] += self.acc[i] >> CARRY; + } + self.acc[0] += (self.acc[4] >> CARRY) * 5; + self.acc[1] += self.acc[0] >> CARRY; + for i in 0..self.acc.len() { + self.acc[i] &= BITMASK; + } + // Reduce. + let mut t = self.acc; + t[0] += 5; + t[4] = t[4].wrapping_sub(1 << CARRY); + for i in 0..3 { + t[i + 1] += t[i] >> CARRY; + } + t[4] = t[4].wrapping_add(t[3] >> CARRY); + for t in t.iter_mut().take(4) { + *t &= BITMASK; + } + // Convert acc to a 4 item array. + let mask = (t[4] >> 31).wrapping_sub(1); + for (i, t) in t.iter().enumerate().take(self.acc.len()) { + self.acc[i] = t & mask | self.acc[i] & !mask; + } + // Voodoo from donna to convert to [u32; 4]. + let a0 = self.acc[0] | self.acc[1] << 26; + let a1 = self.acc[1] >> 6 | self.acc[2] << 20; + let a2 = self.acc[2] >> 12 | self.acc[3] << 14; + let a3 = self.acc[3] >> 18 | self.acc[4] << 8; + let a = [a0, a1, a2, a3]; + // a + s + let mut tag: [u64; 4] = [0; 4]; + for i in 0..4 { + tag[i] = a[i] as u64 + self.s[i] as u64; + } + + // Carry. + for i in 0..3 { + tag[i + 1] += tag[i] >> 32; + } + + // Return the 16 least significant bytes. + let mut ret: [u8; 16] = [0; 16]; + for i in 0..tag.len() { + let bytes = (tag[i] as u32).to_le_bytes(); + ret[i * 4..(i + 1) * 4].copy_from_slice(&bytes); + } + ret + } + + fn r_times_a(&mut self) { + // Multiply and reduce. + // While this looks complicated, it is a variation of schoolbook multiplication, + // described well in an article here: https://loup-vaillant.fr/tutorials/poly1305-design + let mut t = [0; 5]; + for i in 0..5 { + for (j, t) in t.iter_mut().enumerate() { + let modulus: u64 = if i > j { 5 } else { 1 }; + let start = (5 - i) % 5; + *t += modulus * self.r[i] as u64 * self.acc[(start + j) % 5] as u64; + } + } + // Carry. + for i in 0..4 { + t[i + 1] += t[i] >> CARRY; + } + // Mask. + for (i, t) in t.iter().enumerate().take(self.acc.len()) { + self.acc[i] = *t as u32 & BITMASK; + } + // Carry and mask first limb. + self.acc[0] += (t[4] >> CARRY) as u32 * 5; + self.acc[1] += self.acc[0] >> CARRY; + self.acc[0] &= BITMASK; + } +} + +// Encode 16-byte (tag sized), unless is_last flag set to true, piece of message into 5 26-bit limbs. +fn prepare_padded_message_slice(msg: &[u8], is_last: bool) -> [u32; 5] { + let hi_bit: u32 = if is_last { 0 } else { 1 << 24 }; + let mut fmt_msg = [0u8; 17]; + fmt_msg[..msg.len()].copy_from_slice(msg); + // Tack on a 1-byte so messages with buncha zeroes at the end don't have the same MAC. + fmt_msg[msg.len()] = 0x01; + // Encode number in five 26-bit limbs. + let m0 = u32::from_le_bytes(fmt_msg[0..4].try_into().expect("Valid subset of 32.")) & BITMASK; + let m1 = + u32::from_le_bytes(fmt_msg[3..7].try_into().expect("Valid subset of 32.")) >> 2 & BITMASK; + let m2 = + u32::from_le_bytes(fmt_msg[6..10].try_into().expect("Valid subset of 32.")) >> 4 & BITMASK; + let m3 = + u32::from_le_bytes(fmt_msg[9..13].try_into().expect("Valid subset of 32.")) >> 6 & BITMASK; + let m4 = + u32::from_le_bytes(fmt_msg[12..16].try_into().expect("Valid subset of 32.")) >> 8 | hi_bit; + [m0, m1, m2, m3, m4] +} + +fn _print_acc(num: &[u32; 5]) { + let a0 = num[0] | num[1] << 26; + let a1 = num[1] >> 6 | num[2] << 20; + let a2 = num[2] >> 12 | num[3] << 14; + let a3 = num[3] >> 18 | num[4] << 8; + let a = [a0, a1, a2, a3]; + let mut ret: [u8; 16] = [0; 16]; + for i in 0..a.len() { + let bytes = a[i].to_le_bytes(); + ret[i * 4..(i + 1) * 4].copy_from_slice(&bytes); + } + ret.reverse(); +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + use hex::prelude::*; + + #[test] + fn test_rfc7539() { + let key = Vec::from_hex("85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b") + .unwrap() + .as_slice() + .try_into() + .unwrap(); + let mut poly = Poly1305::new(key); + let message = b"Cryptographic Forum Research Group"; + poly.input(message); + let tag = poly.tag(); + assert_eq!("a8061dc1305136c6c22b8baf0c0127a9", tag.to_lower_hex_string()); + } +}