// SPDX-License-Identifier: CC0-1.0
|
|
|
|
//! The ChaCha20 stream cipher from RFC8439.
|
|
|
|
use core::ops::BitXor;
|
|
|
|
// The first four 32-bit words of the ChaCha state are fixed constants. Read as
// little-endian bytes they spell the ASCII string "expand 32-byte k".

/// First constant word of the state: `"expa"` in little-endian ASCII.
const WORD_1: u32 = 0x6170_7865;
/// Second constant word of the state: `"nd 3"` in little-endian ASCII.
const WORD_2: u32 = 0x3320_646e;
/// Third constant word of the state: `"2-by"` in little-endian ASCII.
const WORD_3: u32 = 0x7962_2d32;
/// Fourth constant word of the state: `"te k"` in little-endian ASCII.
const WORD_4: u32 = 0x6b20_6574;
|
|
|
|
/// Size in bytes of one keystream block: the 512-bit state serialized as bytes.
const CHACHA_BLOCKSIZE: usize = 512 / 8;
|
|
|
|
/// The 256-bit (32-byte) secret key shared between the communicating parties.
#[derive(Clone, Copy)]
pub struct Key([u8; 32]);

impl Key {
    /// Wraps 32 raw bytes as a cipher key.
    pub const fn new(key: [u8; 32]) -> Self {
        Self(key)
    }
}
|
|
|
|
/// The 96-bit (12-byte) nonce, also called the initialization vector (IV).
#[derive(Clone, Copy)]
pub struct Nonce([u8; 12]);

impl Nonce {
    /// Wraps 12 raw bytes as a cipher nonce.
    pub const fn new(nonce: [u8; 12]) -> Self {
        Self(nonce)
    }
}
|
|
|
|
/// A SIMD-friendly structure which holds 25% of the cipher state.
///
/// The cipher's quarter round function is the bulk of its work
/// and there are large performance gains to be had if the function
/// leverages SIMD instructions on architectures which support them. Because
/// the algorithm allows for the cipher's state to be operated on in
/// parallel (each round only touches a quarter of the state), then theoretically
/// the parallel SIMD instructions should be used. But sometimes the
/// compiler needs a few hints to ensure it recognizes a "vectorizable" function.
/// That is the goal of this type, which clearly breaks the state up into four
/// chunks and exposes functions which align with SIMD lanes.
///
/// This type is attempting to be as close as possible to the experimental [`core::simd::u32x4`]
/// which at this time is feature gated and well beyond the project's MSRV. But ideally
/// an easy transition can be made in the future.
///
/// A few SIMD relevant design choices:
///  * Heavy use of inline functions to help the compiler recognize vectorizable sections.
///  * For-each loops are easy for the compiler to recognize as vectorizable.
///  * The type is based on an array instead of a tuple since the heterogeneous
///    nature of tuples can confuse the compiler into thinking it is not vectorizable.
///  * Memory alignment lines up with SIMD size.
///
/// In the future, a "blacklist" for the alignment option might be useful to
/// disable it on architectures which definitely do not support SIMD in order to avoid
/// needless memory inefficiencies.
#[repr(align(16))]
#[derive(Clone, Copy, PartialEq)]
struct U32x4([u32; 4]);

impl U32x4 {
    /// Adds `rhs` to `self` lane by lane, wrapping each lane modulo 2^32.
    #[inline(always)]
    fn wrapping_add(self, rhs: Self) -> Self {
        let mut lanes = self.0;
        lanes.iter_mut().zip(rhs.0.iter()).for_each(|(lane, addend)| {
            *lane = (*lane).wrapping_add(*addend);
        });
        U32x4(lanes)
    }

    /// Rotates the bits of every lane left by `n`.
    #[inline(always)]
    fn rotate_left(self, n: u32) -> Self {
        let mut lanes = self.0;
        lanes.iter_mut().for_each(|lane| {
            *lane = (*lane).rotate_left(n);
        });
        U32x4(lanes)
    }

    /// Rotates the four lanes left by `N` positions: lane `i` of the result is
    /// lane `(i + N) % 4` of `self`.
    #[inline(always)]
    fn rotate_elements_left<const N: u32>(self) -> Self {
        // Reduce first so `i + shift` cannot overflow for any `N`.
        let shift = (N % 4) as usize;
        let mut lanes = [0u32; 4];
        lanes.iter_mut().enumerate().for_each(|(i, lane)| {
            *lane = self.0[(i + shift) % 4];
        });
        U32x4(lanes)
    }

    /// Rotates the four lanes right by `N` positions, undoing
    /// [`Self::rotate_elements_left`] with the same `N`.
    #[inline(always)]
    fn rotate_elements_right<const N: u32>(self) -> Self {
        // A right rotation by `N` is a left rotation by `4 - (N mod 4)`. Reducing
        // `N` first keeps the subtraction from underflowing when `N > 4`.
        let shift = 4 - (N % 4) as usize;
        let mut lanes = [0u32; 4];
        lanes.iter_mut().enumerate().for_each(|(i, lane)| {
            *lane = self.0[(i + shift) % 4];
        });
        U32x4(lanes)
    }

    /// Serializes the four lanes, in order, as little-endian bytes.
    #[inline(always)]
    fn to_le_bytes(self) -> [u8; 16] {
        let mut bytes = [0u8; 16];
        bytes.chunks_exact_mut(4).zip(self.0.iter()).for_each(|(chunk, word)| {
            chunk.copy_from_slice(&word.to_le_bytes());
        });
        bytes
    }
}
|
|
|
|
impl BitXor for U32x4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn bitxor(self, rhs: Self) -> Self {
|
|
let mut result = [0u32; 4];
|
|
(0..4).for_each(|i| {
|
|
result[i] = self.0[i] ^ rhs.0[i];
|
|
});
|
|
U32x4(result)
|
|
}
|
|
}
|
|
|
|
/// The 512-bit cipher state is chunk'd up into 16 32-bit words.
|
|
///
|
|
/// The 16 words can be visualized as a 4x4 matrix:
|
|
///
|
|
/// 0 1 2 3
|
|
/// 4 5 6 7
|
|
/// 8 9 10 11
|
|
/// 12 13 14 15
|
|
#[derive(Clone, Copy, PartialEq)]
|
|
struct State {
|
|
matrix: [U32x4; 4],
|
|
}
|
|
|
|
impl State {
|
|
/// New prepared state.
|
|
const fn new(key: Key, nonce: Nonce, count: u32) -> Self {
|
|
// Hardcoding indexes to keep the function const.
|
|
let k0 = u32::from_le_bytes([key.0[0], key.0[1], key.0[2], key.0[3]]);
|
|
let k1 = u32::from_le_bytes([key.0[4], key.0[5], key.0[6], key.0[7]]);
|
|
let k2 = u32::from_le_bytes([key.0[8], key.0[9], key.0[10], key.0[11]]);
|
|
let k3 = u32::from_le_bytes([key.0[12], key.0[13], key.0[14], key.0[15]]);
|
|
let k4 = u32::from_le_bytes([key.0[16], key.0[17], key.0[18], key.0[19]]);
|
|
let k5 = u32::from_le_bytes([key.0[20], key.0[21], key.0[22], key.0[23]]);
|
|
let k6 = u32::from_le_bytes([key.0[24], key.0[25], key.0[26], key.0[27]]);
|
|
let k7 = u32::from_le_bytes([key.0[28], key.0[29], key.0[30], key.0[31]]);
|
|
|
|
let n0 = u32::from_le_bytes([nonce.0[0], nonce.0[1], nonce.0[2], nonce.0[3]]);
|
|
let n1 = u32::from_le_bytes([nonce.0[4], nonce.0[5], nonce.0[6], nonce.0[7]]);
|
|
let n2 = u32::from_le_bytes([nonce.0[8], nonce.0[9], nonce.0[10], nonce.0[11]]);
|
|
|
|
State {
|
|
matrix: [
|
|
U32x4([WORD_1, WORD_2, WORD_3, WORD_4]),
|
|
U32x4([k0, k1, k2, k3]),
|
|
U32x4([k4, k5, k6, k7]),
|
|
U32x4([count, n0, n1, n2]),
|
|
],
|
|
}
|
|
}
|
|
|
|
/// Four quarter rounds performed on the entire state of the cipher in a vectorized SIMD friendly fashion.
|
|
#[inline(always)]
|
|
fn quarter_round(a: U32x4, b: U32x4, c: U32x4, d: U32x4) -> (U32x4, U32x4, U32x4, U32x4) {
|
|
let a = a.wrapping_add(b);
|
|
let d = d.bitxor(a).rotate_left(16);
|
|
|
|
let c = c.wrapping_add(d);
|
|
let b = b.bitxor(c).rotate_left(12);
|
|
|
|
let a = a.wrapping_add(b);
|
|
let d = d.bitxor(a).rotate_left(8);
|
|
|
|
let c = c.wrapping_add(d);
|
|
let b = b.bitxor(c).rotate_left(7);
|
|
|
|
(a, b, c, d)
|
|
}
|
|
|
|
/// Perform a round on "columns" and then "diagonals" of the state.
|
|
///
|
|
/// The column quarter rounds are made up of indexes: `[0,4,8,12]`, `[1,5,9,13]`, `[2,6,10,14]`, `[3,7,11,15]`.
|
|
/// The diagonals quarter rounds are made up of indexes: `[0,5,10,15]`, `[1,6,11,12]`, `[2,7,8,13]`, `[3,4,9,14]`.
|
|
///
|
|
/// The underlying quarter_round function is vectorized using the
|
|
/// u32x4 type in order to perform 4 quarter round functions at the same time.
|
|
/// This is a little more difficult to read, but it gives the compiler
|
|
/// a strong hint to use the performant SIMD instructions.
|
|
#[inline(always)]
|
|
fn double_round(state: [U32x4; 4]) -> [U32x4; 4] {
|
|
let [mut a, mut b, mut c, mut d] = state;
|
|
|
|
// Column round.
|
|
(a, b, c, d) = Self::quarter_round(a, b, c, d);
|
|
|
|
// Diagonal round (with rotations).
|
|
b = b.rotate_elements_left::<1>();
|
|
c = c.rotate_elements_left::<2>();
|
|
d = d.rotate_elements_left::<3>();
|
|
(a, b, c, d) = Self::quarter_round(a, b, c, d);
|
|
// Rotate the words back into their normal positions.
|
|
b = b.rotate_elements_right::<1>();
|
|
c = c.rotate_elements_right::<2>();
|
|
d = d.rotate_elements_right::<3>();
|
|
|
|
[a, b, c, d]
|
|
}
|
|
|
|
/// Transform the state by performing the ChaCha block function.
|
|
fn chacha_block(&mut self) {
|
|
let mut working_state = self.matrix;
|
|
|
|
for _ in 0..10 {
|
|
working_state = Self::double_round(working_state);
|
|
}
|
|
|
|
// Add the working state to the original state.
|
|
(0..4).for_each(|i| {
|
|
self.matrix[i] = working_state[i].wrapping_add(self.matrix[i]);
|
|
});
|
|
}
|
|
|
|
/// Expose the 512-bit state as a byte stream.
|
|
fn keystream(&self) -> [u8; 64] {
|
|
let mut keystream = [0u8; 64];
|
|
for i in 0..4 {
|
|
keystream[i * 16..(i + 1) * 16].copy_from_slice(&self.matrix[i].to_le_bytes());
|
|
}
|
|
keystream
|
|
}
|
|
}
|
|
|
|
/// The ChaCha20 stream cipher from RFC8439.
|
|
///
|
|
/// The 20-round IETF version uses a 96-bit nonce and 32-bit block counter. This is the
|
|
/// variant used in the Bitcoin ecosystem, including BIP324.
|
|
pub struct ChaCha20 {
|
|
/// Secret key shared by the parties communicating.
|
|
key: Key,
|
|
/// A key and nonce pair should only be used once.
|
|
nonce: Nonce,
|
|
/// Internal block index of keystream.
|
|
block_count: u32,
|
|
/// Interal byte offset index of the block_count.
|
|
seek_offset_bytes: usize,
|
|
}
|
|
|
|
impl ChaCha20 {
|
|
/// Make a new instance of ChaCha20 from an index in the keystream.
|
|
pub const fn new(key: Key, nonce: Nonce, seek: u32) -> Self {
|
|
let block_count = seek / 64;
|
|
let seek_offset_bytes = (seek % 64) as usize;
|
|
ChaCha20 { key, nonce, block_count, seek_offset_bytes }
|
|
}
|
|
|
|
/// Make a new instance of ChaCha20 from a block in the keystream.
|
|
pub const fn new_from_block(key: Key, nonce: Nonce, block: u32) -> Self {
|
|
ChaCha20 { key, nonce, block_count: block, seek_offset_bytes: 0 }
|
|
}
|
|
|
|
/// Apply the keystream to a buffer.
|
|
pub fn apply_keystream(&mut self, buffer: &mut [u8]) {
|
|
let num_full_blocks = buffer.len() / CHACHA_BLOCKSIZE;
|
|
for block in 0..num_full_blocks {
|
|
let keystream =
|
|
keystream_at_slice(self.key, self.nonce, self.block_count, self.seek_offset_bytes);
|
|
for (buffer_byte, keystream_byte) in buffer
|
|
[block * CHACHA_BLOCKSIZE..(block + 1) * CHACHA_BLOCKSIZE]
|
|
.iter_mut()
|
|
.zip(keystream.iter())
|
|
{
|
|
*buffer_byte ^= *keystream_byte
|
|
}
|
|
self.block_count += 1;
|
|
}
|
|
if buffer.len() % 64 > 0 {
|
|
let keystream =
|
|
keystream_at_slice(self.key, self.nonce, self.block_count, self.seek_offset_bytes);
|
|
for (buffer_byte, keystream_byte) in
|
|
buffer[num_full_blocks * CHACHA_BLOCKSIZE..].iter_mut().zip(keystream.iter())
|
|
{
|
|
*buffer_byte ^= *keystream_byte
|
|
}
|
|
self.block_count += 1;
|
|
}
|
|
}
|
|
|
|
/// Get the keystream block at a specified block.
|
|
pub fn get_keystream(&mut self, block: u32) -> [u8; 64] {
|
|
self.block(block);
|
|
keystream_at_slice(self.key, self.nonce, self.block_count, self.seek_offset_bytes)
|
|
}
|
|
|
|
/// Update the index of the keystream to the given byte.
|
|
pub fn seek(&mut self, seek: u32) {
|
|
self.block_count = seek / 64;
|
|
self.seek_offset_bytes = (seek % 64) as usize;
|
|
}
|
|
|
|
/// Update the index of the keystream to a block.
|
|
pub fn block(&mut self, block: u32) {
|
|
self.block_count = block;
|
|
self.seek_offset_bytes = 0;
|
|
}
|
|
}
|
|
|
|
fn keystream_at_slice(key: Key, nonce: Nonce, count: u32, seek: usize) -> [u8; 64] {
|
|
let mut keystream: [u8; 128] = [0; 128];
|
|
let (first_half, second_half) = keystream.split_at_mut(64);
|
|
|
|
let mut state = State::new(key, nonce, count);
|
|
state.chacha_block();
|
|
first_half.copy_from_slice(&state.keystream());
|
|
|
|
let mut state = State::new(key, nonce, count + 1);
|
|
state.chacha_block();
|
|
second_half.copy_from_slice(&state.keystream());
|
|
|
|
let seeked_keystream: [u8; 64] =
|
|
keystream[seek..seek + 64].try_into().expect("slicing produces 64-byte slice");
|
|
seeked_keystream
|
|
}
|
|
|
|
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use alloc::vec::Vec;

    use hex::prelude::*;

    use super::*;

    /// Plaintext used by the encryption tests.
    const SUNSCREEN: &[u8] = b"Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";

    /// Expected ciphertext of [`SUNSCREEN`] under [`rfc_key`], nonce
    /// `000000000000004a00000000`, starting at block 1.
    const SUNSCREEN_CIPHERTEXT_HEX: &str = "6e2e359a2568f98041ba0728dd0d6981e97e7aec1d4360c20a27afccfd9fae0bf91b65c5524733ab8f593dabcd62b3571639d624e65152ab8f530c359f0861d807ca0dbf500d6a6156a38e088a22b65e52bc514d16ccf806818ce91ab77937365af90bbf74a35be6b40b8eedf2785e42874d";

    /// The key `00 01 02 .. 1f` shared by all tests.
    fn rfc_key() -> Key {
        Key(Vec::from_hex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f")
            .unwrap()
            .try_into()
            .unwrap())
    }

    /// Parses a 12-byte nonce from its hex encoding.
    fn nonce_from_hex(nonce_hex: &str) -> Nonce {
        Nonce(Vec::from_hex(nonce_hex).unwrap().try_into().unwrap())
    }

    #[test]
    fn test_chacha_block() {
        let mut state = State {
            matrix: [
                U32x4([0x61707865, 0x3320646e, 0x79622d32, 0x6b206574]),
                U32x4([0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c]),
                U32x4([0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c]),
                U32x4([0x00000001, 0x09000000, 0x4a000000, 0x00000000]),
            ],
        };
        state.chacha_block();

        let expected = [
            U32x4([0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3]),
            U32x4([0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3]),
            U32x4([0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9]),
            U32x4([0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2]),
        ];

        for (actual, expected) in state.matrix.iter().zip(expected.iter()) {
            assert_eq!(actual.0, expected.0);
        }
    }

    #[test]
    fn test_prepare_state() {
        let key = rfc_key();
        let nonce = nonce_from_hex("000000090000004a00000000");
        let count = 1;
        let state = State::new(key, nonce, count);
        assert_eq!(state.matrix[1].0[0].to_be_bytes().to_lower_hex_string(), "03020100");
        assert_eq!(state.matrix[2].0[2].to_be_bytes().to_lower_hex_string(), "1b1a1918");
        assert_eq!(state.matrix[3].0[2].to_be_bytes().to_lower_hex_string(), "4a000000");
        assert_eq!(state.matrix[3].0[3].to_be_bytes().to_lower_hex_string(), "00000000");
        assert_eq!(state.matrix[3].0[0].to_be_bytes().to_lower_hex_string(), "00000001");
    }

    #[test]
    fn test_small_plaintext() {
        let key = rfc_key();
        let nonce = nonce_from_hex("000000090000004a00000000");
        let count = 1;
        let mut chacha = ChaCha20::new(key, nonce, count);
        let mut binding = [8; 3];
        chacha.apply_keystream(&mut binding[..]);
        // Applying the same keystream a second time must restore the input.
        let mut chacha = ChaCha20::new(key, nonce, count);
        chacha.apply_keystream(&mut binding[..]);
        assert_eq!([8; 3], binding);
    }

    #[test]
    fn test_modulo_64() {
        let key = rfc_key();
        let nonce = nonce_from_hex("000000090000004a00000000");
        let count = 1;
        let mut chacha = ChaCha20::new(key, nonce, count);
        let mut binding = [8; 64];
        chacha.apply_keystream(&mut binding[..]);
        // Applying the same keystream a second time must restore the input.
        let mut chacha = ChaCha20::new(key, nonce, count);
        chacha.apply_keystream(&mut binding[..]);
        assert_eq!([8; 64], binding);
    }

    #[test]
    fn test_rfc_standard() {
        let key = rfc_key();
        let nonce = nonce_from_hex("000000000000004a00000000");
        // Byte index 64 is the start of block 1.
        let count = 64;
        let expected: Vec<u8> = Vec::from_hex(SUNSCREEN_CIPHERTEXT_HEX).unwrap();

        let mut chacha = ChaCha20::new(key, nonce, count);
        let mut binding = Vec::from(SUNSCREEN);
        chacha.apply_keystream(&mut binding[..]);
        assert_eq!(binding, expected);

        // Decrypt with a fresh cipher and check the buffer that was actually
        // decrypted, not a fresh copy of the plaintext literal.
        let mut chacha = ChaCha20::new(key, nonce, count);
        chacha.apply_keystream(&mut binding[..]);
        assert_eq!(&binding[..], SUNSCREEN);
    }

    #[test]
    fn test_new_from_block() {
        let key = rfc_key();
        let nonce = nonce_from_hex("000000000000004a00000000");
        let block: u32 = 1;
        let expected: Vec<u8> = Vec::from_hex(SUNSCREEN_CIPHERTEXT_HEX).unwrap();

        let mut chacha = ChaCha20::new_from_block(key, nonce, block);
        let mut binding = Vec::from(SUNSCREEN);
        chacha.apply_keystream(&mut binding[..]);
        assert_eq!(binding, expected);

        // Rewind to the starting block, decrypt, and check the buffer that was
        // actually decrypted.
        chacha.block(block);
        chacha.apply_keystream(&mut binding[..]);
        assert_eq!(&binding[..], SUNSCREEN);
    }
}
|