diff --git a/hashes/src/ripemd160/crypto.rs b/hashes/src/ripemd160/crypto.rs new file mode 100644 index 000000000..4c6226892 --- /dev/null +++ b/hashes/src/ripemd160/crypto.rs @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: CC0-1.0 + +use super::{HashEngine, BLOCK_SIZE}; + +#[cfg(feature = "small-hash")] +#[macro_use] +mod small_hash { + #[rustfmt::skip] + pub(super) fn round(a: u32, _b: u32, c: u32, _d: u32, e: u32, + x: u32, bits: u32, add: u32, round: u32, + ) -> (u32, u32) { + let a = a.wrapping_add(round).wrapping_add(x).wrapping_add(add); + let a = a.rotate_left(bits).wrapping_add(e); + let c = c.rotate_left(10); + + (a, c) + } + + macro_rules! round( + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, + $x:expr, $bits:expr, $add:expr, $round:expr) => ({ + let updates = small_hash::round($a, $b, $c, $d, $e, $x, $bits, $add, $round); + $a = updates.0; + $c = updates.1; + }); + ); +} + +#[cfg(not(feature = "small-hash"))] +macro_rules! round( + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, + $x:expr, $bits:expr, $add:expr, $round:expr) => ({ + $a = $a.wrapping_add($round).wrapping_add($x).wrapping_add($add); + $a = $a.rotate_left($bits).wrapping_add($e); + $c = $c.rotate_left(10); + }); +); + +macro_rules! 
process_block( + ($h:expr, $data:expr, + $( round1: h_ordering $f0:expr, $f1:expr, $f2:expr, $f3:expr, $f4:expr; + data_index $data_index1:expr; roll_shift $bits1:expr; )* + $( round2: h_ordering $g0:expr, $g1:expr, $g2:expr, $g3:expr, $g4:expr; + data_index $data_index2:expr; roll_shift $bits2:expr; )* + $( round3: h_ordering $h0:expr, $h1:expr, $h2:expr, $h3:expr, $h4:expr; + data_index $data_index3:expr; roll_shift $bits3:expr; )* + $( round4: h_ordering $i0:expr, $i1:expr, $i2:expr, $i3:expr, $i4:expr; + data_index $data_index4:expr; roll_shift $bits4:expr; )* + $( round5: h_ordering $j0:expr, $j1:expr, $j2:expr, $j3:expr, $j4:expr; + data_index $data_index5:expr; roll_shift $bits5:expr; )* + $( par_round1: h_ordering $pj0:expr, $pj1:expr, $pj2:expr, $pj3:expr, $pj4:expr; + data_index $pdata_index1:expr; roll_shift $pbits1:expr; )* + $( par_round2: h_ordering $pi0:expr, $pi1:expr, $pi2:expr, $pi3:expr, $pi4:expr; + data_index $pdata_index2:expr; roll_shift $pbits2:expr; )* + $( par_round3: h_ordering $ph0:expr, $ph1:expr, $ph2:expr, $ph3:expr, $ph4:expr; + data_index $pdata_index3:expr; roll_shift $pbits3:expr; )* + $( par_round4: h_ordering $pg0:expr, $pg1:expr, $pg2:expr, $pg3:expr, $pg4:expr; + data_index $pdata_index4:expr; roll_shift $pbits4:expr; )* + $( par_round5: h_ordering $pf0:expr, $pf1:expr, $pf2:expr, $pf3:expr, $pf4:expr; + data_index $pdata_index5:expr; roll_shift $pbits5:expr; )* + ) => ({ + let mut bb = $h; + let mut bbb = $h; + + // Round 1 + $( round!(bb[$f0], bb[$f1], bb[$f2], bb[$f3], bb[$f4], + $data[$data_index1], $bits1, 0x00000000, + bb[$f1] ^ bb[$f2] ^ bb[$f3]); )* + + // Round 2 + $( round!(bb[$g0], bb[$g1], bb[$g2], bb[$g3], bb[$g4], + $data[$data_index2], $bits2, 0x5a827999, + (bb[$g1] & bb[$g2]) | (!bb[$g1] & bb[$g3])); )* + + // Round 3 + $( round!(bb[$h0], bb[$h1], bb[$h2], bb[$h3], bb[$h4], + $data[$data_index3], $bits3, 0x6ed9eba1, + (bb[$h1] | !bb[$h2]) ^ bb[$h3]); )* + + // Round 4 + $( round!(bb[$i0], bb[$i1], bb[$i2], 
bb[$i3], bb[$i4], + $data[$data_index4], $bits4, 0x8f1bbcdc, + (bb[$i1] & bb[$i3]) | (bb[$i2] & !bb[$i3])); )* + + // Round 5 + $( round!(bb[$j0], bb[$j1], bb[$j2], bb[$j3], bb[$j4], + $data[$data_index5], $bits5, 0xa953fd4e, + bb[$j1] ^ (bb[$j2] | !bb[$j3])); )* + + // Parallel rounds: these are the same as the previous five + // rounds except that the constants have changed, we work + // with the other buffer, and they are applied in reverse + // order. + + // Parallel Round 1 + $( round!(bbb[$pj0], bbb[$pj1], bbb[$pj2], bbb[$pj3], bbb[$pj4], + $data[$pdata_index1], $pbits1, 0x50a28be6, + bbb[$pj1] ^ (bbb[$pj2] | !bbb[$pj3])); )* + + // Parallel Round 2 + $( round!(bbb[$pi0], bbb[$pi1], bbb[$pi2], bbb[$pi3], bbb[$pi4], + $data[$pdata_index2], $pbits2, 0x5c4dd124, + (bbb[$pi1] & bbb[$pi3]) | (bbb[$pi2] & !bbb[$pi3])); )* + + // Parallel Round 3 + $( round!(bbb[$ph0], bbb[$ph1], bbb[$ph2], bbb[$ph3], bbb[$ph4], + $data[$pdata_index3], $pbits3, 0x6d703ef3, + (bbb[$ph1] | !bbb[$ph2]) ^ bbb[$ph3]); )* + + // Parallel Round 4 + $( round!(bbb[$pg0], bbb[$pg1], bbb[$pg2], bbb[$pg3], bbb[$pg4], + $data[$pdata_index4], $pbits4, 0x7a6d76e9, + (bbb[$pg1] & bbb[$pg2]) | (!bbb[$pg1] & bbb[$pg3])); )* + + // Parallel Round 5 + $( round!(bbb[$pf0], bbb[$pf1], bbb[$pf2], bbb[$pf3], bbb[$pf4], + $data[$pdata_index5], $pbits5, 0x00000000, + bbb[$pf1] ^ bbb[$pf2] ^ bbb[$pf3]); )* + + // Combine results + bbb[3] = bbb[3].wrapping_add($h[1]).wrapping_add(bb[2]); + $h[1] = $h[2].wrapping_add(bb[3]).wrapping_add(bbb[4]); + $h[2] = $h[3].wrapping_add(bb[4]).wrapping_add(bbb[0]); + $h[3] = $h[4].wrapping_add(bb[0]).wrapping_add(bbb[1]); + $h[4] = $h[0].wrapping_add(bb[1]).wrapping_add(bbb[2]); + $h[0] = bbb[3]; + }); +); + +impl HashEngine { + pub(super) fn process_block(&mut self) { + debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); + + let mut w = [0u32; 16]; + for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(4)) { + *w_val = 
u32::from_le_bytes(buff_bytes.try_into().expect("4 byte slice")) + } + + process_block!(self.h, w, + // Round 1 + round1: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 11; + round1: h_ordering 4, 0, 1, 2, 3; data_index 1; roll_shift 14; + round1: h_ordering 3, 4, 0, 1, 2; data_index 2; roll_shift 15; + round1: h_ordering 2, 3, 4, 0, 1; data_index 3; roll_shift 12; + round1: h_ordering 1, 2, 3, 4, 0; data_index 4; roll_shift 5; + round1: h_ordering 0, 1, 2, 3, 4; data_index 5; roll_shift 8; + round1: h_ordering 4, 0, 1, 2, 3; data_index 6; roll_shift 7; + round1: h_ordering 3, 4, 0, 1, 2; data_index 7; roll_shift 9; + round1: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 11; + round1: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 13; + round1: h_ordering 0, 1, 2, 3, 4; data_index 10; roll_shift 14; + round1: h_ordering 4, 0, 1, 2, 3; data_index 11; roll_shift 15; + round1: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 6; + round1: h_ordering 2, 3, 4, 0, 1; data_index 13; roll_shift 7; + round1: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 9; + round1: h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 8; + + // Round 2 + round2: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 7; + round2: h_ordering 3, 4, 0, 1, 2; data_index 4; roll_shift 6; + round2: h_ordering 2, 3, 4, 0, 1; data_index 13; roll_shift 8; + round2: h_ordering 1, 2, 3, 4, 0; data_index 1; roll_shift 13; + round2: h_ordering 0, 1, 2, 3, 4; data_index 10; roll_shift 11; + round2: h_ordering 4, 0, 1, 2, 3; data_index 6; roll_shift 9; + round2: h_ordering 3, 4, 0, 1, 2; data_index 15; roll_shift 7; + round2: h_ordering 2, 3, 4, 0, 1; data_index 3; roll_shift 15; + round2: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 7; + round2: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 12; + round2: h_ordering 4, 0, 1, 2, 3; data_index 9; roll_shift 15; + round2: h_ordering 3, 4, 0, 1, 2; data_index 5; roll_shift 9; + round2: h_ordering 2, 3, 4, 0, 1; data_index 2; 
roll_shift 11; + round2: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 7; + round2: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 13; + round2: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 12; + + // Round 3 + round3: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 11; + round3: h_ordering 2, 3, 4, 0, 1; data_index 10; roll_shift 13; + round3: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 6; + round3: h_ordering 0, 1, 2, 3, 4; data_index 4; roll_shift 7; + round3: h_ordering 4, 0, 1, 2, 3; data_index 9; roll_shift 14; + round3: h_ordering 3, 4, 0, 1, 2; data_index 15; roll_shift 9; + round3: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 13; + round3: h_ordering 1, 2, 3, 4, 0; data_index 1; roll_shift 15; + round3: h_ordering 0, 1, 2, 3, 4; data_index 2; roll_shift 14; + round3: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 8; + round3: h_ordering 3, 4, 0, 1, 2; data_index 0; roll_shift 13; + round3: h_ordering 2, 3, 4, 0, 1; data_index 6; roll_shift 6; + round3: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 5; + round3: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 12; + round3: h_ordering 4, 0, 1, 2, 3; data_index 5; roll_shift 7; + round3: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 5; + + // Round 4 + round4: h_ordering 2, 3, 4, 0, 1; data_index 1; roll_shift 11; + round4: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 12; + round4: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 14; + round4: h_ordering 4, 0, 1, 2, 3; data_index 10; roll_shift 15; + round4: h_ordering 3, 4, 0, 1, 2; data_index 0; roll_shift 14; + round4: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 15; + round4: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 9; + round4: h_ordering 0, 1, 2, 3, 4; data_index 4; roll_shift 8; + round4: h_ordering 4, 0, 1, 2, 3; data_index 13; roll_shift 9; + round4: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 14; + round4: h_ordering 2, 3, 4, 0, 1; data_index 7; roll_shift 5; + 
round4: h_ordering 1, 2, 3, 4, 0; data_index 15; roll_shift 6; + round4: h_ordering 0, 1, 2, 3, 4; data_index 14; roll_shift 8; + round4: h_ordering 4, 0, 1, 2, 3; data_index 5; roll_shift 6; + round4: h_ordering 3, 4, 0, 1, 2; data_index 6; roll_shift 5; + round4: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 12; + + // Round 5 + round5: h_ordering 1, 2, 3, 4, 0; data_index 4; roll_shift 9; + round5: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 15; + round5: h_ordering 4, 0, 1, 2, 3; data_index 5; roll_shift 5; + round5: h_ordering 3, 4, 0, 1, 2; data_index 9; roll_shift 11; + round5: h_ordering 2, 3, 4, 0, 1; data_index 7; roll_shift 6; + round5: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 8; + round5: h_ordering 0, 1, 2, 3, 4; data_index 2; roll_shift 13; + round5: h_ordering 4, 0, 1, 2, 3; data_index 10; roll_shift 12; + round5: h_ordering 3, 4, 0, 1, 2; data_index 14; roll_shift 5; + round5: h_ordering 2, 3, 4, 0, 1; data_index 1; roll_shift 12; + round5: h_ordering 1, 2, 3, 4, 0; data_index 3; roll_shift 13; + round5: h_ordering 0, 1, 2, 3, 4; data_index 8; roll_shift 14; + round5: h_ordering 4, 0, 1, 2, 3; data_index 11; roll_shift 11; + round5: h_ordering 3, 4, 0, 1, 2; data_index 6; roll_shift 8; + round5: h_ordering 2, 3, 4, 0, 1; data_index 15; roll_shift 5; + round5: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 6; + + // Parallel Round 1 + par_round1: h_ordering 0, 1, 2, 3, 4; data_index 5; roll_shift 8; + par_round1: h_ordering 4, 0, 1, 2, 3; data_index 14; roll_shift 9; + par_round1: h_ordering 3, 4, 0, 1, 2; data_index 7; roll_shift 9; + par_round1: h_ordering 2, 3, 4, 0, 1; data_index 0; roll_shift 11; + par_round1: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 13; + par_round1: h_ordering 0, 1, 2, 3, 4; data_index 2; roll_shift 15; + par_round1: h_ordering 4, 0, 1, 2, 3; data_index 11; roll_shift 15; + par_round1: h_ordering 3, 4, 0, 1, 2; data_index 4; roll_shift 5; + par_round1: h_ordering 2, 3, 4, 0, 1; data_index 
13; roll_shift 7; + par_round1: h_ordering 1, 2, 3, 4, 0; data_index 6; roll_shift 7; + par_round1: h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 8; + par_round1: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 11; + par_round1: h_ordering 3, 4, 0, 1, 2; data_index 1; roll_shift 14; + par_round1: h_ordering 2, 3, 4, 0, 1; data_index 10; roll_shift 14; + par_round1: h_ordering 1, 2, 3, 4, 0; data_index 3; roll_shift 12; + par_round1: h_ordering 0, 1, 2, 3, 4; data_index 12; roll_shift 6; + + // Parallel Round 2 + par_round2: h_ordering 4, 0, 1, 2, 3; data_index 6; roll_shift 9; + par_round2: h_ordering 3, 4, 0, 1, 2; data_index 11; roll_shift 13; + par_round2: h_ordering 2, 3, 4, 0, 1; data_index 3; roll_shift 15; + par_round2: h_ordering 1, 2, 3, 4, 0; data_index 7; roll_shift 7; + par_round2: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 12; + par_round2: h_ordering 4, 0, 1, 2, 3; data_index 13; roll_shift 8; + par_round2: h_ordering 3, 4, 0, 1, 2; data_index 5; roll_shift 9; + par_round2: h_ordering 2, 3, 4, 0, 1; data_index 10; roll_shift 11; + par_round2: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 7; + par_round2: h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 7; + par_round2: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 12; + par_round2: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 7; + par_round2: h_ordering 2, 3, 4, 0, 1; data_index 4; roll_shift 6; + par_round2: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 15; + par_round2: h_ordering 0, 1, 2, 3, 4; data_index 1; roll_shift 13; + par_round2: h_ordering 4, 0, 1, 2, 3; data_index 2; roll_shift 11; + + // Parallel Round 3 + par_round3: h_ordering 3, 4, 0, 1, 2; data_index 15; roll_shift 9; + par_round3: h_ordering 2, 3, 4, 0, 1; data_index 5; roll_shift 7; + par_round3: h_ordering 1, 2, 3, 4, 0; data_index 1; roll_shift 15; + par_round3: h_ordering 0, 1, 2, 3, 4; data_index 3; roll_shift 11; + par_round3: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 8; + 
par_round3: h_ordering 3, 4, 0, 1, 2; data_index 14; roll_shift 6; + par_round3: h_ordering 2, 3, 4, 0, 1; data_index 6; roll_shift 6; + par_round3: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 14; + par_round3: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 12; + par_round3: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 13; + par_round3: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 5; + par_round3: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 14; + par_round3: h_ordering 1, 2, 3, 4, 0; data_index 10; roll_shift 13; + par_round3: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 13; + par_round3: h_ordering 4, 0, 1, 2, 3; data_index 4; roll_shift 7; + par_round3: h_ordering 3, 4, 0, 1, 2; data_index 13; roll_shift 5; + + // Parallel Round 4 + par_round4: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 15; + par_round4: h_ordering 1, 2, 3, 4, 0; data_index 6; roll_shift 5; + par_round4: h_ordering 0, 1, 2, 3, 4; data_index 4; roll_shift 8; + par_round4: h_ordering 4, 0, 1, 2, 3; data_index 1; roll_shift 11; + par_round4: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 14; + par_round4: h_ordering 2, 3, 4, 0, 1; data_index 11; roll_shift 14; + par_round4: h_ordering 1, 2, 3, 4, 0; data_index 15; roll_shift 6; + par_round4: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 14; + par_round4: h_ordering 4, 0, 1, 2, 3; data_index 5; roll_shift 6; + par_round4: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 9; + par_round4: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 12; + par_round4: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 9; + par_round4: h_ordering 0, 1, 2, 3, 4; data_index 9; roll_shift 12; + par_round4: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 5; + par_round4: h_ordering 3, 4, 0, 1, 2; data_index 10; roll_shift 15; + par_round4: h_ordering 2, 3, 4, 0, 1; data_index 14; roll_shift 8; + + // Parallel Round 5 + par_round5: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 8; + par_round5: 
h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 5; + par_round5: h_ordering 4, 0, 1, 2, 3; data_index 10; roll_shift 12; + par_round5: h_ordering 3, 4, 0, 1, 2; data_index 4; roll_shift 9; + par_round5: h_ordering 2, 3, 4, 0, 1; data_index 1; roll_shift 12; + par_round5: h_ordering 1, 2, 3, 4, 0; data_index 5; roll_shift 5; + par_round5: h_ordering 0, 1, 2, 3, 4; data_index 8; roll_shift 14; + par_round5: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 6; + par_round5: h_ordering 3, 4, 0, 1, 2; data_index 6; roll_shift 8; + par_round5: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 13; + par_round5: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 6; + par_round5: h_ordering 0, 1, 2, 3, 4; data_index 14; roll_shift 5; + par_round5: h_ordering 4, 0, 1, 2, 3; data_index 0; roll_shift 15; + par_round5: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 13; + par_round5: h_ordering 2, 3, 4, 0, 1; data_index 9; roll_shift 11; + par_round5: h_ordering 1, 2, 3, 4, 0; data_index 11; roll_shift 11; + ); + } +} diff --git a/hashes/src/ripemd160/mod.rs b/hashes/src/ripemd160/mod.rs index fa0893d7a..8ef659770 100644 --- a/hashes/src/ripemd160/mod.rs +++ b/hashes/src/ripemd160/mod.rs @@ -2,6 +2,8 @@ //! RIPEMD160 implementation. +mod crypto; + use core::cmp; use crate::{incomplete_block_len, HashEngine as _}; @@ -88,324 +90,6 @@ impl crate::HashEngine for HashEngine { crate::internal_macros::engine_input_impl!(); } -#[cfg(feature = "small-hash")] -#[macro_use] -mod small_hash { - #[rustfmt::skip] - pub(super) fn round(a: u32, _b: u32, c: u32, _d: u32, e: u32, - x: u32, bits: u32, add: u32, round: u32, - ) -> (u32, u32) { - let a = a.wrapping_add(round).wrapping_add(x).wrapping_add(add); - let a = a.rotate_left(bits).wrapping_add(e); - let c = c.rotate_left(10); - - (a, c) - } - - macro_rules! 
round( - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, - $x:expr, $bits:expr, $add:expr, $round:expr) => ({ - let updates = small_hash::round($a, $b, $c, $d, $e, $x, $bits, $add, $round); - $a = updates.0; - $c = updates.1; - }); - ); -} - -#[cfg(not(feature = "small-hash"))] -macro_rules! round( - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, - $x:expr, $bits:expr, $add:expr, $round:expr) => ({ - $a = $a.wrapping_add($round).wrapping_add($x).wrapping_add($add); - $a = $a.rotate_left($bits).wrapping_add($e); - $c = $c.rotate_left(10); - }); -); - -macro_rules! process_block( - ($h:expr, $data:expr, - $( round1: h_ordering $f0:expr, $f1:expr, $f2:expr, $f3:expr, $f4:expr; - data_index $data_index1:expr; roll_shift $bits1:expr; )* - $( round2: h_ordering $g0:expr, $g1:expr, $g2:expr, $g3:expr, $g4:expr; - data_index $data_index2:expr; roll_shift $bits2:expr; )* - $( round3: h_ordering $h0:expr, $h1:expr, $h2:expr, $h3:expr, $h4:expr; - data_index $data_index3:expr; roll_shift $bits3:expr; )* - $( round4: h_ordering $i0:expr, $i1:expr, $i2:expr, $i3:expr, $i4:expr; - data_index $data_index4:expr; roll_shift $bits4:expr; )* - $( round5: h_ordering $j0:expr, $j1:expr, $j2:expr, $j3:expr, $j4:expr; - data_index $data_index5:expr; roll_shift $bits5:expr; )* - $( par_round1: h_ordering $pj0:expr, $pj1:expr, $pj2:expr, $pj3:expr, $pj4:expr; - data_index $pdata_index1:expr; roll_shift $pbits1:expr; )* - $( par_round2: h_ordering $pi0:expr, $pi1:expr, $pi2:expr, $pi3:expr, $pi4:expr; - data_index $pdata_index2:expr; roll_shift $pbits2:expr; )* - $( par_round3: h_ordering $ph0:expr, $ph1:expr, $ph2:expr, $ph3:expr, $ph4:expr; - data_index $pdata_index3:expr; roll_shift $pbits3:expr; )* - $( par_round4: h_ordering $pg0:expr, $pg1:expr, $pg2:expr, $pg3:expr, $pg4:expr; - data_index $pdata_index4:expr; roll_shift $pbits4:expr; )* - $( par_round5: h_ordering $pf0:expr, $pf1:expr, $pf2:expr, $pf3:expr, $pf4:expr; - data_index $pdata_index5:expr; roll_shift $pbits5:expr; )* - ) => 
({ - let mut bb = $h; - let mut bbb = $h; - - // Round 1 - $( round!(bb[$f0], bb[$f1], bb[$f2], bb[$f3], bb[$f4], - $data[$data_index1], $bits1, 0x00000000, - bb[$f1] ^ bb[$f2] ^ bb[$f3]); )* - - // Round 2 - $( round!(bb[$g0], bb[$g1], bb[$g2], bb[$g3], bb[$g4], - $data[$data_index2], $bits2, 0x5a827999, - (bb[$g1] & bb[$g2]) | (!bb[$g1] & bb[$g3])); )* - - // Round 3 - $( round!(bb[$h0], bb[$h1], bb[$h2], bb[$h3], bb[$h4], - $data[$data_index3], $bits3, 0x6ed9eba1, - (bb[$h1] | !bb[$h2]) ^ bb[$h3]); )* - - // Round 4 - $( round!(bb[$i0], bb[$i1], bb[$i2], bb[$i3], bb[$i4], - $data[$data_index4], $bits4, 0x8f1bbcdc, - (bb[$i1] & bb[$i3]) | (bb[$i2] & !bb[$i3])); )* - - // Round 5 - $( round!(bb[$j0], bb[$j1], bb[$j2], bb[$j3], bb[$j4], - $data[$data_index5], $bits5, 0xa953fd4e, - bb[$j1] ^ (bb[$j2] | !bb[$j3])); )* - - // Parallel rounds: these are the same as the previous five - // rounds except that the constants have changed, we work - // with the other buffer, and they are applied in reverse - // order. 
- - // Parallel Round 1 - $( round!(bbb[$pj0], bbb[$pj1], bbb[$pj2], bbb[$pj3], bbb[$pj4], - $data[$pdata_index1], $pbits1, 0x50a28be6, - bbb[$pj1] ^ (bbb[$pj2] | !bbb[$pj3])); )* - - // Porallel Round 2 - $( round!(bbb[$pi0], bbb[$pi1], bbb[$pi2], bbb[$pi3], bbb[$pi4], - $data[$pdata_index2], $pbits2, 0x5c4dd124, - (bbb[$pi1] & bbb[$pi3]) | (bbb[$pi2] & !bbb[$pi3])); )* - - // Parallel Round 3 - $( round!(bbb[$ph0], bbb[$ph1], bbb[$ph2], bbb[$ph3], bbb[$ph4], - $data[$pdata_index3], $pbits3, 0x6d703ef3, - (bbb[$ph1] | !bbb[$ph2]) ^ bbb[$ph3]); )* - - // Parallel Round 4 - $( round!(bbb[$pg0], bbb[$pg1], bbb[$pg2], bbb[$pg3], bbb[$pg4], - $data[$pdata_index4], $pbits4, 0x7a6d76e9, - (bbb[$pg1] & bbb[$pg2]) | (!bbb[$pg1] & bbb[$pg3])); )* - - // Parallel Round 5 - $( round!(bbb[$pf0], bbb[$pf1], bbb[$pf2], bbb[$pf3], bbb[$pf4], - $data[$pdata_index5], $pbits5, 0x00000000, - bbb[$pf1] ^ bbb[$pf2] ^ bbb[$pf3]); )* - - // Combine results - bbb[3] = bbb[3].wrapping_add($h[1]).wrapping_add(bb[2]); - $h[1] = $h[2].wrapping_add(bb[3]).wrapping_add(bbb[4]); - $h[2] = $h[3].wrapping_add(bb[4]).wrapping_add(bbb[0]); - $h[3] = $h[4].wrapping_add(bb[0]).wrapping_add(bbb[1]); - $h[4] = $h[0].wrapping_add(bb[1]).wrapping_add(bbb[2]); - $h[0] = bbb[3]; - }); -); - -impl HashEngine { - fn process_block(&mut self) { - debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); - - let mut w = [0u32; 16]; - for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(4)) { - *w_val = u32::from_le_bytes(buff_bytes.try_into().expect("4 byte slice")) - } - - process_block!(self.h, w, - // Round 1 - round1: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 11; - round1: h_ordering 4, 0, 1, 2, 3; data_index 1; roll_shift 14; - round1: h_ordering 3, 4, 0, 1, 2; data_index 2; roll_shift 15; - round1: h_ordering 2, 3, 4, 0, 1; data_index 3; roll_shift 12; - round1: h_ordering 1, 2, 3, 4, 0; data_index 4; roll_shift 5; - round1: h_ordering 0, 1, 2, 3, 4; data_index 5; roll_shift 8; - round1: 
h_ordering 4, 0, 1, 2, 3; data_index 6; roll_shift 7; - round1: h_ordering 3, 4, 0, 1, 2; data_index 7; roll_shift 9; - round1: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 11; - round1: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 13; - round1: h_ordering 0, 1, 2, 3, 4; data_index 10; roll_shift 14; - round1: h_ordering 4, 0, 1, 2, 3; data_index 11; roll_shift 15; - round1: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 6; - round1: h_ordering 2, 3, 4, 0, 1; data_index 13; roll_shift 7; - round1: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 9; - round1: h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 8; - - // Round 2 - round2: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 7; - round2: h_ordering 3, 4, 0, 1, 2; data_index 4; roll_shift 6; - round2: h_ordering 2, 3, 4, 0, 1; data_index 13; roll_shift 8; - round2: h_ordering 1, 2, 3, 4, 0; data_index 1; roll_shift 13; - round2: h_ordering 0, 1, 2, 3, 4; data_index 10; roll_shift 11; - round2: h_ordering 4, 0, 1, 2, 3; data_index 6; roll_shift 9; - round2: h_ordering 3, 4, 0, 1, 2; data_index 15; roll_shift 7; - round2: h_ordering 2, 3, 4, 0, 1; data_index 3; roll_shift 15; - round2: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 7; - round2: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 12; - round2: h_ordering 4, 0, 1, 2, 3; data_index 9; roll_shift 15; - round2: h_ordering 3, 4, 0, 1, 2; data_index 5; roll_shift 9; - round2: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 11; - round2: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 7; - round2: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 13; - round2: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 12; - - // Round 3 - round3: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 11; - round3: h_ordering 2, 3, 4, 0, 1; data_index 10; roll_shift 13; - round3: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 6; - round3: h_ordering 0, 1, 2, 3, 4; data_index 4; roll_shift 7; - round3: h_ordering 4, 0, 1, 
2, 3; data_index 9; roll_shift 14; - round3: h_ordering 3, 4, 0, 1, 2; data_index 15; roll_shift 9; - round3: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 13; - round3: h_ordering 1, 2, 3, 4, 0; data_index 1; roll_shift 15; - round3: h_ordering 0, 1, 2, 3, 4; data_index 2; roll_shift 14; - round3: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 8; - round3: h_ordering 3, 4, 0, 1, 2; data_index 0; roll_shift 13; - round3: h_ordering 2, 3, 4, 0, 1; data_index 6; roll_shift 6; - round3: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 5; - round3: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 12; - round3: h_ordering 4, 0, 1, 2, 3; data_index 5; roll_shift 7; - round3: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 5; - - // Round 4 - round4: h_ordering 2, 3, 4, 0, 1; data_index 1; roll_shift 11; - round4: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 12; - round4: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 14; - round4: h_ordering 4, 0, 1, 2, 3; data_index 10; roll_shift 15; - round4: h_ordering 3, 4, 0, 1, 2; data_index 0; roll_shift 14; - round4: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 15; - round4: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 9; - round4: h_ordering 0, 1, 2, 3, 4; data_index 4; roll_shift 8; - round4: h_ordering 4, 0, 1, 2, 3; data_index 13; roll_shift 9; - round4: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 14; - round4: h_ordering 2, 3, 4, 0, 1; data_index 7; roll_shift 5; - round4: h_ordering 1, 2, 3, 4, 0; data_index 15; roll_shift 6; - round4: h_ordering 0, 1, 2, 3, 4; data_index 14; roll_shift 8; - round4: h_ordering 4, 0, 1, 2, 3; data_index 5; roll_shift 6; - round4: h_ordering 3, 4, 0, 1, 2; data_index 6; roll_shift 5; - round4: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 12; - - // Round 5 - round5: h_ordering 1, 2, 3, 4, 0; data_index 4; roll_shift 9; - round5: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 15; - round5: h_ordering 4, 0, 1, 2, 3; data_index 5; 
roll_shift 5; - round5: h_ordering 3, 4, 0, 1, 2; data_index 9; roll_shift 11; - round5: h_ordering 2, 3, 4, 0, 1; data_index 7; roll_shift 6; - round5: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 8; - round5: h_ordering 0, 1, 2, 3, 4; data_index 2; roll_shift 13; - round5: h_ordering 4, 0, 1, 2, 3; data_index 10; roll_shift 12; - round5: h_ordering 3, 4, 0, 1, 2; data_index 14; roll_shift 5; - round5: h_ordering 2, 3, 4, 0, 1; data_index 1; roll_shift 12; - round5: h_ordering 1, 2, 3, 4, 0; data_index 3; roll_shift 13; - round5: h_ordering 0, 1, 2, 3, 4; data_index 8; roll_shift 14; - round5: h_ordering 4, 0, 1, 2, 3; data_index 11; roll_shift 11; - round5: h_ordering 3, 4, 0, 1, 2; data_index 6; roll_shift 8; - round5: h_ordering 2, 3, 4, 0, 1; data_index 15; roll_shift 5; - round5: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 6; - - // Porallel Round 1; - par_round1: h_ordering 0, 1, 2, 3, 4; data_index 5; roll_shift 8; - par_round1: h_ordering 4, 0, 1, 2, 3; data_index 14; roll_shift 9; - par_round1: h_ordering 3, 4, 0, 1, 2; data_index 7; roll_shift 9; - par_round1: h_ordering 2, 3, 4, 0, 1; data_index 0; roll_shift 11; - par_round1: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 13; - par_round1: h_ordering 0, 1, 2, 3, 4; data_index 2; roll_shift 15; - par_round1: h_ordering 4, 0, 1, 2, 3; data_index 11; roll_shift 15; - par_round1: h_ordering 3, 4, 0, 1, 2; data_index 4; roll_shift 5; - par_round1: h_ordering 2, 3, 4, 0, 1; data_index 13; roll_shift 7; - par_round1: h_ordering 1, 2, 3, 4, 0; data_index 6; roll_shift 7; - par_round1: h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 8; - par_round1: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 11; - par_round1: h_ordering 3, 4, 0, 1, 2; data_index 1; roll_shift 14; - par_round1: h_ordering 2, 3, 4, 0, 1; data_index 10; roll_shift 14; - par_round1: h_ordering 1, 2, 3, 4, 0; data_index 3; roll_shift 12; - par_round1: h_ordering 0, 1, 2, 3, 4; data_index 12; roll_shift 6; - - // 
Parallel Round 2 - par_round2: h_ordering 4, 0, 1, 2, 3; data_index 6; roll_shift 9; - par_round2: h_ordering 3, 4, 0, 1, 2; data_index 11; roll_shift 13; - par_round2: h_ordering 2, 3, 4, 0, 1; data_index 3; roll_shift 15; - par_round2: h_ordering 1, 2, 3, 4, 0; data_index 7; roll_shift 7; - par_round2: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 12; - par_round2: h_ordering 4, 0, 1, 2, 3; data_index 13; roll_shift 8; - par_round2: h_ordering 3, 4, 0, 1, 2; data_index 5; roll_shift 9; - par_round2: h_ordering 2, 3, 4, 0, 1; data_index 10; roll_shift 11; - par_round2: h_ordering 1, 2, 3, 4, 0; data_index 14; roll_shift 7; - par_round2: h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 7; - par_round2: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 12; - par_round2: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 7; - par_round2: h_ordering 2, 3, 4, 0, 1; data_index 4; roll_shift 6; - par_round2: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 15; - par_round2: h_ordering 0, 1, 2, 3, 4; data_index 1; roll_shift 13; - par_round2: h_ordering 4, 0, 1, 2, 3; data_index 2; roll_shift 11; - - // Parallel Round 3 - par_round3: h_ordering 3, 4, 0, 1, 2; data_index 15; roll_shift 9; - par_round3: h_ordering 2, 3, 4, 0, 1; data_index 5; roll_shift 7; - par_round3: h_ordering 1, 2, 3, 4, 0; data_index 1; roll_shift 15; - par_round3: h_ordering 0, 1, 2, 3, 4; data_index 3; roll_shift 11; - par_round3: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 8; - par_round3: h_ordering 3, 4, 0, 1, 2; data_index 14; roll_shift 6; - par_round3: h_ordering 2, 3, 4, 0, 1; data_index 6; roll_shift 6; - par_round3: h_ordering 1, 2, 3, 4, 0; data_index 9; roll_shift 14; - par_round3: h_ordering 0, 1, 2, 3, 4; data_index 11; roll_shift 12; - par_round3: h_ordering 4, 0, 1, 2, 3; data_index 8; roll_shift 13; - par_round3: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 5; - par_round3: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 14; - par_round3: h_ordering 1, 2, 
3, 4, 0; data_index 10; roll_shift 13; - par_round3: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 13; - par_round3: h_ordering 4, 0, 1, 2, 3; data_index 4; roll_shift 7; - par_round3: h_ordering 3, 4, 0, 1, 2; data_index 13; roll_shift 5; - - // Parallel Round 4 - par_round4: h_ordering 2, 3, 4, 0, 1; data_index 8; roll_shift 15; - par_round4: h_ordering 1, 2, 3, 4, 0; data_index 6; roll_shift 5; - par_round4: h_ordering 0, 1, 2, 3, 4; data_index 4; roll_shift 8; - par_round4: h_ordering 4, 0, 1, 2, 3; data_index 1; roll_shift 11; - par_round4: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 14; - par_round4: h_ordering 2, 3, 4, 0, 1; data_index 11; roll_shift 14; - par_round4: h_ordering 1, 2, 3, 4, 0; data_index 15; roll_shift 6; - par_round4: h_ordering 0, 1, 2, 3, 4; data_index 0; roll_shift 14; - par_round4: h_ordering 4, 0, 1, 2, 3; data_index 5; roll_shift 6; - par_round4: h_ordering 3, 4, 0, 1, 2; data_index 12; roll_shift 9; - par_round4: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 12; - par_round4: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 9; - par_round4: h_ordering 0, 1, 2, 3, 4; data_index 9; roll_shift 12; - par_round4: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 5; - par_round4: h_ordering 3, 4, 0, 1, 2; data_index 10; roll_shift 15; - par_round4: h_ordering 2, 3, 4, 0, 1; data_index 14; roll_shift 8; - - // Parallel Round 5 - par_round5: h_ordering 1, 2, 3, 4, 0; data_index 12; roll_shift 8; - par_round5: h_ordering 0, 1, 2, 3, 4; data_index 15; roll_shift 5; - par_round5: h_ordering 4, 0, 1, 2, 3; data_index 10; roll_shift 12; - par_round5: h_ordering 3, 4, 0, 1, 2; data_index 4; roll_shift 9; - par_round5: h_ordering 2, 3, 4, 0, 1; data_index 1; roll_shift 12; - par_round5: h_ordering 1, 2, 3, 4, 0; data_index 5; roll_shift 5; - par_round5: h_ordering 0, 1, 2, 3, 4; data_index 8; roll_shift 14; - par_round5: h_ordering 4, 0, 1, 2, 3; data_index 7; roll_shift 6; - par_round5: h_ordering 3, 4, 0, 1, 2; data_index 6; 
roll_shift 8; - par_round5: h_ordering 2, 3, 4, 0, 1; data_index 2; roll_shift 13; - par_round5: h_ordering 1, 2, 3, 4, 0; data_index 13; roll_shift 6; - par_round5: h_ordering 0, 1, 2, 3, 4; data_index 14; roll_shift 5; - par_round5: h_ordering 4, 0, 1, 2, 3; data_index 0; roll_shift 15; - par_round5: h_ordering 3, 4, 0, 1, 2; data_index 3; roll_shift 13; - par_round5: h_ordering 2, 3, 4, 0, 1; data_index 9; roll_shift 11; - par_round5: h_ordering 1, 2, 3, 4, 0; data_index 11; roll_shift 11; - ); - } -} - #[cfg(test)] mod tests { #[test] diff --git a/hashes/src/sha1/crypto.rs b/hashes/src/sha1/crypto.rs new file mode 100644 index 000000000..6f8a3fd8a --- /dev/null +++ b/hashes/src/sha1/crypto.rs @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: CC0-1.0 + +use super::{HashEngine, BLOCK_SIZE}; + +impl HashEngine { + // Basic unoptimized algorithm from Wikipedia + pub(super) fn process_block(&mut self) { + debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); + + let mut w = [0u32; 80]; + for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(4)) { + *w_val = u32::from_be_bytes(buff_bytes.try_into().expect("4 bytes slice")) + } + for i in 16..80 { + w[i] = (w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16]).rotate_left(1); + } + + let mut a = self.h[0]; + let mut b = self.h[1]; + let mut c = self.h[2]; + let mut d = self.h[3]; + let mut e = self.h[4]; + + for (i, &wi) in w.iter().enumerate() { + let (f, k) = match i { + 0..=19 => ((b & c) | (!b & d), 0x5a827999), + 20..=39 => (b ^ c ^ d, 0x6ed9eba1), + 40..=59 => ((b & c) | (b & d) | (c & d), 0x8f1bbcdc), + 60..=79 => (b ^ c ^ d, 0xca62c1d6), + _ => unreachable!(), + }; + + let new_a = + a.rotate_left(5).wrapping_add(f).wrapping_add(e).wrapping_add(k).wrapping_add(wi); + e = d; + d = c; + c = b.rotate_left(30); + b = a; + a = new_a; + } + + self.h[0] = self.h[0].wrapping_add(a); + self.h[1] = self.h[1].wrapping_add(b); + self.h[2] = self.h[2].wrapping_add(c); + self.h[3] = self.h[3].wrapping_add(d); + self.h[4] = 
self.h[4].wrapping_add(e); + } +} diff --git a/hashes/src/sha1/mod.rs b/hashes/src/sha1/mod.rs index 152a8f348..f0748c33b 100644 --- a/hashes/src/sha1/mod.rs +++ b/hashes/src/sha1/mod.rs @@ -2,6 +2,8 @@ //! SHA1 implementation. +mod crypto; + use core::cmp; use crate::{incomplete_block_len, HashEngine as _}; @@ -80,51 +82,6 @@ impl crate::HashEngine for HashEngine { crate::internal_macros::engine_input_impl!(); } -impl HashEngine { - // Basic unoptimized algorithm from Wikipedia - fn process_block(&mut self) { - debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); - - let mut w = [0u32; 80]; - for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(4)) { - *w_val = u32::from_be_bytes(buff_bytes.try_into().expect("4 bytes slice")) - } - for i in 16..80 { - w[i] = (w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16]).rotate_left(1); - } - - let mut a = self.h[0]; - let mut b = self.h[1]; - let mut c = self.h[2]; - let mut d = self.h[3]; - let mut e = self.h[4]; - - for (i, &wi) in w.iter().enumerate() { - let (f, k) = match i { - 0..=19 => ((b & c) | (!b & d), 0x5a827999), - 20..=39 => (b ^ c ^ d, 0x6ed9eba1), - 40..=59 => ((b & c) | (b & d) | (c & d), 0x8f1bbcdc), - 60..=79 => (b ^ c ^ d, 0xca62c1d6), - _ => unreachable!(), - }; - - let new_a = - a.rotate_left(5).wrapping_add(f).wrapping_add(e).wrapping_add(k).wrapping_add(wi); - e = d; - d = c; - c = b.rotate_left(30); - b = a; - a = new_a; - } - - self.h[0] = self.h[0].wrapping_add(a); - self.h[1] = self.h[1].wrapping_add(b); - self.h[2] = self.h[2].wrapping_add(c); - self.h[3] = self.h[3].wrapping_add(d); - self.h[4] = self.h[4].wrapping_add(e); - } -} - #[cfg(test)] mod tests { #[test] diff --git a/hashes/src/sha256/crypto.rs b/hashes/src/sha256/crypto.rs new file mode 100644 index 000000000..210eb6e6e --- /dev/null +++ b/hashes/src/sha256/crypto.rs @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: CC0-1.0 + +#[cfg(all(feature = "std", target_arch = "x86"))] +use core::arch::x86::*; +#[cfg(all(feature = "std", 
/// SHA-256 "choose": each output bit comes from `y` where the matching bit of `x` is set and
/// from `z` where it is clear.
#[allow(non_snake_case)]
const fn Ch(x: u32, y: u32, z: u32) -> u32 { ((y ^ z) & x) ^ z }

/// SHA-256 "majority": each output bit is the value shared by at least two of the matching
/// bits of `x`, `y` and `z`.
#[allow(non_snake_case)]
const fn Maj(x: u32, y: u32, z: u32) -> u32 { ((x | y) & z) | (x & y) }

/// Big sigma 0, applied to working variable `a` in each round: ROTR 2 ^ ROTR 13 ^ ROTR 22.
#[allow(non_snake_case)]
const fn Sigma0(x: u32) -> u32 { x.rotate_right(2) ^ x.rotate_right(13) ^ x.rotate_right(22) }

/// Big sigma 1, applied to working variable `e` in each round: ROTR 6 ^ ROTR 11 ^ ROTR 25.
#[allow(non_snake_case)]
const fn Sigma1(x: u32) -> u32 { x.rotate_right(6) ^ x.rotate_right(11) ^ x.rotate_right(25) }

/// Small sigma 0, used when extending the message schedule: ROTR 7 ^ ROTR 18 ^ SHR 3.
const fn sigma0(x: u32) -> u32 { x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3) }

/// Small sigma 1, used when extending the message schedule: ROTR 17 ^ ROTR 19 ^ SHR 10.
const fn sigma1(x: u32) -> u32 { x.rotate_right(17) ^ x.rotate_right(19) ^ (x >> 10) }
impl Midstate {
    /// Reads the four bytes starting at `bytes[index]` as one big-endian `u32`.
    ///
    /// Panics (or fails const evaluation) if fewer than four bytes are available at `index`.
    #[allow(clippy::identity_op)] // more readable
    const fn read_u32(bytes: &[u8], index: usize) -> u32 {
        ((bytes[index + 0] as u32) << 24)
            | ((bytes[index + 1] as u32) << 16)
            | ((bytes[index + 2] as u32) << 8)
            | ((bytes[index + 3] as u32) << 0)
    }

    /// Loads the 64 bytes starting at `bytes[index]` as sixteen big-endian words, i.e. the
    /// initial message schedule of one block.
    const fn copy_w(bytes: &[u8], index: usize) -> [u32; 16] {
        let mut w = [0u32; 16];
        let mut i = 0;
        // `while` rather than `for`: this function has to stay usable in `const` context.
        while i < 16 {
            w[i] = Self::read_u32(bytes, index + i * 4);
            i += 1;
        }
        w
    }

    /// Hashes `bytes` with a plain software SHA-256 that can run in `const` context and returns
    /// the resulting state as a `Midstate`.
    ///
    /// The message is processed in 64-byte chunks. Padding (a `0x80` marker, zero fill and the
    /// 64-bit big-endian bit length) is generated on the fly for the trailing chunk(s).
    ///
    /// When `finalize` is `false` the loop stops before the last chunk, which is the one that
    /// would carry the length field, so that chunk is never compressed.
    /// NOTE(review): callers presumably pass a multiple of 64 bytes in that case, so that the
    /// skipped chunk is pure padding - confirm against the call sites.
    ///
    /// The returned `bytes_hashed` is always `bytes.len()`.
    pub(super) const fn compute_midstate_unoptimized(bytes: &[u8], finalize: bool) -> Self {
        // SHA-256 initial hash value.
        let mut state = [
            0x6a09e667u32,
            0xbb67ae85,
            0x3c6ef372,
            0xa54ff53a,
            0x510e527f,
            0x9b05688c,
            0x1f83d9ab,
            0x5be0cd19,
        ];

        // Chunks needed for the message plus 1 marker byte plus 8 length bytes, rounded up.
        let num_chunks = (bytes.len() + 9 + 63) / 64;
        let mut chunk = 0;
        #[allow(clippy::precedence)]
        while chunk < num_chunks {
            if !finalize && chunk + 1 == num_chunks {
                break;
            }
            let mut w = if chunk * 64 + 64 <= bytes.len() {
                // Chunk lies entirely inside the message: read it directly.
                Self::copy_w(bytes, chunk * 64)
            } else {
                // Trailing chunk: copy whatever message bytes remain into a zeroed buffer.
                let mut buf = [0; 64];
                let mut i = 0;
                let offset = chunk * 64;
                while offset + i < bytes.len() {
                    buf[i] = bytes[offset + i];
                    i += 1;
                }
                // The 0x80 marker goes right after the last message byte. If the tail leaves
                // room for marker + length (at most 55 tail bytes) there is only one trailing
                // chunk; otherwise there are two and the marker belongs in the second-to-last.
                if (bytes.len() % 64 <= 64 - 9) || (chunk + 2 == num_chunks) {
                    buf[i] = 0x80;
                }
                #[allow(clippy::identity_op)] // more readable
                #[allow(clippy::erasing_op)]
                if chunk + 1 == num_chunks {
                    // Final chunk: the last 8 bytes hold the message length in bits, big-endian.
                    let bit_len = bytes.len() as u64 * 8;
                    buf[64 - 8] = ((bit_len >> 8 * 7) & 0xFF) as u8;
                    buf[64 - 7] = ((bit_len >> 8 * 6) & 0xFF) as u8;
                    buf[64 - 6] = ((bit_len >> 8 * 5) & 0xFF) as u8;
                    buf[64 - 5] = ((bit_len >> 8 * 4) & 0xFF) as u8;
                    buf[64 - 4] = ((bit_len >> 8 * 3) & 0xFF) as u8;
                    buf[64 - 3] = ((bit_len >> 8 * 2) & 0xFF) as u8;
                    buf[64 - 2] = ((bit_len >> 8 * 1) & 0xFF) as u8;
                    buf[64 - 1] = ((bit_len >> 8 * 0) & 0xFF) as u8;
                }
                Self::copy_w(&buf, 0)
            };
            chunk += 1;

            let mut a = state[0];
            let mut b = state[1];
            let mut c = state[2];
            let mut d = state[3];
            let mut e = state[4];
            let mut f = state[5];
            let mut g = state[6];
            let mut h = state[7];

            // Rounds 0-15 consume the schedule words as loaded. Instead of shifting the eight
            // working variables after each round, the argument order is rotated by one.
            round!(a, b, c, d, e, f, g, h, 0x428a2f98, w[0]);
            round!(h, a, b, c, d, e, f, g, 0x71374491, w[1]);
            round!(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w[2]);
            round!(f, g, h, a, b, c, d, e, 0xe9b5dba5, w[3]);
            round!(e, f, g, h, a, b, c, d, 0x3956c25b, w[4]);
            round!(d, e, f, g, h, a, b, c, 0x59f111f1, w[5]);
            round!(c, d, e, f, g, h, a, b, 0x923f82a4, w[6]);
            round!(b, c, d, e, f, g, h, a, 0xab1c5ed5, w[7]);
            round!(a, b, c, d, e, f, g, h, 0xd807aa98, w[8]);
            round!(h, a, b, c, d, e, f, g, 0x12835b01, w[9]);
            round!(g, h, a, b, c, d, e, f, 0x243185be, w[10]);
            round!(f, g, h, a, b, c, d, e, 0x550c7dc3, w[11]);
            round!(e, f, g, h, a, b, c, d, 0x72be5d74, w[12]);
            round!(d, e, f, g, h, a, b, c, 0x80deb1fe, w[13]);
            round!(c, d, e, f, g, h, a, b, 0x9bdc06a7, w[14]);
            round!(b, c, d, e, f, g, h, a, 0xc19bf174, w[15]);

            // Rounds 16-63 use the 13-argument form of `round!`, which first updates the given
            // schedule word in place from three other words of the 16-word window.
            round!(a, b, c, d, e, f, g, h, 0xe49b69c1, w[0], w[14], w[9], w[1]);
            round!(h, a, b, c, d, e, f, g, 0xefbe4786, w[1], w[15], w[10], w[2]);
            round!(g, h, a, b, c, d, e, f, 0x0fc19dc6, w[2], w[0], w[11], w[3]);
            round!(f, g, h, a, b, c, d, e, 0x240ca1cc, w[3], w[1], w[12], w[4]);
            round!(e, f, g, h, a, b, c, d, 0x2de92c6f, w[4], w[2], w[13], w[5]);
            round!(d, e, f, g, h, a, b, c, 0x4a7484aa, w[5], w[3], w[14], w[6]);
            round!(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w[6], w[4], w[15], w[7]);
            round!(b, c, d, e, f, g, h, a, 0x76f988da, w[7], w[5], w[0], w[8]);
            round!(a, b, c, d, e, f, g, h, 0x983e5152, w[8], w[6], w[1], w[9]);
            round!(h, a, b, c, d, e, f, g, 0xa831c66d, w[9], w[7], w[2], w[10]);
            round!(g, h, a, b, c, d, e, f, 0xb00327c8, w[10], w[8], w[3], w[11]);
            round!(f, g, h, a, b, c, d, e, 0xbf597fc7, w[11], w[9], w[4], w[12]);
            round!(e, f, g, h, a, b, c, d, 0xc6e00bf3, w[12], w[10], w[5], w[13]);
            round!(d, e, f, g, h, a, b, c, 0xd5a79147, w[13], w[11], w[6], w[14]);
            round!(c, d, e, f, g, h, a, b, 0x06ca6351, w[14], w[12], w[7], w[15]);
            round!(b, c, d, e, f, g, h, a, 0x14292967, w[15], w[13], w[8], w[0]);

            round!(a, b, c, d, e, f, g, h, 0x27b70a85, w[0], w[14], w[9], w[1]);
            round!(h, a, b, c, d, e, f, g, 0x2e1b2138, w[1], w[15], w[10], w[2]);
            round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w[2], w[0], w[11], w[3]);
            round!(f, g, h, a, b, c, d, e, 0x53380d13, w[3], w[1], w[12], w[4]);
            round!(e, f, g, h, a, b, c, d, 0x650a7354, w[4], w[2], w[13], w[5]);
            round!(d, e, f, g, h, a, b, c, 0x766a0abb, w[5], w[3], w[14], w[6]);
            round!(c, d, e, f, g, h, a, b, 0x81c2c92e, w[6], w[4], w[15], w[7]);
            round!(b, c, d, e, f, g, h, a, 0x92722c85, w[7], w[5], w[0], w[8]);
            round!(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w[8], w[6], w[1], w[9]);
            round!(h, a, b, c, d, e, f, g, 0xa81a664b, w[9], w[7], w[2], w[10]);
            round!(g, h, a, b, c, d, e, f, 0xc24b8b70, w[10], w[8], w[3], w[11]);
            round!(f, g, h, a, b, c, d, e, 0xc76c51a3, w[11], w[9], w[4], w[12]);
            round!(e, f, g, h, a, b, c, d, 0xd192e819, w[12], w[10], w[5], w[13]);
            round!(d, e, f, g, h, a, b, c, 0xd6990624, w[13], w[11], w[6], w[14]);
            round!(c, d, e, f, g, h, a, b, 0xf40e3585, w[14], w[12], w[7], w[15]);
            round!(b, c, d, e, f, g, h, a, 0x106aa070, w[15], w[13], w[8], w[0]);

            round!(a, b, c, d, e, f, g, h, 0x19a4c116, w[0], w[14], w[9], w[1]);
            round!(h, a, b, c, d, e, f, g, 0x1e376c08, w[1], w[15], w[10], w[2]);
            round!(g, h, a, b, c, d, e, f, 0x2748774c, w[2], w[0], w[11], w[3]);
            round!(f, g, h, a, b, c, d, e, 0x34b0bcb5, w[3], w[1], w[12], w[4]);
            round!(e, f, g, h, a, b, c, d, 0x391c0cb3, w[4], w[2], w[13], w[5]);
            round!(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w[5], w[3], w[14], w[6]);
            round!(c, d, e, f, g, h, a, b, 0x5b9cca4f, w[6], w[4], w[15], w[7]);
            round!(b, c, d, e, f, g, h, a, 0x682e6ff3, w[7], w[5], w[0], w[8]);
            round!(a, b, c, d, e, f, g, h, 0x748f82ee, w[8], w[6], w[1], w[9]);
            round!(h, a, b, c, d, e, f, g, 0x78a5636f, w[9], w[7], w[2], w[10]);
            round!(g, h, a, b, c, d, e, f, 0x84c87814, w[10], w[8], w[3], w[11]);
            round!(f, g, h, a, b, c, d, e, 0x8cc70208, w[11], w[9], w[4], w[12]);
            round!(e, f, g, h, a, b, c, d, 0x90befffa, w[12], w[10], w[5], w[13]);
            round!(d, e, f, g, h, a, b, c, 0xa4506ceb, w[13], w[11], w[6], w[14]);
            round!(c, d, e, f, g, h, a, b, 0xbef9a3f7, w[14], w[12], w[7], w[15]);
            round!(b, c, d, e, f, g, h, a, 0xc67178f2, w[15], w[13], w[8], w[0]);

            // Fold this chunk's working variables into the running state, modulo 2^32.
            state[0] = state[0].wrapping_add(a);
            state[1] = state[1].wrapping_add(b);
            state[2] = state[2].wrapping_add(c);
            state[3] = state[3].wrapping_add(d);
            state[4] = state[4].wrapping_add(e);
            state[5] = state[5].wrapping_add(f);
            state[6] = state[6].wrapping_add(g);
            state[7] = state[7].wrapping_add(h);
        }
        // Serialize the eight state words big-endian into the 32-byte output.
        let mut output = [0u8; 32];
        let mut i = 0;
        #[allow(clippy::identity_op)] // more readable
        while i < 8 {
            output[i * 4 + 0] = (state[i + 0] >> 24) as u8;
            output[i * 4 + 1] = (state[i + 0] >> 16) as u8;
            output[i * 4 + 2] = (state[i + 0] >> 8) as u8;
            output[i * 4 + 3] = (state[i + 0] >> 0) as u8;
            i += 1;
        }
        Midstate { bytes: output, bytes_hashed: bytes.len() as u64 }
    }
}
+ tmp = _mm_loadu_si128(self.h.as_ptr().add(0) as *const __m128i); + state1 = _mm_loadu_si128(self.h.as_ptr().add(4) as *const __m128i); + + tmp = _mm_shuffle_epi32(tmp, 0xB1); // CDAB + state1 = _mm_shuffle_epi32(state1, 0x1B); // EFGH + state0 = _mm_alignr_epi8(tmp, state1, 8); // ABEF + state1 = _mm_blend_epi16(state1, tmp, 0xF0); // CDGH + + // Process a single block + { + // Save current state + abef_save = state0; + cdgh_save = state1; + + // Rounds 0-3 + msg = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset) as *const __m128i); + msg0 = _mm_shuffle_epi8(msg, MASK); + msg = _mm_add_epi32( + msg0, + _mm_set_epi64x(0xE9B5DBA5B5C0FBCFu64 as i64, 0x71374491428A2F98u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + + // Rounds 4-7 + msg1 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 16) as *const __m128i); + msg1 = _mm_shuffle_epi8(msg1, MASK); + msg = _mm_add_epi32( + msg1, + _mm_set_epi64x(0xAB1C5ED5923F82A4u64 as i64, 0x59F111F13956C25Bu64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg0 = _mm_sha256msg1_epu32(msg0, msg1); + + // Rounds 8-11 + msg2 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 32) as *const __m128i); + msg2 = _mm_shuffle_epi8(msg2, MASK); + msg = _mm_add_epi32( + msg2, + _mm_set_epi64x(0x550C7DC3243185BEu64 as i64, 0x12835B01D807AA98u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg1 = _mm_sha256msg1_epu32(msg1, msg2); + + // Rounds 12-15 + msg3 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 48) as *const __m128i); + msg3 = _mm_shuffle_epi8(msg3, MASK); + msg = _mm_add_epi32( + msg3, + _mm_set_epi64x(0xC19BF1749BDC06A7u64 as i64, 0x80DEB1FE72BE5D74u64 
as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg3, msg2, 4); + msg0 = _mm_add_epi32(msg0, tmp); + msg0 = _mm_sha256msg2_epu32(msg0, msg3); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg2 = _mm_sha256msg1_epu32(msg2, msg3); + + // Rounds 16-19 + msg = _mm_add_epi32( + msg0, + _mm_set_epi64x(0x240CA1CC0FC19DC6u64 as i64, 0xEFBE4786E49B69C1u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg0, msg3, 4); + msg1 = _mm_add_epi32(msg1, tmp); + msg1 = _mm_sha256msg2_epu32(msg1, msg0); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg3 = _mm_sha256msg1_epu32(msg3, msg0); + + // Rounds 20-23 + msg = _mm_add_epi32( + msg1, + _mm_set_epi64x(0x76F988DA5CB0A9DCu64 as i64, 0x4A7484AA2DE92C6Fu64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg1, msg0, 4); + msg2 = _mm_add_epi32(msg2, tmp); + msg2 = _mm_sha256msg2_epu32(msg2, msg1); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg0 = _mm_sha256msg1_epu32(msg0, msg1); + + // Rounds 24-27 + msg = _mm_add_epi32( + msg2, + _mm_set_epi64x(0xBF597FC7B00327C8u64 as i64, 0xA831C66D983E5152u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg2, msg1, 4); + msg3 = _mm_add_epi32(msg3, tmp); + msg3 = _mm_sha256msg2_epu32(msg3, msg2); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg1 = _mm_sha256msg1_epu32(msg1, msg2); + + // Rounds 28-31 + msg = _mm_add_epi32( + msg3, + _mm_set_epi64x(0x1429296706CA6351u64 as i64, 0xD5A79147C6E00BF3u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg3, msg2, 4); + msg0 = _mm_add_epi32(msg0, tmp); + msg0 = _mm_sha256msg2_epu32(msg0, msg3); + msg = _mm_shuffle_epi32(msg, 0x0E); + 
state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg2 = _mm_sha256msg1_epu32(msg2, msg3); + + // Rounds 32-35 + msg = _mm_add_epi32( + msg0, + _mm_set_epi64x(0x53380D134D2C6DFCu64 as i64, 0x2E1B213827B70A85u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg0, msg3, 4); + msg1 = _mm_add_epi32(msg1, tmp); + msg1 = _mm_sha256msg2_epu32(msg1, msg0); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg3 = _mm_sha256msg1_epu32(msg3, msg0); + + // Rounds 36-39 + msg = _mm_add_epi32( + msg1, + _mm_set_epi64x(0x92722C8581C2C92Eu64 as i64, 0x766A0ABB650A7354u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg1, msg0, 4); + msg2 = _mm_add_epi32(msg2, tmp); + msg2 = _mm_sha256msg2_epu32(msg2, msg1); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg0 = _mm_sha256msg1_epu32(msg0, msg1); + + // Rounds 40-43 + msg = _mm_add_epi32( + msg2, + _mm_set_epi64x(0xC76C51A3C24B8B70u64 as i64, 0xA81A664BA2BFE8A1u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg2, msg1, 4); + msg3 = _mm_add_epi32(msg3, tmp); + msg3 = _mm_sha256msg2_epu32(msg3, msg2); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg1 = _mm_sha256msg1_epu32(msg1, msg2); + + // Rounds 44-47 + msg = _mm_add_epi32( + msg3, + _mm_set_epi64x(0x106AA070F40E3585u64 as i64, 0xD6990624D192E819u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg3, msg2, 4); + msg0 = _mm_add_epi32(msg0, tmp); + msg0 = _mm_sha256msg2_epu32(msg0, msg3); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg2 = _mm_sha256msg1_epu32(msg2, msg3); + + // Rounds 48-51 + msg = _mm_add_epi32( + msg0, + _mm_set_epi64x(0x34B0BCB52748774Cu64 as i64, 0x1E376C0819A4C116u64 as i64), + 
); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg0, msg3, 4); + msg1 = _mm_add_epi32(msg1, tmp); + msg1 = _mm_sha256msg2_epu32(msg1, msg0); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + msg3 = _mm_sha256msg1_epu32(msg3, msg0); + + // Rounds 52-55 + msg = _mm_add_epi32( + msg1, + _mm_set_epi64x(0x682E6FF35B9CCA4Fu64 as i64, 0x4ED8AA4A391C0CB3u64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg1, msg0, 4); + msg2 = _mm_add_epi32(msg2, tmp); + msg2 = _mm_sha256msg2_epu32(msg2, msg1); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + + // Rounds 56-59 + msg = _mm_add_epi32( + msg2, + _mm_set_epi64x(0x8CC7020884C87814u64 as i64, 0x78A5636F748F82EEu64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + tmp = _mm_alignr_epi8(msg2, msg1, 4); + msg3 = _mm_add_epi32(msg3, tmp); + msg3 = _mm_sha256msg2_epu32(msg3, msg2); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + + // Rounds 60-63 + msg = _mm_add_epi32( + msg3, + _mm_set_epi64x(0xC67178F2BEF9A3F7u64 as i64, 0xA4506CEB90BEFFFAu64 as i64), + ); + state1 = _mm_sha256rnds2_epu32(state1, state0, msg); + msg = _mm_shuffle_epi32(msg, 0x0E); + state0 = _mm_sha256rnds2_epu32(state0, state1, msg); + + // Combine state + state0 = _mm_add_epi32(state0, abef_save); + state1 = _mm_add_epi32(state1, cdgh_save); + } + + tmp = _mm_shuffle_epi32(state0, 0x1B); // FEBA + state1 = _mm_shuffle_epi32(state1, 0xB1); // DCHG + state0 = _mm_blend_epi16(tmp, state1, 0xF0); // DCBA + state1 = _mm_alignr_epi8(state1, tmp, 8); // ABEF + + // Save state + // CAST SAFETY: storeu_si128 documentation states that mem_addr does not + // need to be aligned on any particular boundary. 
+ _mm_storeu_si128(self.h.as_mut_ptr().add(0) as *mut __m128i, state0); + _mm_storeu_si128(self.h.as_mut_ptr().add(4) as *mut __m128i, state1); + } + + // Algorithm copied from libsecp256k1 + fn software_process_block(&mut self) { + debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); + + let mut w = [0u32; 16]; + for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(4)) { + *w_val = u32::from_be_bytes(buff_bytes.try_into().expect("4 byte slice")); + } + + let mut a = self.h[0]; + let mut b = self.h[1]; + let mut c = self.h[2]; + let mut d = self.h[3]; + let mut e = self.h[4]; + let mut f = self.h[5]; + let mut g = self.h[6]; + let mut h = self.h[7]; + + round!(a, b, c, d, e, f, g, h, 0x428a2f98, w[0]); + round!(h, a, b, c, d, e, f, g, 0x71374491, w[1]); + round!(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w[2]); + round!(f, g, h, a, b, c, d, e, 0xe9b5dba5, w[3]); + round!(e, f, g, h, a, b, c, d, 0x3956c25b, w[4]); + round!(d, e, f, g, h, a, b, c, 0x59f111f1, w[5]); + round!(c, d, e, f, g, h, a, b, 0x923f82a4, w[6]); + round!(b, c, d, e, f, g, h, a, 0xab1c5ed5, w[7]); + round!(a, b, c, d, e, f, g, h, 0xd807aa98, w[8]); + round!(h, a, b, c, d, e, f, g, 0x12835b01, w[9]); + round!(g, h, a, b, c, d, e, f, 0x243185be, w[10]); + round!(f, g, h, a, b, c, d, e, 0x550c7dc3, w[11]); + round!(e, f, g, h, a, b, c, d, 0x72be5d74, w[12]); + round!(d, e, f, g, h, a, b, c, 0x80deb1fe, w[13]); + round!(c, d, e, f, g, h, a, b, 0x9bdc06a7, w[14]); + round!(b, c, d, e, f, g, h, a, 0xc19bf174, w[15]); + + round!(a, b, c, d, e, f, g, h, 0xe49b69c1, w[0], w[14], w[9], w[1]); + round!(h, a, b, c, d, e, f, g, 0xefbe4786, w[1], w[15], w[10], w[2]); + round!(g, h, a, b, c, d, e, f, 0x0fc19dc6, w[2], w[0], w[11], w[3]); + round!(f, g, h, a, b, c, d, e, 0x240ca1cc, w[3], w[1], w[12], w[4]); + round!(e, f, g, h, a, b, c, d, 0x2de92c6f, w[4], w[2], w[13], w[5]); + round!(d, e, f, g, h, a, b, c, 0x4a7484aa, w[5], w[3], w[14], w[6]); + round!(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w[6], w[4], 
w[15], w[7]); + round!(b, c, d, e, f, g, h, a, 0x76f988da, w[7], w[5], w[0], w[8]); + round!(a, b, c, d, e, f, g, h, 0x983e5152, w[8], w[6], w[1], w[9]); + round!(h, a, b, c, d, e, f, g, 0xa831c66d, w[9], w[7], w[2], w[10]); + round!(g, h, a, b, c, d, e, f, 0xb00327c8, w[10], w[8], w[3], w[11]); + round!(f, g, h, a, b, c, d, e, 0xbf597fc7, w[11], w[9], w[4], w[12]); + round!(e, f, g, h, a, b, c, d, 0xc6e00bf3, w[12], w[10], w[5], w[13]); + round!(d, e, f, g, h, a, b, c, 0xd5a79147, w[13], w[11], w[6], w[14]); + round!(c, d, e, f, g, h, a, b, 0x06ca6351, w[14], w[12], w[7], w[15]); + round!(b, c, d, e, f, g, h, a, 0x14292967, w[15], w[13], w[8], w[0]); + + round!(a, b, c, d, e, f, g, h, 0x27b70a85, w[0], w[14], w[9], w[1]); + round!(h, a, b, c, d, e, f, g, 0x2e1b2138, w[1], w[15], w[10], w[2]); + round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w[2], w[0], w[11], w[3]); + round!(f, g, h, a, b, c, d, e, 0x53380d13, w[3], w[1], w[12], w[4]); + round!(e, f, g, h, a, b, c, d, 0x650a7354, w[4], w[2], w[13], w[5]); + round!(d, e, f, g, h, a, b, c, 0x766a0abb, w[5], w[3], w[14], w[6]); + round!(c, d, e, f, g, h, a, b, 0x81c2c92e, w[6], w[4], w[15], w[7]); + round!(b, c, d, e, f, g, h, a, 0x92722c85, w[7], w[5], w[0], w[8]); + round!(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w[8], w[6], w[1], w[9]); + round!(h, a, b, c, d, e, f, g, 0xa81a664b, w[9], w[7], w[2], w[10]); + round!(g, h, a, b, c, d, e, f, 0xc24b8b70, w[10], w[8], w[3], w[11]); + round!(f, g, h, a, b, c, d, e, 0xc76c51a3, w[11], w[9], w[4], w[12]); + round!(e, f, g, h, a, b, c, d, 0xd192e819, w[12], w[10], w[5], w[13]); + round!(d, e, f, g, h, a, b, c, 0xd6990624, w[13], w[11], w[6], w[14]); + round!(c, d, e, f, g, h, a, b, 0xf40e3585, w[14], w[12], w[7], w[15]); + round!(b, c, d, e, f, g, h, a, 0x106aa070, w[15], w[13], w[8], w[0]); + + round!(a, b, c, d, e, f, g, h, 0x19a4c116, w[0], w[14], w[9], w[1]); + round!(h, a, b, c, d, e, f, g, 0x1e376c08, w[1], w[15], w[10], w[2]); + round!(g, h, a, b, c, d, e, f, 0x2748774c, 
w[2], w[0], w[11], w[3]); + round!(f, g, h, a, b, c, d, e, 0x34b0bcb5, w[3], w[1], w[12], w[4]); + round!(e, f, g, h, a, b, c, d, 0x391c0cb3, w[4], w[2], w[13], w[5]); + round!(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w[5], w[3], w[14], w[6]); + round!(c, d, e, f, g, h, a, b, 0x5b9cca4f, w[6], w[4], w[15], w[7]); + round!(b, c, d, e, f, g, h, a, 0x682e6ff3, w[7], w[5], w[0], w[8]); + round!(a, b, c, d, e, f, g, h, 0x748f82ee, w[8], w[6], w[1], w[9]); + round!(h, a, b, c, d, e, f, g, 0x78a5636f, w[9], w[7], w[2], w[10]); + round!(g, h, a, b, c, d, e, f, 0x84c87814, w[10], w[8], w[3], w[11]); + round!(f, g, h, a, b, c, d, e, 0x8cc70208, w[11], w[9], w[4], w[12]); + round!(e, f, g, h, a, b, c, d, 0x90befffa, w[12], w[10], w[5], w[13]); + round!(d, e, f, g, h, a, b, c, 0xa4506ceb, w[13], w[11], w[6], w[14]); + round!(c, d, e, f, g, h, a, b, 0xbef9a3f7, w[14], w[12], w[7], w[15]); + round!(b, c, d, e, f, g, h, a, 0xc67178f2, w[15], w[13], w[8], w[0]); + + self.h[0] = self.h[0].wrapping_add(a); + self.h[1] = self.h[1].wrapping_add(b); + self.h[2] = self.h[2].wrapping_add(c); + self.h[3] = self.h[3].wrapping_add(d); + self.h[4] = self.h[4].wrapping_add(e); + self.h[5] = self.h[5].wrapping_add(f); + self.h[6] = self.h[6].wrapping_add(g); + self.h[7] = self.h[7].wrapping_add(h); + } +} diff --git a/hashes/src/sha256/mod.rs b/hashes/src/sha256/mod.rs index 8770b332b..cd6beb24f 100644 --- a/hashes/src/sha256/mod.rs +++ b/hashes/src/sha256/mod.rs @@ -2,10 +2,8 @@ //! SHA256 implementation. 
-#[cfg(all(feature = "std", target_arch = "x86"))] -use core::arch::x86::*; -#[cfg(all(feature = "std", target_arch = "x86_64"))] -use core::arch::x86_64::*; +mod crypto; + use core::{cmp, convert, fmt}; use crate::{incomplete_block_len, sha256d, HashEngine as _}; @@ -256,617 +254,6 @@ impl fmt::Display for MidstateError { #[cfg(feature = "std")] impl std::error::Error for MidstateError {} -#[allow(non_snake_case)] -const fn Ch(x: u32, y: u32, z: u32) -> u32 { z ^ (x & (y ^ z)) } -#[allow(non_snake_case)] -const fn Maj(x: u32, y: u32, z: u32) -> u32 { (x & y) | (z & (x | y)) } -#[allow(non_snake_case)] -const fn Sigma0(x: u32) -> u32 { x.rotate_left(30) ^ x.rotate_left(19) ^ x.rotate_left(10) } -#[allow(non_snake_case)] -const fn Sigma1(x: u32) -> u32 { x.rotate_left(26) ^ x.rotate_left(21) ^ x.rotate_left(7) } -const fn sigma0(x: u32) -> u32 { x.rotate_left(25) ^ x.rotate_left(14) ^ (x >> 3) } -const fn sigma1(x: u32) -> u32 { x.rotate_left(15) ^ x.rotate_left(13) ^ (x >> 10) } - -#[cfg(feature = "small-hash")] -#[macro_use] -mod small_hash { - use super::*; - - #[rustfmt::skip] - pub(super) const fn round(a: u32, b: u32, c: u32, d: u32, e: u32, - f: u32, g: u32, h: u32, k: u32, w: u32) -> (u32, u32) { - let t1 = - h.wrapping_add(Sigma1(e)).wrapping_add(Ch(e, f, g)).wrapping_add(k).wrapping_add(w); - let t2 = Sigma0(a).wrapping_add(Maj(a, b, c)); - (d.wrapping_add(t1), t1.wrapping_add(t2)) - } - #[rustfmt::skip] - pub(super) const fn later_round(a: u32, b: u32, c: u32, d: u32, e: u32, - f: u32, g: u32, h: u32, k: u32, w: u32, - w1: u32, w2: u32, w3: u32, - ) -> (u32, u32, u32) { - let w = w.wrapping_add(sigma1(w1)).wrapping_add(w2).wrapping_add(sigma0(w3)); - let (d, h) = round(a, b, c, d, e, f, g, h, k, w); - (d, h, w) - } - - macro_rules! 
round( - // first round - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => ( - let updates = small_hash::round($a, $b, $c, $d, $e, $f, $g, $h, $k, $w); - $d = updates.0; - $h = updates.1; - ); - // later rounds we reassign $w before doing the first-round computation - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => ( - let updates = small_hash::later_round($a, $b, $c, $d, $e, $f, $g, $h, $k, $w, $w1, $w2, $w3); - $d = updates.0; - $h = updates.1; - $w = updates.2; - ) - ); -} - -#[cfg(not(feature = "small-hash"))] -#[macro_use] -mod fast_hash { - macro_rules! round( - // first round - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => ( - let t1 = $h.wrapping_add(Sigma1($e)).wrapping_add(Ch($e, $f, $g)).wrapping_add($k).wrapping_add($w); - let t2 = Sigma0($a).wrapping_add(Maj($a, $b, $c)); - $d = $d.wrapping_add(t1); - $h = t1.wrapping_add(t2); - ); - // later rounds we reassign $w before doing the first-round computation - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => ( - $w = $w.wrapping_add(sigma1($w1)).wrapping_add($w2).wrapping_add(sigma0($w3)); - round!($a, $b, $c, $d, $e, $f, $g, $h, $k, $w); - ) - ); -} - -impl Midstate { - #[allow(clippy::identity_op)] // more readble - const fn read_u32(bytes: &[u8], index: usize) -> u32 { - ((bytes[index + 0] as u32) << 24) - | ((bytes[index + 1] as u32) << 16) - | ((bytes[index + 2] as u32) << 8) - | ((bytes[index + 3] as u32) << 0) - } - - const fn copy_w(bytes: &[u8], index: usize) -> [u32; 16] { - let mut w = [0u32; 16]; - let mut i = 0; - while i < 16 { - w[i] = Self::read_u32(bytes, index + i * 4); - i += 1; - } - w - } - - const fn compute_midstate_unoptimized(bytes: &[u8], finalize: bool) -> Self { - let mut state = [ - 0x6a09e667u32, - 0xbb67ae85, - 0x3c6ef372, - 0xa54ff53a, 
- 0x510e527f, - 0x9b05688c, - 0x1f83d9ab, - 0x5be0cd19, - ]; - - let num_chunks = (bytes.len() + 9 + 63) / 64; - let mut chunk = 0; - #[allow(clippy::precedence)] - while chunk < num_chunks { - if !finalize && chunk + 1 == num_chunks { - break; - } - let mut w = if chunk * 64 + 64 <= bytes.len() { - Self::copy_w(bytes, chunk * 64) - } else { - let mut buf = [0; 64]; - let mut i = 0; - let offset = chunk * 64; - while offset + i < bytes.len() { - buf[i] = bytes[offset + i]; - i += 1; - } - if (bytes.len() % 64 <= 64 - 9) || (chunk + 2 == num_chunks) { - buf[i] = 0x80; - } - #[allow(clippy::identity_op)] // more readble - #[allow(clippy::erasing_op)] - if chunk + 1 == num_chunks { - let bit_len = bytes.len() as u64 * 8; - buf[64 - 8] = ((bit_len >> 8 * 7) & 0xFF) as u8; - buf[64 - 7] = ((bit_len >> 8 * 6) & 0xFF) as u8; - buf[64 - 6] = ((bit_len >> 8 * 5) & 0xFF) as u8; - buf[64 - 5] = ((bit_len >> 8 * 4) & 0xFF) as u8; - buf[64 - 4] = ((bit_len >> 8 * 3) & 0xFF) as u8; - buf[64 - 3] = ((bit_len >> 8 * 2) & 0xFF) as u8; - buf[64 - 2] = ((bit_len >> 8 * 1) & 0xFF) as u8; - buf[64 - 1] = ((bit_len >> 8 * 0) & 0xFF) as u8; - } - Self::copy_w(&buf, 0) - }; - chunk += 1; - - let mut a = state[0]; - let mut b = state[1]; - let mut c = state[2]; - let mut d = state[3]; - let mut e = state[4]; - let mut f = state[5]; - let mut g = state[6]; - let mut h = state[7]; - - round!(a, b, c, d, e, f, g, h, 0x428a2f98, w[0]); - round!(h, a, b, c, d, e, f, g, 0x71374491, w[1]); - round!(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w[2]); - round!(f, g, h, a, b, c, d, e, 0xe9b5dba5, w[3]); - round!(e, f, g, h, a, b, c, d, 0x3956c25b, w[4]); - round!(d, e, f, g, h, a, b, c, 0x59f111f1, w[5]); - round!(c, d, e, f, g, h, a, b, 0x923f82a4, w[6]); - round!(b, c, d, e, f, g, h, a, 0xab1c5ed5, w[7]); - round!(a, b, c, d, e, f, g, h, 0xd807aa98, w[8]); - round!(h, a, b, c, d, e, f, g, 0x12835b01, w[9]); - round!(g, h, a, b, c, d, e, f, 0x243185be, w[10]); - round!(f, g, h, a, b, c, d, e, 0x550c7dc3, 
w[11]); - round!(e, f, g, h, a, b, c, d, 0x72be5d74, w[12]); - round!(d, e, f, g, h, a, b, c, 0x80deb1fe, w[13]); - round!(c, d, e, f, g, h, a, b, 0x9bdc06a7, w[14]); - round!(b, c, d, e, f, g, h, a, 0xc19bf174, w[15]); - - round!(a, b, c, d, e, f, g, h, 0xe49b69c1, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0xefbe4786, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x0fc19dc6, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x240ca1cc, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x2de92c6f, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x4a7484aa, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x76f988da, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0x983e5152, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0xa831c66d, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0xb00327c8, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0xbf597fc7, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0xc6e00bf3, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xd5a79147, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0x06ca6351, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0x14292967, w[15], w[13], w[8], w[0]); - - round!(a, b, c, d, e, f, g, h, 0x27b70a85, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0x2e1b2138, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x53380d13, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x650a7354, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x766a0abb, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x81c2c92e, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x92722c85, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w[8], w[6], w[1], 
w[9]); - round!(h, a, b, c, d, e, f, g, 0xa81a664b, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0xc24b8b70, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0xc76c51a3, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0xd192e819, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xd6990624, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0xf40e3585, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0x106aa070, w[15], w[13], w[8], w[0]); - - round!(a, b, c, d, e, f, g, h, 0x19a4c116, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0x1e376c08, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x2748774c, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x34b0bcb5, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x391c0cb3, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x5b9cca4f, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x682e6ff3, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0x748f82ee, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0x78a5636f, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0x84c87814, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0x8cc70208, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0x90befffa, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xa4506ceb, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0xbef9a3f7, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0xc67178f2, w[15], w[13], w[8], w[0]); - - state[0] = state[0].wrapping_add(a); - state[1] = state[1].wrapping_add(b); - state[2] = state[2].wrapping_add(c); - state[3] = state[3].wrapping_add(d); - state[4] = state[4].wrapping_add(e); - state[5] = state[5].wrapping_add(f); - state[6] = state[6].wrapping_add(g); - state[7] = state[7].wrapping_add(h); - } - let mut output = 
[0u8; 32]; - let mut i = 0; - #[allow(clippy::identity_op)] // more readble - while i < 8 { - output[i * 4 + 0] = (state[i + 0] >> 24) as u8; - output[i * 4 + 1] = (state[i + 0] >> 16) as u8; - output[i * 4 + 2] = (state[i + 0] >> 8) as u8; - output[i * 4 + 3] = (state[i + 0] >> 0) as u8; - i += 1; - } - Midstate { bytes: output, bytes_hashed: bytes.len() as u64 } - } -} - -impl HashEngine { - fn process_block(&mut self) { - #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))] - { - if std::is_x86_feature_detected!("sse4.1") - && std::is_x86_feature_detected!("sha") - && std::is_x86_feature_detected!("sse2") - && std::is_x86_feature_detected!("ssse3") - { - return unsafe { self.process_block_simd_x86_intrinsics() }; - } - } - - // fallback implementation without using any intrinsics - self.software_process_block() - } - - #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))] - #[target_feature(enable = "sha,sse2,ssse3,sse4.1")] - unsafe fn process_block_simd_x86_intrinsics(&mut self) { - // Code translated and based on from - // https://github.com/noloader/SHA-Intrinsics/blob/4899efc81d1af159c1fd955936c673139f35aea9/sha256-x86.c - - /* sha256-x86.c - Intel SHA extensions using C intrinsics */ - /* Written and place in public domain by Jeffrey Walton */ - /* Based on code from Intel, and by Sean Gulley for */ - /* the miTLS project. */ - - // Variable names are also kept the same as in the original C code for easier comparison. - let (mut state0, mut state1); - let (mut msg, mut tmp); - - let (mut msg0, mut msg1, mut msg2, mut msg3); - - let (abef_save, cdgh_save); - - #[allow(non_snake_case)] - let MASK: __m128i = - _mm_set_epi64x(0x0c0d_0e0f_0809_0a0bu64 as i64, 0x0405_0607_0001_0203u64 as i64); - - let block_offset = 0; - - // Load initial values - // CAST SAFETY: loadu_si128 documentation states that mem_addr does not - // need to be aligned on any particular boundary. 
- tmp = _mm_loadu_si128(self.h.as_ptr().add(0) as *const __m128i); - state1 = _mm_loadu_si128(self.h.as_ptr().add(4) as *const __m128i); - - tmp = _mm_shuffle_epi32(tmp, 0xB1); // CDAB - state1 = _mm_shuffle_epi32(state1, 0x1B); // EFGH - state0 = _mm_alignr_epi8(tmp, state1, 8); // ABEF - state1 = _mm_blend_epi16(state1, tmp, 0xF0); // CDGH - - // Process a single block - { - // Save current state - abef_save = state0; - cdgh_save = state1; - - // Rounds 0-3 - msg = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset) as *const __m128i); - msg0 = _mm_shuffle_epi8(msg, MASK); - msg = _mm_add_epi32( - msg0, - _mm_set_epi64x(0xE9B5DBA5B5C0FBCFu64 as i64, 0x71374491428A2F98u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - - // Rounds 4-7 - msg1 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 16) as *const __m128i); - msg1 = _mm_shuffle_epi8(msg1, MASK); - msg = _mm_add_epi32( - msg1, - _mm_set_epi64x(0xAB1C5ED5923F82A4u64 as i64, 0x59F111F13956C25Bu64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg0 = _mm_sha256msg1_epu32(msg0, msg1); - - // Rounds 8-11 - msg2 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 32) as *const __m128i); - msg2 = _mm_shuffle_epi8(msg2, MASK); - msg = _mm_add_epi32( - msg2, - _mm_set_epi64x(0x550C7DC3243185BEu64 as i64, 0x12835B01D807AA98u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg1 = _mm_sha256msg1_epu32(msg1, msg2); - - // Rounds 12-15 - msg3 = _mm_loadu_si128(self.buffer.as_ptr().add(block_offset + 48) as *const __m128i); - msg3 = _mm_shuffle_epi8(msg3, MASK); - msg = _mm_add_epi32( - msg3, - _mm_set_epi64x(0xC19BF1749BDC06A7u64 as i64, 0x80DEB1FE72BE5D74u64 
as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg3, msg2, 4); - msg0 = _mm_add_epi32(msg0, tmp); - msg0 = _mm_sha256msg2_epu32(msg0, msg3); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg2 = _mm_sha256msg1_epu32(msg2, msg3); - - // Rounds 16-19 - msg = _mm_add_epi32( - msg0, - _mm_set_epi64x(0x240CA1CC0FC19DC6u64 as i64, 0xEFBE4786E49B69C1u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg0, msg3, 4); - msg1 = _mm_add_epi32(msg1, tmp); - msg1 = _mm_sha256msg2_epu32(msg1, msg0); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg3 = _mm_sha256msg1_epu32(msg3, msg0); - - // Rounds 20-23 - msg = _mm_add_epi32( - msg1, - _mm_set_epi64x(0x76F988DA5CB0A9DCu64 as i64, 0x4A7484AA2DE92C6Fu64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg1, msg0, 4); - msg2 = _mm_add_epi32(msg2, tmp); - msg2 = _mm_sha256msg2_epu32(msg2, msg1); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg0 = _mm_sha256msg1_epu32(msg0, msg1); - - // Rounds 24-27 - msg = _mm_add_epi32( - msg2, - _mm_set_epi64x(0xBF597FC7B00327C8u64 as i64, 0xA831C66D983E5152u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg2, msg1, 4); - msg3 = _mm_add_epi32(msg3, tmp); - msg3 = _mm_sha256msg2_epu32(msg3, msg2); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg1 = _mm_sha256msg1_epu32(msg1, msg2); - - // Rounds 28-31 - msg = _mm_add_epi32( - msg3, - _mm_set_epi64x(0x1429296706CA6351u64 as i64, 0xD5A79147C6E00BF3u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg3, msg2, 4); - msg0 = _mm_add_epi32(msg0, tmp); - msg0 = _mm_sha256msg2_epu32(msg0, msg3); - msg = _mm_shuffle_epi32(msg, 0x0E); - 
state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg2 = _mm_sha256msg1_epu32(msg2, msg3); - - // Rounds 32-35 - msg = _mm_add_epi32( - msg0, - _mm_set_epi64x(0x53380D134D2C6DFCu64 as i64, 0x2E1B213827B70A85u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg0, msg3, 4); - msg1 = _mm_add_epi32(msg1, tmp); - msg1 = _mm_sha256msg2_epu32(msg1, msg0); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg3 = _mm_sha256msg1_epu32(msg3, msg0); - - // Rounds 36-39 - msg = _mm_add_epi32( - msg1, - _mm_set_epi64x(0x92722C8581C2C92Eu64 as i64, 0x766A0ABB650A7354u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg1, msg0, 4); - msg2 = _mm_add_epi32(msg2, tmp); - msg2 = _mm_sha256msg2_epu32(msg2, msg1); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg0 = _mm_sha256msg1_epu32(msg0, msg1); - - // Rounds 40-43 - msg = _mm_add_epi32( - msg2, - _mm_set_epi64x(0xC76C51A3C24B8B70u64 as i64, 0xA81A664BA2BFE8A1u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg2, msg1, 4); - msg3 = _mm_add_epi32(msg3, tmp); - msg3 = _mm_sha256msg2_epu32(msg3, msg2); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg1 = _mm_sha256msg1_epu32(msg1, msg2); - - // Rounds 44-47 - msg = _mm_add_epi32( - msg3, - _mm_set_epi64x(0x106AA070F40E3585u64 as i64, 0xD6990624D192E819u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg3, msg2, 4); - msg0 = _mm_add_epi32(msg0, tmp); - msg0 = _mm_sha256msg2_epu32(msg0, msg3); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg2 = _mm_sha256msg1_epu32(msg2, msg3); - - // Rounds 48-51 - msg = _mm_add_epi32( - msg0, - _mm_set_epi64x(0x34B0BCB52748774Cu64 as i64, 0x1E376C0819A4C116u64 as i64), - 
); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg0, msg3, 4); - msg1 = _mm_add_epi32(msg1, tmp); - msg1 = _mm_sha256msg2_epu32(msg1, msg0); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - msg3 = _mm_sha256msg1_epu32(msg3, msg0); - - // Rounds 52-55 - msg = _mm_add_epi32( - msg1, - _mm_set_epi64x(0x682E6FF35B9CCA4Fu64 as i64, 0x4ED8AA4A391C0CB3u64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg1, msg0, 4); - msg2 = _mm_add_epi32(msg2, tmp); - msg2 = _mm_sha256msg2_epu32(msg2, msg1); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - - // Rounds 56-59 - msg = _mm_add_epi32( - msg2, - _mm_set_epi64x(0x8CC7020884C87814u64 as i64, 0x78A5636F748F82EEu64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - tmp = _mm_alignr_epi8(msg2, msg1, 4); - msg3 = _mm_add_epi32(msg3, tmp); - msg3 = _mm_sha256msg2_epu32(msg3, msg2); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - - // Rounds 60-63 - msg = _mm_add_epi32( - msg3, - _mm_set_epi64x(0xC67178F2BEF9A3F7u64 as i64, 0xA4506CEB90BEFFFAu64 as i64), - ); - state1 = _mm_sha256rnds2_epu32(state1, state0, msg); - msg = _mm_shuffle_epi32(msg, 0x0E); - state0 = _mm_sha256rnds2_epu32(state0, state1, msg); - - // Combine state - state0 = _mm_add_epi32(state0, abef_save); - state1 = _mm_add_epi32(state1, cdgh_save); - } - - tmp = _mm_shuffle_epi32(state0, 0x1B); // FEBA - state1 = _mm_shuffle_epi32(state1, 0xB1); // DCHG - state0 = _mm_blend_epi16(tmp, state1, 0xF0); // DCBA - state1 = _mm_alignr_epi8(state1, tmp, 8); // ABEF - - // Save state - // CAST SAFETY: storeu_si128 documentation states that mem_addr does not - // need to be aligned on any particular boundary. 
- _mm_storeu_si128(self.h.as_mut_ptr().add(0) as *mut __m128i, state0); - _mm_storeu_si128(self.h.as_mut_ptr().add(4) as *mut __m128i, state1); - } - - // Algorithm copied from libsecp256k1 - fn software_process_block(&mut self) { - debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); - - let mut w = [0u32; 16]; - for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(4)) { - *w_val = u32::from_be_bytes(buff_bytes.try_into().expect("4 byte slice")); - } - - let mut a = self.h[0]; - let mut b = self.h[1]; - let mut c = self.h[2]; - let mut d = self.h[3]; - let mut e = self.h[4]; - let mut f = self.h[5]; - let mut g = self.h[6]; - let mut h = self.h[7]; - - round!(a, b, c, d, e, f, g, h, 0x428a2f98, w[0]); - round!(h, a, b, c, d, e, f, g, 0x71374491, w[1]); - round!(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w[2]); - round!(f, g, h, a, b, c, d, e, 0xe9b5dba5, w[3]); - round!(e, f, g, h, a, b, c, d, 0x3956c25b, w[4]); - round!(d, e, f, g, h, a, b, c, 0x59f111f1, w[5]); - round!(c, d, e, f, g, h, a, b, 0x923f82a4, w[6]); - round!(b, c, d, e, f, g, h, a, 0xab1c5ed5, w[7]); - round!(a, b, c, d, e, f, g, h, 0xd807aa98, w[8]); - round!(h, a, b, c, d, e, f, g, 0x12835b01, w[9]); - round!(g, h, a, b, c, d, e, f, 0x243185be, w[10]); - round!(f, g, h, a, b, c, d, e, 0x550c7dc3, w[11]); - round!(e, f, g, h, a, b, c, d, 0x72be5d74, w[12]); - round!(d, e, f, g, h, a, b, c, 0x80deb1fe, w[13]); - round!(c, d, e, f, g, h, a, b, 0x9bdc06a7, w[14]); - round!(b, c, d, e, f, g, h, a, 0xc19bf174, w[15]); - - round!(a, b, c, d, e, f, g, h, 0xe49b69c1, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0xefbe4786, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x0fc19dc6, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x240ca1cc, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x2de92c6f, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x4a7484aa, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w[6], w[4], 
w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x76f988da, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0x983e5152, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0xa831c66d, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0xb00327c8, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0xbf597fc7, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0xc6e00bf3, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xd5a79147, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0x06ca6351, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0x14292967, w[15], w[13], w[8], w[0]); - - round!(a, b, c, d, e, f, g, h, 0x27b70a85, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0x2e1b2138, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x53380d13, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x650a7354, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x766a0abb, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x81c2c92e, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x92722c85, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0xa81a664b, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0xc24b8b70, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0xc76c51a3, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0xd192e819, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xd6990624, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0xf40e3585, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0x106aa070, w[15], w[13], w[8], w[0]); - - round!(a, b, c, d, e, f, g, h, 0x19a4c116, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0x1e376c08, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x2748774c, 
w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x34b0bcb5, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x391c0cb3, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x5b9cca4f, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x682e6ff3, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0x748f82ee, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0x78a5636f, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0x84c87814, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0x8cc70208, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0x90befffa, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xa4506ceb, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0xbef9a3f7, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0xc67178f2, w[15], w[13], w[8], w[0]); - - self.h[0] = self.h[0].wrapping_add(a); - self.h[1] = self.h[1].wrapping_add(b); - self.h[2] = self.h[2].wrapping_add(c); - self.h[3] = self.h[3].wrapping_add(d); - self.h[4] = self.h[4].wrapping_add(e); - self.h[5] = self.h[5].wrapping_add(f); - self.h[6] = self.h[6].wrapping_add(g); - self.h[7] = self.h[7].wrapping_add(h); - } -} - #[cfg(test)] mod tests { use core::array; diff --git a/hashes/src/sha512/crypto.rs b/hashes/src/sha512/crypto.rs new file mode 100644 index 000000000..cbb1c82ab --- /dev/null +++ b/hashes/src/sha512/crypto.rs @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: CC0-1.0 + +use super::{HashEngine, BLOCK_SIZE}; + +#[allow(non_snake_case)] +fn Ch(x: u64, y: u64, z: u64) -> u64 { z ^ (x & (y ^ z)) } +#[allow(non_snake_case)] +fn Maj(x: u64, y: u64, z: u64) -> u64 { (x & y) | (z & (x | y)) } +#[allow(non_snake_case)] +fn Sigma0(x: u64) -> u64 { x.rotate_left(36) ^ x.rotate_left(30) ^ x.rotate_left(25) } +#[allow(non_snake_case)] +fn Sigma1(x: u64) -> u64 { x.rotate_left(50) ^ x.rotate_left(46) ^ 
x.rotate_left(23) } +fn sigma0(x: u64) -> u64 { x.rotate_left(63) ^ x.rotate_left(56) ^ (x >> 7) } +fn sigma1(x: u64) -> u64 { x.rotate_left(45) ^ x.rotate_left(3) ^ (x >> 6) } + +#[cfg(feature = "small-hash")] +#[macro_use] +mod small_hash { + use super::*; + + #[rustfmt::skip] + pub(super) fn round(a: u64, b: u64, c: u64, d: &mut u64, e: u64, + f: u64, g: u64, h: &mut u64, k: u64, w: u64, + ) { + let t1 = + h.wrapping_add(Sigma1(e)).wrapping_add(Ch(e, f, g)).wrapping_add(k).wrapping_add(w); + let t2 = Sigma0(a).wrapping_add(Maj(a, b, c)); + *d = d.wrapping_add(t1); + *h = t1.wrapping_add(t2); + } + #[rustfmt::skip] + pub(super) fn later_round(a: u64, b: u64, c: u64, d: &mut u64, e: u64, + f: u64, g: u64, h: &mut u64, k: u64, w: u64, + w1: u64, w2: u64, w3: u64, + ) -> u64 { + let w = w.wrapping_add(sigma1(w1)).wrapping_add(w2).wrapping_add(sigma0(w3)); + round(a, b, c, d, e, f, g, h, k, w); + w + } + + macro_rules! round( + // first round + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => ( + small_hash::round($a, $b, $c, &mut $d, $e, $f, $g, &mut $h, $k, $w) + ); + // later rounds we reassign $w before doing the first-round computation + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => ( + $w = small_hash::later_round($a, $b, $c, &mut $d, $e, $f, $g, &mut $h, $k, $w, $w1, $w2, $w3) + ) + ); +} + +#[cfg(not(feature = "small-hash"))] +#[macro_use] +mod fast_hash { + macro_rules! 
round( + // first round + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => ( + let t1 = $h.wrapping_add(Sigma1($e)).wrapping_add(Ch($e, $f, $g)).wrapping_add($k).wrapping_add($w); + let t2 = Sigma0($a).wrapping_add(Maj($a, $b, $c)); + $d = $d.wrapping_add(t1); + $h = t1.wrapping_add(t2); + ); + // later rounds we reassign $w before doing the first-round computation + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => ( + $w = $w.wrapping_add(sigma1($w1)).wrapping_add($w2).wrapping_add(sigma0($w3)); + round!($a, $b, $c, $d, $e, $f, $g, $h, $k, $w); + ) + ); +} + +impl HashEngine { + // Algorithm copied from libsecp256k1 + pub(crate) fn process_block(&mut self) { + debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); + + let mut w = [0u64; 16]; + for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(8)) { + *w_val = u64::from_be_bytes(buff_bytes.try_into().expect("8 byte slice")); + } + + let mut a = self.h[0]; + let mut b = self.h[1]; + let mut c = self.h[2]; + let mut d = self.h[3]; + let mut e = self.h[4]; + let mut f = self.h[5]; + let mut g = self.h[6]; + let mut h = self.h[7]; + + round!(a, b, c, d, e, f, g, h, 0x428a2f98d728ae22, w[0]); + round!(h, a, b, c, d, e, f, g, 0x7137449123ef65cd, w[1]); + round!(g, h, a, b, c, d, e, f, 0xb5c0fbcfec4d3b2f, w[2]); + round!(f, g, h, a, b, c, d, e, 0xe9b5dba58189dbbc, w[3]); + round!(e, f, g, h, a, b, c, d, 0x3956c25bf348b538, w[4]); + round!(d, e, f, g, h, a, b, c, 0x59f111f1b605d019, w[5]); + round!(c, d, e, f, g, h, a, b, 0x923f82a4af194f9b, w[6]); + round!(b, c, d, e, f, g, h, a, 0xab1c5ed5da6d8118, w[7]); + round!(a, b, c, d, e, f, g, h, 0xd807aa98a3030242, w[8]); + round!(h, a, b, c, d, e, f, g, 0x12835b0145706fbe, w[9]); + round!(g, h, a, b, c, d, e, f, 0x243185be4ee4b28c, w[10]); + round!(f, g, h, a, b, c, d, e, 0x550c7dc3d5ffb4e2, w[11]); + round!(e, f, g, h, a, b, c, d, 
0x72be5d74f27b896f, w[12]); + round!(d, e, f, g, h, a, b, c, 0x80deb1fe3b1696b1, w[13]); + round!(c, d, e, f, g, h, a, b, 0x9bdc06a725c71235, w[14]); + round!(b, c, d, e, f, g, h, a, 0xc19bf174cf692694, w[15]); + + round!(a, b, c, d, e, f, g, h, 0xe49b69c19ef14ad2, w[0], w[14], w[9], w[1]); + round!(h, a, b, c, d, e, f, g, 0xefbe4786384f25e3, w[1], w[15], w[10], w[2]); + round!(g, h, a, b, c, d, e, f, 0x0fc19dc68b8cd5b5, w[2], w[0], w[11], w[3]); + round!(f, g, h, a, b, c, d, e, 0x240ca1cc77ac9c65, w[3], w[1], w[12], w[4]); + round!(e, f, g, h, a, b, c, d, 0x2de92c6f592b0275, w[4], w[2], w[13], w[5]); + round!(d, e, f, g, h, a, b, c, 0x4a7484aa6ea6e483, w[5], w[3], w[14], w[6]); + round!(c, d, e, f, g, h, a, b, 0x5cb0a9dcbd41fbd4, w[6], w[4], w[15], w[7]); + round!(b, c, d, e, f, g, h, a, 0x76f988da831153b5, w[7], w[5], w[0], w[8]); + round!(a, b, c, d, e, f, g, h, 0x983e5152ee66dfab, w[8], w[6], w[1], w[9]); + round!(h, a, b, c, d, e, f, g, 0xa831c66d2db43210, w[9], w[7], w[2], w[10]); + round!(g, h, a, b, c, d, e, f, 0xb00327c898fb213f, w[10], w[8], w[3], w[11]); + round!(f, g, h, a, b, c, d, e, 0xbf597fc7beef0ee4, w[11], w[9], w[4], w[12]); + round!(e, f, g, h, a, b, c, d, 0xc6e00bf33da88fc2, w[12], w[10], w[5], w[13]); + round!(d, e, f, g, h, a, b, c, 0xd5a79147930aa725, w[13], w[11], w[6], w[14]); + round!(c, d, e, f, g, h, a, b, 0x06ca6351e003826f, w[14], w[12], w[7], w[15]); + round!(b, c, d, e, f, g, h, a, 0x142929670a0e6e70, w[15], w[13], w[8], w[0]); + + round!(a, b, c, d, e, f, g, h, 0x27b70a8546d22ffc, w[0], w[14], w[9], w[1]); + round!(h, a, b, c, d, e, f, g, 0x2e1b21385c26c926, w[1], w[15], w[10], w[2]); + round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc5ac42aed, w[2], w[0], w[11], w[3]); + round!(f, g, h, a, b, c, d, e, 0x53380d139d95b3df, w[3], w[1], w[12], w[4]); + round!(e, f, g, h, a, b, c, d, 0x650a73548baf63de, w[4], w[2], w[13], w[5]); + round!(d, e, f, g, h, a, b, c, 0x766a0abb3c77b2a8, w[5], w[3], w[14], w[6]); + round!(c, d, e, f, g, h, a, b, 
0x81c2c92e47edaee6, w[6], w[4], w[15], w[7]); + round!(b, c, d, e, f, g, h, a, 0x92722c851482353b, w[7], w[5], w[0], w[8]); + round!(a, b, c, d, e, f, g, h, 0xa2bfe8a14cf10364, w[8], w[6], w[1], w[9]); + round!(h, a, b, c, d, e, f, g, 0xa81a664bbc423001, w[9], w[7], w[2], w[10]); + round!(g, h, a, b, c, d, e, f, 0xc24b8b70d0f89791, w[10], w[8], w[3], w[11]); + round!(f, g, h, a, b, c, d, e, 0xc76c51a30654be30, w[11], w[9], w[4], w[12]); + round!(e, f, g, h, a, b, c, d, 0xd192e819d6ef5218, w[12], w[10], w[5], w[13]); + round!(d, e, f, g, h, a, b, c, 0xd69906245565a910, w[13], w[11], w[6], w[14]); + round!(c, d, e, f, g, h, a, b, 0xf40e35855771202a, w[14], w[12], w[7], w[15]); + round!(b, c, d, e, f, g, h, a, 0x106aa07032bbd1b8, w[15], w[13], w[8], w[0]); + + round!(a, b, c, d, e, f, g, h, 0x19a4c116b8d2d0c8, w[0], w[14], w[9], w[1]); + round!(h, a, b, c, d, e, f, g, 0x1e376c085141ab53, w[1], w[15], w[10], w[2]); + round!(g, h, a, b, c, d, e, f, 0x2748774cdf8eeb99, w[2], w[0], w[11], w[3]); + round!(f, g, h, a, b, c, d, e, 0x34b0bcb5e19b48a8, w[3], w[1], w[12], w[4]); + round!(e, f, g, h, a, b, c, d, 0x391c0cb3c5c95a63, w[4], w[2], w[13], w[5]); + round!(d, e, f, g, h, a, b, c, 0x4ed8aa4ae3418acb, w[5], w[3], w[14], w[6]); + round!(c, d, e, f, g, h, a, b, 0x5b9cca4f7763e373, w[6], w[4], w[15], w[7]); + round!(b, c, d, e, f, g, h, a, 0x682e6ff3d6b2b8a3, w[7], w[5], w[0], w[8]); + round!(a, b, c, d, e, f, g, h, 0x748f82ee5defb2fc, w[8], w[6], w[1], w[9]); + round!(h, a, b, c, d, e, f, g, 0x78a5636f43172f60, w[9], w[7], w[2], w[10]); + round!(g, h, a, b, c, d, e, f, 0x84c87814a1f0ab72, w[10], w[8], w[3], w[11]); + round!(f, g, h, a, b, c, d, e, 0x8cc702081a6439ec, w[11], w[9], w[4], w[12]); + round!(e, f, g, h, a, b, c, d, 0x90befffa23631e28, w[12], w[10], w[5], w[13]); + round!(d, e, f, g, h, a, b, c, 0xa4506cebde82bde9, w[13], w[11], w[6], w[14]); + round!(c, d, e, f, g, h, a, b, 0xbef9a3f7b2c67915, w[14], w[12], w[7], w[15]); + round!(b, c, d, e, f, g, h, a, 
0xc67178f2e372532b, w[15], w[13], w[8], w[0]); + + round!(a, b, c, d, e, f, g, h, 0xca273eceea26619c, w[0], w[14], w[9], w[1]); + round!(h, a, b, c, d, e, f, g, 0xd186b8c721c0c207, w[1], w[15], w[10], w[2]); + round!(g, h, a, b, c, d, e, f, 0xeada7dd6cde0eb1e, w[2], w[0], w[11], w[3]); + round!(f, g, h, a, b, c, d, e, 0xf57d4f7fee6ed178, w[3], w[1], w[12], w[4]); + round!(e, f, g, h, a, b, c, d, 0x06f067aa72176fba, w[4], w[2], w[13], w[5]); + round!(d, e, f, g, h, a, b, c, 0x0a637dc5a2c898a6, w[5], w[3], w[14], w[6]); + round!(c, d, e, f, g, h, a, b, 0x113f9804bef90dae, w[6], w[4], w[15], w[7]); + round!(b, c, d, e, f, g, h, a, 0x1b710b35131c471b, w[7], w[5], w[0], w[8]); + round!(a, b, c, d, e, f, g, h, 0x28db77f523047d84, w[8], w[6], w[1], w[9]); + round!(h, a, b, c, d, e, f, g, 0x32caab7b40c72493, w[9], w[7], w[2], w[10]); + round!(g, h, a, b, c, d, e, f, 0x3c9ebe0a15c9bebc, w[10], w[8], w[3], w[11]); + round!(f, g, h, a, b, c, d, e, 0x431d67c49c100d4c, w[11], w[9], w[4], w[12]); + round!(e, f, g, h, a, b, c, d, 0x4cc5d4becb3e42b6, w[12], w[10], w[5], w[13]); + round!(d, e, f, g, h, a, b, c, 0x597f299cfc657e2a, w[13], w[11], w[6], w[14]); + round!(c, d, e, f, g, h, a, b, 0x5fcb6fab3ad6faec, w[14], w[12], w[7], w[15]); + round!(b, c, d, e, f, g, h, a, 0x6c44198c4a475817, w[15], w[13], w[8], w[0]); + + self.h[0] = self.h[0].wrapping_add(a); + self.h[1] = self.h[1].wrapping_add(b); + self.h[2] = self.h[2].wrapping_add(c); + self.h[3] = self.h[3].wrapping_add(d); + self.h[4] = self.h[4].wrapping_add(e); + self.h[5] = self.h[5].wrapping_add(f); + self.h[6] = self.h[6].wrapping_add(g); + self.h[7] = self.h[7].wrapping_add(h); + } +} diff --git a/hashes/src/sha512/mod.rs b/hashes/src/sha512/mod.rs index 1ecd563df..4db0bec54 100644 --- a/hashes/src/sha512/mod.rs +++ b/hashes/src/sha512/mod.rs @@ -2,6 +2,8 @@ //! SHA512 implementation. 
+mod crypto; + use core::cmp; use crate::{incomplete_block_len, HashEngine as _}; @@ -121,188 +123,6 @@ impl crate::HashEngine for HashEngine { crate::internal_macros::engine_input_impl!(); } -#[allow(non_snake_case)] -fn Ch(x: u64, y: u64, z: u64) -> u64 { z ^ (x & (y ^ z)) } -#[allow(non_snake_case)] -fn Maj(x: u64, y: u64, z: u64) -> u64 { (x & y) | (z & (x | y)) } -#[allow(non_snake_case)] -fn Sigma0(x: u64) -> u64 { x.rotate_left(36) ^ x.rotate_left(30) ^ x.rotate_left(25) } -#[allow(non_snake_case)] -fn Sigma1(x: u64) -> u64 { x.rotate_left(50) ^ x.rotate_left(46) ^ x.rotate_left(23) } -fn sigma0(x: u64) -> u64 { x.rotate_left(63) ^ x.rotate_left(56) ^ (x >> 7) } -fn sigma1(x: u64) -> u64 { x.rotate_left(45) ^ x.rotate_left(3) ^ (x >> 6) } - -#[cfg(feature = "small-hash")] -#[macro_use] -mod small_hash { - use super::*; - - #[rustfmt::skip] - pub(super) fn round(a: u64, b: u64, c: u64, d: &mut u64, e: u64, - f: u64, g: u64, h: &mut u64, k: u64, w: u64, - ) { - let t1 = - h.wrapping_add(Sigma1(e)).wrapping_add(Ch(e, f, g)).wrapping_add(k).wrapping_add(w); - let t2 = Sigma0(a).wrapping_add(Maj(a, b, c)); - *d = d.wrapping_add(t1); - *h = t1.wrapping_add(t2); - } - #[rustfmt::skip] - pub(super) fn later_round(a: u64, b: u64, c: u64, d: &mut u64, e: u64, - f: u64, g: u64, h: &mut u64, k: u64, w: u64, - w1: u64, w2: u64, w3: u64, - ) -> u64 { - let w = w.wrapping_add(sigma1(w1)).wrapping_add(w2).wrapping_add(sigma0(w3)); - round(a, b, c, d, e, f, g, h, k, w); - w - } - - macro_rules! 
round( - // first round - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => ( - small_hash::round($a, $b, $c, &mut $d, $e, $f, $g, &mut $h, $k, $w) - ); - // later rounds we reassign $w before doing the first-round computation - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => ( - $w = small_hash::later_round($a, $b, $c, &mut $d, $e, $f, $g, &mut $h, $k, $w, $w1, $w2, $w3) - ) - ); -} - -#[cfg(not(feature = "small-hash"))] -#[macro_use] -mod fast_hash { - macro_rules! round( - // first round - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr) => ( - let t1 = $h.wrapping_add(Sigma1($e)).wrapping_add(Ch($e, $f, $g)).wrapping_add($k).wrapping_add($w); - let t2 = Sigma0($a).wrapping_add(Maj($a, $b, $c)); - $d = $d.wrapping_add(t1); - $h = t1.wrapping_add(t2); - ); - // later rounds we reassign $w before doing the first-round computation - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $k:expr, $w:expr, $w1:expr, $w2:expr, $w3:expr) => ( - $w = $w.wrapping_add(sigma1($w1)).wrapping_add($w2).wrapping_add(sigma0($w3)); - round!($a, $b, $c, $d, $e, $f, $g, $h, $k, $w); - ) - ); -} - -impl HashEngine { - // Algorithm copied from libsecp256k1 - pub(crate) fn process_block(&mut self) { - debug_assert_eq!(self.buffer.len(), BLOCK_SIZE); - - let mut w = [0u64; 16]; - for (w_val, buff_bytes) in w.iter_mut().zip(self.buffer.chunks_exact(8)) { - *w_val = u64::from_be_bytes(buff_bytes.try_into().expect("8 byte slice")); - } - - let mut a = self.h[0]; - let mut b = self.h[1]; - let mut c = self.h[2]; - let mut d = self.h[3]; - let mut e = self.h[4]; - let mut f = self.h[5]; - let mut g = self.h[6]; - let mut h = self.h[7]; - - round!(a, b, c, d, e, f, g, h, 0x428a2f98d728ae22, w[0]); - round!(h, a, b, c, d, e, f, g, 0x7137449123ef65cd, w[1]); - round!(g, h, a, b, c, d, e, f, 0xb5c0fbcfec4d3b2f, 
w[2]); - round!(f, g, h, a, b, c, d, e, 0xe9b5dba58189dbbc, w[3]); - round!(e, f, g, h, a, b, c, d, 0x3956c25bf348b538, w[4]); - round!(d, e, f, g, h, a, b, c, 0x59f111f1b605d019, w[5]); - round!(c, d, e, f, g, h, a, b, 0x923f82a4af194f9b, w[6]); - round!(b, c, d, e, f, g, h, a, 0xab1c5ed5da6d8118, w[7]); - round!(a, b, c, d, e, f, g, h, 0xd807aa98a3030242, w[8]); - round!(h, a, b, c, d, e, f, g, 0x12835b0145706fbe, w[9]); - round!(g, h, a, b, c, d, e, f, 0x243185be4ee4b28c, w[10]); - round!(f, g, h, a, b, c, d, e, 0x550c7dc3d5ffb4e2, w[11]); - round!(e, f, g, h, a, b, c, d, 0x72be5d74f27b896f, w[12]); - round!(d, e, f, g, h, a, b, c, 0x80deb1fe3b1696b1, w[13]); - round!(c, d, e, f, g, h, a, b, 0x9bdc06a725c71235, w[14]); - round!(b, c, d, e, f, g, h, a, 0xc19bf174cf692694, w[15]); - - round!(a, b, c, d, e, f, g, h, 0xe49b69c19ef14ad2, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0xefbe4786384f25e3, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x0fc19dc68b8cd5b5, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x240ca1cc77ac9c65, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x2de92c6f592b0275, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x4a7484aa6ea6e483, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x5cb0a9dcbd41fbd4, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x76f988da831153b5, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0x983e5152ee66dfab, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0xa831c66d2db43210, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0xb00327c898fb213f, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0xbf597fc7beef0ee4, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0xc6e00bf33da88fc2, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xd5a79147930aa725, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0x06ca6351e003826f, w[14], w[12], w[7], w[15]); - round!(b, 
c, d, e, f, g, h, a, 0x142929670a0e6e70, w[15], w[13], w[8], w[0]); - - round!(a, b, c, d, e, f, g, h, 0x27b70a8546d22ffc, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0x2e1b21385c26c926, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x4d2c6dfc5ac42aed, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x53380d139d95b3df, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x650a73548baf63de, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x766a0abb3c77b2a8, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x81c2c92e47edaee6, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x92722c851482353b, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0xa2bfe8a14cf10364, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0xa81a664bbc423001, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0xc24b8b70d0f89791, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0xc76c51a30654be30, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0xd192e819d6ef5218, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xd69906245565a910, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0xf40e35855771202a, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0x106aa07032bbd1b8, w[15], w[13], w[8], w[0]); - - round!(a, b, c, d, e, f, g, h, 0x19a4c116b8d2d0c8, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0x1e376c085141ab53, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0x2748774cdf8eeb99, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0x34b0bcb5e19b48a8, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x391c0cb3c5c95a63, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x4ed8aa4ae3418acb, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x5b9cca4f7763e373, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x682e6ff3d6b2b8a3, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, 
f, g, h, 0x748f82ee5defb2fc, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0x78a5636f43172f60, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0x84c87814a1f0ab72, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0x8cc702081a6439ec, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0x90befffa23631e28, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0xa4506cebde82bde9, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0xbef9a3f7b2c67915, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0xc67178f2e372532b, w[15], w[13], w[8], w[0]); - - round!(a, b, c, d, e, f, g, h, 0xca273eceea26619c, w[0], w[14], w[9], w[1]); - round!(h, a, b, c, d, e, f, g, 0xd186b8c721c0c207, w[1], w[15], w[10], w[2]); - round!(g, h, a, b, c, d, e, f, 0xeada7dd6cde0eb1e, w[2], w[0], w[11], w[3]); - round!(f, g, h, a, b, c, d, e, 0xf57d4f7fee6ed178, w[3], w[1], w[12], w[4]); - round!(e, f, g, h, a, b, c, d, 0x06f067aa72176fba, w[4], w[2], w[13], w[5]); - round!(d, e, f, g, h, a, b, c, 0x0a637dc5a2c898a6, w[5], w[3], w[14], w[6]); - round!(c, d, e, f, g, h, a, b, 0x113f9804bef90dae, w[6], w[4], w[15], w[7]); - round!(b, c, d, e, f, g, h, a, 0x1b710b35131c471b, w[7], w[5], w[0], w[8]); - round!(a, b, c, d, e, f, g, h, 0x28db77f523047d84, w[8], w[6], w[1], w[9]); - round!(h, a, b, c, d, e, f, g, 0x32caab7b40c72493, w[9], w[7], w[2], w[10]); - round!(g, h, a, b, c, d, e, f, 0x3c9ebe0a15c9bebc, w[10], w[8], w[3], w[11]); - round!(f, g, h, a, b, c, d, e, 0x431d67c49c100d4c, w[11], w[9], w[4], w[12]); - round!(e, f, g, h, a, b, c, d, 0x4cc5d4becb3e42b6, w[12], w[10], w[5], w[13]); - round!(d, e, f, g, h, a, b, c, 0x597f299cfc657e2a, w[13], w[11], w[6], w[14]); - round!(c, d, e, f, g, h, a, b, 0x5fcb6fab3ad6faec, w[14], w[12], w[7], w[15]); - round!(b, c, d, e, f, g, h, a, 0x6c44198c4a475817, w[15], w[13], w[8], w[0]); - - self.h[0] = self.h[0].wrapping_add(a); - self.h[1] = self.h[1].wrapping_add(b); - self.h[2] = 
self.h[2].wrapping_add(c); - self.h[3] = self.h[3].wrapping_add(d); - self.h[4] = self.h[4].wrapping_add(e); - self.h[5] = self.h[5].wrapping_add(f); - self.h[6] = self.h[6].wrapping_add(g); - self.h[7] = self.h[7].wrapping_add(h); - } -} - #[cfg(test)] mod tests { #[test]