keyfork/crates/util/keyfork-mnemonic-util/src/lib.rs

608 lines
21 KiB
Rust

//! Zero-dependency mnemonic encoding and decoding of data.
//!
//! Mnemonics can be used to safely encode data of 32, 48, and 64 bytes as a phrase:
//!
//! ```rust
//! use keyfork_mnemonic_util::Mnemonic;
//! let data = b"Hello, world! I am a mnemonic :)";
//! assert_eq!(data.len(), 32);
//! let mnemonic = Mnemonic::try_from_slice(data).unwrap();
//! println!("Our mnemonic is: {mnemonic}");
//! ```
//!
//! A mnemonic can also be parsed from a string:
//!
//! ```rust
//! use keyfork_mnemonic_util::Mnemonic;
//! use std::str::FromStr;
//!
//! let data = b"Hello, world! I am a mnemonic :)";
//! let words = "embody clock brand tattoo search desert saddle eternal
//! goddess animal banner dolphin bitter mother loyal asset
//! hover clock forward system normal mosquito trim credit";
//! let mnemonic = Mnemonic::from_str(words).unwrap();
//! assert_eq!(&data[..], mnemonic.as_bytes());
//! ```
//!
//! Mnemonics can also be used to store data of other lengths, but such functionality is not
//! verified to be safe:
//!
//! ```rust
//! use keyfork_mnemonic_util::Mnemonic;
//! let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
//! let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) };
//! let mnemonic_text = mnemonic.to_string();
//! ```
//!
//! If given an invalid length, undefined behavior may follow, or code may panic.
//!
//! ```rust,should_panic
//! use keyfork_mnemonic_util::Mnemonic;
//! use std::str::FromStr;
//!
//! // NOTE: Data is of invalid length, 31
//! let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
//! let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) };
//! let mnemonic_text = mnemonic.to_string();
//! // NOTE: panic happens here
//! let new_mnemonic = Mnemonic::from_str(&mnemonic_text).unwrap();
//! ```
use std::{error::Error, fmt::Display, marker::PhantomData, str::FromStr, sync::OnceLock};
use keyfork_bug::bug;
use hmac::Hmac;
use pbkdf2::pbkdf2;
use sha2::{Digest, Sha256, Sha512};
/// The error type representing a failure to create a [`Mnemonic`]. These errors only occur during
/// [`Mnemonic`] creation.
#[derive(Debug, Clone)]
pub enum MnemonicGenerationError {
/// The amount of bits passed to a mnemonic must be divisible by 32.
InvalidByteCount(usize),
/// The length of a mnemonic in bits must be within the BIP-0039 range, and supported by the
/// library. Currently, only 128, 192 (for testing purposes), and 256 are supported.
InvalidByteLength(usize),
/// Invalid length resulting from PBKDF2.
InvalidPbkdf2Length,
}
impl Display for MnemonicGenerationError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
MnemonicGenerationError::InvalidByteCount(count) => {
write!(f, "Invalid byte count: {count}, must be divisible by 8")
}
MnemonicGenerationError::InvalidByteLength(count) => {
write!(f, "Invalid byte length: {count}, must be 128 or 256")
}
MnemonicGenerationError::InvalidPbkdf2Length => {
f.write_str("Invalid length from PBKDF2")
}
}
}
}
impl Error for MnemonicGenerationError {}
/// A trait representing a BIP-0039 wordlist, of 2048 words, with each word having a unique first
/// three letters.
pub trait Wordlist: std::fmt::Debug {
/// Get a reference to a [`std::sync::OnceLock`] Self.
fn get_singleton<'a>() -> &'a Self;
/// Return a representation of the words in the wordlist as an array of [`str`].
fn to_str_array(&self) -> [&str; 2048];
}
/// A wordlist for the English language, from the BIP-0039 dataset.
#[derive(Debug)]
pub struct English {
words: [String; 2048],
}
static ENGLISH: OnceLock<English> = OnceLock::new();
impl Wordlist for English {
fn get_singleton<'a>() -> &'a Self {
ENGLISH.get_or_init(|| {
let wordlist_file = include_str!("data/wordlist.txt");
let mut words = wordlist_file.lines().skip(1).map(|x| x.trim().to_string());
English {
words: std::array::from_fn(|_| {
words.next().expect(bug!("wordlist {} should have 2048 words"))
}),
}
})
}
fn to_str_array(&self) -> [&str; 2048] {
std::array::from_fn(|i| self.words[i].as_str())
}
}
struct AssertValidMnemonicSize<const N: usize>;
impl<const N: usize> AssertValidMnemonicSize<N> {
const OK_CHUNKS: () = assert!(N % 4 == 0, "bytes must be a length divisible by 4");
const OK_SIZE: () = assert!(N <= 1024, "bytes must be less-or-equal 1024");
}
/// A BIP-0039 mnemonic with reference to a [`Wordlist`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MnemonicBase<W: Wordlist> {
data: Vec<u8>,
marker: PhantomData<W>,
}
/// A default Mnemonic using the English language.
pub type Mnemonic = MnemonicBase<English>;
impl<W> Display for MnemonicBase<W>
where
W: Wordlist,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let wordlist = W::get_singleton();
let words = wordlist.to_str_array();
let mut iter = self
.words()
.into_iter()
.filter_map(|word| words.get(word))
.peekable();
while let Some(word) = iter.next() {
f.write_str(word)?;
if iter.peek().is_some() {
f.write_str(" ")?;
}
}
Ok(())
}
}
/// The error type representing a failure to parse a [`Mnemonic`]. These errors only occur during
/// [`Mnemonic`] creation.
#[derive(Debug, Clone)]
pub enum MnemonicFromStrError {
/// The amount of words used to parse a mnemonic was not correct.
InvalidWordCount(usize),
/// One of the words used to generate the mnemonic was not found in the default wordlist.
InvalidWord(usize),
/// The checksum for the mnemonic did not match the given words.
InvalidChecksum,
}
impl Display for MnemonicFromStrError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("Mnemonic error: ")?;
match self {
MnemonicFromStrError::InvalidWordCount(count) => {
write!(f, "Incorrect word count: {count}")
}
MnemonicFromStrError::InvalidWord(index) => {
write!(f, "Unknown word at index: {index}")
}
MnemonicFromStrError::InvalidChecksum => {
f.write_str("Checksum of data did not match expected value")
}
}
}
}
impl Error for MnemonicFromStrError {}
impl<W> FromStr for MnemonicBase<W>
where
W: Wordlist,
{
type Err = MnemonicFromStrError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let wordlist = W::get_singleton();
let wordlist_words = wordlist.to_str_array();
let words: Vec<_> = s.split_whitespace().collect();
let mut usize_words = vec![];
let mut bits = vec![false; words.len() * 11];
for (index, word) in words.iter().enumerate() {
let word = wordlist_words
.iter()
.position(|w| w == word)
.ok_or(MnemonicFromStrError::InvalidWord(index))?;
usize_words.push(word);
for bit in 0..11 {
bits[index * 11 + bit] = (word & (1 << (10 - bit))) > 0;
}
}
let mut checksum_bits = vec![false; bits.len() - (bits.len() * 32 / 33)];
checksum_bits.copy_from_slice(&bits[bits.len() * 32 / 33..]);
// remove checksum bits
bits.truncate(bits.len() * 32 / 33);
// bits.truncate(bits.len() - bits.len() % 32);
let data: Vec<u8> = bits
.chunks_exact(8)
.map(|chunk| {
let mut num = 0u8;
for i in 0..8 {
num += u8::from(chunk[7 - i]) << i;
}
num
})
.collect();
let mut hasher = Sha256::new();
hasher.update(&data);
let hash = hasher.finalize().to_vec();
for (i, bit) in checksum_bits.iter().enumerate() {
if !hash[i / 8] & (1 << (7 - (i % 8))) == u8::from(*bit) {
return Err(MnemonicFromStrError::InvalidChecksum);
}
}
Ok(MnemonicBase {
data,
marker: PhantomData,
})
}
}
impl<W> MnemonicBase<W>
where
W: Wordlist,
{
/// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data is expected to be
/// of 128, 192, or 256 bits, as per BIP-0039.
///
/// # Errors
/// An error may be returned if the data is not within the expected lengths.
///
/// # Examples
/// ```rust
/// use keyfork_mnemonic_util::Mnemonic;
/// let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
/// let mnemonic = Mnemonic::try_from_slice(data.as_slice()).unwrap();
/// ```
pub fn try_from_slice(bytes: &[u8]) -> Result<MnemonicBase<W>, MnemonicGenerationError> {
let bit_count = bytes.len() * 8;
if bit_count % 32 != 0 {
return Err(MnemonicGenerationError::InvalidByteCount(bit_count));
}
// 192 supported for test suite
if ![128, 192, 256].contains(&bit_count) {
return Err(MnemonicGenerationError::InvalidByteLength(bit_count));
}
Ok( Self::from_raw_bytes(bytes) )
}
/// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data may be of a size
/// of a factor of 4, up to 1024 bytes.
///
/// ```rust
/// use keyfork_mnemonic_util::Mnemonic;
/// let data = b"hello world!";
/// let mnemonic = Mnemonic::from_array(*data);
/// ```
///
/// If an invalid size is requested, the code will fail to compile:
///
/// ```rust,compile_fail
/// use keyfork_mnemonic_util::Mnemonic;
/// let mnemonic = Mnemonic::from_array([0u8; 53]);
/// ```
///
/// ```rust,compile_fail
/// use keyfork_mnemonic_util::Mnemonic;
/// let mnemonic = Mnemonic::from_array([0u8; 1024 + 4]);
/// ```
pub fn from_array<const N: usize>(bytes: [u8; N]) -> MnemonicBase<W> {
#[allow(clippy::let_unit_value)]
{
let () = AssertValidMnemonicSize::<N>::OK_CHUNKS;
let () = AssertValidMnemonicSize::<N>::OK_SIZE;
}
Self::from_raw_bytes(&bytes)
}
/// Create a Mnemonic using an arbitrary length of given data. The length does not need to
/// conform to BIP-0039 standards, but should be a multiple of 32 bits or 4 bytes.
///
/// # Panics
/// This function can potentially produce mnemonics that are not BIP-0039 compliant or can't
/// properly be encoded as a mnemonic. It is assumed the caller asserts the byte count is `% 4
/// == 0`. If the assumption is incorrect, code may panic. The
/// [`MnemonicBase::from_array`] function may be used to generate entropy if the length of the
/// data is known at compile-time.
///
/// # Examples
/// ```rust
/// use keyfork_mnemonic_util::Mnemonic;
/// let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
/// let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) };
/// let mnemonic_text = mnemonic.to_string();
/// ```
///
/// If given an invalid length, undefined behavior may follow, or code may panic.
///
/// ```rust,should_panic
/// use keyfork_mnemonic_util::Mnemonic;
/// use std::str::FromStr;
///
/// // NOTE: Data is of invalid length, 31
/// let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
/// let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) };
/// ```
pub fn from_raw_bytes(bytes: &[u8]) -> MnemonicBase<W> {
assert!(bytes.len() % 4 == 0);
assert!(bytes.len() <= 1024);
MnemonicBase {
data: bytes.to_vec(),
marker: PhantomData,
}
}
/// Create a Mnemonic using an arbitrary length of given data. The length does not need to
/// conform to BIP-0039 standards, but should be a multiple of 32 bits or 4 bytes.
///
/// # Safety
///
/// This function can potentially produce mnemonics that are not BIP-0039 compliant or can't
/// properly be encoded as a mnemonic. It is assumed the caller asserts the byte count is `% 4
/// == 0`. If the assumption is incorrect, code may panic.
#[deprecated = "use Mnemonic::from_raw_bytes"]
pub unsafe fn from_raw_entropy(bytes: &[u8]) -> MnemonicBase<W> {
MnemonicBase {
data: bytes.to_vec(),
marker: PhantomData,
}
}
/// A view to internal representation of the decoded data.
pub fn as_bytes(&self) -> &[u8] {
&self.data
}
/// A clone of the internal representation of the decoded data.
pub fn to_bytes(&self) -> Vec<u8> {
self.data.to_vec()
}
/// Conver the Mnemonic into the internal representation of the decoded data.
pub fn into_bytes(self) -> Vec<u8> {
self.data
}
/// Clone the existing data.
#[deprecated = "Use as_bytes(), to_bytes(), or into_bytes() instead"]
pub fn entropy(&self) -> Vec<u8> {
self.data.clone()
}
/// Create a BIP-0032 seed from the provided data and an optional passphrase.
///
/// # Errors
/// The method should not return an error.
#[deprecated = "Use generate_seed() instead"]
pub fn seed<'a>(
&self,
passphrase: impl Into<Option<&'a str>>,
) -> Result<Vec<u8>, MnemonicGenerationError> {
Ok(self.generate_seed(passphrase).to_vec())
}
/// Create a BIP-0032 seed from the provided data and an optional passphrase.
///
/// # Panics
/// The function may panic if the HmacSha512 function returns an error. The only error the
/// HmacSha512 function should return is an invalid length, which should not be possible.
pub fn generate_seed<'a>(&self, passphrase: impl Into<Option<&'a str>>) -> [u8; 64] {
let passphrase = passphrase.into();
let mut seed = [0u8; 64];
let mnemonic = self.to_string();
let salt = ["mnemonic", passphrase.unwrap_or("")].join("");
pbkdf2::<Hmac<Sha512>>(mnemonic.as_bytes(), salt.as_bytes(), 2048, &mut seed)
.expect(bug!("HmacSha512 InvalidLength should be infallible"));
seed
}
/// Encode the mnemonic into a list of integers 11 bits in length, matching the length of a
/// BIP-0039 wordlist.
pub fn words(&self) -> Vec<usize> {
let bit_count = self.data.len() * 8;
let mut bits = vec![false; bit_count + bit_count / 32];
for byte_index in 0..bit_count / 8 {
for bit_index in 0..8 {
bits[byte_index * 8 + bit_index] =
(self.data[byte_index] & (1 << (7 - bit_index))) > 0;
}
}
let mut hasher = Sha256::new();
hasher.update(&self.data);
let hash = hasher.finalize().to_vec();
for check_bit in 0..bit_count / 32 {
bits[bit_count + check_bit] = (hash[check_bit / 8] & (1 << (7 - (check_bit % 8)))) > 0;
}
// TODO: find a way to not have to collect to vec
bits.chunks_exact(11)
.peekable()
.map(|chunk| {
let mut num = 0usize;
for i in 0..11 {
num += usize::from(chunk[10 - i]) << i;
}
num
})
.collect()
}
}
impl<W> MnemonicBase<W>
where
W: Wordlist,
{
/// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data is expected to be
/// of 128, 192, or 256 bits, as per BIP-0039.
///
/// # Errors
/// An error may be returned if the data is not within the expected lengths.
#[deprecated = "use Mnemonic::try_from_slice"]
pub fn from_bytes(bytes: &[u8]) -> Result<MnemonicBase<W>, MnemonicGenerationError> {
MnemonicBase::try_from_slice(bytes)
}
/// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data is expected to be
/// of 128, 192, or 256 bits, as per BIP-0039.
///
/// # Errors
/// An error may be returned if the data is not within the expected lengths.
#[deprecated = "use Mnemonic::try_from_slice"]
pub fn from_entropy(bytes: &[u8]) -> Result<MnemonicBase<W>, MnemonicGenerationError> {
MnemonicBase::try_from_slice(bytes)
}
/// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data may be of a size
/// of a factor of 4, up to 1024 bytes.
///
#[deprecated = "Use Mnemonic::from_array"]
pub fn from_nonstandard_bytes<const N: usize>(bytes: [u8; N]) -> MnemonicBase<W> {
MnemonicBase::from_array(bytes)
}
}
#[cfg(test)]
mod tests {
use std::{collections::HashSet, fs::File, io::Read};
use super::*;
#[test]
fn can_load_wordlist() {
let _wordlist = English::get_singleton();
}
#[test]
fn reproduces_its_own_seed() {
let mut random_handle = File::open("/dev/random").unwrap();
let entropy = &mut [0u8; 256 / 8];
random_handle.read_exact(&mut entropy[..]).unwrap();
let mnemonic = super::Mnemonic::try_from_slice(&entropy[..256 / 8]).unwrap();
let new_entropy = mnemonic.as_bytes();
assert_eq!(new_entropy, entropy);
}
#[test]
fn conforms_to_trezor_tests() {
let content = include_str!("data/vectors.json");
let jsonobj: serde_json::Value = serde_json::from_str(content).unwrap();
for test in jsonobj["english"].as_array().unwrap() {
let [ref hex_, ref seed, ..] = test.as_array().unwrap()[..] else {
panic!("bad test: {test}");
};
let hex = hex::decode(hex_.as_str().unwrap()).unwrap();
let mnemonic = Mnemonic::try_from_slice(&hex).unwrap();
assert_eq!(mnemonic.to_string(), seed.as_str().unwrap());
}
}
#[test]
fn matches_bip39_crate() {
let mut random_handle = File::open("/dev/random").unwrap();
let entropy = &mut [0u8; 256 / 8];
random_handle.read_exact(&mut entropy[..]).unwrap();
let my_mnemonic = Mnemonic::try_from_slice(&entropy[..256 / 8]).unwrap();
let their_mnemonic = bip39::Mnemonic::from_entropy(&entropy[..256 / 8]).unwrap();
assert_eq!(my_mnemonic.to_string(), their_mnemonic.to_string());
assert_eq!(my_mnemonic.generate_seed(None), their_mnemonic.to_seed(""));
assert_eq!(
my_mnemonic.generate_seed("testing"),
their_mnemonic.to_seed("testing")
);
assert_ne!(
my_mnemonic.generate_seed("test1"),
their_mnemonic.to_seed("test2")
);
}
#[test]
fn count_rate_of_duplicate_words() {
let tests = 100_000;
let mut count = 0.;
let entropy = &mut [0u8; 256 / 8];
let mut random = std::fs::File::open("/dev/urandom").unwrap();
let mut hs = HashSet::<usize>::with_capacity(24);
for _ in 0..tests {
random.read_exact(&mut entropy[..]).unwrap();
let mnemonic = Mnemonic::try_from_slice(&entropy[..256 / 8]).unwrap();
let words = mnemonic.words();
hs.clear();
hs.extend(words);
if hs.len() != 24 {
count += 1.;
}
}
// NOTE: Birthday problem math is: 0.126532
// Set values to (about) 1 below, 1 above
// Source: https://en.wikipedia.org/wiki/Birthday_problem
let min = 11.5;
let max = 13.5;
assert!(
count > f64::from(tests) * min / 100.,
"{count} probability should be more than {min}%: {}",
count / f64::from(tests)
);
assert!(
count < f64::from(tests) * max / 100.,
"{count} probability should be more than {max}%: {}",
count / f64::from(tests)
);
}
#[test]
fn can_do_up_to_8192_bits() {
let mut entropy = [0u8; 1024];
let mut random = std::fs::File::open("/dev/urandom").unwrap();
random.read_exact(&mut entropy[..]).unwrap();
let mnemonic = Mnemonic::from_array(entropy);
let words = mnemonic.words();
assert_eq!(words.len(), 768);
}
#[test]
#[should_panic]
fn fails_over_8192_bits() {
let entropy = &mut [0u8; 1024 + 4];
let mut random = std::fs::File::open("/dev/urandom").unwrap();
random.read_exact(&mut entropy[..]).unwrap();
let _mnemonic = Mnemonic::from_raw_bytes(&entropy[..]);
}
#[test]
#[should_panic]
fn fails_over_invalid_size() {
let entropy = &mut [0u8; 255];
let mut random = std::fs::File::open("/dev/urandom").unwrap();
random.read_exact(&mut entropy[..]).unwrap();
let _mnemonic = Mnemonic::from_raw_bytes(&entropy[..]);
}
}