//! Zero-dependency mnemonic encoding and decoding of data.
//!
//! Mnemonics can be used to safely encode data of 16, 24, and 32 bytes (128, 192, and 256 bits)
//! as a phrase:
//!
//! ```rust
//! use keyfork_mnemonic_util::Mnemonic;
//! let data = b"Hello, world! I am a mnemonic :)";
//! assert_eq!(data.len(), 32);
//! let mnemonic = Mnemonic::try_from_slice(data).unwrap();
//! println!("Our mnemonic is: {mnemonic}");
//! ```
//!
//! A mnemonic can also be parsed from a string:
//!
//! ```rust
//! use keyfork_mnemonic_util::Mnemonic;
//! use std::str::FromStr;
//!
//! let data = b"Hello, world! I am a mnemonic :)";
//! let words = "embody clock brand tattoo search desert saddle eternal
//!              goddess animal banner dolphin bitter mother loyal asset
//!              hover clock forward system normal mosquito trim credit";
//! let mnemonic = Mnemonic::from_str(words).unwrap();
//! assert_eq!(&data[..], mnemonic.as_bytes());
//! ```
//!
//! Mnemonics can also be used to store data of other lengths, but such functionality is not
//! verified to be safe:
//!
//! ```rust
//! use keyfork_mnemonic_util::Mnemonic;
//! let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
//! let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) };
//! let mnemonic_text = mnemonic.to_string();
//! ```
//!
//! If given a length that is not a multiple of 4 bytes, the code will panic.
//!
//! ```rust,should_panic
//! use keyfork_mnemonic_util::Mnemonic;
//! use std::str::FromStr;
//!
//! // NOTE: Data is of invalid length, 31
//! let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
//! // NOTE: panic happens here, as the length is not a multiple of 4
//! let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) };
//! let mnemonic_text = mnemonic.to_string();
//! let new_mnemonic = Mnemonic::from_str(&mnemonic_text).unwrap();
``` use std::{error::Error, fmt::Display, marker::PhantomData, str::FromStr, sync::OnceLock}; use keyfork_bug::bug; use hmac::Hmac; use pbkdf2::pbkdf2; use sha2::{Digest, Sha256, Sha512}; /// The error type representing a failure to create a [`Mnemonic`]. These errors only occur during /// [`Mnemonic`] creation. #[derive(Debug, Clone)] pub enum MnemonicGenerationError { /// The amount of bits passed to a mnemonic must be divisible by 32. InvalidByteCount(usize), /// The length of a mnemonic in bits must be within the BIP-0039 range, and supported by the /// library. Currently, only 128, 192 (for testing purposes), and 256 are supported. InvalidByteLength(usize), /// Invalid length resulting from PBKDF2. InvalidPbkdf2Length, } impl Display for MnemonicGenerationError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { MnemonicGenerationError::InvalidByteCount(count) => { write!(f, "Invalid byte count: {count}, must be divisible by 8") } MnemonicGenerationError::InvalidByteLength(count) => { write!(f, "Invalid byte length: {count}, must be 128 or 256") } MnemonicGenerationError::InvalidPbkdf2Length => { f.write_str("Invalid length from PBKDF2") } } } } impl Error for MnemonicGenerationError {} /// A trait representing a BIP-0039 wordlist, of 2048 words, with each word having a unique first /// three letters. pub trait Wordlist: std::fmt::Debug { /// Get a reference to a [`std::sync::OnceLock`] Self. fn get_singleton<'a>() -> &'a Self; /// Return a representation of the words in the wordlist as an array of [`str`]. fn to_str_array(&self) -> [&str; 2048]; } /// A wordlist for the English language, from the BIP-0039 dataset. 
#[derive(Debug)] pub struct English { words: [String; 2048], } static ENGLISH: OnceLock = OnceLock::new(); impl Wordlist for English { fn get_singleton<'a>() -> &'a Self { ENGLISH.get_or_init(|| { let wordlist_file = include_str!("data/wordlist.txt"); let mut words = wordlist_file.lines().skip(1).map(|x| x.trim().to_string()); English { words: std::array::from_fn(|_| { words.next().expect(bug!("wordlist {} should have 2048 words")) }), } }) } fn to_str_array(&self) -> [&str; 2048] { std::array::from_fn(|i| self.words[i].as_str()) } } struct AssertValidMnemonicSize; impl AssertValidMnemonicSize { const OK_CHUNKS: () = assert!(N % 4 == 0, "bytes must be a length divisible by 4"); const OK_SIZE: () = assert!(N <= 1024, "bytes must be less-or-equal 1024"); } /// A BIP-0039 mnemonic with reference to a [`Wordlist`]. #[derive(Debug, Clone, PartialEq, Eq)] pub struct MnemonicBase { data: Vec, marker: PhantomData, } /// A default Mnemonic using the English language. pub type Mnemonic = MnemonicBase; impl Display for MnemonicBase where W: Wordlist, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let wordlist = W::get_singleton(); let words = wordlist.to_str_array(); let mut iter = self .words() .into_iter() .filter_map(|word| words.get(word)) .peekable(); while let Some(word) = iter.next() { f.write_str(word)?; if iter.peek().is_some() { f.write_str(" ")?; } } Ok(()) } } /// The error type representing a failure to parse a [`Mnemonic`]. These errors only occur during /// [`Mnemonic`] creation. #[derive(Debug, Clone)] pub enum MnemonicFromStrError { /// The amount of words used to parse a mnemonic was not correct. InvalidWordCount(usize), /// One of the words used to generate the mnemonic was not found in the default wordlist. InvalidWord(usize), /// The checksum for the mnemonic did not match the given words. 
InvalidChecksum, } impl Display for MnemonicFromStrError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("Mnemonic error: ")?; match self { MnemonicFromStrError::InvalidWordCount(count) => { write!(f, "Incorrect word count: {count}") } MnemonicFromStrError::InvalidWord(index) => { write!(f, "Unknown word at index: {index}") } MnemonicFromStrError::InvalidChecksum => { f.write_str("Checksum of data did not match expected value") } } } } impl Error for MnemonicFromStrError {} impl FromStr for MnemonicBase where W: Wordlist, { type Err = MnemonicFromStrError; fn from_str(s: &str) -> Result { let wordlist = W::get_singleton(); let wordlist_words = wordlist.to_str_array(); let words: Vec<_> = s.split_whitespace().collect(); let mut usize_words = vec![]; let mut bits = vec![false; words.len() * 11]; for (index, word) in words.iter().enumerate() { let word = wordlist_words .iter() .position(|w| w == word) .ok_or(MnemonicFromStrError::InvalidWord(index))?; usize_words.push(word); for bit in 0..11 { bits[index * 11 + bit] = (word & (1 << (10 - bit))) > 0; } } let mut checksum_bits = vec![false; bits.len() - (bits.len() * 32 / 33)]; checksum_bits.copy_from_slice(&bits[bits.len() * 32 / 33..]); // remove checksum bits bits.truncate(bits.len() * 32 / 33); // bits.truncate(bits.len() - bits.len() % 32); let data: Vec = bits .chunks_exact(8) .map(|chunk| { let mut num = 0u8; for i in 0..8 { num += u8::from(chunk[7 - i]) << i; } num }) .collect(); let mut hasher = Sha256::new(); hasher.update(&data); let hash = hasher.finalize().to_vec(); for (i, bit) in checksum_bits.iter().enumerate() { if !hash[i / 8] & (1 << (7 - (i % 8))) == u8::from(*bit) { return Err(MnemonicFromStrError::InvalidChecksum); } } Ok(MnemonicBase { data, marker: PhantomData, }) } } impl MnemonicBase where W: Wordlist, { /// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data is expected to be /// of 128, 192, or 256 bits, as per BIP-0039. 
/// /// # Errors /// An error may be returned if the data is not within the expected lengths. /// /// # Examples /// ```rust /// use keyfork_mnemonic_util::Mnemonic; /// let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; /// let mnemonic = Mnemonic::try_from_slice(data.as_slice()).unwrap(); /// ``` pub fn try_from_slice(bytes: &[u8]) -> Result, MnemonicGenerationError> { let bit_count = bytes.len() * 8; if bit_count % 32 != 0 { return Err(MnemonicGenerationError::InvalidByteCount(bit_count)); } // 192 supported for test suite if ![128, 192, 256].contains(&bit_count) { return Err(MnemonicGenerationError::InvalidByteLength(bit_count)); } Ok( Self::from_raw_bytes(bytes) ) } /// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data may be of a size /// of a factor of 4, up to 1024 bytes. /// /// ```rust /// use keyfork_mnemonic_util::Mnemonic; /// let data = b"hello world!"; /// let mnemonic = Mnemonic::from_array(*data); /// ``` /// /// If an invalid size is requested, the code will fail to compile: /// /// ```rust,compile_fail /// use keyfork_mnemonic_util::Mnemonic; /// let mnemonic = Mnemonic::from_array([0u8; 53]); /// ``` /// /// ```rust,compile_fail /// use keyfork_mnemonic_util::Mnemonic; /// let mnemonic = Mnemonic::from_array([0u8; 1024 + 4]); /// ``` pub fn from_array(bytes: [u8; N]) -> MnemonicBase { #[allow(clippy::let_unit_value)] { let () = AssertValidMnemonicSize::::OK_CHUNKS; let () = AssertValidMnemonicSize::::OK_SIZE; } Self::from_raw_bytes(&bytes) } /// Create a Mnemonic using an arbitrary length of given data. The length does not need to /// conform to BIP-0039 standards, but should be a multiple of 32 bits or 4 bytes. /// /// # Panics /// This function can potentially produce mnemonics that are not BIP-0039 compliant or can't /// properly be encoded as a mnemonic. It is assumed the caller asserts the byte count is `% 4 /// == 0`. If the assumption is incorrect, code may panic. 
The /// [`MnemonicBase::from_array`] function may be used to generate entropy if the length of the /// data is known at compile-time. /// /// # Examples /// ```rust /// use keyfork_mnemonic_util::Mnemonic; /// let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; /// let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) }; /// let mnemonic_text = mnemonic.to_string(); /// ``` /// /// If given an invalid length, undefined behavior may follow, or code may panic. /// /// ```rust,should_panic /// use keyfork_mnemonic_util::Mnemonic; /// use std::str::FromStr; /// /// // NOTE: Data is of invalid length, 31 /// let data = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; /// let mnemonic = unsafe { Mnemonic::from_raw_bytes(data.as_slice()) }; /// ``` pub fn from_raw_bytes(bytes: &[u8]) -> MnemonicBase { assert!(bytes.len() % 4 == 0); assert!(bytes.len() <= 1024); MnemonicBase { data: bytes.to_vec(), marker: PhantomData, } } /// Create a Mnemonic using an arbitrary length of given data. The length does not need to /// conform to BIP-0039 standards, but should be a multiple of 32 bits or 4 bytes. /// /// # Safety /// /// This function can potentially produce mnemonics that are not BIP-0039 compliant or can't /// properly be encoded as a mnemonic. It is assumed the caller asserts the byte count is `% 4 /// == 0`. If the assumption is incorrect, code may panic. #[deprecated = "use Mnemonic::from_raw_bytes"] pub unsafe fn from_raw_entropy(bytes: &[u8]) -> MnemonicBase { MnemonicBase { data: bytes.to_vec(), marker: PhantomData, } } /// A view to internal representation of the decoded data. pub fn as_bytes(&self) -> &[u8] { &self.data } /// A view to internal representation of the decoded data. pub fn as_slice(&self) -> &[u8] { &self.data } /// A clone of the internal representation of the decoded data. pub fn to_bytes(&self) -> Vec { self.data.to_vec() } /// A clone of the internal representation of the decoded data. 
pub fn to_vec(&self) -> Vec { self.data.to_vec() } /// Conver the Mnemonic into the internal representation of the decoded data. pub fn into_bytes(self) -> Vec { self.data } /// Conver the Mnemonic into the internal representation of the decoded data. pub fn into_vec(self) -> Vec { self.data } /// Clone the existing data. #[deprecated = "Use as_bytes(), to_bytes(), or into_bytes() instead"] pub fn entropy(&self) -> Vec { self.data.clone() } /// Create a BIP-0032 seed from the provided data and an optional passphrase. /// /// # Errors /// The method should not return an error. #[deprecated = "Use generate_seed() instead"] pub fn seed<'a>( &self, passphrase: impl Into>, ) -> Result, MnemonicGenerationError> { Ok(self.generate_seed(passphrase).to_vec()) } /// Create a BIP-0032 seed from the provided data and an optional passphrase. /// /// # Panics /// The function may panic if the HmacSha512 function returns an error. The only error the /// HmacSha512 function should return is an invalid length, which should not be possible. pub fn generate_seed<'a>(&self, passphrase: impl Into>) -> [u8; 64] { let passphrase = passphrase.into(); let mut seed = [0u8; 64]; let mnemonic = self.to_string(); let salt = ["mnemonic", passphrase.unwrap_or("")].join(""); pbkdf2::>(mnemonic.as_bytes(), salt.as_bytes(), 2048, &mut seed) .expect(bug!("HmacSha512 InvalidLength should be infallible")); seed } /// Encode the mnemonic into a list of integers 11 bits in length, matching the length of a /// BIP-0039 wordlist. 
pub fn words(&self) -> Vec { let bit_count = self.data.len() * 8; let mut bits = vec![false; bit_count + bit_count / 32]; for byte_index in 0..bit_count / 8 { for bit_index in 0..8 { bits[byte_index * 8 + bit_index] = (self.data[byte_index] & (1 << (7 - bit_index))) > 0; } } let mut hasher = Sha256::new(); hasher.update(&self.data); let hash = hasher.finalize().to_vec(); for check_bit in 0..bit_count / 32 { bits[bit_count + check_bit] = (hash[check_bit / 8] & (1 << (7 - (check_bit % 8)))) > 0; } // TODO: find a way to not have to collect to vec bits.chunks_exact(11) .peekable() .map(|chunk| { let mut num = 0usize; for i in 0..11 { num += usize::from(chunk[10 - i]) << i; } num }) .collect() } } impl MnemonicBase where W: Wordlist, { /// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data is expected to be /// of 128, 192, or 256 bits, as per BIP-0039. /// /// # Errors /// An error may be returned if the data is not within the expected lengths. #[deprecated = "use Mnemonic::try_from_slice"] pub fn from_bytes(bytes: &[u8]) -> Result, MnemonicGenerationError> { MnemonicBase::try_from_slice(bytes) } /// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data is expected to be /// of 128, 192, or 256 bits, as per BIP-0039. /// /// # Errors /// An error may be returned if the data is not within the expected lengths. #[deprecated = "use Mnemonic::try_from_slice"] pub fn from_entropy(bytes: &[u8]) -> Result, MnemonicGenerationError> { MnemonicBase::try_from_slice(bytes) } /// Generate a [`Mnemonic`] from the provided data and [`Wordlist`]. The data may be of a size /// of a factor of 4, up to 1024 bytes. 
/// #[deprecated = "Use Mnemonic::from_array"] pub fn from_nonstandard_bytes(bytes: [u8; N]) -> MnemonicBase { MnemonicBase::from_array(bytes) } } #[cfg(test)] mod tests { use std::{collections::HashSet, fs::File, io::Read}; use super::*; #[test] fn can_load_wordlist() { let _wordlist = English::get_singleton(); } #[test] fn reproduces_its_own_seed() { let mut random_handle = File::open("/dev/random").unwrap(); let entropy = &mut [0u8; 256 / 8]; random_handle.read_exact(&mut entropy[..]).unwrap(); let mnemonic = super::Mnemonic::try_from_slice(&entropy[..256 / 8]).unwrap(); let new_entropy = mnemonic.as_bytes(); assert_eq!(new_entropy, entropy); } #[test] fn conforms_to_trezor_tests() { let content = include_str!("data/vectors.json"); let jsonobj: serde_json::Value = serde_json::from_str(content).unwrap(); for test in jsonobj["english"].as_array().unwrap() { let [ref hex_, ref seed, ..] = test.as_array().unwrap()[..] else { panic!("bad test: {test}"); }; let hex = hex::decode(hex_.as_str().unwrap()).unwrap(); let mnemonic = Mnemonic::try_from_slice(&hex).unwrap(); assert_eq!(mnemonic.to_string(), seed.as_str().unwrap()); } } #[test] fn matches_bip39_crate() { let mut random_handle = File::open("/dev/random").unwrap(); let entropy = &mut [0u8; 256 / 8]; random_handle.read_exact(&mut entropy[..]).unwrap(); let my_mnemonic = Mnemonic::try_from_slice(&entropy[..256 / 8]).unwrap(); let their_mnemonic = bip39::Mnemonic::from_entropy(&entropy[..256 / 8]).unwrap(); assert_eq!(my_mnemonic.to_string(), their_mnemonic.to_string()); assert_eq!(my_mnemonic.generate_seed(None), their_mnemonic.to_seed("")); assert_eq!( my_mnemonic.generate_seed("testing"), their_mnemonic.to_seed("testing") ); assert_ne!( my_mnemonic.generate_seed("test1"), their_mnemonic.to_seed("test2") ); } #[test] fn count_rate_of_duplicate_words() { let tests = 100_000; let mut count = 0.; let entropy = &mut [0u8; 256 / 8]; let mut random = std::fs::File::open("/dev/urandom").unwrap(); let mut hs = 
HashSet::::with_capacity(24); for _ in 0..tests { random.read_exact(&mut entropy[..]).unwrap(); let mnemonic = Mnemonic::try_from_slice(&entropy[..256 / 8]).unwrap(); let words = mnemonic.words(); hs.clear(); hs.extend(words); if hs.len() != 24 { count += 1.; } } // NOTE: Birthday problem math is: 0.126532 // Set values to (about) 1 below, 1 above // Source: https://en.wikipedia.org/wiki/Birthday_problem let min = 11.5; let max = 13.5; assert!( count > f64::from(tests) * min / 100., "{count} probability should be more than {min}%: {}", count / f64::from(tests) ); assert!( count < f64::from(tests) * max / 100., "{count} probability should be more than {max}%: {}", count / f64::from(tests) ); } #[test] fn can_do_up_to_8192_bits() { let mut entropy = [0u8; 1024]; let mut random = std::fs::File::open("/dev/urandom").unwrap(); random.read_exact(&mut entropy[..]).unwrap(); let mnemonic = Mnemonic::from_array(entropy); let words = mnemonic.words(); assert_eq!(words.len(), 768); } #[test] #[should_panic] fn fails_over_8192_bits() { let entropy = &mut [0u8; 1024 + 4]; let mut random = std::fs::File::open("/dev/urandom").unwrap(); random.read_exact(&mut entropy[..]).unwrap(); let _mnemonic = Mnemonic::from_raw_bytes(&entropy[..]); } #[test] #[should_panic] fn fails_over_invalid_size() { let entropy = &mut [0u8; 255]; let mut random = std::fs::File::open("/dev/urandom").unwrap(); random.read_exact(&mut entropy[..]).unwrap(); let _mnemonic = Mnemonic::from_raw_bytes(&entropy[..]); } }