keyfork/keyfork-mnemonic-util/src/lib.rs

use std::{collections::HashMap, str::FromStr, sync::Arc, error::Error, fmt::Display};

use sha2::{Digest, Sha256, Sha512};
use pbkdf2::pbkdf2;
use hmac::Hmac;

/// The error type representing a failure to create a [`Mnemonic`]. These errors only occur during
/// [`Mnemonic`] creation.
#[derive(Debug, Clone)]
pub enum MnemonicGenerationError {
    /// The amount of bits passed to a mnemonic must be divisible by 32. The carried value is the
    /// rejected length, measured in bits.
    InvalidByteCount(usize),

    /// The length of a mnemonic in bits must be within the BIP-0039 range, and supported by the
    /// library. Currently, only 128, 192 (for testing purposes), and 256 are supported. The
    /// carried value is the rejected length, measured in bits.
    InvalidByteLength(usize),

    /// Invalid length resulting from PBKDF2.
    InvalidPbkdf2Length,
}

impl Display for MnemonicGenerationError {
    /// Writes a human-readable description of the failure.
    ///
    /// The variant names say "byte" for historical reasons, but the values they carry are bit
    /// counts, so the messages speak in bits and repeat the rule that was actually enforced.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidByteCount(count) => {
                write!(f, "Invalid bit count: {count}, must be divisible by 32")
            }
            Self::InvalidByteLength(count) => {
                write!(f, "Invalid bit length: {count}, must be 128, 192, or 256")
            }
            Self::InvalidPbkdf2Length => f.write_str("Invalid length from PBKDF2"),
        }
    }
}

impl Error for MnemonicGenerationError {}

/// A BIP-0039 compatible list of words.
///
/// This is a newtype over the words in list order. A word's position in the list is the index a
/// [`Mnemonic`] stores for it.
#[derive(Debug, Clone)]
pub struct Wordlist(Vec<String>);

impl Default for Wordlist {
    /// Returns the English wordlist in the Bitcoin BIP-0039 specification.
    fn default() -> Self {
        // TODO: English is the only supported language.
        let wordlist_file = include_str!("data/wordlist.txt");
        Wordlist(
            wordlist_file
                .lines()
                // skip 1: comment at top of file to point to BIP-0039 source.
                .skip(1)
                .map(|x| x.trim().to_string())
                .collect(),
        )
    }
}

impl Wordlist {
    /// Return an Arced version of the Wordlist
    #[allow(clippy::must_use_candidate)]
    pub fn arc(self) -> Arc<Self> {
        Arc::new(self)
    }

    /// Given an index, get a word from the wordlist.
    fn get_word(&self, word: usize) -> Option<&String> {
        self.0.get(word)
    }

    fn inner(&self) -> &Vec<String> {
        &self.0
    }

    #[cfg(test)]
    fn into_inner(self) -> Vec<String> {
        self.0
    }
}

/// A BIP-0039 mnemonic with reference to a [`Wordlist`].
#[derive(Debug, Clone)]
pub struct Mnemonic {
    /// Positions in `wordlist`, one per word of the phrase, in phrase order.
    words: Vec<usize>,
    /// The list used to turn the stored positions back into words when displaying.
    wordlist: Arc<Wordlist>,
}

impl Display for Mnemonic {
    /// Writes the words of the phrase separated by single spaces, with no trailing space.
    ///
    /// # Panics
    /// Panics if a stored index has no entry in the wordlist.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (position, &word_index) in self.words.iter().enumerate() {
            // The separator goes before every word except the first, so nothing trails the
            // final word.
            if position > 0 {
                f.write_str(" ")?;
            }
            let word = self.wordlist.get_word(word_index).expect("word");
            write!(f, "{word}")?;
        }
        Ok(())
    }
}

/// The error returned when text cannot be parsed into a [`Mnemonic`]. It is only produced while
/// a [`Mnemonic`] is being created from a string.
#[derive(Debug, Clone)]
pub enum MnemonicFromStrError {
    /// The phrase did not contain an accepted number of words. Carries the number of words that
    /// were found.
    InvalidWordCount(usize),

    /// A word of the phrase was not found in the default wordlist. Carries the zero-based
    /// position of that word within the phrase.
    InvalidWord(usize),
}

impl Display for MnemonicFromStrError {
    /// Writes a short description of the failure followed by the value the variant carries.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::InvalidWordCount(count) => {
                f.write_fmt(format_args!("Incorrect word count: {count}"))
            }
            Self::InvalidWord(index) => {
                f.write_fmt(format_args!("Unknown word at index: {index}"))
            }
        }
    }
}

impl Error for MnemonicFromStrError {}

impl FromStr for Mnemonic {
    type Err = MnemonicFromStrError;

    /// Parse a whitespace-separated phrase against the default [`Wordlist`].
    ///
    /// Only phrases of exactly 12 or 24 words are accepted. Words are matched exactly as
    /// written. NOTE(review): no checksum validation happens here, so a phrase made of known
    /// words with a wrong checksum still parses.
    ///
    /// # Errors
    /// Returns [`MnemonicFromStrError::InvalidWord`] with the position of the first word that is
    /// not in the wordlist, or [`MnemonicFromStrError::InvalidWordCount`] when every word is
    /// known but the phrase is not 12 or 24 words long.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let wordlist = Wordlist::default().arc();

        // Reverse index: word text -> position in the wordlist.
        let mut positions: HashMap<&str, usize> = HashMap::new();
        for (position, entry) in wordlist.inner().iter().enumerate() {
            positions.insert(entry.as_str(), position);
        }

        // Collecting into `Result` stops at the first unknown word.
        let words = s
            .split_whitespace()
            .enumerate()
            .map(|(index, word)| {
                positions
                    .get(word)
                    .copied()
                    .ok_or(MnemonicFromStrError::InvalidWord(index))
            })
            .collect::<Result<Vec<usize>, _>>()?;

        if !matches!(words.len(), 12 | 24) {
            return Err(MnemonicFromStrError::InvalidWordCount(words.len()));
        }
        Ok(Mnemonic { words, wordlist })
    }
}

impl Mnemonic {
    /// Generate a [`Mnemonic`] from the provided entropy and [`Wordlist`].
    ///
    /// The entropy is read most-significant-bit first and followed by the leading bits of its
    /// SHA-256 digest, one digest bit per 32 bits of entropy. The combined bit string is then
    /// cut into 11-bit groups, each of which becomes one word index.
    ///
    /// # Errors
    /// An error may be returned if the entropy is not within the acceptable lengths.
    pub fn from_entropy(
        bytes: &[u8],
        wordlist: Arc<Wordlist>,
    ) -> Result<Mnemonic, MnemonicGenerationError> {
        let bit_count = bytes.len() * 8;

        if bit_count % 32 != 0 {
            return Err(MnemonicGenerationError::InvalidByteCount(bit_count));
        }
        // 192 supported for test suite
        if !matches!(bit_count, 128 | 192 | 256) {
            return Err(MnemonicGenerationError::InvalidByteLength(bit_count));
        }

        let checksum_bits = bit_count / 32;

        let mut hasher = Sha256::new();
        hasher.update(bytes);
        let hash = hasher.finalize();

        // Expands one byte into its eight bits, most significant bit first.
        let msb_first = |byte: u8| (0..8u32).rev().map(move |shift| ((byte >> shift) & 1) == 1);

        // Entropy bits first, then only as many leading digest bits as the checksum needs.
        let bits: Vec<bool> = bytes
            .iter()
            .copied()
            .flat_map(msb_first)
            .chain(hash.iter().copied().flat_map(msb_first).take(checksum_bits))
            .collect();

        let words = bits
            // NOTE: Tested with all approved variants. Always divisible by 11.
            .chunks_exact(11)
            // Fold each group into a number, first bit most significant.
            .map(|chunk| {
                chunk
                    .iter()
                    .fold(0usize, |acc, &bit| (acc << 1) | usize::from(bit))
            })
            .collect::<Vec<_>>();

        Ok(Mnemonic { words, wordlist })
    }

    /// Recover the entropy bytes this mnemonic encodes.
    ///
    /// Every word index contributes 11 bits, most significant bit first. The trailing checksum
    /// bits are dropped and the remaining bits are packed back into bytes.
    pub fn entropy(&self) -> Vec<u8> {
        let mut bits: Vec<bool> = self
            .words
            .iter()
            .flat_map(|&word| (0..11u32).rev().map(move |shift| ((word >> shift) & 1) == 1))
            .collect();

        // remove checksum bits: whatever extends past the last multiple of 32
        let checksum_bits = bits.len() % 32;
        bits.truncate(bits.len() - checksum_bits);

        bits.chunks_exact(8)
            .map(|chunk| {
                chunk
                    .iter()
                    .fold(0u8, |acc, &bit| (acc << 1) | u8::from(bit))
            })
            .collect()
    }

    /// Derive a 64-byte seed from the mnemonic phrase.
    ///
    /// The phrase, as rendered by [`Display`], is the PBKDF2 password. The salt is the text
    /// `mnemonic` followed by `passphrase`, or `mnemonic` alone when no passphrase is given.
    /// The derivation uses HMAC-SHA512 with 2048 rounds. The passphrase is used exactly as
    /// provided; this method applies no Unicode normalization to it.
    ///
    /// # Errors
    /// Returns [`MnemonicGenerationError::InvalidPbkdf2Length`] if PBKDF2 reports a failure.
    pub fn seed<'a>(
        &self,
        passphrase: impl Into<Option<&'a str>>,
    ) -> Result<Vec<u8>, MnemonicGenerationError> {
        let mut salt = String::from("mnemonic");
        if let Some(extra) = passphrase.into() {
            salt.push_str(extra);
        }

        let phrase = self.to_string();
        let mut seed = vec![0u8; 64];
        pbkdf2::<Hmac<Sha512>>(
            phrase.as_bytes(),
            salt.as_bytes(),
            2048,
            seed.as_mut_slice(),
        )
        .map_err(|_| MnemonicGenerationError::InvalidPbkdf2Length)?;
        Ok(seed)
    }

    /// Split the mnemonic into its word indices and the [`Wordlist`] they refer to.
    pub fn into_inner(self) -> (Vec<usize>, Arc<Wordlist>) {
        let Mnemonic { words, wordlist } = self;
        (words, wordlist)
    }
}

#[cfg(test)]
mod tests {
    use std::{collections::HashSet, fs::File, io::Read};

    use super::*;

    /// The bundled wordlist must hold exactly 2^11 words, one per 11-bit index.
    #[test]
    fn wordlist_word_count_correct() {
        let wordlist = Wordlist::default().into_inner();
        assert_eq!(
            wordlist.len(),
            2usize.pow(11),
            "Wordlist did not include correct word count"
        );
    }

    /// Entropy fed into a mnemonic must come back unchanged from `entropy()`.
    #[test]
    fn reproduces_its_own_seed() {
        // Same random device as the other randomized tests in this module.
        let mut random_handle = File::open("/dev/urandom").unwrap();
        let entropy = &mut [0u8; 256 / 8];
        random_handle.read_exact(&mut entropy[..]).unwrap();
        let wordlist = Wordlist::default().arc();
        let mnemonic = super::Mnemonic::from_entropy(&entropy[..256 / 8], wordlist).unwrap();
        let new_entropy = mnemonic.entropy();
        assert_eq!(&new_entropy, entropy);
    }

    /// Each bundled test vector's entropy must render as the vector's mnemonic phrase.
    #[test]
    fn conforms_to_trezor_tests() {
        let content = include_str!("data/vectors.json");
        let jsonobj: serde_json::Value = serde_json::from_str(content).unwrap();
        let wordlist = Wordlist::default().arc();

        for test in jsonobj["english"].as_array().unwrap() {
            // The first element is hex-encoded entropy; the second is the expected phrase.
            let [ref hex_, ref phrase, ..] = test.as_array().unwrap()[..] else {
                panic!("bad test: {test}");
            };
            let hex = hex::decode(hex_.as_str().unwrap()).unwrap();

            let mnemonic = Mnemonic::from_entropy(&hex, wordlist.clone()).unwrap();

            assert_eq!(mnemonic.to_string(), phrase.as_str().unwrap());
        }
    }

    /// Phrase and seed output must agree with the `bip39` crate for the same entropy.
    #[test]
    fn matches_bip39_crate() {
        // Same random device as the other randomized tests in this module.
        let mut random_handle = File::open("/dev/urandom").unwrap();
        let entropy = &mut [0u8; 256 / 8];
        random_handle.read_exact(&mut entropy[..]).unwrap();
        let wordlist = Wordlist::default().arc();
        let my_mnemonic = super::Mnemonic::from_entropy(&entropy[..256 / 8], wordlist).unwrap();
        let their_mnemonic = bip39::Mnemonic::from_entropy(&entropy[..256 / 8]).unwrap();
        assert_eq!(my_mnemonic.to_string(), their_mnemonic.to_string());
        assert_eq!(my_mnemonic.seed(None).unwrap(), their_mnemonic.to_seed(""));
        assert_eq!(my_mnemonic.seed("testing").unwrap(), their_mnemonic.to_seed("testing"));
        assert_ne!(my_mnemonic.seed("test1").unwrap(), their_mnemonic.to_seed("test2"));
    }

    /// The share of 24-word mnemonics containing a repeated word must sit near the value the
    /// birthday problem predicts.
    #[test]
    fn count_rate_of_duplicate_words() {
        let tests = 100_000;
        let mut count = 0.;
        let entropy = &mut [0u8; 256 / 8];
        let wordlist = Wordlist::default().arc();
        let mut random = File::open("/dev/urandom").unwrap();
        let mut hs = HashSet::<usize>::with_capacity(24);

        for _ in 0..tests {
            random.read_exact(&mut entropy[..]).unwrap();
            let mnemonic = Mnemonic::from_entropy(&entropy[..256 / 8], wordlist.clone()).unwrap();
            let (words, _) = mnemonic.into_inner();
            hs.clear();
            hs.extend(words);
            // Fewer than 24 distinct indices means at least one word repeated.
            if hs.len() != 24 {
                count += 1.;
            }
        }

        // NOTE: Birthday problem math is: 0.126532
        // Set values to (about) 1 below, 1 above
        // Source: https://en.wikipedia.org/wiki/Birthday_problem
        let min = 11.5;
        let max = 13.5;
        assert!(
            count > f64::from(tests) * min / 100.,
            "{count} probability should be more than {min}%: {}",
            count / f64::from(tests)
        );
        assert!(
            count < f64::from(tests) * max / 100.,
            "{count} probability should be less than {max}%: {}",
            count / f64::from(tests)
        );
    }
}