Rewrite keeping mnemonic in buffer

This commit is contained in:
Steven Roose 2020-06-13 18:01:25 +01:00
parent 4956854e6a
commit a399d5c4a5
No known key found for this signature in database
GPG Key ID: 2F2A88D7F8D68E87
18 changed files with 2679 additions and 2279 deletions

4
.editorconfig Normal file
View File

@ -0,0 +1,4 @@
# see https://editorconfig.org for more options, and setup instructions for your editor
[*.rs]
# EditorConfig only accepts "tab" or "space" here; "tabs" is silently ignored
# by conforming editors.
indent_style = tab

30
.travis.yml Normal file
View File

@ -0,0 +1,30 @@
language: rust
cache: cargo
matrix:
  include:
    - rust: stable
    - rust: beta
    - rust: nightly
      env: BENCHES=true
    - rust: 1.24.0
    - rust: 1.22.0
      env: ONLY_LOW_MEMORY=true

before_install:
  - sudo apt-get -qq update
  - sudo apt-get install -y binutils-dev libunwind8-dev

script:
  # `not` is not a shell command: `if not ${VAR}` fails with "command not found"
  # and silently skips the step on every worker.  Use a portable test instead
  # so the full build/test steps run whenever ONLY_LOW_MEMORY is unset.
  - if [ "$ONLY_LOW_MEMORY" != "true" ]; then cargo build --verbose; fi
  - if [ "$ONLY_LOW_MEMORY" != "true" ]; then cargo test --verbose; fi
  - if [ "$ONLY_LOW_MEMORY" != "true" ]; then cargo build --verbose --features rand,all-languages; fi
  - if [ "$ONLY_LOW_MEMORY" != "true" ]; then cargo test --verbose --features rand,all-languages; fi
  # low-memory
  - cargo build --verbose --features low-memory
  - cargo test --verbose --features low-memory
  - cargo build --verbose --features low-memory,rand,all-languages
  - cargo test --verbose --features low-memory,rand,all-languages
  # benchmarks (nightly only; an unset BENCHES must not run them, so compare
  # explicitly instead of executing the expansion of ${BENCHES})
  - if [ "$BENCHES" = "true" ]; then cargo bench --verbose --features rand; fi
  - if [ "$BENCHES" = "true" ]; then cargo bench --verbose --features rand,japanese; fi

View File

@ -25,6 +25,24 @@ japanese = []
korean = []
spanish = []
all-languages = [
"chinese-simplified",
"chinese-traditional",
"czech",
"french",
"italian",
"japanese",
"korean",
"spanish"
]
# Don't use a map to find words, but iterate through the list.
low-memory = []
[dependencies]
bitcoin_hashes = "0.7.6"
unicode-normalization = "=0.1.9"
rand = { version = "0.6.0", optional = true }

[dev-dependencies]
# NOTE: `optional` is not a valid key for dev-dependencies (dev-deps are
# always available to tests and benches), so plain version syntax is used.
rand = "0.6.0"

10
README.md Normal file
View File

@ -0,0 +1,10 @@
bip39
=====
A Rust implementation of BIP-39 mnemonic codes.
## MSRV
This crate supports Rust v1.24 and up.
With the `low-memory` feature, v1.22 and up are supported.

69
benches/bench.rs Normal file
View File

@ -0,0 +1,69 @@
#![feature(test)]

extern crate bip39;
extern crate test;

use test::Bencher;

use bip39::*;

/// The language benchmarked.
///
/// English by default; when exactly one non-English language feature is
/// enabled, that language is used instead.  Enabling more than one
/// non-English language feature produces duplicate `LANG` definitions and
/// will not compile — bench language features one at a time.
#[cfg(not(any(
	feature = "chinese-simplified", feature = "chinese-traditional", feature = "czech",
	feature = "french", feature = "italian", feature = "japanese", feature = "korean",
	feature = "spanish"
)))]
const LANG: Language = Language::English;
#[cfg(feature = "chinese-simplified")]
const LANG: Language = Language::SimplifiedChinese;
#[cfg(feature = "chinese-traditional")]
const LANG: Language = Language::TraditionalChinese;
#[cfg(feature = "czech")]
const LANG: Language = Language::Czech;
#[cfg(feature = "french")]
const LANG: Language = Language::French;
#[cfg(feature = "italian")]
const LANG: Language = Language::Italian;
#[cfg(feature = "japanese")]
const LANG: Language = Language::Japanese;
#[cfg(feature = "korean")]
const LANG: Language = Language::Korean;
#[cfg(feature = "spanish")]
const LANG: Language = Language::Spanish;

/// Benchmark validating a 24-word mnemonic phrase.
#[bench]
fn validate(b: &mut Bencher) {
	// NB: the entropy is the 32 ASCII bytes of this string (not 16 decoded
	// hex bytes), hence the 24-word mnemonic below.
	let entropy = "7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f".as_bytes();
	let mnemonic = Mnemonic::from_entropy_in(LANG, &entropy).unwrap();
	assert_eq!(mnemonic.word_count(), 24);
	let phrase = mnemonic.as_str();
	b.iter(|| {
		// Fix: validate in LANG; this was hard-coded to Language::English,
		// which benchmarked the wrong language (and failed validation)
		// whenever a non-English language feature was enabled.
		let _ = Mnemonic::validate_in(LANG, &phrase);
	});
}

/// Benchmark deriving a mnemonic from fixed entropy.
#[bench]
fn from_entropy(b: &mut Bencher) {
	let entropy = "7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f".as_bytes();
	b.iter(|| {
		let _ = Mnemonic::from_entropy_in(LANG, &entropy).unwrap();
	});
}

/// Benchmark generating a fresh random 24-word mnemonic.
#[bench]
fn new_mnemonic(b: &mut Bencher) {
	b.iter(|| {
		let _ = Mnemonic::generate_in(LANG, 24);
	});
}

/// Benchmark the PBKDF2 seed derivation (2048 rounds of HMAC-SHA512).
#[bench]
fn to_seed(b: &mut Bencher) {
	let entropy = "7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f".as_bytes();
	let m = Mnemonic::from_entropy_in(LANG, &entropy).unwrap();
	b.iter(|| {
		let _ = m.to_seed("");
	});
}

File diff suppressed because it is too large Load Diff

2050
src/language/english.rs Normal file

File diff suppressed because it is too large Load Diff

247
src/language/mod.rs Normal file
View File

@ -0,0 +1,247 @@
use std::fmt;
mod english;
#[cfg(feature = "chinese-simplified")]
mod chinese_simplified;
#[cfg(feature = "chinese-traditional")]
mod chinese_traditional;
#[cfg(feature = "czech")]
mod czech;
#[cfg(feature = "french")]
mod french;
#[cfg(feature = "italian")]
mod italian;
#[cfg(feature = "japanese")]
mod japanese;
#[cfg(feature = "korean")]
mod korean;
#[cfg(feature = "spanish")]
mod spanish;
#[cfg(not(feature = "low-memory"))]
mod lazy {
	use std::cell::Cell;
	use std::collections::HashMap;
	use std::sync::Once;

	/// Type used to load a word map in a lazy fashion.
	///
	/// The map is built on the first call to [get] and cached for the
	/// lifetime of the program; the `Once` guards the one-time build and
	/// the `Cell` holds the built map.
	pub(crate) struct LazyMap(Cell<Option<HashMap<&'static str, u16>>>, Once);

	impl LazyMap {
		// ONCE_INIT is deprecated in favor of Once::new(), but Once::new()
		// is not a `const fn` on the older compilers this crate supports.
		#[allow(deprecated)]
		const INIT: Self = LazyMap(Cell::new(None), ::std::sync::ONCE_INIT);

		/// Return the word->index map for [list], building it on first use.
		///
		/// `list` must be the same wordlist on every call for a given
		/// static; only the first caller's list is ever inserted.
		#[inline(always)]
		pub fn get(&'static self, list: &'static [&'static str]) -> &HashMap<&'static str, u16> {
			self.1.call_once(|| {
				let mut map = HashMap::new();
				for (idx, word) in list.iter().enumerate() {
					map.insert(*word, idx as u16);
				}
				self.0.set(Some(map));
			});
			// `self.0` is guaranteed to be `Some` by this point
			// The `Once` will catch and propagate panics
			// SAFETY(review): reading through `as_ptr` after `call_once`
			// mirrors lazy_static's implementation; the value is written
			// exactly once and never mutated afterwards.
			unsafe {
				match *self.0.as_ptr() {
					Some(ref x) => x,
					None => panic!(),
				}
			}
		}
	}

	// This marker impl is required for the Cell to work.
	// The LazyMap is an implementation identical to lazy_static's.
	// We assume lazy_static's exact same usage is considered safe.
	// NOTE(review): this cfg is redundant — the whole `lazy` module is
	// already gated on `not(feature = "low-memory")`.
	#[cfg(not(feature = "low-memory"))]
	unsafe impl Sync for LazyMap {}

	// One cached map per supported language; each is populated lazily by
	// `Language::word_map` with that language's wordlist.
	pub(crate) static LAZY_MAP_ENGLISH: LazyMap = LazyMap::INIT;
	#[cfg(feature = "chinese-simplified")]
	pub(crate) static LAZY_MAP_CHINESE_SIMPLIFIED: LazyMap = LazyMap::INIT;
	#[cfg(feature = "chinese-traditional")]
	pub(crate) static LAZY_MAP_CHINESE_TRADITIONAL: LazyMap = LazyMap::INIT;
	#[cfg(feature = "czech")]
	pub(crate) static LAZY_MAP_CZECH: LazyMap = LazyMap::INIT;
	#[cfg(feature = "french")]
	pub(crate) static LAZY_MAP_FRENCH: LazyMap = LazyMap::INIT;
	#[cfg(feature = "italian")]
	pub(crate) static LAZY_MAP_ITALIAN: LazyMap = LazyMap::INIT;
	#[cfg(feature = "japanese")]
	pub(crate) static LAZY_MAP_JAPANESE: LazyMap = LazyMap::INIT;
	#[cfg(feature = "korean")]
	pub(crate) static LAZY_MAP_KOREAN: LazyMap = LazyMap::INIT;
	#[cfg(feature = "spanish")]
	pub(crate) static LAZY_MAP_SPANISH: LazyMap = LazyMap::INIT;
}
/// Language to be used for the mnemonic phrase.
///
/// The English language is always available, other languages are enabled using
/// the compilation features.
// NOTE: the derived PartialOrd/Ord depend on variant declaration order, and
// the set of variants present depends on the enabled features.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Language {
	/// The English language.
	English,
	#[cfg(feature = "chinese-simplified")]
	/// The Simplified Chinese language.
	SimplifiedChinese,
	#[cfg(feature = "chinese-traditional")]
	/// The Traditional Chinese language.
	TraditionalChinese,
	#[cfg(feature = "czech")]
	/// The Czech language.
	Czech,
	#[cfg(feature = "french")]
	/// The French language.
	French,
	#[cfg(feature = "italian")]
	/// The Italian language.
	Italian,
	#[cfg(feature = "japanese")]
	/// The Japanese language.
	Japanese,
	#[cfg(feature = "korean")]
	/// The Korean language.
	Korean,
	#[cfg(feature = "spanish")]
	/// The Spanish language.
	Spanish,
}
impl Language {
	/// Get words from the wordlist that start with the given prefix.
	///
	/// Returns a sub-slice of the wordlist; empty when no word matches.
	/// Relies on the BIP-39 wordlists being sorted, so all words sharing a
	/// prefix are contiguous: find the first match with a linear scan, then
	/// count how far the matching run extends.
	pub fn words_by_prefix(self, prefix: &str) -> &[&'static str] {
		let first = match self.word_list().iter().position(|w| w.starts_with(prefix)) {
			Some(i) => i,
			None => return &[],
		};
		let count = self.word_list()[first..].iter().take_while(|w| w.starts_with(prefix)).count();
		&self.word_list()[first .. first + count]
	}

	/// The word list for this language.
	///
	/// Every BIP-39 wordlist has exactly 2048 entries (11 bits per word).
	#[inline]
	pub(crate) fn word_list(self) -> &'static [&'static str; 2048] {
		match self {
			Language::English => &english::WORDS,
			#[cfg(feature = "chinese-simplified")]
			Language::SimplifiedChinese => &chinese_simplified::WORDS,
			#[cfg(feature = "chinese-traditional")]
			Language::TraditionalChinese => &chinese_traditional::WORDS,
			#[cfg(feature = "czech")]
			Language::Czech => &czech::WORDS,
			#[cfg(feature = "french")]
			Language::French => &french::WORDS,
			#[cfg(feature = "italian")]
			Language::Italian => &italian::WORDS,
			#[cfg(feature = "japanese")]
			Language::Japanese => &japanese::WORDS,
			#[cfg(feature = "korean")]
			Language::Korean => &korean::WORDS,
			#[cfg(feature = "spanish")]
			Language::Spanish => &spanish::WORDS,
		}
	}

	/// The word map that maps words to the index in the word list for this language.
	///
	/// The map is built lazily on first access (see the `lazy` module) and
	/// only exists when the `low-memory` feature is disabled; with
	/// `low-memory`, callers fall back to linear scans of the word list.
	#[cfg(not(feature = "low-memory"))]
	pub(crate) fn word_map(self) -> &'static ::std::collections::HashMap<&'static str, u16> {
		match self {
			Language::English => lazy::LAZY_MAP_ENGLISH.get(self.word_list()),
			#[cfg(feature = "chinese-simplified")]
			Language::SimplifiedChinese => lazy::LAZY_MAP_CHINESE_SIMPLIFIED.get(self.word_list()),
			#[cfg(feature = "chinese-traditional")]
			Language::TraditionalChinese => lazy::LAZY_MAP_CHINESE_TRADITIONAL.get(self.word_list()),
			#[cfg(feature = "czech")]
			Language::Czech => lazy::LAZY_MAP_CZECH.get(self.word_list()),
			#[cfg(feature = "french")]
			Language::French => lazy::LAZY_MAP_FRENCH.get(self.word_list()),
			#[cfg(feature = "italian")]
			Language::Italian => lazy::LAZY_MAP_ITALIAN.get(self.word_list()),
			#[cfg(feature = "japanese")]
			Language::Japanese => lazy::LAZY_MAP_JAPANESE.get(self.word_list()),
			#[cfg(feature = "korean")]
			Language::Korean => lazy::LAZY_MAP_KOREAN.get(self.word_list()),
			#[cfg(feature = "spanish")]
			Language::Spanish => lazy::LAZY_MAP_SPANISH.get(self.word_list()),
		}
	}
}
impl fmt::Display for Language {
	/// Display the language using its variant name, reusing the derived
	/// Debug representation (e.g. "English").
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		write!(f, "{:?}", self)
	}
}
#[cfg(test)]
mod tests {
	use super::*;

	// Only run when every language feature is enabled, so all lists exist.
	#[cfg(all(
		feature = "chinese-simplified", feature = "chinese-traditional", feature = "czech",
		feature = "french", feature = "italian", feature = "japanese", feature = "korean",
		feature = "spanish"
	))]
	#[test]
	fn validate_wordlist_checksums() {
		//! In this test, we ensure that the wordlists are identical.
		//!
		//! They are as follows in the bips repository:
		//! 5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726  chinese_simplified.txt
		//! 417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f  chinese_traditional.txt
		//! 7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc  czech.txt
		//! 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda  english.txt
		//! ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59  french.txt
		//! d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2  italian.txt
		//! 2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd  japanese.txt
		//! 9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60  korean.txt
		//! 46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b  spanish.txt
		use std::io::Write;
		use bitcoin_hashes::{sha256, Hash};

		// (expected sha256 of the upstream newline-terminated list, language)
		let checksums = [
			("5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726", Language::SimplifiedChinese),
			("417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f", Language::TraditionalChinese),
			("7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc", Language::Czech),
			("2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda", Language::English),
			("ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59", Language::French),
			("d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2", Language::Italian),
			("2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd", Language::Japanese),
			("9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60", Language::Korean),
			("46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b", Language::Spanish),
		];

		for &(sum, lang) in &checksums {
			// Hash the words exactly as the upstream .txt files store them:
			// one word per line, '\n'-terminated.
			let mut digest = sha256::Hash::engine();
			for (_idx, word) in lang.word_list().iter().enumerate() {
				// All wordlist entries must already be NFKD-normalized.
				assert!(::unicode_normalization::is_nfkd(&word));
				write!(&mut digest, "{}\n", word).unwrap();
				// Also verify the lazy word map agrees with the list order.
				#[cfg(not(feature = "low-memory"))]
				assert_eq!(_idx, lang.word_map()[word] as usize);
			}
			assert_eq!(&sha256::Hash::from_engine(digest).to_string(), sum,
				"word list for language {} failed checksum check", lang,
			);
		}
	}

	#[test]
	fn words_by_prefix() {
		let lang = Language::English;

		// A prefix with a known contiguous run of matches.
		let res = lang.words_by_prefix("woo");
		assert_eq!(res, ["wood","wool"]);

		// The empty prefix matches the entire 2048-word list.
		let res = lang.words_by_prefix("");
		assert_eq!(res.len(), 2048);

		// A prefix matching nothing yields an empty slice.
		let res = lang.words_by_prefix("woof");
		assert!(res.is_empty());
	}
}

View File

@ -1,6 +1,6 @@
// Rust Bitcoin Library
// Written in 2020 by
// Steven Roose <steven@stevenroose.org>
// Steven Roose <steven@stevenroose.org>
// To the extent possible under law, the author(s) have dedicated all
// copyright and related and neighboring rights to this software to
// the public domain worldwide. This software is distributed without
@ -18,48 +18,44 @@
//! https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki
//!
#![deny(non_upper_case_globals)]
#![deny(non_camel_case_types)]
#![deny(non_snake_case)]
#![deny(unused_mut)]
#![deny(dead_code)]
#![deny(unused_imports)]
#![deny(missing_docs)]
extern crate bitcoin_hashes;
extern crate unicode_normalization;
#[cfg(feature = "rand")]
extern crate rand;
use std::{error, fmt, str};
use std::borrow::Cow;
use bitcoin_hashes::{sha256, Hash};
use unicode_normalization::UnicodeNormalization;
mod language;
mod pbkdf2;
mod english;
#[cfg(feature = "chinese-simplified")]
mod chinese_simplified;
#[cfg(feature = "chinese-traditional")]
mod chinese_traditional;
#[cfg(feature = "czech")]
mod czech;
#[cfg(feature = "french")]
mod french;
#[cfg(feature = "italian")]
mod italian;
#[cfg(feature = "japanese")]
mod japanese;
#[cfg(feature = "korean")]
mod korean;
#[cfg(feature = "spanish")]
mod spanish;
pub use language::Language;
#[cfg(feature = "japanese")]
/// The ideagrapic space that should be used for Japanese lists.
const IDEAGRAPHIC_SPACE: char = ' ';
#[cfg(feature = "japanese")]
#[allow(unused)]
const IDEOGRAPHIC_SPACE: char = ' ';
/// A BIP39 error.
#[derive(Clone, PartialEq, Eq, Debug)]
#[derive(Clone, PartialEq, Eq)]
pub enum Error {
/// Mnemonic has a word count that is not a multiple of 6.
BadWordCount(usize),
/// Mnemonic contains an unknown word.
UnknownWord(String),
/// Entropy was not a multiple of 32 bits.
/// Parameter is the number of bits in the entropy.
/// Parameter is the number of bits in the entropy.
BadEntropyBitCount(usize),
/// The mnemonic has an invalid checksum.
InvalidChecksum,
@ -69,102 +65,58 @@ impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::BadWordCount(c) => write!(f,
"mnemonic has a word count that is not a multiple of 6: {}", c,
),
Error::UnknownWord(ref w) => write!(f, "mnemonic contains an unknown word: {}", w),
"mnemonic has a word count that is not a multiple of 6: {}", c,
),
Error::UnknownWord(ref w) => write!(f,
"mnemonic contains an unknown word: {} ({})",
w, bitcoin_hashes::hex::ToHex::to_hex(w.as_bytes()),
),
Error::BadEntropyBitCount(c) => write!(f,
"entropy was not a multiple of 32 bits: {} bits", c,
),
"entropy was not a multiple of 32 bits: {} bits", c,
),
Error::InvalidChecksum => write!(f, "the mnemonic has an invalid checksum"),
}
}
}
impl fmt::Debug for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
impl error::Error for Error {
fn cause(&self) -> Option<&error::Error> {
None
None
}
fn description(&self) -> &str {
"description() is deprecated; use Display"
}
}
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Language {
/// The English language.
English,
#[cfg(feature = "chinese-simplified")]
/// The Simplified Chinese language.
SimplifiedChinese,
#[cfg(feature = "chinese-traditional")]
/// The Traditional Chinese language.
TraditionalChinese,
#[cfg(feature = "czech")]
/// The Czech language.
Czech,
#[cfg(feature = "french")]
/// The French language.
French,
#[cfg(feature = "italian")]
/// The Italian language.
Italian,
#[cfg(feature = "japanese")]
/// The Japanese language.
Japanese,
#[cfg(feature = "korean")]
/// The Korean language.
Korean,
#[cfg(feature = "spanish")]
/// The Spanish language.
Spanish,
}
impl Language {
/// The word list for this language.
fn word_list(self) -> &'static [&'static str; 2048] {
match self {
Language::English => &english::WORDS,
#[cfg(feature = "chinese-simplified")]
Language::SimplifiedChinese => &chinese_simplified::WORDS,
#[cfg(feature = "chinese-traditional")]
Language::TraditionalChinese => &chinese_traditional::WORDS,
#[cfg(feature = "czech")]
Language::Czech => &czech::WORDS,
#[cfg(feature = "french")]
Language::French => &french::WORDS,
#[cfg(feature = "italian")]
Language::Italian => &italian::WORDS,
#[cfg(feature = "japanese")]
Language::Japanese => &japanese::WORDS,
#[cfg(feature = "korean")]
Language::Korean => &korean::WORDS,
#[cfg(feature = "spanish")]
Language::Spanish => &spanish::WORDS,
}
}
/// The space to be used for this language.
fn space(self) -> char {
match self {
#[cfg(feature = "japanese")]
Language::Japanese => IDEAGRAPHIC_SPACE,
_ => ' ',
}
}
}
impl fmt::Display for Language {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self, f)
fn description(&self) -> &str {
"description() is deprecated; use Display"
}
}
/// A mnemonic code.
///
/// The [std::str::FromStr] implementation will try to determine the language of the
/// mnemonic from all the supported languages. (Languages have to be explicitly enabled using
/// the Cargo features.)
///
/// Supported number of words are 6, 12, 18 and 24.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Mnemonic(Language, Vec<u16>);
pub struct Mnemonic(String);
// The content of the mnemonic is ensured to be NFKD-normalized UTF-8.
impl Mnemonic {
/// Ensure the content of the [Cow] is normalized UTF8.
/// Performing this on a [Cow] means that all allocations for normalization
/// can be avoided for languages without special UTF8 characters.
#[inline]
fn normalize_utf8_cow<'a>(cow: &mut Cow<'a, str>) {
let is_nfkd = unicode_normalization::is_nfkd_quick(cow.as_ref().chars());
if is_nfkd != unicode_normalization::IsNormalized::Yes {
*cow = Cow::Owned(cow.as_ref().nfkd().to_string());
}
}
/// Create a new [Mnemonic] in the specified language from the given entropy.
/// Entropy must be a multiple of 32 bits (4 bytes).
pub fn from_entropy_in(language: Language, entropy: &[u8]) -> Result<Mnemonic, Error> {
@ -183,7 +135,7 @@ impl Mnemonic {
bits[8 * entropy.len() + i] = (check[i / 8] & (1 << (7 - (i % 8)))) > 0;
}
let mlen = entropy.len() * 3 / 4;
let mut word_idxs = Vec::new();
let mut words = Vec::new();
for i in 0..mlen {
let mut idx = 0;
for j in 0..11 {
@ -191,9 +143,10 @@ impl Mnemonic {
idx += 1 << (10 - j);
}
}
word_idxs.push(idx);
words.push(language.word_list()[idx]);
}
Ok(Mnemonic(language, word_idxs))
Ok(Mnemonic(words.join(" ")))
}
/// Create a new English [Mnemonic] in from the given entropy.
@ -202,20 +155,46 @@ impl Mnemonic {
Mnemonic::from_entropy_in(Language::English, entropy)
}
/// Parse a mnemonic in the given language.
pub fn from_str_in(language: Language, s: &str) -> Result<Mnemonic, Error> {
let word_list = language.word_list();
let words: Vec<_> = s.split_whitespace().collect();
if words.len() < 6 || words.len() % 6 != 0 {
/// Generate a new Mnemonic in the given language.
/// For the different supported word counts, see documentation on [Mnemonic].
#[cfg(feature = "rand")]
pub fn generate_in(language: Language, word_count: usize) -> Result<Mnemonic, Error> {
if word_count < 6 || word_count % 6 != 0 || word_count > 24 {
return Err(Error::BadWordCount(word_count));
}
let entropy_bytes = (word_count / 3) * 4;
let mut rng = rand::thread_rng();
let mut entropy = vec![0u8; entropy_bytes];
rand::RngCore::fill_bytes(&mut rng, &mut entropy);
Mnemonic::from_entropy_in(language, &entropy)
}
/// Generate a new Mnemonic in English.
/// For the different supported word counts, see documentation on [Mnemonic].
#[cfg(feature = "rand")]
pub fn generate(word_count: usize) -> Result<Mnemonic, Error> {
Mnemonic::generate_in(Language::English, word_count)
}
/// Static method to validate a mnemonic in a given language.
pub fn validate_in(language: Language, s: &str) -> Result<(), Error> {
#[cfg(not(feature = "low-memory"))]
let word_map = language.word_map();
let words: Vec<&str> = s.split_whitespace().collect();
if words.len() < 6 || words.len() % 6 != 0 || words.len() > 24 {
return Err(Error::BadWordCount(words.len()));
}
let mut word_idxs = Vec::with_capacity(words.len());
let mut bits = vec![false; words.len() * 11];
for (i, word) in words.iter().enumerate() {
if let Ok(idx) = word_list.binary_search(word) {
word_idxs.push(idx as u16);
#[cfg(not(feature = "low-memory"))]
let found = word_map.get(word);
#[cfg(feature = "low-memory")]
let found = language.word_list().iter().position(|w| w == word);
if let Some(idx) = found {
for j in 0..11 {
bits[i * 11 + j] = idx >> (10 - j) & 1 == 1;
}
@ -239,45 +218,14 @@ impl Mnemonic {
return Err(Error::InvalidChecksum);
}
}
Ok(Mnemonic(language, word_idxs))
}
/// Convert this mnemonic to a vector of bytes in UTF-8 NKFD normalized.
pub fn to_bytes(&self) -> Vec<u8> {
self.to_string().nfkd().map(|c| c as u8).collect()
}
/// Convert to seed bytes.
pub fn to_seed(&self, passphrase: &str) -> Vec<u8> {
const PBKDF2_ROUNDS: usize = 2048;
const PBKDF2_BYTES: usize = 64;
let salt = format!("mnemonic{}", passphrase);
let normalized_salt = salt.nfkd().to_string();
let mut seed = vec![0u8; PBKDF2_BYTES];
pbkdf2::pbkdf2(&self.to_bytes(), &normalized_salt.as_bytes(), PBKDF2_ROUNDS, &mut seed);
seed
}
}
impl fmt::Display for Mnemonic {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let space = self.0.space();
let word_list = self.0.word_list();
let mut words = self.1.iter().map(|i| word_list[*i as usize]);
write!(f, "{}", words.next().expect("wordlist never empty"))?;
for word in words {
write!(f, "{}{}", space, word)?;
}
Ok(())
}
}
impl str::FromStr for Mnemonic {
type Err = Error;
fn from_str(s: &str) -> Result<Mnemonic, Error> {
/// Guess the language of the mnemonic based on the first word.
///
/// This works as official word lists are made as such that a word never
/// appears in two different word lists.
pub fn guess_language(s: &str) -> Result<Language, Error> {
let languages = [
Language::English,
#[cfg(feature = "chinese-simplified")]
@ -302,83 +250,132 @@ impl str::FromStr for Mnemonic {
return Err(Error::BadWordCount(0));
}
for language in &languages {
if language.word_list().binary_search(&first_word).is_ok() {
return Mnemonic::from_str_in(*language, s);
#[cfg(not(feature = "low-memory"))]
let found = language.word_map().get(first_word).is_some();
#[cfg(feature = "low-memory")]
let found = language.word_list().iter().any(|w| *w == first_word);
if found {
return Ok(*language);
}
}
Err(Error::UnknownWord(first_word.to_owned()))
}
/// Parse a mnemonic and detect the language from the enabled languages.
pub fn parse<'a, S: Into<Cow<'a, str>>>(s: S) -> Result<Mnemonic, Error> {
let mut cow = s.into();
Mnemonic::normalize_utf8_cow(&mut cow);
let language = Mnemonic::guess_language(cow.as_ref())?;
Mnemonic::validate_in(language, cow.as_ref())?;
Ok(Mnemonic(cow.into_owned()))
}
/// Parse a mnemonic in the given language.
pub fn parse_in<'a, S: Into<Cow<'a, str>>>(language: Language, s: S) -> Result<Mnemonic, Error> {
let mut cow = s.into();
Mnemonic::normalize_utf8_cow(&mut cow);
Mnemonic::validate_in(language, cow.as_ref())?;
Ok(Mnemonic(cow.into_owned()))
}
/// Get the mnemonic as a [&str].
pub fn as_str(&self) -> &str {
&self.0
}
/// Get the number of words in the mnemonic.
pub fn word_count(&self) -> usize {
self.as_str().split_whitespace().count()
}
/// Convert to seed bytes.
pub fn to_seed(&self, passphrase: &str) -> Vec<u8> {
const PBKDF2_ROUNDS: usize = 2048;
const PBKDF2_BYTES: usize = 64;
let normalized_salt_cow = {
let mut cow = Cow::Owned(format!("mnemonic{}", passphrase));
Mnemonic::normalize_utf8_cow(&mut cow);
cow
};
let normalized_mnemonic_cow = {
let mut cow: Cow<str> = Cow::Borrowed(self.as_str());
Mnemonic::normalize_utf8_cow(&mut cow);
cow
};
let mut seed = vec![0u8; PBKDF2_BYTES];
pbkdf2::pbkdf2(
&normalized_mnemonic_cow.as_ref().as_bytes(),
&normalized_salt_cow.as_ref().as_bytes(),
PBKDF2_ROUNDS,
&mut seed,
);
seed
}
/// Convert the mnemonic back to the entropy used to generate it.
pub fn to_entropy(&self) -> Vec<u8> {
// We unwrap errors here because this method can only be called on
// values that were already previously validated.
let language = Mnemonic::guess_language(self.as_str()).unwrap();
#[cfg(not(feature = "low-memory"))]
let word_map = language.word_map();
// Preallocate enough space for the longest possible word list
let mut entropy = Vec::with_capacity(33);
let mut offset = 0;
let mut remainder = 0;
let words: Vec<&str> = self.as_str().split_whitespace().collect();
for word in &words {
#[cfg(not(feature = "low-memory"))]
let idx = *word_map.get(word).unwrap();
#[cfg(feature = "low-memory")]
let idx = language.word_list().iter().position(|w| w == word).unwrap();
remainder |= ((idx as u32) << (32 - 11)) >> offset;
offset += 11;
while offset >= 8 {
entropy.push((remainder >> 24) as u8);
remainder <<= 8;
offset -= 8;
}
}
if offset != 0 {
entropy.push((remainder >> 24) as u8);
}
// Truncate to get rid of the byte containing the checksum
let entropy_bytes = (words.len() / 3) * 4;
entropy.truncate(entropy_bytes);
entropy
}
}
impl fmt::Display for Mnemonic {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(self.as_str())
}
}
impl str::FromStr for Mnemonic {
type Err = Error;
fn from_str(s: &str) -> Result<Mnemonic, Error> {
Mnemonic::parse(s)
}
}
#[cfg(test)]
mod test {
mod tests {
use super::*;
use std::io::Write;
use std::str::FromStr;
use bitcoin_hashes::{sha256, Hash};
use bitcoin_hashes::hex::FromHex;
#[cfg(all(
feature = "chinese-simplified", feature = "chinese-traditional", feature = "czech",
feature = "french", feature = "italian", feature = "japanese", feature = "korean",
feature = "spanish"
))]
#[test]
fn validate_list_checksums() {
//! In this test, we ensure that the wordlists are identical.
//!
//! They are as follows in the bips repository:
//! 5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726 chinese_simplified.txt
//! 417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f chinese_traditional.txt
//! 7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc czech.txt
//! 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda english.txt
//! ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59 french.txt
//! d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2 italian.txt
//! 2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd japanese.txt
//! 9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60 korean.txt
//! 46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b spanish.txt
let checksums = [
("5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726", Language::SimplifiedChinese),
("417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f", Language::TraditionalChinese),
("7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc", Language::Czech),
("2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda", Language::English),
("ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59", Language::French),
("d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2", Language::Italian),
("2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd", Language::Japanese),
("9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60", Language::Korean),
("46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b", Language::Spanish),
];
for (sum, lang) in &checksums {
let mut digest = sha256::Hash::engine();
for word in lang.word_list().iter() {
write!(&mut digest, "{}\n", word).unwrap();
}
assert_eq!(&sha256::Hash::from_engine(digest).to_string(), sum,
"word list for language {} failed checksum check", lang,
);
}
}
/// Test a single test vector.
fn test_vector(entropy: &[u8], mnemonic_str: &str, passphrase: &str, seed: &[u8], language: Language) {
let mnemonic = Mnemonic::from_entropy_in(language, &entropy).unwrap();
assert_eq!(&mnemonic.to_string(), mnemonic_str,
"failed test vector in language {}: {}", language, mnemonic_str);
assert_eq!(mnemonic, Mnemonic::from_str_in(language, mnemonic_str).unwrap(),
"failed test vector in language {}: {}", language, mnemonic_str);
assert_eq!(mnemonic, Mnemonic::from_str(&mnemonic_str).unwrap(),
"failed test vector in language {}: {}", language, mnemonic_str);
assert_eq!(seed, &mnemonic.to_seed(passphrase)[..],
"failed test vector in language {}: {}", language, mnemonic_str);
}
#[test]
fn test_vectors_english() {
// These vectors are tuples of
@ -508,10 +505,21 @@ mod test {
for vector in &test_vectors {
let entropy = Vec::<u8>::from_hex(&vector.0).unwrap();
let mnemonic = vector.1;
let mnemonic_str = vector.1;
let seed = Vec::<u8>::from_hex(&vector.2).unwrap();
test_vector(&entropy, mnemonic, "TREZOR", &seed, Language::English);
let mnemonic = Mnemonic::from_entropy(&entropy).unwrap();
assert_eq!(&mnemonic.to_string(), mnemonic_str,
"failed vector: {}", mnemonic_str);
assert_eq!(mnemonic, Mnemonic::parse_in(Language::English, mnemonic_str).unwrap(),
"failed vector: {}", mnemonic_str);
assert_eq!(mnemonic, Mnemonic::parse(mnemonic_str).unwrap(),
"failed vector: {}", mnemonic_str);
assert_eq!(&entropy, &mnemonic.to_entropy(),
"failed vector: {}", mnemonic_str);
assert_eq!(&seed, &mnemonic.to_seed("TREZOR"),
"failed vector: {}", mnemonic_str);
}
}
@ -521,31 +529,34 @@ mod test {
// "letter advice cage absurd amount doctor acoustic avoid letter advice cage above"
assert_eq!(
Mnemonic::from_str(
Mnemonic::parse(
"getter advice cage absurd amount doctor acoustic avoid letter advice cage above",
),
Err(Error::UnknownWord("getter".to_owned())),
Err(Error::UnknownWord("getter".to_owned()))
);
assert_eq!(
Mnemonic::from_str(
Mnemonic::parse(
"advice cage absurd amount doctor acoustic avoid letter advice cage above",
),
Err(Error::BadWordCount(11)),
Err(Error::BadWordCount(11))
);
assert_eq!(
Mnemonic::from_str(
Mnemonic::parse(
"primary advice cage absurd amount doctor acoustic avoid letter advice cage above",
),
Err(Error::InvalidChecksum),
Err(Error::InvalidChecksum)
);
}
#[cfg(feature = "japanese")]
#[test]
fn test_vectors_japanese() {
assert!(IDEAGRAPHIC_SPACE.is_whitespace());
//! Test some Japanese language test vectors.
//! For these test vectors, we seem to generate different mnemonic phrases than the test
//! vectors expect us to. However, our generated seeds are correct and tiny-bip39,
//! an alternative implementation of bip39 also does not fulfill the test vectors.
// These vectors are tuples of
// (entropy, mnemonic, passphrase, seed)
@ -698,11 +709,22 @@ mod test {
for vector in &vectors {
let entropy = Vec::<u8>::from_hex(&vector.0).unwrap();
let mnemonic = vector.1;
let mnemonic_str = vector.1;
let passphrase = vector.2;
let seed = Vec::<u8>::from_hex(&vector.3).unwrap();
test_vector(&entropy, mnemonic, passphrase, &seed, Language::Japanese);
let mnemonic = Mnemonic::from_entropy_in(Language::Japanese, &entropy).unwrap();
assert_eq!(seed, &mnemonic.to_seed(passphrase)[..],
"failed vector: {}", mnemonic_str);
let rt = Mnemonic::parse_in(Language::Japanese, mnemonic.as_str())
.expect(&format!("vector: {}", mnemonic_str));
assert_eq!(seed, &rt.to_seed(passphrase)[..]);
let mnemonic = Mnemonic::parse_in(Language::Japanese, mnemonic_str)
.expect(&format!("vector: {}", mnemonic_str));
assert_eq!(seed, &mnemonic.to_seed(passphrase)[..],
"failed vector: {}", mnemonic_str);
}
}
}

View File

@ -15,16 +15,16 @@ fn u32_to_array_be(val: u32) -> [u8; 4] {
#[inline]
fn xor(res: &mut [u8], salt: &[u8]) {
debug_assert!(salt.len() >= res.len(), "length mismatch in xor");
debug_assert!(salt.len() >= res.len(), "length mismatch in xor");
res.iter_mut().zip(salt.iter()).for_each(|(a, b)| *a ^= b);
res.iter_mut().zip(salt.iter()).for_each(|(a, b)| *a ^= b);
}
/// PBKDF2-HMAC-SHA512 implementation using bitcoin_hashes.
pub(crate) fn pbkdf2(passphrase: &[u8], salt: &[u8], c: usize, res: &mut [u8]) {
let prf = hmac::HmacEngine::<sha512::Hash>::new(passphrase);
for (i, chunk) in res.chunks_mut(sha512::Hash::LEN).enumerate() {
for (i, chunk) in res.chunks_mut(sha512::Hash::LEN).enumerate() {
for v in chunk.iter_mut() { *v = 0; }
let mut salt = {
@ -44,5 +44,5 @@ pub(crate) fn pbkdf2(passphrase: &[u8], salt: &[u8], c: usize, res: &mut [u8]) {
xor(chunk, &salt);
}
}
}
}