Refactor and add second variant, originally written by Heiko Schaefer

This commit is contained in:
Christian Reitter 2025-01-12 16:06:35 +01:00
parent 65046cf885
commit 471ef18d52
4 changed files with 361 additions and 2 deletions

249
Cargo.lock generated
View File

@ -1,6 +1,21 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "addr2line"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
dependencies = [
"gimli",
]
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "anstream"
@ -51,18 +66,57 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "async-trait"
version = "0.1.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "backtrace"
version = "0.3.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
dependencies = [
"addr2line",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
]
[[package]]
name = "bit-vec"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bloom_filter_gen"
version = "0.1.0"
dependencies = [
"bloomfilter",
"clap",
"csv",
"serde",
"serde_derive",
"sha256",
]
[[package]]
@ -82,6 +136,12 @@ version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "bytes"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -134,6 +194,66 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "cpufeatures"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "csv"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
dependencies = [
"memchr",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.15"
@ -147,18 +267,36 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itoa"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
[[package]]
name = "js-sys"
version = "0.3.76"
@ -181,12 +319,42 @@ version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394"
dependencies = [
"adler2",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "proc-macro2"
version = "1.0.92"
@ -205,6 +373,62 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc-demangle"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "ryu"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "serde"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sha256"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18278f6a914fa3070aa316493f7d2ddfb9ac86ebc06fa3b83bffda487e9065b0"
dependencies = [
"async-trait",
"bytes",
"hex",
"sha2",
"tokio",
]
[[package]]
name = "siphasher"
version = "1.0.1"
@ -228,6 +452,23 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.43.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
dependencies = [
"backtrace",
"bytes",
"pin-project-lite",
]
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "unicode-ident"
version = "1.0.14"
@ -240,6 +481,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"

View File

@ -7,6 +7,10 @@ publish = false
[dependencies]
bloomfilter = "1"
clap = { version = "4", features = ["derive"] }
sha256 = "1.5"
csv = "1.3.0"
serde = "1.0"
serde_derive = "1.0"
[profile.release]
# note that most of these are the default setting anyway
@ -27,4 +31,14 @@ codegen-units = 1
# default false
# lto = 'thin'
lto = 'fat'
lto = 'fat'
[[bin]]
name = "bloom_filter_generator"
test = false
bench = false
[[bin]]
name = "bloom_filter_generator_hashed"
test = false
bench = false

View File

@ -8,12 +8,15 @@ use std::path::PathBuf;
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Opts {
/// Expected: line-separated ASCII file with inputs
#[arg(short, long, help = "Input file", default_value = "addresses.txt")]
input_file: PathBuf,
#[arg(short, long, help = "Output file", default_value = "bloom.dump")]
output_file: PathBuf,
/// Default value comes from number of publicly known bitcoin addresses at the time
/// Adjust for your use case
#[arg(
long,
help = "Bloom filter: number of items",

View File

@ -0,0 +1,95 @@
use bloomfilter::Bloom;
use clap::{arg, Parser};
use serde_derive::Deserialize;
use std::error::Error;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Opts {
/// See CSV record type for format details
#[arg(short, long, help = "Input file (mnemonics csv)")]
input_file: PathBuf,
#[arg(short, long, help = "Output file (bloom filter dump of sha256 hashes)")]
output_file: PathBuf,
/// expected during initial use case: 3x 2^32 entries
/// consisting of the 128, 192 and 256 bit bx PRNG ranges with 32 bit complexity each
#[arg(
long,
help = "Bloom filter: number of items",
default_value_t = 12_884_901_888
)]
num_items: usize,
/// highly depends on use case
#[arg(
long,
help = "Bloom filter: acceptable rate of false positives",
default_value_t = 0.000_001
)]
fp_rate: f64,
}
#[derive(Debug, Deserialize)]
struct MnemonicsRecord {
_index: u32,
mnemonic: String,
}
fn main() -> Result<(), Box<dyn Error>> {
let opts: Opts = Opts::parse();
let mut bloom = Bloom::new_for_fp_rate(opts.num_items, opts.fp_rate);
// Improvement idea: use higher than default capacity?
let rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_path(&opts.input_file)
.expect("Failed to open input_file");
let wtr = File::create(&opts.output_file).expect("error opening output file");
let mut wtr = BufWriter::new(wtr);
let mut count: usize = 0;
rdr.into_deserialize()
.for_each(|result: Result<MnemonicsRecord, csv::Error>| {
if count & 0b1_1111_1111_1111_1111_1111 == 0 {
println!("Read {} lines", count);
}
count += 1;
match result {
Ok(record) => {
let val: String = sha256::digest(record.mnemonic);
bloom.set(&val)
}
Err(err) => eprintln!("Error reading CSV record: {}", err),
}
});
// Dump bloom filter to file
println!("Serializing bloom filter to output file");
// - metadata
wtr.write_all(&bloom.number_of_bits().to_be_bytes())?;
wtr.write_all(&bloom.number_of_hash_functions().to_be_bytes())?;
wtr.write_all(&bloom.sip_keys()[0].0.to_be_bytes())?;
wtr.write_all(&bloom.sip_keys()[0].1.to_be_bytes())?;
wtr.write_all(&bloom.sip_keys()[1].0.to_be_bytes())?;
wtr.write_all(&bloom.sip_keys()[1].1.to_be_bytes())?;
wtr.flush()?;
// - bitmap
wtr.write_all(&bloom.bitmap())?;
wtr.flush()?;
Ok(())
}