From af617807b9c040002feaec37a6916bd16d4a740f Mon Sep 17 00:00:00 2001 From: Christian Reitter Date: Sun, 12 Jan 2025 16:55:00 +0100 Subject: [PATCH] Release research utility for bloom filter usage, written primarily by Heiko Schaefer --- early_research_code/address_filter/Cargo.lock | 436 ++++++++++++++++++ early_research_code/address_filter/Cargo.toml | 12 + early_research_code/address_filter/README.md | 17 + .../address_filter/src/bloom.rs | 44 ++ .../address_filter/src/main.rs | 66 +++ early_research_code/hash_mnemonics/README.md | 4 + 6 files changed, 579 insertions(+) create mode 100644 early_research_code/address_filter/Cargo.lock create mode 100644 early_research_code/address_filter/Cargo.toml create mode 100644 early_research_code/address_filter/README.md create mode 100644 early_research_code/address_filter/src/bloom.rs create mode 100644 early_research_code/address_filter/src/main.rs diff --git a/early_research_code/address_filter/Cargo.lock b/early_research_code/address_filter/Cargo.lock new file mode 100644 index 0000000..6e6c3e0 --- /dev/null +++ b/early_research_code/address_filter/Cargo.lock @@ -0,0 +1,436 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "address_filter" +version = "0.1.0" +dependencies = [ + "bloomfilter", + "clap", + "csv", + "serde", + "serde_derive", +] + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "bit-vec" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22" + +[[package]] +name = "bloomfilter" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c541c70a910b485670304fd420f0eab8f7bde68439db6a8d98819c3d2774d7e2" +dependencies = [ + "bit-vec", + "getrandom", + "siphasher", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies 
= [ + "memchr", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "js-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "proc-macro2" +version = "1.0.93" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = 
"utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 
+dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version 
= "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/early_research_code/address_filter/Cargo.toml b/early_research_code/address_filter/Cargo.toml new file mode 100644 index 0000000..f9fba59 --- /dev/null +++ b/early_research_code/address_filter/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "address_filter" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +serde = "1.0" +serde_derive = "1.0" +csv = "1.3.0" +clap = { version = "4.5.20", features = ["derive"] } +bloomfilter = "1.0.14" \ No newline at end of file diff --git a/early_research_code/address_filter/README.md b/early_research_code/address_filter/README.md new file mode 100644 index 0000000..66fba42 --- /dev/null +++ b/early_research_code/address_filter/README.md @@ -0,0 +1,17 @@ +# Address Checks using a Bloom Filter + +This tool checks addresses against a bloom filter. +We used it as part of our early research while experimenting with address generation, bloom filters, +and sourcing lists of known addresses. + +## Usage + +See the application `--help` output. + +## License + +Licensed under either of `Apache License, Version 2.0` or `MIT` license at your option. + +## Credits + +Written by Heiko Schaefer, with some improvements by Christian Reitter. 
\ No newline at end of file
diff --git a/early_research_code/address_filter/src/bloom.rs b/early_research_code/address_filter/src/bloom.rs
new file mode 100644
index 0000000..9b6996d
--- /dev/null
+++ b/early_research_code/address_filter/src/bloom.rs
@@ -0,0 +1,82 @@
+use bloomfilter::Bloom;
+use std::error::Error;
+use std::fs::File;
+use std::io::{BufReader, Read};
+use std::path::Path;
+
+/// Size in bytes of the fixed header that precedes the bitmap: the bit count (8),
+/// the hash function count (4) and the four SipHash key halves (4 * 8).
+const HEADER_LEN: u64 = 8 + 4 + 32;
+
+/// Reads one big-endian `u64` from `reader`.
+fn read_u64_be(reader: &mut impl Read) -> std::io::Result<u64> {
+    let mut bytes = [0u8; 8];
+    reader.read_exact(&mut bytes)?;
+    Ok(u64::from_be_bytes(bytes))
+}
+
+/// Reads one big-endian `u32` from `reader`.
+fn read_u32_be(reader: &mut impl Read) -> std::io::Result<u32> {
+    let mut bytes = [0u8; 4];
+    reader.read_exact(&mut bytes)?;
+    Ok(u32::from_be_bytes(bytes))
+}
+
+// NOTE(review): the generic arguments of the return type were missing from the
+// copy of this patch under review; `Bloom<String>` and `Box<dyn Error>` are
+// reconstructed from how `main` uses the result - confirm against the repository.
+/// Loads a bloom filter from the dump stored at `file`.
+///
+/// The dump is read as a fixed header followed by the raw bitmap, with every
+/// integer stored big-endian:
+///
+/// 1. number of bits (`u64`)
+/// 2. number of hash functions (`u32`)
+/// 3. four `u64` values forming the two SipHash key pairs
+/// 4. the bitmap, which takes up the rest of the file
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be opened or read, if it is shorter
+/// than the header, or if the header announces more bits than the bitmap holds.
+pub fn load(file: &Path) -> Result<Bloom<String>, Box<dyn Error>> {
+    let file = File::open(file)?;
+    // Propagate metadata failures instead of panicking; the caller reports them.
+    let length = file.metadata()?.len();
+
+    let mut buf = BufReader::new(file);
+
+    let number_of_bits = read_u64_be(&mut buf)?;
+    let number_of_hash_functions = read_u32_be(&mut buf)?;
+    // Array and tuple elements are evaluated left to right, so the keys are
+    // consumed in file order.
+    let sip_keys: [(u64, u64); 2] = [
+        (read_u64_be(&mut buf)?, read_u64_be(&mut buf)?),
+        (read_u64_be(&mut buf)?, read_u64_be(&mut buf)?),
+    ];
+
+    // Checked arithmetic: the reported length can be smaller than the header
+    // (for example for special files), which previously underflowed.
+    let bitmap_len = length
+        .checked_sub(HEADER_LEN)
+        .ok_or("bloom filter file is shorter than its header")?;
+    let mut bitmap = vec![0u8; usize::try_from(bitmap_len)?];
+    buf.read_exact(&mut bitmap)?;
+
+    // A header that announces zero bits, or more bits than the bitmap holds, is
+    // corrupt; reject it here rather than risk a failure during later lookups.
+    let available_bits = (bitmap.len() as u64).saturating_mul(8);
+    if number_of_bits == 0 || number_of_bits > available_bits {
+        return Err(format!(
+            "bloom filter header announces {number_of_bits} bits, but the bitmap holds {available_bits}"
+        )
+        .into());
+    }
+
+    Ok(Bloom::from_existing(
+        &bitmap,
+        number_of_bits,
+        number_of_hash_functions,
+        sip_keys,
+    ))
+}
diff --git a/early_research_code/address_filter/src/main.rs b/early_research_code/address_filter/src/main.rs
new file mode 100644
index 0000000..cf7c5ac
--- /dev/null
+++ b/early_research_code/address_filter/src/main.rs
@@ -0,0 +1,73 @@
+use clap::{arg, Parser};
+use serde_derive::Deserialize;
+use std::path::PathBuf;
+
+mod bloom;
+
+/// Progress is reported whenever the low 21 bits of the line counter are zero,
+/// i.e. once every 2^21 (2,097,152) input lines.
+const PROGRESS_MASK: usize = 0b1_1111_1111_1111_1111_1111;
+
+// Command line options. Kept as a plain comment on purpose: clap's derive turns
+// a doc comment on the struct into help text.
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Opts {
+    /// Read CSV file, see record type for format details
+    #[arg(short, long, help = "Input file")]
+    input_file: PathBuf,
+
+    /// Output CSV file with matches, mirroring the input format
+    #[arg(short, long, help = "Output file")]
+    output_file: PathBuf,
+
+    /// bloom filter to check the address strings against
+    #[arg(short, long, help = "Bloom filter data file")]
+    bloom_file: PathBuf,
+}
+
+/// One row of the input CSV; `main` reads the file without a header line.
+#[derive(Debug, Deserialize)]
+struct AddressRecord {
+    index: u32,
+    address: String,
+}
+
+/// Loads the bloom filter, then copies every input record whose address the
+/// filter reports as present to the output file.
+///
+/// Records that fail to parse are reported and skipped.
+fn main() {
+    let opts: Opts = Opts::parse();
+
+    println!("Loading bloom filter dump ...");
+    let bloom = bloom::load(&opts.bloom_file).expect("Failed to load bloom filter");
+    println!("... done.");
+
+    let rdr = csv::ReaderBuilder::new()
+        .has_headers(false)
+        .from_path(&opts.input_file)
+        .expect("Failed to open input_file");
+
+    let mut wtr = csv::Writer::from_path(&opts.output_file).expect("Failed to open output_file");
+
+    for (count, result) in rdr.into_deserialize::<AddressRecord>().enumerate() {
+        if count & PROGRESS_MASK == 0 {
+            println!("Processed {} lines", count);
+        }
+
+        match result {
+            Ok(record) => {
+                if bloom.check(&record.address) {
+                    wtr.write_record(&[record.index.to_string(), record.address])
+                        .expect("Failed to write match to output_file");
+                    // avoid problem of delayed output to disk
+                    wtr.flush().expect("flush failed");
+                }
+            }
+            Err(err) => println!("Error reading CSV record: {}", err),
+        }
+    }
+
+    wtr.flush().expect("flush failed");
+}
diff --git a/early_research_code/hash_mnemonics/README.md b/early_research_code/hash_mnemonics/README.md
index d24fff8..f524ad5 100644
--- a/early_research_code/hash_mnemonics/README.md
+++ b/early_research_code/hash_mnemonics/README.md
@@ -3,6 +3,10 @@
 This tool generates SHA256 hashes over some parts of a CSV file.
 We used it as part of the data generation for our public BIP39 mnemonic lookup service.
 
+## Usage
+
+See the application `--help` output.
+
 ## License
 
 Licensed under either of `Apache License, Version 2.0` or `MIT` license at your option.