use bloomfilter::Bloom; use clap::{arg, Parser}; use std::error::Error; use std::fs::File; use std::io::{BufRead, BufReader, Write}; use std::path::PathBuf; #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] struct Opts { #[arg(short, long, help = "Input file", default_value = "addresses.txt")] input_file: PathBuf, #[arg(short, long, help = "Output file", default_value = "bloom.dump")] output_file: PathBuf, #[arg( long, help = "Bloom filter: number of items", default_value_t = 1_180_626_779 )] num_items: usize, #[arg( long, help = "Bloom filter: wanted rate of false positives", default_value_t = 0.000_000_001 )] fp_rate: f64, } fn main() -> Result<(), Box> { let opts: Opts = Opts::parse(); let mut bloom = Bloom::new_for_fp_rate(opts.num_items, opts.fp_rate); let file = File::open(&opts.input_file)?; // buffer capacity in bytes, up from default 8K bytes // this only results in a very minor speedup, if any BufReader::with_capacity(1_000_000, file) .lines() .map(|l| l.unwrap()) .for_each(|line| bloom.set(&line)); // prepare output file let path = PathBuf::from(opts.output_file); let mut outfile = File::create(path)?; // write metadata outfile.write_all(&bloom.number_of_bits().to_be_bytes())?; outfile.write_all(&bloom.number_of_hash_functions().to_be_bytes())?; outfile.write_all(&bloom.sip_keys()[0].0.to_be_bytes())?; outfile.write_all(&bloom.sip_keys()[0].1.to_be_bytes())?; outfile.write_all(&bloom.sip_keys()[1].0.to_be_bytes())?; outfile.write_all(&bloom.sip_keys()[1].1.to_be_bytes())?; // write bitmap outfile.write_all(&bloom.bitmap())?; outfile.flush()?; Ok(()) }