64 lines
1.8 KiB
Rust
64 lines
1.8 KiB
Rust
use bloomfilter::Bloom;
|
|
use clap::{arg, Parser};
|
|
use std::error::Error;
|
|
use std::fs::File;
|
|
use std::io::{BufRead, BufReader, Write};
|
|
use std::path::PathBuf;
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(author, version, about, long_about = None)]
|
|
struct Opts {
|
|
#[arg(short, long, help = "Input file", default_value = "addresses.txt")]
|
|
input_file: PathBuf,
|
|
|
|
#[arg(short, long, help = "Output file", default_value = "bloom.dump")]
|
|
output_file: PathBuf,
|
|
|
|
#[arg(
|
|
long,
|
|
help = "Bloom filter: number of items",
|
|
default_value_t = 1_180_626_779
|
|
)]
|
|
num_items: usize,
|
|
|
|
#[arg(
|
|
long,
|
|
help = "Bloom filter: wanted rate of false positives",
|
|
default_value_t = 0.000_000_001
|
|
)]
|
|
fp_rate: f64,
|
|
}
|
|
|
|
fn main() -> Result<(), Box<dyn Error>> {
|
|
let opts: Opts = Opts::parse();
|
|
|
|
let mut bloom = Bloom::new_for_fp_rate(opts.num_items, opts.fp_rate);
|
|
|
|
let file = File::open(&opts.input_file)?;
|
|
// buffer capacity in bytes, up from default 8K bytes
|
|
// this only results in a very minor speedup, if any
|
|
BufReader::with_capacity(1_000_000, file)
|
|
.lines()
|
|
.map(|l| l.unwrap())
|
|
.for_each(|line| bloom.set(&line));
|
|
|
|
// prepare output file
|
|
let path = PathBuf::from(opts.output_file);
|
|
let mut outfile = File::create(path)?;
|
|
|
|
// write metadata
|
|
outfile.write_all(&bloom.number_of_bits().to_be_bytes())?;
|
|
outfile.write_all(&bloom.number_of_hash_functions().to_be_bytes())?;
|
|
outfile.write_all(&bloom.sip_keys()[0].0.to_be_bytes())?;
|
|
outfile.write_all(&bloom.sip_keys()[0].1.to_be_bytes())?;
|
|
outfile.write_all(&bloom.sip_keys()[1].0.to_be_bytes())?;
|
|
outfile.write_all(&bloom.sip_keys()[1].1.to_be_bytes())?;
|
|
|
|
// write bitmap
|
|
outfile.write_all(&bloom.bitmap())?;
|
|
|
|
outfile.flush()?;
|
|
|
|
Ok(())
|
|
}
|