rust-bloom-filter-generator/src/main.rs

64 lines
1.8 KiB
Rust

use bloomfilter::Bloom;
use clap::{arg, Parser};
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader, Write};
use std::path::PathBuf;
// Command-line options for the Bloom filter generator, parsed with clap's derive API.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macro picks up
// `///` doc comments as help/about text, so doc comments could alter the CLI output.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Opts {
// Text file that `main` reads line by line; each line is inserted into the filter.
#[arg(short, long, help = "Input file", default_value = "addresses.txt")]
input_file: PathBuf,
// Destination file that `main` creates and fills with the filter metadata and bitmap.
#[arg(short, long, help = "Output file", default_value = "bloom.dump")]
output_file: PathBuf,
// Item count handed to `Bloom::new_for_fp_rate` in `main` to size the filter.
// NOTE(review): the default presumably matches the author's data set size - confirm.
#[arg(
long,
help = "Bloom filter: number of items",
default_value_t = 1_180_626_779
)]
num_items: usize,
// False-positive rate handed to `Bloom::new_for_fp_rate` together with `num_items`.
#[arg(
long,
help = "Bloom filter: wanted rate of false positives",
default_value_t = 0.000_000_001
)]
fp_rate: f64,
}
fn main() -> Result<(), Box<dyn Error>> {
let opts: Opts = Opts::parse();
let mut bloom = Bloom::new_for_fp_rate(opts.num_items, opts.fp_rate);
let file = File::open(&opts.input_file)?;
// buffer capacity in bytes, up from default 8K bytes
// this only results in a very minor speedup, if any
BufReader::with_capacity(1_000_000, file)
.lines()
.map(|l| l.unwrap())
.for_each(|line| bloom.set(&line));
// prepare output file
let path = PathBuf::from(opts.output_file);
let mut outfile = File::create(path)?;
// write metadata
outfile.write_all(&bloom.number_of_bits().to_be_bytes())?;
outfile.write_all(&bloom.number_of_hash_functions().to_be_bytes())?;
outfile.write_all(&bloom.sip_keys()[0].0.to_be_bytes())?;
outfile.write_all(&bloom.sip_keys()[0].1.to_be_bytes())?;
outfile.write_all(&bloom.sip_keys()[1].0.to_be_bytes())?;
outfile.write_all(&bloom.sip_keys()[1].1.to_be_bytes())?;
// write bitmap
outfile.write_all(&bloom.bitmap())?;
outfile.flush()?;
Ok(())
}