diff --git a/README.md b/README.md index 6e15fda..679e8e5 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,12 @@ Spec keys: `skip`, `step_by`, `take`, `size`, `rotate_left`, `byte_swap` (`none` | `--emit-unmatched` | Write unmatched candidate chunks to output | | `--no-structural` | Disable structural (region-layout) matching pass | | `--no-byte-swap` | Disable byte-swap heuristic variants | +| `--no-deinterleave` | Disable deinterleave heuristic variants | +| `--no-rotate` | Disable rotate heuristic variants | +| `--no-sliding` | Disable sliding-window heuristic variants | | `--no-expand` | Disable KPKA/PAK and LZMA/XZ archive expansion | +| `--scan-only` | Keep only heuristics that look for the fragment preserved byte-for-byte (exact-CRC + sliding-window + rotate) | +| `--exact-only` | Only test whether the whole candidate is itself the fragment (O(1) per candidate); good for renaming files against a DAT | | `-v, --verbose` | Verbose logging | ## Supported input formats diff --git a/src/main.rs b/src/main.rs index 71683a6..042d2a0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,11 +56,38 @@ struct Opt { #[arg(long)] no_byte_swap: bool, + /// Disable deinterleave heuristic variants; speeds up matching when you + /// know the candidate data is not interleaved + #[arg(long)] + no_deinterleave: bool, + /// Disable expansion of KPKA/PAK archives and embedded LZMA/XZ blocks /// within candidates #[arg(long)] no_expand: bool, + /// Disable rotate heuristic variants + #[arg(long)] + no_rotate: bool, + + /// Disable sliding-window heuristic variants + #[arg(long)] + no_sliding: bool, + + /// Scan-only: keep only the heuristics that look for the fragment + /// preserved byte-for-byte (exact-CRC + sliding-window + rotate). + /// Equivalent to --no-deinterleave --no-byte-swap --no-structural + /// --no-expand. + #[arg(long)] + scan_only: bool, + + /// Exact-only: only test whether the whole candidate is itself the + /// fragment (single O(1) CRC check per candidate). Useful for renaming + /// files against a DAT. Equivalent to --scan-only --no-rotate + /// --no-sliding. + #[arg(long)] + exact_only: bool, + /// Verbose logging (-v) #[arg(short, long)] verbose: bool, @@ -95,12 +122,24 @@ fn run_extract(opt: &Opt, spec: chisel::types::ExtractionSpec) -> anyhow::Result } fn main() -> anyhow::Result<()> { - let opt = Opt::parse(); + let mut opt = Opt::parse(); if opt.dat.is_some() && opt.spec.is_some() { anyhow::bail!("--dat and --spec are mutually exclusive"); } + if opt.exact_only { + opt.scan_only = true; + opt.no_rotate = true; + opt.no_sliding = true; + } + if opt.scan_only { + opt.no_deinterleave = true; + opt.no_byte_swap = true; + opt.no_structural = true; + opt.no_expand = true; + } + if let Some(spec) = opt.spec.clone() { return run_extract(&opt, spec); } @@ -138,6 +177,9 @@ fn main() -> anyhow::Result<()> { opt.game_subdirs, opt.verbose, opt.no_byte_swap, + opt.no_deinterleave, + opt.no_rotate, + opt.no_sliding, structural_records, opt.gex.is_some(), )?; @@ -167,6 +209,9 @@ fn main() -> anyhow::Result<()> { opt.game_subdirs, opt.verbose, opt.no_byte_swap, + opt.no_deinterleave, + opt.no_rotate, + opt.no_sliding, records, opt.gex.is_some(), )?; diff --git a/src/pipeline.rs b/src/pipeline.rs index b72472d..b8f1f56 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -49,6 +49,9 @@ pub fn run_pipeline( game_subdirs: bool, verbose: bool, no_byte_swap: bool, + no_deinterleave: bool, + no_rotate: bool, + no_sliding: bool, initial_records: Vec, collect_records: bool, ) -> anyhow::Result> { @@ -58,6 +61,15 @@ pub fn run_pipeline( cfg.byte_swaps .retain(|&bs| bs == chisel::types::ByteSwap::None); } + if no_deinterleave { + cfg.deinterleaves.clear(); + } + if no_rotate { + cfg.rotate = false; + } + if no_sliding { + cfg.sliding = false; + } cfg.heuristics() }; let mut pending = Pending::build(roms);