Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions diskann-benchmark/example/multi-vector-3way.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"search_directories": [],
"jobs": [
{
"type": "multi-vector-op",
"content": {
"element_type": "float32",
"isa": "reference",
"runs": [
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 16, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 64, "loops_per_measurement": 100, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 256, "loops_per_measurement": 25, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 512, "loops_per_measurement": 12, "num_measurements": 50 }
]
}
},
{
"type": "multi-vector-op",
"content": {
"element_type": "float32",
"isa": "x86-64-v3",
"runs": [
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 16, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 64, "loops_per_measurement": 100, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 256, "loops_per_measurement": 25, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 512, "loops_per_measurement": 12, "num_measurements": 50 }
]
}
},
{
"type": "multi-vector-op",
"content": {
"element_type": "float32",
"isa": "x86-64-v3-staged",
"runs": [
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 16, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 64, "loops_per_measurement": 100, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 256, "loops_per_measurement": 25, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 256, "dim": 512, "loops_per_measurement": 12, "num_measurements": 50 }
]
}
}
]
}
20 changes: 20 additions & 0 deletions diskann-benchmark/example/multi-vector-quant.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"search_directories": [],
"jobs": [
{
"type": "multi-vector-quant-op",
"content": {
"runs": [
{ "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 },
{ "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }
]
}
}
]
}
41 changes: 41 additions & 0 deletions diskann-benchmark/example/multi-vector-staged.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"search_directories": [],
"jobs": [
{
"type": "multi-vector-op",
"content": {
"element_type": "float32",
"isa": "x86-64-v3",
"runs": [
{ "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 },
{ "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }
]
}
},
{
"type": "multi-vector-op",
"content": {
"element_type": "float32",
"isa": "x86-64-v3-staged",
"runs": [
{ "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 },
{ "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 },
{ "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 },
{ "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }
]
}
}
]
}
82 changes: 82 additions & 0 deletions diskann-benchmark/src/inputs/multi_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ pub(crate) enum BenchIsa {
#[serde(rename = "x86-64-v3")]
#[allow(non_camel_case_types)]
X86_64_V3,
#[serde(rename = "x86-64-v3-staged")]
#[allow(non_camel_case_types)]
X86_64_V3_Staged,
Neon,
Scalar,
Reference,
Expand All @@ -37,6 +40,7 @@ impl std::fmt::Display for BenchIsa {
let st = match self {
Self::X86_64_V4 => "x86-64-v4",
Self::X86_64_V3 => "x86-64-v3",
Self::X86_64_V3_Staged => "x86-64-v3-staged",
Self::Neon => "neon",
Self::Scalar => "scalar",
Self::Reference => "reference",
Expand All @@ -51,6 +55,7 @@ impl From<BenchIsa> for MaxSimIsa {
match b {
BenchIsa::X86_64_V4 => MaxSimIsa::X86_64_V4,
BenchIsa::X86_64_V3 => MaxSimIsa::X86_64_V3,
BenchIsa::X86_64_V3_Staged => MaxSimIsa::X86_64_V3_Staged,
BenchIsa::Neon => MaxSimIsa::Neon,
BenchIsa::Scalar => MaxSimIsa::Scalar,
BenchIsa::Reference => MaxSimIsa::Reference,
Expand Down Expand Up @@ -149,3 +154,80 @@ impl std::fmt::Display for MultiVectorOp {
Ok(())
}
}

///////////////////////////////
// Multi-Vector Quantized Op //
///////////////////////////////

/// Benchmark input for the A/B comparison of **quantized** (4-bit MinMax)
/// multi-vector MaxSim: the experimental staged integer kernel is measured
/// against the scalar `MinMaxKernel` reference, using the same shapes and the
/// same quantization for both.
///
/// Neither the element type nor the ISA is a JSON field. The input is always
/// f32 compressed to 4-bit MinMax codes, and the ISA is pinned to V3/AVX2
/// because that is the only quantized staged kernel. Like the kernel it
/// drives, this type is only available on x86_64.
#[cfg(all(feature = "multi-vector", target_arch = "x86_64"))]
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct MultiVectorQuantOp {
    /// The benchmark configurations to execute, one entry per run.
    pub(crate) runs: Vec<Run>,
}

#[cfg(all(feature = "multi-vector", target_arch = "x86_64"))]
impl MultiVectorQuantOp {
    /// Returns the tag string for this input kind.
    ///
    /// Usable in `const` contexts; the `Input` and `Display` implementations
    /// for this type both forward to it.
    pub(crate) const fn tag() -> &'static str {
        "multi-vector-quant-op"
    }
}

#[cfg(all(feature = "multi-vector", target_arch = "x86_64"))]
impl Input for MultiVectorQuantOp {
    /// The deserialized form needs no post-processing, so the raw type is the
    /// input type itself.
    type Raw = Self;

    /// Returns the tag string for this input kind.
    fn tag() -> &'static str {
        // Inherent associated functions take precedence over trait ones in
        // path resolution, so this calls the inherent `const fn tag` rather
        // than recursing into this trait method.
        Self::tag()
    }

    /// Accepts the raw value unchanged; the checker is not consulted.
    fn from_raw(raw: Self::Raw, _checker: &mut Checker) -> anyhow::Result<Self> {
        Ok(raw)
    }

    /// Converts this input into a JSON value via its `Serialize` implementation.
    fn serialize(&self) -> anyhow::Result<serde_json::Value> {
        let value = serde_json::to_value(self)?;
        Ok(value)
    }

    /// Builds a two-run example whose runs differ only in the number of query
    /// vectors (32, then 64).
    fn example() -> Self {
        // Settings shared by every example run.
        const NUM_DOC_VECTORS: NonZeroUsize = NonZeroUsize::new(64).unwrap();
        const DIM: NonZeroUsize = NonZeroUsize::new(128).unwrap();
        const LOOPS_PER_MEASUREMENT: NonZeroUsize = NonZeroUsize::new(50).unwrap();
        const NUM_MEASUREMENTS: NonZeroUsize = NonZeroUsize::new(20).unwrap();

        // The one setting that varies between runs, in run order.
        const QUERY_COUNTS: [NonZeroUsize; 2] = [
            NonZeroUsize::new(32).unwrap(),
            NonZeroUsize::new(64).unwrap(),
        ];

        let runs: Vec<Run> = QUERY_COUNTS
            .iter()
            .map(|&num_query_vectors| Run {
                num_query_vectors,
                num_doc_vectors: NUM_DOC_VECTORS,
                dim: DIM,
                loops_per_measurement: LOOPS_PER_MEASUREMENT,
                num_measurements: NUM_MEASUREMENTS,
            })
            .collect();

        Self { runs }
    }
}

#[cfg(all(feature = "multi-vector", target_arch = "x86_64"))]
impl std::fmt::Display for MultiVectorQuantOp {
    /// Writes a short human-readable summary: a title line followed by a blank
    /// line, then the tag and the number of configured runs.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let run_count = self.runs.len();

        // The trailing `\n` in the literal plus `writeln!`'s own newline
        // leaves an empty line between the title and the fields.
        writeln!(f, "Multi-Vector Quantized Operation (4-bit MinMax)\n")?;
        write_field!(f, "tag", Self::tag())?;
        write_field!(f, "number of runs", run_count)?;
        Ok(())
    }
}
8 changes: 7 additions & 1 deletion diskann-benchmark/src/multi_vector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,15 @@ cfg_if::cfg_if! {
if #[cfg(feature = "multi-vector")] {
mod driver;
mod kernels;
// The quantized A/B op drives the V3-only staged integer kernel.
#[cfg(target_arch = "x86_64")]
mod quant;

pub(super) fn register_benchmarks(registry: &mut Registry) -> anyhow::Result<()> {
kernels::register(registry)
kernels::register(registry)?;
#[cfg(target_arch = "x86_64")]
quant::register(registry)?;
Ok(())
}
} else {
crate::utils::stub_impl!("multi-vector", inputs::multi_vector::MultiVectorOp);
Expand Down
Loading
Loading