Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ members = [
"diskann-benchmark",
"diskann-tools",
"vectorset",
"diskann-bftree",
"diskann-bftree", "diskann-inmem",
]

default-members = [
Expand Down Expand Up @@ -59,6 +59,7 @@ diskann-platform = { path = "diskann-platform", version = "0.54.0" }
diskann = { path = "diskann", version = "0.54.0" }
# Providers
diskann-providers = { path = "diskann-providers", default-features = false, version = "0.54.0" }
diskann-inmem = { path = "diskann-inmem", default-features = false, version = "0.54.0" }
diskann-disk = { path = "diskann-disk", version = "0.54.0" }
diskann-label-filter = { path = "diskann-label-filter", version = "0.54.0" }
# Infra
Expand Down Expand Up @@ -118,3 +119,7 @@ opt-level = 1
debug = true
debug-assertions = true
overflow-checks = true

[profile.samply]
inherits = "release"
debug = true
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use super::{parsing, validate};
///
/// If using this struct as a [`streaming::Executor`], consider using the
/// [`super::WithData`] adaptor to provide dataset and query matrices.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct RunBook {
// The individual runbook stages.
stages: Vec<Stage>,
Expand Down
6 changes: 3 additions & 3 deletions diskann-benchmark-runner/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ impl App {
writeln!(output)?;
} else {
writeln!(output)?;
write!(output, "{}", Indent::new(&description, 8))?;
writeln!(output, "{}", Indent::new(&description, 8))?;
}
}
}
Expand Down Expand Up @@ -258,8 +258,8 @@ impl App {
)?;
writeln!(output, "Closest matches:\n")?;
for (i, mismatch) in mismatches.into_iter().enumerate() {
writeln!(output, " {}. \"{}\":", i + 1, mismatch.method(),)?;
writeln!(output, "{}", Indent::new(mismatch.reason(), 8),)?;
writeln!(output, " {}. \"{}\":", i + 1, mismatch.method())?;
writeln!(output, "{}\n", Indent::new(mismatch.reason(), 8))?;
}
writeln!(output)?;

Expand Down
30 changes: 30 additions & 0 deletions diskann-benchmark-runner/src/checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,36 @@ impl Checker {
self.search_directories(),
)))
}

/// Resolve `dir` to an existing directory.
///
/// An absolute `dir` is accepted only if it already names a directory; the search
/// directories are not consulted for it. A relative `dir` is joined onto each entry of
/// `self.search_directories()` in order and the first join that names a directory is
/// returned.
///
/// # Errors
///
/// Returns an error if `dir` is absolute and is not an existing directory, or if `dir`
/// is relative and no search directory contains it as a directory.
pub fn __check_dir(&self, dir: &Path) -> Result<PathBuf, anyhow::Error> {
    // Absolute paths are taken as-is: either they name a directory or we bail.
    if dir.is_absolute() {
        return if dir.is_dir() {
            Ok(dir.into())
        } else {
            Err(anyhow::Error::msg(format!(
                "input directory with absolute path \"{}\" either does not exist or is not a directory",
                dir.display()
            )))
        };
    }

    // Relative path: probe each search directory in order, first hit wins.
    for d in self.search_directories() {
        let absolute = d.join(dir);
        if absolute.is_dir() {
            return Ok(absolute);
        }
    }
    Err(anyhow::Error::msg(format!(
        "could not find input directory \"{}\" in the search directories \"{:?}\"",
        dir.display(),
        self.search_directories(),
    )))
}
}

///////////
Expand Down
6 changes: 6 additions & 0 deletions diskann-benchmark-runner/src/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ impl std::ops::Deref for InputFile {
}
}

impl std::fmt::Display for InputFile {
    /// Write whatever `self.display()` renders to the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE(review): `display()` is presumably `Path::display` reached through the
        // `Deref` impl above — confirm against the `Deref::Target`.
        let shown = self.display();
        write!(f, "{shown}")
    }
}

///////////
// Tests //
///////////
Expand Down
153 changes: 146 additions & 7 deletions diskann-benchmark-runner/src/utils/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ impl std::fmt::Display for Banner<'_> {
/// use diskann_benchmark_runner::utils::fmt::Indent;
///
/// let indented = Indent::new("hello\nworld", 4).to_string();
/// assert_eq!(indented, " hello\n world\n");
/// assert_eq!(indented, " hello\n world");
/// ```
#[derive(Debug, Clone, Copy)]
pub struct Indent<'a> {
Expand All @@ -221,9 +221,15 @@ impl<'a> Indent<'a> {
impl std::fmt::Display for Indent<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let spaces = self.spaces;
self.string
.lines()
.try_for_each(|ln| writeln!(f, "{: >spaces$}{}", "", ln))
let mut first = true;
for ln in self.string.lines() {
if !first {
writeln!(f)?;
}
write!(f, "{: >spaces$}{}", "", ln)?;
first = false;
}
Ok(())
}
}

Expand Down Expand Up @@ -369,6 +375,139 @@ where
}
}

//////////////
// KeyValue //
//////////////

/// A displayable value that is either borrowed and formatted on demand, or text that
/// was already rendered.
enum MaybeLazy<'a> {
    /// Borrowed value; its `Display` implementation runs each time it is formatted.
    Lazy(&'a dyn std::fmt::Display),
    /// Text rendered ahead of time.
    Eager(String),
}

impl std::fmt::Display for MaybeLazy<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::Eager(ref text) => f.write_str(text),
            Self::Lazy(value) => write!(f, "{value}"),
        }
    }
}

impl std::fmt::Debug for MaybeLazy<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Adapter whose `Debug` output is the wrapped value's `Display` output, so a
        /// `dyn Display` can be passed to `debug_tuple`.
        struct ViaDisplay<'a>(&'a dyn std::fmt::Display);

        impl std::fmt::Debug for ViaDisplay<'_> {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(self.0, f)
            }
        }

        match *self {
            Self::Eager(ref text) => f.debug_tuple("MaybeLazy::Eager").field(text).finish(),
            Self::Lazy(value) => f
                .debug_tuple("MaybeLazy::Lazy")
                .field(&ViaDisplay(value))
                .finish(),
        }
    }
}

/// Display a dynamic list of key-value pairs in a YAML-like style.
///
/// Keys are left-aligned. Single-line values are padded so that they all start in the
/// same column, one space past the colon of the longest key. A value that renders to
/// multiple lines (for example a nested [`KeyValue`] or any other multi-line block) is
/// placed on the lines following its key, indented by two spaces. This keeps nested
/// structures visibly subordinate to their key regardless of whether the value is itself
/// a key-value list or an opaque block.
///
/// Entries are separated by newlines; the output has no trailing newline.
///
/// # Examples
///
/// ```
/// use diskann_benchmark_runner::utils::fmt::KeyValue;
///
/// let mut kv = KeyValue::new();
/// kv.push("a", &1);
/// kv.push("hello", &"world");
///
/// // The value of the short key is padded out to the column set by the longest key.
/// let expected = "a:     1\nhello: world";
///
/// assert_eq!(kv.to_string(), expected);
/// ```
///
/// Multi-line values are indented beneath their key:
///
/// ```
/// use diskann_benchmark_runner::utils::fmt::KeyValue;
///
/// let mut inner = KeyValue::new();
/// inner.push("x", &1);
/// inner.push("yy", &2);
/// let inner = inner.to_string();
///
/// let mut kv = KeyValue::new();
/// kv.push("name", &"example");
/// kv.push("nested", &inner);
///
/// let expected = "name:   example\nnested:\n  x:  1\n  yy: 2";
///
/// assert_eq!(kv.to_string(), expected);
/// ```
#[derive(Debug, Default)]
pub struct KeyValue<'a> {
    // Key-value pairs in the order they were pushed.
    kv: Vec<(&'a str, MaybeLazy<'a>)>,
    // Largest `key.len()` (byte length) among the pushed keys; sets the value column.
    max_key_length: usize,
}

impl<'a> KeyValue<'a> {
    /// Construct a formatter holding no entries.
    pub fn new() -> Self {
        Self::default()
    }

    /// Append `key` with a borrowed `value`; the value is formatted only when `self` is
    /// displayed.
    pub fn push(&mut self, key: &'a str, value: &'a dyn std::fmt::Display) {
        // Track the longest key (in bytes) so values can be aligned at display time.
        if key.len() > self.max_key_length {
            self.max_key_length = key.len();
        }
        self.kv.push((key, MaybeLazy::Lazy(value)));
    }

    /// Append `key` with `value` rendered to a `String` immediately, so `value` does not
    /// need to outlive this call.
    pub fn push_eager<D>(&mut self, key: &'a str, value: D)
    where
        D: std::fmt::Display,
    {
        if key.len() > self.max_key_length {
            self.max_key_length = key.len();
        }
        let rendered = value.to_string();
        self.kv.push((key, MaybeLazy::Eager(rendered)));
    }

    /// Write the `Display` rendering of `self` into `f`.
    pub fn render(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{self}")
    }
}

impl std::fmt::Display for KeyValue<'_> {
    /// Write one entry per key, separated (not terminated) by newlines.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Single-line values start one column past the colon of the longest key.
        let column = self.max_key_length + 1;
        for (index, (key, value)) in self.kv.iter().enumerate() {
            // Render up front: the layout depends on whether the text spans lines.
            let rendered = value.to_string();
            let separator = if index == 0 { "" } else { "\n" };
            if !rendered.contains('\n') {
                let pad = column.saturating_sub(key.len());
                write!(f, "{separator}{key}:{:pad$}{rendered}", "")?;
            } else {
                // Multi-line values go on the lines below the key, indented two spaces.
                write!(f, "{separator}{key}:\n{}", Indent::new(&rendered, 2))?;
            }
        }
        Ok(())
    }
}

///////////
// Tests //
///////////
Expand Down Expand Up @@ -511,19 +650,19 @@ string, , string
#[test]
fn test_indent_single_line() {
let s = Indent::new("hello", 4).to_string();
assert_eq!(s, " hello\n");
assert_eq!(s, " hello");
}

#[test]
fn test_indent_multi_line() {
let s = Indent::new("hello\nworld\nfoo", 2).to_string();
assert_eq!(s, " hello\n world\n foo\n");
assert_eq!(s, " hello\n world\n foo");
}

#[test]
fn test_indent_zero_spaces() {
let s = Indent::new("hello\nworld", 0).to_string();
assert_eq!(s, "hello\nworld\n");
assert_eq!(s, "hello\nworld");
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions diskann-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ opentelemetry_sdk = { workspace = true, optional = true }
scopeguard = { version = "1.2", optional = true }
diskann-benchmark-core = { workspace = true, features = ["bigann"] }
itertools.workspace = true
diskann-inmem = { workspace = true }

[lints]
clippy.undocumented_unsafe_blocks = "warn"
Expand Down
Loading
Loading