Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,10 @@ args = [
# | baseline-live-docker-clean | command | |
# | baseline-production-synthetic | command | |
# | baseline-production-private | command | |
# | baseline-production-private-addendum | command | |
# | baseline-backfill-10k-docker | command | |
# | baseline-backfill-100k-docker | command | |
# | baseline-soak-docker | command | |

[tasks.baseline-live-docker]
workspace = false
Expand Down Expand Up @@ -354,6 +358,38 @@ args = [
"set -euo pipefail; manifest=\"$(printenv ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST || true)\"; if [ -z \"$manifest\" ]; then echo \"ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST is required for baseline-production-private\" >&2; exit 1; fi; head=\"$(git rev-parse HEAD)\"; if [ -n \"$(git status --porcelain)\" ]; then head=\"$head+dirty\"; fi; selected_projects=\"$(printenv ELF_BASELINE_PROJECTS || true)\"; if [ -z \"$selected_projects\" ]; then selected_projects=\"ELF\"; fi; export ELF_BASELINE_ELF_HEAD=\"$head\"; export ELF_BASELINE_PROJECTS=\"$selected_projects\"; export ELF_BASELINE_PROFILE=production-private; docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner",
]

# Runs the `production-private` baseline profile in the Docker baseline runner, then
# runs `cargo make baseline-live-report` with ELF_BASELINE_MARKDOWN_REPORT set to the
# addendum path and prints that path.
#
# Inputs (environment):
#   ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST  required; the task exits 1 when it is empty
#   ELF_BASELINE_PROJECTS                    optional; defaults to ELF
#   ELF_BASELINE_PRIVATE_ADDENDUM            optional; defaults to
#                                            tmp/live-baseline/private-production-addendum.md
#
# ELF_BASELINE_ELF_HEAD is exported as the current commit, suffixed with `+dirty` when
# `git status --porcelain` prints anything.
#
# NOTE(review): values are read with `printenv` and the script avoids the brace form of
# shell expansion, presumably because cargo-make expands that form in args itself -
# confirm before switching to shell parameter defaults.
[tasks.baseline-production-private-addendum]
workspace = false
command = "bash"
args = [
"-lc",
'''
set -euo pipefail

manifest="$(printenv ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST || true)"
if [ -z "$manifest" ]; then
  echo "ELF_BASELINE_PRODUCTION_CORPUS_MANIFEST is required for baseline-production-private-addendum" >&2
  exit 1
fi

head="$(git rev-parse HEAD)"
if [ -n "$(git status --porcelain)" ]; then
  head="$head+dirty"
fi

selected_projects="$(printenv ELF_BASELINE_PROJECTS || true)"
[ -n "$selected_projects" ] || selected_projects="ELF"

addendum="$(printenv ELF_BASELINE_PRIVATE_ADDENDUM || true)"
[ -n "$addendum" ] || addendum="tmp/live-baseline/private-production-addendum.md"

export ELF_BASELINE_ELF_HEAD="$head"
export ELF_BASELINE_PROJECTS="$selected_projects"
export ELF_BASELINE_PROFILE=production-private

docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner
ELF_BASELINE_MARKDOWN_REPORT="$addendum" cargo make baseline-live-report
echo "Private production addendum: $addendum"
''',
]

# Runs the `backfill` baseline profile for the ELF project in the Docker baseline runner.
#
# Inputs (environment), each optional:
#   ELF_BASELINE_BACKFILL_DOCS        defaults to 10000
#   ELF_BASELINE_ELF_TIMEOUT_SECONDS  defaults to 14400
#   ELF_BASELINE_MAX_ELF_SECONDS      defaults to the resolved timeout above
#
# ELF_BASELINE_ELF_HEAD is exported as the current commit, suffixed with `+dirty` when
# `git status --porcelain` prints anything.
#
# NOTE(review): values are read with `printenv` and the script avoids the brace form of
# shell expansion, presumably because cargo-make expands that form in args itself -
# confirm before switching to shell parameter defaults.
[tasks.baseline-backfill-10k-docker]
workspace = false
command = "bash"
args = [
"-lc",
'''
set -euo pipefail

head="$(git rev-parse HEAD)"
if [ -n "$(git status --porcelain)" ]; then
  head="$head+dirty"
fi

backfill_docs="$(printenv ELF_BASELINE_BACKFILL_DOCS || true)"
[ -n "$backfill_docs" ] || backfill_docs="10000"

elf_timeout="$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)"
[ -n "$elf_timeout" ] || elf_timeout="14400"

max_elf_seconds="$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)"
[ -n "$max_elf_seconds" ] || max_elf_seconds="$elf_timeout"

export ELF_BASELINE_ELF_HEAD="$head"
export ELF_BASELINE_PROJECTS=ELF
export ELF_BASELINE_PROFILE=backfill
export ELF_BASELINE_BACKFILL_DOCS="$backfill_docs"
export ELF_BASELINE_ELF_TIMEOUT_SECONDS="$elf_timeout"
export ELF_BASELINE_MAX_ELF_SECONDS="$max_elf_seconds"

docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner
''',
]

# Runs the `backfill` baseline profile for the ELF project at the 100k-document scale
# in the Docker baseline runner. The task refuses to start (exit 1) unless
# ELF_BASELINE_ENABLE_EXPENSIVE is exactly `1`.
#
# Inputs (environment):
#   ELF_BASELINE_ENABLE_EXPENSIVE     required; must equal 1
#   ELF_BASELINE_BACKFILL_DOCS        optional; defaults to 100000
#   ELF_BASELINE_ELF_TIMEOUT_SECONDS  optional; defaults to 86400
#   ELF_BASELINE_MAX_ELF_SECONDS      optional; defaults to the resolved timeout above
#
# ELF_BASELINE_ELF_HEAD is exported as the current commit, suffixed with `+dirty` when
# `git status --porcelain` prints anything.
#
# NOTE(review): values are read with `printenv` and the script avoids the brace form of
# shell expansion, presumably because cargo-make expands that form in args itself -
# confirm before switching to shell parameter defaults.
[tasks.baseline-backfill-100k-docker]
workspace = false
command = "bash"
args = [
"-lc",
'''
set -euo pipefail

enabled="$(printenv ELF_BASELINE_ENABLE_EXPENSIVE || true)"
if [ "$enabled" != "1" ]; then
  echo "ELF_BASELINE_ENABLE_EXPENSIVE=1 is required for baseline-backfill-100k-docker" >&2
  exit 1
fi

head="$(git rev-parse HEAD)"
if [ -n "$(git status --porcelain)" ]; then
  head="$head+dirty"
fi

backfill_docs="$(printenv ELF_BASELINE_BACKFILL_DOCS || true)"
[ -n "$backfill_docs" ] || backfill_docs="100000"

elf_timeout="$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)"
[ -n "$elf_timeout" ] || elf_timeout="86400"

max_elf_seconds="$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)"
[ -n "$max_elf_seconds" ] || max_elf_seconds="$elf_timeout"

export ELF_BASELINE_ELF_HEAD="$head"
export ELF_BASELINE_PROJECTS=ELF
export ELF_BASELINE_PROFILE=backfill
export ELF_BASELINE_BACKFILL_DOCS="$backfill_docs"
export ELF_BASELINE_ELF_TIMEOUT_SECONDS="$elf_timeout"
export ELF_BASELINE_MAX_ELF_SECONDS="$max_elf_seconds"

docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner
''',
]

# Runs the `stress` baseline profile for the ELF project as a timed soak in the Docker
# baseline runner.
#
# Inputs (environment), each optional:
#   ELF_BASELINE_SOAK_SECONDS         defaults to 3600
#   ELF_BASELINE_ELF_TIMEOUT_SECONDS  defaults to the soak duration plus 1800 seconds
#   ELF_BASELINE_MAX_ELF_SECONDS      defaults to the resolved timeout above
#
# When the timeout has to be derived, ELF_BASELINE_SOAK_SECONDS must consist of decimal
# digits only; otherwise the task exits 1 with a message naming the variable. An
# explicitly supplied ELF_BASELINE_ELF_TIMEOUT_SECONDS skips both the arithmetic and
# the check, so the soak value is then passed through unchanged.
#
# ELF_BASELINE_ELF_HEAD is exported as the current commit, suffixed with `+dirty` when
# `git status --porcelain` prints anything.
#
# NOTE(review): values are read with `printenv` and the script avoids the brace form of
# shell expansion, presumably because cargo-make expands that form in args itself -
# confirm before switching to shell parameter defaults.
[tasks.baseline-soak-docker]
workspace = false
command = "bash"
args = [
"-lc",
'''
set -euo pipefail

head="$(git rev-parse HEAD)"
if [ -n "$(git status --porcelain)" ]; then
  head="$head+dirty"
fi

soak_seconds="$(printenv ELF_BASELINE_SOAK_SECONDS || true)"
if [ -z "$soak_seconds" ]; then
  soak_seconds="3600"
fi

elf_timeout="$(printenv ELF_BASELINE_ELF_TIMEOUT_SECONDS || true)"
if [ -z "$elf_timeout" ]; then
  # Arithmetic expansion evaluates its operand as an expression, so reject anything
  # that is not plain digits before it gets there.
  case "$soak_seconds" in
    *[!0-9]*)
      echo "ELF_BASELINE_SOAK_SECONDS must be a whole number of seconds, got: $soak_seconds" >&2
      exit 1
      ;;
  esac
  # 10# forces base 10 so a leading zero (for example 0900) is not parsed as octal.
  elf_timeout="$((10#$soak_seconds + 1800))"
fi

max_elf_seconds="$(printenv ELF_BASELINE_MAX_ELF_SECONDS || true)"
if [ -z "$max_elf_seconds" ]; then
  max_elf_seconds="$elf_timeout"
fi

export ELF_BASELINE_ELF_HEAD="$head"
export ELF_BASELINE_PROJECTS=ELF
export ELF_BASELINE_PROFILE=stress
export ELF_BASELINE_SOAK_SECONDS="$soak_seconds"
export ELF_BASELINE_ELF_TIMEOUT_SECONDS="$elf_timeout"
export ELF_BASELINE_MAX_ELF_SECONDS="$max_elf_seconds"

docker compose -f docker-compose.baseline.yml run --build --rm baseline-runner
''',
]


# Real-world job benchmark smoke
# | task | type | cwd |
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,12 @@ with the production embedding provider path, `Qwen3-Embedding-8B`, and
those states are reported as limitations, not hidden as proof.
- The benchmark runner and report publisher are checked in and Docker-isolated:
`cargo make baseline-live-docker`, `cargo make baseline-backfill-docker`,
`cargo make baseline-live-report`, and `cargo make baseline-live-docker-clean`.
`cargo make baseline-production-private-addendum`,
`cargo make baseline-backfill-10k-docker`,
`cargo make baseline-backfill-100k-docker`,
`cargo make baseline-soak-docker`, `cargo make baseline-live-report`, and
`cargo make baseline-live-docker-clean`. The expensive 100k and long-soak profiles do
not run in normal checks, and the 100k backfill additionally refuses to start unless
`ELF_BASELINE_ENABLE_EXPENSIVE=1` is set.

Detailed evidence and interpretation:

Expand Down
Loading