diff --git a/scripts/hooks/background-memory-update b/scripts/hooks/background-memory-update index 605d48f2..3f054596 100755 --- a/scripts/hooks/background-memory-update +++ b/scripts/hooks/background-memory-update @@ -45,7 +45,12 @@ source "$SCRIPT_DIR/get-mtime" || { echo "background-memory-update: failed to so log "Starting (CWD=$CWD)" # --- Resolve paths --- -DEVFLOW_DIR="$CWD/.devflow" +# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when the +# worker is spawned with a CWD inside .devflow/...). Empty → fall back to CWD. +source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true +PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)" +[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD" +DEVFLOW_DIR="$PROJECT_ROOT/.devflow" MEMORY_DIR="$DEVFLOW_DIR/memory" DREAM_DIR="$DEVFLOW_DIR/dream" QUEUE_FILE="$MEMORY_DIR/.pending-turns.jsonl" diff --git a/scripts/hooks/dream-capture b/scripts/hooks/dream-capture index 17a6e22b..dc747ad7 100755 --- a/scripts/hooks/dream-capture +++ b/scripts/hooks/dream-capture @@ -57,7 +57,13 @@ dbg "CWD=$CWD" # Only log metadata (length, keys, presence checks). dbg "ASSISTANT_MSG length=${#ASSISTANT_MSG}" -DEVFLOW_DIR="$CWD/.devflow" +# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this +# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior). 
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true +PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)" +[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD" + +DEVFLOW_DIR="$PROJECT_ROOT/.devflow" MEMORY_DIR="$DEVFLOW_DIR/memory" DREAM_DIR="$DEVFLOW_DIR/dream" @@ -98,7 +104,7 @@ if [ -f "$SCANNER" ] && printf '%s' "$ASSISTANT_MSG" | grep -qE 'ADR-[0-9]+|PF-[ [ -f "$DEVFLOW_DIR/decisions/.disabled" ] && _DEC_ENABLED_CAPTURE="false" if [ "$_DEC_ENABLED_CAPTURE" = "true" ]; then dbg "Running decisions usage scanner" - printf '%s' "$ASSISTANT_MSG" | node "$SCANNER" --cwd "$CWD" 2>/dev/null || true + printf '%s' "$ASSISTANT_MSG" | node "$SCANNER" --cwd "$PROJECT_ROOT" 2>/dev/null || true fi fi diff --git a/scripts/hooks/dream-dispatch b/scripts/hooks/dream-dispatch index f61add5a..364e8265 100755 --- a/scripts/hooks/dream-dispatch +++ b/scripts/hooks/dream-dispatch @@ -33,7 +33,13 @@ if [ -z "$CWD" ] || [ ! -d "$CWD" ]; then dbg "EXIT: bad CWD"; exit 0; fi devflow_debug_set_cwd "$CWD" -DEVFLOW_DIR="$CWD/.devflow" +# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this +# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior). +source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true +PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)" +[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD" + +DEVFLOW_DIR="$PROJECT_ROOT/.devflow" MEMORY_DIR="$DEVFLOW_DIR/memory" DREAM_DIR="$DEVFLOW_DIR/dream" diff --git a/scripts/hooks/dream-evaluate b/scripts/hooks/dream-evaluate index 0cb90df4..0ff08300 100755 --- a/scripts/hooks/dream-evaluate +++ b/scripts/hooks/dream-evaluate @@ -33,7 +33,13 @@ if [ -z "$CWD" ] || [ ! -d "$CWD" ]; then dbg "EXIT: bad CWD"; exit 0; fi devflow_debug_set_cwd "$CWD" dbg "CWD=$CWD" -DEVFLOW_DIR="$CWD/.devflow" +# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this +# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior). 
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true +PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)" +[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD" + +DEVFLOW_DIR="$PROJECT_ROOT/.devflow" [ ! -d "$DEVFLOW_DIR" ] && exit 0 MEMORY_DIR="$DEVFLOW_DIR/memory" diff --git a/scripts/hooks/ensure-devflow-init b/scripts/hooks/ensure-devflow-init index 4dd2335a..158d2c5c 100755 --- a/scripts/hooks/ensure-devflow-init +++ b/scripts/hooks/ensure-devflow-init @@ -7,7 +7,15 @@ [ -z "$1" ] && return 1 -_DEVFLOW_DIR="$1/.devflow" +# Anchor to the project root so a CWD nested inside .devflow/ never scaffolds a +# stray nested .devflow/. Resolve our own dir via BASH_SOURCE (works whether sourced +# by a hook or directly by tests). Empty → fall back to "$1" (old behavior). +_EDI_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$_EDI_DIR/resolve-project-root" 2>/dev/null || true +_EDI_ROOT="$(df_resolve_root "$1" 2>/dev/null || true)" +[ -n "$_EDI_ROOT" ] || _EDI_ROOT="$1" + +_DEVFLOW_DIR="$_EDI_ROOT/.devflow" # Fast-path: if all subdirectories already exist, skip mkdir and gitignore setup if [ -d "$_DEVFLOW_DIR/memory" ] && [ -d "$_DEVFLOW_DIR/docs" ] && \ @@ -34,8 +42,6 @@ fi # One-time root .gitignore setup — delegated to the sibling ensure-root-gitignore # helper (single source of truth) so the always-on, memory-independent -# session-start-context hook applies the identical rule. Resolve our own directory -# via BASH_SOURCE so this works whether sourced by a hook (which sets SCRIPT_DIR) or -# sourced directly (tests source this file with no SCRIPT_DIR in scope). -_EDI_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "$_EDI_DIR/ensure-root-gitignore" "$1" +# session-start-context hook applies the identical rule. _EDI_DIR and _EDI_ROOT +# were resolved at the top of this script. 
+source "$_EDI_DIR/ensure-root-gitignore" "$_EDI_ROOT" diff --git a/scripts/hooks/pre-compact-memory b/scripts/hooks/pre-compact-memory index a936a20f..98b80d57 100644 --- a/scripts/hooks/pre-compact-memory +++ b/scripts/hooks/pre-compact-memory @@ -31,7 +31,13 @@ fi devflow_debug_set_cwd "$CWD" dbg "CWD=$CWD" -DEVFLOW_DIR="$CWD/.devflow" +# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this +# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior). +source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true +PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)" +[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD" + +DEVFLOW_DIR="$PROJECT_ROOT/.devflow" MEMORY_DIR="$DEVFLOW_DIR/memory" DREAM_DIR="$DEVFLOW_DIR/dream" diff --git a/scripts/hooks/resolve-project-root b/scripts/hooks/resolve-project-root new file mode 100644 index 00000000..4fac11a4 --- /dev/null +++ b/scripts/hooks/resolve-project-root @@ -0,0 +1,37 @@ +#!/bin/bash +# resolve-project-root — anchor .devflow/ resolution to the project root. +# +# Hooks compute their .devflow/ path from the session/worker CWD. When a worker +# is spawned with a CWD deep inside the project — e.g. inside +# .devflow/docs/<...>/tickets/ — computing "$CWD/.devflow" there scaffolds a +# STRAY nested .devflow/. This helper resolves any CWD back to the real project +# root, mirroring the TS CLI's getGitRoot() (src/cli/utils/git.ts) so the shell +# side anchors identically. +# +# Sourced by: the memory/dream/session hooks and ensure-devflow-init. +# Sourced helper: uses `return`-free pure function; _-prefixed locals (never +# clobbers caller vars). Safe under `set -e` (git failure is guarded with || true). +# +# Usage: +# source resolve-project-root +# PROJECT_ROOT="$(df_resolve_root "$CWD")" +# +# df_resolve_root prints the project root for : +# 1. 
git top-level — git walks up to the real repo root even from a +# .devflow/-nested subdir, so the nested case is fixed for git repos. +# 2. non-git fallback — strip from the first /.devflow/ onward (or a trailing +# /.devflow), else print unchanged. +df_resolve_root() { + local _cwd="$1" _root="" + # || true: under `set -e` a failing substitution would abort the caller (e.g. a non-git path); empty output routes to the fallback. + # Empty arg skips git: `git -C ""` keeps the process's own working directory and would report an unrelated repo; printing empty lets the caller fall back. + [ -z "$_cwd" ] || _root="$(git -C "$_cwd" rev-parse --show-toplevel 2>/dev/null || true)" + if [ -z "$_root" ]; then + case "$_cwd" in + */.devflow/*) _root="${_cwd%%/.devflow/*}" ;; + */.devflow) _root="${_cwd%/.devflow}" ;; + *) _root="$_cwd" ;; + esac + fi + printf '%s\n' "$_root" +} diff --git a/scripts/hooks/session-start-context b/scripts/hooks/session-start-context index 0e6eea25..55ae1af0 100755 --- a/scripts/hooks/session-start-context +++ b/scripts/hooks/session-start-context @@ -32,16 +32,22 @@ fi devflow_debug_set_cwd "$CWD" dbg "CWD=$CWD" +# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this +# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior). 
-[ -d "$CWD" ] && [ -f "$SCRIPT_DIR/ensure-root-gitignore" ] && source "$SCRIPT_DIR/ensure-root-gitignore" "$CWD" || true +[ -d "$PROJECT_ROOT" ] && [ -f "$SCRIPT_DIR/ensure-root-gitignore" ] && source "$SCRIPT_DIR/ensure-root-gitignore" "$PROJECT_ROOT" || true CONTEXT="" -DEVFLOW_DIR="$CWD/.devflow" +DEVFLOW_DIR="$PROJECT_ROOT/.devflow" MEMORY_DIR="$DEVFLOW_DIR/memory" DREAM_DIR="$DEVFLOW_DIR/dream" DECISIONS_CONTENT_DIR="$DEVFLOW_DIR/decisions" diff --git a/scripts/hooks/session-start-memory b/scripts/hooks/session-start-memory index 36a4cede..cbf98967 100644 --- a/scripts/hooks/session-start-memory +++ b/scripts/hooks/session-start-memory @@ -31,7 +31,13 @@ fi devflow_debug_set_cwd "$CWD" dbg "CWD=$CWD" -DEVFLOW_DIR="$CWD/.devflow" +# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this +# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior). +source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true +PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)" +[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD" + +DEVFLOW_DIR="$PROJECT_ROOT/.devflow" MEMORY_DIR="$DEVFLOW_DIR/memory" DREAM_DIR="$DEVFLOW_DIR/dream" diff --git a/shared/agents/coder.md b/shared/agents/coder.md index 5a8cd4cd..90d72b24 100644 --- a/shared/agents/coder.md +++ b/shared/agents/coder.md @@ -89,6 +89,19 @@ When you apply a decision from `.devflow/decisions/decisions.md` or avoid a pitf 8. **Generate handoff** (if HANDOFF_REQUIRED=true): Include implementation summary for next Coder (see Output section). +## Long-running commands (self-verifying builds/tests that may run >120s) + +You run builds and tests to verify your own work — including **self-verifying that each fix compiles** when no separate Validator runs between review cycles. A plain `Bash` call defaults to a 120s timeout, and inside a dynamic Workflow a sub-agent that emits no output for 180s is KILLED ("agent stalled"). 
For any build/test that may run silent longer than ~120s (cold `cargo build`/`cargo test`, large `tsc`, `gradle`, `go build ./...`), do NOT run it as one silent foreground command. Instead: + +1. Run it in the BACKGROUND with the Bash tool (`run_in_background: true`), capturing output + exit code under a unique `<id>` reused in step 2: + `<command> > /tmp/df-build-<id>.log 2>&1; echo "EXIT=$?" > /tmp/df-build-<id>.done` +2. Poll with the `Monitor` tool (load it via ToolSearch `select:Monitor` if it is not available): set `persistent: false`, `timeout_ms` above the expected run time (e.g. 600000), and + `command: until [ -f /tmp/df-build-<id>.done ]; do echo building; sleep 25; done; echo DONE; cat /tmp/df-build-<id>.done` + The 25s heartbeat (≪ 180s) is delivered as a notification that keeps you alive past the watchdog. +3. When the monitor reports `DONE`: the command PASSED iff the `.done` file contains `EXIT=0`. Read the `.log`, fix any failures, and only then proceed. + +For a foreground command that exceeds the 120s default but stays under 180s, pass an explicit higher `timeout` to the Bash tool (up to 600000ms). Prefer package-scoped commands (`cargo build -p <crate>`) during the engine; the full-workspace regression is the human's job after the wave. + ## Principles 1. **Work on feature branch** - All operations happen on the current feature branch diff --git a/shared/agents/tester.md b/shared/agents/tester.md index d8541137..5d2dd34a 100644 --- a/shared/agents/tester.md +++ b/shared/agents/tester.md @@ -71,6 +71,19 @@ For each scenario: If a previous run failed (PREVIOUS_FAILURES provided), prioritize re-testing those scenarios first. +## Long-running commands (test/build commands that may run >120s) + +A plain `Bash` call defaults to a 120s timeout, and inside a dynamic Workflow a sub-agent that emits no output for 180s is KILLED ("agent stalled"). 
For any scenario whose command may run silent longer than ~120s (a full `cargo test` / `go test ./...`, a build step, a slow integration suite), do NOT run it as one silent foreground command. Instead: + +1. Run it in the BACKGROUND with the Bash tool (`run_in_background: true`), capturing output + exit code under a unique `<id>` reused in step 2: + `<command> > /tmp/df-test-<id>.log 2>&1; echo "EXIT=$?" > /tmp/df-test-<id>.done` +2. Poll with the `Monitor` tool (load it via ToolSearch `select:Monitor` if it is not available): set `persistent: false`, `timeout_ms` above the expected run time (e.g. 600000), and + `command: until [ -f /tmp/df-test-<id>.done ]; do echo running; sleep 25; done; echo DONE; cat /tmp/df-test-<id>.done` + The 25s heartbeat (≪ 180s) is delivered as a notification that keeps you alive past the watchdog. +3. When the monitor reports `DONE`: the scenario's command PASSED iff the `.done` file contains `EXIT=0`. Read the `.log` for evidence. + +For a foreground command that exceeds the 120s default but stays under 180s, pass an explicit higher `timeout` to the Bash tool (up to 600000ms). Prefer scoping to the changed package/path where possible. + ## Output Return structured QA report: diff --git a/shared/agents/validator.md b/shared/agents/validator.md index d60fa39e..be9520c3 100644 --- a/shared/agents/validator.md +++ b/shared/agents/validator.md @@ -38,6 +38,19 @@ Execute in this order, stopping on first failure: | 3 | Lint | `npm run lint`, `cargo clippy`, `make lint` | | 4 | Test | `npm test`, `cargo test`, `make test` | +## Long-running commands (builds/tests that may run >120s) + +A plain `Bash` call defaults to a 120s timeout, and inside a dynamic Workflow a sub-agent that emits no output for 180s is KILLED ("agent stalled"). For any build/test that may run silent longer than ~120s (cold `cargo build`/`cargo test`, large `tsc`, `gradle`, `go build ./...`), do NOT run it as one silent foreground command. Instead: + +1. 
Run it in the BACKGROUND, capturing output + exit code. With the Bash tool set `run_in_background: true` and pick a unique `<id>` (reuse the same paths in step 2): + `<command> > /tmp/df-val-<id>.log 2>&1; echo "EXIT=$?" > /tmp/df-val-<id>.done` +2. Poll with the `Monitor` tool (load it via ToolSearch `select:Monitor` if it is not available): set `persistent: false`, `timeout_ms` above the expected run time (e.g. 600000), and + `command: until [ -f /tmp/df-val-<id>.done ]; do echo running; sleep 25; done; echo DONE; cat /tmp/df-val-<id>.done` + The 25s heartbeat (≪ 180s) is delivered as a notification that keeps you alive past the watchdog. +3. When the monitor reports `DONE`: the command PASSED iff the `.done` file contains `EXIT=0`. Read the `.log` for failure details to parse. + +For a foreground command that merely exceeds the 120s default but stays well under 180s, simply pass an explicit higher `timeout` to the Bash tool (up to 600000ms). Prefer package-scoped commands (`cargo build -p <crate>`, `cargo test -p <crate>`) when the project supports them. + ## Principles 1. **Report only** - Never fix code, never commit, never modify files diff --git a/shared/recipes/_engine.mds b/shared/recipes/_engine.mds index e62d30ed..75da7cf8 100644 --- a/shared/recipes/_engine.mds +++ b/shared/recipes/_engine.mds @@ -1,9 +1,10 @@ @define gate1_postcode(): -### GATE 1 — Post-code pipeline (fires after EVERY code mutation) +### GATE 1 — Post-code pipeline (runs exactly TWICE per ticket) ORDER IS LOAD-BEARING. Run exactly in this sequence: 1. **Validator** — build / typecheck / lint / test + - Build/test commands that may run silent for >~120s MUST follow `build_execution_doctrine()` (background Bash + Monitor poll), or they trip the 180s workflow watchdog. - FAIL → Coder fix (max 2 retries) → re-Validator - If still FAIL after 2 retries → escalate (do not loop endlessly) 2. **Simplifier** — reduce complexity, remove duplication @@ -12,9 +13,13 @@ ORDER IS LOAD-BEARING. 
Run exactly in this sequence: **Gate 1 contains NO Evaluator and NO Tester.** Those are Gate 2 only. -Depth scales to change size + budget: a trivial one-line fix warrants a lighter pass; a multi-file refactor warrants the full depth. But Gate 1 is **never skipped**, regardless of change size. +Depth scales to change size + budget: a trivial one-line fix warrants a lighter pass; a multi-file refactor warrants the full depth. -Gate 1 runs after: initial Coder implementation, every review-fix, every alignment-fix (Gate 2 demanded change), every QA-fix. It is the invariant that all written code must pass. +**Cadence — Gate 1 runs at exactly TWO points per ticket:** +1. **Gate 1 #1** — immediately after the initial Coder implementation (inside `implement_bundle()`). +2. **Gate 1 #2** — the FINAL gate, after ALL Gate-2 fixes AND the entire review loop have completed. + +It does NOT run between review cycles, nor after each individual Gate-2 / review / QA fix. At those points the fixing Coder self-verifies its OWN build compiles (see `review_loop()` and the Coder agent's "Long-running commands" discipline). The final Gate 1 #2 is the invariant that all written code passes before merge. @end @define gate2_acceptance(): @@ -106,7 +111,7 @@ Majority-survives: a finding needs >50% of verification lenses to confirm it. St If no surviving findings: break (early exit — do not run unnecessary cycles). -If survivors remain: Coder fixes them (batched per concurrency doctrine — see `concurrency_doctrine()`). Then → Gate 1 ONLY. Review-fixes do NOT get Gate 2 (no Evaluator, no Tester). +If survivors remain: Coder fixes them (batched per concurrency doctrine — see `concurrency_doctrine()`). The fixing Coder **self-verifies its own fix builds** (build/typecheck per the Coder's "Long-running commands" discipline). Do **NOT** run Gate 1 between cycles, and do NOT run Gate 2 (no Validator, no Simplifier, no Scrutinizer, no Evaluator, no Tester between cycles). 
The engine runs ONE final Gate 1 after the loop exits — see the `gate1_postcode()` cadence (Gate 1 #2). @end @define concurrency_doctrine(): @@ -125,6 +130,34 @@ If survivors remain: Coder fixes them (batched per concurrency doctrine — see This applies to both: multi-Coder work on a single ticket AND multi-ticket scheduling in a wave. @end +@define build_execution_doctrine(): +### Build execution doctrine — long-running commands (LOAD-BEARING) + +The Workflow runtime KILLS any sub-agent that emits no output for 180 seconds. A cold `cargo build`, `cargo test`, a large `tsc`, `gradle build`, `go build ./...`, etc. routinely runs silent far longer and trips this watchdog (the failure reads `agent stalled on all N attempts`). Plain foreground `Bash` also defaults to a 120s timeout. + +**RULE: any agent (Validator, Coder, Tester) running a build / test / compile / install that may run silent for more than ~120s MUST run it in the BACKGROUND and POLL — never as a single silent foreground command.** + +Mechanical procedure (spike-verified — a workflow sub-agent survived a 253s job this way): + +1. Choose ONE unique base path for this run and reuse it verbatim in steps 1–3, e.g. `BASE=/tmp/df-build-<id>`. Launch the command with the Bash tool using `run_in_background: true`: + ``` + <command> > <BASE>.log 2>&1; echo "EXIT=$?" > <BASE>.done + ``` + This returns immediately with a background task id — do NOT block on it. +2. Poll with the `Monitor` tool (load it first via ToolSearch `select:Monitor` if it is not already available). Arm ONE monitor that emits a heartbeat well under 180s AND exits when the job finishes: + - description: short, e.g. `await <job>` + - persistent: false + - timeout_ms: comfortably ABOVE the expected job time (e.g. 600000) + - command: `until [ -f <BASE>.done ]; do echo building; sleep 25; done; echo BUILD_DONE; cat <BASE>.done` + + The `building` heartbeat every 25s (≪ 180s) is delivered as a notification that re-invokes you, so the watchdog never sees a >180s gap. 
`BUILD_DONE` + the `EXIT=<code>` line signal completion. +3. When the monitor reports `BUILD_DONE`: the job PASSES iff `<BASE>.done` contains `EXIT=0`. Read `<BASE>.log` for output/failure detail. + +**Scope commands to stay short.** During the engine, PREFER crate/package-scoped builds and tests — `cargo build -p <crate>`, `cargo test -p <crate>`, `npm test -- <path>`, `go test ./pkg/...` — over the whole workspace. The full-workspace regression is the human's job after the wave (the wave already hands the integrated branch back to the user). Scoping keeps most commands under the watchdog window and under budget. + +**Invariants:** heartbeat interval MUST stay well under 180s (25–30s is the tested value); Monitor `timeout_ms` MUST exceed the expected job duration (a too-short timeout kills the poll, not the build). Never substitute a single silent long command for this procedure. +@end + @define engine_output_schema(): ### Engine output schema @@ -175,5 +208,6 @@ Each ticket engine run returns a structured result. The Synthesizer or the wave @export implement_bundle @export review_loop @export concurrency_doctrine +@export build_execution_doctrine @export engine_output_schema @export engine_invariants diff --git a/shared/recipes/_preamble.mds b/shared/recipes/_preamble.mds index 6261f9c6..a120e220 100644 --- a/shared/recipes/_preamble.mds +++ b/shared/recipes/_preamble.mds @@ -44,9 +44,21 @@ Valid `agentType` values: Coder, Validator, Simplifier, Scrutinizer, Evaluator, Do not write logic that depends on an agent enumerating its own skills. Skills are loaded (confirmed by spike F5) but agents do not reliably self-report them. +### Pre-flight self-check — MANDATORY before the first live `Workflow` call + +Two distinct LLM-authored-script bugs have crashed whole runs in the field: `TypeError: pipeline() expects an array as the first argument` and `undefined is not an object (evaluating 'SEAL.num')`. 
Before you invoke the `Workflow` tool, audit the script you just authored against this checklist — it targets exactly those crash classes: + +- **`meta` is a pure literal** — `name` and `description` present; NO variables, function calls, spreads, or template interpolation anywhere inside `meta`. +- **Every `pipeline(x, …)` / `parallel(x)` first argument is a real array** — never a function, object, or possibly-undefined value. If it derives from a prior agent result, coerce it: `parallel((maybeList || []).map(...))`. +- **No reference to a possibly-undefined field** — guard every cross-result field access with optional chaining (e.g. `result?.findings`, `seal?.num`). This is the `SEAL.num` crash class: an agent returned a shape without `num`, so `SEAL.num` threw. +- **`.filter(Boolean)` before mapping over `agent()` / `parallel()` results** — an agent can return `null` (skipped, or died after retries); strip nulls before `.map` or field access. +- **`phase()` titles match the phases declared in `meta`** — every `phase("…")` call has a matching declared phase, and vice-versa. + +Then run a cheap syntax gate: write the authored script to a scratch temp file (e.g. `/tmp/df-wf-check.js`) purely to validate it, run `node --check /tmp/df-wf-check.js`, and only then pass the script text to the `Workflow` tool as usual. `node --check` catches SYNTAX errors only — it does NOT catch the runtime type errors above, so the checklist is the real safeguard. + ### --dry-run affordance -If the user's input includes `--dry-run`, PRINT the authored workflow script (inside a fenced code block) for inspection, then STOP — do not invoke the Workflow tool. This lets the user review the script before execution. +If the user's input includes `--dry-run`, PRINT the authored workflow script (inside a fenced code block) for inspection, then STOP — do not invoke the Workflow tool. This lets the user review the script before execution. 
For a large or novel script, prefer this dry-run path by default — eyeball the script once before the first live run. ### Budget scaling diff --git a/shared/recipes/_wave.mds b/shared/recipes/_wave.mds index 1f72b15b..00fbdcab 100644 --- a/shared/recipes/_wave.mds +++ b/shared/recipes/_wave.mds @@ -30,7 +30,7 @@ After the round's merges, spawn the reader agent again with updated issue states **Termination conditions (checked each round):** - All tickets processed: done, write final report -- Nothing ready but tickets remain (circular or all-blocked): end with escalation report +- Nothing ready but tickets remain (circular or all-blocked): end with an escalation report that, for EACH remaining ticket, names the specific unmet dependency or unresolved decision blocking it - MAX_ROUNDS exceeded: end with partial-progress report (safeguard — never infinite) MAX_ROUNDS = LLM judgment based on ticket count (heuristic: ticket_count * 2 + 5, minimum 10). Always finite. @@ -87,9 +87,9 @@ A workflow cannot pause mid-run (F4). "Escalate" means: quarantine-and-continue **Escalation procedure:** 1. Quarantine the affected ticket (do not merge its branch) 2. Continue with all independent remaining tickets (escalation does not block siblings) -3. Add to the escalations list in the final report with: ticket ID, escalation type, context needed for resolution +3. Add to the escalations list in the final report. For EACH quarantined/blocked ticket, state explicitly: ticket ID; escalation type; the precise REASON it is blocked — name the specific failed dependency ticket, the failing gate, or the exact unresolved decision (never a bare "blocked"); the context needed to resolve it; and the resume handle (the workflow `runId` / journal path) so the user can re-run from where it stopped. -**The user's action on the report:** review escalations, resolve the conflicts / answer the questions, then re-run (resume via runId/journal if available — partial progress is preserved). 
+**The user's action on the report:** review escalations, resolve the conflicts / answer the questions, then re-run (resume via the cited runId/journal if available — partial progress is preserved). **No silent skips.** Every quarantined ticket appears in the report. The run is only "done" when the report says it is — not when the wave loop ends. @end diff --git a/shared/recipes/dynamic-build.mds b/shared/recipes/dynamic-build.mds index 50fb3764..517a0fda 100644 --- a/shared/recipes/dynamic-build.mds +++ b/shared/recipes/dynamic-build.mds @@ -4,7 +4,7 @@ argument-hint: "[ticket | issue-url | plan-doc | --dry-run]" --- @import { authoring_preamble } from "./_preamble.mds" @import { agent_roster, agent_caveats } from "./_roster.mds" -@import { gate1_postcode, gate2_acceptance, evaluator_panel, implement_bundle, review_loop, concurrency_doctrine, engine_output_schema, engine_invariants } from "./_engine.mds" +@import { gate1_postcode, gate2_acceptance, evaluator_panel, implement_bundle, review_loop, concurrency_doctrine, build_execution_doctrine, engine_output_schema, engine_invariants } from "./_engine.mds" @import { wave_loop, branch_merge_model, merge_doctrine, escalation_model } from "./_wave.mds" @import { acceptance_criteria_contract } from "./_plan_contract.mds" @@ -86,7 +86,7 @@ Author a workflow script shaped like: export const meta = { name: "devflow-dynamic-build", description: "Single-ticket build: implement → Gate 1 → Gate 2 → review → verify → fix", - phases: ["setup", "implement", "gate1", "gate2", "review-loop", "report"] + phases: ["setup", "implement", "gate1", "gate2", "review-loop", "gate1-final", "report"] }; // SINGLE mode: one ticket, one branch, full engine @@ -118,6 +118,8 @@ ${PLAN ? 
`Implementation plan:\n${PLAN}` : "No plan provided — use best judgme Relevant architectural decisions (apply devflow:apply-decisions algorithm): ${DECISIONS_CONTEXT} +When you build or run tests to verify your work, use your "Long-running commands" discipline (background-Bash + Monitor poll) for anything that may run silent >120s, and prefer package-scoped commands. + After implementing, commit your changes with a conventional-commit message and report: - Files changed - Summary of changes @@ -126,7 +128,9 @@ After implementing, commit your changes with a conventional-commit message and r // Phase 3: Gate 1 — post-code pipeline const gate1 = await phase("gate1", async () => { + // Gate 1 #1 — runs once, immediately after the initial implementation. const validation = await agent(`Run build, typecheck, lint, and tests on branch ${BRANCH}. +For any build/test that may run silent >120s, use the background-Bash + Monitor poll procedure (your "Long-running commands" discipline) so you never trip the 180s watchdog; prefer package-scoped commands. Report: PASS or FAIL with details.`, { agentType: "Validator" }); if (validation.verdict === "FAIL") { @@ -171,14 +175,12 @@ Report: PASS or FAIL with rationale.`, { agentType: "Evaluator" }), ]); evalVerdict = panel.every(p => p.verdict === "PASS") ? "PASS" : "FAIL"; if (evalVerdict === "FAIL") { - // One Gate-2-demanded fix attempt → Gate 1 → re-evaluate + // Gate-2-demanded fix: a plain Coder fix that self-verifies its OWN build. + // No inline Gate 1 (Validator/Simplifier/Scrutinizer) here — the final Gate 1 (#2) + // after the review loop is the build gate before the branch is handed back. await agent(`Fix the alignment issues identified by the Evaluator panel on branch ${BRANCH}: ${panel.filter(p => p.verdict === "FAIL").map(p => p.rationale).join("\n")} -Commit fixes.`, { agentType: "Coder" }); - // Gate 1 on the fix - await agent(`Run build, typecheck, lint, tests on branch ${BRANCH}. 
Report: PASS or FAIL.`, { agentType: "Validator" }); - await agent(`Simplify recent changes on branch ${BRANCH}.`, { agentType: "Simplifier" }); - await agent(`9-pillar review of recent changes on branch ${BRANCH}.`, { agentType: "Scrutinizer" }); +Self-verify your fix compiles (background-Bash + Monitor for any build >120s — see your "Long-running commands" discipline). Commit fixes.`, { agentType: "Coder" }); } } @@ -186,13 +188,14 @@ Commit fixes.`, { agentType: "Coder" }); if (CRITERIA) { const testerResult = await agent(`Run scenario-based acceptance tests on branch ${BRANCH} against these criteria: ${CRITERIA} +For any test/build command that may run silent >120s, use the background-Bash + Monitor poll procedure (your "Long-running commands" discipline) so you never trip the 180s watchdog. Cover: functionality, API contracts, performance. Report: PASS or FAIL per scenario.`, { agentType: "Tester" }); testerVerdict = testerResult.verdict; if (testerVerdict === "FAIL") { + // Gate-2-demanded QA fix: plain Coder fix that self-verifies. No inline Gate 1. await agent(`Fix the failing acceptance test scenarios on branch ${BRANCH}: ${testerResult.failures} -Commit fixes.`, { agentType: "Coder" }); - await agent(`Run build, typecheck, lint, tests on branch ${BRANCH}. Report: PASS or FAIL.`, { agentType: "Validator" }); +Self-verify your fix compiles and the scenarios pass (background-Bash + Monitor for any build/test >120s). Commit fixes.`, { agentType: "Coder" }); } } @@ -251,30 +254,58 @@ ${JSON.stringify(allFindings.map((f, i) => ({ index: i, description: f.descripti if (survivingFindings.length === 0) break; // early exit — clean review - // Coder fixes survivors (sequential per concurrency doctrine) + // Coder fixes survivors (sequential per concurrency doctrine). The fixing Coder + // SELF-VERIFIES its own fix compiles — NO Gate 1 (Validator/Simplifier/Scrutinizer) + // between review cycles. The single final Gate 1 (#2) after this loop is the build gate. 
await agent(`Fix the following confirmed review findings on branch ${BRANCH}: ${survivingFindings.map(f => `- ${f.description} (${f.severity})`).join("\n")} -Fix all findings in this batch. Commit with conventional-commit message.`, { agentType: "Coder" }); - - // Gate 1 only — no Gate 2 for review-fixes - await agent(`Run build, typecheck, lint, tests on branch ${BRANCH}. Report: PASS or FAIL.`, { agentType: "Validator" }); - await agent(`Simplify recent fixes on branch ${BRANCH}.`, { agentType: "Simplifier" }); - await agent(`9-pillar review of recent fixes on branch ${BRANCH}.`, { agentType: "Scrutinizer" }); +Fix all findings in this batch. Self-verify your fix compiles (background-Bash + Monitor for any build >120s — see your "Long-running commands" discipline). Commit with conventional-commit message.`, { agentType: "Coder" }); } return { cyclesRun, survivingFindings }; }); +// Phase 5.5: Gate 1 #2 — FINAL post-fix gate (full Validator → Simplifier → Scrutinizer). +// Runs ONCE, after the review loop. Between cycles the Coder self-verified its own fixes; +// this is the build gate before the branch is handed back. See gate1_postcode() cadence. +const gate1Final = await phase("gate1-final", async () => { + const validation = await agent(`Run build, typecheck, lint, and tests on branch ${BRANCH} (final gate after all fixing). +For any build/test that may run silent >120s, use the background-Bash + Monitor poll procedure (your "Long-running commands" discipline) so you never trip the 180s watchdog; prefer package-scoped commands. +Report: PASS or FAIL with details.`, { agentType: "Validator" }); + + if (validation.verdict === "FAIL") { + for (let attempt = 1; attempt <= 2; attempt++) { + await agent(`Fix the final validation failures on branch ${BRANCH}: +${validation.details} +Self-verify your fix compiles. 
Commit fixes with conventional-commit message.`, { agentType: "Coder" }); + const recheck = await agent(`Re-run build, typecheck, lint, tests on branch ${BRANCH} (background+Monitor for long commands). Report: PASS or FAIL.`, { agentType: "Validator" }); + if (recheck.verdict === "PASS") break; + if (attempt === 2) return { verdict: "ESCALATED", reason: "Final Gate 1 validation exhausted after 2 Coder fix attempts" }; + } + } + + await agent(`Simplify and reduce complexity of recent changes on branch ${BRANCH}. Commit any improvements.`, { agentType: "Simplifier" }); + + const scrutiny = await agent(`9-pillar self-review of recent changes on branch ${BRANCH}. Report any code you changed and your findings.`, { agentType: "Scrutinizer" }); + + if (scrutiny.codeChanged) { + await agent(`Re-run build, typecheck, lint, tests on branch ${BRANCH} (Scrutinizer made changes; background+Monitor for long commands). Report: PASS or FAIL.`, { agentType: "Validator" }); + } + + return { verdict: "PASS" }; +}); + // Phase 6: Report return phase("report", () => agent(`Synthesize the build run for ticket ${TICKET} on branch ${BRANCH}: - Implementation summary -- Gate 1 result +- Gate 1 (#1 post-implementation) result - Gate 2 result: ${JSON.stringify(gate2)} - Review cycles: ${reviewResult.cyclesRun} +- Final Gate 1 (#2 post-fix): ${JSON.stringify(gate1Final)} - Surviving findings: ${JSON.stringify(reviewResult.survivingFindings)} -- Overall verdict: ${reviewResult.survivingFindings.length === 0 ? "PASS" : "PARTIAL"} +- Overall verdict: ${reviewResult.survivingFindings.length === 0 && gate1Final.verdict !== "ESCALATED" ? "PASS" : "PARTIAL"} Write a concise report. The branch is ready for user review — do NOT merge to main.`, { agentType: "Synthesizer" }) ); @@ -282,6 +313,8 @@ Write a concise report. 
The branch is ready for user review — do NOT merge to {concurrency_doctrine()} +{build_execution_doctrine()} + {implement_bundle()} {gate1_postcode()} @@ -318,6 +351,18 @@ The wave workflow uses the same phases as SINGLE but wraps them in a wave loop. --- +### After the workflow returns — batch decisions at the command boundary (F4) + +A workflow cannot pause mid-run. After the build/wave workflow returns, you (the main model) surface anything that needs a human decision — all at once: + +1. Read the wave report (`.devflow/docs/waves/\{slug\}/\{ts\}/wave-report.md`) for its **escalations** (quarantined/blocked tickets and why), and read any `DECISIONS-NEEDED.md` left by a prior `/devflow:dynamic-plan` run for this initiative. +2. Surface ALL of them — escalations AND open decisions — to the user in ONE batched `AskUserQuestion` (never one-at-a-time). `_wave.mds`'s escalation model already quarantines-and-continues; this batches the surfacing so the user answers everything in a single pass. +3. If `~/.devflow/preference-profile.md` was absent, note in your summary: "no preference profile found — N decisions surfaced that a profile might have auto-resolved; consider `/devflow:dynamic-profile`." + +Do NOT ask questions mid-workflow — that is impossible (F4). The workflow only WRITES the report; you read it and ask. + +--- + ### Maintenance note This recipe encodes the current `/implement` + `/code-review` + `/resolve` orchestration shape as of the authoring date (2026-06-12). When those base commands change their orchestration, update this recipe to match. No tooling detects drift — by design (ADR-008 Iron Rule). The reminder lives in the design doc §16. diff --git a/shared/recipes/dynamic-plan.mds b/shared/recipes/dynamic-plan.mds index 3f9d02e1..5c234be2 100644 --- a/shared/recipes/dynamic-plan.mds +++ b/shared/recipes/dynamic-plan.mds @@ -76,7 +76,7 @@ A workflow cannot pause mid-run. Open design decisions collected in `DECISIONS-N After the workflow completes: 1. 
Read the `decisionsNeededPath` returned by the workflow (e.g. `.devflow/docs/design/\{slug\}/\{ts\}/DECISIONS-NEEDED.md`). Always use the OUTDIR-scoped path the workflow wrote — never the flat `.devflow/docs/design/DECISIONS-NEEDED.md`. -2. If it contains any open decisions, surface them to the user via `AskUserQuestion`. +2. First surface the **Auto-Resolved Decisions** section (decision → resolution → source) for audit, then surface ALL open **Decisions Needed** to the user in ONE batched `AskUserQuestion` (never one-at-a-time). 3. The user's answers feed into their own plan edits or a follow-up `/devflow:dynamic-plan` run. State this explicitly in the workflow script as a comment: `// AskUserQuestion happens at the command boundary after this workflow returns — NOT here.` @@ -198,9 +198,10 @@ For each ticket, write ${OUTDIR}/\{ticket-slug\}-plan.md containing: - The plan body (incorporate cross-plan amendments) - ## Acceptance Criteria (numbered, positive + negative) - ## Test Plan (per-criterion scenarios) -- ## Auto-Resolved Decisions (if any, with rationale) +- ## Auto-Resolved Decisions (if any — list each as: decision → resolution → source) Then write ${OUTDIR}/DECISIONS-NEEDED.md: +- ## Auto-Resolved Decisions — list each silently-resolved decision as: decision → resolution → source (preference profile / ADR-NNN), so auto-resolution is auditable and reversible. If none, write "None." - ## Decisions Needed - One section per open decision: what the decision is, why it matters, what options exist. - Include cross-plan conflicts that were not auto-resolved. @@ -241,7 +242,7 @@ The workflow returns: } ``` -After the workflow returns: read `DECISIONS-NEEDED.md` and surface any open decisions to the user via `AskUserQuestion`. Do not ask questions mid-workflow — this is F4. +After the workflow returns: read `DECISIONS-NEEDED.md`.
First briefly surface the **Auto-Resolved Decisions** section (decision → resolution → source) so silently-settled calls are visible and reversible, then surface ALL open **Decisions Needed** in ONE batched `AskUserQuestion` (never one-at-a-time). Do not ask questions mid-workflow — this is F4. If no preference profile was found, note in your summary: "no preference profile found — N decisions were surfaced that a profile might have auto-resolved; consider `/devflow:dynamic-profile`." --- diff --git a/tests/build-recipes.test.ts b/tests/build-recipes.test.ts index 8d243230..51c53c1b 100644 --- a/tests/build-recipes.test.ts +++ b/tests/build-recipes.test.ts @@ -21,7 +21,7 @@ * guarantee without corrupting source files. */ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, beforeAll } from 'vitest'; import { promises as fs } from 'fs'; import * as path from 'path'; import { spawnSync } from 'child_process'; @@ -198,3 +198,48 @@ describe('build-recipes.ts script subprocess contract', () => { }); }); +// --------------------------------------------------------------------------- +// 5. Compiled dynamic-build.md encodes the Gate-1-twice cadence + build doctrine +// (Fixes 1 & 2 — assert via compiled-output grep, since recipe behavior is prose) +// --------------------------------------------------------------------------- + +describe('compiled dynamic-build.md: Gate-1-twice cadence + build execution doctrine', () => { + let compiled: string; + + beforeAll(async () => { + // Compile fresh so this assertion is independent of test order / a prior build. 
+ const result = spawnSync('npx', ['tsx', path.join(ROOT, 'scripts', 'build-recipes.ts')], { + cwd: ROOT, + encoding: 'utf-8', + timeout: 60_000, + }); + if (result.error) throw result.error; + compiled = await fs.readFile(path.join(COMMANDS_DIR, 'dynamic-build.md'), 'utf-8'); + }); + + it('renders the build_execution_doctrine (background Bash + Monitor poll)', () => { + expect(compiled).toContain('Build execution doctrine'); + expect(compiled).toContain('run_in_background'); + expect(compiled).toContain('Monitor'); + }); + + it('runs ONE final Gate 1 (#2) after the review loop', () => { + expect(compiled).toContain('gate1-final'); + expect(compiled).toContain('Gate 1 #2'); + }); + + it('does NOT run Gate 1 (Validator/Simplifier/Scrutinizer) between review cycles', () => { + // The old per-cycle Gate-1 strings must be gone. + expect(compiled).not.toContain('Gate 1 only — no Gate 2 for review-fixes'); + expect(compiled).not.toContain('Simplify recent fixes'); + expect(compiled).not.toContain('9-pillar review of recent fixes'); + }); + + it('spawns Simplifier and Scrutinizer exactly twice each (Gate 1 #1 + Gate 1 #2)', () => { + const simplifier = (compiled.match(/agentType: "Simplifier"/g) || []).length; + const scrutinizer = (compiled.match(/agentType: "Scrutinizer"/g) || []).length; + expect(simplifier, 'Simplifier should run only in the two Gate-1 passes').toBe(2); + expect(scrutinizer, 'Scrutinizer should run only in the two Gate-1 passes').toBe(2); + }); +}); + diff --git a/tests/shell-hooks.test.ts b/tests/shell-hooks.test.ts index 4660d169..0487755f 100644 --- a/tests/shell-hooks.test.ts +++ b/tests/shell-hooks.test.ts @@ -25,6 +25,7 @@ const HOOK_SCRIPTS = [ 'get-mtime', 'ensure-devflow-init', 'ensure-root-gitignore', + 'resolve-project-root', 'dream-capture', 'dream-evaluate', 'dream-dispatch', @@ -968,6 +969,111 @@ describe('working memory queue behavior', () => { }); }); +// ============================================================================= +// 
resolve-project-root — anchor .devflow/ to the project root (Fix 6) +// ============================================================================= + +describe('resolve-project-root: df_resolve_root', () => { + const RESOLVE = path.join(HOOKS_DIR, 'resolve-project-root'); + + function resolveRoot(cwd: string): string { + return execSync(`bash -c 'source "${RESOLVE}"; df_resolve_root "${cwd}"'`, { stdio: 'pipe' }) + .toString() + .trim(); + } + + it('(a) returns the git toplevel for a normal subdir inside a repo', () => { + const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'devflow-rpr-git-')); + try { + execSync(`git init -q "${repo}"`, { stdio: 'pipe' }); + const real = fs.realpathSync(repo); + const sub = path.join(real, 'src', 'deep', 'nested'); + fs.mkdirSync(sub, { recursive: true }); + expect(fs.realpathSync(resolveRoot(sub))).toBe(real); + } finally { + fs.rmSync(repo, { recursive: true, force: true }); + } + }); + + it('(b) returns the repo root for a path inside .devflow/ — git walks up (the stray-nesting fix)', () => { + const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'devflow-rpr-dev-')); + try { + execSync(`git init -q "${repo}"`, { stdio: 'pipe' }); + const real = fs.realpathSync(repo); + const nested = path.join(real, '.devflow', 'docs', 'waves', 'x', 'tickets'); + fs.mkdirSync(nested, { recursive: true }); + expect(fs.realpathSync(resolveRoot(nested))).toBe(real); + } finally { + fs.rmSync(repo, { recursive: true, force: true }); + } + }); + + it('(c) non-git: strips from the first /.devflow/ onward', () => { + const base = fs.mkdtempSync(path.join(os.tmpdir(), 'devflow-rpr-nogit-')); + try { + const real = fs.realpathSync(base); + const nested = path.join(real, '.devflow', 'docs', 'tickets'); + fs.mkdirSync(nested, { recursive: true }); + // No git repo above os.tmpdir() → fallback strip yields the path before /.devflow/. 
+ expect(resolveRoot(nested)).toBe(real); + } finally { + fs.rmSync(base, { recursive: true, force: true }); + } + }); + + it('(c2) non-git, no .devflow in path: returns cwd unchanged', () => { + const base = fs.mkdtempSync(path.join(os.tmpdir(), 'devflow-rpr-plain-')); + try { + const real = fs.realpathSync(base); + const sub = path.join(real, 'a', 'b'); + fs.mkdirSync(sub, { recursive: true }); + expect(resolveRoot(sub)).toBe(sub); + } finally { + fs.rmSync(base, { recursive: true, force: true }); + } + }); +}); + +describe('hooks anchor .devflow/ to the project root (no stray nested .devflow/)', () => { + const STOP_HOOK = path.join(HOOKS_DIR, 'dream-capture'); + + it('dream-capture run with a CWD inside .devflow/ writes the queue at the repo root, not a nested .devflow/', () => { + const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'devflow-anchor-')); + try { + execSync(`git init -q "${repo}"`, { stdio: 'pipe' }); + const real = fs.realpathSync(repo); + + // Pre-scaffold the real .devflow/memory and a fresh trigger so the hook stays + // throttled and never spawns a background worker during the test. + const memDir = path.join(real, '.devflow', 'memory'); + fs.mkdirSync(memDir, { recursive: true }); + fs.writeFileSync(path.join(memDir, '.working-memory-last-trigger'), ''); + + // The hook runs with a CWD deep inside .devflow/ — the stray-nesting scenario. + const nestedCwd = path.join(real, '.devflow', 'docs', 'waves', 'w', 'tickets'); + fs.mkdirSync(nestedCwd, { recursive: true }); + + const input = JSON.stringify({ + cwd: nestedCwd, + session_id: 'anchor-test', + last_assistant_message: 'hello from a nested cwd', + }); + execSync(`bash "${STOP_HOOK}"`, { input, stdio: ['pipe', 'pipe', 'pipe'] }); + + // Queue written at the REAL repo root .devflow/memory/ ... 
+ const rootQueue = path.join(real, '.devflow', 'memory', '.pending-turns.jsonl'); + expect(fs.existsSync(rootQueue)).toBe(true); + const entry = JSON.parse(fs.readFileSync(rootQueue, 'utf-8').trim().split('\n').filter(Boolean)[0]); + expect(entry.role).toBe('assistant'); + + // ... and NO stray nested .devflow/ was scaffolded under the nested cwd. + expect(fs.existsSync(path.join(nestedCwd, '.devflow'))).toBe(false); + } finally { + fs.rmSync(repo, { recursive: true, force: true }); + } + }); +}); + // ============================================================================= // preamble keyword detection — Suites 1-4 // =============================================================================