dean0x · dean0x · Jun 21, 2026 · Jun 21, 2026
@@ -45,7 +45,12 @@ source "$SCRIPT_DIR/get-mtime" || { echo "background-memory-update: failed to so
 log "Starting (CWD=$CWD)"
 
 # --- Resolve paths ---
-DEVFLOW_DIR="$CWD/.devflow"
+# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when the
+# worker is spawned with a CWD inside .devflow/...). Empty → fall back to CWD.
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true
+PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)"
+[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD"
+DEVFLOW_DIR="$PROJECT_ROOT/.devflow"
 MEMORY_DIR="$DEVFLOW_DIR/memory"
 DREAM_DIR="$DEVFLOW_DIR/dream"
 QUEUE_FILE="$MEMORY_DIR/.pending-turns.jsonl"

@@ -57,7 +57,13 @@ dbg "CWD=$CWD"
 # Only log metadata (length, keys, presence checks).
 dbg "ASSISTANT_MSG length=${#ASSISTANT_MSG}"
 
-DEVFLOW_DIR="$CWD/.devflow"
+# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this
+# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior).
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true
+PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)"
+[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD"
+
+DEVFLOW_DIR="$PROJECT_ROOT/.devflow"
 MEMORY_DIR="$DEVFLOW_DIR/memory"
 DREAM_DIR="$DEVFLOW_DIR/dream"
 
@@ -98,7 +104,7 @@ if [ -f "$SCANNER" ] && printf '%s' "$ASSISTANT_MSG" | grep -qE 'ADR-[0-9]+|PF-[
   [ -f "$DEVFLOW_DIR/decisions/.disabled" ] && _DEC_ENABLED_CAPTURE="false"
   if [ "$_DEC_ENABLED_CAPTURE" = "true" ]; then
     dbg "Running decisions usage scanner"
-    printf '%s' "$ASSISTANT_MSG" | node "$SCANNER" --cwd "$CWD" 2>/dev/null || true
+    printf '%s' "$ASSISTANT_MSG" | node "$SCANNER" --cwd "$PROJECT_ROOT" 2>/dev/null || true
   fi
 fi
 

@@ -33,7 +33,13 @@ if [ -z "$CWD" ] || [ ! -d "$CWD" ]; then dbg "EXIT: bad CWD"; exit 0; fi
 
 devflow_debug_set_cwd "$CWD"
 
-DEVFLOW_DIR="$CWD/.devflow"
+# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this
+# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior).
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true
+PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)"
+[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD"
+
+DEVFLOW_DIR="$PROJECT_ROOT/.devflow"
 MEMORY_DIR="$DEVFLOW_DIR/memory"
 DREAM_DIR="$DEVFLOW_DIR/dream"
 

@@ -33,7 +33,13 @@ if [ -z "$CWD" ] || [ ! -d "$CWD" ]; then dbg "EXIT: bad CWD"; exit 0; fi
 devflow_debug_set_cwd "$CWD"
 dbg "CWD=$CWD"
 
-DEVFLOW_DIR="$CWD/.devflow"
+# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this
+# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior).
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true
+PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)"
+[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD"
+
+DEVFLOW_DIR="$PROJECT_ROOT/.devflow"
 [ ! -d "$DEVFLOW_DIR" ] && exit 0
 
 MEMORY_DIR="$DEVFLOW_DIR/memory"

@@ -7,7 +7,15 @@
 
 [ -z "$1" ] && return 1
 
-_DEVFLOW_DIR="$1/.devflow"
+# Anchor to the project root so a CWD nested inside .devflow/ never scaffolds a
+# stray nested .devflow/. Resolve our own dir via BASH_SOURCE (works whether sourced
+# by a hook or directly by tests). Empty → fall back to "$1" (old behavior).
+_EDI_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$_EDI_DIR/resolve-project-root" 2>/dev/null || true
+_EDI_ROOT="$(df_resolve_root "$1" 2>/dev/null || true)"
+[ -n "$_EDI_ROOT" ] || _EDI_ROOT="$1"
+
+_DEVFLOW_DIR="$_EDI_ROOT/.devflow"
 
 # Fast-path: if all subdirectories already exist, skip mkdir and gitignore setup
 if [ -d "$_DEVFLOW_DIR/memory" ] && [ -d "$_DEVFLOW_DIR/docs" ] && \
@@ -34,8 +42,6 @@ fi
 
 # One-time root .gitignore setup — delegated to the sibling ensure-root-gitignore
 # helper (single source of truth) so the always-on, memory-independent
-# session-start-context hook applies the identical rule. Resolve our own directory
-# via BASH_SOURCE so this works whether sourced by a hook (which sets SCRIPT_DIR) or
-# sourced directly (tests source this file with no SCRIPT_DIR in scope).
-_EDI_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-source "$_EDI_DIR/ensure-root-gitignore" "$1"
+# session-start-context hook applies the identical rule. _EDI_DIR and _EDI_ROOT
+# were resolved at the top of this script.
+source "$_EDI_DIR/ensure-root-gitignore" "$_EDI_ROOT"
@@ -31,7 +31,13 @@ fi
 devflow_debug_set_cwd "$CWD"
 dbg "CWD=$CWD"
 
-DEVFLOW_DIR="$CWD/.devflow"
+# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this
+# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior).
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true
+PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)"
+[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD"
+
+DEVFLOW_DIR="$PROJECT_ROOT/.devflow"
 MEMORY_DIR="$DEVFLOW_DIR/memory"
 DREAM_DIR="$DEVFLOW_DIR/dream"
 

@@ -0,0 +1,37 @@
+#!/bin/bash
+# resolve-project-root — anchor .devflow/ resolution to the project root.
+#
+# Hooks compute their .devflow/ path from the session/worker CWD. When a worker
+# is spawned with a CWD deep inside the project — e.g. inside
+# .devflow/docs/<...>/tickets/ — computing "$CWD/.devflow" there scaffolds a
+# STRAY nested .devflow/. This helper resolves any CWD back to the real project
+# root, mirroring the TS CLI's getGitRoot() (src/cli/utils/git.ts) so the shell
+# side anchors identically.
+#
+# Sourced by: the memory/dream/session hooks and ensure-devflow-init.
+# Safe to source: this file only defines df_resolve_root (no top-level `return` or
+# `exit`), and the function's variables are `local` and _-prefixed, so it never
+# clobbers caller vars. Safe under `set -e` (git failure is guarded with || true).
+#
+# Usage:
+#   source resolve-project-root
+#   PROJECT_ROOT="$(df_resolve_root "$CWD")"
+#
+# df_resolve_root <cwd> prints the project root for <cwd>:
+#   1. git top-level — git walks up to the real repo root even from a
+#      .devflow/-nested subdir, so the nested case is fixed for git repos.
+#   2. non-git fallback — strip from the first /.devflow/ onward (or a trailing
+#      /.devflow), else print <cwd> unchanged.
+df_resolve_root() {
+  local _cwd="$1" _root=""
+  # Ask git first. `|| true` keeps a failing rev-parse (e.g. a non-git path) from
+  # aborting a `set -e` caller; the resulting empty output selects the fallback below.
+  _root="$(git -C "$_cwd" rev-parse --show-toplevel 2>/dev/null || true)"
+  if [ -z "$_root" ]; then  # git gave nothing → derive the root from the path text alone
+    case "$_cwd" in
+      */.devflow/*) _root="${_cwd%%/.devflow/*}" ;;  # %% = longest suffix match → cut at the FIRST /.devflow/
+      */.devflow)   _root="${_cwd%/.devflow}" ;;  # cwd is a .devflow dir itself → drop that last component
+      *)            _root="$_cwd" ;;  # no .devflow component → cwd is returned unchanged
+    esac
+  fi  # NOTE(review): a .devflow directly under / strips to "" here; the callers shown treat empty as "use CWD" — confirm intended
+  printf '%s\n' "$_root"  # result goes to stdout; callers capture it with $(...)
+}
@@ -32,16 +32,22 @@ fi
 devflow_debug_set_cwd "$CWD"
 dbg "CWD=$CWD"
 
+# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this
+# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior).
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true
+PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)"
+[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD"
+
 # Ensure the project root .gitignore ignores .devflow/ wholesale. This runs on every
 # session regardless of feature toggles, so memory-off projects (decisions/knowledge
 # only) still get .devflow/ ignored — this is the memory-independent path that fixes
 # the gitignore/memory coupling (PF-014). Single source of truth: ensure-root-gitignore.
 # Soft-fail: a gitignore write must never block context injection. Marker keeps it O(1).
-[ -d "$CWD" ] && [ -f "$SCRIPT_DIR/ensure-root-gitignore" ] && source "$SCRIPT_DIR/ensure-root-gitignore" "$CWD" || true
+[ -d "$PROJECT_ROOT" ] && [ -f "$SCRIPT_DIR/ensure-root-gitignore" ] && source "$SCRIPT_DIR/ensure-root-gitignore" "$PROJECT_ROOT" || true
 
 CONTEXT=""
 
-DEVFLOW_DIR="$CWD/.devflow"
+DEVFLOW_DIR="$PROJECT_ROOT/.devflow"
 MEMORY_DIR="$DEVFLOW_DIR/memory"
 DREAM_DIR="$DEVFLOW_DIR/dream"
 DECISIONS_CONTENT_DIR="$DEVFLOW_DIR/decisions"

@@ -31,7 +31,13 @@ fi
 devflow_debug_set_cwd "$CWD"
 dbg "CWD=$CWD"
 
-DEVFLOW_DIR="$CWD/.devflow"
+# Anchor .devflow/ to the project root (prevents a stray nested .devflow/ when this
+# hook runs with a CWD inside .devflow/...). Empty → fall back to CWD (old behavior).
+source "$SCRIPT_DIR/resolve-project-root" 2>/dev/null || true
+PROJECT_ROOT="$(df_resolve_root "$CWD" 2>/dev/null || true)"
+[ -n "$PROJECT_ROOT" ] || PROJECT_ROOT="$CWD"
+
+DEVFLOW_DIR="$PROJECT_ROOT/.devflow"
 MEMORY_DIR="$DEVFLOW_DIR/memory"
 DREAM_DIR="$DEVFLOW_DIR/dream"
 

@@ -89,6 +89,19 @@ When you apply a decision from `.devflow/decisions/decisions.md` or avoid a pitf
 
 8. **Generate handoff** (if HANDOFF_REQUIRED=true): Include implementation summary for next Coder (see Output section).
 
+## Long-running commands (self-verifying builds/tests that may run >120s)
+
+You run builds and tests to verify your own work — including **self-verifying that each fix compiles** when no separate Validator runs between review cycles. A plain `Bash` call defaults to a 120s timeout, and inside a dynamic Workflow a sub-agent that emits no output for 180s is KILLED ("agent stalled"). For any build/test that may run silent longer than ~120s (cold `cargo build`/`cargo test`, large `tsc`, `gradle`, `go build ./...`), do NOT run it as one silent foreground command. Instead:
+
+1. Run it in the BACKGROUND with the Bash tool (`run_in_background: true`), capturing output + exit code under a unique `<slug>` reused in step 2:
+   `<command> > /tmp/df-build-<slug>.log 2>&1; echo "EXIT=$?" > /tmp/df-build-<slug>.done`
+2. Poll with the `Monitor` tool (load it via ToolSearch `select:Monitor` if it is not available): set `persistent: false`, `timeout_ms` above the expected run time (e.g. 600000), and
+   `command: until [ -f /tmp/df-build-<slug>.done ]; do echo building; sleep 25; done; echo DONE; cat /tmp/df-build-<slug>.done`
+   The 25s heartbeat (≪ 180s) is delivered as a notification that keeps you alive past the watchdog.
+3. When the monitor reports `DONE`: the command PASSED iff the `.done` file contains `EXIT=0`. Read the `.log`, fix any failures, and only then proceed.
+
+For a foreground command that exceeds the 120s default but stays under 180s, pass an explicit higher `timeout` to the Bash tool (up to 600000ms). Prefer package-scoped commands (`cargo build -p <crate>`) during the engine; the full-workspace regression is the human's job after the wave.
+
 ## Principles
 
 1. **Work on feature branch** - All operations happen on the current feature branch

@@ -71,6 +71,19 @@ For each scenario:
 
 If a previous run failed (PREVIOUS_FAILURES provided), prioritize re-testing those scenarios first.
 
+## Long-running commands (test/build commands that may run >120s)
+
+A plain `Bash` call defaults to a 120s timeout, and inside a dynamic Workflow a sub-agent that emits no output for 180s is KILLED ("agent stalled"). For any scenario whose command may run silent longer than ~120s (a full `cargo test` / `go test ./...`, a build step, a slow integration suite), do NOT run it as one silent foreground command. Instead:
+
+1. Run it in the BACKGROUND with the Bash tool (`run_in_background: true`), capturing output + exit code under a unique `<slug>` reused in step 2:
+   `<command> > /tmp/df-test-<slug>.log 2>&1; echo "EXIT=$?" > /tmp/df-test-<slug>.done`
+2. Poll with the `Monitor` tool (load it via ToolSearch `select:Monitor` if it is not available): set `persistent: false`, `timeout_ms` above the expected run time (e.g. 600000), and
+   `command: until [ -f /tmp/df-test-<slug>.done ]; do echo running; sleep 25; done; echo DONE; cat /tmp/df-test-<slug>.done`
+   The 25s heartbeat (≪ 180s) is delivered as a notification that keeps you alive past the watchdog.
+3. When the monitor reports `DONE`: the scenario's command PASSED iff the `.done` file contains `EXIT=0`. Read the `.log` for evidence.
+
+For a foreground command that exceeds the 120s default but stays under 180s, pass an explicit higher `timeout` to the Bash tool (up to 600000ms). Prefer scoping to the changed package/path where possible.
+
 ## Output
 
 Return structured QA report:

@@ -38,6 +38,19 @@ Execute in this order, stopping on first failure:
 | 3 | Lint | `npm run lint`, `cargo clippy`, `make lint` |
 | 4 | Test | `npm test`, `cargo test`, `make test` |
 
+## Long-running commands (builds/tests that may run >120s)
+
+A plain `Bash` call defaults to a 120s timeout, and inside a dynamic Workflow a sub-agent that emits no output for 180s is KILLED ("agent stalled"). For any build/test that may run silent longer than ~120s (cold `cargo build`/`cargo test`, large `tsc`, `gradle`, `go build ./...`), do NOT run it as one silent foreground command. Instead:
+
+1. Run it in the BACKGROUND, capturing output + exit code. With the Bash tool set `run_in_background: true` and pick a unique `<slug>` (reuse the same paths in step 2):
+   `<command> > /tmp/df-val-<slug>.log 2>&1; echo "EXIT=$?" > /tmp/df-val-<slug>.done`
+2. Poll with the `Monitor` tool (load it via ToolSearch `select:Monitor` if it is not available): set `persistent: false`, `timeout_ms` above the expected run time (e.g. 600000), and
+   `command: until [ -f /tmp/df-val-<slug>.done ]; do echo running; sleep 25; done; echo DONE; cat /tmp/df-val-<slug>.done`
+   The 25s heartbeat (≪ 180s) is delivered as a notification that keeps you alive past the watchdog.
+3. When the monitor reports `DONE`: the command PASSED iff the `.done` file contains `EXIT=0`. Read the `.log` for failure details to parse.
+
+For a foreground command that merely exceeds the 120s default but stays well under 180s, simply pass an explicit higher `timeout` to the Bash tool (up to 600000ms). Prefer package-scoped commands (`cargo build -p <crate>`, `cargo test -p <crate>`) when the project supports them.
+
 ## Principles
 
 1. **Report only** - Never fix code, never commit, never modify files

@@ -1,9 +1,10 @@
 @define gate1_postcode():
-### GATE 1 — Post-code pipeline (fires after EVERY code mutation)
+### GATE 1 — Post-code pipeline (runs exactly TWICE per ticket)
 
 ORDER IS LOAD-BEARING. Run exactly in this sequence:
 
 1. **Validator** — build / typecheck / lint / test
+   - Build/test commands that may run silent for >~120s MUST follow `build_execution_doctrine()` (background Bash + Monitor poll), or they trip the 180s workflow watchdog.
    - FAIL → Coder fix (max 2 retries) → re-Validator
    - If still FAIL after 2 retries → escalate (do not loop endlessly)
 2. **Simplifier** — reduce complexity, remove duplication
@@ -12,9 +13,13 @@ ORDER IS LOAD-BEARING. Run exactly in this sequence:
 
 **Gate 1 contains NO Evaluator and NO Tester.** Those are Gate 2 only.
 
-Depth scales to change size + budget: a trivial one-line fix warrants a lighter pass; a multi-file refactor warrants the full depth. But Gate 1 is **never skipped**, regardless of change size.
+Depth scales to change size + budget: a trivial one-line fix warrants a lighter pass; a multi-file refactor warrants the full depth.
 
-Gate 1 runs after: initial Coder implementation, every review-fix, every alignment-fix (Gate 2 demanded change), every QA-fix. It is the invariant that all written code must pass.
+**Cadence — Gate 1 runs at exactly TWO points per ticket:**
+1. **Gate 1 #1** — immediately after the initial Coder implementation (inside `implement_bundle()`).
+2. **Gate 1 #2** — the FINAL gate, after ALL Gate-2 fixes AND the entire review loop have completed.
+
+It does NOT run between review cycles, nor after each individual Gate-2 / review / QA fix. At those points the fixing Coder self-verifies that its OWN fix builds (see `review_loop()` and the Coder agent's "Long-running commands" discipline). The final Gate 1 #2 enforces the invariant that all written code passes before merge.
 @end
 
 @define gate2_acceptance():
@@ -106,7 +111,7 @@ Majority-survives: a finding needs >50% of verification lenses to confirm it. St
 
 If no surviving findings: break (early exit — do not run unnecessary cycles).
 
-If survivors remain: Coder fixes them (batched per concurrency doctrine — see `concurrency_doctrine()`). Then → Gate 1 ONLY. Review-fixes do NOT get Gate 2 (no Evaluator, no Tester).
+If survivors remain: Coder fixes them (batched per concurrency doctrine — see `concurrency_doctrine()`). The fixing Coder **self-verifies its own fix builds** (build/typecheck per the Coder's "Long-running commands" discipline). Between cycles, do **NOT** run Gate 1 (no Validator, no Simplifier, no Scrutinizer) and do NOT run Gate 2 (no Evaluator, no Tester). The engine runs ONE final Gate 1 after the loop exits — see the `gate1_postcode()` cadence (Gate 1 #2).
 @end
 
 @define concurrency_doctrine():
@@ -125,6 +130,34 @@ If survivors remain: Coder fixes them (batched per concurrency doctrine — see
 This applies to both: multi-Coder work on a single ticket AND multi-ticket scheduling in a wave.
 @end
 
+@define build_execution_doctrine():
+### Build execution doctrine — long-running commands (LOAD-BEARING)
+
+The Workflow runtime KILLS any sub-agent that emits no output for 180 seconds. A cold `cargo build`, `cargo test`, a large `tsc`, `gradle build`, `go build ./...`, etc. routinely runs silent far longer and trips this watchdog (the failure reads `agent stalled on all N attempts`). Plain foreground `Bash` also defaults to a 120s timeout.
+
+**RULE: any agent (Validator, Coder, Tester) running a build / test / compile / install that may run silent for more than ~120s MUST run it in the BACKGROUND and POLL — never as a single silent foreground command.**
+
+Mechanical procedure (spike-verified — a workflow sub-agent survived a 253s job this way):
+
+1. Choose ONE unique base path for this run and reuse it verbatim in steps 1–3, e.g. `BASE=/tmp/df-build-<ticket-slug>`. Launch the command with the Bash tool using `run_in_background: true`:
+   ```
+   <build/test command> > <BASE>.log 2>&1; echo "EXIT=$?" > <BASE>.done
+   ```
+   This returns immediately with a background task id — do NOT block on it.
+2. Poll with the `Monitor` tool (load it first via ToolSearch `select:Monitor` if it is not already available). Arm ONE monitor that emits a heartbeat well under 180s AND exits when the job finishes:
+   - description: short, e.g. `await <build cmd>`
+   - persistent: false
+   - timeout_ms: comfortably ABOVE the expected job time (e.g. 600000)
+   - command: `until [ -f <BASE>.done ]; do echo building; sleep 25; done; echo BUILD_DONE; cat <BASE>.done`
+
+   The `building` heartbeat every 25s (≪ 180s) is delivered as a notification that re-invokes you, so the watchdog never sees a >180s gap. `BUILD_DONE` + the `EXIT=` line signal completion.
+3. When the monitor reports `BUILD_DONE`: the job PASSES iff `<BASE>.done` contains `EXIT=0`. Read `<BASE>.log` for output/failure detail.
+
+**Scope commands to stay short.** During the engine, PREFER crate/package-scoped builds and tests — `cargo build -p <crate>`, `cargo test -p <crate>`, `npm test -- <path>`, `go test ./pkg/...` — over the whole workspace. The full-workspace regression is the human's job after the wave (the wave already hands the integrated branch back to the user). Scoping keeps most commands under the watchdog window and under budget.
+
+**Invariants:** heartbeat interval MUST stay well under 180s (25–30s is the tested value); Monitor `timeout_ms` MUST exceed the expected job duration (a too-short timeout kills the poll, not the build). Never substitute a single silent long command for this procedure.
+@end
+
 @define engine_output_schema():
 ### Engine output schema
 
@@ -175,5 +208,6 @@ Each ticket engine run returns a structured result. The Synthesizer or the wave
 @export implement_bundle
 @export review_loop
 @export concurrency_doctrine
+@export build_execution_doctrine
 @export engine_output_schema
 @export engine_invariants