From 6a2763eb9a8e9415b2a1cf00d92ff0be8f60380b Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Wed, 1 Jul 2026 16:35:36 +0200
Subject: [PATCH 1/8] feat(backend): make SQLite the default local/Docker
 database
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the Firestore emulator sidecar with a new SqliteDatabase (IDatabase
impl) as the sole local/Docker-dev backend. Firestore now only connects to an
already-hosted, GCP-managed instance (DATABASE_TYPE=firestore) or a manually-run
emulator process (DATABASE_TYPE=emulator) — SpecFlow never deploys/manages
Firestore itself locally anymore.

- docker-compose.yml: drop firestore-emulator/firestore-exporter services;
  backend defaults to DATABASE_TYPE=sqlite and bind-mounts the host's
  ~/.specflow/ directory (one central db shared across local projects/MCP
  sessions, matching the old shared-emulator model).
- Makefile: init-firestore(-dry) -> init-db(-dry) (aliases kept), isolated
  test-stack gets its own nested sqlite path so tests never touch the real
  central db.
- init_firestore.py -> init_db.py; emulator-host requirement now conditional
  on DATABASE_TYPE.
- create_generation_session_repos.py: DATABASE_TYPE gate now rejects only
  memory (was firestore-only).
- startup_validation.py: generalized Firestore-only connectivity check to a
  DB-agnostic one; TOKEN_ENCRYPTION_KEY/GitHub-secrets requirement now covers
  sqlite too (it persists across restarts, unlike memory).
- Extracted shared IDatabase contract tests (db_contract.py) run against both
  memory and sqlite backends.
---
 Makefile                                      | 113 +++--
 backend/Dockerfile                            |   3 +
 backend/app/core/app_lifecycle.py             |   5 +
 backend/app/core/config.py                    |   5 +
 backend/app/core/enums.py                     |   1 +
 backend/app/core/local_identity.py            |   4 +-
 backend/app/database/__init__.py              |   7 +-
 backend/app/database/factory.py               |  25 +-
 backend/app/database/sqlite.py                | 448 ++++++++++++++++++
 backend/app/middleware/local_auth.py          |  10 +-
 backend/app/services/startup_validation.py    |  83 ++--
 .../create_generation_session_repos.py        |  50 +-
 backend/scripts/fix_stuck_workspaces.py       |   6 +-
 .../scripts/{init_firestore.py => init_db.py} |  44 +-
 backend/test/database/db_contract.py          | 402 ++++++++++++++++
 backend/test/database/test_factory.py         |  43 +-
 backend/test/database/test_memory_db.py       | 401 +---------------
 backend/test/database/test_sqlite_db.py       | 151 ++++++
 .../test_firestore_emulator_persistence.py    | 171 -------
 backend/test/middleware/test_local_auth.py    |   2 +-
 .../test_create_generation_session_repos.py   |   4 +-
 ...test_init_firestore.py => test_init_db.py} |  44 +-
 .../test/scripts/test_specflow_init_script.py |  76 +--
 .../test/services/test_startup_validation.py  |  60 ++-
 docker-compose.yml                            |  99 +---
 .../use-sqlite-instead-firestore-SPECS.md     | 147 ++++++
 scripts/firestore-emulator-entrypoint.sh      |  42 --
 scripts/firestore-emulator-exporter.sh        |  83 ----
 scripts/get-api-key.py                        |   7 +-
 specflow-init.sh                              |  12 +-
 30 files changed, 1590 insertions(+), 958 deletions(-)
 create mode 100644 backend/app/database/sqlite.py
 rename backend/scripts/{init_firestore.py => init_db.py} (94%)
 create mode 100644 backend/test/database/db_contract.py
 create mode 100644 backend/test/database/test_sqlite_db.py
 delete mode 100644 backend/test/integration/test_firestore_emulator_persistence.py
 rename backend/test/scripts/{test_init_firestore.py => test_init_db.py} (91%)
 create mode 100644 plans/use-sqlite-instead-firestore/use-sqlite-instead-firestore-SPECS.md
 delete mode 100644 scripts/firestore-emulator-entrypoint.sh
 delete mode 100644 scripts/firestore-emulator-exporter.sh

diff --git a/Makefile b/Makefile
index c6cff02..fc2ee6a 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 # Default target
 .DEFAULT_GOAL := build
 
-# One-command local quickstart: bootstrap emulator + backend, seed Firestore, emit MCP config
+# One-command local quickstart: bootstrap backend, seed the database, emit MCP config
 quickstart:
 	@./specflow-init.sh $(ARGS)
 
@@ -11,6 +11,16 @@ quickstart:
 WORKSPACE_MOUNT_PATH ?= ./workspaces
 
 BACKEND_URL ?= http://localhost:8000
+# Local/Docker default. Override to "firestore" to connect to an already-hosted, GCP-managed
+# Firestore instance, or "emulator" to connect to a manually-run Firestore emulator process —
+# SpecFlow itself never deploys/manages either locally.
+DATABASE_TYPE ?= sqlite
+# Central SQLite file, bind-mounted into the backend container at the same path (see
+# docker-compose.yml) — one database shared across every local project/MCP session, matching
+# the old shared Firestore-emulator model. Host-side scripts (init_db.py, tests) read/write
+# the exact same file directly; no docker exec needed.
+SPECFLOW_HOME_PATH ?= $(HOME)/.specflow
+SQLITE_DB_PATH ?= $(SPECFLOW_HOME_PATH)/specflow.db
 FIRESTORE_EMULATOR_HOST ?= localhost:8080
 # Must match docker-compose.yml defaults so host-side seeding/tests see the same
 # named Firestore database as the backend container (quickstart sets these via specflow-init.sh).
@@ -18,7 +28,7 @@ GCP_PROJECT_ID ?= local-dev
 FIRESTORE_DATABASE_NAME ?= specflow
 E2E_WORKSPACE_COUNT ?= 3
 
-# Workspace-pool repos used to prefill the test Firestore (init_firestore.py). REQUIRED: there are
+# Workspace-pool repos used to prefill the test database (init_db.py). REQUIRED: there are
 # no default repos, and SKIP_MODE still clones each repo during allocation — so point this at a JSON
 # list of YOUR test repos. The e2e targets refuse to run without it.
 # Schema: [{"workspace_id": "ws-01-1", "repo_url": "https://github.com/org/repo",
@@ -30,33 +40,33 @@ E2E_WORKSPACE_COUNT ?= 3
 # Or point at any path explicitly:  make skip-mode-e2e-tests E2E_WORKSPACE_CONFIG=my-test-repos.json
 E2E_WORKSPACE_CONFIG ?= $(wildcard e2e-workspace-config.json)
 # --yes keeps re-runs non-interactive; --workspace-config supplies the (required) workspace pool.
-INIT_FIRESTORE_ARGS := --yes
+INIT_DB_ARGS := --yes
 ifneq ($(strip $(E2E_WORKSPACE_CONFIG)),)
-INIT_FIRESTORE_ARGS += --workspace-config $(abspath $(E2E_WORKSPACE_CONFIG))
+INIT_DB_ARGS += --workspace-config $(abspath $(E2E_WORKSPACE_CONFIG))
 endif
 
 # ── Isolated local-testing stack ───────────────────────────────────────────────────────────
 # Contributor test runs (e2e + integration) use a SEPARATE docker-compose project, container
-# names, host ports, and workspace/Firestore volume from a self-hosted quickstart deployment
-# (quickstart uses the default project + ./workspaces). So a test run never clobbers a running
-# quickstart: `make stop` only tears down the test stack and removes its ephemeral ./.specflow-test
-# state.
+# names, host ports, and workspace/database paths from a self-hosted quickstart deployment
+# (quickstart uses the default project + ./workspaces + ~/.specflow). So a test run never
+# clobbers a running quickstart OR the real central SQLite database: `make stop` tears down
+# the test stack and removes its ephemeral ./.specflow-test state (which nests its own
+# isolated specflow-home/specflow.db, cleaned up the same way).
 #
 # Applied as target-specific *exported* vars so they also reach prerequisite targets
 # (run-detached / run-detached-skip) and sub-makes (`$(MAKE) stop`, `$(MAKE) e2e-setup`).
 SPECFLOW_BACKEND_CONTAINER ?= specflow-backend
 TEST_WORKSPACE_MOUNT_PATH := ./.specflow-test
+TEST_SPECFLOW_HOME_PATH := $(TEST_WORKSPACE_MOUNT_PATH)/specflow-home
 TEST_STACK_TARGETS := e2e-setup skip-mode-e2e-tests contract-validation-e2e-tests shutdown-recovery-e2e-tests real-e2e-tests integration-tests stop stop-test
 $(TEST_STACK_TARGETS): export COMPOSE_PROJECT_NAME := specflow-test
 $(TEST_STACK_TARGETS): export WORKSPACE_MOUNT_PATH := $(TEST_WORKSPACE_MOUNT_PATH)
 $(TEST_STACK_TARGETS): export SPECFLOW_BACKEND_CONTAINER := specflow-test-backend
-$(TEST_STACK_TARGETS): export SPECFLOW_FIRESTORE_CONTAINER := specflow-test-firestore-emulator
-$(TEST_STACK_TARGETS): export SPECFLOW_FIRESTORE_EXPORTER_CONTAINER := specflow-test-firestore-exporter
 $(TEST_STACK_TARGETS): export SPECFLOW_MCP_CONTAINER := specflow-test-mcp-server
 $(TEST_STACK_TARGETS): export SPECFLOW_BACKEND_PORT := 18000
-$(TEST_STACK_TARGETS): export SPECFLOW_FIRESTORE_PORT := 18080
 $(TEST_STACK_TARGETS): export BACKEND_URL := http://localhost:18000
-$(TEST_STACK_TARGETS): export FIRESTORE_EMULATOR_HOST := localhost:18080
+$(TEST_STACK_TARGETS): export SPECFLOW_HOME_MOUNT_PATH := $(TEST_SPECFLOW_HOME_PATH)
+$(TEST_STACK_TARGETS): export SQLITE_DB_PATH := $(TEST_SPECFLOW_HOME_PATH)/specflow.db
 $(TEST_STACK_TARGETS): export GCP_PROJECT_ID := $(GCP_PROJECT_ID)
 $(TEST_STACK_TARGETS): export FIRESTORE_DATABASE_NAME := $(FIRESTORE_DATABASE_NAME)
 
@@ -87,16 +97,16 @@ build:
 	docker-compose build
 
 run:
-	@echo "🚀 Starting services in DEV mode (with Firestore Emulator)..."
+	@echo "🚀 Starting services in DEV mode (DATABASE_TYPE=$(DATABASE_TYPE))..."
 	@echo "📁 Using WORKSPACE_MOUNT_PATH: $(WORKSPACE_MOUNT_PATH)"
-	@echo "💾 Database: Firestore Emulator (no GCP credentials needed)"
+	@echo "💾 Database: SQLite at $(SQLITE_DB_PATH) (no GCP credentials needed)"
 	WORKSPACE_MOUNT_PATH=$(WORKSPACE_MOUNT_PATH) docker-compose up --no-build
 
 # Run with SKIP_MODE enabled (agents return immediately without execution)
 run-skip:
 	@echo "🚀 Starting services in DEV mode with SKIP_MODE enabled..."
 	@echo "📁 Using WORKSPACE_MOUNT_PATH: $(WORKSPACE_MOUNT_PATH)"
-	@echo "💾 Database: Firestore Emulator (no GCP credentials needed)"
+	@echo "💾 Database: SQLite at $(SQLITE_DB_PATH) (no GCP credentials needed)"
 	@echo "⏭️  Agent execution: SKIPPED (testing mode)"
 	WORKSPACE_MOUNT_PATH=$(WORKSPACE_MOUNT_PATH) SKIP_AGENT_EXECUTION=true docker-compose up --no-build
 
@@ -104,19 +114,19 @@ run-skip:
 run-detached: build
 	@echo "🚀 Starting services in background (DEV mode)..."
 	@echo "📁 Using WORKSPACE_MOUNT_PATH: $(WORKSPACE_MOUNT_PATH)"
-	@echo "💾 Database: Firestore Emulator"
+	@echo "💾 Database: SQLite at $(SQLITE_DB_PATH)"
 	WORKSPACE_MOUNT_PATH=$(WORKSPACE_MOUNT_PATH) docker-compose up -d --no-build
 
 # Run in detached mode with SKIP_MODE (same as run-detached: build then up — cache-friendly)
 run-detached-skip: build
 	@echo "🚀 Starting services in background (DEV mode) with SKIP_MODE..."
 	@echo "📁 Using WORKSPACE_MOUNT_PATH: $(WORKSPACE_MOUNT_PATH)"
-	@echo "💾 Database: Firestore Emulator"
+	@echo "💾 Database: SQLite at $(SQLITE_DB_PATH)"
 	@echo "⏭️  Agent execution: SKIPPED (testing mode)"
 	WORKSPACE_MOUNT_PATH=$(WORKSPACE_MOUNT_PATH) SKIP_AGENT_EXECUTION=true docker-compose up -d --no-build
 
 # Stop ONLY the isolated local-testing stack (project: specflow-test) and wipe its ephemeral
-# workspace/Firestore state. Quickstart is stopped outside this Make target.
+# workspace/database state. Quickstart is stopped outside this Make target.
 stop:
 	@echo "🛑 Stopping the isolated local-testing stack (project: specflow-test)..."
 	docker-compose down --timeout 90
@@ -221,19 +231,35 @@ format:
 	@cd backend && uv run ruff check . --fix
 	@echo "✅ Code formatted"
 
-# Initialize Firestore (with emulator)
+# Initialize the active database backend (sqlite by default; override DATABASE_TYPE for
+# firestore/emulator). Runs host-side against the same file the backend container has
+# bind-mounted (sqlite) or the same emulator host:port (emulator) — no docker exec needed.
+init-db:
+	$(require-e2e-workspace-config)
+	@echo "🔧 Initializing database (DATABASE_TYPE=$(DATABASE_TYPE))..."
+	@cd backend && \
+		DATABASE_TYPE=$(DATABASE_TYPE) \
+		SQLITE_DB_PATH=$(SQLITE_DB_PATH) \
+		FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST) \
+		uv run scripts/init_db.py $(INIT_DB_ARGS)
+
+# Backward-compatible alias.
 init-firestore:
+	@$(MAKE) init-db
+
+# Initialize the active database backend (dry run)
+init-db-dry:
 	$(require-e2e-workspace-config)
-	@echo "🔧 Initializing Firestore database..."
-	@echo "⚠️  Using FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST)"
-	@cd backend && FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST) uv run scripts/init_firestore.py $(INIT_FIRESTORE_ARGS)
+	@echo "🔧 Dry run: Initializing database (DATABASE_TYPE=$(DATABASE_TYPE))..."
+	@cd backend && \
+		DATABASE_TYPE=$(DATABASE_TYPE) \
+		SQLITE_DB_PATH=$(SQLITE_DB_PATH) \
+		FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST) \
+		uv run scripts/init_db.py --dry-run $(INIT_DB_ARGS)
 
-# Initialize Firestore (dry run)
+# Backward-compatible alias.
 init-firestore-dry:
-	$(require-e2e-workspace-config)
-	@echo "🔧 Dry run: Initializing Firestore database..."
-	@echo "⚠️  Using FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST)"
-	@cd backend && FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST) uv run scripts/init_firestore.py --dry-run $(INIT_FIRESTORE_ARGS)
+	@$(MAKE) init-db-dry
 
 # Create estimation repositories
 # Usage: make create-repos START=7 END=9
@@ -268,15 +294,17 @@ unit-tests:
 	@cd mcp_server && uv run pytest tests/ -v
 	@echo "✅ Unit tests passed"
 
-# Run integration tests (with Firestore Emulator)
+# Run integration tests (sqlite by default; override DATABASE_TYPE=emulator/firestore)
 integration-tests:
 	@$(MAKE) stop
 	@$(MAKE) run-detached
 	@echo "⏳ Waiting for services to be ready..."
 	@sleep 5
-	@echo "🧪 Running integration tests (Firestore Emulator, database=$(FIRESTORE_DATABASE_NAME))..."
+	@echo "🧪 Running integration tests (DATABASE_TYPE=$(DATABASE_TYPE))..."
 	@cd backend && \
-		DATABASE_TYPE=emulator \
+		DATABASE_TYPE=$(DATABASE_TYPE) \
+		SQLITE_DB_PATH=$(SQLITE_DB_PATH) \
+		FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST) \
 		AUTH_MODE=api_key \
 		RUN_GIT_INTEGRATION_TESTS=1 \
 		uv run pytest test/ -v --cov=app
@@ -302,10 +330,13 @@ e2e-setup:
 		echo "   Attempt $$i/10..."; \
 		sleep 2; \
 	done
-	@echo "🔧 Initializing Firestore database (database=$(FIRESTORE_DATABASE_NAME))..."
+	@echo "🔧 Initializing database (DATABASE_TYPE=$(DATABASE_TYPE))..."
 	@cd backend && \
 		GITHUB_TOKEN=$${GITHUB_TOKEN:-} \
-		uv run scripts/init_firestore.py $(INIT_FIRESTORE_ARGS) || (echo "⚠️  Firestore initialization failed. Services may still be starting. Retry with: make init-firestore" && exit 1)
+		DATABASE_TYPE=$(DATABASE_TYPE) \
+		SQLITE_DB_PATH=$(SQLITE_DB_PATH) \
+		FIRESTORE_EMULATOR_HOST=$(FIRESTORE_EMULATOR_HOST) \
+		uv run scripts/init_db.py $(INIT_DB_ARGS) || (echo "⚠️  Database initialization failed. Services may still be starting. Retry with: make init-db" && exit 1)
 	@echo "🔑 Fetching API key..."
 	@cd backend && \
 		uv run python ../scripts/get-api-key.py || (echo "⚠️  Could not fetch API key" && exit 1)
@@ -317,8 +348,8 @@ e2e-setup:
 	@echo "=========================================="
 	@echo ""
 	@echo "📋 Services running:"
-	@echo "  - Backend API:     $(BACKEND_URL)"
-	@echo "  - Firestore Emulator: $(FIRESTORE_EMULATOR_HOST)"
+	@echo "  - Backend API: $(BACKEND_URL)"
+	@echo "  - Database:    $(DATABASE_TYPE) ($(SQLITE_DB_PATH))"
 	@echo ""
 	@echo "📁 Example specifications created at:"
 	@echo "  /tmp/specflow-e2e-specs"
@@ -410,7 +441,7 @@ help:
 	@echo "  make quickstart ARGS='--skip-repos'             - Skip GitHub repo creation (supply .specflow-local/workspaces.json)"
 	@echo "  make build                                      - Build base image and all services (default)"
 	@echo "  make base                                       - Build only the base image"
-	@echo "  make run                                        - Start in DEV mode (Firestore Emulator)"
+	@echo "  make run                                        - Start in DEV mode (SQLite, no GCP credentials needed)"
 	@echo "  make run-skip                                   - Start in DEV mode with SKIP_MODE (agents return immediately)"
 	@echo "  make run WORKSPACE_MOUNT_PATH=/path             - Start with custom workspace mount"
 	@echo "  make run-detached                               - Start in background (DEV mode)"
@@ -432,16 +463,16 @@ help:
 	@echo "  make secret-scan-history                        - Run secret scans including full local git history"
 	@echo "  make format                                     - Format code with ruff"
 	@echo "  make unit-tests                                 - Run unit tests (in-memory database, fast)"
-	@echo "  make integration-tests                          - Run integration tests (Firestore Emulator)"
+	@echo "  make integration-tests                          - Run integration tests (SQLite by default; DATABASE_TYPE=emulator|firestore to override)"
 	@echo "  (e2e + integration tests run in an isolated ephemeral stack: project specflow-test, mount ./.specflow-test)"
-	@echo "  make e2e-setup                                  - Setup E2E environment (starts services, initializes Firestore, creates example specs)"
+	@echo "  make e2e-setup                                  - Setup E2E environment (starts services, initializes the database, creates example specs)"
 	@echo "  make skip-mode-e2e-tests                        - Fast E2E of the MCP tool sequence + contract gate (SKIP mode)"
 	@echo "      E2E_WORKSPACE_CONFIG=path.json              - REQUIRED: prefill the test pool with your own repos (no defaults)"
 	@echo "  make contract-validation-e2e-tests              - E2E: contract rejections reject before allocating (no orphan workspaces)"
 	@echo "  make shutdown-recovery-e2e-tests                - E2E: restart backend mid-run, verify graceful shutdown + boot recovery"
 	@echo "  make real-e2e-tests                             - Full real-agent E2E (slow, 30-90 min)"
-	@echo "  make init-firestore-dry                         - Initialize Firestore (dry run, shows what would be done)"
-	@echo "  make init-firestore                             - Initialize Firestore database (requires FIRESTORE_EMULATOR_HOST)"
+	@echo "  make init-db-dry                                - Initialize the active database (dry run, shows what would be done)"
+	@echo "  make init-db                                    - Initialize the active database (SQLite by default; DATABASE_TYPE to override)"
 	@echo "  make create-repos START=7 END=9                 - Create generation workspace repositories"
 	@echo "  make create-repos START=1 END=3 PREFIX=test     - Create repos with custom prefix"
 	@echo ""
@@ -456,7 +487,9 @@ help:
 	@echo "  make ops-retry-run generation_id=X BACKEND_URL=http://host:8000  - Override backend URL"
 	@echo ""
 	@echo "Database Modes:"
-	@echo "  DEV mode:    Uses Firestore Emulator (no GCP credentials needed)"
+	@echo "  DEV mode:    Uses SQLite (no GCP credentials needed, single central db at ~/.specflow/specflow.db)"
+	@echo "  Override:    DATABASE_TYPE=firestore to connect to an already-hosted GCP Firestore instance"
+	@echo "               DATABASE_TYPE=emulator to connect to a manually-run Firestore emulator process"
 	@echo ""
 	@echo "SKIP_MODE:"
 	@echo "  When enabled, agent_query returns immediately with 'SKIP_MODE' response"
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 78166ba..1a84c6b 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -104,6 +104,9 @@ RUN uv sync --frozen --no-dev || uv sync --no-dev && \
 
 # Copy application code (preserve the app directory structure)
 COPY app ./app
+# scripts/ (init_db.py etc.) — needed at runtime via `docker compose exec backend
+# .venv/bin/python scripts/init_db.py` (sqlite has no exposed port for host-side seeding).
+COPY scripts ./scripts
 
 # Create .claude directory and copy settings to root's home directory
 RUN mkdir -p /root/.claude
diff --git a/backend/app/core/app_lifecycle.py b/backend/app/core/app_lifecycle.py
index 6ae79a0..36ea993 100644
--- a/backend/app/core/app_lifecycle.py
+++ b/backend/app/core/app_lifecycle.py
@@ -28,6 +28,7 @@
 )
 from app.core.logging import _cleanup_queue_logging
 from app.database.factory import get_database
+from app.database.sqlite import SqliteDatabase
 from app.jobs.shutdown_interrupted_recovery import recover_interrupted_sessions
 from app.jobs.stuck_cleaning_recovery import recover_stuck_cleaning
 from app.jobs.stuck_initializing_detector import detect_stuck_initializing
@@ -188,6 +189,10 @@ async def lifespan(app: FastAPI):
 
     await run_shutdown_session_handling(db)
 
+    if isinstance(raw_db, SqliteDatabase):
+        logger.info("Checkpointing SQLite WAL before shutdown...")
+        raw_db.close()
+
     if not validation_task.done():
         logger.info("Cancelling startup validation task...")
         validation_task.cancel()
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index bf494ad..98c8fa2 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -227,6 +227,11 @@ def _empty_str_to_none_int(cls, v: object) -> object:
     FIRESTORE_EMULATOR_HOST: Optional[str] = None  # e.g., localhost:8080 or firestore-emulator:8080
     GCP_PROJECT_ID: Optional[str] = None  # GCP project ID for Firestore
     FIRESTORE_DATABASE_NAME: str = "default"  # Firestore database name (default: "(default)")
+    # SQLite file path for DATABASE_TYPE=sqlite (single-writer, local/Docker-dev default).
+    # Container-side path where docker-compose bind-mounts the host's ~/.specflow/ directory
+    # (one central database shared across every local project/MCP session, like the Firestore
+    # emulator used to be). MUST be on block storage, never NFS/Filestore.
+    SQLITE_DB_PATH: str = "/root/.specflow/specflow.db"
 
     # LLM Provider Configuration
     # Active LLM provider: "openrouter" (default) or "anthropic".
diff --git a/backend/app/core/enums.py b/backend/app/core/enums.py
index c9019ae..178f520 100644
--- a/backend/app/core/enums.py
+++ b/backend/app/core/enums.py
@@ -23,3 +23,4 @@ class DatabaseType(StrEnum):
     MEMORY = "memory"
     EMULATOR = "emulator"
     FIRESTORE = "firestore"
+    SQLITE = "sqlite"
diff --git a/backend/app/core/local_identity.py b/backend/app/core/local_identity.py
index eccc020..e32b2f3 100644
--- a/backend/app/core/local_identity.py
+++ b/backend/app/core/local_identity.py
@@ -2,11 +2,11 @@
 Local identity constants for single-user local auth mode.
 
 Single source of truth for the sentinel api_keys document used by
-LocalAuthMiddleware (Phase 3) and init_firestore.py sentinel seeding (Phase 5).
+LocalAuthMiddleware (Phase 3) and init_db.py sentinel seeding (Phase 5).
 """
 
 LOCAL_API_KEY_DOC_ID: str = "local"
-"""Firestore document-id for the local sentinel api_keys doc."""
+"""Document-id for the local sentinel api_keys doc."""
 
 LOCAL_KEY_UID: str = "00000000-10ca-0000-0000-000000000001"
 """
diff --git a/backend/app/database/__init__.py b/backend/app/database/__init__.py
index a2fc998..23de50f 100644
--- a/backend/app/database/__init__.py
+++ b/backend/app/database/__init__.py
@@ -2,9 +2,10 @@
 Database abstraction layer for SpecFlow backend.
 
 Provides a unified interface for database operations with support for:
-- Production: Cloud Firestore
-- Development: Firestore Emulator
+- Local / Docker dev (default): SQLite (single-writer, persistent)
+- Production, or connecting to an already-hosted GCP instance: Firestore
 - Testing: In-memory database
+- Manual Firestore-emulator use (not started by docker-compose): Emulator
 
 The factory pattern selects the appropriate implementation based on environment configuration.
 
@@ -34,6 +35,7 @@
 from app.database.memory import InMemoryDatabase
 from app.database.firestore import FirestoreDatabase
 from app.database.emulator import EmulatorDatabase
+from app.database.sqlite import SqliteDatabase
 from app.database.factory import get_database, reset_database, clear_test_data
 from app.database.dependencies import get_db
 
@@ -47,6 +49,7 @@
     "InMemoryDatabase",
     "FirestoreDatabase",
     "EmulatorDatabase",
+    "SqliteDatabase",
     # Factory functions (recommended way to get database instance)
     "get_database",
     "reset_database",
diff --git a/backend/app/database/factory.py b/backend/app/database/factory.py
index 970488c..d87c032 100644
--- a/backend/app/database/factory.py
+++ b/backend/app/database/factory.py
@@ -14,6 +14,7 @@
 from app.database.memory import InMemoryDatabase
 from app.database.emulator import EmulatorDatabase
 from app.database.firestore import FirestoreDatabase
+from app.database.sqlite import SqliteDatabase
 
 
 # Singleton instance
@@ -29,17 +30,19 @@ def get_database() -> IDatabase:
     
     The implementation is selected based on the DATABASE_TYPE setting:
     - "memory": InMemoryDatabase (for unit tests)
-    - "emulator": EmulatorDatabase (for local development with Firestore Emulator)
-    - "firestore": FirestoreDatabase (for production GCP Firestore)
-    
+    - "sqlite": SqliteDatabase (local/Docker-dev default; single-writer, persistent)
+    - "emulator": EmulatorDatabase (manual Firestore-emulator use; not started by docker-compose)
+    - "firestore": FirestoreDatabase (production, or connecting to an already-hosted GCP instance)
+
     Returns:
         IDatabase: Configured database instance
-        
+
     Raises:
         ValueError: If DATABASE_TYPE is invalid
-        
+
     Environment Variables:
-        DATABASE_TYPE: Type of database to use (memory|emulator|firestore)
+        DATABASE_TYPE: Type of database to use (memory|sqlite|emulator|firestore)
+        SQLITE_DB_PATH: SQLite file path (required for sqlite mode)
         FIRESTORE_EMULATOR_HOST: Emulator host:port (required for emulator mode)
         GCP_PROJECT_ID: GCP project ID (optional for firestore mode)
         
@@ -59,6 +62,8 @@ def get_database() -> IDatabase:
 
     if db_type == DatabaseType.MEMORY:
         _database_instance = InMemoryDatabase()
+    elif db_type == DatabaseType.SQLITE:
+        _database_instance = SqliteDatabase(db_path=settings.SQLITE_DB_PATH)
     elif db_type == DatabaseType.EMULATOR:
         if not settings.FIRESTORE_EMULATOR_HOST:
             raise ValueError(
@@ -79,7 +84,7 @@ def get_database() -> IDatabase:
     else:
         raise ValueError(
             f"Invalid DATABASE_TYPE: {db_type}. "
-            f"Must be one of: memory, emulator, firestore"
+            f"Must be one of: memory, sqlite, emulator, firestore"
         )
     
     return _database_instance
@@ -129,10 +134,10 @@ def clear_test_data(collections: Optional[list[str]] = None) -> None:
     
     # Check if we're in a safe environment
     db_type = settings.DATABASE_TYPE
-    if db_type not in (DatabaseType.MEMORY, DatabaseType.EMULATOR):
+    if db_type not in (DatabaseType.MEMORY, DatabaseType.EMULATOR, DatabaseType.SQLITE):
         raise RuntimeError(
-            f"clear_test_data() should only be used with 'memory' or 'emulator' databases. "
-            f"Current database type: {db_type}"
+            f"clear_test_data() should only be used with 'memory', 'emulator', or 'sqlite' "
+            f"databases. Current database type: {db_type}"
         )
     
     # Call clear_all if the database supports it
diff --git a/backend/app/database/sqlite.py b/backend/app/database/sqlite.py
new file mode 100644
index 0000000..bf9175c
--- /dev/null
+++ b/backend/app/database/sqlite.py
@@ -0,0 +1,448 @@
+"""
+SQLite database implementation for local / single-node persistence.
+
+A SQL-native document store: it persists the same document-shaped model used by
+Firestore and the in-memory backend (``collection / doc_id / {JSON}`` plus
+subcollections), so services, state machines, and workflows are unchanged. This is
+the local/Docker-dev default backend. It is NOT a production replacement for
+Firestore (single-file, single-writer — no cross-node distributed locking).
+
+Design notes:
+- Two tables hold JSON blobs (``documents``, ``subdocuments``); queries push
+  filters/order/limit into SQL via ``json_extract`` so behavior matches the
+  in-memory reference (``app/database/memory.py``).
+- Transactions use a real ``BEGIN IMMEDIATE`` (genuine ACID), a strict upgrade
+  over the write-buffering bridge in ``app/state/db_adapter.py``.
+- Datetime contract: every datetime is stored as a fixed-width ISO-8601 UTC
+  string (lexical order == chronological order) and decoded back to a tz-aware
+  ``datetime`` on read, so background jobs (stuck detectors, lease recovery)
+  that compare against ``datetime.now(timezone.utc)`` behave identically to
+  production.
+- Concurrency: one writer process only (WAL mode). Multi-replica stays on
+  Firestore.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import sqlite3
+import threading
+from datetime import UTC, datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, TypeVar
+
+from app.database.interface import (
+    DocumentNotFoundError,
+    FilterTuple,
+    IDatabase,
+    ITransactionContext,
+)
+
+T = TypeVar("T")
+
+# Default collections cleared by clear_all(None) when callers do not specify a set.
+_DEFAULT_CLEAR_COLLECTIONS = ("api_keys", "generation_sessions", "workspaces")
+
+# Strict-enough ISO-8601 datetime shape (must carry a time and a tz designator) so
+# only values we wrote as canonical timestamps are decoded back to datetime.
+_ISO_DATETIME_RE = re.compile(
+    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})$"
+)
+
+
+class _ServerTimestamp:
+    """Sentinel for a server-assigned timestamp (resolved to now-UTC on write)."""
+
+
+def _canonical_dt(value: datetime) -> str:
+    """Render a datetime as a fixed-width ISO-8601 UTC string (tz-naive assumed UTC)."""
+    if value.tzinfo is None:
+        value = value.replace(tzinfo=UTC)
+    return value.astimezone(UTC).isoformat(timespec="microseconds")
+
+
+def _encode_for_storage(value: Any) -> Any:
+    """Replace server-timestamp sentinels, normalize datetimes/enums, recurse into dict/list."""
+    if isinstance(value, _ServerTimestamp):
+        return _canonical_dt(datetime.now(UTC))
+    if isinstance(value, datetime):
+        return _canonical_dt(value)
+    if isinstance(value, Enum):
+        return value.value
+    if isinstance(value, dict):
+        return {k: _encode_for_storage(v) for k, v in value.items()}
+    if isinstance(value, list):
+        return [_encode_for_storage(v) for v in value]
+    return value
+
+
+def _decode_from_storage(value: Any) -> Any:
+    """Decode canonical ISO-8601 strings back to tz-aware datetimes, recurse into dict/list."""
+    if isinstance(value, str) and _ISO_DATETIME_RE.match(value):
+        return datetime.fromisoformat(value).astimezone(UTC)
+    if isinstance(value, dict):
+        return {k: _decode_from_storage(v) for k, v in value.items()}
+    if isinstance(value, list):
+        return [_decode_from_storage(v) for v in value]
+    return value
+
+
+def _to_sql_param(value: Any) -> Any:
+    """Coerce a Python filter value to a SQLite-bindable scalar."""
+    if isinstance(value, datetime):
+        return _canonical_dt(value)
+    if isinstance(value, Enum):
+        return value.value
+    if isinstance(value, bool):
+        return int(value)
+    return value
+
+
+def _json_path(field: str) -> str:
+    """Convert a dotted field name to a JSON path (e.g. 'metadata.x' -> '$.metadata.x')."""
+    return "$." + field
+
+
+class SqliteTransactionContext(ITransactionContext):
+    """Transaction context operating directly on a connection inside BEGIN IMMEDIATE.
+
+    Callers must perform all reads before any writes (interface contract), so
+    operating on the live transaction is safe and gives real atomicity.
+    """
+
+    def __init__(self, conn: sqlite3.Connection) -> None:
+        self._conn = conn
+
+    def get(self, collection: str, doc_id: str) -> Optional[Dict[str, Any]]:
+        row = self._conn.execute(
+            "SELECT data FROM documents WHERE collection = ? AND doc_id = ?",
+            (collection, doc_id),
+        ).fetchone()
+        if row is None:
+            return None
+        return _decode_from_storage(json.loads(row[0]))
+
+    def set(self, collection: str, doc_id: str, data: Dict[str, Any]) -> None:
+        payload = json.dumps(_encode_for_storage(data))
+        self._conn.execute(
+            "INSERT INTO documents (collection, doc_id, data) VALUES (?, ?, ?) "
+            "ON CONFLICT(collection, doc_id) DO UPDATE SET data = excluded.data",
+            (collection, doc_id, payload),
+        )
+
+    def update(self, collection: str, doc_id: str, data: Dict[str, Any]) -> None:
+        existing = self.get(collection, doc_id)
+        if existing is None:
+            raise DocumentNotFoundError(collection, doc_id)
+        existing.update(data)
+        self.set(collection, doc_id, existing)
+
+    def delete(self, collection: str, doc_id: str) -> None:
+        self._conn.execute(
+            "DELETE FROM documents WHERE collection = ? AND doc_id = ?",
+            (collection, doc_id),
+        )
+
+    def get_subdocument(
+        self,
+        parent_collection: str,
+        parent_doc_id: str,
+        subcollection: str,
+        doc_id: str,
+    ) -> Optional[Dict[str, Any]]:
+        row = self._conn.execute(
+            "SELECT data FROM subdocuments WHERE parent_collection = ? AND "
+            "parent_doc_id = ? AND subcollection = ? AND doc_id = ?",
+            (parent_collection, parent_doc_id, subcollection, doc_id),
+        ).fetchone()
+        if row is None:
+            return None
+        return _decode_from_storage(json.loads(row[0]))
+
+    def set_subdocument(
+        self,
+        parent_collection: str,
+        parent_doc_id: str,
+        subcollection: str,
+        doc_id: str,
+        data: Dict[str, Any],
+    ) -> None:
+        payload = json.dumps(_encode_for_storage(data))
+        self._conn.execute(
+            "INSERT INTO subdocuments "
+            "(parent_collection, parent_doc_id, subcollection, doc_id, data) "
+            "VALUES (?, ?, ?, ?, ?) "
+            "ON CONFLICT(parent_collection, parent_doc_id, subcollection, doc_id) "
+            "DO UPDATE SET data = excluded.data",
+            (parent_collection, parent_doc_id, subcollection, doc_id, payload),
+        )
+
+
+class SqliteDatabase(IDatabase):
+    """Persistent document store backed by a single SQLite file (WAL, single-writer)."""
+
+    def __init__(self, db_path: str, busy_timeout_ms: int = 5000, max_retries: int = 5) -> None:
+        self._path = db_path
+        self._max_retries = max_retries
+        self._lock = threading.RLock()
+
+        if db_path != ":memory:":
+            Path(db_path).expanduser().parent.mkdir(parents=True, exist_ok=True)
+
+        self._conn = sqlite3.connect(db_path, check_same_thread=False, isolation_level=None)
+        if db_path != ":memory:":
+            self._conn.execute("PRAGMA journal_mode=WAL")
+        self._conn.execute(f"PRAGMA busy_timeout={busy_timeout_ms}")
+        self._init_schema()
+
+    def _init_schema(self) -> None:
+        with self._lock:
+            self._conn.execute(
+                "CREATE TABLE IF NOT EXISTS documents ("
+                "collection TEXT NOT NULL, doc_id TEXT NOT NULL, data TEXT NOT NULL, "
+                "PRIMARY KEY (collection, doc_id))"
+            )
+            self._conn.execute(
+                "CREATE TABLE IF NOT EXISTS subdocuments ("
+                "parent_collection TEXT NOT NULL, parent_doc_id TEXT NOT NULL, "
+                "subcollection TEXT NOT NULL, doc_id TEXT NOT NULL, data TEXT NOT NULL, "
+                "PRIMARY KEY (parent_collection, parent_doc_id, subcollection, doc_id))"
+            )
+
+    # ------------------------------------------------------------------
+    # CRUD
+    # ------------------------------------------------------------------
+
+    def get(self, collection: str, doc_id: str) -> Optional[Dict[str, Any]]:
+        with self._lock:
+            row = self._conn.execute(
+                "SELECT data FROM documents WHERE collection = ? AND doc_id = ?",
+                (collection, doc_id),
+            ).fetchone()
+            if row is None:
+                return None
+            return _decode_from_storage(json.loads(row[0]))
+
+    def set(self, collection: str, doc_id: str, data: Dict[str, Any]) -> None:
+        payload = json.dumps(_encode_for_storage(data))
+        with self._lock:
+            self._conn.execute(
+                "INSERT INTO documents (collection, doc_id, data) VALUES (?, ?, ?) "
+                "ON CONFLICT(collection, doc_id) DO UPDATE SET data = excluded.data",
+                (collection, doc_id, payload),
+            )
+
+    def update(self, collection: str, doc_id: str, data: Dict[str, Any]) -> None:
+        with self._lock:
+            existing = self.get(collection, doc_id)
+            if existing is None:
+                raise DocumentNotFoundError(collection, doc_id)
+            existing.update(data)
+            self.set(collection, doc_id, existing)
+
+    def delete(self, collection: str, doc_id: str) -> None:
+        with self._lock:
+            self._conn.execute(
+                "DELETE FROM documents WHERE collection = ? AND doc_id = ?",
+                (collection, doc_id),
+            )
+
+    # ------------------------------------------------------------------
+    # Query
+    # ------------------------------------------------------------------
+
+    def query(
+        self,
+        collection: str,
+        filters: Optional[List[FilterTuple]] = None,
+        order_by: Optional[str] = None,
+        limit: Optional[int] = None,
+    ) -> List[Dict[str, Any]]:
+        sql = "SELECT doc_id, data FROM documents WHERE collection = ?"
+        params: List[Any] = [collection]
+
+        for field, operator, value in filters or []:
+            clause, clause_params = self._filter_clause(field, operator, value)
+            sql += f" AND {clause}"
+            params.extend(clause_params)
+
+        if order_by:
+            descending = order_by.startswith("-")
+            field = order_by[1:] if descending else order_by
+            sql += " ORDER BY json_extract(data, ?) " + ("DESC" if descending else "ASC")
+            params.append(_json_path(field))
+
+        if limit:
+            sql += " LIMIT ?"
+            params.append(limit)
+
+        with self._lock:
+            rows = self._conn.execute(sql, params).fetchall()
+
+        results: List[Dict[str, Any]] = []
+        for doc_id, data in rows:
+            doc = _decode_from_storage(json.loads(data))
+            doc["_id"] = doc_id
+            results.append(doc)
+        return results
+
+    def _filter_clause(self, field: str, operator: str, value: Any) -> tuple[str, List[Any]]:
+        """Translate a (field, op, value) filter into a SQL clause + bind params."""
+        path = _json_path(field)
+        extract = "json_extract(data, ?)"
+
+        match operator:
+            case "==":
+                if value is None:
+                    return f"{extract} IS NULL", [path]
+                return f"{extract} = ?", [path, _to_sql_param(value)]
+            case "!=":
+                if value is None:
+                    return f"{extract} IS NOT NULL", [path]
+                # Include docs missing the field (None != value is True in the reference impl).
+                return f"({extract} <> ? OR {extract} IS NULL)", [
+                    path,
+                    _to_sql_param(value),
+                    path,
+                ]
+            case "<" | "<=" | ">" | ">=":
+                return f"{extract} {operator} ?", [path, _to_sql_param(value)]
+            case "in":
+                values = list(value)
+                if not values:
+                    return "0", []
+                placeholders = ", ".join("?" for _ in values)
+                return f"{extract} IN ({placeholders})", [path] + [
+                    _to_sql_param(v) for v in values
+                ]
+            case "array_contains":
+                return (
+                    "EXISTS (SELECT 1 FROM json_each(data, ?) WHERE value = ?)",
+                    [path, _to_sql_param(value)],
+                )
+            case _:
+                raise ValueError(f"Unsupported operator: {operator}")
+
+    # ------------------------------------------------------------------
+    # Transactions
+    # ------------------------------------------------------------------
+
+    def run_transaction(self, callback: Callable[[ITransactionContext], T]) -> T:
+        with self._lock:
+            for attempt in range(self._max_retries):
+                try:
+                    self._conn.execute("BEGIN IMMEDIATE")
+                    try:
+                        result = callback(SqliteTransactionContext(self._conn))
+                    except Exception:
+                        self._conn.rollback()
+                        raise
+                    self._conn.commit()
+                    return result
+                except sqlite3.OperationalError as exc:
+                    self._safe_rollback()
+                    if "locked" in str(exc).lower() and attempt < self._max_retries - 1:
+                        continue
+                    raise
+            # Unreachable: the final attempt either returns or re-raises above.
+            raise RuntimeError("run_transaction exhausted retries without result")
+
+    def _safe_rollback(self) -> None:
+        try:
+            self._conn.rollback()
+        except sqlite3.OperationalError:
+            pass
+
+    # ------------------------------------------------------------------
+    # Array / subcollection / timestamp / lookups
+    # ------------------------------------------------------------------
+
+    def array_union(
+        self, collection: str, doc_id: str, field: str, values: List[Any]
+    ) -> None:
+        with self._lock:
+            doc = self.get(collection, doc_id)
+            if doc is None:
+                raise DocumentNotFoundError(collection, doc_id)
+            current = doc.get(field)
+            array = list(current) if isinstance(current, list) else []
+            for value in values:
+                if value not in array:
+                    array.append(value)
+            doc[field] = array
+            self.set(collection, doc_id, doc)
+
+    def list_subcollection(
+        self,
+        parent_collection: str,
+        parent_doc_id: str,
+        subcollection: str,
+    ) -> List[Dict[str, Any]]:
+        with self._lock:
+            rows = self._conn.execute(
+                "SELECT doc_id, data FROM subdocuments WHERE parent_collection = ? AND "
+                "parent_doc_id = ? AND subcollection = ?",
+                (parent_collection, parent_doc_id, subcollection),
+            ).fetchall()
+        out: List[Dict[str, Any]] = []
+        for doc_id, data in rows:
+            row = _decode_from_storage(json.loads(data))
+            row["_id"] = doc_id
+            out.append(row)
+        return out
+
+    def server_timestamp(self) -> Any:
+        return _ServerTimestamp()
+
+    def get_api_key_by_uid(self, key_uid: str) -> Optional[Dict[str, Any]]:
+        with self._lock:
+            row = self._conn.execute(
+                "SELECT doc_id, data FROM documents WHERE collection = 'api_keys' AND "
+                "json_extract(data, '$.key_uid') = ?",
+                (key_uid,),
+            ).fetchone()
+        if row is None:
+            return None
+        result = _decode_from_storage(json.loads(row[1]))
+        result["_id"] = row[0]
+        return result
+
+    # ------------------------------------------------------------------
+    # Test / maintenance helpers (parity with InMemoryDatabase / FirestoreDatabase)
+    # ------------------------------------------------------------------
+
+    def clear_all(self, collections: Optional[List[str]] = None) -> None:
+        """Delete documents (and parented subdocuments). None clears the default test set."""
+        targets = list(collections) if collections is not None else list(_DEFAULT_CLEAR_COLLECTIONS)
+        with self._lock:
+            if not targets:
+                return
+            placeholders = ", ".join("?" for _ in targets)
+            self._conn.execute(
+                f"DELETE FROM documents WHERE collection IN ({placeholders})", targets
+            )
+            self._conn.execute(
+                f"DELETE FROM subdocuments WHERE parent_collection IN ({placeholders})",
+                targets,
+            )
+
+    def clear(self) -> None:
+        """Drop all rows from both tables (full reset)."""
+        with self._lock:
+            self._conn.execute("DELETE FROM documents")
+            self._conn.execute("DELETE FROM subdocuments")
+
+    def close(self) -> None:
+        """Checkpoint the WAL back into the main file, then close the connection.
+
+        Bounds WAL growth across restarts and ensures a bare `sqlite3 specflow.db`
+        (opened outside this process) sees committed data immediately.
+        """
+        with self._lock:
+            try:
+                self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+            except sqlite3.OperationalError:
+                pass
+            self._conn.close()
diff --git a/backend/app/middleware/local_auth.py b/backend/app/middleware/local_auth.py
index 74da6c2..ace6edc 100644
--- a/backend/app/middleware/local_auth.py
+++ b/backend/app/middleware/local_auth.py
@@ -1,7 +1,7 @@
 """
 Local authentication middleware for single-user self-hosted mode.
 
-Reads the sentinel api_keys document (doc-id "local") seeded by init_firestore.py
+Reads the sentinel api_keys document (doc-id "local") seeded by init_db.py
 and populates all 7 request.state identity fields without requiring any inbound
 API key header. Intended exclusively for AUTH_MODE=local.
 """
@@ -55,13 +55,13 @@ async def dispatch(self, request: Request, call_next):
         if not doc:
             logger.error(
                 "Local identity sentinel doc '%s' missing in api_keys — "
-                "run init_firestore.py to seed it.",
+                "run init_db.py to seed it.",
                 LOCAL_API_KEY_DOC_ID,
             )
             return JSONResponse(
                 content={
                     "detail": (
-                        "Local identity not seeded — run init_firestore.py "
+                        "Local identity not seeded — run init_db.py "
                         "to initialise the local user identity before starting the server."
                     )
                 },
@@ -73,13 +73,13 @@ async def dispatch(self, request: Request, call_next):
         if not user_id_raw or not user_name_raw:
             logger.error(
                 "Local sentinel doc '%s' is missing required fields (user_id/user_name) — "
-                "re-run init_firestore.py to re-seed it.",
+                "re-run init_db.py to re-seed it.",
                 LOCAL_API_KEY_DOC_ID,
             )
             return JSONResponse(
                 content={
                     "detail": (
-                        "Local identity is incomplete — re-run init_firestore.py "
+                        "Local identity is incomplete — re-run init_db.py "
                         "to re-seed the local user identity."
                     )
                 },
diff --git a/backend/app/services/startup_validation.py b/backend/app/services/startup_validation.py
index 1779291..e974d8f 100644
--- a/backend/app/services/startup_validation.py
+++ b/backend/app/services/startup_validation.py
@@ -6,7 +6,7 @@
 
 Critical Checks:
 - Environment variables are set
-- Firestore is reachable
+- The active database backend is reachable
 - Workspace pool has available workspaces
 - Filestore mount is accessible
 
@@ -83,20 +83,20 @@ async def run_all_checks(self) -> Dict[str, Any]:
                 f"Environment check failed: {results['environment']['error']}"
             )
         
-        # 2. Firestore connectivity (CRITICAL)
-        results["firestore"] = await self._check_firestore_connectivity()
-        if not results["firestore"]["passed"]:
+        # 2. Database connectivity (CRITICAL)
+        results["database"] = await self._check_database_connectivity()
+        if not results["database"]["passed"]:
             raise StartupValidationError(
-                f"Firestore check failed: {results['firestore']['error']}"
+                f"Database check failed: {results['database']['error']}"
             )
-        
+
         # 3. Workspace pool validation (CRITICAL in production, WARNING in test/dev)
         results["workspace_pool"] = await self._check_workspace_pool()
         database_type = os.getenv("DATABASE_TYPE", "firestore")
-        
+
         if not results["workspace_pool"]["passed"]:
-            # Non-critical in test/dev environments (emulator, memory)
-            if database_type in ["emulator", "memory"]:
+            # Non-critical in test/dev environments (sqlite, emulator, memory)
+            if database_type in ["sqlite", "emulator", "memory"]:
                 logger.warning(
                     f"Workspace pool check: {results['workspace_pool']['error']} "
                     "(non-critical in test/dev mode)"
@@ -106,13 +106,13 @@ async def run_all_checks(self) -> Dict[str, Any]:
                 raise StartupValidationError(
                     f"Workspace pool check failed: {results['workspace_pool']['error']}"
                 )
-        
+
         # 4. Filestore mount check (CRITICAL in production, WARNING in test/dev)
         results["filestore"] = await self._check_filestore_mount()
-        
+
         if not results["filestore"]["passed"]:
             # Non-critical in test/dev environments
-            if database_type in ["emulator", "memory"]:
+            if database_type in ["sqlite", "emulator", "memory"]:
                 logger.warning(
                     f"Filestore check: {results['filestore']['error']} "
                     "(non-critical in test/dev mode)"
@@ -122,7 +122,7 @@ async def run_all_checks(self) -> Dict[str, Any]:
                 raise StartupValidationError(
                     f"Filestore check failed: {results['filestore']['error']}"
                 )
-        
+
         return results
     
     async def _check_environment(self) -> Dict[str, Any]:
@@ -130,8 +130,10 @@ async def _check_environment(self) -> Dict[str, Any]:
         Validate required environment variables are set.
 
         - Active LLM provider key: required based on settings.DEFAULT_PROVIDER.
-        - Git platform (Fernet key + default PAT + git user): required for firestore/emulator
-          deployments unless platform secrets are loaded from Kubernetes at startup.
+        - Git platform (Fernet key + default PAT + git user): required for sqlite/firestore/
+          emulator deployments (all persist real workspace state across restarts and clone
+          real GitHub repos) unless platform secrets are loaded from Kubernetes at startup.
+          memory is the only backend exempt — it's throwaway/ephemeral (unit tests only).
         """
         missing: list[str] = []
 
@@ -159,7 +161,7 @@ async def _check_environment(self) -> Dict[str, Any]:
         except ValueError:
             database_type = DatabaseType.MEMORY
 
-        if database_type in (DatabaseType.FIRESTORE, DatabaseType.EMULATOR):
+        if database_type in (DatabaseType.SQLITE, DatabaseType.FIRESTORE, DatabaseType.EMULATOR):
             in_cluster = bool(os.environ.get("KUBERNETES_SERVICE_HOST"))
 
             k8s_ready = (
@@ -196,21 +198,22 @@ async def _check_environment(self) -> Dict[str, Any]:
 
         return {"passed": True, "error": None}
     
-    async def _check_firestore_connectivity(self) -> Dict[str, Any]:
+    async def _check_database_connectivity(self) -> Dict[str, Any]:
         """
-        Verify Firestore is reachable and responsive.
-        
-        Uses a simple read operation to validate connectivity.
-        Includes retry logic with exponential backoff for emulator startup.
+        Verify the active database backend is reachable and responsive.
+
+        Uses a simple read operation to validate connectivity. Includes retry logic
+        with exponential backoff for emulator startup; sqlite/memory go straight to
+        the generic retry loop since there's no separate process to wait on.
         """
         import os
-        
+
         # Log diagnostic information
         emulator_host = os.getenv("FIRESTORE_EMULATOR_HOST")
         database_type = os.getenv("DATABASE_TYPE", "memory")
-        
+
         logger.info(
-            f"Firestore connectivity check - "
+            f"Database connectivity check - "
             f"DATABASE_TYPE={database_type}, "
             f"FIRESTORE_EMULATOR_HOST={emulator_host}"
         )
@@ -264,37 +267,41 @@ async def _check_firestore_connectivity(self) -> Dict[str, Any]:
                     asyncio.to_thread(self._db.query, "_health_check", []),
                     timeout=10.0  # 10 second timeout per attempt
                 )
-                
-                logger.info(f"Firestore connectivity check passed on attempt {attempt + 1}")
+
+                logger.info(f"Database connectivity check passed on attempt {attempt + 1}")
                 return {"passed": True, "error": None}
-            
+
             except asyncio.TimeoutError:
                 if attempt < max_retries - 1:
                     logger.warning(
-                        f"Firestore connectivity check timed out (attempt {attempt + 1}/{max_retries}), "
+                        f"Database connectivity check timed out (attempt {attempt + 1}/{max_retries}), "
                         f"retrying in {retry_delay}s..."
                     )
                     await asyncio.sleep(retry_delay)
                     retry_delay *= 2  # Exponential backoff
                 else:
+                    hint = (
+                        f"Check if emulator is running at {emulator_host}"
+                        if database_type == "emulator"
+                        else f"Check DATABASE_TYPE={database_type} configuration"
+                    )
                     return {
                         "passed": False,
-                        "error": f"Firestore connection timeout after {max_retries} attempts. "
-                                f"Check if emulator is running at {emulator_host}"
+                        "error": f"Database connection timeout after {max_retries} attempts. {hint}"
                     }
-            
+
             except Exception as e:
                 error_msg = str(e)
-                
+
                 # Check if it's a connection error that might be transient
                 is_connection_error = any(
-                    keyword in error_msg.lower() 
+                    keyword in error_msg.lower()
                     for keyword in ["connection refused", "failed to connect", "timeout", "unavailable"]
                 )
-                
+
                 if is_connection_error and attempt < max_retries - 1:
                     logger.warning(
-                        f"Firestore connection error (attempt {attempt + 1}/{max_retries}): {error_msg}, "
+                        f"Database connection error (attempt {attempt + 1}/{max_retries}): {error_msg}, "
                         f"retrying in {retry_delay}s..."
                     )
                     await asyncio.sleep(retry_delay)
@@ -302,13 +309,13 @@ async def _check_firestore_connectivity(self) -> Dict[str, Any]:
                 else:
                     return {
                         "passed": False,
-                        "error": f"Firestore connection failed: {error_msg}"
+                        "error": f"Database connection failed: {error_msg}"
                     }
-        
+
         # Should never reach here, but just in case
         return {
             "passed": False,
-            "error": "Firestore connectivity check failed after all retries"
+            "error": "Database connectivity check failed after all retries"
         }
     
     async def _check_workspace_pool(self) -> Dict[str, Any]:
diff --git a/backend/scripts/create_generation_session_repos.py b/backend/scripts/create_generation_session_repos.py
index f227eec..12d3040 100755
--- a/backend/scripts/create_generation_session_repos.py
+++ b/backend/scripts/create_generation_session_repos.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 """
-Create GitHub repositories for generation workspaces, optionally upsert Firestore workspaces,
-and trigger P10y metrics.
+Create GitHub repositories for generation workspaces, optionally upsert workspaces into the
+active database (sqlite, firestore, or an explicit hosted-Firestore target), and trigger P10y
+metrics.
 
 Example:
     uv run python scripts/create_generation_session_repos.py \
@@ -20,16 +21,19 @@
 2. Grants a team in that org Write access (GitHub API permission "push") on each repo
 3. Starts metric calculation for the created repositories via the P10y enable/metrics API
 4. Polls P10y to check when repositories are live with metrics
-5. Upserts the workspace pool in Firestore (--gcp-project / --firestore-database target the DB)
+5. Upserts the workspace pool into the active database (--gcp-project / --firestore-database
+   target a hosted Firestore instance directly; otherwise the active DATABASE_TYPE is used —
+   sqlite by default)
 
 Further usage:
     python scripts/create_generation_session_repos.py --github-org MyOrg --team my-team-slug --start 7 --end 9 \\
       --gcp-project my-project --firestore-database default
 
 Environment (optional defaults for flags):
-    When both --gcp-project and --firestore-database are set, Firestore writes use only those
-    values (not Settings / not DATABASE_TYPE). Otherwise use get_database() and set
-    DATABASE_TYPE=firestore and GCP_PROJECT_ID / FIRESTORE_DATABASE_NAME in .env or the shell.
+    When both --gcp-project and --firestore-database are set, writes target that hosted
+    Firestore instance only (not Settings / not DATABASE_TYPE). Otherwise use get_database(),
+    which honors DATABASE_TYPE (sqlite by default; set to firestore + GCP_PROJECT_ID /
+    FIRESTORE_DATABASE_NAME in .env or the shell to write against a hosted instance instead).
     GITHUB_ORG or GITHUB_ORG_DEFAULT — organization login (owner of repos)
     GITHUB_TEAM or GITHUB_TEAM_SLUG — team slug within that org
 """
@@ -752,7 +756,7 @@ def emit_workspace_config(
 ) -> None:
     """
     Write a JSON workspace-config file in the exact schema consumed by
-    ``init_firestore.py --workspace-config``:
+    ``init_db.py --workspace-config``:
 
         [{"workspace_id": str, "repo_url": str,
           "p10y_repository_id": int, "workspace_pool": str}, ...]
@@ -902,7 +906,7 @@ async def main():
     parser.add_argument(
         "--skip-firestore",
         action="store_true",
-        help="Skip adding workspaces to Firestore database"
+        help="Skip adding workspaces to the active database (sqlite/firestore)"
     )
     parser.add_argument(
         "--skip-metrics",
@@ -920,7 +924,7 @@ async def main():
         action="store_true",
         help=(
             "Print token actor, org, team, and full repo URLs; exit before any GitHub mutations, "
-            "P10y, or Firestore (P10Y_* env not required)"
+            "P10y, or database writes (P10Y_* env not required)"
         ),
     )
     parser.add_argument(
@@ -930,7 +934,7 @@ async def main():
         metavar="FILE",
         help=(
             "After repo_id_map resolves (Step 3), write a JSON workspace-config file at FILE "
-            "in the exact schema consumed by init_firestore.py --workspace-config: "
+            "in the exact schema consumed by init_db.py --workspace-config: "
             "[{workspace_id, repo_url, p10y_repository_id (int), workspace_pool}, ...]. "
             "Does not write to Firestore directly."
         ),
@@ -974,7 +978,7 @@ async def main():
 
     if not args.dry_run and not args.skip_firestore and not github_org:
         print(
-            "❌ Error: --github-org is required for Firestore repo URLs "
+            "❌ Error: --github-org is required to record workspace repo URLs "
             "(or set GITHUB_ORG / GITHUB_ORG_DEFAULT)"
         )
         sys.exit(1)
@@ -983,16 +987,20 @@ async def main():
         if firestore_target_from_cli:
             pass
         else:
-            if not (cfg.GCP_PROJECT_ID or "").strip():
+            # These write real GitHub-backed workspace repos into the active database — reject
+            # only DatabaseType.MEMORY (throwaway, non-persistent). sqlite (local default) and
+            # firestore (production / hosted-GCP) are both valid persistent targets.
+            if cfg.DATABASE_TYPE == DatabaseType.MEMORY:
                 print(
-                    "❌ Error: pass both --gcp-project and --firestore-database for direct Firestore "
-                    "writes, or set GCP_PROJECT_ID (and DATABASE_TYPE=firestore) for settings-based mode"
+                    "❌ Error: DATABASE_TYPE must not be memory (throwaway) when writing real "
+                    "workspace repos — use sqlite (default), firestore, or pass both "
+                    "--gcp-project and --firestore-database for direct Firestore writes"
                 )
                 sys.exit(1)
-            if cfg.DATABASE_TYPE != DatabaseType.FIRESTORE:
+            if cfg.DATABASE_TYPE == DatabaseType.FIRESTORE and not (cfg.GCP_PROJECT_ID or "").strip():
                 print(
-                    "❌ Error: DATABASE_TYPE must be firestore when not passing both "
-                    "--gcp-project and --firestore-database (backend default is memory)"
+                    "❌ Error: GCP_PROJECT_ID must be set when DATABASE_TYPE=firestore "
+                    "(or pass both --gcp-project and --firestore-database for direct writes)"
                 )
                 sys.exit(1)
 
@@ -1143,7 +1151,7 @@ async def main():
 
         repo_ids = list(repo_id_map.values())
 
-        # Emit workspace config JSON if requested (schema matches init_firestore.py --workspace-config)
+        # Emit workspace config JSON if requested (schema matches init_db.py --workspace-config)
         if args.output_workspace_config:
             emit_workspace_config(
                 repo_id_map=repo_id_map,
@@ -1179,9 +1187,9 @@ async def main():
         else:
             final_statuses = {}
         
-        # Step 6: Add workspaces to Firestore
-        # --repos path: workspace-config JSON is written above; Firestore seeding is done
-        # separately by init_firestore.py --workspace-config. add_workspaces_to_firestore
+        # Step 6: Add workspaces to the active database
+        # --repos path: workspace-config JSON is written above; database seeding is done
+        # separately by init_db.py --workspace-config. add_workspaces_to_firestore
         # extracts workspace IDs from {prefix}{num} names and cannot handle arbitrary names.
         if not args.skip_firestore and own_repo_list is None:
             await add_workspaces_to_firestore(
diff --git a/backend/scripts/fix_stuck_workspaces.py b/backend/scripts/fix_stuck_workspaces.py
index 85f10a3..caadf1a 100755
--- a/backend/scripts/fix_stuck_workspaces.py
+++ b/backend/scripts/fix_stuck_workspaces.py
@@ -51,9 +51,13 @@
 if dotenv_path.exists():
     load_dotenv(dotenv_path)
 
-# Set DATABASE_TYPE early if FIRESTORE_EMULATOR_HOST is set
+# Set DATABASE_TYPE early: emulator auto-detect takes priority (manually-run emulator),
+# otherwise default to sqlite (the local/Docker-dev default) rather than the ephemeral
+# in-memory fallback, which would make this script find an always-empty pool.
 if os.getenv("FIRESTORE_EMULATOR_HOST") and not os.getenv("DATABASE_TYPE"):
     os.environ["DATABASE_TYPE"] = "emulator"
+elif not os.getenv("DATABASE_TYPE"):
+    os.environ["DATABASE_TYPE"] = "sqlite"
 
 from app.database.factory import get_database  # noqa: E402
 from app.services.workspace_pool import WorkspacePoolService  # noqa: E402
diff --git a/backend/scripts/init_firestore.py b/backend/scripts/init_db.py
similarity index 94%
rename from backend/scripts/init_firestore.py
rename to backend/scripts/init_db.py
index b8cba43..f1e8f8c 100755
--- a/backend/scripts/init_firestore.py
+++ b/backend/scripts/init_db.py
@@ -1,10 +1,18 @@
 #!/usr/bin/env python3
 """
-Firestore Initialization Script
+Database Initialization Script (DB-type aware)
 
 TO BE USED WITH LOCAL TESTING ONLY - `make e2e-setup`
 
-Initializes Firestore database with workspace pool and API keys.
+Seeds the active state backend (selected by DATABASE_TYPE) with the workspace pool,
+API keys, and the local-auth identity sentinel. The seeding body is backend-agnostic —
+it goes through the IDatabase abstraction (get_database()) — so the same script works
+for every backend. Only the per-type precheck differs:
+
+    DATABASE_TYPE=sqlite     -> single local file (no Docker); path = SQLITE_DB_PATH
+    DATABASE_TYPE=emulator   -> requires FIRESTORE_EMULATOR_HOST (manually-run emulator)
+    DATABASE_TYPE=firestore  -> requires GCP_PROJECT_ID (--prod; production, or an
+                                 already-hosted GCP-managed instance)
 
 This script:
 1. Creates a default API key if none exists (solves chicken-and-egg problem)
@@ -14,13 +22,16 @@
 5. Is idempotent (safe to run multiple times)
 
 Usage:
-    # With Firestore Emulator
+    # Local SQLite (default)
+    python scripts/init_db.py --workspace-config repos.json --yes
+
+    # Manually-run Firestore emulator
     export FIRESTORE_EMULATOR_HOST=localhost:8080
-    python scripts/init_firestore.py
-    
-    # With real Firestore (be careful!)
+    python scripts/init_db.py --workspace-config repos.json --yes
+
+    # Real / already-hosted GCP Firestore (be careful!)
     export GCP_PROJECT_ID=your-project-id
-    python scripts/init_firestore.py --prod
+    python scripts/init_db.py --prod --workspace-config repos.json
 """
 
 import sys
@@ -171,7 +182,7 @@ def initialize_api_key(db: IDatabase, dry_run: bool = False) -> None:
             "permissions": ["admin"],
             "workspace_pool": pool,
             "metadata": {
-                "created_by": "init_firestore.py",
+                "created_by": "init_db.py",
                 "purpose": "bootstrap_key"
             },
             "max_concurrent_sessions": 5,
@@ -620,19 +631,20 @@ def main():
             print("Aborted.")
             sys.exit(0)
     else:
-        # Ensure emulator is set
-        if not os.getenv("FIRESTORE_EMULATOR_HOST"):
+        db_type = os.getenv("DATABASE_TYPE", "memory").lower()
+
+        # Emulator mode needs a reachable host:port; sqlite/memory need no external process.
+        if db_type == "emulator" and not os.getenv("FIRESTORE_EMULATOR_HOST"):
             print("ERROR: FIRESTORE_EMULATOR_HOST not set")
             print("Run: export FIRESTORE_EMULATOR_HOST=localhost:8080")
             sys.exit(1)
 
-        # Check database type (should already be set at import time if FIRESTORE_EMULATOR_HOST was set)
-        db_type = os.getenv("DATABASE_TYPE", "memory").lower()
         print(f"✓ Database type: {db_type}")
         if db_type == "memory":
             print("⚠️  WARNING: DATABASE_TYPE=memory will not persist data!")
-            print("   The script auto-sets DATABASE_TYPE=emulator when FIRESTORE_EMULATOR_HOST is set")
-            print("   If you see this, something went wrong with auto-detection")
+            print("   Set DATABASE_TYPE=sqlite for a persistent local database")
+        elif db_type == "sqlite":
+            print(f"✓ Using SQLite at {settings.SQLITE_DB_PATH}")
         elif db_type == "emulator":
             emulator_host = os.getenv("FIRESTORE_EMULATOR_HOST")
             print(f"✓ Using Firestore Emulator at {emulator_host}")
@@ -675,8 +687,8 @@ def main():
     # Attach GitHub tokens to pool-specific keys
     attach_github_tokens(dry_run=args.dry_run)
 
-    # Reminder about indexes
-    if not args.dry_run and not os.getenv("FIRESTORE_EMULATOR_HOST"):
+    # Reminder about indexes (Firestore production/hosted only; sqlite/emulator need none)
+    if not args.dry_run and db_type == "firestore":
         print("\n" + "="*60)
         print("⚠️  IMPORTANT: Create Firestore Indexes")
         print("="*60)
diff --git a/backend/test/database/db_contract.py b/backend/test/database/db_contract.py
new file mode 100644
index 0000000..f7fd6d6
--- /dev/null
+++ b/backend/test/database/db_contract.py
@@ -0,0 +1,402 @@
+"""
+Shared IDatabase contract tests.
+
+Every concrete backend (InMemoryDatabase, SqliteDatabase, ...) must satisfy the exact
+same behavior for every IDatabase method. Each test module here has no ``db`` fixture —
+concrete test modules (test_memory_db.py, test_sqlite_db.py) supply their own fixture
+and subclass these classes so pytest collects them once per backend.
+"""
+
+from datetime import datetime
+
+import pytest
+
+from app.database.interface import DocumentNotFoundError
+
+
+class TestBasicCRUD:
+    """Test basic CRUD operations."""
+
+    def test_set_and_get(self, db):
+        """Test setting and getting a document."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30})
+
+        result = db.get("users", "user-1")
+        assert result is not None
+        assert result["name"] == "Alice"
+        assert result["age"] == 30
+
+    def test_get_nonexistent(self, db):
+        """Test getting a document that doesn't exist."""
+        result = db.get("users", "nonexistent")
+        assert result is None
+
+    def test_set_overwrites(self, db):
+        """Test that set overwrites existing data."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30})
+        db.set("users", "user-1", {"name": "Bob", "age": 25})
+
+        result = db.get("users", "user-1")
+        assert result["name"] == "Bob"
+        assert result["age"] == 25
+
+    def test_update_existing(self, db):
+        """Test updating an existing document."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30, "city": "NYC"})
+        db.update("users", "user-1", {"age": 31})
+
+        result = db.get("users", "user-1")
+        assert result["name"] == "Alice"
+        assert result["age"] == 31
+        assert result["city"] == "NYC"
+
+    def test_update_nonexistent(self, db):
+        """Test updating a document that doesn't exist."""
+        with pytest.raises(DocumentNotFoundError) as exc_info:
+            db.update("users", "nonexistent", {"age": 30})
+
+        assert exc_info.value.collection == "users"
+        assert exc_info.value.doc_id == "nonexistent"
+
+    def test_delete(self, db):
+        """Test deleting a document."""
+        db.set("users", "user-1", {"name": "Alice"})
+        db.delete("users", "user-1")
+
+        result = db.get("users", "user-1")
+        assert result is None
+
+    def test_delete_nonexistent(self, db):
+        """Test deleting a document that doesn't exist (should not raise)."""
+        db.delete("users", "nonexistent")  # Should not raise
+
+    def test_isolation_between_collections(self, db):
+        """Test that collections are isolated."""
+        db.set("users", "id-1", {"type": "user"})
+        db.set("posts", "id-1", {"type": "post"})
+
+        user = db.get("users", "id-1")
+        post = db.get("posts", "id-1")
+
+        assert user["type"] == "user"
+        assert post["type"] == "post"
+
+
+class TestQuery:
+    """Test query operations."""
+
+    def test_query_all(self, db):
+        """Test querying all documents in collection."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30})
+        db.set("users", "user-2", {"name": "Bob", "age": 25})
+        db.set("users", "user-3", {"name": "Charlie", "age": 35})
+
+        results = db.query("users")
+        assert len(results) == 3
+
+        # All results should have _id field
+        ids = {r["_id"] for r in results}
+        assert ids == {"user-1", "user-2", "user-3"}
+
+    def test_query_empty_collection(self, db):
+        """Test querying an empty collection."""
+        results = db.query("users")
+        assert results == []
+
+    def test_query_filter_equals(self, db):
+        """Test query with equals filter."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30})
+        db.set("users", "user-2", {"name": "Bob", "age": 25})
+        db.set("users", "user-3", {"name": "Alice", "age": 35})
+
+        results = db.query("users", filters=[("name", "==", "Alice")])
+        assert len(results) == 2
+        assert all(r["name"] == "Alice" for r in results)
+
+    def test_query_filter_not_equals(self, db):
+        """Test query with not equals filter."""
+        db.set("users", "user-1", {"name": "Alice", "status": "active"})
+        db.set("users", "user-2", {"name": "Bob", "status": "inactive"})
+
+        results = db.query("users", filters=[("status", "!=", "inactive")])
+        assert len(results) == 1
+        assert results[0]["name"] == "Alice"
+
+    def test_query_filter_comparison(self, db):
+        """Test query with comparison filters."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30})
+        db.set("users", "user-2", {"name": "Bob", "age": 25})
+        db.set("users", "user-3", {"name": "Charlie", "age": 35})
+
+        # Greater than
+        results = db.query("users", filters=[("age", ">", 30)])
+        assert len(results) == 1
+        assert results[0]["name"] == "Charlie"
+
+        # Greater than or equal
+        results = db.query("users", filters=[("age", ">=", 30)])
+        assert len(results) == 2
+
+        # Less than
+        results = db.query("users", filters=[("age", "<", 30)])
+        assert len(results) == 1
+        assert results[0]["name"] == "Bob"
+
+        # Less than or equal
+        results = db.query("users", filters=[("age", "<=", 30)])
+        assert len(results) == 2
+
+    def test_query_filter_in(self, db):
+        """Test query with 'in' filter."""
+        db.set("users", "user-1", {"name": "Alice", "role": "admin"})
+        db.set("users", "user-2", {"name": "Bob", "role": "user"})
+        db.set("users", "user-3", {"name": "Charlie", "role": "moderator"})
+
+        results = db.query("users", filters=[("role", "in", ["admin", "moderator"])])
+        assert len(results) == 2
+        names = {r["name"] for r in results}
+        assert names == {"Alice", "Charlie"}
+
+    def test_query_filter_array_contains(self, db):
+        """Test query with array_contains filter."""
+        db.set("users", "user-1", {"name": "Alice", "tags": ["python", "react"]})
+        db.set("users", "user-2", {"name": "Bob", "tags": ["java", "spring"]})
+        db.set("users", "user-3", {"name": "Charlie", "tags": ["python", "django"]})
+
+        results = db.query("users", filters=[("tags", "array_contains", "python")])
+        assert len(results) == 2
+        names = {r["name"] for r in results}
+        assert names == {"Alice", "Charlie"}
+
+    def test_query_multiple_filters(self, db):
+        """Test query with multiple filters (AND logic)."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30, "status": "active"})
+        db.set("users", "user-2", {"name": "Bob", "age": 25, "status": "active"})
+        db.set("users", "user-3", {"name": "Charlie", "age": 30, "status": "inactive"})
+
+        results = db.query("users", filters=[
+            ("age", "==", 30),
+            ("status", "==", "active")
+        ])
+        assert len(results) == 1
+        assert results[0]["name"] == "Alice"
+
+    def test_query_order_by_ascending(self, db):
+        """Test query with ascending order."""
+        db.set("users", "user-1", {"name": "Charlie", "age": 35})
+        db.set("users", "user-2", {"name": "Alice", "age": 30})
+        db.set("users", "user-3", {"name": "Bob", "age": 25})
+
+        results = db.query("users", order_by="age")
+        assert len(results) == 3
+        assert results[0]["name"] == "Bob"
+        assert results[1]["name"] == "Alice"
+        assert results[2]["name"] == "Charlie"
+
+    def test_query_order_by_descending(self, db):
+        """Test query with descending order."""
+        db.set("users", "user-1", {"name": "Charlie", "age": 35})
+        db.set("users", "user-2", {"name": "Alice", "age": 30})
+        db.set("users", "user-3", {"name": "Bob", "age": 25})
+
+        results = db.query("users", order_by="-age")
+        assert len(results) == 3
+        assert results[0]["name"] == "Charlie"
+        assert results[1]["name"] == "Alice"
+        assert results[2]["name"] == "Bob"
+
+    def test_query_limit(self, db):
+        """Test query with limit."""
+        db.set("users", "user-1", {"name": "Alice"})
+        db.set("users", "user-2", {"name": "Bob"})
+        db.set("users", "user-3", {"name": "Charlie"})
+
+        results = db.query("users", limit=2)
+        assert len(results) == 2
+
+    def test_query_combined(self, db):
+        """Test query with filters, ordering, and limit."""
+        db.set("users", "user-1", {"name": "Alice", "age": 30, "status": "active"})
+        db.set("users", "user-2", {"name": "Bob", "age": 25, "status": "active"})
+        db.set("users", "user-3", {"name": "Charlie", "age": 35, "status": "active"})
+        db.set("users", "user-4", {"name": "Dave", "age": 28, "status": "inactive"})
+
+        results = db.query(
+            "users",
+            filters=[("status", "==", "active")],
+            order_by="-age",
+            limit=2
+        )
+
+        assert len(results) == 2
+        assert results[0]["name"] == "Charlie"
+        assert results[1]["name"] == "Alice"
+
+
+class TestTransactions:
+    """Test transaction operations."""
+
+    def test_transaction_commit(self, db):
+        """Test successful transaction commit."""
+        db.set("users", "user-1", {"name": "Alice", "balance": 100})
+        db.set("users", "user-2", {"name": "Bob", "balance": 50})
+
+        def transfer(tx):
+            alice = tx.get("users", "user-1")
+            bob = tx.get("users", "user-2")
+
+            tx.update("users", "user-1", {"balance": alice["balance"] - 20})
+            tx.update("users", "user-2", {"balance": bob["balance"] + 20})
+
+            return "success"
+
+        result = db.run_transaction(transfer)
+        assert result == "success"
+
+        alice = db.get("users", "user-1")
+        bob = db.get("users", "user-2")
+
+        assert alice["balance"] == 80
+        assert bob["balance"] == 70
+
+    def test_transaction_rollback(self, db):
+        """Test transaction rollback on error."""
+        db.set("users", "user-1", {"name": "Alice", "balance": 100})
+
+        def failing_transaction(tx):
+            tx.update("users", "user-1", {"balance": 50})
+            raise ValueError("Something went wrong")
+
+        with pytest.raises(ValueError):
+            db.run_transaction(failing_transaction)
+
+        # Balance should not have changed
+        alice = db.get("users", "user-1")
+        assert alice["balance"] == 100
+
+    def test_transaction_set(self, db):
+        """Test transaction with set operation."""
+        def create_user(tx):
+            tx.set("users", "user-1", {"name": "Alice", "age": 30})
+            return "created"
+
+        result = db.run_transaction(create_user)
+        assert result == "created"
+
+        user = db.get("users", "user-1")
+        assert user["name"] == "Alice"
+
+    def test_transaction_delete(self, db):
+        """Test transaction with delete operation."""
+        db.set("users", "user-1", {"name": "Alice"})
+
+        def delete_user(tx):
+            tx.delete("users", "user-1")
+
+        db.run_transaction(delete_user)
+
+        user = db.get("users", "user-1")
+        assert user is None
+
+    def test_transaction_update_nonexistent(self, db):
+        """Test transaction fails when updating nonexistent document."""
+        def failing_update(tx):
+            tx.update("users", "nonexistent", {"name": "Alice"})
+
+        with pytest.raises(DocumentNotFoundError):
+            db.run_transaction(failing_update)
+
+
+class TestArrayOperations:
+    """Test array operations."""
+
+    def test_array_union_new_field(self, db):
+        """Test array_union creates new array field."""
+        db.set("users", "user-1", {"name": "Alice"})
+        db.array_union("users", "user-1", "tags", ["python", "react"])
+
+        user = db.get("users", "user-1")
+        assert "tags" in user
+        assert set(user["tags"]) == {"python", "react"}
+
+    def test_array_union_existing_field(self, db):
+        """Test array_union adds to existing array."""
+        db.set("users", "user-1", {"name": "Alice", "tags": ["python"]})
+        db.array_union("users", "user-1", "tags", ["react", "vue"])
+
+        user = db.get("users", "user-1")
+        assert set(user["tags"]) == {"python", "react", "vue"}
+
+    def test_array_union_no_duplicates(self, db):
+        """Test array_union doesn't add duplicates."""
+        db.set("users", "user-1", {"name": "Alice", "tags": ["python", "react"]})
+        db.array_union("users", "user-1", "tags", ["python", "vue"])
+
+        user = db.get("users", "user-1")
+        assert set(user["tags"]) == {"python", "react", "vue"}
+
+    def test_array_union_nonexistent_doc(self, db):
+        """Test array_union fails on nonexistent document."""
+        with pytest.raises(DocumentNotFoundError):
+            db.array_union("users", "nonexistent", "tags", ["python"])
+
+
+class TestServerTimestamp:
+    """Test server timestamp functionality."""
+
+    def test_server_timestamp_on_set(self, db):
+        """Test server timestamp is replaced with actual time."""
+        db.set("users", "user-1", {
+            "name": "Alice",
+            "created_at": db.server_timestamp()
+        })
+
+        user = db.get("users", "user-1")
+        assert "created_at" in user
+        assert isinstance(user["created_at"], datetime)
+
+    def test_server_timestamp_on_update(self, db):
+        """Test server timestamp in update operation."""
+        db.set("users", "user-1", {"name": "Alice"})
+        db.update("users", "user-1", {"updated_at": db.server_timestamp()})
+
+        user = db.get("users", "user-1")
+        assert "updated_at" in user
+        assert isinstance(user["updated_at"], datetime)
+
+    def test_server_timestamp_in_transaction(self, db):
+        """Test server timestamp in transaction."""
+        def create_with_timestamp(tx):
+            tx.set("users", "user-1", {
+                "name": "Alice",
+                "created_at": db.server_timestamp()
+            })
+
+        db.run_transaction(create_with_timestamp)
+
+        user = db.get("users", "user-1")
+        assert isinstance(user["created_at"], datetime)
+
+
+class TestIsolation:
+    """Test data isolation and cleanup."""
+
+    def test_clear(self, db):
+        """Test clearing database."""
+        db.set("users", "user-1", {"name": "Alice"})
+        db.set("posts", "post-1", {"title": "Hello"})
+
+        db.clear()
+
+        assert db.get("users", "user-1") is None
+        assert db.get("posts", "post-1") is None
+
+    def test_get_returns_copy(self, db):
+        """Test that get returns a copy, not reference."""
+        db.set("users", "user-1", {"name": "Alice"})
+
+        user1 = db.get("users", "user-1")
+        user1["name"] = "Bob"
+
+        user2 = db.get("users", "user-1")
+        assert user2["name"] == "Alice"
diff --git a/backend/test/database/test_factory.py b/backend/test/database/test_factory.py
index 355d226..bd0bec5 100644
--- a/backend/test/database/test_factory.py
+++ b/backend/test/database/test_factory.py
@@ -9,10 +9,11 @@
 import pytest
 from unittest.mock import patch
 
-from app.database.factory import get_database, reset_database
+from app.database.factory import clear_test_data, get_database, reset_database
 from app.database.memory import InMemoryDatabase
 from app.database.emulator import EmulatorDatabase
 from app.database.firestore import FirestoreDatabase
+from app.database.sqlite import SqliteDatabase
 
 
 @pytest.fixture(autouse=True)
@@ -36,6 +37,19 @@ def test_factory_returns_memory_database(self):
                 db = get_database()
                 assert isinstance(db, InMemoryDatabase)
     
+    def test_factory_returns_sqlite_database(self, tmp_path):
+        """Test factory returns SqliteDatabase when DATABASE_TYPE=sqlite."""
+        db_path = str(tmp_path / "specflow.db")
+        with patch.dict(os.environ, {
+            "DATABASE_TYPE": "sqlite",
+            "SQLITE_DB_PATH": db_path,
+        }, clear=True):
+            from app.core.config import Settings
+            with patch("app.database.factory.settings", Settings()):
+                reset_database()
+                db = get_database()
+                assert isinstance(db, SqliteDatabase)
+
     def test_factory_returns_emulator_database(self):
         """Test factory returns EmulatorDatabase when DATABASE_TYPE=emulator."""
         with patch.dict(os.environ, {
@@ -143,6 +157,33 @@ def test_firestore_allows_missing_project_id(self):
                     mock_client.assert_called_once_with(project=None, database='(default)')
 
 
+class TestClearTestData:
+    """clear_test_data() must accept sqlite (local/test-safe) and reject firestore."""
+
+    def test_clear_test_data_allows_sqlite(self, tmp_path):
+        db_path = str(tmp_path / "specflow.db")
+        with patch.dict(os.environ, {
+            "DATABASE_TYPE": "sqlite",
+            "SQLITE_DB_PATH": db_path,
+        }, clear=True):
+            from app.core.config import Settings
+            with patch("app.database.factory.settings", Settings()):
+                reset_database()
+                db = get_database()
+                db.set("api_keys", "k1", {"foo": "bar"})
+                clear_test_data(["api_keys"])
+                assert db.get("api_keys", "k1") is None
+
+    def test_clear_test_data_rejects_firestore(self):
+        with patch.dict(os.environ, {"DATABASE_TYPE": "firestore"}, clear=True):
+            from app.core.config import Settings
+            with patch("app.database.firestore.firestore.Client"):
+                with patch("app.database.factory.settings", Settings()):
+                    reset_database()
+                    with pytest.raises(RuntimeError, match="should only be used with"):
+                        clear_test_data()
+
+
 class TestDatabaseFactoryIntegration:
     """Integration tests for factory with actual database operations."""
     
diff --git a/backend/test/database/test_memory_db.py b/backend/test/database/test_memory_db.py
index 32b5f8a..3452d37 100644
--- a/backend/test/database/test_memory_db.py
+++ b/backend/test/database/test_memory_db.py
@@ -1,13 +1,20 @@
 """
 Tests for InMemoryDatabase implementation.
 
-Comprehensive test suite covering all IDatabase interface methods.
+Runs the shared IDatabase contract (db_contract.py) against the in-memory backend.
 """
 
 import pytest
-from datetime import datetime
 from app.database.memory import InMemoryDatabase
-from app.database.interface import DocumentNotFoundError
+
+from test.database.db_contract import (
+    TestArrayOperations as _TestArrayOperations,
+    TestBasicCRUD as _TestBasicCRUD,
+    TestIsolation as _TestIsolation,
+    TestQuery as _TestQuery,
+    TestServerTimestamp as _TestServerTimestamp,
+    TestTransactions as _TestTransactions,
+)
 
 
 @pytest.fixture
@@ -18,389 +25,25 @@ def db():
     database.clear()
 
 
-class TestBasicCRUD:
-    """Test basic CRUD operations."""
-
-    def test_set_and_get(self, db):
-        """Test setting and getting a document."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30})
-        
-        result = db.get("users", "user-1")
-        assert result is not None
-        assert result["name"] == "Alice"
-        assert result["age"] == 30
-
-    def test_get_nonexistent(self, db):
-        """Test getting a document that doesn't exist."""
-        result = db.get("users", "nonexistent")
-        assert result is None
-
-    def test_set_overwrites(self, db):
-        """Test that set overwrites existing data."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30})
-        db.set("users", "user-1", {"name": "Bob", "age": 25})
-        
-        result = db.get("users", "user-1")
-        assert result["name"] == "Bob"
-        assert result["age"] == 25
-
-    def test_update_existing(self, db):
-        """Test updating an existing document."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30, "city": "NYC"})
-        db.update("users", "user-1", {"age": 31})
-        
-        result = db.get("users", "user-1")
-        assert result["name"] == "Alice"
-        assert result["age"] == 31
-        assert result["city"] == "NYC"
-
-    def test_update_nonexistent(self, db):
-        """Test updating a document that doesn't exist."""
-        with pytest.raises(DocumentNotFoundError) as exc_info:
-            db.update("users", "nonexistent", {"age": 30})
-        
-        assert exc_info.value.collection == "users"
-        assert exc_info.value.doc_id == "nonexistent"
-
-    def test_delete(self, db):
-        """Test deleting a document."""
-        db.set("users", "user-1", {"name": "Alice"})
-        db.delete("users", "user-1")
-        
-        result = db.get("users", "user-1")
-        assert result is None
-
-    def test_delete_nonexistent(self, db):
-        """Test deleting a document that doesn't exist (should not raise)."""
-        db.delete("users", "nonexistent")  # Should not raise
-
-    def test_isolation_between_collections(self, db):
-        """Test that collections are isolated."""
-        db.set("users", "id-1", {"type": "user"})
-        db.set("posts", "id-1", {"type": "post"})
-        
-        user = db.get("users", "id-1")
-        post = db.get("posts", "id-1")
-        
-        assert user["type"] == "user"
-        assert post["type"] == "post"
-
-
-class TestQuery:
-    """Test query operations."""
-
-    def test_query_all(self, db):
-        """Test querying all documents in collection."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30})
-        db.set("users", "user-2", {"name": "Bob", "age": 25})
-        db.set("users", "user-3", {"name": "Charlie", "age": 35})
-        
-        results = db.query("users")
-        assert len(results) == 3
-        
-        # All results should have _id field
-        ids = {r["_id"] for r in results}
-        assert ids == {"user-1", "user-2", "user-3"}
-
-    def test_query_empty_collection(self, db):
-        """Test querying an empty collection."""
-        results = db.query("users")
-        assert results == []
-
-    def test_query_filter_equals(self, db):
-        """Test query with equals filter."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30})
-        db.set("users", "user-2", {"name": "Bob", "age": 25})
-        db.set("users", "user-3", {"name": "Alice", "age": 35})
-        
-        results = db.query("users", filters=[("name", "==", "Alice")])
-        assert len(results) == 2
-        assert all(r["name"] == "Alice" for r in results)
-
-    def test_query_filter_not_equals(self, db):
-        """Test query with not equals filter."""
-        db.set("users", "user-1", {"name": "Alice", "status": "active"})
-        db.set("users", "user-2", {"name": "Bob", "status": "inactive"})
-        
-        results = db.query("users", filters=[("status", "!=", "inactive")])
-        assert len(results) == 1
-        assert results[0]["name"] == "Alice"
-
-    def test_query_filter_comparison(self, db):
-        """Test query with comparison filters."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30})
-        db.set("users", "user-2", {"name": "Bob", "age": 25})
-        db.set("users", "user-3", {"name": "Charlie", "age": 35})
-        
-        # Greater than
-        results = db.query("users", filters=[("age", ">", 30)])
-        assert len(results) == 1
-        assert results[0]["name"] == "Charlie"
-        
-        # Greater than or equal
-        results = db.query("users", filters=[("age", ">=", 30)])
-        assert len(results) == 2
-        
-        # Less than
-        results = db.query("users", filters=[("age", "<", 30)])
-        assert len(results) == 1
-        assert results[0]["name"] == "Bob"
-        
-        # Less than or equal
-        results = db.query("users", filters=[("age", "<=", 30)])
-        assert len(results) == 2
-
-    def test_query_filter_in(self, db):
-        """Test query with 'in' filter."""
-        db.set("users", "user-1", {"name": "Alice", "role": "admin"})
-        db.set("users", "user-2", {"name": "Bob", "role": "user"})
-        db.set("users", "user-3", {"name": "Charlie", "role": "moderator"})
-        
-        results = db.query("users", filters=[("role", "in", ["admin", "moderator"])])
-        assert len(results) == 2
-        names = {r["name"] for r in results}
-        assert names == {"Alice", "Charlie"}
-
-    def test_query_filter_array_contains(self, db):
-        """Test query with array_contains filter."""
-        db.set("users", "user-1", {"name": "Alice", "tags": ["python", "react"]})
-        db.set("users", "user-2", {"name": "Bob", "tags": ["java", "spring"]})
-        db.set("users", "user-3", {"name": "Charlie", "tags": ["python", "django"]})
-        
-        results = db.query("users", filters=[("tags", "array_contains", "python")])
-        assert len(results) == 2
-        names = {r["name"] for r in results}
-        assert names == {"Alice", "Charlie"}
-
-    def test_query_multiple_filters(self, db):
-        """Test query with multiple filters (AND logic)."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30, "status": "active"})
-        db.set("users", "user-2", {"name": "Bob", "age": 25, "status": "active"})
-        db.set("users", "user-3", {"name": "Charlie", "age": 30, "status": "inactive"})
-        
-        results = db.query("users", filters=[
-            ("age", "==", 30),
-            ("status", "==", "active")
-        ])
-        assert len(results) == 1
-        assert results[0]["name"] == "Alice"
-
-    def test_query_order_by_ascending(self, db):
-        """Test query with ascending order."""
-        db.set("users", "user-1", {"name": "Charlie", "age": 35})
-        db.set("users", "user-2", {"name": "Alice", "age": 30})
-        db.set("users", "user-3", {"name": "Bob", "age": 25})
-        
-        results = db.query("users", order_by="age")
-        assert len(results) == 3
-        assert results[0]["name"] == "Bob"
-        assert results[1]["name"] == "Alice"
-        assert results[2]["name"] == "Charlie"
-
-    def test_query_order_by_descending(self, db):
-        """Test query with descending order."""
-        db.set("users", "user-1", {"name": "Charlie", "age": 35})
-        db.set("users", "user-2", {"name": "Alice", "age": 30})
-        db.set("users", "user-3", {"name": "Bob", "age": 25})
-        
-        results = db.query("users", order_by="-age")
-        assert len(results) == 3
-        assert results[0]["name"] == "Charlie"
-        assert results[1]["name"] == "Alice"
-        assert results[2]["name"] == "Bob"
-
-    def test_query_limit(self, db):
-        """Test query with limit."""
-        db.set("users", "user-1", {"name": "Alice"})
-        db.set("users", "user-2", {"name": "Bob"})
-        db.set("users", "user-3", {"name": "Charlie"})
-        
-        results = db.query("users", limit=2)
-        assert len(results) == 2
-
-    def test_query_combined(self, db):
-        """Test query with filters, ordering, and limit."""
-        db.set("users", "user-1", {"name": "Alice", "age": 30, "status": "active"})
-        db.set("users", "user-2", {"name": "Bob", "age": 25, "status": "active"})
-        db.set("users", "user-3", {"name": "Charlie", "age": 35, "status": "active"})
-        db.set("users", "user-4", {"name": "Dave", "age": 28, "status": "inactive"})
-        
-        results = db.query(
-            "users",
-            filters=[("status", "==", "active")],
-            order_by="-age",
-            limit=2
-        )
-        
-        assert len(results) == 2
-        assert results[0]["name"] == "Charlie"
-        assert results[1]["name"] == "Alice"
-
-
-class TestTransactions:
-    """Test transaction operations."""
-
-    def test_transaction_commit(self, db):
-        """Test successful transaction commit."""
-        db.set("users", "user-1", {"name": "Alice", "balance": 100})
-        db.set("users", "user-2", {"name": "Bob", "balance": 50})
-        
-        def transfer(tx):
-            alice = tx.get("users", "user-1")
-            bob = tx.get("users", "user-2")
-            
-            tx.update("users", "user-1", {"balance": alice["balance"] - 20})
-            tx.update("users", "user-2", {"balance": bob["balance"] + 20})
-            
-            return "success"
-        
-        result = db.run_transaction(transfer)
-        assert result == "success"
-        
-        alice = db.get("users", "user-1")
-        bob = db.get("users", "user-2")
-        
-        assert alice["balance"] == 80
-        assert bob["balance"] == 70
-
-    def test_transaction_rollback(self, db):
-        """Test transaction rollback on error."""
-        db.set("users", "user-1", {"name": "Alice", "balance": 100})
-        
-        def failing_transaction(tx):
-            tx.update("users", "user-1", {"balance": 50})
-            raise ValueError("Something went wrong")
-        
-        with pytest.raises(ValueError):
-            db.run_transaction(failing_transaction)
-        
-        # Balance should not have changed
-        alice = db.get("users", "user-1")
-        assert alice["balance"] == 100
-
-    def test_transaction_set(self, db):
-        """Test transaction with set operation."""
-        def create_user(tx):
-            tx.set("users", "user-1", {"name": "Alice", "age": 30})
-            return "created"
-        
-        result = db.run_transaction(create_user)
-        assert result == "created"
-        
-        user = db.get("users", "user-1")
-        assert user["name"] == "Alice"
-
-    def test_transaction_delete(self, db):
-        """Test transaction with delete operation."""
-        db.set("users", "user-1", {"name": "Alice"})
-        
-        def delete_user(tx):
-            tx.delete("users", "user-1")
-        
-        db.run_transaction(delete_user)
-        
-        user = db.get("users", "user-1")
-        assert user is None
-
-    def test_transaction_update_nonexistent(self, db):
-        """Test transaction fails when updating nonexistent document."""
-        def failing_update(tx):
-            tx.update("users", "nonexistent", {"name": "Alice"})
-        
-        with pytest.raises(DocumentNotFoundError):
-            db.run_transaction(failing_update)
-
-
-class TestArrayOperations:
-    """Test array operations."""
-
-    def test_array_union_new_field(self, db):
-        """Test array_union creates new array field."""
-        db.set("users", "user-1", {"name": "Alice"})
-        db.array_union("users", "user-1", "tags", ["python", "react"])
-        
-        user = db.get("users", "user-1")
-        assert "tags" in user
-        assert set(user["tags"]) == {"python", "react"}
-
-    def test_array_union_existing_field(self, db):
-        """Test array_union adds to existing array."""
-        db.set("users", "user-1", {"name": "Alice", "tags": ["python"]})
-        db.array_union("users", "user-1", "tags", ["react", "vue"])
-        
-        user = db.get("users", "user-1")
-        assert set(user["tags"]) == {"python", "react", "vue"}
-
-    def test_array_union_no_duplicates(self, db):
-        """Test array_union doesn't add duplicates."""
-        db.set("users", "user-1", {"name": "Alice", "tags": ["python", "react"]})
-        db.array_union("users", "user-1", "tags", ["python", "vue"])
-        
-        user = db.get("users", "user-1")
-        assert set(user["tags"]) == {"python", "react", "vue"}
+class TestBasicCRUD(_TestBasicCRUD):
+    pass
 
-    def test_array_union_nonexistent_doc(self, db):
-        """Test array_union fails on nonexistent document."""
-        with pytest.raises(DocumentNotFoundError):
-            db.array_union("users", "nonexistent", "tags", ["python"])
 
+class TestQuery(_TestQuery):
+    pass
 
-class TestServerTimestamp:
-    """Test server timestamp functionality."""
 
-    def test_server_timestamp_on_set(self, db):
-        """Test server timestamp is replaced with actual time."""
-        db.set("users", "user-1", {
-            "name": "Alice",
-            "created_at": db.server_timestamp()
-        })
-        
-        user = db.get("users", "user-1")
-        assert "created_at" in user
-        assert isinstance(user["created_at"], datetime)
+class TestTransactions(_TestTransactions):
+    pass
 
-    def test_server_timestamp_on_update(self, db):
-        """Test server timestamp in update operation."""
-        db.set("users", "user-1", {"name": "Alice"})
-        db.update("users", "user-1", {"updated_at": db.server_timestamp()})
-        
-        user = db.get("users", "user-1")
-        assert "updated_at" in user
-        assert isinstance(user["updated_at"], datetime)
 
-    def test_server_timestamp_in_transaction(self, db):
-        """Test server timestamp in transaction."""
-        def create_with_timestamp(tx):
-            tx.set("users", "user-1", {
-                "name": "Alice",
-                "created_at": db.server_timestamp()
-            })
-        
-        db.run_transaction(create_with_timestamp)
-        
-        user = db.get("users", "user-1")
-        assert isinstance(user["created_at"], datetime)
+class TestArrayOperations(_TestArrayOperations):
+    pass
 
 
-class TestIsolation:
-    """Test data isolation and cleanup."""
+class TestServerTimestamp(_TestServerTimestamp):
+    pass
 
-    def test_clear(self, db):
-        """Test clearing database."""
-        db.set("users", "user-1", {"name": "Alice"})
-        db.set("posts", "post-1", {"title": "Hello"})
-        
-        db.clear()
-        
-        assert db.get("users", "user-1") is None
-        assert db.get("posts", "post-1") is None
 
-    def test_get_returns_copy(self, db):
-        """Test that get returns a copy, not reference."""
-        db.set("users", "user-1", {"name": "Alice"})
-        
-        user1 = db.get("users", "user-1")
-        user1["name"] = "Bob"
-        
-        user2 = db.get("users", "user-1")
-        assert user2["name"] == "Alice"
+class TestIsolation(_TestIsolation):
+    pass
diff --git a/backend/test/database/test_sqlite_db.py b/backend/test/database/test_sqlite_db.py
new file mode 100644
index 0000000..2ae67f7
--- /dev/null
+++ b/backend/test/database/test_sqlite_db.py
@@ -0,0 +1,151 @@
+"""
+SqliteDatabase contract + datetime tests.
+
+Runs the shared IDatabase contract (db_contract.py) against the SQLite backend so it
+must satisfy the exact same behavior as the in-memory reference, plus SQLite-specific
+datetime-boundary tests (the one place "works on memory" can differ from "works
+persisted").
+"""
+
+from datetime import datetime, timedelta, timezone
+
+import pytest
+
+from app.database.sqlite import SqliteDatabase
+
+from test.database.db_contract import (
+    TestArrayOperations as _TestArrayOperations,
+    TestBasicCRUD as _TestBasicCRUD,
+    TestIsolation as _TestIsolation,
+    TestQuery as _TestQuery,
+    TestServerTimestamp as _TestServerTimestamp,
+    TestTransactions as _TestTransactions,
+)
+
+
+@pytest.fixture
+def db():
+    """Create a fresh in-memory SQLite database for each test."""
+    database = SqliteDatabase(":memory:")
+    yield database
+    database.close()
+
+
+class TestBasicCRUD(_TestBasicCRUD):
+    pass
+
+
+class TestQuery(_TestQuery):
+    pass
+
+
+class TestTransactions(_TestTransactions):
+    pass
+
+
+class TestArrayOperations(_TestArrayOperations):
+    pass
+
+
+class TestServerTimestamp(_TestServerTimestamp):
+    pass
+
+
+class TestIsolation(_TestIsolation):
+    pass
+
+
+class TestSqliteDatetime:
+    """Datetime contract: round-trips as tz-aware UTC and compares correctly in SQL."""
+
+    def test_datetime_roundtrip_is_tz_aware(self, db):
+        stored = datetime(2026, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
+        db.set("c", "d", {"ts": stored})
+
+        got = db.get("c", "d")["ts"]
+        assert isinstance(got, datetime)
+        assert got.tzinfo is not None
+        assert got == stored
+
+    def test_naive_datetime_assumed_utc(self, db):
+        db.set("c", "d", {"ts": datetime(2026, 1, 1, 12, 0, 0)})
+
+        got = db.get("c", "d")["ts"]
+        assert got == datetime(2026, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
+
+    def test_datetime_filter_less_than(self, db):
+        now = datetime.now(timezone.utc)
+        db.set("gen", "old", {"last_activity_at": now - timedelta(minutes=60)})
+        db.set("gen", "fresh", {"last_activity_at": now - timedelta(minutes=1)})
+
+        cutoff = now - timedelta(minutes=30)
+        results = db.query("gen", filters=[("last_activity_at", "<", cutoff)])
+
+        assert {r["_id"] for r in results} == {"old"}
+
+    def test_none_datetime_excluded_by_less_than(self, db):
+        """None last_activity_at must be invisible to '<' (matches reference impl)."""
+        now = datetime.now(timezone.utc)
+        db.set("gen", "none", {"last_activity_at": None})
+
+        results = db.query("gen", filters=[("last_activity_at", "<", now)])
+        assert results == []
+
+    def test_nested_datetime_in_list_roundtrips(self, db):
+        ts = datetime(2026, 3, 1, 9, 30, 0, tzinfo=timezone.utc)
+        db.set("api_keys", "k", {
+            "active_generation_sessions": [{"generation_id": "g1", "lease_started_at": ts}],
+        })
+
+        got = db.get("api_keys", "k")
+        assert got["active_generation_sessions"][0]["lease_started_at"] == ts
+
+
+class TestSqlitePersistence:
+    """A SQLite file persists across connections (the whole point vs in-memory)."""
+
+    def test_state_survives_reopen(self, tmp_path):
+        db_path = str(tmp_path / "specflow.db")
+
+        db1 = SqliteDatabase(db_path)
+        db1.set("generation_sessions", "est-1", {"status": "running"})
+        db1.close()
+
+        db2 = SqliteDatabase(db_path)
+        try:
+            result = db2.get("generation_sessions", "est-1")
+            assert result == {"status": "running"}
+        finally:
+            db2.close()
+
+
+@pytest.mark.asyncio
+async def test_stuck_running_detector_against_sqlite():
+    """
+    End-to-end datetime boundary: a RUNNING generation with a stale last_activity_at
+    stored in SQLite must be detected and marked FAILED by the background job (proves
+    tz-aware cutoff vs SQL-stored datetime agree, and that the async
+    StateMachineDBAdapter works on the SQLite backend).
+    """
+    from app.database.sqlite import SqliteDatabase as _SqliteDatabase
+    from app.state.db_adapter import StateMachineDBAdapter, COL_GENERATION_SESSIONS
+    from app.jobs.stuck_running_detector import detect_stuck_running
+    from app.schemas.generation_workflow_enums import GenerationStatus
+
+    raw = _SqliteDatabase(":memory:")
+    adapter = StateMachineDBAdapter(raw)
+
+    now = datetime.now(timezone.utc)
+    raw.set(COL_GENERATION_SESSIONS, "est-stale", {
+        "status": GenerationStatus.RUNNING,
+        "last_activity_at": now - timedelta(minutes=60),
+        "state_history": [],
+    })
+
+    try:
+        await detect_stuck_running(adapter, threshold_minutes=30)
+        est = raw.get(COL_GENERATION_SESSIONS, "est-stale")
+        assert est["status"] == GenerationStatus.FAILED
+        assert est.get("failed_at") is not None
+    finally:
+        raw.close()
diff --git a/backend/test/integration/test_firestore_emulator_persistence.py b/backend/test/integration/test_firestore_emulator_persistence.py
deleted file mode 100644
index ff58612..0000000
--- a/backend/test/integration/test_firestore_emulator_persistence.py
+++ /dev/null
@@ -1,171 +0,0 @@
-"""Round-trip persistence test for the local-quickstart Firestore emulator.
-
-Proves the behaviour that ``docker-compose.yml`` + ``specflow-init.sh`` only assert
-*textually* elsewhere: a document written to the emulator survives a full
-``docker compose down`` / ``up`` cycle via the native ``--export-on-exit`` /
-``--import-data`` flags over the host bind mount.
-
-This is a real system test — it drives the actual emulator container — so it is
-gated behind ``RUN_FIRESTORE_PERSISTENCE_TEST=1`` and never runs in ``make
-unit-tests``. It also needs port 8080 free, so it skips if a dev emulator is
-already up to avoid clobbering it.
-
-Run it with::
-
-    RUN_FIRESTORE_PERSISTENCE_TEST=1 uv run pytest \\
-        test/integration/test_firestore_emulator_persistence.py -v
-"""
-import os
-import socket
-import subprocess
-import tempfile
-import time
-from pathlib import Path
-
-import pytest
-from google.cloud import firestore
-
-_REPO_ROOT = Path(__file__).resolve().parents[3]
-_COMPOSE_FILE = _REPO_ROOT / "docker-compose.yml"
-_SERVICE = "firestore-emulator"
-_EMULATOR_HOST = "localhost:8080"
-_PROJECT_ID = "local-dev"
-_DATABASE_ID = "specflow"
-_COLLECTION = "persistence_probe"
-
-pytestmark = pytest.mark.skipif(
-    os.environ.get("RUN_FIRESTORE_PERSISTENCE_TEST") != "1",
-    reason="System test; set RUN_FIRESTORE_PERSISTENCE_TEST=1 (needs docker + free port 8080).",
-)
-
-
-def _port_open(host: str = "localhost", port: int = 8080) -> bool:
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
-        sock.settimeout(1)
-        return sock.connect_ex((host, port)) == 0
-
-
-def _compose(*args: str, env: dict) -> None:
-    subprocess.run(
-        ["docker", "compose", "-f", str(_COMPOSE_FILE), *args],
-        cwd=str(_REPO_ROOT),
-        env=env,
-        check=True,
-        capture_output=True,
-        text=True,
-    )
-
-
-def _wait_until_ready(timeout: float = 90.0) -> None:
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        try:
-            subprocess.run(
-                ["curl", "-sf", "--max-time", "2", f"http://{_EMULATOR_HOST}"],
-                check=True,
-                capture_output=True,
-            )
-            return
-        except subprocess.CalledProcessError:
-            time.sleep(2)
-    raise TimeoutError(f"Emulator not ready within {timeout}s")
-
-
-def _latest_export_mtime(export_dir: Path) -> float:
-    metadata_files = list(export_dir.rglob("*overall_export_metadata"))
-    if not metadata_files:
-        return 0.0
-    return max(path.stat().st_mtime for path in metadata_files)
-
-
-def _wait_for_export(export_dir: Path, newer_than: float = 0.0, timeout: float = 60.0) -> None:
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        if _latest_export_mtime(export_dir) > newer_than:
-            return
-        time.sleep(1)
-    raise TimeoutError(f"Export metadata not written under {export_dir}")
-
-
-def _client() -> firestore.Client:
-    os.environ["FIRESTORE_EMULATOR_HOST"] = _EMULATOR_HOST
-    return firestore.Client(project=_PROJECT_ID, database=_DATABASE_ID)
-
-
-def test_document_survives_compose_down_up():
-    if _port_open():
-        pytest.skip("Port 8080 already in use — refusing to clobber a running emulator.")
-
-    with tempfile.TemporaryDirectory(prefix="fs-persist-") as tmp:
-        # Isolate the bind mount from the developer's real ./workspaces.
-        env = {
-            **os.environ,
-            "WORKSPACE_MOUNT_PATH": tmp,
-            "FIRESTORE_EMULATOR_HOST": _EMULATOR_HOST,
-            "GCP_PROJECT_ID": _PROJECT_ID,
-            "FIRESTORE_DATABASE_NAME": _DATABASE_ID,
-        }
-        export_dir = Path(tmp) / "firestore_emulator" / "current"
-        doc_id = "probe-doc"
-        sentinel = f"persisted-{int(time.time())}"
-
-        try:
-            # --- First boot: empty DB, write a sentinel doc ---
-            _compose("up", "-d", _SERVICE, env=env)
-            _wait_until_ready()
-
-            client = _client()
-            client.collection(_COLLECTION).document(doc_id).set({"value": sentinel})
-
-            # --- Graceful teardown: --export-on-exit must flush to the host dir ---
-            _compose("down", env=env)
-            assert any(export_dir.rglob("*overall_export_metadata")), (
-                "Emulator did not export on graceful shutdown — export-on-exit / "
-                "stop_grace_period / init regression."
-            )
-
-            # --- Second boot: must --import-data and serve the doc again ---
-            _compose("up", "-d", _SERVICE, env=env)
-            _wait_until_ready()
-
-            restored = _client().collection(_COLLECTION).document(doc_id).get()
-            assert restored.exists, "Document was lost across compose down/up."
-            assert restored.to_dict()["value"] == sentinel
-        finally:
-            _compose("down", env=env)
-
-
-def test_document_survives_emulator_kill_after_periodic_export():
-    if _port_open():
-        pytest.skip("Port 8080 already in use — refusing to clobber a running emulator.")
-
-    with tempfile.TemporaryDirectory(prefix="fs-periodic-") as tmp:
-        env = {
-            **os.environ,
-            "WORKSPACE_MOUNT_PATH": tmp,
-            "FIRESTORE_EMULATOR_HOST": _EMULATOR_HOST,
-            "GCP_PROJECT_ID": _PROJECT_ID,
-            "FIRESTORE_DATABASE_NAME": _DATABASE_ID,
-            "FIRESTORE_EXPORT_INTERVAL_SECONDS": "2",
-        }
-        export_dir = Path(tmp) / "firestore_emulator" / "current"
-        doc_id = "periodic-probe-doc"
-        sentinel = f"periodic-{int(time.time())}"
-
-        try:
-            _compose("up", "-d", _SERVICE, "firestore-exporter", env=env)
-            _wait_until_ready()
-
-            last_export_mtime = _latest_export_mtime(export_dir)
-            _client().collection(_COLLECTION).document(doc_id).set({"value": sentinel})
-            _wait_for_export(export_dir, newer_than=last_export_mtime)
-
-            # SIGKILL bypasses gcloud's --export-on-exit path; restore must use the sidecar snapshot.
-            _compose("kill", "-s", "KILL", _SERVICE, env=env)
-            _wait_until_ready()
-
-            restored = _client().collection(_COLLECTION).document(doc_id).get()
-            assert restored.exists, "Document was lost after emulator kill/restart."
-            assert restored.to_dict()["value"] == sentinel
-        finally:
-            _compose("down", env=env)
diff --git a/backend/test/middleware/test_local_auth.py b/backend/test/middleware/test_local_auth.py
index 72dff87..1b28702 100644
--- a/backend/test/middleware/test_local_auth.py
+++ b/backend/test/middleware/test_local_auth.py
@@ -200,7 +200,7 @@ def test_missing_sentinel_returns_503(db_empty):
     response = client.get("/protected")
     assert response.status_code == 503
     detail = response.json()["detail"]
-    assert "init_firestore.py" in detail
+    assert "init_db.py" in detail
     assert "Local identity not seeded" in detail
 
 
diff --git a/backend/test/scripts/test_create_generation_session_repos.py b/backend/test/scripts/test_create_generation_session_repos.py
index 50a0368..a2aaba0 100644
--- a/backend/test/scripts/test_create_generation_session_repos.py
+++ b/backend/test/scripts/test_create_generation_session_repos.py
@@ -3,7 +3,7 @@
 
 Coverage:
   (a) emit_workspace_config writes JSON whose entries each construct a WorkspaceConfig
-      without error, proving schema parity with init_firestore.py --workspace-config.
+      without error, proving schema parity with init_db.py --workspace-config.
   (b) get_repository_ids calls list_repositories WITHOUT project_ids (no filter).
   (c) No code path calls add_repositories_to_project — the function is absent from the
       module and the compiled AST contains no reference to it.
@@ -21,7 +21,7 @@
 import pytest
 
 import scripts.create_generation_session_repos as cgsr
-from scripts.init_firestore import WorkspaceConfig
+from scripts.init_db import WorkspaceConfig
 
 # ---------------------------------------------------------------------------
 # Path constants
diff --git a/backend/test/scripts/test_init_firestore.py b/backend/test/scripts/test_init_db.py
similarity index 91%
rename from backend/test/scripts/test_init_firestore.py
rename to backend/test/scripts/test_init_db.py
index 5e1e787..9efd10a 100644
--- a/backend/test/scripts/test_init_firestore.py
+++ b/backend/test/scripts/test_init_db.py
@@ -1,5 +1,5 @@
 """
-Tests for backend/scripts/init_firestore.py — Phase 5 (Config-driven Firestore seeding).
+Tests for backend/scripts/init_db.py — Phase 5 (Config-driven database seeding).
 
 Coverage:
   (a) --workspace-config load replaces the hardcoded WORKSPACE_CONFIGS list.
@@ -26,7 +26,7 @@
 # get_database() returns the InMemoryDatabase.  We patch in our own db
 # instance rather than calling get_database() at all.
 # ---------------------------------------------------------------------------
-import scripts.init_firestore as init_script
+import scripts.init_db as init_script
 
 
 # ---------------------------------------------------------------------------
@@ -146,7 +146,7 @@ def test_workspace_pool_initialized_with_config(self, tmp_path):
 class TestWorkspaceConfigRequired:
     def test_main_without_workspace_config_exits(self, capsys):
         """main() refuses to run (exit 1) when --workspace-config is absent."""
-        with patch("sys.argv", ["init_firestore.py", "--yes"]):
+        with patch("sys.argv", ["init_db.py", "--yes"]):
             with pytest.raises(SystemExit) as exc:
                 init_script.main()
         assert exc.value.code == 1
@@ -156,13 +156,49 @@ def test_main_without_workspace_config_exits(self, capsys):
 
     def test_main_without_workspace_config_does_not_touch_db(self):
         """The gate fires before any database access."""
-        with patch("sys.argv", ["init_firestore.py"]):
+        with patch("sys.argv", ["init_db.py"]):
             with patch.object(init_script, "get_database") as mock_get_db:
                 with pytest.raises(SystemExit):
                     init_script.main()
         mock_get_db.assert_not_called()
 
 
+# ---------------------------------------------------------------------------
+# (g) sqlite mode does not require FIRESTORE_EMULATOR_HOST (regression: the
+#     emulator-host gate must be conditional on DATABASE_TYPE, not unconditional).
+# ---------------------------------------------------------------------------
+
+class TestSqliteModeNoEmulatorRequired:
+    def test_sqlite_mode_does_not_require_emulator_host(self, tmp_path, monkeypatch):
+        """main() must not exit for a missing FIRESTORE_EMULATOR_HOST under sqlite mode."""
+        monkeypatch.delenv("FIRESTORE_EMULATOR_HOST", raising=False)
+        monkeypatch.setenv("DATABASE_TYPE", "sqlite")
+
+        path = _workspace_config_file(tmp_path, SAMPLE_WORKSPACE_ENTRIES)
+        db = _make_db()
+
+        with patch("sys.argv", ["init_db.py", "--yes", "--workspace-config", path]):
+            with patch.object(init_script, "get_database", return_value=db):
+                with patch.object(init_script, "attach_github_tokens"):
+                    # Should complete without SystemExit due to a missing emulator host.
+                    init_script.main()
+
+        assert db.get("workspaces", "ws-test-1") is not None
+
+    def test_emulator_mode_still_requires_emulator_host(self, tmp_path, monkeypatch, capsys):
+        """Regression guard: emulator mode keeps requiring FIRESTORE_EMULATOR_HOST."""
+        monkeypatch.delenv("FIRESTORE_EMULATOR_HOST", raising=False)
+        monkeypatch.setenv("DATABASE_TYPE", "emulator")
+
+        path = _workspace_config_file(tmp_path, SAMPLE_WORKSPACE_ENTRIES)
+
+        with patch("sys.argv", ["init_db.py", "--yes", "--workspace-config", path]):
+            with pytest.raises(SystemExit) as exc:
+                init_script.main()
+        assert exc.value.code == 1
+        assert "FIRESTORE_EMULATOR_HOST not set" in capsys.readouterr().out
+
+
 # ---------------------------------------------------------------------------
 # (b) --yes is non-interactive: builtins.input is NEVER called
 # ---------------------------------------------------------------------------
diff --git a/backend/test/scripts/test_specflow_init_script.py b/backend/test/scripts/test_specflow_init_script.py
index 270868b..547c7c5 100644
--- a/backend/test/scripts/test_specflow_init_script.py
+++ b/backend/test/scripts/test_specflow_init_script.py
@@ -24,12 +24,12 @@ def test_workspace_config_generation_skips_firestore_write():
 
 
 def test_docker_starts_before_workspace_config_generation():
-    """No-arg quickstart starts emulator/backend before GitHub/P10Y workspace prep."""
+    """No-arg quickstart starts backend before GitHub/P10Y workspace prep."""
     text = _script_text()
 
     compose_pos = text.index('docker compose -f "${SCRIPT_DIR}/docker-compose.yml" up -d')
     repo_script_pos = text.index("uv run python scripts/create_generation_session_repos.py")
-    seed_pos = text.index("uv run scripts/init_firestore.py")
+    seed_pos = text.index("uv run scripts/init_db.py")
 
     assert compose_pos < repo_script_pos < seed_pos
 
@@ -44,57 +44,57 @@ def test_subprocess_output_is_sanitized_before_init_log():
     assert "input_value=" in text
 
 
-def test_firestore_emulator_persists_under_workspace_mount():
-    """Local quickstart should keep emulator exports beside workspace artifacts."""
+def test_no_firestore_emulator_services_in_default_stack():
+    """The firestore-emulator/exporter services and their entrypoint scripts must be gone —
+    sqlite is now the sole local/Docker default; Firestore only connects to an already-hosted
+    instance. Regression guard against silently reintroducing the removed local emulator."""
     text = _compose_text()
-    entrypoint_text = (
+
+    assert "firestore-emulator:" not in text
+    assert "firestore-exporter:" not in text
+    assert not (
         Path(__file__).resolve().parents[3] / "scripts/firestore-emulator-entrypoint.sh"
-    ).read_text()
-
-    assert "${WORKSPACE_MOUNT_PATH:-./workspaces}/firestore_emulator:/firestore-data:rw" in text
-    assert "firestore-emulator-entrypoint.sh" in text
-    assert "firestore-exporter" in text
-    assert "firestore-emulator-exporter.sh" in text
-    assert "FIRESTORE_EXPORT_INTERVAL_SECONDS" in text
-    assert "${FIRESTORE_DATABASE_NAME:-specflow}" in text
-    assert "export_metadata_file()" in entrypoint_text
-    assert "CURRENT_METADATA_FILE=" in entrypoint_text
-    assert "IMPORT_ARGS=\"--import-data=${CURRENT_METADATA_FILE}\"" in entrypoint_text
-    assert "--import-data=${CURRENT_EXPORT_DIR}" not in entrypoint_text
-    assert "--export-on-exit=\"${CURRENT_EXPORT_DIR}\"" in entrypoint_text
-
-
-def test_compose_shutdown_order_keeps_exporter_after_backend():
-    """Backend must stop before exporter so shutdown state is included in the final export."""
-    text = _compose_text()
+    ).exists()
+    assert not (
+        Path(__file__).resolve().parents[3] / "scripts/firestore-emulator-exporter.sh"
+    ).exists()
 
-    backend_pos = text.index("  backend:")
-    exporter_dependency_pos = text.index("      firestore-exporter:", backend_pos)
-    exporter_pos = text.index("  firestore-exporter:")
-    emulator_dependency_pos = text.index("      firestore-emulator:", exporter_pos)
 
-    assert backend_pos < exporter_dependency_pos
-    assert exporter_pos < emulator_dependency_pos
-    assert text.count("stop_grace_period: 120s") >= 3
+def test_compose_backend_bind_mounts_specflow_home_for_sqlite():
+    """Backend must bind-mount the host's ~/.specflow/ directory (central SQLite db, shared
+    with the host-side TUI/CLI config) and default DATABASE_TYPE to sqlite."""
+    text = _compose_text()
+
+    assert "DATABASE_TYPE=${DATABASE_TYPE:-sqlite}" in text
+    assert "SQLITE_DB_PATH=${SQLITE_DB_PATH:-/root/.specflow/specflow.db}" in text
+    assert "${SPECFLOW_HOME_MOUNT_PATH:-${HOME}/.specflow}:/root/.specflow:rw" in text
 
 
-def test_makefile_test_stack_uses_separate_names_and_ports():
-    """Integration/E2E stacks must not collide with quickstart containers or host ports."""
+def test_makefile_test_stack_uses_separate_names_ports_and_isolated_sqlite_path():
+    """Integration/E2E stacks must not collide with quickstart containers, host ports, or the
+    real central SQLite database at ~/.specflow/specflow.db."""
     makefile_text = (Path(__file__).resolve().parents[3] / "Makefile").read_text()
 
-    assert (
-        "$(TEST_STACK_TARGETS): export SPECFLOW_FIRESTORE_EXPORTER_CONTAINER := "
-        "specflow-test-firestore-exporter"
-    ) in makefile_text
     assert "$(TEST_STACK_TARGETS): export SPECFLOW_BACKEND_PORT := 18000" in makefile_text
-    assert "$(TEST_STACK_TARGETS): export SPECFLOW_FIRESTORE_PORT := 18080" in makefile_text
     assert "$(TEST_STACK_TARGETS): export BACKEND_URL := http://localhost:18000" in makefile_text
-    assert "$(TEST_STACK_TARGETS): export FIRESTORE_EMULATOR_HOST := localhost:18080" in makefile_text
     assert "$(TEST_STACK_TARGETS): export GCP_PROJECT_ID := $(GCP_PROJECT_ID)" in makefile_text
     assert (
         "$(TEST_STACK_TARGETS): export FIRESTORE_DATABASE_NAME := $(FIRESTORE_DATABASE_NAME)"
         in makefile_text
     )
+    assert (
+        "$(TEST_STACK_TARGETS): export SPECFLOW_HOME_MOUNT_PATH := $(TEST_SPECFLOW_HOME_PATH)"
+        in makefile_text
+    )
+    assert (
+        "$(TEST_STACK_TARGETS): export SQLITE_DB_PATH := $(TEST_SPECFLOW_HOME_PATH)/specflow.db"
+        in makefile_text
+    )
+    # The isolated sqlite-home path must nest under the test workspace mount so `make stop`'s
+    # existing `rm -rf "$(WORKSPACE_MOUNT_PATH)"` also wipes it — no separate cleanup needed.
+    assert (
+        "TEST_SPECFLOW_HOME_PATH := $(TEST_WORKSPACE_MOUNT_PATH)/specflow-home" in makefile_text
+    )
 
 
 def test_specflow_init_defaults_local_firestore_database_name():
diff --git a/backend/test/services/test_startup_validation.py b/backend/test/services/test_startup_validation.py
index ee6a540..f4b5e45 100644
--- a/backend/test/services/test_startup_validation.py
+++ b/backend/test/services/test_startup_validation.py
@@ -131,23 +131,23 @@ async def test_environment_check_firestore_requires_git_secrets(self, validator)
         assert "TOKEN_ENCRYPTION_KEY" in result["error"]
 
 
-class TestFirestoreConnectivity:
-    """Test Firestore connectivity check."""
-    
+class TestDatabaseConnectivity:
+    """Test database connectivity check."""
+
     @pytest.mark.asyncio
-    async def test_firestore_check_passes(self, validator):
-        """Firestore check passes with working connection."""
-        result = await validator._check_firestore_connectivity()
-        
+    async def test_database_check_passes(self, validator):
+        """Database check passes with working connection."""
+        result = await validator._check_database_connectivity()
+
         assert result["passed"] is True
         assert result["error"] is None
-    
+
     @pytest.mark.asyncio
-    async def test_firestore_check_fails(self, validator):
-        """Firestore check fails with connection error."""
+    async def test_database_check_fails(self, validator):
+        """Database check fails with connection error."""
         with patch.object(validator._db, "query", side_effect=Exception("Connection failed")):
-            result = await validator._check_firestore_connectivity()
-        
+            result = await validator._check_database_connectivity()
+
         assert result["passed"] is False
         assert "Connection failed" in result["error"]
 
@@ -241,10 +241,29 @@ async def test_startup_validation_passes(self, validator, sample_workspaces, tmp
             results = await validator.run_all_checks()
 
         assert results["environment"]["passed"] is True
-        assert results["firestore"]["passed"] is True
+        assert results["database"]["passed"] is True
         assert results["workspace_pool"]["passed"] is True
         assert results["filestore"]["passed"] is True
-    
+
+    @pytest.mark.asyncio
+    async def test_sqlite_empty_pool_is_non_critical(self, validator, tmp_path):
+        """Regression: sqlite seeds AFTER the backend boots (start -> health-gate -> seed),
+        so an empty workspace pool at startup must be a warning, not a fatal
+        StartupValidationError — sqlite must be treated like memory/emulator here."""
+        with patch.dict(os.environ, {
+            "OPENROUTER_API_KEY": "test-key",
+            "DATABASE_TYPE": "sqlite",
+            "WORKSPACE_BASE_PATH": str(tmp_path),
+            "TOKEN_ENCRYPTION_KEY": "test-fernet",
+            "GITHUB_TOKEN_DEFAULT": "ghp-test",
+            "GIT_USER_NAME_DEFAULT": "tester",
+        }):
+            # Must not raise even though the pool is empty.
+            results = await validator.run_all_checks()
+
+        assert results["environment"]["passed"] is True
+        assert results["workspace_pool"]["passed"] is False
+
     @pytest.mark.asyncio
     async def test_startup_validation_fails_on_critical(self, validator, sample_workspaces):
         """Startup validation fails on critical check failure."""
@@ -346,6 +365,19 @@ async def test_memory_skips_git_secrets(self, validator):
         # Confirm no git-secret keys were required
         assert result["error"] is None
 
+    @pytest.mark.asyncio
+    async def test_sqlite_requires_token_encryption_key(self, validator):
+        """sqlite mode requires TOKEN_ENCRYPTION_KEY, same as firestore/emulator — sqlite
+        persists across restarts (unlike memory), so an ephemeral per-boot key would make
+        previously-encrypted GitHub tokens undecryptable after a restart."""
+        with patch.dict(os.environ, {
+            "OPENROUTER_API_KEY": "or-key",
+            "DATABASE_TYPE": "sqlite",
+        }, clear=True):
+            result = await validator._check_environment()
+        assert result["passed"] is False
+        assert "TOKEN_ENCRYPTION_KEY" in result["error"]
+
     @pytest.mark.asyncio
     async def test_emulator_requires_token_encryption_key(self, validator):
         """emulator mode requires TOKEN_ENCRYPTION_KEY (FR-5)."""
diff --git a/docker-compose.yml b/docker-compose.yml
index 4e917f1..3bb9a5b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,77 +1,14 @@
 # Docker Compose Configuration for SpecFlow Backend
 #
 # Profiles:
-#   - default (dev): Uses Firestore Emulator for local development (no GCP credentials needed)
-#   - test: Uses real GCP Firestore (requires GCP credentials and proper .env setup)
-#   - mcp: MCP Server for Cursor IDE integration (starts backend + mcp-server)
+#   - default: backend + SQLite (local dev — no GCP credentials, no separate DB container)
+#   - test: Connect to an already-hosted GCP Firestore instance (requires credentials)
 #
 # Usage:
-#   docker-compose up                                    # Dev mode: backend + emulator
-#   docker-compose --profile test up                     # Test mode: backend + real Firestore
+#   docker-compose up                                    # Dev mode: backend + sqlite
+#   docker-compose --profile test up                     # Test mode: connect to hosted Firestore
 
 services:
-  # Firestore Emulator - Only runs in dev mode (default)
-  firestore-emulator:
-    image: gcr.io/google.com/cloudsdktool/google-cloud-cli:emulators
-    # Container name is parameterized so an isolated test stack (COMPOSE_PROJECT_NAME=specflow-test)
-    # can run its own containers without colliding with a self-hosted quickstart deployment.
-    container_name: ${SPECFLOW_FIRESTORE_CONTAINER:-specflow-firestore-emulator}
-    ports:
-      - "${SPECFLOW_FIRESTORE_PORT:-8080}:8080"
-    # gcloud wraps a JVM child; an init reaps zombies and forwards SIGTERM to it
-    # so --export-on-exit actually fires on `docker compose down`.
-    init: true
-    # Stop order is backend -> firestore-exporter -> firestore-emulator, so the
-    # exporter can take one final snapshot while the emulator is still alive.
-    stop_grace_period: 120s
-    environment:
-      - GCP_PROJECT_ID=${GCP_PROJECT_ID:-local-dev}
-      - FIRESTORE_DATABASE_NAME=${FIRESTORE_DATABASE_NAME:-specflow}
-    command:
-      - /bin/sh
-      - /specflow-scripts/firestore-emulator-entrypoint.sh
-    volumes:
-      - ${WORKSPACE_MOUNT_PATH:-./workspaces}/firestore_emulator:/firestore-data:rw
-      - ./scripts:/specflow-scripts:ro
-    networks:
-      - specflow-network
-    restart: unless-stopped
-    healthcheck:
-      # Check if emulator is responding (returns "OK" on HTTP)
-      # The emulator exposes an HTTP endpoint that returns "OK" when ready
-      test: ["CMD-SHELL", "curl -f http://localhost:8080 || exit 1"]
-      interval: 5s
-      timeout: 3s
-      retries: 10
-      start_period: 30s  # Give emulator more time to start (gRPC can take a while)
-    # Emulator only needed in dev mode, not in test profile
-    profiles: ["", "dev"]  # Empty string means default profile
-
-  # Firestore Emulator Exporter - Periodic local-only snapshots for quickstart durability.
-  # The emulator is in-memory; --export-on-exit is retained as a shutdown fallback, while
-  # this sidecar limits data loss when the emulator does not exit cleanly.
-  firestore-exporter:
-    image: gcr.io/google.com/cloudsdktool/google-cloud-cli:emulators
-    container_name: ${SPECFLOW_FIRESTORE_EXPORTER_CONTAINER:-specflow-firestore-exporter}
-    depends_on:
-      firestore-emulator:
-        condition: service_healthy
-    environment:
-      - GCP_PROJECT_ID=${GCP_PROJECT_ID:-local-dev}
-      - FIRESTORE_DATABASE_NAME=${FIRESTORE_DATABASE_NAME:-specflow}
-      - FIRESTORE_EXPORT_INTERVAL_SECONDS=${FIRESTORE_EXPORT_INTERVAL_SECONDS:-60}
-    command:
-      - /bin/sh
-      - /specflow-scripts/firestore-emulator-exporter.sh
-    volumes:
-      - ${WORKSPACE_MOUNT_PATH:-./workspaces}/firestore_emulator:/firestore-data:rw
-      - ./scripts:/specflow-scripts:ro
-    networks:
-      - specflow-network
-    restart: unless-stopped
-    stop_grace_period: 120s
-    profiles: ["", "dev"]
-
   # Backend Service - Runs in both dev and test modes
   backend:
     build:
@@ -99,10 +36,13 @@ services:
       - P10Y_ORGANISATION_ID=${P10Y_ORGANISATION_ID}
       # Skip mode for testing workflows without agent execution
       - SKIP_AGENT_EXECUTION=${SKIP_AGENT_EXECUTION:-false}
-      # Database configuration - defaults to emulator mode
-      # Override via .env file for test mode
-      - DATABASE_TYPE=${DATABASE_TYPE:-emulator}
-      - FIRESTORE_EMULATOR_HOST=firestore-emulator:8080
+      # Database configuration - defaults to sqlite (local dev, no separate DB container).
+      # Override via .env for DATABASE_TYPE=firestore to connect to an already-hosted,
+      # GCP-managed Firestore instance (SpecFlow never deploys/manages Firestore itself),
+      # or DATABASE_TYPE=emulator to connect to a manually-run Firestore emulator process.
+      - DATABASE_TYPE=${DATABASE_TYPE:-sqlite}
+      - SQLITE_DB_PATH=${SQLITE_DB_PATH:-/root/.specflow/specflow.db}
+      - FIRESTORE_EMULATOR_HOST=${FIRESTORE_EMULATOR_HOST}
       - GCP_PROJECT_ID=${GCP_PROJECT_ID:-local-dev}
       - FIRESTORE_DATABASE_NAME=${FIRESTORE_DATABASE_NAME:-specflow}
       - POSTHOG_API_KEY=${POSTHOG_API_KEY}
@@ -131,7 +71,14 @@ services:
       - ${AGENT_LOGS_MOUNT_PATH:-./agent_logs}:/agent_logs:rw
       # Note: agent_logs are stored in dedicated mount at /agent_logs
       # For local development, this can be configured via AGENT_LOGS_MOUNT_PATH environment variable
-      # For test profile with real Firestore, optionally mount GCP credentials
+      # Bind mount (not a named volume) to the host's ~/.specflow/ directory — the SAME
+      # directory the host-side TUI/CLI writes config.json into, and the single central
+      # SQLite database shared across every local project/MCP session on this machine
+      # (the local-dev analogue of the old shared Firestore emulator). NEVER point
+      # SPECFLOW_HOME_MOUNT_PATH at NFS/Filestore-backed storage.
+      - ${SPECFLOW_HOME_MOUNT_PATH:-${HOME}/.specflow}:/root/.specflow:rw
+      # For test profile connecting to a hosted GCP Firestore instance, optionally mount
+      # GCP credentials
       # - ${GOOGLE_APPLICATION_CREDENTIALS:-}:/app/credentials.json:ro
     networks:
       - specflow-network
@@ -142,14 +89,6 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s
-    # In dev mode, wait for the exporter too. Compose stops services in reverse
-    # dependency order, so backend shutdown marks in-flight runs failed before
-    # the exporter performs its final snapshot and before the emulator exits.
-    depends_on:
-      firestore-emulator:
-        condition: service_healthy
-      firestore-exporter:
-        condition: service_started
     stop_grace_period: 120s
 
 networks:
diff --git a/plans/use-sqlite-instead-firestore/use-sqlite-instead-firestore-SPECS.md b/plans/use-sqlite-instead-firestore/use-sqlite-instead-firestore-SPECS.md
new file mode 100644
index 0000000..7e68dfc
--- /dev/null
+++ b/plans/use-sqlite-instead-firestore/use-sqlite-instead-firestore-SPECS.md
@@ -0,0 +1,147 @@
+# Tech Spec — Replace Firestore Emulator with SQLite in Docker dev stack
+
+Status: Draft (awaiting review)
+Branch: `feature/use-sqllite-instead-firestore`
+Source: adapted from `griddynamics/gd-specflow` PR #314 ("Local quickstart: SQLite backend + no-Docker bare-mode"), scoped down.
+
+## Scope
+
+PR #314 upstream does two independent things: (1) a SQLite `IDatabase` backend, (2) a
+`--bare-mode` no-Docker host runner built on top of it. We only want (1), applied to the
+**existing Docker dev stack** — `docker-compose up` should run backend + Postgres-free,
+Java-free SQLite instead of backend + `firestore-emulator` + `firestore-exporter`.
+
+**In scope:**
+- New `SqliteDatabase` (`IDatabase` implementation), wired through `factory.py`.
+- `docker-compose.yml` dev profile: drop `firestore-emulator` / `firestore-exporter`
+  services; backend gets `DATABASE_TYPE=sqlite` + a volume-mounted db file.
+- Seeding script generalization (`init_firestore.py` → `init_db.py`) so `make init-db`
+  works against whichever backend is active.
+- Startup validation treating `sqlite` as a non-critical-pool backend, same as `emulator`/`memory`.
+- Test suite: shared `IDatabase` contract extracted once, run against both memory and sqlite.
+
+**Out of scope (stays as upstream's follow-on, not ported here):**
+- `specflow-init.sh --bare-mode` / host-runtime detection.
+- `scripts/stage-rosetta-plugin.sh`.
+- Any "run without Docker at all" documentation or tooling.
+- Production Firestore path — untouched; `sqlite` is dev/local only, single-writer,
+  no cross-node locking (matches [[STEEL COMMANDMENT XI scope note]] — this doesn't touch
+  coding/deploy archival guarantees, it only changes the *local dev* persistence layer).
+
+## Design (ported as-is from PR #314, verified against current repo state)
+
+### 1. `backend/app/database/sqlite.py` (new file, ~440 lines)
+Self-contained `IDatabase` implementation, no dependency on bare-mode code:
+- Two JSON-blob tables (`documents`, `subdocuments`), primary-keyed on
+  `(collection, doc_id)` / `(parent_collection, parent_doc_id, subcollection, doc_id)`.
+- Filters pushed into SQL via `json_extract` (`==`, `!=`, `<`/`<=`/`>`/`>=`, `in`, `array_contains`).
+- `run_transaction()` uses real `BEGIN IMMEDIATE` with busy-retry on `sqlite3.OperationalError`
+  ("database is locked").
+- Datetime contract: every datetime stored as fixed-width ISO-8601 UTC text (`_canonical_dt`),
+  decoded back to tz-aware `datetime` on read (`_decode_from_storage`) — required so
+  `stuck_running_detector` / lease-recovery comparisons against `datetime.now(UTC)` behave
+  identically to Firestore.
+- WAL mode + `busy_timeout`; single-writer by design (this is the local/dev story, not a
+  distributed-locking replacement for prod Firestore).
+- Ported verbatim — no adaptation needed, it has no bare-mode coupling.
+
+### 2. Wiring
+- `backend/app/core/enums.py`: add `SQLITE = "sqlite"` to `DatabaseType`.
+- `backend/app/core/config.py`: add `SQLITE_DB_PATH: str = "./.specflow-local/specflow.db"`
+  — comment carries over the NFS warning (SQLite file locking is unsafe over NFS; must be
+  block storage). For the Docker case this becomes a named Docker volume, not a bind mount
+  to NFS-backed `WORKSPACE_MOUNT_PATH`.
+- `backend/app/database/factory.py`: add `elif db_type == DatabaseType.SQLITE:` branch
+  constructing `SqliteDatabase(db_path=settings.SQLITE_DB_PATH)`; extend
+  `clear_test_data()`'s allowed-types tuple to include `SQLITE`.
+- `backend/app/services/startup_validation.py`: extend the existing
+  `database_type in ("emulator", "memory")` non-critical checks to
+  `("emulator", "memory", "sqlite")` (workspace-pool + filestore warnings, not fatal) and
+  the k8s-readiness branch's `(FIRESTORE, EMULATOR)` tuple to include `SQLITE`.
+
+### 3. Seeding: `init_firestore.py` → `init_db.py`
+Rename + generalize (upstream's version, minus the bare-mode-only env-precheck removal
+rationale — we keep that part too since it's a genuine simplification): drop the
+Firestore-only env prechecks from `main()` since misconfiguration now fails fast inside
+`get_database()` itself (single source of truth, per this repo's "no duplicate validators"
+convention). Update all doc/error-string references in `local_identity.py` and
+`local_auth.py` (comment-only changes, no behavior change).
+
+### 4. `docker-compose.yml` (not touched by upstream PR #314 — new for this scoped port)
+Current dev profile: `firestore-emulator` + `firestore-exporter` sidecar, backend depends on
+emulator health check, `DATABASE_TYPE=emulator`.
+
+New dev profile:
+- Remove `firestore-emulator` and `firestore-exporter` services (and their `profiles: ["", "dev"]`
+  entries) — or gate them behind a `firestore-emulator` profile so `--profile test` (real GCP
+  Firestore path) and anyone who wants to force the emulator can still opt in explicitly.
+- Backend service: `DATABASE_TYPE=sqlite`, `SQLITE_DB_PATH=/data/specflow.db` (container path),
+  new named volume `sqlite-data:/data`. Drop `depends_on: firestore-emulator`.
+- `make init-db` / `make e2e-setup`: seeding no longer needs `FIRESTORE_EMULATOR_HOST` —
+  runs `DATABASE_TYPE=sqlite uv run scripts/init_db.py $(INIT_DB_ARGS)` inside the backend
+  container (or via `docker compose exec backend ...`, matching how `init-firestore` currently
+  execs against the running stack — needs confirmation, see open question below).
+- `--profile test` (real Firestore) path is untouched.
+
+### 5. Tests
+- Extract `backend/test/database/db_contract.py`: shared `IDatabase` contract test classes
+  (`TestBasicCRUD`, `TestQuery`, `TestTransactions`, `TestArrayOperations`, `TestIsolation`,
+  `TestServerTimestamp`, `TestApiKeyByUid`), parametrized by a `db` fixture. `test_memory_db.py`
+  shrinks to import + run the shared contract against `InMemoryDatabase`.
+- `backend/test/database/test_sqlite_db.py`: runs the same shared contract against
+  `SqliteDatabase(":memory:")`, plus SQLite-only datetime-boundary tests (tz-aware roundtrip,
+  naive-assumed-UTC, `<` filter excludes `None`, nested datetime-in-list roundtrip) and a
+  persistence test (`state_survives_reopen` — write via one connection, read via a fresh one)
+  and an end-to-end `detect_stuck_running` test against the real `StateMachineDBAdapter`.
+- `test_factory.py`: add `test_factory_returns_sqlite_database`.
+- `test_startup_validation.py`: add `test_sqlite_empty_pool_is_non_critical` regression test.
+- `test_init_db.py` (renamed from `test_init_firestore.py`): update references.
+
+## Open questions (need your call before implementation)
+
+1. **Docker volume path for the SQLite file** — a named Docker volume (`sqlite-data:/data`,
+   survives `docker compose down` but not `down -v`) vs. a bind mount under
+   `${WORKSPACE_MOUNT_PATH}` like the current emulator export dir. Given the NFS-unsafe
+   warning in `config.py`'s upstream comment, and that `WORKSPACE_MOUNT_PATH` is Filestore/NFS
+   in this repo's architecture, a **named volume is the safe default** — recommend that unless
+   you want the db file inspectable on the host.
+2. **Keep the Firestore-emulator path as an opt-in profile**, or delete it outright? Recommend
+   keeping it behind a profile (e.g. `--profile firestore-emulator`) for anyone still testing
+   against real Firestore semantics locally, rather than a hard rip-out — cheap to keep, and
+   avoids a one-way door.
+3. **`make init-db` execution context** — currently `init-firestore` runs from the host via
+   `uv run` against `FIRESTORE_EMULATOR_HOST=localhost:8080` (the emulator's exposed port).
+   SQLite has no exposed port — seeding must run either (a) inside the backend container via
+   `docker compose exec`, or (b) from the host directly against the same file if the volume is
+   also bind-mounted to a host path. Recommend (a) for consistency with "the file lives in the
+   container's volume, nothing outside touches it directly."
+4. Do you want the upstream rename `init_firestore.py` → `init_db.py` (touches Makefile,
+   local_auth.py comments, local_identity.py comments, error strings), or keep the filename and
+   just add a sqlite branch inside it? Recommend the rename — matches upstream and this repo's
+   "single source of truth, no special-casing" convention — but it's a wider diff (touches
+   4-5 files for renames alone) if you want to minimize footprint instead.
+
+## File list (estimated)
+
+| File | Change |
+|---|---|
+| `backend/app/database/sqlite.py` | new (~440 lines, ported verbatim) |
+| `backend/app/core/enums.py` | +1 line |
+| `backend/app/core/config.py` | +3 lines |
+| `backend/app/database/factory.py` | ~+10/-4 |
+| `backend/app/services/startup_validation.py` | ~+9/-10 |
+| `backend/scripts/init_firestore.py` → `init_db.py` | rename + ~-13 lines |
+| `backend/app/core/local_identity.py` | docstring ref only |
+| `backend/app/middleware/local_auth.py` | doc/error-string refs only |
+| `docker-compose.yml` | dev profile: swap emulator services for sqlite volume (new design, not from upstream) |
+| `Makefile` | rename `init-firestore*` → `init-db*`, drop `FIRESTORE_EMULATOR_HOST` requirement in the sqlite path |
+| `.env.example` / `.env.quickstart.example` (whichever this repo uses) | `DATABASE_TYPE` default, `SQLITE_DB_PATH` |
+| `backend/test/database/db_contract.py` | new (shared contract, extracted from `test_memory_db.py`) |
+| `backend/test/database/test_memory_db.py` | shrinks to contract-runner |
+| `backend/test/database/test_sqlite_db.py` | new |
+| `backend/test/database/test_factory.py` | +1 test |
+| `backend/test/services/test_startup_validation.py` | +1 test |
+| `backend/test/scripts/test_init_firestore.py` → `test_init_db.py` | rename + ref updates |
+
+Not ported: `specflow-init.sh`, `scripts/stage-rosetta-plugin.sh`, `QUICKSTART.md` bare-mode
+sections, `agents/IMPLEMENTATION.md` (upstream changelog entry, not applicable here).
diff --git a/scripts/firestore-emulator-entrypoint.sh b/scripts/firestore-emulator-entrypoint.sh
deleted file mode 100644
index 2ba6aff..0000000
--- a/scripts/firestore-emulator-entrypoint.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/sh
-set -eu
-
-DATA_DIR="${FIRESTORE_EMULATOR_DATA_DIR:-/firestore-data}"
-CURRENT_EXPORT_DIR="${FIRESTORE_CURRENT_EXPORT_DIR:-${DATA_DIR}/current}"
-PROJECT_ID="${GCP_PROJECT_ID:-local-dev}"
-DATABASE_ID="${FIRESTORE_DATABASE_ID:-${FIRESTORE_DATABASE_NAME:-default}}"
-
-if [ "${DATABASE_ID}" = "default" ]; then
-  DATABASE_ID="(default)"
-fi
-
-export CLOUDSDK_CORE_PROJECT="${PROJECT_ID}"
-export FIRESTORE_DATABASE_ID="${DATABASE_ID}"
-
-export_metadata_file() {
-  find "$1" -maxdepth 4 -type f -name "*overall_export_metadata" -print -quit 2>/dev/null
-}
-
-mkdir -p "${DATA_DIR}" "${CURRENT_EXPORT_DIR}"
-
-IMPORT_ARGS=""
-CURRENT_METADATA_FILE="$(export_metadata_file "${CURRENT_EXPORT_DIR}")"
-LEGACY_METADATA_FILE="$(export_metadata_file "${DATA_DIR}")"
-if [ -n "${CURRENT_METADATA_FILE}" ]; then
-  echo "Restoring Firestore emulator data from ${CURRENT_METADATA_FILE}"
-  IMPORT_ARGS="--import-data=${CURRENT_METADATA_FILE}"
-elif [ -n "${LEGACY_METADATA_FILE}" ]; then
-  echo "Restoring legacy Firestore emulator data from ${LEGACY_METADATA_FILE}"
-  IMPORT_ARGS="--import-data=${LEGACY_METADATA_FILE}"
-else
-  echo "No Firestore emulator export found; starting empty"
-fi
-
-echo "Using Firestore emulator project ${PROJECT_ID} database ${DATABASE_ID}"
-
-# shellcheck disable=SC2086
-exec gcloud emulators firestore start \
-  --project="${PROJECT_ID}" \
-  --host-port=0.0.0.0:8080 \
-  ${IMPORT_ARGS} \
-  --export-on-exit="${CURRENT_EXPORT_DIR}"
diff --git a/scripts/firestore-emulator-exporter.sh b/scripts/firestore-emulator-exporter.sh
deleted file mode 100644
index 44cdfa5..0000000
--- a/scripts/firestore-emulator-exporter.sh
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/bin/sh
-set -eu
-
-DATA_DIR="${FIRESTORE_EMULATOR_DATA_DIR:-/firestore-data}"
-CURRENT_EXPORT_DIR="${FIRESTORE_CURRENT_EXPORT_DIR:-${DATA_DIR}/current}"
-NEXT_EXPORT_DIR="${FIRESTORE_NEXT_EXPORT_DIR:-${DATA_DIR}/next}"
-PREVIOUS_EXPORT_DIR="${FIRESTORE_PREVIOUS_EXPORT_DIR:-${DATA_DIR}/previous}"
-EMULATOR_HOST="${FIRESTORE_EMULATOR_INTERNAL_HOST:-firestore-emulator:8080}"
-PROJECT_ID="${GCP_PROJECT_ID:-local-dev}"
-DATABASE_ID="${FIRESTORE_DATABASE_ID:-${FIRESTORE_DATABASE_NAME:-default}}"
-EXPORT_INTERVAL_SECONDS="${FIRESTORE_EXPORT_INTERVAL_SECONDS:-60}"
-EXPORT_TIMEOUT_SECONDS="${FIRESTORE_EXPORT_TIMEOUT_SECONDS:-120}"
-
-if [ "${DATABASE_ID}" = "default" ]; then
-  DATABASE_ID="(default)"
-fi
-
-DATABASE_PATH="projects/${PROJECT_ID}/databases/${DATABASE_ID}"
-EXPORT_ENDPOINT="http://${EMULATOR_HOST}/emulator/v1/projects/${PROJECT_ID}:export"
-
-log() {
-  printf '%s %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*"
-}
-
-has_export_metadata() {
-  find "$1" -maxdepth 4 -name "*overall_export_metadata" -print -quit 2>/dev/null | grep -q .
-}
-
-wait_for_emulator() {
-  until curl -sf --max-time 2 "http://${EMULATOR_HOST}" >/dev/null; do
-    log "Waiting for Firestore emulator at ${EMULATOR_HOST} ..."
-    sleep 2
-  done
-}
-
-export_once() {
-  log "Exporting Firestore emulator database ${DATABASE_PATH}"
-  rm -rf "${NEXT_EXPORT_DIR}"
-  mkdir -p "${NEXT_EXPORT_DIR}"
-
-  payload=$(printf '{"database":"%s","export_directory":"%s"}' "${DATABASE_PATH}" "${NEXT_EXPORT_DIR}")
-  curl -sf --max-time "${EXPORT_TIMEOUT_SECONDS}" \
-    -X POST "${EXPORT_ENDPOINT}" \
-    -H "Content-Type: application/json" \
-    -d "${payload}" >/tmp/firestore-export-response.json
-
-  if ! has_export_metadata "${NEXT_EXPORT_DIR}"; then
-    log "Export API returned successfully but no export metadata was written"
-    return 1
-  fi
-
-  rm -rf "${PREVIOUS_EXPORT_DIR}"
-  if [ -e "${CURRENT_EXPORT_DIR}" ]; then
-    mv "${CURRENT_EXPORT_DIR}" "${PREVIOUS_EXPORT_DIR}"
-  fi
-  mv "${NEXT_EXPORT_DIR}" "${CURRENT_EXPORT_DIR}"
-  log "Firestore emulator export persisted to ${CURRENT_EXPORT_DIR}"
-}
-
-shutdown() {
-  log "Stop signal received; attempting final Firestore emulator export"
-  if ! export_once; then
-    log "Final Firestore emulator export failed; keeping previous snapshot"
-  fi
-  exit 0
-}
-
-trap shutdown TERM INT
-
-mkdir -p "${DATA_DIR}"
-wait_for_emulator
-
-if ! export_once; then
-  log "Initial Firestore emulator export failed; will retry on interval"
-fi
-
-while :; do
-  sleep "${EXPORT_INTERVAL_SECONDS}" &
-  wait "$!" || true
-  if ! export_once; then
-    log "Periodic Firestore emulator export failed; will retry"
-  fi
-done
diff --git a/scripts/get-api-key.py b/scripts/get-api-key.py
index 3a9e0b4..bfefcaf 100755
--- a/scripts/get-api-key.py
+++ b/scripts/get-api-key.py
@@ -13,9 +13,12 @@
 # Add backend to path
 sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
 
-# Set DATABASE_TYPE early if FIRESTORE_EMULATOR_HOST is set
+# Set DATABASE_TYPE early: emulator auto-detect takes priority (manually-run emulator),
+# otherwise default to sqlite (the local/Docker-dev default).
 if os.getenv("FIRESTORE_EMULATOR_HOST") and not os.getenv("DATABASE_TYPE"):
     os.environ["DATABASE_TYPE"] = "emulator"
+elif not os.getenv("DATABASE_TYPE"):
+    os.environ["DATABASE_TYPE"] = "sqlite"
 
 from app.database.factory import get_database
 
@@ -29,7 +32,7 @@ def main():
         
         if not api_keys:
             print("⚠️  No API keys found in database")
-            print("   Run 'make init-firestore' to create one")
+            print("   Run 'make init-db' to create one")
             sys.exit(1)
         
         # Find active keys
diff --git a/specflow-init.sh b/specflow-init.sh
index b4fb681..87a4b31 100755
--- a/specflow-init.sh
+++ b/specflow-init.sh
@@ -12,7 +12,7 @@
 #   3. Create .specflow-local/ and write workspaces.json, init.log, mcp-config.json
 #   4. Start the backend stack only (emulator + backend; NOT mcp-server profile)
 #   5. Health-gate: poll /health/ready before seeding
-#   6. Seed Firestore via init_firestore.py
+#   6. Seed Firestore via init_db.py
 #   7. Write .specflow-local/mcp-config.json (keyless IDE MCP-client snippet)
 #   8. Install the local SpecFlow CLI entry point
 #   9. Print manual-install instruction for the user
@@ -500,17 +500,17 @@ if [[ "${RESET_LOCAL_DB}" == "true" ]]; then
 fi
 
 if [[ "${DRY_RUN}" == "true" ]]; then
-    info "[DRY RUN] Would run: FIRESTORE_EMULATOR_HOST=${_FIRESTORE_HOST} uv run scripts/init_firestore.py --dry-run ${_SEED_FLAGS[*]}"
-    log "INFO: [DRY RUN] init_firestore.py --dry-run ${_SEED_FLAGS[*]}"
+    info "[DRY RUN] Would run: FIRESTORE_EMULATOR_HOST=${_FIRESTORE_HOST} uv run scripts/init_db.py --dry-run ${_SEED_FLAGS[*]}"
+    log "INFO: [DRY RUN] init_db.py --dry-run ${_SEED_FLAGS[*]}"
 else
     info "Seeding Firestore emulator ..."
-    log "INFO: Running init_firestore.py ${_SEED_FLAGS[*]} against ${_FIRESTORE_HOST}"
+    log "INFO: Running init_db.py ${_SEED_FLAGS[*]} against ${_FIRESTORE_HOST}"
     (
         cd "${SCRIPT_DIR}/backend"
         FIRESTORE_EMULATOR_HOST="${_FIRESTORE_HOST}" \
-            uv run scripts/init_firestore.py "${_SEED_FLAGS[@]}"
+            uv run scripts/init_db.py "${_SEED_FLAGS[@]}"
     ) > >(log_stream) 2> >(log_stream) \
-        || error "init_firestore.py failed. Check ${LOG_FILE} for details."
+        || error "init_db.py failed. Check ${LOG_FILE} for details."
     info "Firestore seeded successfully."
 fi
 

From 14a9d98551a0fe96fe716d682f3497cf0e602fc6 Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Wed, 1 Jul 2026 16:43:08 +0200
Subject: [PATCH 2/8] fix(tui): TUI/quickstart readiness and seeding follow
 sqlite default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TUI's container-readiness check and specflow-init.sh's onboarding flow
still assumed a firestore-emulator container existed. Fixes the resulting
breakage from the sqlite-default switch:

- local_env.py: containers_running() now checks only the backend container
  (sqlite has no separate container — it's a bind-mounted file).
- specflow-init.sh: seeding step no longer hardcodes FIRESTORE_EMULATOR_HOST
  unconditionally; --reset-local-db now clears the SQLite file + WAL/SHM
  sidecars instead of an emulator export directory.
- .env.quickstart.example: DATABASE_TYPE default flips to sqlite, documents
  SQLITE_DB_PATH/SPECFLOW_HOME_MOUNT_PATH, reframes Firestore vars as
  hosted-connect-only.
- mcp_server/tests/e2e/runner.py + cli.py: default to sqlite for API-key
  fetch and update --reset-local-db help text.
---
 .env.quickstart.example                       | 54 +++++++++-------
 .../test/scripts/test_specflow_init_script.py | 16 ++---
 mcp_server/cli.py                             |  2 +-
 mcp_server/services/local_env.py              | 23 +++----
 mcp_server/tests/e2e/runner.py                | 10 ++-
 mcp_server/tests/test_local_env.py            | 14 +++--
 specflow-init.sh                              | 61 ++++++++++---------
 7 files changed, 98 insertions(+), 82 deletions(-)

diff --git a/.env.quickstart.example b/.env.quickstart.example
index ab79258..805aacf 100644
--- a/.env.quickstart.example
+++ b/.env.quickstart.example
@@ -48,30 +48,47 @@ AUTH_MODE=local
 
 # DATABASE_TYPE
 #   What:  Which persistence backend to use for state and session data.
-#          "emulator" = bundled Firestore emulator container (no GCP needed).
-#          "firestore" = real GCP Firestore (production).
+#          "sqlite" = local file, no GCP needed, no separate container (default).
+#          "firestore" = connect to an already-hosted, GCP-managed Firestore
+#                        instance (production, or your own hosted instance).
+#          "emulator" = connect to a manually-run Firestore emulator process
+#                       (SpecFlow does not start one for you).
 #          "memory" = in-memory only (unit tests).
-#   How:   Leave as "emulator" for quickstart. No GCP credentials required.
-DATABASE_TYPE=emulator
+#   How:   Leave as "sqlite" for quickstart. No GCP credentials required.
+DATABASE_TYPE=sqlite
+
+# SQLITE_DB_PATH
+#   What:  Container-internal path to the SQLite file (only used when
+#          DATABASE_TYPE=sqlite). docker-compose bind-mounts your host's
+#          ~/.specflow/ directory here — one central database shared across
+#          every local project/MCP session on this machine, the same way the
+#          Firestore emulator used to be one shared local instance.
+#   How:   Leave as-is unless you change the docker-compose bind-mount.
+SQLITE_DB_PATH=/root/.specflow/specflow.db
+
+# SPECFLOW_HOME_MOUNT_PATH
+#   What:  Host-side directory bind-mounted into the backend container at
+#          SQLITE_DB_PATH's parent. Must be on real (block) storage, never
+#          NFS/Filestore — SQLite's file locking is unsafe over NFS.
+#   How:   Leave unset to default to ~/.specflow.
+# SPECFLOW_HOME_MOUNT_PATH=
 
 # FIRESTORE_EMULATOR_HOST
-#   What:  Network address of the Firestore emulator. Only used when
-#          DATABASE_TYPE=emulator.
-#   How:   Leave as "localhost:8080". Host-side scripts (init_firestore.py)
-#          connect here. Inside the backend container, docker-compose
-#          automatically injects "firestore-emulator:8080".
+#   What:  Network address of a manually-run Firestore emulator. Only used
+#          when DATABASE_TYPE=emulator.
+#   How:   Leave as "localhost:8080" if you run one yourself; otherwise unused.
 FIRESTORE_EMULATOR_HOST=localhost:8080
 
 # GCP_PROJECT_ID
-#   What:  GCP project identifier passed to the Firestore client.
-#          For the emulator this is an arbitrary string (not validated).
-#          For production Firestore, use your real GCP project ID.
-#   How:   Leave as "local-dev" for quickstart.
+#   What:  GCP project identifier passed to the Firestore client. Only used
+#          when DATABASE_TYPE=firestore or emulator.
+#   How:   Leave as "local-dev" for quickstart (sqlite ignores this).
 GCP_PROJECT_ID=local-dev
 
 # FIRESTORE_DATABASE_NAME
-#   What:  Logical database name within the Firestore project.
-#   How:   Leave as "specflow" for quickstart.
+#   What:  Logical database name within the Firestore project. Only used
+#          when DATABASE_TYPE=firestore or emulator.
+#   How:   Leave as "specflow" for quickstart (sqlite ignores this).
 FIRESTORE_DATABASE_NAME=specflow
 
 # WORKSPACE_BASE_PATH / AGENT_LOGS_BASE_PATH
@@ -82,13 +99,6 @@ FIRESTORE_DATABASE_NAME=specflow
 WORKSPACE_BASE_PATH=/workspaces
 AGENT_LOGS_BASE_PATH=/agent_logs
 
-# FIRESTORE_EXPORT_INTERVAL_SECONDS
-#   What:  How often (in seconds) the emulator sidecar exports Firestore state
-#          to ./workspaces/firestore_emulator/ for crash durability. On restart
-#          the emulator restores from the last export automatically.
-#   How:   Leave as 25. Reset all exported data with: ./specflow-init.sh --reset-local-db
-FIRESTORE_EXPORT_INTERVAL_SECONDS=25
-
 # USER_EMAIL / GIT_USER_EMAIL
 #   What:  Your email identity used for MCP config, notification emails, and
 #          git commit authorship on workspaces.
diff --git a/backend/test/scripts/test_specflow_init_script.py b/backend/test/scripts/test_specflow_init_script.py
index 547c7c5..6d4b6d8 100644
--- a/backend/test/scripts/test_specflow_init_script.py
+++ b/backend/test/scripts/test_specflow_init_script.py
@@ -98,17 +98,19 @@ def test_makefile_test_stack_uses_separate_names_ports_and_isolated_sqlite_path(
 
 
 def test_specflow_init_defaults_local_firestore_database_name():
-    """Host-side seeding and Compose must agree on the quickstart database name."""
+    """Host-side seeding and Compose must agree on the hosted-Firestore database name
+    (only relevant when DATABASE_TYPE=firestore/emulator; sqlite ignores it)."""
     text = _script_text()
 
     assert 'export FIRESTORE_DATABASE_NAME="${FIRESTORE_DATABASE_NAME:-specflow}"' in text
 
 
-def test_reset_local_db_clears_persisted_emulator_export_only():
-    """A reset must clear the host export dir that survives docker compose down -v."""
+def test_reset_local_db_clears_sqlite_file_not_a_directory():
+    """A reset must clear the central SQLite file (+ WAL/SHM sidecars) that survives
+    docker compose down -v, guarded against clearing an unexpected path."""
     text = _script_text()
 
-    assert 'FIRESTORE_EMULATOR_DATA_DIR="${SCRIPT_DIR}/${_WORKSPACE_MOUNT_PATH#./}' in text
-    assert 'basename "${FIRESTORE_EMULATOR_DATA_DIR}"' in text
-    assert '!= "firestore_emulator"' in text
-    assert 'rm -rf "${FIRESTORE_EMULATOR_DATA_DIR}"' in text
+    assert 'SPECFLOW_HOME_PATH="${SPECFLOW_HOME_MOUNT_PATH:-${HOME}/.specflow}"' in text
+    assert 'basename "${SPECFLOW_HOME_PATH}"' in text
+    assert '!= ".specflow"' in text
+    assert 'rm -f "${SPECFLOW_HOME_PATH}/specflow.db"' in text
diff --git a/mcp_server/cli.py b/mcp_server/cli.py
index e3b7355..ac716e5 100644
--- a/mcp_server/cli.py
+++ b/mcp_server/cli.py
@@ -530,7 +530,7 @@ def _build_parser() -> argparse.ArgumentParser:
         "--reset-local-db",
         action="store_true",
         dest="reset_local_db",
-        help="Reset the local Firestore emulator data before seeding",
+        help="Reset the local SQLite database before seeding",
     )
     p_init.add_argument(
         "--provide-own-repos",
diff --git a/mcp_server/services/local_env.py b/mcp_server/services/local_env.py
index 503c91b..5cb9c46 100644
--- a/mcp_server/services/local_env.py
+++ b/mcp_server/services/local_env.py
@@ -36,9 +36,10 @@
 _ENV_EXAMPLE_FILENAME = ".env.quickstart.example"
 _INIT_SCRIPT = "specflow-init.sh"
 
-# Mirror docker-compose.yml container-name env-var defaults.
+# Mirror docker-compose.yml container-name env-var defaults. SQLite is the local/Docker
+# default and has no separate container (it's a bind-mounted file); the Firestore emulator
+# is no longer started by docker-compose, so only the backend container is checked.
 _BACKEND_CONTAINER_DEFAULT = "specflow-backend"
-_FIRESTORE_CONTAINER_DEFAULT = "specflow-firestore-emulator"
 
 
 # ---------------------------------------------------------------------------
@@ -189,20 +190,18 @@ def is_setup_complete(root: Path) -> bool:
 # ---------------------------------------------------------------------------
 
 
-def _container_names() -> tuple[str, str]:
-    backend = os.getenv("SPECFLOW_BACKEND_CONTAINER", _BACKEND_CONTAINER_DEFAULT)
-    firestore = os.getenv("SPECFLOW_FIRESTORE_CONTAINER", _FIRESTORE_CONTAINER_DEFAULT)
-    return backend, firestore
+def _container_name() -> str:
+    return os.getenv("SPECFLOW_BACKEND_CONTAINER", _BACKEND_CONTAINER_DEFAULT)
 
 
 def containers_running(root: Path | None = None) -> bool:
-    """True iff BOTH SpecFlow containers are currently running.
+    """True iff the SpecFlow backend container is currently running.
 
-    Uses ``docker ps`` filtered by the compose container names. A missing docker
+    Uses ``docker ps`` filtered by the compose container name. A missing docker
     CLI or any error is treated as "not running" (the caller then offers to
     start them, which surfaces the real failure with streamed output).
     """
-    backend, firestore = _container_names()
+    backend = _container_name()
     try:
         completed = subprocess.run(
             [
@@ -210,8 +209,6 @@ def containers_running(root: Path | None = None) -> bool:
                 "ps",
                 "--filter",
                 f"name={backend}",
-                "--filter",
-                f"name={firestore}",
                 "--format",
                 "{{.Names}}",
             ],
@@ -224,7 +221,7 @@ def containers_running(root: Path | None = None) -> bool:
     if completed.returncode != 0:
         return False
     names = {line.strip() for line in completed.stdout.splitlines() if line.strip()}
-    return backend in names and firestore in names
+    return backend in names
 
 
 # ---------------------------------------------------------------------------
@@ -394,7 +391,7 @@ async def run_init(
 ) -> int:
     """Run ``bash ./specflow-init.sh <flags>`` from ``root``, streaming output.
 
-    The script owns all state mutation (docker up, repo provisioning, firestore
+    The script owns all state mutation (docker up, repo provisioning, database
     seed, mcp-config write); this only invokes and streams it. Returns the exit
     code.
     """
diff --git a/mcp_server/tests/e2e/runner.py b/mcp_server/tests/e2e/runner.py
index 4996110..1488673 100644
--- a/mcp_server/tests/e2e/runner.py
+++ b/mcp_server/tests/e2e/runner.py
@@ -116,7 +116,8 @@ def assert_files(base_dir: Path, required_globs: list[str]) -> None:
 def resolve_api_credentials() -> tuple[str, str]:
     """
     Return (SPECFLOW_API_KEY, USER_EMAIL): from env vars if both are set, otherwise
-    fetch from the local Firestore emulator by running scripts/get-api-key.py.
+    fetch from the active database backend (sqlite by default) by running
+    scripts/get-api-key.py.
     """
     key = os.environ.get("SPECFLOW_API_KEY", "").strip()
     email = os.environ.get("USER_EMAIL", "").strip()
@@ -130,10 +131,13 @@ def resolve_api_credentials() -> tuple[str, str]:
         cwd=str(REPO_ROOT / "backend"),
         env={
             **os.environ,
+            "DATABASE_TYPE": os.getenv("DATABASE_TYPE", "sqlite"),
+            "SQLITE_DB_PATH": os.getenv(
+                "SQLITE_DB_PATH", str(Path.home() / ".specflow" / "specflow.db")
+            ),
             "FIRESTORE_EMULATOR_HOST": os.getenv("FIRESTORE_EMULATOR_HOST", "localhost:8080"),
             "GCP_PROJECT_ID": os.getenv("GCP_PROJECT_ID", "local-dev"),
             "FIRESTORE_DATABASE_NAME": os.getenv("FIRESTORE_DATABASE_NAME", "specflow"),
-            "DATABASE_TYPE": "emulator",
         },
     )
     if result.returncode != 0:
@@ -158,7 +162,7 @@ def resolve_api_credentials() -> tuple[str, str]:
         return found_key, found_email
 
     raise RuntimeError(
-        "SPECFLOW_API_KEY/USER_EMAIL not set and could not be fetched from Firestore emulator.\n"
+        "SPECFLOW_API_KEY/USER_EMAIL not set and could not be fetched from the database.\n"
         f"get-api-key.py stdout:\n{result.stdout}\n"
         f"get-api-key.py stderr:\n{result.stderr}"
     )
diff --git a/mcp_server/tests/test_local_env.py b/mcp_server/tests/test_local_env.py
index 5b80843..d3a14ab 100644
--- a/mcp_server/tests/test_local_env.py
+++ b/mcp_server/tests/test_local_env.py
@@ -158,20 +158,22 @@ def test_is_setup_complete_requires_both(self, tmp_path):
 
 
 class TestContainersRunning:
+    """SQLite has no separate container (bind-mounted file); only the backend
+    container's presence determines readiness."""
+
     def _run(self, stdout: str, returncode: int = 0):
         from types import SimpleNamespace
 
         return SimpleNamespace(stdout=stdout, returncode=returncode)
 
-    def test_both_present(self):
-        out = "specflow-backend\nspecflow-firestore-emulator\n"
-        with patch("services.local_env.subprocess.run", return_value=self._run(out)):
-            assert local_env.containers_running() is True
-
-    def test_one_missing(self):
+    def test_backend_present(self):
         with patch(
             "services.local_env.subprocess.run", return_value=self._run("specflow-backend\n")
         ):
+            assert local_env.containers_running() is True
+
+    def test_backend_missing(self):
+        with patch("services.local_env.subprocess.run", return_value=self._run("")):
             assert local_env.containers_running() is False
 
     def test_nonzero_returncode(self):
diff --git a/specflow-init.sh b/specflow-init.sh
index 87a4b31..08089e8 100755
--- a/specflow-init.sh
+++ b/specflow-init.sh
@@ -10,9 +10,9 @@
 #   1. Load .env (user must have copied .env.quickstart.example → .env)
 #   2. Generate TOKEN_ENCRYPTION_KEY if blank (via Fernet, never echoed)
 #   3. Create .specflow-local/ and write workspaces.json, init.log, mcp-config.json
-#   4. Start the backend stack only (emulator + backend; NOT mcp-server profile)
+#   4. Start the backend stack only (backend + sqlite; NOT mcp-server profile)
 #   5. Health-gate: poll /health/ready before seeding
-#   6. Seed Firestore via init_db.py
+#   6. Seed the local SQLite database via init_db.py
 #   7. Write .specflow-local/mcp-config.json (keyless IDE MCP-client snippet)
 #   8. Install the local SpecFlow CLI entry point
 #   9. Print manual-install instruction for the user
@@ -60,7 +60,7 @@ Options:
   --provide-own-repos LIST
                           Comma-separated bare repo names (or org/repo) instead of
                           creating new repos; requires at least K×3 entries
-  --reset-local-db        Reset local Firestore emulator data before seeding
+  --reset-local-db        Reset the local SQLite database before seeding
   -h, -H, --help          Show this help message and exit
 
 Examples:
@@ -159,18 +159,19 @@ error() {
     exit 1
 }
 
-clear_firestore_emulator_data_dir() {
-    if [[ -z "${FIRESTORE_EMULATOR_DATA_DIR:-}" ]]; then
-        error "FIRESTORE_EMULATOR_DATA_DIR is empty; refusing to clear local emulator data."
+clear_local_sqlite_db() {
+    if [[ -z "${SPECFLOW_HOME_PATH:-}" ]]; then
+        error "SPECFLOW_HOME_PATH is empty; refusing to clear the local database."
     fi
-    if [[ "$(basename "${FIRESTORE_EMULATOR_DATA_DIR}")" != "firestore_emulator" ]]; then
-        error "Refusing to clear unexpected Firestore emulator data path: ${FIRESTORE_EMULATOR_DATA_DIR}"
+    if [[ "$(basename "${SPECFLOW_HOME_PATH}")" != ".specflow" ]]; then
+        error "Refusing to clear unexpected SpecFlow home path: ${SPECFLOW_HOME_PATH}"
     fi
 
-    info "Clearing persisted Firestore emulator export at ${FIRESTORE_EMULATOR_DATA_DIR} ..."
-    log "INFO: rm -rf <firestore emulator data dir>"
-    rm -rf "${FIRESTORE_EMULATOR_DATA_DIR}"
-    mkdir -p "${FIRESTORE_EMULATOR_DATA_DIR}"
+    info "Clearing local SQLite database at ${SPECFLOW_HOME_PATH}/specflow.db ..."
+    log "INFO: rm -f <sqlite db + wal/shm sidecars>"
+    rm -f "${SPECFLOW_HOME_PATH}/specflow.db" \
+          "${SPECFLOW_HOME_PATH}/specflow.db-wal" \
+          "${SPECFLOW_HOME_PATH}/specflow.db-shm"
 }
 
 set_env_value() {
@@ -225,13 +226,10 @@ set +a
 log "INFO: Loaded .env from ${ENV_FILE}"
 
 _WORKSPACE_MOUNT_PATH="${WORKSPACE_MOUNT_PATH:-./workspaces}"
-case "${_WORKSPACE_MOUNT_PATH}" in
-    /*) FIRESTORE_EMULATOR_DATA_DIR="${_WORKSPACE_MOUNT_PATH}/firestore_emulator" ;;
-    *) FIRESTORE_EMULATOR_DATA_DIR="${SCRIPT_DIR}/${_WORKSPACE_MOUNT_PATH#./}/firestore_emulator" ;;
-esac
+export SPECFLOW_HOME_PATH="${SPECFLOW_HOME_MOUNT_PATH:-${HOME}/.specflow}"
 export FIRESTORE_DATABASE_NAME="${FIRESTORE_DATABASE_NAME:-specflow}"
-log "INFO: Firestore emulator data dir: ${FIRESTORE_EMULATOR_DATA_DIR}"
-log "INFO: Firestore database name: ${FIRESTORE_DATABASE_NAME}"
+log "INFO: SpecFlow home (central SQLite db) dir: ${SPECFLOW_HOME_PATH}"
+log "INFO: Firestore database name (hosted-connect mode only): ${FIRESTORE_DATABASE_NAME}"
 
 # ---------------------------------------------------------------------------
 # Step 1b: Derive local LLM provider
@@ -348,13 +346,13 @@ if [[ -z "${TOKEN_ENCRYPTION_KEY:-}" ]]; then
 fi
 
 # ---------------------------------------------------------------------------
-# Step 3: Start the backend stack (emulator + backend; NOT mcp-server profile)
+# Step 3: Start the backend stack (backend + sqlite; NOT mcp-server profile)
 # ---------------------------------------------------------------------------
 if [[ "${DRY_RUN}" == "true" ]]; then
-    info "[DRY RUN] Would start backend stack: docker compose up -d (emulator + backend)"
+    info "[DRY RUN] Would start backend stack: docker compose up -d (backend)"
     info "[DRY RUN] Would skip mcp-server profile (IDE-side only)."
     if [[ "${RESET_LOCAL_DB}" == "true" ]]; then
-        info "[DRY RUN] Would clear persisted Firestore emulator export at ${FIRESTORE_EMULATOR_DATA_DIR}"
+        info "[DRY RUN] Would clear the local SQLite database at ${SPECFLOW_HOME_PATH}/specflow.db"
     fi
 else
     BUILD_FLAG=""
@@ -370,10 +368,10 @@ else
         info "Resetting local state (--reset-local-db): stopping and removing containers+volumes ..."
         log "INFO: docker compose down -v"
         docker compose -f "${SCRIPT_DIR}/docker-compose.yml" down -v > >(log_stream) 2> >(log_stream) || true
-        clear_firestore_emulator_data_dir
+        clear_local_sqlite_db
     fi
 
-    info "Starting emulator + backend (no mcp-server profile) ..."
+    info "Starting backend (sqlite; no mcp-server profile) ..."
     log "INFO: docker compose up -d"
     docker compose -f "${SCRIPT_DIR}/docker-compose.yml" up -d > >(log_stream) 2> >(log_stream) \
         || error "docker compose up failed. Check ${LOG_FILE} for details."
@@ -483,9 +481,10 @@ else
 fi
 
 # ---------------------------------------------------------------------------
-# Step 6: Seed Firestore emulator from durable workspace config
+# Step 6: Seed the active database from durable workspace config
 # ---------------------------------------------------------------------------
-_FIRESTORE_HOST="localhost:8080"
+_DATABASE_TYPE="${DATABASE_TYPE:-sqlite}"
+_SQLITE_DB_PATH="${SQLITE_DB_PATH:-${SPECFLOW_HOME_PATH}/specflow.db}"
 
 # Guard (H): the provisioning steps above always write workspaces.json. A missing file
 # here means an anomalous failure — refuse to seed an empty pool.
@@ -500,18 +499,20 @@ if [[ "${RESET_LOCAL_DB}" == "true" ]]; then
 fi
 
 if [[ "${DRY_RUN}" == "true" ]]; then
-    info "[DRY RUN] Would run: FIRESTORE_EMULATOR_HOST=${_FIRESTORE_HOST} uv run scripts/init_db.py --dry-run ${_SEED_FLAGS[*]}"
+    info "[DRY RUN] Would run: DATABASE_TYPE=${_DATABASE_TYPE} uv run scripts/init_db.py --dry-run ${_SEED_FLAGS[*]}"
     log "INFO: [DRY RUN] init_db.py --dry-run ${_SEED_FLAGS[*]}"
 else
-    info "Seeding Firestore emulator ..."
-    log "INFO: Running init_db.py ${_SEED_FLAGS[*]} against ${_FIRESTORE_HOST}"
+    info "Seeding the ${_DATABASE_TYPE} database ..."
+    log "INFO: Running init_db.py ${_SEED_FLAGS[*]} (DATABASE_TYPE=${_DATABASE_TYPE})"
     (
         cd "${SCRIPT_DIR}/backend"
-        FIRESTORE_EMULATOR_HOST="${_FIRESTORE_HOST}" \
+        DATABASE_TYPE="${_DATABASE_TYPE}" \
+        SQLITE_DB_PATH="${_SQLITE_DB_PATH}" \
+        FIRESTORE_EMULATOR_HOST="${FIRESTORE_EMULATOR_HOST:-localhost:8080}" \
             uv run scripts/init_db.py "${_SEED_FLAGS[@]}"
     ) > >(log_stream) 2> >(log_stream) \
         || error "init_db.py failed. Check ${LOG_FILE} for details."
-    info "Firestore seeded successfully."
+    info "Database seeded successfully."
 fi
 
 # ---------------------------------------------------------------------------

From 6712186e43be01f10f671ae04f92eeec8b92dac0 Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Wed, 1 Jul 2026 16:44:44 +0200
Subject: [PATCH 3/8] docs: update QUICKSTART and ARCHITECTURE for sqlite
 default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to the sqlite migration — these still described the removed
Firestore emulator.
---
 QUICKSTART.md        |  2 +-
 docs/ARCHITECTURE.md | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/QUICKSTART.md b/QUICKSTART.md
index 519c71c..73d5552 100644
--- a/QUICKSTART.md
+++ b/QUICKSTART.md
@@ -148,7 +148,7 @@ Stop the local stack:
 docker compose down --timeout 90
 ```
 
-Reset local Firestore state and reseed:
+Reset the local SQLite database and reseed:
 
 ```bash
 specflow init --reset-local-db
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 55140cd..d0529f2 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -38,7 +38,7 @@
 - **Services** (`backend/app/services/`): GenerationService, WorkspacePoolService, GenerationRetryService, ContractValidator, CrashRecoveryService, GitArchiveService, ClaudeCodeService.
 - **State Machines** (`backend/app/state/`): GenerationSM (PENDING→INITIALIZING→RUNNING→COMPLETED/FAILED), WorkspaceSM (AVAILABLE→ALLOCATED→CLEANING→AVAILABLE). Only writer of status/checkpoint. CI enforced.
 - **Workflows** (`backend/app/workflows/`): `generate_app_workflow` (KB init, parallel codegen, deploy/E2E, P10Y). No backend spec-analysis or planning agents after PR255.
-- **Database** (`backend/app/database/`): IDatabase interface — Firestore (prod), Emulator (local), InMemory (tests).
+- **Database** (`backend/app/database/`): IDatabase interface — SQLite (local/Docker default), Firestore (prod, or connecting to an already-hosted GCP instance), Emulator (manually-run Firestore emulator, not started by docker-compose), InMemory (tests).
 
 ## Data Flow
 
@@ -147,8 +147,9 @@ Legacy checkpoints (`planning_done`, `plan_synced`, `spec_check_done`) are not u
 
 **Self-hosted backend**: the backend can run on Kubernetes or Docker Compose with persistent workspace and log volumes sized for long-running code generation.
 
-**Local**: `make run` → Docker Compose (backend:8000 + firestore-emulator:8080).
-The emulator persists quickstart state with periodic sidecar exports plus native
-shutdown export under `./workspaces/firestore_emulator/`; generated outputs use
-`./workspaces/artifacts/`. Compose shutdown order keeps the emulator alive while
-the backend marks interrupted runs failed and the exporter takes a final snapshot.
+**Local**: `make run` → Docker Compose (backend:8000, SQLite). The backend container
+bind-mounts the host's `~/.specflow/` directory — one central SQLite database shared
+across every local project/MCP session on the machine (the local-dev analogue of the
+old shared Firestore emulator); generated outputs use `./workspaces/artifacts/`.
+`DATABASE_TYPE=firestore` connects to an already-hosted, GCP-managed Firestore instance
+instead — SpecFlow never deploys or manages Firestore itself locally.

From 641f203fbad5a3c0bfe869d11726b89172d527c4 Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Thu, 2 Jul 2026 10:31:40 +0200
Subject: [PATCH 4/8] =?UTF-8?q?fix(sqlite):=20address=20PR=20review=20?=
 =?UTF-8?q?=E2=80=94=20db=20subdir,=20docs,=20comment=20trims?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Nest specflow.db/-wal/-shm under a db/ subdirectory (~/.specflow/db/,
  container /root/.specflow/db/) instead of directly in ~/.specflow/, so the
  database files stay separate from other files (e.g. config.json) in that
  directory. Updated docker-compose.yml, config.py, Makefile, specflow-init.sh,
  .env.quickstart.example, and the mcp_server e2e runner default accordingly.
- docs/ARCHITECTURE.md: answer where ~/.specflow/ and its db/ subdir get
  created on a fresh install (Docker bind-mount + SqliteDatabase.__init__).
- Trim two over-long inline comments (backend/Dockerfile, local_env.py) to
  one-liners per review feedback.
---
 .env.quickstart.example                         | 17 +++++++++++------
 Makefile                                        |  8 ++++----
 backend/Dockerfile                              |  3 +--
 backend/app/core/config.py                      |  2 +-
 .../test/scripts/test_specflow_init_script.py   |  8 ++++----
 docker-compose.yml                              |  2 +-
 docs/ARCHITECTURE.md                            |  6 +++++-
 mcp_server/services/local_env.py                |  4 +---
 mcp_server/tests/e2e/runner.py                  |  2 +-
 specflow-init.sh                                | 12 ++++++------
 10 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/.env.quickstart.example b/.env.quickstart.example
index 805aacf..4bc6478 100644
--- a/.env.quickstart.example
+++ b/.env.quickstart.example
@@ -59,17 +59,22 @@ DATABASE_TYPE=sqlite
 
 # SQLITE_DB_PATH
 #   What:  Container-internal path to the SQLite file (only used when
-#          DATABASE_TYPE=sqlite). docker-compose bind-mounts your host's
-#          ~/.specflow/ directory here — one central database shared across
+#          DATABASE_TYPE=sqlite), nested under a db/ subdir so the specflow.db /
+#          -wal / -shm files stay separate from other files (e.g. config.json)
+#          docker-compose bind-mounts into the same ~/.specflow/ host directory
+#          (SPECFLOW_HOME_MOUNT_PATH below) — one central database shared across
 #          every local project/MCP session on this machine, the same way the
-#          Firestore emulator used to be one shared local instance.
+#          Firestore emulator used to be one shared local instance. The parent
+#          dir is created automatically on first backend startup if missing.
 #   How:   Leave as-is unless you change the docker-compose bind-mount.
-SQLITE_DB_PATH=/root/.specflow/specflow.db
+SQLITE_DB_PATH=/root/.specflow/db/specflow.db
 
 # SPECFLOW_HOME_MOUNT_PATH
 #   What:  Host-side directory bind-mounted into the backend container at
-#          SQLITE_DB_PATH's parent. Must be on real (block) storage, never
-#          NFS/Filestore — SQLite's file locking is unsafe over NFS.
+#          /root/.specflow (SQLITE_DB_PATH's grandparent). Must be on real
+#          (block) storage, never NFS/Filestore — SQLite's file locking is
+#          unsafe over NFS. Docker creates this host directory automatically
+#          on first `docker compose up` if it doesn't already exist.
 #   How:   Leave unset to default to ~/.specflow.
 # SPECFLOW_HOME_MOUNT_PATH=
 
diff --git a/Makefile b/Makefile
index fc2ee6a..44ae994 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ DATABASE_TYPE ?= sqlite
 # the old shared Firestore-emulator model. Host-side scripts (init_db.py, tests) read/write
 # the exact same file directly; no docker exec needed.
 SPECFLOW_HOME_PATH ?= $(HOME)/.specflow
-SQLITE_DB_PATH ?= $(SPECFLOW_HOME_PATH)/specflow.db
+SQLITE_DB_PATH ?= $(SPECFLOW_HOME_PATH)/db/specflow.db
 FIRESTORE_EMULATOR_HOST ?= localhost:8080
 # Must match docker-compose.yml defaults so host-side seeding/tests see the same
 # named Firestore database as the backend container (quickstart sets these via specflow-init.sh).
@@ -51,7 +51,7 @@ endif
 # (quickstart uses the default project + ./workspaces + ~/.specflow). So a test run never
 # clobbers a running quickstart OR the real central SQLite database: `make stop` tears down
 # the test stack and removes its ephemeral ./.specflow-test state (which nests its own
-# isolated specflow-home/specflow.db, cleaned up the same way).
+# isolated specflow-home/db/specflow.db, cleaned up the same way).
 #
 # Applied as target-specific *exported* vars so they also reach prerequisite targets
 # (run-detached / run-detached-skip) and sub-makes (`$(MAKE) stop`, `$(MAKE) e2e-setup`).
@@ -66,7 +66,7 @@ $(TEST_STACK_TARGETS): export SPECFLOW_MCP_CONTAINER := specflow-test-mcp-server
 $(TEST_STACK_TARGETS): export SPECFLOW_BACKEND_PORT := 18000
 $(TEST_STACK_TARGETS): export BACKEND_URL := http://localhost:18000
 $(TEST_STACK_TARGETS): export SPECFLOW_HOME_MOUNT_PATH := $(TEST_SPECFLOW_HOME_PATH)
-$(TEST_STACK_TARGETS): export SQLITE_DB_PATH := $(TEST_SPECFLOW_HOME_PATH)/specflow.db
+$(TEST_STACK_TARGETS): export SQLITE_DB_PATH := $(TEST_SPECFLOW_HOME_PATH)/db/specflow.db
 $(TEST_STACK_TARGETS): export GCP_PROJECT_ID := $(GCP_PROJECT_ID)
 $(TEST_STACK_TARGETS): export FIRESTORE_DATABASE_NAME := $(FIRESTORE_DATABASE_NAME)
 
@@ -487,7 +487,7 @@ help:
 	@echo "  make ops-retry-run generation_id=X BACKEND_URL=http://host:8000  - Override backend URL"
 	@echo ""
 	@echo "Database Modes:"
-	@echo "  DEV mode:    Uses SQLite (no GCP credentials needed, single central db at ~/.specflow/specflow.db)"
+	@echo "  DEV mode:    Uses SQLite (no GCP credentials needed, single central db at ~/.specflow/db/specflow.db)"
 	@echo "  Override:    DATABASE_TYPE=firestore to connect to an already-hosted GCP Firestore instance"
 	@echo "               DATABASE_TYPE=emulator to connect to a manually-run Firestore emulator process"
 	@echo ""
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 1a84c6b..9d2c0b9 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -104,8 +104,7 @@ RUN uv sync --frozen --no-dev || uv sync --no-dev && \
 
 # Copy application code (preserve the app directory structure)
 COPY app ./app
-# scripts/ (init_db.py etc.) — needed at runtime via `docker compose exec backend
-# .venv/bin/python scripts/init_db.py` (sqlite has no exposed port for host-side seeding).
+# scripts/ (init_db.py etc.) — needed at runtime via `docker compose exec backend`.
 COPY scripts ./scripts
 
 # Create .claude directory and copy settings to root's home directory
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 98c8fa2..44c4073 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -231,7 +231,7 @@ def _empty_str_to_none_int(cls, v: object) -> object:
     # Container-side path where docker-compose bind-mounts the host's ~/.specflow/ directory
     # (one central database shared across every local project/MCP session, like the Firestore
     # emulator used to be). MUST be on block storage, never NFS/Filestore.
-    SQLITE_DB_PATH: str = "/root/.specflow/specflow.db"
+    SQLITE_DB_PATH: str = "/root/.specflow/db/specflow.db"
 
     # LLM Provider Configuration
     # Active LLM provider: "openrouter" (default) or "anthropic".
diff --git a/backend/test/scripts/test_specflow_init_script.py b/backend/test/scripts/test_specflow_init_script.py
index 6d4b6d8..439fe75 100644
--- a/backend/test/scripts/test_specflow_init_script.py
+++ b/backend/test/scripts/test_specflow_init_script.py
@@ -66,13 +66,13 @@ def test_compose_backend_bind_mounts_specflow_home_for_sqlite():
     text = _compose_text()
 
     assert "DATABASE_TYPE=${DATABASE_TYPE:-sqlite}" in text
-    assert "SQLITE_DB_PATH=${SQLITE_DB_PATH:-/root/.specflow/specflow.db}" in text
+    assert "SQLITE_DB_PATH=${SQLITE_DB_PATH:-/root/.specflow/db/specflow.db}" in text
     assert "${SPECFLOW_HOME_MOUNT_PATH:-${HOME}/.specflow}:/root/.specflow:rw" in text
 
 
 def test_makefile_test_stack_uses_separate_names_ports_and_isolated_sqlite_path():
     """Integration/E2E stacks must not collide with quickstart containers, host ports, or the
-    real central SQLite database at ~/.specflow/specflow.db."""
+    real central SQLite database at ~/.specflow/db/specflow.db."""
     makefile_text = (Path(__file__).resolve().parents[3] / "Makefile").read_text()
 
     assert "$(TEST_STACK_TARGETS): export SPECFLOW_BACKEND_PORT := 18000" in makefile_text
@@ -87,7 +87,7 @@ def test_makefile_test_stack_uses_separate_names_ports_and_isolated_sqlite_path(
         in makefile_text
     )
     assert (
-        "$(TEST_STACK_TARGETS): export SQLITE_DB_PATH := $(TEST_SPECFLOW_HOME_PATH)/specflow.db"
+        "$(TEST_STACK_TARGETS): export SQLITE_DB_PATH := $(TEST_SPECFLOW_HOME_PATH)/db/specflow.db"
         in makefile_text
     )
     # The isolated sqlite-home path must nest under the test workspace mount so `make stop`'s
@@ -113,4 +113,4 @@ def test_reset_local_db_clears_sqlite_file_not_a_directory():
     assert 'SPECFLOW_HOME_PATH="${SPECFLOW_HOME_MOUNT_PATH:-${HOME}/.specflow}"' in text
     assert 'basename "${SPECFLOW_HOME_PATH}"' in text
     assert '!= ".specflow"' in text
-    assert 'rm -f "${SPECFLOW_HOME_PATH}/specflow.db"' in text
+    assert 'rm -f "${SPECFLOW_HOME_PATH}/db/specflow.db"' in text
diff --git a/docker-compose.yml b/docker-compose.yml
index 3bb9a5b..e2ab85f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -41,7 +41,7 @@ services:
       # GCP-managed Firestore instance (SpecFlow never deploys/manages Firestore itself),
       # or DATABASE_TYPE=emulator to connect to a manually-run Firestore emulator process.
       - DATABASE_TYPE=${DATABASE_TYPE:-sqlite}
-      - SQLITE_DB_PATH=${SQLITE_DB_PATH:-/root/.specflow/specflow.db}
+      - SQLITE_DB_PATH=${SQLITE_DB_PATH:-/root/.specflow/db/specflow.db}
       - FIRESTORE_EMULATOR_HOST=${FIRESTORE_EMULATOR_HOST}
       - GCP_PROJECT_ID=${GCP_PROJECT_ID:-local-dev}
       - FIRESTORE_DATABASE_NAME=${FIRESTORE_DATABASE_NAME:-specflow}
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index d0529f2..26b35eb 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -150,6 +150,10 @@ Legacy checkpoints (`planning_done`, `plan_synced`, `spec_check_done`) are not u
 **Local**: `make run` → Docker Compose (backend:8000, SQLite). The backend container
 bind-mounts the host's `~/.specflow/` directory — one central SQLite database shared
 across every local project/MCP session on the machine (the local-dev analogue of the
-old shared Firestore emulator); generated outputs use `./workspaces/artifacts/`.
+old shared Firestore emulator), with the db file itself nested at `~/.specflow/db/specflow.db`
+so it stays separate from other files (e.g. `config.json`) in that directory. Neither
+directory needs to pre-exist: Docker creates the host `~/.specflow/` bind-mount path on
+first `docker compose up` if missing, and `SqliteDatabase.__init__` creates the `db/`
+subdirectory on first backend connection. Generated outputs use `./workspaces/artifacts/`.
 `DATABASE_TYPE=firestore` connects to an already-hosted, GCP-managed Firestore instance
 instead — SpecFlow never deploys or manages Firestore itself locally.
diff --git a/mcp_server/services/local_env.py b/mcp_server/services/local_env.py
index 5cb9c46..0f00886 100644
--- a/mcp_server/services/local_env.py
+++ b/mcp_server/services/local_env.py
@@ -36,9 +36,7 @@
 _ENV_EXAMPLE_FILENAME = ".env.quickstart.example"
 _INIT_SCRIPT = "specflow-init.sh"
 
-# Mirror docker-compose.yml container-name env-var defaults. SQLite is the local/Docker
-# default and has no separate container (it's a bind-mounted file); the Firestore emulator
-# is no longer started by docker-compose, so only the backend container is checked.
+# Mirror docker-compose.yml container-name env-var defaults; sqlite has no separate container.
 _BACKEND_CONTAINER_DEFAULT = "specflow-backend"
 
 
diff --git a/mcp_server/tests/e2e/runner.py b/mcp_server/tests/e2e/runner.py
index 1488673..a3a4bae 100644
--- a/mcp_server/tests/e2e/runner.py
+++ b/mcp_server/tests/e2e/runner.py
@@ -133,7 +133,7 @@ def resolve_api_credentials() -> tuple[str, str]:
             **os.environ,
             "DATABASE_TYPE": os.getenv("DATABASE_TYPE", "sqlite"),
             "SQLITE_DB_PATH": os.getenv(
-                "SQLITE_DB_PATH", str(Path.home() / ".specflow" / "specflow.db")
+                "SQLITE_DB_PATH", str(Path.home() / ".specflow" / "db" / "specflow.db")
             ),
             "FIRESTORE_EMULATOR_HOST": os.getenv("FIRESTORE_EMULATOR_HOST", "localhost:8080"),
             "GCP_PROJECT_ID": os.getenv("GCP_PROJECT_ID", "local-dev"),
diff --git a/specflow-init.sh b/specflow-init.sh
index 08089e8..8cdb256 100755
--- a/specflow-init.sh
+++ b/specflow-init.sh
@@ -167,11 +167,11 @@ clear_local_sqlite_db() {
         error "Refusing to clear unexpected SpecFlow home path: ${SPECFLOW_HOME_PATH}"
     fi
 
-    info "Clearing local SQLite database at ${SPECFLOW_HOME_PATH}/specflow.db ..."
+    info "Clearing local SQLite database at ${SPECFLOW_HOME_PATH}/db/specflow.db ..."
     log "INFO: rm -f <sqlite db + wal/shm sidecars>"
-    rm -f "${SPECFLOW_HOME_PATH}/specflow.db" \
-          "${SPECFLOW_HOME_PATH}/specflow.db-wal" \
-          "${SPECFLOW_HOME_PATH}/specflow.db-shm"
+    rm -f "${SPECFLOW_HOME_PATH}/db/specflow.db" \
+          "${SPECFLOW_HOME_PATH}/db/specflow.db-wal" \
+          "${SPECFLOW_HOME_PATH}/db/specflow.db-shm"
 }
 
 set_env_value() {
@@ -352,7 +352,7 @@ if [[ "${DRY_RUN}" == "true" ]]; then
     info "[DRY RUN] Would start backend stack: docker compose up -d (backend)"
     info "[DRY RUN] Would skip mcp-server profile (IDE-side only)."
     if [[ "${RESET_LOCAL_DB}" == "true" ]]; then
-        info "[DRY RUN] Would clear the local SQLite database at ${SPECFLOW_HOME_PATH}/specflow.db"
+        info "[DRY RUN] Would clear the local SQLite database at ${SPECFLOW_HOME_PATH}/db/specflow.db"
     fi
 else
     BUILD_FLAG=""
@@ -484,7 +484,7 @@ fi
 # Step 6: Seed the active database from durable workspace config
 # ---------------------------------------------------------------------------
 _DATABASE_TYPE="${DATABASE_TYPE:-sqlite}"
-_SQLITE_DB_PATH="${SQLITE_DB_PATH:-${SPECFLOW_HOME_PATH}/specflow.db}"
+_SQLITE_DB_PATH="${SQLITE_DB_PATH:-${SPECFLOW_HOME_PATH}/db/specflow.db}"
 
 # Guard (H): the provisioning steps above always write workspaces.json. A missing file
 # here means an anomalous failure — refuse to seed an empty pool.

From f95609bc29919b5f2534b1bb78e68c2729c8f721 Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Thu, 2 Jul 2026 10:44:55 +0200
Subject: [PATCH 5/8] fix(backend): default init_db.py to sqlite; finish
 backend docs migration

- init_db.py: default DATABASE_TYPE to sqlite (not memory) when unset, matching
  the sqlite-is-the-default intent everywhere else. Previously, running the
  script directly without exporting DATABASE_TYPE silently fell back to the
  ephemeral in-memory backend.
- scripts/README.md: documents the sqlite default; drops the stale "defaults
  to memory, data won't persist" warning.
- docs/backend/ARCHITECTURE.md, docs/backend/DEVELOPMENT.md: backend
  contributor docs updated for the sqlite migration (missed in the earlier
  top-level docs/ARCHITECTURE.md and QUICKSTART.md pass), using the db/
  subdirectory path from the PR review fixup.
---
 backend/scripts/README.md    | 36 +++++++++++--------
 backend/scripts/init_db.py   |  7 ++--
 docs/backend/ARCHITECTURE.md | 46 +++++++++++++------------
 docs/backend/DEVELOPMENT.md  | 67 ++++++++++++++++++++----------------
 4 files changed, 88 insertions(+), 68 deletions(-)

diff --git a/backend/scripts/README.md b/backend/scripts/README.md
index 0079197..129dc83 100644
--- a/backend/scripts/README.md
+++ b/backend/scripts/README.md
@@ -1,18 +1,21 @@
 # Database Initialization Scripts
 
-This directory contains scripts for initializing and managing the Firestore database.
+This directory contains scripts for initializing and managing the SpecFlow database
+(SQLite locally by default, or Firestore for production / an already-hosted GCP instance).
 
 ## Overview
 
-The SpecFlow backend uses Firestore to manage:
+The SpecFlow backend manages:
 - **Workspace pool**: configured workspace sets for running generations
 - **Generations**: Generation jobs with state tracking and crash recovery
 
 ## Scripts
 
-### `init_firestore.py`
+### `init_db.py`
 
-Initializes the Firestore workspace pool from a required workspace-config JSON file.
+Initializes the active database's workspace pool from a required workspace-config JSON
+file. Backend-agnostic — it goes through the `IDatabase` abstraction, so the same script
+seeds sqlite, a manually-run emulator, or Firestore.
 
 **Usage:**
 
@@ -21,34 +24,37 @@ Initializes the Firestore workspace pool from a required workspace-config JSON f
 ```bash
 # IMPORTANT: Run from the backend/ directory (where pyproject.toml is located)
 
-# With Firestore Emulator (recommended for development)
+# SQLite (local/Docker default — no separate process needed)
 cd backend
-export FIRESTORE_EMULATOR_HOST=localhost:8080
-# DATABASE_TYPE=emulator is auto-set when FIRESTORE_EMULATOR_HOST is set
-uv run scripts/init_firestore.py --workspace-config ../my-test-repos.json
+export DATABASE_TYPE=sqlite
+uv run scripts/init_db.py --workspace-config ../my-test-repos.json
 
 # Dry run (show what would be done)
 cd backend
+uv run scripts/init_db.py --dry-run --workspace-config ../my-test-repos.json
+
+# Manually-run Firestore emulator
+cd backend
 export FIRESTORE_EMULATOR_HOST=localhost:8080
-uv run scripts/init_firestore.py --dry-run --workspace-config ../my-test-repos.json
+# DATABASE_TYPE=emulator is auto-set when FIRESTORE_EMULATOR_HOST is set
+uv run scripts/init_db.py --workspace-config ../my-test-repos.json
 
-# Production (BE CAREFUL!)
+# Production, or an already-hosted GCP instance (BE CAREFUL!)
 cd backend
 export GCP_PROJECT_ID=your-project-id
 export DATABASE_TYPE=firestore
-uv run scripts/init_firestore.py --prod --workspace-config ../my-test-repos.json
+uv run scripts/init_db.py --prod --workspace-config ../my-test-repos.json
 ```
 
 **Alternative (from project root):**
 ```bash
 # Using uv's --directory flag
-export FIRESTORE_EMULATOR_HOST=localhost:8080
-uv run --directory backend scripts/init_firestore.py --dry-run --workspace-config my-test-repos.json
+uv run --directory backend scripts/init_db.py --dry-run --workspace-config my-test-repos.json
 ```
 
 **Important Notes:**
-- The script automatically sets `DATABASE_TYPE=emulator` when `FIRESTORE_EMULATOR_HOST` is set
-- If `DATABASE_TYPE` is not set and `FIRESTORE_EMULATOR_HOST` is missing, it defaults to `memory` (data won't persist!)
+- Defaults to `sqlite` unless `DATABASE_TYPE` is set or `FIRESTORE_EMULATOR_HOST` triggers
+  emulator auto-detect
 - The script will show debug output about how many keys it finds in the database
 
 **What it does:**
diff --git a/backend/scripts/init_db.py b/backend/scripts/init_db.py
index f1e8f8c..60e4197 100755
--- a/backend/scripts/init_db.py
+++ b/backend/scripts/init_db.py
@@ -48,10 +48,13 @@
 # Add backend to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-# Set DATABASE_TYPE early if FIRESTORE_EMULATOR_HOST is set (before importing settings)
-# This ensures we use the emulator instead of in-memory database
+# Set DATABASE_TYPE early (before importing settings): emulator auto-detect takes
+# priority (manually-run emulator), otherwise default to sqlite (the local/Docker-dev
+# default) rather than the ephemeral in-memory fallback.
 if os.getenv("FIRESTORE_EMULATOR_HOST") and not os.getenv("DATABASE_TYPE"):
     os.environ["DATABASE_TYPE"] = "emulator"
+elif not os.getenv("DATABASE_TYPE"):
+    os.environ["DATABASE_TYPE"] = "sqlite"
 
 import httpx
 
diff --git a/docs/backend/ARCHITECTURE.md b/docs/backend/ARCHITECTURE.md
index 10e5553..18041bc 100644
--- a/docs/backend/ARCHITECTURE.md
+++ b/docs/backend/ARCHITECTURE.md
@@ -183,34 +183,36 @@ class DatabaseInterface(ABC):
 - No persistence
 - Thread-safe (asyncio locks)
 
-**2. EmulatorDatabase** - Firestore emulator
-- Local development
-- Realistic Firestore behavior
-- Fast startup
-- Data reset on restart
-
-**3. FirestoreDatabase** - Google Cloud Firestore
-- Production
+**2. SqliteDatabase** - Local file, no separate process
+- Local/Docker-dev default
+- Single-writer, WAL mode, real ACID transactions
+- Persists across restarts (bind-mounted at `~/.specflow/db/specflow.db`)
+- Not a production replacement for Firestore — no cross-node distributed locking
+
+**3. EmulatorDatabase** - Firestore emulator
+- Connects to a manually-run Firestore emulator process (docker-compose does not
+  start one)
+- Realistic Firestore behavior for anyone testing against real Firestore semantics
+
+**4. FirestoreDatabase** - Google Cloud Firestore
+- Production, or connecting to an already-hosted GCP-managed instance
 - Native transactions
 - Distributed locking
-- Persistent state
 
 ### Factory Pattern
 
 ```python
-# app/database/__init__.py
-def get_database(db_type: str = None) -> DatabaseInterface:
-    if db_type == "memory":
-        return MemoryDatabase()
-    elif db_type == "emulator":
-        return EmulatorDatabase()
-    elif db_type == "firestore":
-        return FirestoreDatabase()
-    else:
-        # Auto-detect from environment
-        if os.getenv("FIRESTORE_EMULATOR_HOST"):
-            return EmulatorDatabase()
-        return FirestoreDatabase()
+# app/database/factory.py
+def get_database() -> IDatabase:
+    db_type = settings.DATABASE_TYPE
+    if db_type == DatabaseType.MEMORY:
+        return InMemoryDatabase()
+    elif db_type == DatabaseType.SQLITE:
+        return SqliteDatabase(db_path=settings.SQLITE_DB_PATH)
+    elif db_type == DatabaseType.EMULATOR:
+        return EmulatorDatabase(...)
+    elif db_type == DatabaseType.FIRESTORE:
+        return FirestoreDatabase(...)
 ```
 
 ### Transaction Support
diff --git a/docs/backend/DEVELOPMENT.md b/docs/backend/DEVELOPMENT.md
index a272c5b..104dd6d 100644
--- a/docs/backend/DEVELOPMENT.md
+++ b/docs/backend/DEVELOPMENT.md
@@ -4,7 +4,7 @@
 >
 > **Just want to run SpecFlow locally, not develop it?** See the
 > [Local Self-Host Quickstart](../../QUICKSTART.md) — a one-command
-> bootstrap (`./specflow-init.sh`) that starts the stack and seeds the emulator without
+> bootstrap (`./specflow-init.sh`) that starts the stack and seeds the database without
 > the manual steps below. This guide is for contributors working on the backend itself.
 
 ## Table of Contents
@@ -26,7 +26,7 @@
 
 ### Required
 
-- **Docker** - For Firestore emulator and containerization
+- **Docker** - For running the backend container (SQLite is the local default; no separate database container needed)
 - **Make** - For command shortcuts
 - **Python 3.13+** - Latest Python version
 - **uv** - Fast Python package manager ([install](https://github.com/astral-sh/uv))
@@ -77,8 +77,8 @@ make run
 
 This starts:
 - **Backend API** on `http://localhost:8000`
-- **Firestore Emulator** on `http://localhost:8080`
-- **Firestore UI** on `http://localhost:4000` (optional)
+- **SQLite database** at `~/.specflow/db/specflow.db` on the host (bind-mounted into the
+  container) — no separate container or port
 
 ### 4. Verify Health
 
@@ -119,9 +119,10 @@ EMAIL_PASSWORD=<smtp-password>
 EMAIL_FROM=noreply@example.com
 
 # Database Configuration
-DATABASE_TYPE=emulator  # Options: emulator, firestore, memory
-FIRESTORE_EMULATOR_HOST=localhost:8080
-GCP_PROJECT_ID=local-dev
+DATABASE_TYPE=sqlite  # Options: sqlite, emulator, firestore, memory
+SQLITE_DB_PATH=/root/.specflow/db/specflow.db  # sqlite only
+FIRESTORE_EMULATOR_HOST=localhost:8080  # emulator only (manually-run process)
+GCP_PROJECT_ID=local-dev  # emulator/firestore only
 
 # Logging
 LOG_LEVEL=INFO  # Options: DEBUG, INFO, WARNING, ERROR
@@ -133,8 +134,11 @@ SKIP_AGENT_EXECUTION=false  # Set to true for fast workflow testing
 ### Database Type Options
 
 - **memory** - In-memory database (fast, for unit tests)
-- **emulator** - Firestore emulator (local development, integration tests)
-- **firestore** - Real Firestore backend for self-hosted deployments
+- **sqlite** - Local file, no separate process (local/Docker-dev default)
+- **emulator** - Connects to a manually-run Firestore emulator process (SpecFlow does
+  not start one for you)
+- **firestore** - Real Firestore backend for production, or an already-hosted GCP
+  instance you connect to directly
 
 ---
 
@@ -176,13 +180,9 @@ cd backend
 # Install dependencies
 uv sync
 
-# Start Firestore emulator (in separate terminal)
-docker run -p 8080:8080 google/cloud-sdk:latest \
-  gcloud beta emulators firestore start --host-port=0.0.0.0:8080
-
-# Set environment
-export DATABASE_TYPE=emulator
-export FIRESTORE_EMULATOR_HOST=localhost:8080
+# Set environment (sqlite needs no separate process)
+export DATABASE_TYPE=sqlite
+export SQLITE_DB_PATH=~/.specflow/db/specflow.db
 
 # Run backend
 uv run uvicorn app.main:app --reload --port 8000
@@ -216,7 +216,8 @@ In SKIP_MODE:
 ### Skip-mode E2E tests (workspace pool setup)
 
 `make skip-mode-e2e-tests` boots the isolated test stack in SKIP_MODE, prefills the test
-container's Firestore with a workspace pool, and drives the MCP tool sequence end to end.
+stack's database (its own isolated SQLite file, not the real central one) with a
+workspace pool, and drives the MCP tool sequence end to end.
 The test stack is ephemeral: every test setup starts by running `make stop`, which tears down
 the `specflow-test` compose project and removes `./.specflow-test` before starting again.
 
@@ -255,8 +256,8 @@ cp e2e-workspace-config.example.json my-test-repos.json
 make skip-mode-e2e-tests E2E_WORKSPACE_CONFIG=my-test-repos.json
 ```
 
-`E2E_WORKSPACE_CONFIG` is required by `make e2e-setup`, `make init-firestore`, and
-`make init-firestore-dry`. Schema:
+`E2E_WORKSPACE_CONFIG` is required by `make e2e-setup`, `make init-db`, and
+`make init-db-dry`. Schema:
 
 ```json
 [
@@ -286,7 +287,7 @@ make integration-tests
 ```
 
 **Characteristics:**
-- Uses Firestore emulator
+- Uses SQLite by default (override with `DATABASE_TYPE=emulator` or `firestore`)
 - Uses the isolated, ephemeral `specflow-test` Docker Compose project
 - Tests complete workflows
 - Validates database transactions
@@ -389,10 +390,11 @@ backend/
 │   │   │   └── workspaces.py    # Workspace management
 │   │   └── router.py        # API router configuration
 │   ├── database/            # Database abstraction layer
-│   │   ├── base.py          # Abstract database interface
+│   │   ├── interface.py     # Abstract database interface
 │   │   ├── memory.py        # In-memory implementation
+│   │   ├── sqlite.py        # SQLite implementation (local/Docker default)
 │   │   ├── firestore.py     # Firestore implementation
-│   │   └── emulator.py      # Emulator implementation
+│   │   └── emulator.py      # Emulator implementation (manually-run process)
 │   ├── middleware/          # FastAPI middleware
 │   │   ├── auth.py          # API key authentication
 │   │   └── logging.py       # Request/response logging
@@ -459,7 +461,7 @@ make test
 cd backend
 uv run pytest test/test_services/test_your_service.py -v
 
-# Test with Firestore emulator in the isolated ephemeral stack
+# Test against a real database (SQLite by default) in the isolated ephemeral stack
 make integration-tests
 ```
 
@@ -542,12 +544,17 @@ uv run uvicorn app.main:app --reload
 
 ### Database Inspection
 
-**Firestore UI** (when using emulator):
-- Open http://localhost:4000
-- Browse collections: `generations`, `workspaces`, `api_keys`
-- View documents, queries, indexes
+**SQLite** (local/Docker default) — the file is trivially inspectable, either from the
+host directly or via the running container:
+```bash
+sqlite3 ~/.specflow/db/specflow.db ".tables"
+sqlite3 ~/.specflow/db/specflow.db "SELECT doc_id, data FROM documents WHERE collection = 'workspaces' LIMIT 5;"
+
+# Or from inside the running container:
+docker compose exec backend sqlite3 /root/.specflow/db/specflow.db ".tables"
+```
 
-**Firestore CLI queries:**
+**Backend API queries** (works against any backend):
 ```bash
 # Get all generations
 curl http://localhost:8000/api/v1/generations \
@@ -605,7 +612,9 @@ curl http://localhost:8000/status \
 
 ### Add Database Migration
 
-SpecFlow uses Firestore (NoSQL), so migrations are schema-less. However, for data migrations:
+The `backend/scripts/migrations/migrate_*.py` scripts are one-time production data
+migrations against real GCP Firestore (NoSQL, so migrations are schema-less) — they call
+the Firestore SDK directly and don't apply to local SQLite. For data migrations:
 
 1. **Create migration script in `backend/scripts/migrate_xxx.py`**
 2. **Run manually via:**

From 3c08f6dc748651dca2102df80e28e3bcd8e3b11a Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Thu, 2 Jul 2026 11:31:49 +0200
Subject: [PATCH 6/8] fix(sqlite): seed against host DB path, not
 container-internal one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Host-side seeding (uv run scripts/init_db.py) resolved SQLITE_DB_PATH from
.env, which is the container-internal path (/root/.specflow/db/specflow.db).
On the host that path is unwritable for non-root users (init_db.py crashes on
mkdir) and, even when writable, seeds a file the container never reads (it
reads the bind-mount source ~/.specflow/db/specflow.db) — so the backend boots
with an unseeded DB and LocalAuthMiddleware rejects every request. This broke
the from-scratch local quickstart.

Derive the host seed path from SPECFLOW_HOME_PATH (the bind-mount source), the
single knob (via SPECFLOW_HOME_MOUNT_PATH) shared by host and container. Comment
out the container-internal SQLITE_DB_PATH in .env.quickstart.example (already
defaulted by docker-compose) and add a regression test.

Addresses backend-review Finding #1 (CRITICAL).
---
 .env.quickstart.example                           |  7 +++++--
 backend/test/scripts/test_specflow_init_script.py | 14 ++++++++++++++
 specflow-init.sh                                  |  6 +++++-
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/.env.quickstart.example b/.env.quickstart.example
index 4bc6478..a9983f0 100644
--- a/.env.quickstart.example
+++ b/.env.quickstart.example
@@ -66,8 +66,11 @@ DATABASE_TYPE=sqlite
 #          every local project/MCP session on this machine, the same way the
 #          Firestore emulator used to be one shared local instance. The parent
 #          dir is created automatically on first backend startup if missing.
-#   How:   Leave as-is unless you change the docker-compose bind-mount.
-SQLITE_DB_PATH=/root/.specflow/db/specflow.db
+#   How:   Leave this commented out. It is a CONTAINER-INTERNAL path and is already
+#          defaulted by docker-compose; setting it here has no effect on host-side
+#          seeding (which derives its path from SPECFLOW_HOME_MOUNT_PATH). Only
+#          uncomment + change it if you also change the docker-compose bind-mount.
+# SQLITE_DB_PATH=/root/.specflow/db/specflow.db
 
 # SPECFLOW_HOME_MOUNT_PATH
 #   What:  Host-side directory bind-mounted into the backend container at
diff --git a/backend/test/scripts/test_specflow_init_script.py b/backend/test/scripts/test_specflow_init_script.py
index 439fe75..65c8642 100644
--- a/backend/test/scripts/test_specflow_init_script.py
+++ b/backend/test/scripts/test_specflow_init_script.py
@@ -114,3 +114,17 @@ def test_reset_local_db_clears_sqlite_file_not_a_directory():
     assert 'basename "${SPECFLOW_HOME_PATH}"' in text
     assert '!= ".specflow"' in text
     assert 'rm -f "${SPECFLOW_HOME_PATH}/db/specflow.db"' in text
+
+
+def test_host_side_seed_targets_host_db_path_not_container_internal():
+    """Host-side seeding runs via `uv run` on the host, so it must target the host
+    bind-mount SOURCE derived from SPECFLOW_HOME_PATH — never the container-internal
+    SQLITE_DB_PATH from .env (/root/...), which is unwritable for non-root users and
+    would seed a file the container never reads. Regression guard for the quickstart
+    seeding-path bug."""
+    text = _script_text()
+
+    # The host seed path is derived from SPECFLOW_HOME_PATH, not inherited from the
+    # container-internal SQLITE_DB_PATH env var.
+    assert '_SQLITE_DB_PATH="${SPECFLOW_HOME_PATH}/db/specflow.db"' in text
+    assert '_SQLITE_DB_PATH="${SQLITE_DB_PATH:-' not in text
diff --git a/specflow-init.sh b/specflow-init.sh
index 8cdb256..9d1db2d 100755
--- a/specflow-init.sh
+++ b/specflow-init.sh
@@ -484,7 +484,11 @@ fi
 # Step 6: Seed the active database from durable workspace config
 # ---------------------------------------------------------------------------
 _DATABASE_TYPE="${DATABASE_TYPE:-sqlite}"
-_SQLITE_DB_PATH="${SQLITE_DB_PATH:-${SPECFLOW_HOME_PATH}/db/specflow.db}"
+# Seeding runs HOST-side (uv run), so it must target the host bind-mount SOURCE
+# (${SPECFLOW_HOME_PATH}/db/specflow.db), NOT the container-internal SQLITE_DB_PATH from .env
+# (/root/.specflow/db/specflow.db) — that host path is unwritable for non-root and would seed a
+# file the container never reads. SPECFLOW_HOME_MOUNT_PATH is the single knob for both sides.
+_SQLITE_DB_PATH="${SPECFLOW_HOME_PATH}/db/specflow.db"
 
 # Guard (H): the provisioning steps above always write workspaces.json. A missing file
 # here means an anomalous failure — refuse to seed an empty pool.

From 95c4100d36fe61201eb2f9268dfe35479e1cc02b Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Thu, 2 Jul 2026 11:45:24 +0200
Subject: [PATCH 7/8] refactor: consolidate workspace-pool seeding into one
 module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both host-side seeders carried their own copy of the workspace-document builder,
id-assignment, and upsert loop. The two create_workspace_document copies had already
drifted — the provisioner's was missing the cleaning_started_at field.

Introduce app/services/workspace_pool_seeding.py as the single source of truth:
WorkspacePoolEntry (typed, validated), parse_pool_entries, assign_pool_entries
(ordered + prefix id assignment), build_workspace_document (one schema, incl.
cleaning_started_at), and seed_workspace_pool (idempotent upsert with created/
updated/skipped counts). init_db.py and create_generation_session_repos.py now
delegate to it. No behavior change; file-based seeding contracts preserved. Adds
unit tests for the module.

Part of moving local workspace config to SQLite as the single source of truth.
---
 .../app/services/workspace_pool_seeding.py    | 198 ++++++++++++++++++
 .../create_generation_session_repos.py        | 192 +++++------------
 backend/scripts/init_db.py                    | 178 +++-------------
 .../test_create_generation_session_repos.py   |  14 +-
 backend/test/scripts/test_init_db.py          |  51 ++---
 .../services/test_workspace_pool_seeding.py   | 162 ++++++++++++++
 6 files changed, 469 insertions(+), 326 deletions(-)
 create mode 100644 backend/app/services/workspace_pool_seeding.py
 create mode 100644 backend/test/services/test_workspace_pool_seeding.py

diff --git a/backend/app/services/workspace_pool_seeding.py b/backend/app/services/workspace_pool_seeding.py
new file mode 100644
index 0000000..ec6beb6
--- /dev/null
+++ b/backend/app/services/workspace_pool_seeding.py
@@ -0,0 +1,198 @@
+"""Single source of truth for workspace-pool seeding.
+
+Both host-side seeding entry points delegate here so the workspace-document schema,
+pool-entry validation, workspace-id assignment, and idempotent upsert exist in exactly
+one place:
+
+- ``scripts/init_db.py`` — seeds from a ``--workspace-config`` JSON file (e2e / bring-your-own
+  repos), plus the bootstrap API key and local-auth identity sentinel.
+- ``scripts/create_generation_session_repos.py`` — seeds freshly provisioned repos straight
+  into the ``workspaces`` collection.
+
+Previously each script carried its own ``create_workspace_document`` copy (which had already
+drifted — one was missing ``cleaning_started_at``) and its own id-assignment/upsert loop. This
+module removes that duplication.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+from app.database.interface import IDatabase
+
+logger = logging.getLogger(__name__)
+
+WORKSPACES_COLLECTION = "workspaces"
+
+
+@dataclass(frozen=True)
+class WorkspacePoolEntry:
+    """The durable identity of one workspace pool slot (mirrors the --workspace-config schema).
+
+    ``set_number`` is not part of the file schema — it is assigned by ``assign_pool_entries`` so
+    it stays consistent with the workspace_id (both derived from the same index). When absent
+    (file-loaded entries), ``seed_workspace_pool`` falls back to entry position.
+    """
+
+    workspace_id: str
+    repo_url: str
+    p10y_repository_id: int
+    workspace_pool: str  # required — the --workspace-config file schema mandates all four fields
+    set_number: Optional[int] = None
+
+    def __post_init__(self) -> None:
+        # bool is an int subclass; reject it so a JSON `true` can't masquerade as an id.
+        if not isinstance(self.p10y_repository_id, int) or isinstance(self.p10y_repository_id, bool):
+            raise ValueError(
+                f"'p10y_repository_id' must be an integer, got: {self.p10y_repository_id!r}"
+            )
+
+
+@dataclass(frozen=True)
+class SeedResult:
+    """Outcome of an idempotent pool upsert."""
+
+    created: int = 0
+    updated: int = 0
+    skipped: int = 0
+
+    @property
+    def total(self) -> int:
+        return self.created + self.updated + self.skipped
+
+
+def parse_pool_entries(raw: List[Any]) -> List[WorkspacePoolEntry]:
+    """Validate a list of raw config dicts into typed entries.
+
+    Raises ValueError (with a 0-based entry index) on any malformed entry so callers can map
+    it to their own error channel (init_db.py converts these to a SystemExit).
+    """
+    entries: List[WorkspacePoolEntry] = []
+    for i, entry in enumerate(raw):
+        if not isinstance(entry, dict):
+            raise ValueError(f"Entry {i} is not an object: {entry!r}")
+        try:
+            entries.append(WorkspacePoolEntry(**entry))
+        except TypeError as exc:
+            # Missing required keys or unexpected keys in the JSON object.
+            raise ValueError(f"Entry {i} has invalid fields: {exc}") from exc
+    return entries
+
+
+def assign_pool_entries(
+    repo_id_map: Dict[str, int],
+    github_org: str,
+    workspace_pool: str,
+    *,
+    ordered_repos: Optional[List[str]] = None,
+    prefix: Optional[str] = None,
+) -> List[WorkspacePoolEntry]:
+    """Assign ``ws-{set:02d}-{idx}`` ids to provisioned repos.
+
+    ``ordered_repos`` (the --repos / bring-your-own path): ids follow list position.
+    Otherwise (the --start/--end path): the trailing number in each ``{prefix}{num}`` repo name
+    fixes the position; names without a parseable number are skipped with a warning.
+    """
+
+    def _entry(repo_name: str, idx: int) -> WorkspacePoolEntry:
+        set_number = (idx // 3) + 1
+        workspace_index = (idx % 3) + 1
+        return WorkspacePoolEntry(
+            workspace_id=f"ws-{set_number:02d}-{workspace_index}",
+            repo_url=f"https://github.com/{github_org}/{repo_name}",
+            p10y_repository_id=int(repo_id_map[repo_name]),
+            workspace_pool=workspace_pool,
+            set_number=set_number,
+        )
+
+    entries: List[WorkspacePoolEntry] = []
+    if ordered_repos is not None:
+        for idx, repo_name in enumerate(ordered_repos):
+            entries.append(_entry(repo_name, idx))
+        return entries
+
+    if not prefix:
+        raise ValueError("prefix is required when ordered_repos is not provided")
+    for repo_name in sorted(repo_id_map):
+        try:
+            num = int(repo_name.split(prefix)[-1])
+        except (ValueError, IndexError):
+            logger.warning("Could not extract number from repo name: %s — skipping", repo_name)
+            continue
+        entries.append(_entry(repo_name, num - 1))
+    return entries
+
+
+def build_workspace_document(
+    entry: WorkspacePoolEntry, set_number: int, now: datetime
+) -> Dict[str, Any]:
+    """Build a fresh ``available`` workspace document (schema per state-management.md).
+
+    The single definition of the workspace-document shape — every field a freshly seeded
+    workspace must carry, including ``cleaning_started_at`` (which one of the old duplicate
+    builders silently omitted).
+    """
+    return {
+        # Core identity
+        "repo_url": entry.repo_url,
+        "p10y_repository_id": entry.p10y_repository_id,
+        "set_number": set_number,
+        "workspace_pool": entry.workspace_pool,
+        # Allocation state
+        "status": "available",
+        "locked_by": None,
+        "locked_at": None,
+        "lease_expires_at": None,
+        "cleaning_started_at": None,
+        # Safety fields
+        "clean_verified": True,  # CRITICAL: must be true to allocate
+        "last_used_by": None,
+        "last_cleaned_at": now,
+        # Audit trail
+        "allocation_history": [],
+        # Error tracking
+        "error": None,
+    }
+
+
+def seed_workspace_pool(
+    db: IDatabase,
+    entries: List[WorkspacePoolEntry],
+    *,
+    replace: bool = False,
+    dry_run: bool = False,
+    now: Optional[datetime] = None,
+) -> SeedResult:
+    """Idempotently upsert pool entries into the ``workspaces`` collection.
+
+    Existing docs are overwritten only when ``replace`` is True; otherwise they are left
+    untouched and counted as skipped. ``set_number`` is derived from entry position (groups of
+    3), preserving the historical seeding layout.
+    """
+    now = now or datetime.now(timezone.utc)
+    created = updated = skipped = 0
+
+    for i, entry in enumerate(entries):
+        # Prefer the entry's assigned set (kept consistent with its workspace_id); fall back to
+        # position for file-loaded entries, matching the historical init_db.py layout.
+        set_number = entry.set_number if entry.set_number is not None else (i // 3) + 1
+        doc = build_workspace_document(entry, set_number, now)
+
+        if dry_run:
+            created += 1
+            continue
+
+        if db.get(WORKSPACES_COLLECTION, entry.workspace_id) is not None:
+            if replace:
+                db.update(WORKSPACES_COLLECTION, entry.workspace_id, doc)
+                updated += 1
+            else:
+                skipped += 1
+        else:
+            db.set(WORKSPACES_COLLECTION, entry.workspace_id, doc)
+            created += 1
+
+    return SeedResult(created=created, updated=updated, skipped=skipped)
diff --git a/backend/scripts/create_generation_session_repos.py b/backend/scripts/create_generation_session_repos.py
index 12d3040..19e1900 100755
--- a/backend/scripts/create_generation_session_repos.py
+++ b/backend/scripts/create_generation_session_repos.py
@@ -40,7 +40,6 @@
 
 import argparse
 import asyncio
-from datetime import datetime, timezone
 import json
 import os
 from pathlib import Path
@@ -71,6 +70,10 @@
 from app.database.firestore import FirestoreDatabase  # noqa: E402
 from app.database.interface import IDatabase  # noqa: E402
 from app.services.p10y.p10y_api_client import P10YInternalAPIClient  # noqa: E402
+from app.services.workspace_pool_seeding import (  # noqa: E402
+    assign_pool_entries,
+    seed_workspace_pool,
+)
 
 LIVE_REPOSITORY_STATUS = "Live"
 LIVE_INTERNAL_STATUS = 1  # internal_status value P10Y sets after enable/metrics succeeds
@@ -603,57 +606,6 @@ async def poll_repository_status(
     return repo_statuses
 
 
-def create_workspace_document(
-    workspace_id: str,
-    set_number: int,
-    repo_url: str,
-    p10y_repository_id: int,
-    now: datetime,
-    workspace_pool: str = "default"
-) -> dict:
-    """
-    Create a workspace document following the schema from state-management.md.
-    
-    Schema fields:
-    - repo_url: GitHub repository URL
-    - p10y_repository_id: P10Y repository ID for tracking
-    - set_number: Which set this workspace belongs to (1-10)
-    - status: Workspace state (available, allocated, cleaning, stuck)
-    - locked_by: Which generation is using this workspace
-    - locked_at: When workspace was allocated
-    - lease_expires_at: When lease expires (for crash detection)
-    - clean_verified: CRITICAL - must be true to allocate
-    - last_used_by: Previous generation (for debugging)
-    - last_cleaned_at: Last cleanup timestamp
-    - allocation_history: Audit trail of allocations
-    - error: Error message if status is stuck
-    """
-    return {
-        # Core fields
-        "repo_url": repo_url,
-        "p10y_repository_id": p10y_repository_id,
-        "set_number": set_number,
-        "workspace_pool": workspace_pool,
-        
-        # Allocation state
-        "status": "available",
-        "locked_by": None,
-        "locked_at": None,
-        "lease_expires_at": None,
-        
-        # Safety fields
-        "clean_verified": True,  # CRITICAL: must be true to allocate
-        "last_used_by": None,
-        "last_cleaned_at": now,
-        
-        # Audit trail
-        "allocation_history": [],
-        
-        # Error tracking
-        "error": None,
-    }
-
-
 async def add_workspaces_to_firestore(
     repo_id_map: Dict[str, int],
     github_org: str,
@@ -664,16 +616,20 @@ async def add_workspaces_to_firestore(
     firestore_database_id: Optional[str] = None,
 ) -> None:
     """
-    Add workspace entries to Firestore database.
+    Add workspace entries to the active database.
+
+    If firestore_project_id and firestore_database_id are both set, target that hosted Firestore
+    instance directly; otherwise use get_database() (honors DATABASE_TYPE — sqlite by default).
 
-    If firestore_project_id and firestore_database_id are both set, use them only (no Settings /
-    get_database singleton). Otherwise use get_database() (DATABASE_TYPE must be firestore).
+    Id assignment and the upsert are delegated to app.services.workspace_pool_seeding, the single
+    source shared with init_db.py. ``start_num`` is unused (ids come from the repo names) and is
+    retained only for call-site stability.
     """
     if not repo_id_map:
-        print("\n⚠️  No repository IDs to add to Firestore")
+        print("\n⚠️  No repository IDs to add")
         return
-    
-    print(f"\n📝 Adding {len(repo_id_map)} workspaces to Firestore")
+
+    print(f"\n📝 Adding {len(repo_id_map)} workspaces to the database")
 
     try:
         if firestore_project_id is not None and firestore_database_id is not None:
@@ -687,60 +643,19 @@ async def add_workspaces_to_firestore(
             )
         else:
             db = get_database()
-        now = datetime.now(timezone.utc)
-        
-        created = 0
-        updated = 0
-        
-        for repo_name, p10y_id in repo_id_map.items():
-            try:
-                num = int(repo_name.split(prefix)[-1])
-            except (ValueError, IndexError):
-                print(f"⚠️  Could not extract number from repo name: {repo_name}")
-                continue
-            
-            # Calculate set number (groups of 3)
-            # For example: 1-3 = set 1, 4-6 = set 2, 7-9 = set 3
-            set_number = ((num - 1) // 3) + 1
-            workspace_index = ((num - 1) % 3) + 1
-            
-            # Create workspace ID (e.g., ws-03-1 for repo 7)
-            workspace_id = f"ws-{set_number:02d}-{workspace_index}"
-            
-            # Create repo URL
-            repo_url = f"https://github.com/{github_org}/{repo_name}"
-            
-            # Create workspace document
-            workspace_doc = create_workspace_document(
-                workspace_id=workspace_id,
-                set_number=set_number,
-                repo_url=repo_url,
-                p10y_repository_id=p10y_id,
-                now=now,
-                workspace_pool=workspace_pool
-            )
-            
-            # Check if workspace exists
-            existing = db.get("workspaces", workspace_id)
-            
-            if existing:
-                # Update existing workspace
-                db.update("workspaces", workspace_id, workspace_doc)
-                print(f"   ✓ Updated workspace: {workspace_id} -> {repo_name} (P10y ID: {p10y_id})")
-                updated += 1
-            else:
-                # Create new workspace
-                db.set("workspaces", workspace_id, workspace_doc)
-                print(f"   ✓ Created workspace: {workspace_id} -> {repo_name} (P10y ID: {p10y_id})")
-                created += 1
-        
-        print("\n✅ Firestore workspace sync complete:")
-        print(f"   Created: {created}")
-        print(f"   Updated: {updated}")
-        print(f"   Total: {created + updated}")
-        
+
+        entries = assign_pool_entries(
+            repo_id_map, github_org, workspace_pool, prefix=prefix
+        )
+        result = seed_workspace_pool(db, entries, replace=True)
+
+        print("\n✅ Database workspace sync complete:")
+        print(f"   Created: {result.created}")
+        print(f"   Updated: {result.updated}")
+        print(f"   Total: {result.total}")
+
     except Exception as e:
-        print(f"\n❌ Failed to add workspaces to Firestore: {e}")
+        print(f"\n❌ Failed to add workspaces to the database: {e}")
         import traceback
         traceback.print_exc()
         raise
@@ -761,46 +676,33 @@ def emit_workspace_config(
         [{"workspace_id": str, "repo_url": str,
           "p10y_repository_id": int, "workspace_pool": str}, ...]
 
-    All four fields are required (matching WorkspaceConfig dataclass).
-
-    Field sources:
-    - workspace_id: derived the same way as add_workspaces_to_firestore (ws-{set:02d}-{idx})
-    - repo_url: https://github.com/{github_org}/{repo_name}
-    - p10y_repository_id: integer value from repo_id_map[repo_name]
-    - workspace_pool: workspace_pool argument
-
-    When ordered_repos is provided (the --repos path), workspace IDs are assigned
-    by position in the list rather than extracted from the {prefix}{num} name pattern.
+    Id assignment is delegated to app.services.workspace_pool_seeding.assign_pool_entries (the
+    same routine that seeds the DB directly), so the file schema and the direct-seed path can
+    never drift. When ordered_repos is provided (the --repos path), ids follow list position;
+    otherwise they are derived from the {prefix}{num} repo names.
     """
-    def _make_entry(repo_name: str, idx: int) -> Dict[str, Any]:
-        set_number = (idx // 3) + 1
-        workspace_index = (idx % 3) + 1
-        return {
-            "workspace_id": f"ws-{set_number:02d}-{workspace_index}",
-            "repo_url": f"https://github.com/{github_org}/{repo_name}",
-            "p10y_repository_id": int(repo_id_map[repo_name]),
-            "workspace_pool": workspace_pool,
+    entries = assign_pool_entries(
+        repo_id_map,
+        github_org,
+        workspace_pool,
+        ordered_repos=ordered_repos,
+        prefix=prefix,
+    )
+    serialised = [
+        {
+            "workspace_id": e.workspace_id,
+            "repo_url": e.repo_url,
+            "p10y_repository_id": e.p10y_repository_id,
+            "workspace_pool": e.workspace_pool,
         }
-
-    entries: List[Dict[str, Any]] = []
-
-    if ordered_repos is not None:
-        for idx, repo_name in enumerate(ordered_repos):
-            entries.append(_make_entry(repo_name, idx))
-    else:
-        for repo_name in sorted(repo_id_map):
-            try:
-                num = int(repo_name.split(prefix)[-1])
-            except (ValueError, IndexError):
-                print(f"⚠️  Could not extract number from repo name: {repo_name} — skipping in config")
-                continue
-            entries.append(_make_entry(repo_name, num - 1))
+        for e in entries
+    ]
 
     out = Path(output_path)
     out.parent.mkdir(parents=True, exist_ok=True)
     with out.open("w", encoding="utf-8") as f:
-        json.dump(entries, f, indent=2)
-    print(f"\n✅ Workspace config written to: {output_path} ({len(entries)} entries)")
+        json.dump(serialised, f, indent=2)
+    print(f"\n✅ Workspace config written to: {output_path} ({len(serialised)} entries)")
 
 
 async def main():
diff --git a/backend/scripts/init_db.py b/backend/scripts/init_db.py
index 60e4197..fb11532 100755
--- a/backend/scripts/init_db.py
+++ b/backend/scripts/init_db.py
@@ -39,7 +39,6 @@
 import argparse
 import json
 import traceback
-from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import List
@@ -61,6 +60,11 @@
 from app.core.config import settings
 from app.core.local_identity import LOCAL_API_KEY_DOC_ID, LOCAL_KEY_UID
 from app.database.factory import get_database
+from app.services.workspace_pool_seeding import (
+    WorkspacePoolEntry,
+    parse_pool_entries,
+    seed_workspace_pool,
+)
 from app.database.interface import IDatabase
 
 
@@ -72,29 +76,9 @@
 EXTRA_POOL_KEY_UID = "00000000-e2e0-0000-0000-000000000002"
 
 
-@dataclass
-class WorkspaceConfig:
-    """Typed workspace configuration entry parsed from ``--workspace-config`` JSON."""
-
-    workspace_id: str
-    repo_url: str
-    p10y_repository_id: int
-    workspace_pool: str
-
-    def __post_init__(self) -> None:
-        if not isinstance(self.p10y_repository_id, int) or isinstance(self.p10y_repository_id, bool):
-            raise ValueError(
-                f"'p10y_repository_id' must be an integer, got: {self.p10y_repository_id!r}"
-            )
-
-    def to_pool_entry(self) -> dict:
-        """Normalise to the dict shape ``initialize_workspace_pool`` consumes."""
-        return {
-            "workspace_id": self.workspace_id,
-            "repo_url": self.repo_url,
-            "p10y_id": self.p10y_repository_id,
-            "workspace_pool": self.workspace_pool,
-        }
+# The typed pool-entry model (WorkspacePoolEntry) and its validation now live in
+# app.services.workspace_pool_seeding — the single source shared with
+# create_generation_session_repos.py.
 
 
 # CONFIGURATION: Workspace Repository Mapping
@@ -108,7 +92,7 @@ def to_pool_entry(self) -> dict:
 #   make skip-mode-e2e-tests E2E_WORKSPACE_CONFIG=my-test-repos.json
 #
 # Populated from --workspace-config in main(); empty otherwise.
-WORKSPACE_CONFIGS: List[dict] = []
+WORKSPACE_CONFIGS: List[WorkspacePoolEntry] = []
 
 
 def initialize_api_key(db: IDatabase, dry_run: bool = False) -> None:
@@ -269,9 +253,9 @@ def initialize_local_identity(db: IDatabase, dry_run: bool = False, replace: boo
     print(f"{'='*60}\n")
 
 
-def load_workspace_configs_from_file(path: str) -> List[dict]:
+def load_workspace_configs_from_file(path: str) -> List[WorkspacePoolEntry]:
     """
-    Load workspace configs from a JSON file.
+    Load and validate workspace pool entries from a JSON file.
 
     Expected JSON schema::
 
@@ -285,10 +269,8 @@ def load_workspace_configs_from_file(path: str) -> List[dict]:
           ...
         ]
 
-    Returns a list of dicts normalised so ``initialize_workspace_pool`` can
-    consume them (``workspace_id``, ``repo_url``, ``p10y_id``, ``workspace_pool``).
-
-    Raises SystemExit on malformed input.
+    Returns typed ``WorkspacePoolEntry`` objects. Raises SystemExit on malformed input
+    (validation itself lives in ``workspace_pool_seeding.parse_pool_entries``).
     """
     try:
         with open(path, "r", encoding="utf-8") as f:
@@ -301,75 +283,11 @@ def load_workspace_configs_from_file(path: str) -> List[dict]:
         print(f"ERROR: Workspace config file must contain a JSON array, got {type(data).__name__}")
         sys.exit(1)
 
-    normalised: List[dict] = []
-    for i, entry in enumerate(data):
-        if not isinstance(entry, dict):
-            print(f"ERROR: Entry {i} in workspace config is not an object: {entry!r}")
-            sys.exit(1)
-        try:
-            config = WorkspaceConfig(**entry)
-        except TypeError as exc:
-            # Missing required keys or unexpected keys in the JSON object.
-            print(f"ERROR: Entry {i} in workspace config has invalid fields: {exc}")
-            sys.exit(1)
-        except ValueError as exc:
-            print(f"ERROR: Entry {i} in workspace config: {exc}")
-            sys.exit(1)
-        normalised.append(config.to_pool_entry())
-
-    return normalised
-
-
-def create_workspace_document(
-    workspace_id: str,
-    set_number: int,
-    repo_url: str,
-    p10y_repository_id: int,
-    now: datetime,
-    workspace_pool: str = "default",
-) -> dict:
-    """
-    Create a workspace document following the schema from state-management.md.
-    
-    Schema fields:
-    - repo_url: GitHub repository URL
-    - p10y_repository_id: P10Y repository ID for tracking
-    - set_number: Which set this workspace belongs to (1-10)
-    - status: Workspace state (available, allocated, cleaning, stuck)
-    - locked_by: Which generation session is using this workspace
-    - locked_at: When workspace was allocated
-    - lease_expires_at: When lease expires (for crash detection)
-    - clean_verified: CRITICAL - must be true to allocate
-    - last_used_by: Previous generation session (for debugging)
-    - last_cleaned_at: Last cleanup timestamp
-    - allocation_history: Audit trail of allocations
-    - error: Error message if status is stuck
-    """
-    return {
-        # Core fields
-        "repo_url": repo_url,
-        "p10y_repository_id": p10y_repository_id,
-        "set_number": set_number,
-        "workspace_pool": workspace_pool,
-        
-        # Allocation state
-        "status": "available",
-        "locked_by": None,
-        "locked_at": None,
-        "lease_expires_at": None,
-        "cleaning_started_at": None,
-        
-        # Safety fields
-        "clean_verified": True,  # CRITICAL: must be true to allocate
-        "last_used_by": None,
-        "last_cleaned_at": now,
-        
-        # Audit trail
-        "allocation_history": [],
-        
-        # Error tracking
-        "error": None,
-    }
+    try:
+        return parse_pool_entries(data)
+    except ValueError as exc:
+        print(f"ERROR: Invalid workspace config in '{path}': {exc}")
+        sys.exit(1)
 
 
 def initialize_workspace_pool(
@@ -414,61 +332,23 @@ def initialize_workspace_pool(
             print("Aborted.")
             return
 
-    # Create workspaces
-    created = 0
-    updated = 0
-    skipped = 0
-
-    for i, config in enumerate(WORKSPACE_CONFIGS):
-        set_number = (i // 3) + 1  # Sets 1-10
-        # Prefer explicit workspace_id from config (JSON-loaded); derive otherwise.
-        workspace_id = config.get("workspace_id") or f"ws-{set_number:02d}-{(i % 3) + 1}"
-
-        workspace_doc = create_workspace_document(
-            workspace_id=workspace_id,
-            set_number=set_number,
-            repo_url=config["repo_url"],
-            p10y_repository_id=config["p10y_id"],
-            now=now,
-            workspace_pool=config.get("workspace_pool", "default"),
-        )
-
-        if dry_run:
-            print(f"[DRY RUN] Would create workspace: {workspace_id}")
-            print(f"  - Set: {set_number}")
-            print(f"  - Repo: {config['repo_url']}")
-            print(f"  - P10Y ID: {config['p10y_id']}")
-            created += 1
-        else:
-            # Check if workspace exists
-            existing = db.get("workspaces", workspace_id)
-
-            if existing:
-                if replace:
-                    db.update("workspaces", workspace_id, workspace_doc)
-                    print(f"✓ Updated workspace: {workspace_id} (set {set_number})")
-                    updated += 1
-                else:
-                    print(f"  Skipped workspace: {workspace_id} (already exists; use --replace to overwrite)")
-                    skipped += 1
-            else:
-                # Create new workspace
-                db.set("workspaces", workspace_id, workspace_doc)
-                print(f"✓ Created workspace: {workspace_id} (set {set_number})")
-                created += 1
+    # Upsert the pool via the shared seeding routine (idempotent; --replace overwrites).
+    result = seed_workspace_pool(
+        db, WORKSPACE_CONFIGS, replace=replace, dry_run=dry_run, now=now
+    )
 
     # Summary
     print(f"\n{'='*60}")
     print("SUMMARY")
     print(f"{'='*60}")
     if dry_run:
-        print(f"Would create: {created} workspaces")
+        print(f"Would create: {result.created} workspaces")
     else:
-        print(f"Created: {created} workspaces")
-        print(f"Updated: {updated} workspaces")
-        if skipped:
-            print(f"Skipped: {skipped} workspaces (already exist)")
-        print(f"Total: {created + updated + skipped} workspaces")
+        print(f"Created: {result.created} workspaces")
+        print(f"Updated: {result.updated} workspaces")
+        if result.skipped:
+            print(f"Skipped: {result.skipped} workspaces (already exist; use --replace to overwrite)")
+        print(f"Total: {result.total} workspaces")
         expected_sets = (len(WORKSPACE_CONFIGS) + 2) // 3
         print(f"Configured sets: {expected_sets} set(s) of up to 3 workspaces")
     print(f"{'='*60}\n")
@@ -509,7 +389,7 @@ def initialize_workspace_pool(
 
 def extra_pool_configured() -> bool:
     """Return True when the loaded workspace config declares the example extra pool."""
-    return any(config.get("workspace_pool") == EXTRA_WORKSPACE_POOL for config in WORKSPACE_CONFIGS)
+    return any(entry.workspace_pool == EXTRA_WORKSPACE_POOL for entry in WORKSPACE_CONFIGS)
 
 
 def attach_github_tokens(dry_run: bool = False) -> None:
diff --git a/backend/test/scripts/test_create_generation_session_repos.py b/backend/test/scripts/test_create_generation_session_repos.py
index a2aaba0..101fa42 100644
--- a/backend/test/scripts/test_create_generation_session_repos.py
+++ b/backend/test/scripts/test_create_generation_session_repos.py
@@ -2,7 +2,7 @@
 Tests for backend/scripts/create_generation_session_repos.py — Phase 6.
 
 Coverage:
-  (a) emit_workspace_config writes JSON whose entries each construct a WorkspaceConfig
+  (a) emit_workspace_config writes JSON whose entries each construct a WorkspacePoolEntry
       without error, proving schema parity with init_db.py --workspace-config.
   (b) get_repository_ids calls list_repositories WITHOUT project_ids (no filter).
   (c) No code path calls add_repositories_to_project — the function is absent from the
@@ -21,7 +21,7 @@
 import pytest
 
 import scripts.create_generation_session_repos as cgsr
-from scripts.init_db import WorkspaceConfig
+from app.services.workspace_pool_seeding import WorkspacePoolEntry
 
 # ---------------------------------------------------------------------------
 # Path constants
@@ -32,17 +32,17 @@
 
 
 # ===========================================================================
-# (a) emit_workspace_config — schema parity with WorkspaceConfig
+# (a) emit_workspace_config — schema parity with WorkspacePoolEntry
 # ===========================================================================
 
 class TestEmitWorkspaceConfig:
-    """emit_workspace_config emits JSON that round-trips through WorkspaceConfig."""
+    """emit_workspace_config emits JSON that round-trips through WorkspacePoolEntry."""
 
     def _make_repo_id_map(self, prefix: str = "specflow-workspace", start: int = 1, count: int = 3) -> dict:
         return {f"{prefix}{i}": 10000 + i for i in range(start, start + count)}
 
     def test_roundtrip_through_workspace_config(self, tmp_path):
-        """Each emitted entry must construct WorkspaceConfig without error."""
+        """Each emitted entry must construct WorkspacePoolEntry without error."""
         repo_id_map = self._make_repo_id_map(start=1, count=3)
         output = str(tmp_path / "workspaces.json")
 
@@ -61,7 +61,7 @@ def test_roundtrip_through_workspace_config(self, tmp_path):
         assert len(data) == 3, "Should emit one entry per repo"
         for entry in data:
             # This will raise if any field is missing or p10y_repository_id is not an int
-            wc = WorkspaceConfig(**entry)
+            wc = WorkspacePoolEntry(**entry)
             assert isinstance(wc.p10y_repository_id, int)
             assert not isinstance(wc.p10y_repository_id, bool)
 
@@ -123,7 +123,7 @@ def test_repo_url_uses_github_org(self, tmp_path):
         assert data[0]["repo_url"] == "https://github.com/my-org/specflow-workspace4"
 
     def test_p10y_repository_id_is_int_not_bool(self, tmp_path):
-        """p10y_repository_id must be a plain int (WorkspaceConfig rejects bool)."""
+        """p10y_repository_id must be a plain int (WorkspacePoolEntry rejects bool)."""
         repo_id_map = {"specflow-workspace1": 12345}
         output = str(tmp_path / "out.json")
 
diff --git a/backend/test/scripts/test_init_db.py b/backend/test/scripts/test_init_db.py
index 9efd10a..75df38a 100644
--- a/backend/test/scripts/test_init_db.py
+++ b/backend/test/scripts/test_init_db.py
@@ -19,6 +19,7 @@
 
 from app.core.local_identity import LOCAL_API_KEY_DOC_ID, LOCAL_KEY_UID
 from app.database.memory import InMemoryDatabase
+from app.services.workspace_pool_seeding import WorkspacePoolEntry
 
 # ---------------------------------------------------------------------------
 # Import the functions under test directly.  The script sets DATABASE_TYPE
@@ -72,13 +73,13 @@ def test_load_replaces_hardcoded_list(self, tmp_path):
         loaded = init_script.load_workspace_configs_from_file(path)
 
         assert len(loaded) == 2
-        assert loaded[0]["workspace_id"] == "ws-test-1"
-        assert loaded[0]["repo_url"] == "https://github.com/test-org/test-repo-1"
-        assert loaded[0]["p10y_id"] == 11111  # normalised key
-        assert loaded[0]["workspace_pool"] == "default"
-        assert loaded[1]["workspace_id"] == "ws-test-2"
-        assert loaded[1]["p10y_id"] == 22222
-        assert loaded[1]["workspace_pool"] == "testpool"
+        assert loaded[0].workspace_id == "ws-test-1"
+        assert loaded[0].repo_url == "https://github.com/test-org/test-repo-1"
+        assert loaded[0].p10y_repository_id == 11111
+        assert loaded[0].workspace_pool == "default"
+        assert loaded[1].workspace_id == "ws-test-2"
+        assert loaded[1].p10y_repository_id == 22222
+        assert loaded[1].workspace_pool == "testpool"
 
     def test_load_invalid_json_exits(self, tmp_path):
         path = str(tmp_path / "bad.json")
@@ -543,12 +544,12 @@ def test_extra_pool_key_seeded_only_when_config_declares_extra_pool(self):
         original_configs = init_script.WORKSPACE_CONFIGS
         try:
             init_script.WORKSPACE_CONFIGS = [
-                {
-                    "workspace_id": "ws-default-1",
-                    "repo_url": "https://github.com/test-org/default",
-                    "p10y_id": 11111,
-                    "workspace_pool": "default",
-                }
+                WorkspacePoolEntry(
+                    workspace_id="ws-default-1",
+                    repo_url="https://github.com/test-org/default",
+                    p10y_repository_id=11111,
+                    workspace_pool="default",
+                )
             ]
             init_script.initialize_api_key(db, dry_run=False)
         finally:
@@ -562,12 +563,12 @@ def test_extra_pool_key_seeded_when_config_declares_extra_pool(self):
         original_configs = init_script.WORKSPACE_CONFIGS
         try:
             init_script.WORKSPACE_CONFIGS = [
-                {
-                    "workspace_id": "ws-extra-1",
-                    "repo_url": "https://github.com/test-org/extra",
-                    "p10y_id": 22222,
-                    "workspace_pool": init_script.EXTRA_WORKSPACE_POOL,
-                }
+                WorkspacePoolEntry(
+                    workspace_id="ws-extra-1",
+                    repo_url="https://github.com/test-org/extra",
+                    p10y_repository_id=22222,
+                    workspace_pool=init_script.EXTRA_WORKSPACE_POOL,
+                )
             ]
             init_script.initialize_api_key(db, dry_run=False)
         finally:
@@ -583,12 +584,12 @@ def test_github_token_attachment_skips_when_extra_pool_not_configured(self, monk
         original_configs = init_script.WORKSPACE_CONFIGS
         try:
             init_script.WORKSPACE_CONFIGS = [
-                {
-                    "workspace_id": "ws-default-1",
-                    "repo_url": "https://github.com/test-org/default",
-                    "p10y_id": 11111,
-                    "workspace_pool": "default",
-                }
+                WorkspacePoolEntry(
+                    workspace_id="ws-default-1",
+                    repo_url="https://github.com/test-org/default",
+                    p10y_repository_id=11111,
+                    workspace_pool="default",
+                )
             ]
             with patch.object(init_script.httpx, "put") as mock_put:
                 init_script.attach_github_tokens(dry_run=False)
diff --git a/backend/test/services/test_workspace_pool_seeding.py b/backend/test/services/test_workspace_pool_seeding.py
new file mode 100644
index 0000000..8f44b1a
--- /dev/null
+++ b/backend/test/services/test_workspace_pool_seeding.py
@@ -0,0 +1,162 @@
+"""Unit tests for the shared workspace-pool seeding module.
+
+Covers the single source of truth now used by both init_db.py (file-based seeding) and
+create_generation_session_repos.py (direct-to-DB seeding):
+  - WorkspacePoolEntry validation
+  - parse_pool_entries (dict -> typed, with indexed errors)
+  - assign_pool_entries (ordered + prefix id assignment, set_number consistency)
+  - build_workspace_document (full schema incl. cleaning_started_at)
+  - seed_workspace_pool (idempotent upsert, replace semantics, dry-run, counts)
+"""
+
+from datetime import datetime, timezone
+
+import pytest
+
+from app.database.memory import InMemoryDatabase
+from app.services.workspace_pool_seeding import (
+    WORKSPACES_COLLECTION,
+    SeedResult,
+    WorkspacePoolEntry,
+    assign_pool_entries,
+    build_workspace_document,
+    parse_pool_entries,
+    seed_workspace_pool,
+)
+
+_NOW = datetime(2026, 1, 1, tzinfo=timezone.utc)
+
+
+def _entry(ws_id="ws-01-1", pool="default", p10y=111, set_number=1):
+    return WorkspacePoolEntry(
+        workspace_id=ws_id,
+        repo_url=f"https://github.com/org/{ws_id}",
+        p10y_repository_id=p10y,
+        workspace_pool=pool,
+        set_number=set_number,
+    )
+
+
+class TestWorkspacePoolEntry:
+    def test_rejects_bool_p10y(self):
+        with pytest.raises(ValueError):
+            WorkspacePoolEntry(
+                workspace_id="x", repo_url="y", p10y_repository_id=True, workspace_pool="default"
+            )
+
+    def test_rejects_non_int_p10y(self):
+        with pytest.raises(ValueError):
+            WorkspacePoolEntry(
+                workspace_id="x", repo_url="y", p10y_repository_id="12", workspace_pool="default"
+            )
+
+    def test_set_number_optional(self):
+        e = WorkspacePoolEntry(
+            workspace_id="x", repo_url="y", p10y_repository_id=1, workspace_pool="default"
+        )
+        assert e.set_number is None
+
+    def test_workspace_pool_required(self):
+        # The file schema mandates all four fields — workspace_pool has no default.
+        with pytest.raises(TypeError):
+            WorkspacePoolEntry(workspace_id="x", repo_url="y", p10y_repository_id=1)
+
+
+class TestParsePoolEntries:
+    def test_valid(self):
+        raw = [{"workspace_id": "ws-01-1", "repo_url": "u", "p10y_repository_id": 5, "workspace_pool": "default"}]
+        entries = parse_pool_entries(raw)
+        assert len(entries) == 1
+        assert entries[0].p10y_repository_id == 5
+
+    def test_non_dict_entry_raises_with_index(self):
+        with pytest.raises(ValueError, match="Entry 0"):
+            parse_pool_entries(["notadict"])
+
+    def test_missing_field_raises_with_index(self):
+        with pytest.raises(ValueError, match="Entry 0"):
+            parse_pool_entries([{"workspace_id": "x", "repo_url": "y", "p10y_repository_id": 1, "extra": 9}])
+
+
+class TestAssignPoolEntries:
+    def test_prefix_path_derives_id_and_set_from_repo_number(self):
+        # repos 4,5,6 -> set 2, indices 1,2,3
+        repo_id_map = {"ws-4": 104, "ws-5": 105, "ws-6": 106}
+        entries = assign_pool_entries(repo_id_map, "org", "default", prefix="ws-")
+        by_id = {e.workspace_id: e for e in entries}
+        assert set(by_id) == {"ws-02-1", "ws-02-2", "ws-02-3"}
+        # set_number stays consistent with the workspace_id (both from the repo number).
+        assert all(e.set_number == 2 for e in entries)
+
+    def test_ordered_path_assigns_by_position(self):
+        repo_id_map = {"a": 1, "b": 2, "c": 3, "d": 4}
+        entries = assign_pool_entries(
+            repo_id_map, "org", "default", ordered_repos=["a", "b", "c", "d"]
+        )
+        assert [e.workspace_id for e in entries] == ["ws-01-1", "ws-01-2", "ws-01-3", "ws-02-1"]
+        assert entries[3].set_number == 2
+
+    def test_prefix_required_without_ordered(self):
+        with pytest.raises(ValueError, match="prefix is required"):
+            assign_pool_entries({"x1": 1}, "org", "default")
+
+    def test_unparseable_name_skipped(self):
+        entries = assign_pool_entries({"weird-name": 1}, "org", "default", prefix="ws-")
+        assert entries == []
+
+
+class TestBuildWorkspaceDocument:
+    def test_full_schema_including_cleaning_started_at(self):
+        doc = build_workspace_document(_entry(), set_number=3, now=_NOW)
+        # The field that had drifted out of one duplicate must be present.
+        assert doc["cleaning_started_at"] is None
+        assert doc["status"] == "available"
+        assert doc["clean_verified"] is True
+        assert doc["set_number"] == 3
+        assert doc["last_cleaned_at"] == _NOW
+        assert doc["allocation_history"] == []
+        assert doc["error"] is None
+
+
+class TestSeedWorkspacePool:
+    def test_creates_new(self):
+        db = InMemoryDatabase()
+        result = seed_workspace_pool(db, [_entry("ws-01-1"), _entry("ws-01-2")], now=_NOW)
+        assert result == SeedResult(created=2, updated=0, skipped=0)
+        assert db.get(WORKSPACES_COLLECTION, "ws-01-1") is not None
+
+    def test_skips_existing_without_replace(self):
+        db = InMemoryDatabase()
+        seed_workspace_pool(db, [_entry("ws-01-1")], now=_NOW)
+        result = seed_workspace_pool(db, [_entry("ws-01-1", p10y=999)], replace=False, now=_NOW)
+        assert result.skipped == 1 and result.created == 0
+        assert db.get(WORKSPACES_COLLECTION, "ws-01-1")["p10y_repository_id"] == 111
+
+    def test_replaces_existing_with_replace(self):
+        db = InMemoryDatabase()
+        seed_workspace_pool(db, [_entry("ws-01-1")], now=_NOW)
+        result = seed_workspace_pool(db, [_entry("ws-01-1", p10y=999)], replace=True, now=_NOW)
+        assert result.updated == 1
+        assert db.get(WORKSPACES_COLLECTION, "ws-01-1")["p10y_repository_id"] == 999
+
+    def test_dry_run_writes_nothing(self):
+        db = InMemoryDatabase()
+        result = seed_workspace_pool(db, [_entry("ws-01-1")], dry_run=True, now=_NOW)
+        assert result.created == 1
+        assert db.get(WORKSPACES_COLLECTION, "ws-01-1") is None
+
+    def test_set_number_falls_back_to_position_when_absent(self):
+        db = InMemoryDatabase()
+        # File-loaded entries carry no set_number -> derived from position (groups of 3).
+        entries = [
+            WorkspacePoolEntry(
+                workspace_id=f"ws-x-{i}", repo_url="u", p10y_repository_id=i, workspace_pool="default"
+            )
+            for i in range(4)
+        ]
+        seed_workspace_pool(db, entries, now=_NOW)
+        assert db.get(WORKSPACES_COLLECTION, "ws-x-0")["set_number"] == 1
+        assert db.get(WORKSPACES_COLLECTION, "ws-x-3")["set_number"] == 2
+
+    def test_seed_result_total(self):
+        assert SeedResult(created=1, updated=2, skipped=3).total == 6

From 5f5ae8acc378c4eea2fbcf7922c61f5ffecf48b2 Mon Sep 17 00:00:00 2001
From: Mateusz Konopelski <mkonopelski@griddynamics.com>
Date: Thu, 2 Jul 2026 11:56:01 +0200
Subject: [PATCH 8/8] feat(quickstart): seed workspace pool straight into
 SQLite; drop workspaces.json
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The workspace pool was duplicated: a transient .specflow-local/workspaces.json flat
file AND the DB workspaces collection. Eliminate the flat file so the SQLite DB is the
single source of truth for the local pool.

- create_generation_session_repos.py now seeds the DB directly for BOTH the {prefix}{num}
  and --provide-own-repos (arbitrary-name) paths — id assignment handles ordered repos, so
  the arbitrary-name limitation that forced the file handoff is gone.
- init_db.py: --workspace-config is now optional. Without it (local quickstart) it seeds only
  the bootstrap API key + local-auth identity; the file input is retained for e2e / BYO repos.
- specflow-init.sh: provisioning writes the pool straight into the DB (drops --skip-firestore
  and --output-workspace-config, passes the host SQLITE_DB_PATH); init_db.py then seeds API key
  + identity. .specflow-local/workspaces.json is gone entirely.

Tests updated to the new contract; MEMORY.md note refreshed.
---
 agents/MEMORY.md                              |  2 +-
 .../create_generation_session_repos.py        | 26 +++++---
 backend/scripts/init_db.py                    | 42 ++++++------
 backend/test/scripts/test_init_db.py          | 45 ++++++++-----
 .../test/scripts/test_specflow_init_script.py | 17 +++--
 specflow-init.sh                              | 64 +++++++++----------
 6 files changed, 113 insertions(+), 83 deletions(-)

diff --git a/agents/MEMORY.md b/agents/MEMORY.md
index b564fa3..ebdd7d7 100644
--- a/agents/MEMORY.md
+++ b/agents/MEMORY.md
@@ -8,4 +8,4 @@ Concise, generalized lessons (not a changelog — that is `agents/IMPLEMENTATION
 - For first-run `run_generation`, validate upload contract before creating generation sessions or allocating workspace sets; once `/workspace/sync` returns a `generation_id`, MCP may persist `specflow_session.json`.
 - When fixing a rejection/validation bug, enumerate every rejection code and every path that raises that rejection before planning the side-effect boundary; do not plan from only the observed failure case.
 - Firestore emulator imports require the `*overall_export_metadata` file path; export destinations are directories, but `--import-data` must not point at the snapshot directory itself.
-- Global SpecFlow/TUI settings live in `~/.specflow/config.json` (SSOT) — read/write via `mcp_server/tui/mcp_clients.py` `_read_config`/`_write_config`, which preserve unknown top-level keys; add each new setting as its own top-level section. Do NOT put global settings in the project-local `.specflow-local/` (that dir is per-project runtime: `mcp-config.json`, `workspaces.json`, `init.log`). MCP-client connection status is stored globally because connecting a client is a machine-wide act (`claude/gemini mcp add -s user`, Cursor `~/.cursor/mcp.json`).
+- Global SpecFlow/TUI settings live in `~/.specflow/config.json` (SSOT) — read/write via `mcp_server/tui/mcp_clients.py` `_read_config`/`_write_config`, which preserve unknown top-level keys; add each new setting as its own top-level section. Do NOT put global settings in the project-local `.specflow-local/` (that dir is per-project runtime: `mcp-config.json`, `init.log`; the workspace pool now lives only in the SQLite DB, seeded straight in by `create_generation_session_repos.py` — no `workspaces.json` flat file). MCP-client connection status is stored globally because connecting a client is a machine-wide act (`claude/gemini mcp add -s user`, Cursor `~/.cursor/mcp.json`).
diff --git a/backend/scripts/create_generation_session_repos.py b/backend/scripts/create_generation_session_repos.py
index 19e1900..23ebcc5 100755
--- a/backend/scripts/create_generation_session_repos.py
+++ b/backend/scripts/create_generation_session_repos.py
@@ -614,6 +614,7 @@ async def add_workspaces_to_firestore(
     workspace_pool: str = "default",
     firestore_project_id: Optional[str] = None,
     firestore_database_id: Optional[str] = None,
+    ordered_repos: Optional[List[str]] = None,
 ) -> None:
     """
     Add workspace entries to the active database.
@@ -622,8 +623,10 @@ async def add_workspaces_to_firestore(
     instance directly; otherwise use get_database() (honors DATABASE_TYPE — sqlite by default).
 
     Id assignment and the upsert are delegated to app.services.workspace_pool_seeding, the single
-    source shared with init_db.py. ``start_num`` is unused (ids come from the repo names) and is
-    retained only for call-site stability.
+    source shared with init_db.py. When ``ordered_repos`` is given (the --provide-own-repos path),
+    ids follow list position; otherwise they are derived from the {prefix}{num} repo names.
+    ``start_num`` is unused (ids come from the repo names/positions) and is retained only for
+    call-site stability.
     """
     if not repo_id_map:
         print("\n⚠️  No repository IDs to add")
@@ -645,7 +648,11 @@ async def add_workspaces_to_firestore(
             db = get_database()
 
         entries = assign_pool_entries(
-            repo_id_map, github_org, workspace_pool, prefix=prefix
+            repo_id_map,
+            github_org,
+            workspace_pool,
+            ordered_repos=ordered_repos,
+            prefix=prefix,
         )
         result = seed_workspace_pool(db, entries, replace=True)
 
@@ -1089,11 +1096,11 @@ async def main():
         else:
             final_statuses = {}
         
-        # Step 6: Add workspaces to the active database
-        # --repos path: workspace-config JSON is written above; database seeding is done
-        # separately by init_db.py --workspace-config. add_workspaces_to_firestore
-        # extracts workspace IDs from {prefix}{num} names and cannot handle arbitrary names.
-        if not args.skip_firestore and own_repo_list is None:
+        # Step 6: Add workspaces to the active database.
+        # Both the {prefix}{num} path and the --provide-own-repos (arbitrary-name) path seed
+        # directly now: id assignment handles ordered repos via own_repo_list, so the local
+        # quickstart no longer needs a workspaces.json handoff to init_db.py.
+        if not args.skip_firestore:
             await add_workspaces_to_firestore(
                 repo_id_map,
                 github_org,
@@ -1102,9 +1109,10 @@ async def main():
                 workspace_pool,
                 firestore_project_id=gcp_cli if firestore_target_from_cli else None,
                 firestore_database_id=fsdb_cli if firestore_target_from_cli else None,
+                ordered_repos=own_repo_list,
             )
         else:
-            print("\n⏭️  Skipping Firestore workspace creation")
+            print("\n⏭️  Skipping database workspace creation (--skip-firestore)")
         
         # Print final summary
         print("\n" + "=" * 80)
diff --git a/backend/scripts/init_db.py b/backend/scripts/init_db.py
index fb11532..26adad4 100755
--- a/backend/scripts/init_db.py
+++ b/backend/scripts/init_db.py
@@ -480,23 +480,19 @@ def main():
     )
     args = parser.parse_args()
 
-    # Workspace configs MUST come from --workspace-config. There are no default repos: workspace
-    # allocation clones every repo_url (even in SKIP_MODE), so the pool can only point at repos the
-    # user controls. Refuse to run otherwise rather than seeding a broken pool.
+    # --workspace-config is optional. When provided (e2e / bring-your-own repos), it is the
+    # source of the workspace pool. When omitted (local quickstart), the pool is seeded straight
+    # into the DB by create_generation_session_repos.py, and this script only seeds the bootstrap
+    # API key + local-auth identity sentinel. There are still no hardcoded default repos.
     global WORKSPACE_CONFIGS
-    if not args.workspace_config:
+    if args.workspace_config:
+        WORKSPACE_CONFIGS = load_workspace_configs_from_file(args.workspace_config)
+        print(f"✓ Loaded {len(WORKSPACE_CONFIGS)} workspace configs from {args.workspace_config}")
+    else:
         print(
-            "ERROR: No workspace config provided. There are no default test repos.\n"
-            "       Copy the template, point it at repos you control, then pass it via "
-            "--workspace-config:\n"
-            "         cp e2e-workspace-config.example.json my-test-repos.json\n"
-            "         # edit repo_url / p10y_repository_id in my-test-repos.json\n"
-            "       Then re-run, e.g.:\n"
-            "         make skip-mode-e2e-tests E2E_WORKSPACE_CONFIG=my-test-repos.json"
+            "ℹ️  No --workspace-config provided: seeding API key + local identity only "
+            "(the workspace pool is seeded separately, straight into the database)."
         )
-        sys.exit(1)
-    WORKSPACE_CONFIGS = load_workspace_configs_from_file(args.workspace_config)
-    print(f"✓ Loaded {len(WORKSPACE_CONFIGS)} workspace configs from {args.workspace_config}")
 
     # Safety check for production
     if args.prod:
@@ -559,13 +555,17 @@ def main():
         traceback.print_exc()
         sys.exit(1)
 
-    # Initialize workspace pool
-    try:
-        initialize_workspace_pool(db, dry_run=args.dry_run, yes=args.yes, replace=args.replace)
-    except Exception as e:
-        print(f"\nERROR: Initialization failed: {e}")
-        traceback.print_exc()
-        sys.exit(1)
+    # Initialize workspace pool (only when a config file was supplied; otherwise the pool is
+    # seeded directly into the DB by create_generation_session_repos.py).
+    if WORKSPACE_CONFIGS:
+        try:
+            initialize_workspace_pool(db, dry_run=args.dry_run, yes=args.yes, replace=args.replace)
+        except Exception as e:
+            print(f"\nERROR: Initialization failed: {e}")
+            traceback.print_exc()
+            sys.exit(1)
+    else:
+        print("⏭️  Skipping workspace pool seeding (no --workspace-config).")
 
     # Attach GitHub tokens to pool-specific keys
     attach_github_tokens(dry_run=args.dry_run)
diff --git a/backend/test/scripts/test_init_db.py b/backend/test/scripts/test_init_db.py
index 75df38a..71da0ff 100644
--- a/backend/test/scripts/test_init_db.py
+++ b/backend/test/scripts/test_init_db.py
@@ -144,24 +144,37 @@ def test_workspace_pool_initialized_with_config(self, tmp_path):
 # (a2) main() requires --workspace-config: there are no default repos
 # ---------------------------------------------------------------------------
 
-class TestWorkspaceConfigRequired:
-    def test_main_without_workspace_config_exits(self, capsys):
-        """main() refuses to run (exit 1) when --workspace-config is absent."""
+class TestWorkspaceConfigOptional:
+    def test_main_without_workspace_config_seeds_identity_only(self, monkeypatch):
+        """Without --workspace-config, main() seeds the API key + local identity but NOT the
+        workspace pool (the pool is seeded directly into the DB by the provisioner)."""
+        monkeypatch.setenv("DATABASE_TYPE", "sqlite")
+        monkeypatch.delenv("FIRESTORE_EMULATOR_HOST", raising=False)
+        db = _make_db()
+
         with patch("sys.argv", ["init_db.py", "--yes"]):
-            with pytest.raises(SystemExit) as exc:
-                init_script.main()
-        assert exc.value.code == 1
-        out = capsys.readouterr().out
-        assert "No workspace config provided" in out
-        assert "e2e-workspace-config.example.json" in out
-
-    def test_main_without_workspace_config_does_not_touch_db(self):
-        """The gate fires before any database access."""
-        with patch("sys.argv", ["init_db.py"]):
-            with patch.object(init_script, "get_database") as mock_get_db:
-                with pytest.raises(SystemExit):
+            with patch.object(init_script, "get_database", return_value=db):
+                with patch.object(init_script, "attach_github_tokens"):
+                    init_script.main()  # must NOT raise SystemExit
+
+        # Local-auth sentinel seeded; workspace pool left untouched.
+        assert db.get("api_keys", LOCAL_API_KEY_DOC_ID) is not None
+        assert db.query("workspaces") == []
+
+    def test_main_without_workspace_config_skips_pool_message(self, capsys, monkeypatch):
+        """It announces that pool seeding is skipped rather than erroring out."""
+        monkeypatch.setenv("DATABASE_TYPE", "sqlite")
+        monkeypatch.delenv("FIRESTORE_EMULATOR_HOST", raising=False)
+        db = _make_db()
+
+        with patch("sys.argv", ["init_db.py", "--yes"]):
+            with patch.object(init_script, "get_database", return_value=db):
+                with patch.object(init_script, "attach_github_tokens"):
                     init_script.main()
-        mock_get_db.assert_not_called()
+
+        out = capsys.readouterr().out
+        assert "No --workspace-config provided" in out
+        assert "Skipping workspace pool seeding" in out
 
 
 # ---------------------------------------------------------------------------
diff --git a/backend/test/scripts/test_specflow_init_script.py b/backend/test/scripts/test_specflow_init_script.py
index 65c8642..15f785c 100644
--- a/backend/test/scripts/test_specflow_init_script.py
+++ b/backend/test/scripts/test_specflow_init_script.py
@@ -15,12 +15,21 @@ def _compose_text() -> str:
     return _COMPOSE.read_text()
 
 
-def test_workspace_config_generation_skips_firestore_write():
-    """The quickstart emits workspaces.json, then init_firestore.py seeds emulator state."""
+def test_quickstart_seeds_pool_directly_without_workspaces_json():
+    """The quickstart provisions repos straight into the DB — no workspaces.json handoff.
+
+    Regression guard for the flat-file removal: the provisioner must NOT be invoked with
+    --skip-firestore or --output-workspace-config, and .specflow-local/workspaces.json must
+    never be referenced.
+    """
     text = _script_text()
 
-    assert "--output-workspace-config" in text
-    assert "--skip-firestore" in text
+    assert "--output-workspace-config" not in text
+    assert "--skip-firestore" not in text
+    assert "workspaces.json" not in text
+    # init_db.py still runs (API key + identity), but without a --workspace-config file.
+    assert "uv run scripts/init_db.py" in text
+    assert "--workspace-config" not in text
 
 
 def test_docker_starts_before_workspace_config_generation():
diff --git a/specflow-init.sh b/specflow-init.sh
index 9d1db2d..d0eed01 100755
--- a/specflow-init.sh
+++ b/specflow-init.sh
@@ -9,10 +9,11 @@
 # Responsibilities:
 #   1. Load .env (user must have copied .env.quickstart.example → .env)
 #   2. Generate TOKEN_ENCRYPTION_KEY if blank (via Fernet, never echoed)
-#   3. Create .specflow-local/ and write workspaces.json, init.log, mcp-config.json
+#   3. Create .specflow-local/ and write init.log, mcp-config.json
 #   4. Start the backend stack only (backend + sqlite; NOT mcp-server profile)
 #   5. Health-gate: poll /health/ready before seeding
-#   6. Seed the local SQLite database via init_db.py
+#   6. Provision repos + seed the workspace pool straight into the SQLite database, then
+#      seed the API key + local identity via init_db.py
 #   7. Write .specflow-local/mcp-config.json (keyless IDE MCP-client snippet)
 #   8. Install the local SpecFlow CLI entry point
 #   9. Print manual-install instruction for the user
@@ -29,7 +30,6 @@ set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 LOCAL_DIR="${SCRIPT_DIR}/.specflow-local"
 LOG_FILE="${LOCAL_DIR}/init.log"
-WORKSPACES_JSON="${LOCAL_DIR}/workspaces.json"
 MCP_CONFIG_JSON="${LOCAL_DIR}/mcp-config.json"
 BACKEND_HEALTH_URL="http://localhost:8000/health/ready"
 HEALTH_RETRIES=60
@@ -228,6 +228,14 @@ log "INFO: Loaded .env from ${ENV_FILE}"
 _WORKSPACE_MOUNT_PATH="${WORKSPACE_MOUNT_PATH:-./workspaces}"
 export SPECFLOW_HOME_PATH="${SPECFLOW_HOME_MOUNT_PATH:-${HOME}/.specflow}"
 export FIRESTORE_DATABASE_NAME="${FIRESTORE_DATABASE_NAME:-specflow}"
+
+# Host-side DB target for the provisioning + seeding subshells (uv run on the host). These
+# are plain (non-exported) vars so `docker compose up` still gives the CONTAINER its own
+# SQLITE_DB_PATH from .env — host writes must go to the bind-mount SOURCE
+# (${SPECFLOW_HOME_PATH}/db/specflow.db), never the container-internal /root/.specflow path.
+_DATABASE_TYPE="${DATABASE_TYPE:-sqlite}"
+_SQLITE_DB_PATH="${SPECFLOW_HOME_PATH}/db/specflow.db"
+
 log "INFO: SpecFlow home (central SQLite db) dir: ${SPECFLOW_HOME_PATH}"
 log "INFO: Firestore database name (hosted-connect mode only): ${FIRESTORE_DATABASE_NAME}"
 
@@ -440,64 +448,56 @@ if [[ -n "${OWN_REPOS}" ]]; then
     # back portably — BSD/macOS paste errors without the trailing '-'.
     _OWN_REPO_NAMES=$(echo "${OWN_REPOS}" | tr ',' '\n' | sed 's|.*/||' | paste -sd ',' -)
     if [[ "${DRY_RUN}" == "true" ]]; then
-        info "[DRY RUN] --provide-own-repos: would run create_generation_session_repos.py --repos ${_OWN_REPO_NAMES} --github-org ${_GH_ORG} --skip-metrics --skip-firestore --output-workspace-config ${WORKSPACES_JSON}"
+        info "[DRY RUN] --provide-own-repos: would run create_generation_session_repos.py --repos ${_OWN_REPO_NAMES} --github-org ${_GH_ORG} --skip-metrics (seeds the ${_DATABASE_TYPE} workspace pool directly)"
     else
-        info "Looking up P10Y IDs for provided repos and writing ${WORKSPACES_JSON} ..."
-        log "INFO: Running create_generation_session_repos.py --repos ${_OWN_REPO_NAMES} --github-org ${_GH_ORG} --skip-metrics --skip-firestore"
+        info "Looking up P10Y IDs for provided repos and seeding the workspace pool ..."
+        log "INFO: Running create_generation_session_repos.py --repos ${_OWN_REPO_NAMES} --github-org ${_GH_ORG} --skip-metrics (DATABASE_TYPE=${_DATABASE_TYPE})"
         (
             cd "${SCRIPT_DIR}/backend"
+            DATABASE_TYPE="${_DATABASE_TYPE}" \
+            SQLITE_DB_PATH="${_SQLITE_DB_PATH}" \
+            FIRESTORE_EMULATOR_HOST="${FIRESTORE_EMULATOR_HOST:-localhost:8080}" \
             uv run python scripts/create_generation_session_repos.py \
                 --repos "${_OWN_REPO_NAMES}" \
                 --github-org "${_GH_ORG}" \
-                --skip-metrics \
-                --skip-firestore \
-                --output-workspace-config "${WORKSPACES_JSON}"
+                --skip-metrics
         ) > >(log_stream) 2> >(log_stream) \
-            || error "Failed to look up P10Y IDs for provided repos. Check ${LOG_FILE}. Ensure the repos exist in GitHub and are synced in Compass."
-        info "Workspace config written at ${WORKSPACES_JSON}."
+            || error "Failed to look up P10Y IDs / seed provided repos. Check ${LOG_FILE}. Ensure the repos exist in GitHub and are synced in Compass."
+        info "Workspace pool seeded from provided repos."
     fi
 else
     _REPO_PREFIX="${WORKSPACE_REPO_PREFIX:-specflow-workspace}"
     if [[ "${DRY_RUN}" == "true" ]]; then
-        info "[DRY RUN] Would run create_generation_session_repos.py --start ${REPO_RANGE_START} --end ${REPO_RANGE_END} --prefix ${_REPO_PREFIX} --github-org ${_GH_ORG} --output-workspace-config ${WORKSPACES_JSON} --skip-firestore (${MAX_PARALLEL_RUNS} set(s) of 3 = ${REPO_RANGE_END} repos)"
+        info "[DRY RUN] Would run create_generation_session_repos.py --start ${REPO_RANGE_START} --end ${REPO_RANGE_END} --prefix ${_REPO_PREFIX} --github-org ${_GH_ORG} (seeds the ${_DATABASE_TYPE} workspace pool directly; ${MAX_PARALLEL_RUNS} set(s) of 3 = ${REPO_RANGE_END} repos)"
     else
         info "Ensuring ${MAX_PARALLEL_RUNS} set(s) of 3 workspace repos (${REPO_RANGE_END} total) and P10Y metrics ..."
-        log "INFO: Running create_generation_session_repos.py --start ${REPO_RANGE_START} --end ${REPO_RANGE_END} --prefix ${_REPO_PREFIX} --github-org ${_GH_ORG} --output-workspace-config <path> --skip-firestore"
+        log "INFO: Running create_generation_session_repos.py --start ${REPO_RANGE_START} --end ${REPO_RANGE_END} --prefix ${_REPO_PREFIX} --github-org ${_GH_ORG} (DATABASE_TYPE=${_DATABASE_TYPE})"
         # H (fail-loud): a provisioning failure is a hard error — never fall through to an
         # empty pool reported as success.
         (
             cd "${SCRIPT_DIR}/backend"
+            DATABASE_TYPE="${_DATABASE_TYPE}" \
+            SQLITE_DB_PATH="${_SQLITE_DB_PATH}" \
+            FIRESTORE_EMULATOR_HOST="${FIRESTORE_EMULATOR_HOST:-localhost:8080}" \
             uv run python scripts/create_generation_session_repos.py \
                 --start "${REPO_RANGE_START}" \
                 --end "${REPO_RANGE_END}" \
                 --prefix "${_REPO_PREFIX}" \
-                --github-org "${_GH_ORG}" \
-                --output-workspace-config "${WORKSPACES_JSON}" \
-                --skip-firestore
+                --github-org "${_GH_ORG}"
         ) > >(log_stream) 2> >(log_stream) \
             || error "Workspace provisioning failed (create_generation_session_repos.py). Check ${LOG_FILE}."
-        info "Workspace config refreshed at ${WORKSPACES_JSON}."
+        info "Workspace pool seeded (${REPO_RANGE_END} repos)."
     fi
 fi
 
 # ---------------------------------------------------------------------------
-# Step 6: Seed the active database from durable workspace config
+# Step 6: Seed the bootstrap API key + local-auth identity sentinel.
+# The workspace pool was already seeded straight into the database by the provisioning step
+# above (no flat-file handoff), so init_db.py runs without a workspace-config file here.
 # ---------------------------------------------------------------------------
-_DATABASE_TYPE="${DATABASE_TYPE:-sqlite}"
-# Seeding runs HOST-side (uv run), so it must target the host bind-mount SOURCE
-# (${SPECFLOW_HOME_PATH}/db/specflow.db), NOT the container-internal SQLITE_DB_PATH from .env
-# (/root/.specflow/db/specflow.db) — that host path is unwritable for non-root and would seed a
-# file the container never reads. SPECFLOW_HOME_MOUNT_PATH is the single knob for both sides.
-_SQLITE_DB_PATH="${SPECFLOW_HOME_PATH}/db/specflow.db"
-
-# Guard (H): the provisioning steps above always write workspaces.json. A missing file
-# here means an anomalous failure — refuse to seed an empty pool.
-if [[ "${DRY_RUN}" != "true" && ! -f "${WORKSPACES_JSON}" ]]; then
-    error "${WORKSPACES_JSON} not found after provisioning. Refusing to seed an empty pool. Check ${LOG_FILE}."
-fi
 
 # Array form (not a string) so paths containing spaces stay a single argument.
-_SEED_FLAGS=(--workspace-config "${WORKSPACES_JSON}" --yes)
+_SEED_FLAGS=(--yes)
 if [[ "${RESET_LOCAL_DB}" == "true" ]]; then
     _SEED_FLAGS+=(--replace)
 fi
@@ -506,7 +506,7 @@ if [[ "${DRY_RUN}" == "true" ]]; then
     info "[DRY RUN] Would run: DATABASE_TYPE=${_DATABASE_TYPE} uv run scripts/init_db.py --dry-run ${_SEED_FLAGS[*]}"
     log "INFO: [DRY RUN] init_db.py --dry-run ${_SEED_FLAGS[*]}"
 else
-    info "Seeding the ${_DATABASE_TYPE} database ..."
+    info "Seeding API key + local identity into the ${_DATABASE_TYPE} database ..."
     log "INFO: Running init_db.py ${_SEED_FLAGS[*]} (DATABASE_TYPE=${_DATABASE_TYPE})"
     (
         cd "${SCRIPT_DIR}/backend"