From d92ae6b57100d65c02f6013737532e3cc520d45d Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Thu, 18 Jun 2026 20:03:55 -0500
Subject: [PATCH 01/89] feat: Hugging Face model search and thinking-capability
 detection

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 docs/configurations.md           |   2 +
 src-tauri/src/config/defaults.rs |  12 +
 src-tauri/src/lib.rs             |   2 +
 src-tauri/src/models/mod.rs      | 503 ++++++++++++++++++++++++++++++-
 4 files changed, 514 insertions(+), 5 deletions(-)

diff --git a/docs/configurations.md b/docs/configurations.md
index 6d6c0d75..3858ba33 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -190,6 +190,8 @@ The table below also lists the baked-in safety limits that govern Thuki's commun
 | `MAX_HF_API_BODY_BYTES`                     | `4 MiB`  | No       | Defense-in-depth bound on attacker-controlled data from a remote service, mirroring `MAX_OLLAMA_TAGS_BODY_BYTES`. | —      | The largest Hugging Face API response body (repo file listings) Thuki will accept while resolving a model to download. Larger responses are rejected mid-stream and the request returns an error. |
 | `HF_API_TIMEOUT_SECS`                       | `15 s`   | No       | Protocol cap on a hung remote service so the download UI cannot stall on metadata resolution; 15 s is generous for a small metadata call over the internet. | —      | How long Thuki waits for a Hugging Face API metadata call (repo file listing) to respond before giving up. Applies to resolving pasted repo ids and listing a repo's GGUF files, not to the model download itself. |
 | `HF_BASE_URL`                               | `https://huggingface.co` | No | Single origin for model metadata and downloads. Provenance comes from the pinned repo revisions in the curated starter registry, and those pins are only meaningful against the canonical Hub; an arbitrary mirror could serve different content under the same revision ids. | — | The Hugging Face origin Thuki uses for all model metadata calls and blob downloads. Every starter in the registry pins a repo at an exact revision and carries a compiled-in sha256 digest checked after download; the digest catches truncation, bit rot, and resume corruption, while the pinned revision on the canonical Hub is what fixes which content is fetched. |
+| `HF_SEARCH_LIMIT`                           | `30`     | No       | A fixed page size for the in-app model search: the most-downloaded N results cover the discovery need, and cursor pagination beyond it is out of scope until the browse UI requires it. | —      | How many GGUF model repos a single in-app Hugging Face search returns, most-downloaded first. |
+| `MAX_HF_SEARCH_QUERY_LEN`                   | `200 bytes` | No    | Defense-in-depth bound on attacker-influenced input: the query reaches the fixed Hub host (no SSRF) and is percent-encoded by the client, but an unbounded string is still rejected to cap request size. | —      | The longest search string Thuki sends to the Hugging Face model search. A longer query is rejected before any network call. |
 | `OPENAI_MODELS_TIMEOUT_SECS`                | `5 s`    | No       | Protocol cap on a hung server so the Settings model dropdown cannot stall; the OpenAI-compatible server is local or LAN-hosted in the common case, so 5 s is generous. | —      | How long Thuki waits for an OpenAI-compatible server's `/v1/models` listing to respond before giving up. Applies to the Settings model dropdown for that provider, not to chat requests. |
 | `MAX_SSE_LINE_BYTES`                        | `1 MiB`  | No       | Defense-in-depth bound on attacker-controlled stream data. A malicious or broken chat server could otherwise grow a single stream line without limit and exhaust memory. | —      | The longest single Server-Sent-Events line Thuki accepts while streaming a chat response from an OpenAI-compatible (`/v1`) server. A stream line exceeding this aborts the response with an error. |
 
diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs
index ed0c8b30..f8dd374f 100644
--- a/src-tauri/src/config/defaults.rs
+++ b/src-tauri/src/config/defaults.rs
@@ -404,6 +404,18 @@ pub const OPENAI_MODELS_TIMEOUT_SECS: u64 = 5;
 /// the integrity guarantees that make the curated starter registry safe.
 pub const HF_BASE_URL: &str = "https://huggingface.co";
 
+/// Page size for the in-app Hugging Face GGUF model search. Baked-in: a fixed
+/// number of most-downloaded results per query is enough for the browser;
+/// cursor pagination beyond this is intentionally out of scope until the UI
+/// needs it.
+pub const HF_SEARCH_LIMIT: usize = 30;
+
+/// Maximum accepted byte length for a Hugging Face search query, measured after
+/// trimming, before it is sent upstream. Defense-in-depth bound on
+/// attacker-influenced input: the query reaches the fixed Hub host (no SSRF) and
+/// is percent-encoded by the client, but longer strings are rejected to cap request size.
+pub const MAX_HF_SEARCH_QUERY_LEN: usize = 200;
+
 /// Maximum accepted byte length for a model slug passed to `set_active_model`.
 /// Real Ollama slugs are a handful of characters; 256 is generous while still
 /// capping adversarial inputs long before any network or database work.
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 57f725fe..a20588e6 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -2259,6 +2259,8 @@ pub fn run() {
             #[cfg(not(coverage))]
             models::list_hf_repo_ggufs,
             #[cfg(not(coverage))]
+            models::search_hf_models,
+            #[cfg(not(coverage))]
             models::list_openai_models,
             #[cfg(not(coverage))]
             models::cancel_model_download,
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 316251bd..29c21c56 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -29,9 +29,10 @@ use tauri::Manager;
 
 use crate::config::defaults::{
     DEFAULT_OLLAMA_SHOW_REQUEST_TIMEOUT_SECS, DEFAULT_OLLAMA_TAGS_REQUEST_TIMEOUT_SECS,
-    HF_API_TIMEOUT_SECS, HF_BASE_URL, MAX_HF_API_BODY_BYTES, MAX_MODEL_SLUG_LEN,
-    MAX_OLLAMA_SHOW_BODY_BYTES, MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS,
-    PROVIDER_ID_BUILTIN, PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI,
+    HF_API_TIMEOUT_SECS, HF_BASE_URL, HF_SEARCH_LIMIT, MAX_HF_API_BODY_BYTES,
+    MAX_HF_SEARCH_QUERY_LEN, MAX_MODEL_SLUG_LEN, MAX_OLLAMA_SHOW_BODY_BYTES,
+    MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS, PROVIDER_ID_BUILTIN,
+    PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI,
 };
 use crate::config::AppConfig;
 
@@ -1252,6 +1253,35 @@ pub fn quant_from_filename(file: &str) -> String {
         .unwrap_or_default()
 }
 
+/// Marker substrings that flag a GGUF model as emitting explicit reasoning
+/// tokens (rendered in the ThinkingBlock UI). There is no machine-readable
+/// thinking signal in GGUF metadata or the Hugging Face API, so detection reads
+/// the publisher's own naming: an explicit reasoning self-label
+/// (`thinking`/`reasoning`/`reasoner`) or a known reasoning-first family.
+/// Entries must be lowercase ASCII: [`detect_thinking`] lowercases only the
+/// names it searches. Kept narrow to avoid false positives; curated starters set
+/// the flag in the registry and never consult this list; user overrides win.
+const THINKING_MARKERS: &[&str] = &[
+    "thinking",
+    "reasoning",
+    "reasoner",
+    "deepseek-r1",
+    "qwq",
+    "gpt-oss",
+    "magistral",
+];
+
+/// Heuristic reasoning-model check for an arbitrary GGUF model: ASCII-lowercases
+/// the repo id and the file name, then reports whether either one contains any
+/// [`THINKING_MARKERS`] entry as a substring. Yields `false` when no marker
+/// occurs in either name.
+pub fn detect_thinking(repo: &str, file: &str) -> bool {
+    let names = [repo.to_ascii_lowercase(), file.to_ascii_lowercase()];
+    names
+        .iter()
+        .any(|name| THINKING_MARKERS.iter().any(|marker| name.contains(marker)))
+}
+
 /// A `.gguf` entry in a Hugging Face repo listing, for the paste-a-repo UI.
 #[derive(Debug, Clone, PartialEq, Serialize)]
 pub struct HfGgufFile {
@@ -1323,7 +1353,8 @@ pub struct MmprojCompanion {
 
 /// Pure parse of an HF repo listing into the spec for one target `file`.
 /// Capability rule for pasted repos: vision = an `mmproj*.gguf` sibling with
-/// complete LFS metadata exists; thinking = false (full detection is not yet implemented).
+/// complete LFS metadata exists; thinking is derived from the model name by
+/// [`detect_thinking`] when the row is recorded in [`repo_installed_model`].
 pub fn resolve_listing(body: &[u8], file: &str) -> Result<RepoResolved, String> {
     let info: HfRepoInfo = serde_json::from_slice(body)
         .map_err(|e| format!("failed to decode Hugging Face API response: {e}"))?;
@@ -1481,6 +1512,161 @@ pub async fn fetch_repo_gguf_listing(
     parse_gguf_listing(&body)
 }
 
+// ─── Hugging Face model search ───────────────────────────────────────────────
+
+/// One repo row from a Hugging Face model search, trimmed to the fields the
+/// in-app browser needs to identify, rank, and gate a model.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct HfModelSummary {
+    /// Repo id, e.g. `unsloth/Qwen3.5-9B-GGUF`; the install target.
+    pub id: String,
+    /// The Hub's `downloads` figure, used for sorting and shown as a trust signal;
+    /// `0` when omitted. NOTE(review): HF documents it as last-30-days - confirm.
+    pub downloads: u64,
+    /// True when the repo is access-gated (license click-through or manual
+    /// approval). Gated repos cannot be fetched anonymously, so the UI can flag
+    /// them instead of offering a download that would fail.
+    pub gated: bool,
+}
+
+/// One entry in the Hugging Face `/api/models` search response. Only the fields
+/// surfaced by [`HfModelSummary`] are decoded; unknown keys are ignored (upstream
+/// additions cannot break decoding) and absent keys take their `Default`.
+#[derive(Deserialize)]
+struct HfSearchEntry {
+    #[serde(default)]
+    id: String,
+    #[serde(default)]
+    downloads: u64,
+    /// HF reports `gated` as `false` or a strategy string (`"auto"`/`"manual"`);
+    /// [`deserialize_gated`] normalizes it to a bool. Absent on some rows, so it
+    /// defaults to `false`.
+    #[serde(default, deserialize_with = "deserialize_gated")]
+    gated: bool,
+}
+
+/// Collapses Hugging Face's polymorphic `gated` value into a plain bool. A JSON
+/// string (an access strategy such as `"manual"`) or a literal `true` yields
+/// `true`; `false`, `null`, and every other JSON shape yield `false`.
+fn deserialize_gated<'de, D>(deserializer: D) -> Result<bool, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    let raw = serde_json::Value::deserialize(deserializer)?;
+    Ok(matches!(
+        raw,
+        serde_json::Value::Bool(true) | serde_json::Value::String(_)
+    ))
+}
+
+/// Turns an `/api/models` search body into summary rows in upstream order,
+/// skipping entries with an empty `id` since a blank repo cannot be installed.
+pub fn parse_search_results(body: &[u8]) -> Result<Vec<HfModelSummary>, String> {
+    let decoded = serde_json::from_slice::<Vec<HfSearchEntry>>(body)
+        .map_err(|e| format!("failed to decode Hugging Face search response: {e}"))?;
+    let mut rows = Vec::with_capacity(decoded.len());
+    for entry in decoded.into_iter().filter(|e| !e.id.is_empty()) {
+        rows.push(HfModelSummary {
+            id: entry.id,
+            downloads: entry.downloads,
+            gated: entry.gated,
+        });
+    }
+    Ok(rows)
+}
+
+/// Trims `query`, rejects it when longer than [`MAX_HF_SEARCH_QUERY_LEN`] bytes, then
+/// searches `base_url` (a mock server in tests, [`HF_BASE_URL`] in production).
+pub async fn fetch_hf_search(
+    client: &reqwest::Client,
+    base_url: &str,
+    query: &str,
+) -> Result<Vec<HfModelSummary>, String> {
+    let trimmed = query.trim();
+    if trimmed.len() > MAX_HF_SEARCH_QUERY_LEN {
+        return Err(format!(
+            "search query exceeds maximum length of {MAX_HF_SEARCH_QUERY_LEN} bytes"
+        ));
+    }
+    let timeout = std::time::Duration::from_secs(HF_API_TIMEOUT_SECS);
+    let raw = fetch_hf_search_inner(
+        client,
+        base_url,
+        trimmed,
+        timeout,
+        MAX_HF_API_BODY_BYTES,
+        HF_SEARCH_LIMIT,
+    )
+    .await?;
+    parse_search_results(&raw)
+}
+
+/// Innermost search fetcher with timeout, body cap, and result limit
+/// configurable so the cap branches are testable. Query parameters are
+/// percent-encoded by `Url::parse_with_params`, so a query cannot smuggle URL
+/// syntax, and the host stays fixed to `base_url` (no SSRF surface). The body cap
+/// is enforced during the streaming read, mirroring [`fetch_hf_repo_listing_inner`].
+async fn fetch_hf_search_inner(
+    client: &reqwest::Client,
+    base_url: &str,
+    query: &str,
+    timeout: std::time::Duration,
+    max_body_bytes: usize,
+    limit: usize,
+) -> Result<Vec<u8>, String> {
+    let endpoint = format!("{}/api/models", base_url.trim_end_matches('/'));
+    let limit = limit.to_string();
+    let mut params: Vec<(&str, &str)> = vec![
+        ("library", "gguf"),
+        ("sort", "downloads"),
+        ("direction", "-1"),
+        ("limit", &limit),
+    ];
+    // An empty query browses the most-downloaded GGUF repos; only attach the
+    // search term when the user actually typed one.
+    if !query.is_empty() {
+        params.push(("search", query));
+    }
+    let url = reqwest::Url::parse_with_params(&endpoint, params)
+        .map_err(|e| format!("failed to build Hugging Face search URL: {e}"))?;
+    let response = client
+        .get(url)
+        .timeout(timeout)
+        .send()
+        .await
+        .map_err(|e| format!("failed to reach Hugging Face: {e}"))?;
+
+    if !response.status().is_success() {
+        return Err(format!(
+            "Hugging Face API returned HTTP {}",
+            response.status().as_u16()
+        ));
+    }
+
+    // Compare in u64: narrowing a huge declared length to usize could wrap under the cap.
+    if let Some(declared_len) = response.content_length() {
+        if declared_len > max_body_bytes as u64 {
+            return Err(format!(
+                "Hugging Face search response exceeded {max_body_bytes} bytes"
+            ));
+        }
+    }
+
+    let mut stream = response.bytes_stream();
+    let mut buf: Vec<u8> = Vec::new();
+    while let Some(chunk) = stream.next().await {
+        let chunk = chunk.map_err(|e| format!("failed to read Hugging Face search body: {e}"))?;
+        if buf.len() + chunk.len() > max_body_bytes {
+            return Err(format!(
+                "Hugging Face search response exceeded {max_body_bytes} bytes"
+            ));
+        }
+        buf.extend_from_slice(&chunk);
+    }
+
+    Ok(buf)
+}
+
 // ─── OpenAI-compatible model listing ─────────────────────────────────────────
 
 /// Subset of an OpenAI-compatible `/v1/models` response Thuki consumes.
@@ -1643,7 +1829,7 @@ pub fn repo_installed_model(
         size_bytes: resolved.weights_size_bytes,
         quant: quant_from_filename(file),
         vision: resolved.mmproj.is_some(),
-        thinking: false,
+        thinking: detect_thinking(repo, file),
         mmproj_file: resolved.mmproj.as_ref().map(|m| m.file.clone()),
         mmproj_sha256: resolved.mmproj.as_ref().map(|m| m.sha256.clone()),
     }
@@ -1820,6 +2006,18 @@ pub async fn list_hf_repo_ggufs(
     fetch_repo_gguf_listing(&client, HF_BASE_URL, &repo).await
 }
 
+/// Tauri command behind the in-app model browser: searches [`HF_BASE_URL`] for
+/// GGUF repos matching `query` via [`fetch_hf_search`], most-downloaded first.
+/// A blank query lists the most-downloaded GGUF repos; errors are plain strings.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+pub async fn search_hf_models(
+    query: String,
+    client: tauri::State<'_, reqwest::Client>,
+) -> Result<Vec<HfModelSummary>, String> {
+    fetch_hf_search(&client, HF_BASE_URL, &query).await
+}
+
 /// Lists the models served by the configured OpenAI-compatible provider via
 /// its `/v1/models` endpoint, using the Keychain API key when one is stored.
 #[cfg_attr(coverage_nightly, coverage(off))]
@@ -4197,6 +4395,259 @@ mod tests {
         assert_eq!(files[0].file, "model-Q4_K_M.gguf");
     }
 
+    // ── Model library: Hugging Face search ───────────────────────────────────
+
+    /// Search fixture exercising every `gated` shape (`false`, `true`, strategy
+    /// string, absent, `null`) plus an empty-id row that must be dropped.
+    fn search_fixture() -> serde_json::Value {
+        serde_json::json!([
+            {"id": "org/alpha-GGUF", "downloads": 1000, "gated": false},
+            {"id": "org/beta-GGUF", "downloads": 500, "gated": "manual"},
+            {"id": "org/gamma-GGUF"},
+            {"id": "org/delta-GGUF", "downloads": 1, "gated": true},
+            {"id": "org/epsilon-GGUF", "downloads": 2, "gated": null},
+            {"id": "", "downloads": 9}
+        ])
+    }
+
+    #[test]
+    fn parse_search_results_maps_rows_and_normalizes_gated() {
+        let body = search_fixture().to_string();
+        let rows = parse_search_results(body.as_bytes()).unwrap();
+        assert_eq!(
+            rows,
+            vec![
+                HfModelSummary {
+                    id: "org/alpha-GGUF".to_string(),
+                    downloads: 1000,
+                    gated: false,
+                },
+                HfModelSummary {
+                    id: "org/beta-GGUF".to_string(),
+                    downloads: 500,
+                    gated: true,
+                },
+                HfModelSummary {
+                    id: "org/gamma-GGUF".to_string(),
+                    downloads: 0,
+                    gated: false,
+                },
+                HfModelSummary {
+                    id: "org/delta-GGUF".to_string(),
+                    downloads: 1,
+                    gated: true,
+                },
+                HfModelSummary {
+                    id: "org/epsilon-GGUF".to_string(),
+                    downloads: 2,
+                    gated: false,
+                },
+            ]
+        );
+    }
+
+    #[test]
+    fn parse_search_results_rejects_invalid_json() {
+        let err = parse_search_results(b"not json").unwrap_err();
+        assert!(err.contains("failed to decode"), "got: {err}");
+    }
+
+    #[test]
+    fn hf_model_summary_serializes_snake_case() {
+        let v = serde_json::to_value(HfModelSummary {
+            id: "o/r".to_string(),
+            downloads: 7,
+            gated: true,
+        })
+        .unwrap();
+        assert_eq!(
+            v,
+            serde_json::json!({"id": "o/r", "downloads": 7, "gated": true})
+        );
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_returns_rows_and_sends_filtered_query() {
+        let mut server = mockito::Server::new_async().await;
+        let mock = server
+            .mock("GET", "/api/models")
+            .match_query(mockito::Matcher::AllOf(vec![
+                mockito::Matcher::UrlEncoded("library".into(), "gguf".into()),
+                mockito::Matcher::UrlEncoded("search".into(), "qwen".into()),
+                mockito::Matcher::UrlEncoded("sort".into(), "downloads".into()),
+            ]))
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(search_fixture().to_string())
+            .create_async()
+            .await;
+        let client = reqwest::Client::new();
+        let rows = fetch_hf_search(&client, &server.url(), "qwen")
+            .await
+            .unwrap();
+        mock.assert_async().await;
+        assert_eq!(rows.len(), 5);
+        assert_eq!(rows[0].id, "org/alpha-GGUF");
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_omits_blank_query() {
+        let mut server = mockito::Server::new_async().await;
+        let _m = server
+            .mock("GET", "/api/models")
+            .match_query(mockito::Matcher::Regex(r"limit=\d+$".into()))
+            .with_status(200)
+            .with_body("[]")
+            .create_async()
+            .await;
+        let client = reqwest::Client::new();
+        // Whitespace trims to empty; the mock matches only if no `search` follows `limit`.
+        let rows = fetch_hf_search(&client, &server.url(), "   ")
+            .await
+            .unwrap();
+        assert!(rows.is_empty());
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_maps_http_error() {
+        let mut server = mockito::Server::new_async().await;
+        let _m = server
+            .mock("GET", "/api/models")
+            .match_query(mockito::Matcher::Any)
+            .with_status(503)
+            .create_async()
+            .await;
+        let client = reqwest::Client::new();
+        let err = fetch_hf_search(&client, &server.url(), "q")
+            .await
+            .unwrap_err();
+        assert!(err.contains("503"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_maps_transport_error() {
+        let client = reqwest::Client::new();
+        let err = fetch_hf_search(&client, "http://127.0.0.1:1", "q")
+            .await
+            .unwrap_err();
+        assert!(err.contains("failed to reach Hugging Face"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_rejects_overlong_query() {
+        let client = reqwest::Client::new();
+        let long = "x".repeat(crate::config::defaults::MAX_HF_SEARCH_QUERY_LEN + 1);
+        let err = fetch_hf_search(&client, "http://127.0.0.1:9", &long)
+            .await
+            .unwrap_err();
+        assert!(err.contains("maximum length"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_inner_rejects_body_over_cap_via_content_length() {
+        let mut server = mockito::Server::new_async().await;
+        let _m = server
+            .mock("GET", "/api/models")
+            .match_query(mockito::Matcher::Any)
+            .with_status(200)
+            .with_body("x".repeat(100))
+            .create_async()
+            .await;
+        let client = reqwest::Client::new();
+        let err = fetch_hf_search_inner(
+            &client,
+            &server.url(),
+            "q",
+            std::time::Duration::from_secs(5),
+            32,
+            30,
+        )
+        .await
+        .unwrap_err();
+        assert!(err.contains("exceeded"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_inner_rejects_body_over_cap_when_chunked() {
+        // Chunked response (no Content-Length): the incremental cap must reject.
+        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+        let addr = listener.local_addr().unwrap();
+        std::thread::spawn(move || {
+            let (mut conn, _) = listener.accept().unwrap();
+            use std::io::{Read, Write};
+            let mut request_buf = [0u8; 1024];
+            let _ = conn.read(&mut request_buf);
+            let _ = conn.write_all(
+                b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n\
+                  0a\r\n0123456789\r\n\
+                  0a\r\n0123456789\r\n\
+                  0a\r\n0123456789\r\n\
+                  0\r\n\r\n",
+            );
+        });
+        let client = reqwest::Client::new();
+        let base = format!("http://{addr}");
+        let err = fetch_hf_search_inner(
+            &client,
+            &base,
+            "q",
+            std::time::Duration::from_secs(5),
+            20,
+            30,
+        )
+        .await
+        .unwrap_err();
+        assert!(err.contains("exceeded"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_inner_maps_body_read_error() {
+        // Headers promise 100 body bytes, then the server hangs up.
+        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+        let addr = listener.local_addr().unwrap();
+        std::thread::spawn(move || {
+            let (mut stream, _) = listener.accept().unwrap();
+            use std::io::{Read, Write};
+            let mut buf = [0u8; 1024];
+            let _ = stream.read(&mut buf);
+            let _ = stream.write_all(
+                b"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 100\r\nConnection: close\r\n\r\n",
+            );
+        });
+        let client = reqwest::Client::new();
+        let base = format!("http://{addr}");
+        let err = fetch_hf_search_inner(
+            &client,
+            &base,
+            "q",
+            std::time::Duration::from_secs(5),
+            4 * 1024 * 1024,
+            30,
+        )
+        .await
+        .unwrap_err();
+        assert!(
+            err.contains("failed to read Hugging Face search body"),
+            "got: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn fetch_hf_search_inner_rejects_unparseable_base_url() {
+        let client = reqwest::Client::new();
+        let err = fetch_hf_search_inner(
+            &client,
+            "not a url",
+            "q",
+            std::time::Duration::from_secs(5),
+            4 * 1024 * 1024,
+            30,
+        )
+        .await
+        .unwrap_err();
+        assert!(err.contains("failed to build"), "got: {err}");
+    }
+
     // ── Model library: repo spec/model mapping ───────────────────────────────
 
     fn sample_resolved(with_mmproj: bool) -> RepoResolved {
@@ -4266,6 +4717,48 @@ mod tests {
         assert_eq!(m.mmproj_sha256, None);
     }
 
+    // ── Capability detection: thinking heuristic ─────────────────────────────
+
+    #[test]
+    fn detect_thinking_matches_reasoning_self_labels() {
+        // A repo or file whose own name advertises reasoning.
+        assert!(detect_thinking("acme/Model-Thinking", "model.gguf"));
+        assert!(detect_thinking("acme/model", "model-reasoning-Q4_K_M.gguf"));
+        assert!(detect_thinking("acme/reasoner-7b", "w.gguf"));
+    }
+
+    #[test]
+    fn detect_thinking_matches_known_reasoning_families() {
+        assert!(detect_thinking("deepseek-ai/DeepSeek-R1-GGUF", "x.gguf"));
+        assert!(detect_thinking("org/QwQ-32B-GGUF", "x.gguf"));
+        assert!(detect_thinking("ggml-org/gpt-oss-20b-GGUF", "x.gguf"));
+        assert!(detect_thinking("mistralai/Magistral-Small-GGUF", "x.gguf"));
+    }
+
+    #[test]
+    fn detect_thinking_is_case_insensitive() {
+        assert!(detect_thinking("ORG/GPT-OSS-20B", "MODEL.GGUF"));
+    }
+
+    #[test]
+    fn detect_thinking_defaults_false_without_markers() {
+        assert!(!detect_thinking(
+            "google/gemma-4-12b-it",
+            "gemma-4-12b-it-Q4_K_M.gguf"
+        ));
+        assert!(!detect_thinking("o/r", "w-Q4_K_M.gguf"));
+    }
+
+    #[test]
+    fn repo_installed_model_flags_thinking_from_name() {
+        let m = repo_installed_model(
+            "ggml-org/gpt-oss-20b-GGUF",
+            "gpt-oss-20b-Q4_K_M.gguf",
+            &sample_resolved(false),
+        );
+        assert!(m.thinking);
+    }
+
     // ── Model library: delete ────────────────────────────────────────────────
 
     #[test]

From da15b51138318770c1deb2f847475e258c5365e5 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 01:18:18 -0500
Subject: [PATCH 02/89] feat: restructure Settings to a premium left sidebar
 with a running-model footer

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/lib.rs                          |   2 +-
 src-tauri/tauri.conf.json                     |   6 +-
 src/settings/SettingsWindow.test.tsx          | 152 +++++++--
 src/settings/SettingsWindow.tsx               | 178 ++++++-----
 .../components/RunningModelFooter.test.tsx    | 289 ++++++++++++++++++
 .../components/RunningModelFooter.tsx         | 113 +++++++
 .../hooks/useSettingsAutoResize.test.ts       |   4 +-
 src/settings/hooks/useSettingsAutoResize.ts   |  18 +-
 src/styles/settings.module.css                | 226 ++++++++++----
 9 files changed, 820 insertions(+), 168 deletions(-)
 create mode 100644 src/settings/components/RunningModelFooter.test.tsx
 create mode 100644 src/settings/components/RunningModelFooter.tsx

diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index a20588e6..8295f7b4 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -595,7 +595,7 @@ fn show_overlay(app_handle: &tauri::AppHandle, ctx: crate::context::ActivationCo
 /// the OS-default spawn position or previous moves.
 #[cfg_attr(coverage_nightly, coverage(off))]
 fn position_settings_window(window: &tauri::WebviewWindow) {
-    const SETTINGS_WIDTH: f64 = 580.0;
+    const SETTINGS_WIDTH: f64 = 760.0;
     // macOS menu bar is ~24 px logical on standard displays; notched MacBooks
     // push it to ~37 px. 72 px gives a comfortable ~35-48 px visual gap below
     // the menu bar on all hardware.
diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json
index 9cda66e8..f37deb67 100644
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -28,11 +28,11 @@
         "label": "settings",
         "title": "Thuki Settings",
         "url": "index.html#/settings",
-        "width": 580,
+        "width": 760,
         "height": 520,
-        "minWidth": 580,
+        "minWidth": 760,
         "minHeight": 280,
-        "maxWidth": 580,
+        "maxWidth": 760,
         "maxHeight": 700,
         "resizable": false,
         "fullscreen": false,
diff --git a/src/settings/SettingsWindow.test.tsx b/src/settings/SettingsWindow.test.tsx
index 93d2c44a..c092bccb 100644
--- a/src/settings/SettingsWindow.test.tsx
+++ b/src/settings/SettingsWindow.test.tsx
@@ -85,6 +85,14 @@ function defaultInvoke(cmd: string): unknown {
       return true;
     case 'check_screen_recording_permission':
       return true;
+    case 'get_model_picker_state':
+      return { active: null, all: [], displayNames: {}, ollamaReachable: true };
+    case 'list_installed_models':
+      return [];
+    case 'get_engine_status':
+      return { state: 'stopped', model_path: '', port: null, error: null };
+    case 'get_loaded_model':
+      return null;
     case 'get_updater_state':
       return {
         last_check_at_unix: null,
@@ -116,7 +124,7 @@ describe('SettingsWindow', () => {
   it('renders the five tab labels after config loads', async () => {
     render(<SettingsWindow />);
     await waitFor(() =>
-      expect(screen.getByRole('tab', { name: /AI/ })).toBeInTheDocument(),
+      expect(screen.getByRole('tab', { name: /Models/ })).toBeInTheDocument(),
     );
     expect(screen.getByRole('tab', { name: /Behavior/ })).toBeInTheDocument();
     expect(screen.getByRole('tab', { name: /Web/ })).toBeInTheDocument();
@@ -141,10 +149,10 @@ describe('SettingsWindow', () => {
     ).toBeInTheDocument();
   });
 
-  it('starts on the AI tab', async () => {
+  it('starts on the Models tab', async () => {
     render(<SettingsWindow />);
     await waitFor(() =>
-      expect(screen.getByRole('tab', { name: /AI/ })).toHaveAttribute(
+      expect(screen.getByRole('tab', { name: /Models/ })).toHaveAttribute(
         'aria-selected',
         'true',
       ),
@@ -171,7 +179,7 @@ describe('SettingsWindow', () => {
       .spyOn(globalThis, 'requestAnimationFrame')
       .mockImplementation(() => 0);
     const { container } = render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
     const body = container.querySelector('[role="tabpanel"]')!;
     expect(body.className).not.toMatch(/bodyScrollable/);
 
@@ -191,9 +199,9 @@ describe('SettingsWindow', () => {
 
   it('ArrowRight rotates focus to the next tab', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
 
-    const modelTab = screen.getByRole('tab', { name: /AI/ });
+    const modelTab = screen.getByRole('tab', { name: /Models/ });
     fireEvent.keyDown(modelTab, { key: 'ArrowRight' });
     expect(screen.getByRole('tab', { name: /Behavior/ })).toHaveAttribute(
       'aria-selected',
@@ -203,9 +211,9 @@ describe('SettingsWindow', () => {
 
   it('ArrowLeft wraps to the last tab when starting on the first', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
 
-    const modelTab = screen.getByRole('tab', { name: /AI/ });
+    const modelTab = screen.getByRole('tab', { name: /Models/ });
     await act(async () => {
       fireEvent.keyDown(modelTab, { key: 'ArrowLeft' });
       await Promise.resolve();
@@ -219,9 +227,9 @@ describe('SettingsWindow', () => {
 
   it('non-arrow keys are ignored by the tab key handler', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
 
-    const modelTab = screen.getByRole('tab', { name: /AI/ });
+    const modelTab = screen.getByRole('tab', { name: /Models/ });
     fireEvent.keyDown(modelTab, { key: 'Enter' });
     expect(modelTab).toHaveAttribute('aria-selected', 'true');
   });
@@ -276,7 +284,7 @@ describe('SettingsWindow', () => {
 
   it('Cmd+, on the document re-focuses the settings window', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
 
     __mockWindow.setFocus.mockClear();
     fireEvent.keyDown(document, { key: ',', metaKey: true });
@@ -285,7 +293,7 @@ describe('SettingsWindow', () => {
 
   it('Other keystrokes do not trigger setFocus', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
 
     __mockWindow.setFocus.mockClear();
     fireEvent.keyDown(document, { key: ',' }); // no Meta
@@ -295,7 +303,7 @@ describe('SettingsWindow', () => {
 
   it('Cmd+W on the document hides the settings window', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
 
     __mockWindow.hide.mockClear();
     fireEvent.keyDown(document, { key: 'w', metaKey: true });
@@ -304,7 +312,7 @@ describe('SettingsWindow', () => {
 
   it('the close button hides the window instead of quitting', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
     __mockWindow.hide.mockClear();
     fireEvent.click(screen.getByRole('button', { name: /Close/ }));
     expect(__mockWindow.hide).toHaveBeenCalled();
@@ -312,11 +320,11 @@ describe('SettingsWindow', () => {
 
   it('mousedown on the chrome triggers startDragging when not on an interactive element', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
     __mockWindow.startDragging.mockClear();
     // Click on the body container itself (not on a button/input).
     const root = screen
-      .getByRole('tab', { name: /AI/ })
+      .getByRole('tab', { name: /Models/ })
       .closest('[role="tablist"]')!.parentElement!;
     fireEvent.mouseDown(root, { target: root });
     // The root is a div; not in INTERACTIVE_TAGS, so dragging fires.
@@ -325,9 +333,9 @@ describe('SettingsWindow', () => {
 
   it('mousedown that originates from an interactive element does NOT trigger drag', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
     __mockWindow.startDragging.mockClear();
-    fireEvent.mouseDown(screen.getByRole('tab', { name: /AI/ }));
+    fireEvent.mouseDown(screen.getByRole('tab', { name: /Models/ }));
     expect(__mockWindow.startDragging).not.toHaveBeenCalled();
   });
 
@@ -349,10 +357,10 @@ describe('SettingsWindow', () => {
 
   it('mousedown with a non-primary button is ignored (no drag, lets context menus through)', async () => {
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
     __mockWindow.startDragging.mockClear();
     const root = screen
-      .getByRole('tab', { name: /AI/ })
+      .getByRole('tab', { name: /Models/ })
       .closest('[role="tablist"]')!.parentElement!;
     fireEvent.mouseDown(root, { target: root, button: 2 });
     expect(__mockWindow.startDragging).not.toHaveBeenCalled();
@@ -396,7 +404,7 @@ describe('SettingsWindow', () => {
       await Promise.resolve();
       await Promise.resolve();
     });
-    expect(screen.getByRole('status')).toHaveTextContent('Saved');
+    expect(screen.getByText('✓ Saved')).toHaveTextContent('Saved');
 
     // Second save before pill auto-hides — clearTimeout(savedTimerRef.current) fires.
     fireEvent.click(incBtns()[0]);
@@ -406,7 +414,7 @@ describe('SettingsWindow', () => {
       await Promise.resolve();
       await Promise.resolve();
     });
-    expect(screen.getByRole('status')).toHaveTextContent('Saved');
+    expect(screen.getByText('✓ Saved')).toHaveTextContent('Saved');
   });
 
   it('unmount with the savedPill timer still pending clears it cleanly', async () => {
@@ -460,7 +468,7 @@ describe('SettingsWindow', () => {
       await Promise.resolve();
     });
 
-    expect(screen.getByRole('status')).toHaveTextContent('Saved');
+    expect(screen.getByText('✓ Saved')).toHaveTextContent('Saved');
 
     // After SAVED_PILL_DURATION_MS the pill toggles back to invisible. We
     // don't assert on that visibility here because the underlying class
@@ -484,7 +492,7 @@ describe('SettingsWindow', () => {
       return defaultInvoke(cmd);
     });
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
     await waitFor(() =>
       expect(screen.getByText(/0\.8\.0 is ready/)).toBeInTheDocument(),
     );
@@ -545,7 +553,7 @@ describe('SettingsWindow', () => {
       return defaultInvoke(cmd);
     });
     render(<SettingsWindow />);
-    await waitFor(() => screen.getByRole('tab', { name: /AI/ }));
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
     // Allow time for updater state to load
     await act(async () => {
       await Promise.resolve();
@@ -554,3 +562,104 @@ describe('SettingsWindow', () => {
     expect(screen.queryByText(/0\.8\.0 is ready/)).not.toBeInTheDocument();
   });
 });
+
+// Sidebar layout: the section nav is a vertical tablist, ArrowUp/ArrowDown
+// are accepted alongside ArrowLeft/ArrowRight, and the Running-model footer
+// is rendered as a labelled status region.
+describe('SettingsWindow left sidebar (Phase 3)', () => {
+  it('renders the section nav as a vertical sidebar', async () => {
+    render(<SettingsWindow />);
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
+    expect(screen.getByRole('tablist')).toHaveAttribute(
+      'aria-orientation',
+      'vertical',
+    );
+  });
+
+  it('renders Models as the first section label', async () => {
+    render(<SettingsWindow />);
+    // Assert on position, not mere presence: the first tab in DOM order must
+    // be the Models section.
+    await waitFor(() =>
+      expect(screen.getAllByRole('tab')[0]).toHaveTextContent(/Models/),
+    );
+  });
+
+  it('ArrowDown rotates focus to the next sidebar section', async () => {
+    render(<SettingsWindow />);
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
+    fireEvent.keyDown(screen.getByRole('tab', { name: /Models/ }), {
+      key: 'ArrowDown',
+    });
+    expect(screen.getByRole('tab', { name: /Behavior/ })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+  });
+
+  it('ArrowUp wraps to the last sidebar section from the first', async () => {
+    render(<SettingsWindow />);
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
+    await act(async () => {
+      fireEvent.keyDown(screen.getByRole('tab', { name: /Models/ }), {
+        key: 'ArrowUp',
+      });
+      await Promise.resolve();
+      await Promise.resolve();
+    });
+    expect(screen.getByRole('tab', { name: /About/ })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+  });
+
+  it('shows the running-model footer with the active built-in model and size', async () => {
+    // Point the built-in provider at a model the manifest mock reports as
+    // installed, and report the engine as loaded.
+    const builtinConfig: RawAppConfig = {
+      ...SAMPLE,
+      inference: {
+        ...SAMPLE.inference,
+        active_provider: 'builtin',
+        providers: SAMPLE.inference.providers.map((p) =>
+          p.kind === 'builtin'
+            ? { ...p, model: 'org/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf' }
+            : p,
+        ),
+      },
+    };
+    invokeMock.mockImplementation(async (cmd: string) => {
+      if (cmd === 'get_config') return builtinConfig;
+      if (cmd === 'list_installed_models') {
+        return [
+          {
+            id: 'org/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf',
+            display_name: 'Qwen3.5 9B',
+            size_bytes: 6_600_000_000,
+            quant: 'Q4_K_M',
+          },
+        ];
+      }
+      if (cmd === 'get_engine_status') {
+        return { state: 'loaded', model_path: '/x', port: 1, error: null };
+      }
+      return defaultInvoke(cmd);
+    });
+
+    render(<SettingsWindow />);
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent('Qwen3.5 9B');
+    expect(footer).toHaveTextContent(/Built-in/);
+    expect(footer).toHaveTextContent(/6\.6 GB/);
+  });
+
+  it('running-model footer shows a placeholder when no model is resolved', async () => {
+    render(<SettingsWindow />);
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent(/No model/i);
+  });
+});
diff --git a/src/settings/SettingsWindow.tsx b/src/settings/SettingsWindow.tsx
index f33138b7..91f09469 100644
--- a/src/settings/SettingsWindow.tsx
+++ b/src/settings/SettingsWindow.tsx
@@ -31,6 +31,7 @@ import { SearchTab } from './tabs/SearchTab';
 import { DisplayTab } from './tabs/DisplayTab';
 import { AboutTab } from './tabs/AboutTab';
 import { SavedPill } from './components';
+import { RunningModelFooter } from './components/RunningModelFooter';
 import { WindowControls } from '../components/WindowControls';
 import { UpdateBanner } from '../components/UpdateBanner';
 import { useUpdater } from '../hooks/useUpdater';
@@ -44,8 +45,8 @@ const TABS: ReadonlyArray<{
 }> = [
   {
     id: 'general',
-    label: 'AI',
-    // Brain — visual cue that this tab is for the AI itself.
+    label: 'Models',
+    // Grid — the model library / management surface.
     icon: (
       <svg
         viewBox="0 0 24 24"
@@ -56,8 +57,10 @@ const TABS: ReadonlyArray<{
         strokeLinejoin="round"
         aria-hidden
       >
-        <path d="M9.5 2a3 3 0 0 0-3 3v.5a2.5 2.5 0 0 0-2 4 3 3 0 0 0 .5 5 2.5 2.5 0 0 0 1.5 4.5 3 3 0 0 0 5.5-1.5V5a3 3 0 0 0-2.5-3z" />
-        <path d="M14.5 2a3 3 0 0 1 3 3v.5a2.5 2.5 0 0 1 2 4 3 3 0 0 1-.5 5 2.5 2.5 0 0 1-1.5 4.5 3 3 0 0 1-5.5-1.5V5a3 3 0 0 1 2.5-3z" />
+        <rect x="3" y="3" width="7" height="7" rx="1.5" />
+        <rect x="14" y="3" width="7" height="7" rx="1.5" />
+        <rect x="3" y="14" width="7" height="7" rx="1.5" />
+        <rect x="14" y="14" width="7" height="7" rx="1.5" />
       </svg>
     ),
   },
@@ -151,14 +154,17 @@ const SAVED_PILL_DURATION_MS = 1500;
 
 /**
  * Static chrome offset from inner content to total window height:
- *   window padding-top (8) + WindowControls strip (~28) + tab bar (~70)
+ *   window padding-top (8) + WindowControls strip (~28)
  *   + body padding top+bottom (18 + 24 = 42).
+ * The section nav now lives in a left sidebar beside the content, so it no
+ * longer adds vertical chrome (the old top tab bar did). Room for the sidebar's
+ * own height is guaranteed by the hook's MIN_HEIGHT floor instead.
  * Empirically measured against the rendered Settings window. If any of
  * the chrome surfaces change height, update this constant rather than
  * trying to read `offsetHeight` at runtime — the auto-resize hook fires
  * before paint settles, so dynamic measurement of chrome would miss.
  */
-const CHROME_HEIGHT = 148;
+const CHROME_HEIGHT = 78;
 /** Recovery banner height when the corrupt-config marker is shown. */
 const BANNER_HEIGHT = 56;
 
@@ -345,81 +351,93 @@ export function SettingsWindow() {
         />
       ) : null}
 
-      <div
-        role="tablist"
-        aria-label="Settings sections"
-        className={styles.tabBar}
-      >
-        {TABS.map((tab) => {
-          const active = tab.id === activeTab;
-          return (
-            <button
-              key={tab.id}
-              type="button"
-              role="tab"
-              aria-selected={active}
-              aria-controls={`panel-${tab.id}`}
-              tabIndex={active ? 0 : -1}
-              className={`${styles.tab} ${active ? styles.tabActive : ''}`}
-              onClick={() => setActiveTab(tab.id)}
-              onKeyDown={(e) => {
-                if (e.key === 'ArrowRight' || e.key === 'ArrowLeft') {
-                  e.preventDefault();
-                  const idx = TABS.findIndex((t) => t.id === activeTab);
-                  const next =
-                    e.key === 'ArrowRight'
-                      ? TABS[(idx + 1) % TABS.length]
-                      : TABS[(idx - 1 + TABS.length) % TABS.length];
-                  setActiveTab(next.id);
-                }
-              }}
-            >
-              <span className={styles.tabIcon} aria-hidden>
-                {tab.icon}
-              </span>
-              <span className={styles.tabLabel}>{tab.label}</span>
-            </button>
-          );
-        })}
-      </div>
+      <div className={styles.stage}>
+        <div className={styles.side}>
+          <div className={styles.sideGroup}>Settings</div>
+          <div
+            role="tablist"
+            aria-label="Settings sections"
+            aria-orientation="vertical"
+            className={styles.sideTabs}
+          >
+            {TABS.map((tab) => {
+              const active = tab.id === activeTab;
+              return (
+                <button
+                  key={tab.id}
+                  type="button"
+                  role="tab"
+                  aria-selected={active}
+                  aria-controls={`panel-${tab.id}`}
+                  tabIndex={active ? 0 : -1}
+                  className={`${styles.sideItem} ${active ? styles.sideItemActive : ''}`}
+                  onClick={() => setActiveTab(tab.id)}
+                  onKeyDown={(e) => {
+                    const isNext =
+                      e.key === 'ArrowDown' || e.key === 'ArrowRight';
+                    const isPrev = e.key === 'ArrowUp' || e.key === 'ArrowLeft';
+                    if (isNext || isPrev) {
+                      e.preventDefault();
+                      const idx = TABS.findIndex((t) => t.id === activeTab);
+                      const next = isNext
+                        ? TABS[(idx + 1) % TABS.length]
+                        : TABS[(idx - 1 + TABS.length) % TABS.length];
+                      setActiveTab(next.id);
+                    }
+                  }}
+                >
+                  <span className={styles.sideItemIcon} aria-hidden>
+                    {tab.icon}
+                  </span>
+                  <span className={styles.sideItemLabel}>{tab.label}</span>
+                </button>
+              );
+            })}
+          </div>
+          <div className={styles.sideSpacer} />
+          <RunningModelFooter config={config} />
+        </div>
 
-      <div
-        className={`${styles.body} ${bodyShouldScroll ? styles.bodyScrollable : ''}`}
-        id={`panel-${activeTab}`}
-        role="tabpanel"
-      >
-        <div ref={setContentEl}>
-          {activeTab === 'general' ? (
-            <ModelTab
-              config={config}
-              resyncToken={resyncToken}
-              onSaved={handleSaved}
-            />
-          ) : null}
-          {activeTab === 'behavior' ? (
-            <BehaviorTab
-              config={config}
-              resyncToken={resyncToken}
-              onSaved={handleSaved}
-            />
-          ) : null}
-          {activeTab === 'search' ? (
-            <SearchTab
-              config={config}
-              resyncToken={resyncToken}
-              onSaved={handleSaved}
-            />
-          ) : null}
-          {activeTab === 'display' ? (
-            <DisplayTab
-              config={config}
-              resyncToken={resyncToken}
-              onSaved={handleSaved}
-            />
-          ) : null}
-          {activeTab === 'about' ? (
-            <AboutTab onSaved={handleSaved} onReload={reload} />
-          ) : null}
+        <div className={styles.main}>
+          <div
+            className={`${styles.body} ${bodyShouldScroll ? styles.bodyScrollable : ''}`}
+            id={`panel-${activeTab}`}
+            role="tabpanel"
+          >
+            <div ref={setContentEl}>
+              {activeTab === 'general' ? (
+                <ModelTab
+                  config={config}
+                  resyncToken={resyncToken}
+                  onSaved={handleSaved}
+                />
+              ) : null}
+              {activeTab === 'behavior' ? (
+                <BehaviorTab
+                  config={config}
+                  resyncToken={resyncToken}
+                  onSaved={handleSaved}
+                />
+              ) : null}
+              {activeTab === 'search' ? (
+                <SearchTab
+                  config={config}
+                  resyncToken={resyncToken}
+                  onSaved={handleSaved}
+                />
+              ) : null}
+              {activeTab === 'display' ? (
+                <DisplayTab
+                  config={config}
+                  resyncToken={resyncToken}
+                  onSaved={handleSaved}
+                />
+              ) : null}
+              {activeTab === 'about' ? (
+                <AboutTab onSaved={handleSaved} onReload={reload} />
+              ) : null}
+            </div>
+          </div>
         </div>
       </div>
 
diff --git a/src/settings/components/RunningModelFooter.test.tsx b/src/settings/components/RunningModelFooter.test.tsx
new file mode 100644
index 00000000..34717fff
--- /dev/null
+++ b/src/settings/components/RunningModelFooter.test.tsx
@@ -0,0 +1,289 @@
+import { render, screen, waitFor, act } from '@testing-library/react';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+import {
+  emitTauriEvent,
+  clearEventHandlers,
+} from '../../testUtils/mocks/tauri';
+
+import { RunningModelFooter } from './RunningModelFooter';
+import type { RawAppConfig, RawProvider } from '../types';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+const BUILTIN: RawProvider = {
+  id: 'builtin',
+  kind: 'builtin',
+  label: 'Built-in',
+  base_url: '',
+  model: '',
+  vision: false,
+};
+const OLLAMA: RawProvider = {
+  id: 'ollama',
+  kind: 'ollama',
+  label: 'Ollama',
+  base_url: 'http://127.0.0.1:11434',
+  model: '',
+  vision: false,
+};
+const OPENAI: RawProvider = {
+  id: 'openai',
+  kind: 'openai',
+  label: 'LM Studio',
+  base_url: 'http://127.0.0.1:1234',
+  model: '',
+  vision: false,
+};
+
+function makeConfig(
+  activeProvider: string,
+  providers: RawProvider[],
+): RawAppConfig {
+  return {
+    inference: {
+      active_provider: activeProvider,
+      keep_warm_inactivity_minutes: 0,
+      num_ctx: 16384,
+      providers,
+    },
+    prompt: { system: '' },
+    window: {
+      overlay_width: 600,
+      max_chat_height: 648,
+      max_images: 3,
+      text_base_px: 15,
+      text_line_height: 1.5,
+      text_letter_spacing_px: 0,
+      text_font_weight: 500,
+    },
+    quote: {
+      max_display_lines: 4,
+      max_display_chars: 300,
+      max_context_length: 4096,
+    },
+    behavior: { auto_replace: false, auto_close: false },
+    search: {
+      searxng_url: '',
+      reader_url: '',
+      max_iterations: 3,
+      top_k_urls: 10,
+      searxng_max_results: 10,
+      search_timeout_s: 20,
+      reader_per_url_timeout_s: 10,
+      reader_batch_timeout_s: 30,
+      judge_timeout_s: 30,
+      router_timeout_s: 45,
+    },
+    debug: { trace_enabled: false },
+  };
+}
+
+const QWEN_ROW = {
+  id: 'org/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf',
+  display_name: 'Qwen3.5 9B',
+  size_bytes: 6_600_000_000,
+  quant: 'Q4_K_M',
+};
+
+// Installs the invoke mock. Entries in `over` win per command (an Error value
+// rejects); otherwise an empty manifest and a stopped engine are reported.
+function mockInvoke(over: Record<string, unknown> = {}) {
+  invokeMock.mockImplementation(async (cmd: string) => {
+    if (Object.prototype.hasOwnProperty.call(over, cmd)) {
+      const override = over[cmd];
+      if (override instanceof Error) throw override;
+      return override;
+    }
+    if (cmd === 'list_installed_models') return [];
+    if (cmd === 'get_engine_status') {
+      return { state: 'stopped', model_path: '', port: null, error: null };
+    }
+    // Any other command resolves to undefined.
+    return undefined;
+  });
+}
+
+beforeEach(() => {
+  invokeMock.mockReset();
+  clearEventHandlers();
+  mockInvoke();
+});
+
+afterEach(() => {
+  clearEventHandlers();
+});
+
+describe('RunningModelFooter', () => {
+  it('shows the built-in model name, size, and a live dot when the engine is loaded', async () => {
+    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
+    mockInvoke({
+      list_installed_models: [QWEN_ROW],
+      get_engine_status: {
+        state: 'loaded',
+        model_path: '/x',
+        port: 1,
+        error: null,
+      },
+    });
+
+    render(
+      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
+    );
+
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    await waitFor(() => expect(footer).toHaveTextContent('Qwen3.5 9B'));
+    expect(footer).toHaveTextContent('Built-in · 6.6 GB');
+    expect(footer.querySelector('[class*="runningModelDot"]')).not.toBeNull();
+    // Live dot, not the idle variant.
+    expect(footer.querySelector('[class*="DotIdle"]')).toBeNull();
+  });
+
+  it('shows a placeholder when the active built-in model is not installed', async () => {
+    const builtin = { ...BUILTIN, model: 'org/missing:m.gguf' };
+    mockInvoke({ list_installed_models: [QWEN_ROW] });
+
+    render(
+      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
+    );
+
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    await waitFor(() => expect(footer).toHaveTextContent(/No model/i));
+  });
+
+  it('shows the Ollama model name and label with an idle dot', async () => {
+    const ollama = { ...OLLAMA, model: 'llama3.1:8b' };
+    render(
+      <RunningModelFooter config={makeConfig('ollama', [BUILTIN, ollama])} />,
+    );
+
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent('llama3.1:8b');
+    expect(footer).toHaveTextContent('Ollama');
+    expect(footer.querySelector('[class*="DotIdle"]')).not.toBeNull();
+  });
+
+  it('shows a placeholder when the active Ollama provider has no model', async () => {
+    render(
+      <RunningModelFooter config={makeConfig('ollama', [BUILTIN, OLLAMA])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent(/No model/i);
+  });
+
+  it('shows the OpenAI provider model and label', async () => {
+    const openai = { ...OPENAI, model: 'qwen2.5-coder' };
+    render(
+      <RunningModelFooter config={makeConfig('openai', [BUILTIN, openai])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent('qwen2.5-coder');
+    expect(footer).toHaveTextContent('LM Studio');
+  });
+
+  it('falls back to a placeholder when the active provider id matches nothing', async () => {
+    render(
+      <RunningModelFooter config={makeConfig('ghost', [BUILTIN, OLLAMA])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent(/No model/i);
+  });
+
+  it('tolerates a config with no built-in provider', async () => {
+    const ollama = { ...OLLAMA, model: 'llama3.1:8b' };
+    render(<RunningModelFooter config={makeConfig('ollama', [ollama])} />);
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent('llama3.1:8b');
+  });
+
+  it('treats a non-array installed payload as empty', async () => {
+    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
+    mockInvoke({ list_installed_models: null });
+    render(
+      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    await waitFor(() => expect(footer).toHaveTextContent(/No model/i));
+  });
+
+  it('survives a failed installed-models read', async () => {
+    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
+    mockInvoke({ list_installed_models: new Error('io') });
+    render(
+      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    await waitFor(() => expect(footer).toHaveTextContent(/No model/i));
+  });
+
+  it('survives a failed engine-status read', async () => {
+    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
+    mockInvoke({
+      list_installed_models: [QWEN_ROW],
+      get_engine_status: new Error('engine down'),
+    });
+    render(
+      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    await waitFor(() => expect(footer).toHaveTextContent('Qwen3.5 9B'));
+    // Engine status unknown -> idle dot.
+    expect(footer.querySelector('[class*="DotIdle"]')).not.toBeNull();
+  });
+
+  it('reflects a live engine via the engine:status event stream', async () => {
+    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
+    mockInvoke({ list_installed_models: [QWEN_ROW] });
+    render(
+      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    await waitFor(() => expect(footer).toHaveTextContent('Qwen3.5 9B'));
+    expect(footer.querySelector('[class*="DotIdle"]')).not.toBeNull();
+
+    await act(async () => {
+      emitTauriEvent('engine:status', {
+        state: 'loaded',
+        model_path: '/x',
+        port: 1,
+        error: null,
+      });
+    });
+    expect(footer.querySelector('[class*="DotIdle"]')).toBeNull();
+  });
+
+  it('omits the meta line when the active provider has a model but no label', async () => {
+    const ollama = { ...OLLAMA, model: 'llama3.1:8b', label: '' };
+    render(
+      <RunningModelFooter config={makeConfig('ollama', [BUILTIN, ollama])} />,
+    );
+    const footer = await screen.findByRole('status', {
+      name: /running model/i,
+    });
+    expect(footer).toHaveTextContent('llama3.1:8b');
+    expect(footer.querySelector('[class*="runningModelMeta"]')).toBeNull();
+  });
+});
diff --git a/src/settings/components/RunningModelFooter.tsx b/src/settings/components/RunningModelFooter.tsx
new file mode 100644
index 00000000..fb3a6a6a
--- /dev/null
+++ b/src/settings/components/RunningModelFooter.tsx
@@ -0,0 +1,141 @@
+/**
+ * "Running model" footer pinned to the bottom of the Settings sidebar.
+ *
+ * Always visible, it names the model the active provider will answer with,
+ * adds a size hint for the built-in engine, and shows a live dot that lights
+ * when that model is currently resident in memory.
+ *
+ * Data sources, kept deliberately small:
+ * - The active provider, its label, and (for Ollama/OpenAI) its model come
+ *   straight from the config snapshot the parent already owns; the active
+ *   model persists onto the provider's `model` field.
+ * - The built-in engine's display name + on-disk size come from the manifest
+ *   (`list_installed_models`), refreshed whenever the selected built-in model
+ *   id changes.
+ * - Liveness for the built-in engine follows `get_engine_status` plus the
+ *   `engine:status` event stream. Ollama/OpenAI residency is not polled here,
+ *   so their dot stays idle.
+ */
+
+import { useEffect, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+import { listen } from '@tauri-apps/api/event';
+
+import styles from '../../styles/settings.module.css';
+import type { RawAppConfig } from '../types';
+import type { EngineStatus, InstalledModel } from '../../types/starter';
+
+/** Bytes rendered as decimal gigabytes with one decimal (e.g. "6.6"). */
+function gb(bytes: number): string {
+  return (bytes / 1e9).toFixed(1);
+}
+
+/** Props for {@link RunningModelFooter}. */
+interface RunningModelFooterProps {
+  /** Config snapshot supplying the providers and the active provider id. */
+  config: RawAppConfig;
+}
+
+/**
+ * Sidebar footer naming the model the active provider will answer with.
+ *
+ * Renders a `role="status"` region labelled "Running model"; when no model
+ * name resolves it shows "No model selected" instead.
+ */
+export function RunningModelFooter({ config }: RunningModelFooterProps) {
+  const [installed, setInstalled] = useState<InstalledModel[]>([]);
+  const [engineState, setEngineState] =
+    useState<EngineStatus['state']>('stopped');
+
+  const providers = config.inference.providers;
+  const active = providers.find(
+    (p) => p.id === config.inference.active_provider,
+  );
+  // An unmatched active provider id takes the non-built-in branch below,
+  // which renders the placeholder because `active` is undefined.
+  const kind = active?.kind ?? 'ollama';
+  const builtinModelId =
+    providers.find((p) => p.kind === 'builtin')?.model ?? '';
+
+  // Manifest read seeds the built-in size/name; re-runs when the selected
+  // built-in model id changes (a download/delete/switch lifts a new config).
+  // `stale` drops a response that lands after a newer read started or after
+  // unmount, so an older manifest can never overwrite a newer one.
+  useEffect(() => {
+    let stale = false;
+    void invoke<InstalledModel[]>('list_installed_models')
+      .then((rows) => {
+        if (!stale) setInstalled(Array.isArray(rows) ? rows : []);
+      })
+      .catch(() => {
+        if (!stale) setInstalled([]);
+      });
+    return () => {
+      stale = true;
+    };
+  }, [builtinModelId]);
+
+  // Engine lifecycle drives the live dot for the built-in engine. Seed from
+  // the current snapshot (the backend only emits on transitions) then follow
+  // the event stream. Once any event has arrived the stream is authoritative,
+  // so a late snapshot is ignored; it is also ignored after unmount.
+  useEffect(() => {
+    let disposed = false;
+    let sawEvent = false;
+    invoke<EngineStatus>('get_engine_status')
+      .then((status) => {
+        if (!disposed && !sawEvent) setEngineState(status.state);
+      })
+      .catch(() => {
+        // Keep the stopped default; the event stream corrects it.
+      });
+    const unlisten = listen<EngineStatus>('engine:status', (e) => {
+      sawEvent = true;
+      setEngineState(e.payload.state);
+    });
+    return () => {
+      disposed = true;
+      void unlisten.then((fn) => fn());
+    };
+  }, []);
+
+  let name: string | null;
+  let meta: string | null;
+  if (kind === 'builtin') {
+    const row = installed.find((m) => m.id === builtinModelId);
+    name = row ? row.display_name : null;
+    meta = row ? `Built-in · ${gb(row.size_bytes)} GB` : null;
+  } else {
+    name = active && active.model !== '' ? active.model : null;
+    meta = active ? active.label : null;
+  }
+
+  // Residency is only tracked for the built-in engine; other kinds stay idle.
+  const live = kind === 'builtin' && engineState === 'loaded';
+
+  return (
+    <div
+      className={styles.runningModel}
+      role="status"
+      aria-label="Running model"
+    >
+      <div className={styles.runningModelEyebrow}>Running</div>
+      {name ? (
+        <>
+          <div className={styles.runningModelName}>
+            <span
+              className={
+                live ? styles.runningModelDot : styles.runningModelDotIdle
+              }
+              aria-hidden
+            />
+            {name}
+          </div>
+          {meta ? <div className={styles.runningModelMeta}>{meta}</div> : null}
+        </>
+      ) : (
+        <div className={styles.runningModelMeta}>No model selected</div>
+      )}
+    </div>
+  );
+}
diff --git a/src/settings/hooks/useSettingsAutoResize.test.ts b/src/settings/hooks/useSettingsAutoResize.test.ts
index 06563ac4..ba6885dd 100644
--- a/src/settings/hooks/useSettingsAutoResize.test.ts
+++ b/src/settings/hooks/useSettingsAutoResize.test.ts
@@ -5,9 +5,9 @@ import { useState } from 'react';
 import { __mockWindow } from '../../testUtils/mocks/tauri-window';
 import { useSettingsAutoResize } from './useSettingsAutoResize';
 
-const SETTINGS_WIDTH = 580;
+const SETTINGS_WIDTH = 760;
 const ANIMATE_MS = 220;
-const MIN_HEIGHT = 280;
+const MIN_HEIGHT = 440;
 const MAX_HEIGHT = 700;
 const CHROME = 148;
 
diff --git a/src/settings/hooks/useSettingsAutoResize.ts b/src/settings/hooks/useSettingsAutoResize.ts
index 75ebb695..7d4b1600 100644
--- a/src/settings/hooks/useSettingsAutoResize.ts
+++ b/src/settings/hooks/useSettingsAutoResize.ts
@@ -33,16 +33,26 @@ import { useEffect, useLayoutEffect, useRef, useState } from 'react';
 import { getCurrentWindow, LogicalSize } from '@tauri-apps/api/window';
 
 const ANIMATE_MS = 220;
-/** Hard floor: settings panel below this is unusable on macOS. */
-const MIN_HEIGHT = 280;
+/**
+ * Hard floor. The window hugs the active section's content height, but the
+ * left sidebar (group label + five items + the pinned Running-model footer)
+ * is taller than a light section like Behavior. This floor guarantees the
+ * window is always tall enough to show the whole sidebar with the footer
+ * clearly separated from the last item, so short sections never clip it.
+ */
+const MIN_HEIGHT = 440;
 /**
  * Hard ceiling: keeps the panel comfortably small even on a 13" laptop.
  * Tabs whose natural content exceeds this (Web's full timeouts list)
  * scroll inside `.body` rather than push the window taller.
  */
 const MAX_HEIGHT = 700;
-/** Settings is intentionally a fixed-width column. */
-const SETTINGS_WIDTH = 580;
+/**
+ * Settings is a fixed width. Wide enough that the 172px left sidebar leaves
+ * the content column the room the old single-column layout had (and that the
+ * dense Models/Discover list needs).
+ */
+const SETTINGS_WIDTH = 760;
 /** Sub-pixel ResizeObserver chatter is dropped below this threshold. */
 const NEGLIGIBLE_DELTA_PX = 4;
 
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index bb45add9..48bac8ee 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -25,6 +25,31 @@
  * NSPanel-converted main window can render past its bounds cleanly.
  * Vocabulary still mirrors .morphing-container in App.css. */
 .window {
+  /* ─── Premium token layer (Phase 3 model-settings redesign) ───────────
+   * Scoped to the Settings window so every settings surface (sidebar,
+   * Models segmented panes, reskinned standard tabs) reads one set of
+   * values. Mirrors the locked design tokens. */
+  --base: #100e0d;
+  --rail: #0b0a09;
+  --elev-1: rgba(255, 255, 255, 0.03);
+  --elev-2: rgba(255, 255, 255, 0.055);
+  --hair: rgba(255, 255, 255, 0.075);
+  --hair-soft: rgba(255, 255, 255, 0.045);
+  --t1: #eceae7;
+  --t2: rgba(236, 234, 231, 0.54);
+  --t3: rgba(236, 234, 231, 0.34);
+  --accent: #ff8d5c;
+  --accent-soft: rgba(255, 141, 92, 0.14);
+  --vis: #7fd1a6;
+  --vis-bg: rgba(127, 209, 166, 0.1);
+  --rea: #b9a4f0;
+  --rea-bg: rgba(185, 164, 240, 0.1);
+  --ok: #79c08e;
+  --tight: #e6b56b;
+  --radius-card: 10px;
+  --radius-control: 8px;
+  --radius-pill: 999px;
+
   position: fixed;
   inset: 0;
   display: flex;
@@ -33,24 +58,18 @@
    * space above the traffic-light dots — mirrors the chat overlay's
    * outer `pt-2` padding so both panels feel the same vertically. */
   padding-top: 8px;
-  background:
-    radial-gradient(
-      ellipse 70% 40% at 50% 0%,
-      rgba(255, 141, 92, 0.08) 0%,
-      transparent 65%
-    ),
-    var(--color-surface-base);
-  color: var(--color-text-primary);
+  /* Premium flat base (warm off-black), elevation comes from light overlays
+   * on the surfaces above it, not a muddy radial wash. */
+  background: var(--base);
+  color: var(--t1);
   font-family: var(--font-sans);
   -webkit-font-smoothing: antialiased;
   font-size: 13px;
   user-select: none;
-  border: 1px solid var(--color-surface-border);
+  border: 1px solid var(--hair);
   border-top-color: rgba(255, 141, 92, 0.2);
   border-radius: 10px;
   overflow: hidden;
-  backdrop-filter: blur(24px);
-  -webkit-backdrop-filter: blur(24px);
 }
 
 /* Glowing 1px hairline at the very top edge — matches morphing-container. */
@@ -107,69 +126,171 @@
   gap: 6px;
 }
 
-/* ─── Top bar (horizontal icon tabs, CodexBar layout) ───────────────────── */
+/* ─── Left sidebar (section nav + Running-model footer) ─────────────────── */
 
-.tabBar {
+/* This codebase has no global box-sizing reset, so the layout boxes opt into
+ * border-box here: otherwise a width:100% item plus its padding renders
+ * wider than the rail and overflows into the content pane, where it gets
+ * clipped at the divider (the active pill "cutoff"). Only the rail's own
+ * descendants are included, so content inside .main keeps content-box. */
+.stage,
+.main,
+.side,
+.side * {
+  box-sizing: border-box;
+}
+
+/* The window stays a vertical column [WindowControls][banners][stage]; the
+ * stage is the horizontal split between the section rail and the active
+ * section's content. */
+.stage {
+  flex: 1;
   display: flex;
-  justify-content: center;
-  align-items: flex-end;
-  gap: 4px;
-  padding: 12px 20px 10px;
-  flex-shrink: 0;
+  min-height: 0;
+}
+
+.main {
+  flex: 1;
+  display: flex;
+  flex-direction: column;
+  min-width: 0;
+  min-height: 0;
+}
+
+.side {
+  width: 172px;
+  flex: none;
+  display: flex;
+  flex-direction: column;
+  padding: 12px 10px;
+  min-height: 0;
+  background: var(--rail);
+  border-right: 1px solid var(--hair-soft);
+}
+
+.sideGroup {
+  margin: 6px 8px;
+  font-size: 10px;
+  font-weight: 600;
+  letter-spacing: 0.07em;
+  text-transform: uppercase;
+  color: var(--t3);
 }
 
-.tab {
+.sideTabs {
   display: flex;
   flex-direction: column;
+  gap: 2px;
+}
+
+.sideItem {
+  position: relative;
+  display: flex;
   align-items: center;
-  gap: 6px;
-  padding: 8px 14px 8px;
+  gap: 11px;
+  width: 100%;
+  padding: 8px 10px;
   border: none;
+  border-radius: 8px;
   background: transparent;
-  color: rgba(240, 240, 242, 0.5);
-  cursor: pointer;
+  color: var(--t2);
   font-family: inherit;
-  border-radius: 10px;
-  min-width: 72px;
+  font-size: 13px;
+  font-weight: 500;
+  text-align: left;
+  cursor: pointer;
   transition:
-    color 180ms ease,
-    background 180ms ease;
+    color 160ms ease,
+    background 160ms ease;
 }
-.tab:hover:not(.tabActive) {
-  color: var(--color-text-primary);
-  background: rgba(255, 255, 255, 0.025);
+.sideItem:hover:not(.sideItemActive) {
+  color: var(--t1);
+  background: var(--elev-1);
 }
-.tab:focus-visible {
+.sideItem:focus-visible {
   outline: none;
   box-shadow: 0 0 0 2px rgba(255, 141, 92, 0.32);
 }
-.tab svg {
-  width: 22px;
-  height: 22px;
-  display: block;
+.sideItemActive {
+  color: var(--t1);
+  background: var(--elev-2);
+}
+.sideItemIcon {
+  display: inline-flex;
+  flex: none;
+  align-items: center;
+  justify-content: center;
+}
+.sideItemIcon svg {
+  width: 16px;
+  height: 16px;
   stroke-width: 1.6;
+  opacity: 0.9;
 }
-.tabLabel {
-  font-size: 11px;
-  font-weight: 500;
-  letter-spacing: 0.01em;
+/* Active section: the icon carries the accent. One active signal (orange
+ * icon + stronger fill + bright label), no separate rail bar that read as
+ * clutter against the filled pill. */
+.sideItemActive .sideItemIcon {
+  color: var(--accent);
+}
+.sideItemActive .sideItemIcon svg {
+  opacity: 1;
+}
+.sideItemLabel {
   color: inherit;
 }
-.tabActive {
-  color: var(--color-primary);
-  background: rgba(0, 0, 0, 0.28);
-  box-shadow: inset 0 0 0 1px rgba(255, 141, 92, 0.1);
+.sideSpacer {
+  flex: 1;
 }
-.tabActive svg {
-  color: var(--color-primary);
+
+/* Running-model footer (always visible, pinned to the sidebar bottom). The
+ * spacer above pushes it down when there is room; this margin guarantees a
+ * gap from the last section item even when the spacer collapses. */
+.runningModel {
+  margin-top: 14px;
+  padding: 9px 10px;
+  border: 1px solid var(--hair-soft);
+  border-radius: var(--radius-card);
+  background: var(--elev-1);
 }
-/* Decorative wrapper kept for backward-compat with structure; in the
- * top-bar layout the icon sits inline with the label, no chrome of its
- * own. */
-.tabIcon {
-  display: inline-flex;
+.runningModelEyebrow {
+  font-size: 9.5px;
+  font-weight: 600;
+  letter-spacing: 0.07em;
+  text-transform: uppercase;
+  color: var(--t3);
+}
+.runningModelName {
+  display: flex;
   align-items: center;
-  justify-content: center;
+  gap: 6px;
+  margin-top: 4px;
+  font-size: 12px;
+  font-weight: 580;
+  color: var(--t1);
+}
+.runningModelDot {
+  flex: none;
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
+  background: var(--accent);
+  box-shadow: 0 0 7px var(--accent);
+}
+.runningModelDotIdle {
+  flex: none;
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
+  /* The active/selected model, just not resident yet: accent (no glow), never
+   * grey, which reads as "disabled". The glow distinguishes the live state. */
+  background: var(--accent);
+  opacity: 0.85;
+}
+.runningModelMeta {
+  margin-top: 3px;
+  font-size: 10.5px;
+  color: var(--t3);
 }
 
 /* ─── Body (scrolling content) ──────────────────────────────────────────── */
@@ -1355,8 +1476,7 @@
 }
 
 @media (prefers-reduced-motion: reduce) {
-  .tab,
-  .tabIcon,
+  .sideItem,
   .input,
   .textarea,
   .button,

From 699d50eca2363ab39980c89b8874379a8beeaa1c Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 01:26:26 -0500
Subject: [PATCH 03/89] feat: add the Models segmented
 Library/Discover/Providers control

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/ModelsSegmented.test.tsx      | 68 +++++++++++++++++++
 src/settings/tabs/models/ModelsSegmented.tsx  | 60 ++++++++++++++++
 src/styles/settings.module.css                | 39 +++++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 src/settings/tabs/models/ModelsSegmented.test.tsx
 create mode 100644 src/settings/tabs/models/ModelsSegmented.tsx

diff --git a/src/settings/tabs/models/ModelsSegmented.test.tsx b/src/settings/tabs/models/ModelsSegmented.test.tsx
new file mode 100644
index 00000000..8ef0d9bb
--- /dev/null
+++ b/src/settings/tabs/models/ModelsSegmented.test.tsx
@@ -0,0 +1,68 @@
+import { render, screen, fireEvent } from '@testing-library/react';
+import { describe, expect, it, vi } from 'vitest';
+
+import { ModelsSegmented } from './ModelsSegmented';
+
+describe('ModelsSegmented', () => {
+  it('renders the three model views', () => {
+    render(<ModelsSegmented value="providers" onChange={() => {}} />);
+    expect(screen.getByRole('tab', { name: 'Library' })).toBeInTheDocument();
+    expect(screen.getByRole('tab', { name: 'Discover' })).toBeInTheDocument();
+    expect(screen.getByRole('tab', { name: 'Providers' })).toBeInTheDocument();
+  });
+
+  it('marks the active view as selected', () => {
+    render(<ModelsSegmented value="discover" onChange={() => {}} />);
+    expect(screen.getByRole('tab', { name: 'Discover' })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+    expect(screen.getByRole('tab', { name: 'Library' })).toHaveAttribute(
+      'aria-selected',
+      'false',
+    );
+  });
+
+  it('calls onChange with the clicked view', () => {
+    const onChange = vi.fn();
+    render(<ModelsSegmented value="providers" onChange={onChange} />);
+    fireEvent.click(screen.getByRole('tab', { name: 'Library' }));
+    expect(onChange).toHaveBeenCalledWith('library');
+  });
+
+  it('ArrowRight selects the next view', () => {
+    const onChange = vi.fn();
+    render(<ModelsSegmented value="library" onChange={onChange} />);
+    fireEvent.keyDown(screen.getByRole('tab', { name: 'Library' }), {
+      key: 'ArrowRight',
+    });
+    expect(onChange).toHaveBeenCalledWith('discover');
+  });
+
+  it('ArrowRight wraps from the last view to the first', () => {
+    const onChange = vi.fn();
+    render(<ModelsSegmented value="providers" onChange={onChange} />);
+    fireEvent.keyDown(screen.getByRole('tab', { name: 'Providers' }), {
+      key: 'ArrowRight',
+    });
+    expect(onChange).toHaveBeenCalledWith('library');
+  });
+
+  it('ArrowLeft wraps from the first view to the last', () => {
+    const onChange = vi.fn();
+    render(<ModelsSegmented value="library" onChange={onChange} />);
+    fireEvent.keyDown(screen.getByRole('tab', { name: 'Library' }), {
+      key: 'ArrowLeft',
+    });
+    expect(onChange).toHaveBeenCalledWith('providers');
+  });
+
+  it('ignores non-arrow keys', () => {
+    const onChange = vi.fn();
+    render(<ModelsSegmented value="library" onChange={onChange} />);
+    fireEvent.keyDown(screen.getByRole('tab', { name: 'Library' }), {
+      key: 'Enter',
+    });
+    expect(onChange).not.toHaveBeenCalled();
+  });
+});
diff --git a/src/settings/tabs/models/ModelsSegmented.tsx b/src/settings/tabs/models/ModelsSegmented.tsx
new file mode 100644
index 00000000..2d2deeb6
--- /dev/null
+++ b/src/settings/tabs/models/ModelsSegmented.tsx
@@ -0,0 +1,60 @@
+/**
+ * Segmented control that switches the Models surface between its three
+ * sub-views. Rendered at the top of the Models section; the chosen view
+ * swaps the pane below it.
+ *
+ * A nested tablist (the left sidebar is the outer one): the views are
+ * mutually exclusive panes, so tab semantics + roving arrow keys are the
+ * right pattern. Labelled distinctly so queries never collide with the
+ * sidebar's section tabs.
+ */
+
+import styles from '../../../styles/settings.module.css';
+
+export type ModelsSubview = 'library' | 'discover' | 'providers';
+
+const VIEWS: ReadonlyArray<{ id: ModelsSubview; label: string }> = [
+  { id: 'library', label: 'Library' },
+  { id: 'discover', label: 'Discover' },
+  { id: 'providers', label: 'Providers' },
+];
+
+interface ModelsSegmentedProps {
+  value: ModelsSubview;
+  onChange: (next: ModelsSubview) => void;
+}
+
+/** Tablist with roving tabindex: arrows cycle views (wrapping), focus follows. */
+export function ModelsSegmented({ value, onChange }: ModelsSegmentedProps) {
+  return (
+    <div className={styles.seg} role="tablist" aria-label="Model views">
+      {VIEWS.map((view) => {
+        const active = view.id === value;
+        return (
+          <button
+            key={view.id}
+            type="button"
+            role="tab"
+            aria-selected={active}
+            tabIndex={active ? 0 : -1}
+            className={`${styles.segItem} ${active ? styles.segItemActive : ''}`}
+            onClick={() => onChange(view.id)}
+            onKeyDown={(e) => {
+              if (e.key !== 'ArrowRight' && e.key !== 'ArrowLeft') return;
+              e.preventDefault();
+              const step = e.key === 'ArrowRight' ? 1 : -1;
+              const idx = VIEWS.findIndex((v) => v.id === value);
+              const nextIdx = (idx + step + VIEWS.length) % VIEWS.length;
+              // Tab buttons are the tablist's only children, in VIEWS order.
+              const tabs = e.currentTarget.parentElement?.children;
+              (tabs?.[nextIdx] as HTMLElement | undefined)?.focus();
+              onChange(VIEWS[nextIdx].id);
+            }}
+          >
+            {view.label}
+          </button>
+        );
+      })}
+    </div>
+  );
+}
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 48bac8ee..629aa4f8 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -293,6 +293,45 @@
   color: var(--t3);
 }
 
+/* ─── Models surface (segmented Library / Discover / Providers) ──────────── */
+
+.seg {
+  display: inline-flex;
+  box-sizing: border-box;
+  padding: 3px;
+  border: 1px solid var(--hair-soft);
+  border-radius: 9px;
+  background: var(--elev-1);
+}
+.segItem {
+  box-sizing: border-box;
+  padding: 6px 14px;
+  border: none;
+  border-radius: 7px;
+  background: transparent;
+  color: var(--t2);
+  font-family: inherit;
+  font-size: 12px;
+  font-weight: 540;
+  cursor: pointer;
+  transition:
+    color 140ms ease,
+    background 140ms ease;
+}
+.segItem:hover:not(.segItemActive) {
+  color: var(--t1);
+}
+.segItem:focus-visible {
+  outline: none;
+  box-shadow: 0 0 0 2px rgba(255, 141, 92, 0.32); /* same ring as .sideItem */
+}
+/* Active view: filled accent pill with dark text (the one accent fill on the
+ * surface, matching the locked design). */
+.segItemActive {
+  color: #16110d;
+  background: var(--accent);
+}
+
 /* ─── Body (scrolling content) ──────────────────────────────────────────── */
 
 .body {

From afad60958e248ce95eb7e7994e2288d272f7822f Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 01:31:36 -0500
Subject: [PATCH 04/89] feat: wire the Models segmented control into the Model
 tab

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/SettingsWindow.test.tsx |   9 +-
 src/settings/tabs/ModelTab.tsx       | 843 ++++++++++++++-------------
 src/settings/tabs/tabs.test.tsx      |  34 +-
 src/styles/settings.module.css       |  15 +
 4 files changed, 490 insertions(+), 411 deletions(-)

diff --git a/src/settings/SettingsWindow.test.tsx b/src/settings/SettingsWindow.test.tsx
index c092bccb..2093d097 100644
--- a/src/settings/SettingsWindow.test.tsx
+++ b/src/settings/SettingsWindow.test.tsx
@@ -567,10 +567,11 @@ describe('SettingsWindow left sidebar (Phase 3)', () => {
   it('renders the section nav as a vertical sidebar', async () => {
     render(<SettingsWindow />);
     await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
-    expect(screen.getByRole('tablist')).toHaveAttribute(
-      'aria-orientation',
-      'vertical',
-    );
+    // Scope to the sidebar: the Models pane also renders a (horizontal)
+    // segmented tablist for Library/Discover/Providers.
+    expect(
+      screen.getByRole('tablist', { name: 'Settings sections' }),
+    ).toHaveAttribute('aria-orientation', 'vertical');
   });
 
   it('renders Models as the first section label', async () => {
diff --git a/src/settings/tabs/ModelTab.tsx b/src/settings/tabs/ModelTab.tsx
index 1849072f..9b7b7134 100644
--- a/src/settings/tabs/ModelTab.tsx
+++ b/src/settings/tabs/ModelTab.tsx
@@ -19,6 +19,7 @@ import {
   BuiltinProviderCard,
   OpenAiProviderCard,
 } from './ProviderCards';
+import { ModelsSegmented, type ModelsSubview } from './models/ModelsSegmented';
 import { useDebouncedSave } from '../hooks/useDebouncedSave';
 import { useModelSelection } from '../../hooks/useModelSelection';
 import { isNonLocalUrl } from '../../utils/isNonLocalUrl';
@@ -86,6 +87,9 @@ const CTX_TICKS = [
 ];
 
 export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
+  // Which of the three Models sub-views is showing. Providers is the default
+  // (the active provider + generation controls, the most-used surface).
+  const [view, setView] = useState<ModelsSubview>('providers');
   const [inactivityMin, setInactivityMin] = useState(
     config.inference.keep_warm_inactivity_minutes,
   );
@@ -285,455 +289,482 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
 
   return (
     <>
-      <Section heading="Providers">
-        <div
-          className={providerCardClass(activeKind === 'builtin')}
-          data-provider-card="builtin"
-        >
-          <label className={styles.providerSelectRow}>
-            <input
-              type="radio"
-              className={styles.providerRadio}
-              name="active-provider"
-              aria-label="Use Built-in (Thuki)"
-              checked={activeKind === 'builtin'}
-              onChange={() => selectProvider(builtinProvider?.id ?? 'builtin')}
-            />
-            <span className={styles.providerName}>
-              {builtinProvider?.label ?? 'Built-in (Thuki)'}
-            </span>
-          </label>
-          <BuiltinProviderCard config={config} onSaved={onSaved} />
+      <div className={styles.barrow}>
+        <ModelsSegmented value={view} onChange={setView} />
+      </div>
+
+      {view === 'library' ? (
+        <div className={styles.modelsPlaceholder}>
+          Your installed models will appear here.
         </div>
+      ) : null}
 
-        <div
-          className={providerCardClass(activeKind === 'ollama')}
-          data-provider-card="ollama"
-        >
-          <label className={styles.providerSelectRow}>
-            <input
-              type="radio"
-              className={styles.providerRadio}
-              name="active-provider"
-              aria-label="Use Ollama"
-              checked={activeKind === 'ollama'}
-              onChange={() => selectProvider('ollama')}
-            />
-            <span className={styles.providerName}>Ollama</span>
-          </label>
-          <SettingRow
-            label="Ollama URL"
-            helper={configHelp('inference', 'ollama_base_url')}
-          >
-            <input
-              type="text"
-              className={styles.input}
-              value={ollamaUrl}
-              aria-label="Ollama URL"
-              spellCheck={false}
-              autoComplete="off"
-              autoCorrect="off"
-              autoCapitalize="off"
-              placeholder="http://127.0.0.1:11434"
-              onFocus={() => {
-                ollamaUrlFocusedRef.current = true;
-              }}
-              onChange={(e) => setOllamaUrl(e.target.value)}
-              onBlur={() => {
-                ollamaUrlFocusedRef.current = false;
-                commitOllamaUrl();
-              }}
-              onKeyDown={(e) => {
-                if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
-              }}
-            />
-          </SettingRow>
-          {isNonLocalUrl(ollamaUrl) && (
-            <p className={styles.providerWarning} role="alert">
-              This points Thuki at a non-local Ollama server. You are
-              responsible for securing it: prefer a VPN/Tailscale or SSH tunnel
-              over exposing the port directly.
-            </p>
-          )}
-          {/* get_model_picker_state is scoped to the ACTIVE provider, so this
-              inventory only describes Ollama while Ollama is active. Hide the
-              row otherwise to avoid listing another provider's models here. */}
-          {activeKind === 'ollama' ? (
-            <SettingRow label="Model">
-              {availableModels.length > 0 ? (
-                <Dropdown
-                  value={modelValue}
-                  options={availableModels}
-                  onChange={(m) => void setActiveModel(m)}
-                  ariaLabel="Active Ollama model"
-                />
-              ) : (
-                <span className={styles.providerHint}>No models installed</span>
-              )}
-            </SettingRow>
-          ) : null}
+      {view === 'discover' ? (
+        <div className={styles.modelsPlaceholder}>
+          Browse and download Hugging Face models here.
         </div>
+      ) : null}
 
-        {/* The OpenAI-compatible provider KIND is gated behind a
-            compile-time, dev-only flag (off in shipped builds): both the
-            management card and the "add a server" affordance are the only UI
-            paths to create or manage one, so hiding them keeps the kind out of
-            reach of end users. The shared /v1 backend stays live for the
-            built-in engine regardless. */}
-        {openaiProviderEnabled ? (
-          openaiProvider ? (
+      {view === 'providers' ? (
+        <>
+          <Section heading="Providers">
             <div
-              className={providerCardClass(activeKind === 'openai')}
-              data-provider-card="openai"
+              className={providerCardClass(activeKind === 'builtin')}
+              data-provider-card="builtin"
             >
               <label className={styles.providerSelectRow}>
                 <input
                   type="radio"
                   className={styles.providerRadio}
                   name="active-provider"
-                  aria-label="Use OpenAI-compatible server"
-                  checked={activeKind === 'openai'}
-                  onChange={() => selectProvider(openaiProvider.id)}
+                  aria-label="Use Built-in (Thuki)"
+                  checked={activeKind === 'builtin'}
+                  onChange={() =>
+                    selectProvider(builtinProvider?.id ?? 'builtin')
+                  }
                 />
                 <span className={styles.providerName}>
-                  {openaiProvider.label}
+                  {builtinProvider?.label ?? 'Built-in (Thuki)'}
                 </span>
               </label>
-              <OpenAiProviderCard
-                provider={openaiProvider}
-                resyncToken={resyncToken}
-                onSaved={onSaved}
-              />
+              <BuiltinProviderCard config={config} onSaved={onSaved} />
             </div>
-          ) : (
-            <AddOpenAiProvider onSaved={onSaved} />
-          )
-        ) : null}
-      </Section>
 
-      {/* Unified residency control: one Keep Warm knob bound to
+            <div
+              className={providerCardClass(activeKind === 'ollama')}
+              data-provider-card="ollama"
+            >
+              <label className={styles.providerSelectRow}>
+                <input
+                  type="radio"
+                  className={styles.providerRadio}
+                  name="active-provider"
+                  aria-label="Use Ollama"
+                  checked={activeKind === 'ollama'}
+                  onChange={() => selectProvider('ollama')}
+                />
+                <span className={styles.providerName}>Ollama</span>
+              </label>
+              <SettingRow
+                label="Ollama URL"
+                helper={configHelp('inference', 'ollama_base_url')}
+              >
+                <input
+                  type="text"
+                  className={styles.input}
+                  value={ollamaUrl}
+                  aria-label="Ollama URL"
+                  spellCheck={false}
+                  autoComplete="off"
+                  autoCorrect="off"
+                  autoCapitalize="off"
+                  placeholder="http://127.0.0.1:11434"
+                  onFocus={() => {
+                    ollamaUrlFocusedRef.current = true;
+                  }}
+                  onChange={(e) => setOllamaUrl(e.target.value)}
+                  onBlur={() => {
+                    ollamaUrlFocusedRef.current = false;
+                    commitOllamaUrl();
+                  }}
+                  onKeyDown={(e) => {
+                    if (e.key === 'Enter')
+                      (e.target as HTMLInputElement).blur();
+                  }}
+                />
+              </SettingRow>
+              {isNonLocalUrl(ollamaUrl) && (
+                <p className={styles.providerWarning} role="alert">
+                  This points Thuki at a non-local Ollama server. You are
+                  responsible for securing it: prefer a VPN/Tailscale or SSH
+                  tunnel over exposing the port directly.
+                </p>
+              )}
+              {/* get_model_picker_state is scoped to the ACTIVE provider, so this
+              inventory only describes Ollama while Ollama is active. Hide the
+              row otherwise to avoid listing another provider's models here. */}
+              {activeKind === 'ollama' ? (
+                <SettingRow label="Model">
+                  {availableModels.length > 0 ? (
+                    <Dropdown
+                      value={modelValue}
+                      options={availableModels}
+                      onChange={(m) => void setActiveModel(m)}
+                      ariaLabel="Active Ollama model"
+                    />
+                  ) : (
+                    <span className={styles.providerHint}>
+                      No models installed
+                    </span>
+                  )}
+                </SettingRow>
+              ) : null}
+            </div>
+
+            {/* The OpenAI-compatible provider KIND is gated behind a
+            compile-time, dev-only flag (off in shipped builds): both the
+            management card and the "add a server" affordance are the only UI
+            paths to create or manage one, so hiding them keeps the kind out of
+            reach of end users. The shared /v1 backend stays live for the
+            built-in engine regardless. */}
+            {openaiProviderEnabled ? (
+              openaiProvider ? (
+                <div
+                  className={providerCardClass(activeKind === 'openai')}
+                  data-provider-card="openai"
+                >
+                  <label className={styles.providerSelectRow}>
+                    <input
+                      type="radio"
+                      className={styles.providerRadio}
+                      name="active-provider"
+                      aria-label="Use OpenAI-compatible server"
+                      checked={activeKind === 'openai'}
+                      onChange={() => selectProvider(openaiProvider.id)}
+                    />
+                    <span className={styles.providerName}>
+                      {openaiProvider.label}
+                    </span>
+                  </label>
+                  <OpenAiProviderCard
+                    provider={openaiProvider}
+                    resyncToken={resyncToken}
+                    onSaved={onSaved}
+                  />
+                </div>
+              ) : (
+                <AddOpenAiProvider onSaved={onSaved} />
+              )
+            ) : null}
+          </Section>
+
+          {/* Unified residency control: one Keep Warm knob bound to
           keep_warm_inactivity_minutes, shown for both local providers
           (built-in engine and Ollama) and hidden for OpenAI (Thuki does not
           manage a remote server's residency). The status row branches by
           kind: the built-in engine reports its sidecar lifecycle, Ollama
           reports the model resident in VRAM. */}
-      {activeKind === 'builtin' || activeKind === 'ollama' ? (
-        <Section heading="Keep Warm">
-          {/* Row 1: label + [?] on left | Release after [N] min on right */}
-          <div className={styles.keepWarmRow1}>
-            <div className={styles.keepWarmLabelLine}>
-              <span className={styles.keepWarmLabel}>
-                Keep active model in memory
-              </span>
-              <Tooltip label={KEEP_WARM_TOOLTIP} multiline>
-                <button
-                  type="button"
-                  className={styles.infoBtn}
-                  aria-label="About Keep active model in memory"
-                >
-                  ?
-                </button>
-              </Tooltip>
-            </div>
-            <div className={styles.keepWarmTimerGroup}>
-              <span className={styles.keepWarmBarFieldLabel}>
-                Release after
-              </span>
-              <input
-                type="number"
-                className={styles.keepWarmNumberInput}
-                value={rawMin}
-                min={-1}
-                max={1440}
-                aria-label="Release after N minutes"
-                onFocus={() => {
-                  minFocusedRef.current = true;
-                }}
-                onChange={(e) => {
-                  const n = parseInt(e.target.value, 10);
-                  if (Number.isNaN(n)) {
-                    setRawMin(e.target.value);
-                  } else {
-                    const clamped = Math.max(-1, Math.min(1440, n));
-                    setRawMin(String(clamped));
-                    setInactivityMin(clamped);
-                  }
-                }}
-                onBlur={() => {
-                  minFocusedRef.current = false;
-                  if (Number.isNaN(parseInt(rawMin, 10))) {
-                    setRawMin('0');
-                    setInactivityMin(0);
-                  }
-                }}
-              />
-              <span className={styles.keepWarmUnit}>min</span>
-            </div>
-          </div>
-
-          {/* Row 2: residency status on left | Unload now on right. */}
-          {activeKind === 'builtin' ? (
-            <div className={styles.keepWarmStatusRow}>
-              <span className={styles.engineStatusLine}>
-                Engine: {engineState}
-              </span>
-              <button
-                type="button"
-                className={styles.keepWarmEjectPill}
-                aria-label="Unload now"
-                disabled={engineState !== 'loaded'}
-                onClick={handleEngineEject}
-              >
-                Unload now
-              </button>
-            </div>
-          ) : (
-            <div className={styles.keepWarmStatusRow}>
-              <div className={styles.keepWarmStatusLeft}>
-                {loadedModel !== null ? (
-                  <div className={styles.keepWarmVramSubtitle}>
-                    <span
-                      className={styles.keepWarmVramDot}
-                      data-testid="vram-status-dot"
-                      aria-hidden="true"
-                    />
-                    <span className={styles.keepWarmVramModelName}>
-                      {loadedModel}
-                    </span>
-                    <span>&nbsp;· in VRAM</span>
-                  </div>
-                ) : (
-                  <span className={styles.keepWarmNoModel}>
-                    No model loaded
+          {activeKind === 'builtin' || activeKind === 'ollama' ? (
+            <Section heading="Keep Warm">
+              {/* Row 1: label + [?] on left | Release after [N] min on right */}
+              <div className={styles.keepWarmRow1}>
+                <div className={styles.keepWarmLabelLine}>
+                  <span className={styles.keepWarmLabel}>
+                    Keep active model in memory
                   </span>
-                )}
+                  <Tooltip label={KEEP_WARM_TOOLTIP} multiline>
+                    <button
+                      type="button"
+                      className={styles.infoBtn}
+                      aria-label="About Keep active model in memory"
+                    >
+                      ?
+                    </button>
+                  </Tooltip>
+                </div>
+                <div className={styles.keepWarmTimerGroup}>
+                  <span className={styles.keepWarmBarFieldLabel}>
+                    Release after
+                  </span>
+                  <input
+                    type="number"
+                    className={styles.keepWarmNumberInput}
+                    value={rawMin}
+                    min={-1}
+                    max={1440}
+                    aria-label="Release after N minutes"
+                    onFocus={() => {
+                      minFocusedRef.current = true;
+                    }}
+                    onChange={(e) => {
+                      const n = parseInt(e.target.value, 10);
+                      if (Number.isNaN(n)) {
+                        setRawMin(e.target.value);
+                      } else {
+                        const clamped = Math.max(-1, Math.min(1440, n));
+                        setRawMin(String(clamped));
+                        setInactivityMin(clamped);
+                      }
+                    }}
+                    onBlur={() => {
+                      minFocusedRef.current = false;
+                      if (Number.isNaN(parseInt(rawMin, 10))) {
+                        setRawMin('0');
+                        setInactivityMin(0);
+                      }
+                    }}
+                  />
+                  <span className={styles.keepWarmUnit}>min</span>
+                </div>
               </div>
 
-              <button
-                type="button"
-                className={styles.keepWarmEjectPill}
-                aria-label="Unload now"
-                disabled={ejecting || loadedModel === null}
-                data-ejecting={ejecting}
-                onClick={handleEject}
-              >
-                {ejecting ? (
-                  <DrawCheckIcon />
-                ) : (
-                  <svg
-                    viewBox="0 0 16 16"
-                    width="11"
-                    height="11"
-                    fill="currentColor"
-                    aria-hidden="true"
+              {/* Row 2: residency status on left | Unload now on right. */}
+              {activeKind === 'builtin' ? (
+                <div className={styles.keepWarmStatusRow}>
+                  <span className={styles.engineStatusLine}>
+                    Engine: {engineState}
+                  </span>
+                  <button
+                    type="button"
+                    className={styles.keepWarmEjectPill}
+                    aria-label="Unload now"
+                    disabled={engineState !== 'loaded'}
+                    onClick={handleEngineEject}
                   >
-                    <polygon points="8,2 14,11 2,11" />
-                    <rect x="2" y="12.5" width="12" height="2" rx="1" />
-                  </svg>
-                )}
-                Unload now
-              </button>
-            </div>
-          )}
-        </Section>
-      ) : null}
+                    Unload now
+                  </button>
+                </div>
+              ) : (
+                <div className={styles.keepWarmStatusRow}>
+                  <div className={styles.keepWarmStatusLeft}>
+                    {loadedModel !== null ? (
+                      <div className={styles.keepWarmVramSubtitle}>
+                        <span
+                          className={styles.keepWarmVramDot}
+                          data-testid="vram-status-dot"
+                          aria-hidden="true"
+                        />
+                        <span className={styles.keepWarmVramModelName}>
+                          {loadedModel}
+                        </span>
+                        <span>&nbsp;· in VRAM</span>
+                      </div>
+                    ) : (
+                      <span className={styles.keepWarmNoModel}>
+                        No model loaded
+                      </span>
+                    )}
+                  </div>
 
-      <Section heading="Context Window">
-        <div className={styles.ctxBlock}>
-          {/* Label row: "Context window" left + editable token chip right */}
-          <div className={styles.ctxTopRow}>
-            <span className={styles.ctxLabel}>Context window</span>
-            <div className={styles.ctxChipGroup}>
+                  <button
+                    type="button"
+                    className={styles.keepWarmEjectPill}
+                    aria-label="Unload now"
+                    disabled={ejecting || loadedModel === null}
+                    data-ejecting={ejecting}
+                    onClick={handleEject}
+                  >
+                    {ejecting ? (
+                      <DrawCheckIcon />
+                    ) : (
+                      <svg
+                        viewBox="0 0 16 16"
+                        width="11"
+                        height="11"
+                        fill="currentColor"
+                        aria-hidden="true"
+                      >
+                        <polygon points="8,2 14,11 2,11" />
+                        <rect x="2" y="12.5" width="12" height="2" rx="1" />
+                      </svg>
+                    )}
+                    Unload now
+                  </button>
+                </div>
+              )}
+            </Section>
+          ) : null}
+
+          <Section heading="Context Window">
+            <div className={styles.ctxBlock}>
+              {/* Label row: "Context window" left + editable token chip right */}
+              <div className={styles.ctxTopRow}>
+                <span className={styles.ctxLabel}>Context window</span>
+                <div className={styles.ctxChipGroup}>
+                  <input
+                    type="number"
+                    className={styles.ctxChipInput}
+                    value={ctxChip}
+                    min={CTX_MIN}
+                    max={CTX_MAX}
+                    aria-label="Context window tokens"
+                    onChange={(e) => setCtxChip(e.target.value)}
+                    onBlur={() => {
+                      const n = parseInt(ctxChip, 10);
+                      if (!Number.isNaN(n) && n >= CTX_MIN) {
+                        // Clamp upper bound so the UI mirrors the backend
+                        // BOUNDS_NUM_CTX cap and the slider stays in sync.
+                        commitCtx(Math.min(n, CTX_MAX));
+                      } else {
+                        setCtxChip(String(numCtx));
+                      }
+                    }}
+                    onKeyDown={(e) => {
+                      if (e.key === 'Enter')
+                        (e.target as HTMLInputElement).blur();
+                    }}
+                  />
+                  <span className={styles.ctxChipUnit}>tokens</span>
+                </div>
+              </div>
+
+              {/* Log-scale slider — fill percentage tracked via CSS custom property */}
               <input
-                type="number"
-                className={styles.ctxChipInput}
-                value={ctxChip}
-                min={CTX_MIN}
-                max={CTX_MAX}
+                type="range"
+                className={styles.ctxSlider}
+                style={{ '--fill': fillPct } as React.CSSProperties}
+                min={0}
+                max={1000}
+                step={1}
+                value={ctxPos}
                 aria-label="Context window tokens"
-                onChange={(e) => setCtxChip(e.target.value)}
-                onBlur={() => {
-                  const n = parseInt(ctxChip, 10);
-                  if (!Number.isNaN(n) && n >= CTX_MIN) {
-                    // Clamp upper bound so the UI mirrors the backend
-                    // BOUNDS_NUM_CTX cap and the slider stays in sync.
-                    commitCtx(Math.min(n, CTX_MAX));
-                  } else {
-                    setCtxChip(String(numCtx));
-                  }
+                aria-valuemin={CTX_MIN}
+                aria-valuemax={CTX_MAX}
+                aria-valuenow={numCtx}
+                aria-valuetext={`${numCtx} tokens`}
+                onChange={(e) => {
+                  ctxDraggingRef.current = true;
+                  const pos = Number(e.target.value);
+                  setCtxPos(pos);
+                  setCtxChip(String(posToCtx(pos)));
+                }}
+                onMouseUp={() => {
+                  ctxDraggingRef.current = false;
+                  commitCtx(posToCtx(ctxPos));
                 }}
-                onKeyDown={(e) => {
-                  if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
+                onTouchEnd={() => {
+                  ctxDraggingRef.current = false;
+                  commitCtx(posToCtx(ctxPos));
+                }}
+                onKeyUp={() => {
+                  if (!ctxDraggingRef.current) commitCtx(posToCtx(ctxPos));
                 }}
               />
-              <span className={styles.ctxChipUnit}>tokens</span>
-            </div>
-          </div>
-
-          {/* Log-scale slider — fill percentage tracked via CSS custom property */}
-          <input
-            type="range"
-            className={styles.ctxSlider}
-            style={{ '--fill': fillPct } as React.CSSProperties}
-            min={0}
-            max={1000}
-            step={1}
-            value={ctxPos}
-            aria-label="Context window tokens"
-            aria-valuemin={CTX_MIN}
-            aria-valuemax={CTX_MAX}
-            aria-valuenow={numCtx}
-            aria-valuetext={`${numCtx} tokens`}
-            onChange={(e) => {
-              ctxDraggingRef.current = true;
-              const pos = Number(e.target.value);
-              setCtxPos(pos);
-              setCtxChip(String(posToCtx(pos)));
-            }}
-            onMouseUp={() => {
-              ctxDraggingRef.current = false;
-              commitCtx(posToCtx(ctxPos));
-            }}
-            onTouchEnd={() => {
-              ctxDraggingRef.current = false;
-              commitCtx(posToCtx(ctxPos));
-            }}
-            onKeyUp={() => {
-              if (!ctxDraggingRef.current) commitCtx(posToCtx(ctxPos));
-            }}
-          />
-
-          <div className={styles.ctxTickRow} aria-hidden="true">
-            {CTX_TICKS.map((label, i) => (
-              <span
-                key={label}
-                className={styles.ctxTick}
-                style={{ left: `${(i / (CTX_TICKS.length - 1)) * 100}%` }}
-              >
-                {label}
-              </span>
-            ))}
-          </div>
 
-          {activeKind === 'builtin' &&
-          (engineState === 'starting' || engineState === 'stopping') ? (
-            <div className={styles.ctxApplyingHint} role="status">
-              Applying… the engine restarts with the new context on your next
-              message.
-            </div>
-          ) : null}
+              <div className={styles.ctxTickRow} aria-hidden="true">
+                {CTX_TICKS.map((label, i) => (
+                  <span
+                    key={label}
+                    className={styles.ctxTick}
+                    style={{ left: `${(i / (CTX_TICKS.length - 1)) * 100}%` }}
+                  >
+                    {label}
+                  </span>
+                ))}
+              </div>
 
-          <div className={styles.ctxHelper}>
-            ~{ctxTurns.toLocaleString()} turns of context
-            {' · '}
-            {activeKind === 'builtin'
-              ? 'Passed to the engine as --ctx-size at start; changing it restarts the engine.'
-              : activeKind === 'openai'
-                ? 'Informational only; your server controls the actual context.'
-                : "Ollama caps to your model's trained maximum."}
-          </div>
+              {activeKind === 'builtin' &&
+              (engineState === 'starting' || engineState === 'stopping') ? (
+                <div className={styles.ctxApplyingHint} role="status">
+                  Applying… the engine restarts with the new context on your
+                  next message.
+                </div>
+              ) : null}
+
+              <div className={styles.ctxHelper}>
+                ~{ctxTurns.toLocaleString()} turns of context
+                {' · '}
+                {activeKind === 'builtin'
+                  ? 'Passed to the engine as --ctx-size at start; changing it restarts the engine.'
+                  : activeKind === 'openai'
+                    ? 'Informational only; your server controls the actual context.'
+                    : "Ollama caps to your model's trained maximum."}
+              </div>
 
-          <div className={styles.ctxVramNote}>
-            <span className={styles.ctxVramIcon} aria-hidden="true">
-              ⚠
-            </span>
-            <span>
-              The KV cache scales linearly with context length, so doubling the
-              context roughly doubles its memory footprint (model weights stay
-              the same). Benchmark with your hardware before pushing it high.{' '}
-              <button
-                type="button"
-                className={styles.ctxVramLink}
-                onClick={() => {
-                  void invoke('open_url', {
-                    url: 'https://github.com/quiet-node/thuki/blob/main/docs/tuning-context-window.md#the-5-minute-benchmark-recipe',
-                  });
-                }}
-              >
-                Learn how to tune Context Window in 5 minute ↗
-              </button>
-            </span>
-          </div>
-        </div>
-      </Section>
-
-      <Section heading="Prompt">
-        <SaveField
-          section="prompt"
-          fieldKey="system"
-          label="System prompt"
-          helper={configHelp('prompt', 'system')}
-          vertical
-          initialValue={config.prompt.system}
-          resyncToken={resyncToken}
-          onSaved={onSaved}
-          render={(value, setValue) => (
-            <>
-              <Textarea
-                value={value}
-                onChange={setValue}
-                placeholder="Persona prompt…"
-                maxLength={PROMPT_MAX_CHARS}
-                ariaLabel="System prompt"
-                rows={PROMPT_TEXTAREA_ROWS}
-              />
-              <div className={styles.charCounter}>
-                {value.length} / {PROMPT_MAX_CHARS}
+              <div className={styles.ctxVramNote}>
+                <span className={styles.ctxVramIcon} aria-hidden="true">
+                  ⚠
+                </span>
+                <span>
+                  The KV cache scales linearly with context length, so doubling
+                  the context roughly doubles its memory footprint (model
+                  weights stay the same). Benchmark with your hardware before
+                  pushing it high.{' '}
+                  <button
+                    type="button"
+                    className={styles.ctxVramLink}
+                    onClick={() => {
+                      void invoke('open_url', {
+                        url: 'https://github.com/quiet-node/thuki/blob/main/docs/tuning-context-window.md#the-5-minute-benchmark-recipe',
+                      });
+                    }}
+                  >
+                    Learn how to tune Context Window in 5 minute ↗
+                  </button>
+                </span>
               </div>
-            </>
-          )}
-        />
-      </Section>
-
-      <div className={styles.devSection}>
-        <button
-          type="button"
-          className={styles.devTrigger}
-          aria-expanded={devOpen}
-          aria-controls="dev-diagnostics"
-          onClick={() => setDevOpen((o) => !o)}
-        >
-          <span className={styles.devTriggerLabel}>Diagnostics</span>
-          <span className={styles.devTag}>DEV</span>
-          <svg
-            className={`${styles.devChevron} ${devOpen ? styles.devChevronOpen : ''}`}
-            viewBox="0 0 10 10"
-            fill="currentColor"
-            aria-hidden
-          >
-            <path
-              d="M3 2l4 3-4 3"
-              stroke="currentColor"
-              strokeWidth="1.5"
-              strokeLinecap="round"
-              strokeLinejoin="round"
-              fill="none"
-            />
-          </svg>
-        </button>
-        {devOpen && (
-          <div id="dev-diagnostics">
+            </div>
+          </Section>
+
+          <Section heading="Prompt">
             <SaveField
-              section="debug"
-              fieldKey="trace_enabled"
-              label="Trace recording"
-              helper={configHelp('debug', 'trace_enabled')}
-              initialValue={config.debug.trace_enabled}
+              section="prompt"
+              fieldKey="system"
+              label="System prompt"
+              helper={configHelp('prompt', 'system')}
+              vertical
+              initialValue={config.prompt.system}
               resyncToken={resyncToken}
               onSaved={onSaved}
-              tooltipPlacement="top"
-              rightAlign
               render={(value, setValue) => (
-                <Toggle
-                  checked={value}
-                  onChange={setValue}
-                  ariaLabel="Enable trace recording"
-                />
+                <>
+                  <Textarea
+                    value={value}
+                    onChange={setValue}
+                    placeholder="Persona prompt…"
+                    maxLength={PROMPT_MAX_CHARS}
+                    ariaLabel="System prompt"
+                    rows={PROMPT_TEXTAREA_ROWS}
+                  />
+                  <div className={styles.charCounter}>
+                    {value.length} / {PROMPT_MAX_CHARS}
+                  </div>
+                </>
               )}
             />
+          </Section>
+
+          <div className={styles.devSection}>
+            <button
+              type="button"
+              className={styles.devTrigger}
+              aria-expanded={devOpen}
+              aria-controls="dev-diagnostics"
+              onClick={() => setDevOpen((o) => !o)}
+            >
+              <span className={styles.devTriggerLabel}>Diagnostics</span>
+              <span className={styles.devTag}>DEV</span>
+              <svg
+                className={`${styles.devChevron} ${devOpen ? styles.devChevronOpen : ''}`}
+                viewBox="0 0 10 10"
+                fill="currentColor"
+                aria-hidden
+              >
+                <path
+                  d="M3 2l4 3-4 3"
+                  stroke="currentColor"
+                  strokeWidth="1.5"
+                  strokeLinecap="round"
+                  strokeLinejoin="round"
+                  fill="none"
+                />
+              </svg>
+            </button>
+            {devOpen && (
+              <div id="dev-diagnostics">
+                <SaveField
+                  section="debug"
+                  fieldKey="trace_enabled"
+                  label="Trace recording"
+                  helper={configHelp('debug', 'trace_enabled')}
+                  initialValue={config.debug.trace_enabled}
+                  resyncToken={resyncToken}
+                  onSaved={onSaved}
+                  tooltipPlacement="top"
+                  rightAlign
+                  render={(value, setValue) => (
+                    <Toggle
+                      checked={value}
+                      onChange={setValue}
+                      ariaLabel="Enable trace recording"
+                    />
+                  )}
+                />
+              </div>
+            )}
           </div>
-        )}
-      </div>
+        </>
+      ) : null}
     </>
   );
 }
diff --git a/src/settings/tabs/tabs.test.tsx b/src/settings/tabs/tabs.test.tsx
index c366a71f..2924c987 100644
--- a/src/settings/tabs/tabs.test.tsx
+++ b/src/settings/tabs/tabs.test.tsx
@@ -170,7 +170,11 @@ async function renderModelTab() {
 describe('ModelTab', () => {
   it('renders Providers and Prompt sections with the expected labels', async () => {
     await renderModelTab();
-    expect(screen.getByText('Providers')).toBeInTheDocument();
+    // `selector: 'div'` targets the section heading, not the same-named
+    // segmented-control tab button.
+    expect(
+      screen.getByText('Providers', { selector: 'div' }),
+    ).toBeInTheDocument();
     expect(screen.getByText('Built-in (Thuki)')).toBeInTheDocument();
     // Built-in is selectable (no more "upcoming version" badge); Ollama is
     // the active provider in this config.
@@ -183,6 +187,34 @@ describe('ModelTab', () => {
     expect(screen.getByText('System prompt')).toBeInTheDocument();
   });
 
+  it('defaults to the Providers view', async () => {
+    await renderModelTab();
+    expect(screen.getByRole('tab', { name: 'Providers' })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+  });
+
+  it('switches to the Discover view via the segmented control', async () => {
+    await renderModelTab();
+    fireEvent.click(screen.getByRole('tab', { name: 'Discover' }));
+    expect(
+      screen.getByText(/Browse and download Hugging Face/),
+    ).toBeInTheDocument();
+    // Providers content is unmounted while Discover is showing.
+    expect(
+      screen.queryByRole('radio', { name: 'Use Built-in (Thuki)' }),
+    ).toBeNull();
+  });
+
+  it('switches to the Library view via the segmented control', async () => {
+    await renderModelTab();
+    fireEvent.click(screen.getByRole('tab', { name: 'Library' }));
+    expect(
+      screen.getByText(/installed models will appear/),
+    ).toBeInTheDocument();
+  });
+
   it('renders the Ollama URL field seeded from the active provider base_url', async () => {
     await renderModelTab();
     const input = screen.getByRole('textbox', {
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 629aa4f8..4cd15136 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -332,6 +332,21 @@
   background: var(--accent);
 }
 
+/* Row holding the segmented control (and, on Library, the Add-model action). */
+.barrow {
+  display: flex;
+  align-items: center;
+  margin-bottom: 18px;
+}
+
+/* Interim copy for the Library / Discover panes while they are being built. */
+.modelsPlaceholder {
+  padding: 48px 8px;
+  color: var(--t2);
+  font-size: 13px;
+  text-align: center;
+}
+
 /* ─── Body (scrolling content) ──────────────────────────────────────────── */
 
 .body {

From 0b1b1c05b964e99833c1747d4b306e2c4f94578b Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 02:13:41 -0500
Subject: [PATCH 05/89] feat: build the Models surface with Active-Hero
 providers, Library, and Discover

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/tabs/ModelTab.tsx                |  761 +---------
 src/settings/tabs/ProviderCards.test.tsx      |  600 +-------
 src/settings/tabs/ProviderCards.tsx           |  363 +----
 .../tabs/models/DiscoverPane.module.css       |  274 ++++
 .../tabs/models/DiscoverPane.test.tsx         |  454 ++++++
 src/settings/tabs/models/DiscoverPane.tsx     |  253 ++++
 .../tabs/models/LibraryPane.module.css        |  222 +++
 src/settings/tabs/models/LibraryPane.test.tsx |  488 ++++++
 src/settings/tabs/models/LibraryPane.tsx      |  251 ++++
 .../tabs/models/ProvidersPane.test.tsx        |  653 ++++++++
 src/settings/tabs/models/ProvidersPane.tsx    |  629 ++++++++
 src/settings/tabs/models/useHfSearch.test.ts  |  254 ++++
 src/settings/tabs/models/useHfSearch.ts       |  117 ++
 src/settings/tabs/tabs.test.tsx               | 1308 +----------------
 src/styles/settings.module.css                |  217 ++-
 src/types/hf.ts                               |   26 +
 16 files changed, 3888 insertions(+), 2982 deletions(-)
 create mode 100644 src/settings/tabs/models/DiscoverPane.module.css
 create mode 100644 src/settings/tabs/models/DiscoverPane.test.tsx
 create mode 100644 src/settings/tabs/models/DiscoverPane.tsx
 create mode 100644 src/settings/tabs/models/LibraryPane.module.css
 create mode 100644 src/settings/tabs/models/LibraryPane.test.tsx
 create mode 100644 src/settings/tabs/models/LibraryPane.tsx
 create mode 100644 src/settings/tabs/models/ProvidersPane.test.tsx
 create mode 100644 src/settings/tabs/models/ProvidersPane.tsx
 create mode 100644 src/settings/tabs/models/useHfSearch.test.ts
 create mode 100644 src/settings/tabs/models/useHfSearch.ts
 create mode 100644 src/types/hf.ts

diff --git a/src/settings/tabs/ModelTab.tsx b/src/settings/tabs/ModelTab.tsx
index 9b7b7134..aa1fa9fe 100644
--- a/src/settings/tabs/ModelTab.tsx
+++ b/src/settings/tabs/ModelTab.tsx
@@ -1,34 +1,20 @@
 /**
- * AI tab.
+ * Models tab: a segmented surface over the three model sub-views.
  *
- * Holds the Providers panel (built-in engine, Ollama, and an optional
- * OpenAI-compatible server, with the active one selectable), the unified
- * Keep Warm residency control (shown for both local providers, hidden for
- * OpenAI), the context window slider, and the custom system prompt. The
- * Window/Quote knobs live in the Display tab.
+ * The left sidebar selects this section; the segmented control at the top
+ * picks Library (installed models), Discover (the Hugging Face browser), or
+ * Providers (the active provider plus the shared generation settings). Each
+ * sub-view is its own pane component; this file only routes between them.
  */
 
-import { useEffect, useRef, useState } from 'react';
-import { invoke } from '@tauri-apps/api/core';
-import { listen } from '@tauri-apps/api/event';
+import { useState } from 'react';
 
-import { Section, SettingRow, Dropdown, Textarea, Toggle } from '../components';
-import { SaveField } from '../components/SaveField';
-import {
-  AddOpenAiProvider,
-  BuiltinProviderCard,
-  OpenAiProviderCard,
-} from './ProviderCards';
 import { ModelsSegmented, type ModelsSubview } from './models/ModelsSegmented';
-import { useDebouncedSave } from '../hooks/useDebouncedSave';
-import { useModelSelection } from '../../hooks/useModelSelection';
-import { isNonLocalUrl } from '../../utils/isNonLocalUrl';
-import { configHelp } from '../configHelpers';
-import { DrawCheckIcon } from '../../components/DrawCheckIcon';
-import { Tooltip } from '../../components/Tooltip';
+import { ProvidersPane } from './models/ProvidersPane';
+import { LibraryPane } from './models/LibraryPane';
+import { DiscoverPane } from './models/DiscoverPane';
 import styles from '../../styles/settings.module.css';
 import type { RawAppConfig } from '../types';
-import type { EngineStatus } from '../../types/starter';
 
 interface ModelTabProps {
   config: RawAppConfig;
@@ -36,256 +22,11 @@ interface ModelTabProps {
   onSaved: (next: RawAppConfig) => void;
 }
 
-/// Built-in prompt body is ~17 KB; cap roomy so users can edit without truncation.
-const PROMPT_MAX_CHARS = 32000;
-/// Default textarea height for the system prompt: large enough to show a
-/// meaningful slice of the seeded built-in body without forcing the user to
-/// drag the resize grip on first open.
-const PROMPT_TEXTAREA_ROWS = 16;
-const EJECT_RESET_MS = 2500;
-/// Approximate tokens per chat turn used for the "~N turns of context" hint.
-/// 400 tokens ≈ a typical user question + assistant reply pair on this app.
-const TOKENS_PER_TURN_ESTIMATE = 400;
-
-const KEEP_WARM_TOOLTIP =
-  'Keep Warm holds your active model resident in memory after each use, ' +
-  'for both the built-in engine and Ollama. ' +
-  'The timer below sets how long before it auto-releases; use -1 to keep it indefinitely. ' +
-  'Unload now releases it immediately. ' +
-  'If set to 0, each provider uses its natural short default (about 5 minutes).';
-
-// Log-scale context window slider: slider pos [0..1000] ↔ token count.
-// Scale: value = CTX_MIN * (CTX_MAX / CTX_MIN)^(pos/1000)
-// With CTX_MAX/CTX_MIN = 512 (= 2^9), each 1/9 of the slider doubles the value.
-const CTX_MIN = 2048;
-const CTX_MAX = 1_048_576; // 1M
-const CTX_LOG_RATIO = Math.log(CTX_MAX / CTX_MIN);
-
-function ctxToPos(v: number): number {
-  return Math.round((1000 * Math.log(v / CTX_MIN)) / CTX_LOG_RATIO);
-}
-
-function posToCtx(pos: number): number {
-  // Snap to nearest 1 KiB boundary (standard Ollama increment).
-  return (
-    Math.round((CTX_MIN * Math.pow(CTX_MAX / CTX_MIN, pos / 1000)) / 1024) *
-    1024
-  );
-}
-
-const CTX_TICKS = [
-  '2K',
-  '4K',
-  '8K',
-  '16K',
-  '32K',
-  '64K',
-  '128K',
-  '256K',
-  '512K',
-  '1M',
-];
-
 export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
-  // Which of the three Models sub-views is showing. Providers is the default
-  // (the active provider + generation controls, the most-used surface).
+  // Providers is the default sub-view: the active provider and the shared
+  // generation controls are the most-used surface.
   const [view, setView] = useState<ModelsSubview>('providers');
-  const [inactivityMin, setInactivityMin] = useState(
-    config.inference.keep_warm_inactivity_minutes,
-  );
-  const [rawMin, setRawMin] = useState(
-    String(config.inference.keep_warm_inactivity_minutes),
-  );
-  const minFocusedRef = useRef(false);
-  const [ejecting, setEjecting] = useState(false);
-  const [loadedModel, setLoadedModel] = useState<string | null>(null);
-
-  // Providers panel: who is active and of which kind, derived from the
-  // config snapshot so a resync always reflects disk.
-  const providers = config.inference.providers;
-  const activeId = config.inference.active_provider;
-  const activeKind = providers.find((p) => p.id === activeId)?.kind ?? 'ollama';
-  const builtinProvider = providers.find((p) => p.kind === 'builtin');
-  const openaiProvider = providers.find((p) => p.kind === 'openai');
-
-  // The OpenAI-compatible provider kind is gated behind a compile-time,
-  // dev-only env flag, off by default. Vite statically replaces
-  // `import.meta.env` at build, so a production build folds this to `false`
-  // and tree-shakes the gated affordance out of the bundle entirely. Gates
-  // the UI only: the shared /v1 client the built-in engine depends on stays
-  // live. Read here (not at module load) so tests can toggle it via
-  // `vi.stubEnv`.
-  const openaiProviderEnabled =
-    import.meta.env.VITE_ENABLE_OPENAI_PROVIDER === 'true';
-
-  // Latest engine lifecycle snapshot; drives the built-in residency line and
-  // the context slider's non-blocking "Applying" hint.
-  const [engineState, setEngineState] =
-    useState<EngineStatus['state']>('stopped');
-
-  // Context window: committed value drives the debounced save; local slider
-  // pos updates live on drag without committing on every pixel.
-  const [numCtx, setNumCtx] = useState(config.inference.num_ctx);
-  const [ctxPos, setCtxPos] = useState(() =>
-    ctxToPos(config.inference.num_ctx),
-  );
-  const [ctxChip, setCtxChip] = useState(String(config.inference.num_ctx));
-  const ctxDraggingRef = useRef(false);
-
-  const [devOpen, setDevOpen] = useState(false);
-
-  // Ollama provider URL: local editable copy committed on blur / Enter via
-  // the dedicated set_ollama_url command (the URL lives on the providers array,
-  // not a flat set_config_field key).
-  const ollamaBaseUrl =
-    config.inference.providers.find((p) => p.kind === 'ollama')?.base_url ?? '';
-  const [ollamaUrl, setOllamaUrl] = useState(ollamaBaseUrl);
-  const ollamaUrlFocusedRef = useRef(false);
-
-  // Per-provider model picker (Ollama). Mirrors the overlay picker; both read
-  // get_model_picker_state, which is scoped to the active provider.
-  // `useModelSelection` already refreshes once on mount, so no extra effect is
-  // needed here.
-  const { activeModel, availableModels, setActiveModel } = useModelSelection();
-
-  useEffect(() => {
-    // Cleanup chains on the listen promises (not a captured variable) so an
-    // unmount that races the registration still detaches every listener.
-    const unlistenLoaded = listen<string>('warmup:model-loaded', (e) => {
-      setLoadedModel(e.payload);
-    });
-    const unlistenEvicted = listen<null>('warmup:model-evicted', () => {
-      setLoadedModel(null);
-    });
-    invoke<string | null>('get_loaded_model')
-      .then(setLoadedModel)
-      .catch(() => {});
-
-    function handleVisibilityChange() {
-      if (!document.hidden) {
-        invoke<string | null>('get_loaded_model')
-          .then(setLoadedModel)
-          .catch(() => {});
-      }
-    }
-    document.addEventListener('visibilitychange', handleVisibilityChange);
-
-    return () => {
-      void unlistenLoaded.then((unlisten) => unlisten());
-      void unlistenEvicted.then((unlisten) => unlisten());
-      document.removeEventListener('visibilitychange', handleVisibilityChange);
-    };
-  }, []);
-
-  useEffect(() => {
-    // Seed from the runner's current snapshot: the backend only emits
-    // engine:status on transitions, so without this an already-loaded
-    // engine would read "stopped" (and Unload now would stay dead) until
-    // the next transition.
-    invoke<EngineStatus>('get_engine_status')
-      .then((status) => setEngineState(status.state))
-      .catch(() => {
-        // Keep the stopped default; the event stream corrects it.
-      });
-    const unlistenPromise = listen<EngineStatus>('engine:status', (e) => {
-      setEngineState(e.payload.state);
-    });
-    return () => {
-      void unlistenPromise.then((unlisten) => unlisten());
-    };
-  }, []);
-
-  const { resetTo: resetMin } = useDebouncedSave(
-    'inference',
-    'keep_warm_inactivity_minutes',
-    inactivityMin,
-    { onSaved },
-  );
-
-  const { resetTo: resetNumCtx } = useDebouncedSave(
-    'inference',
-    'num_ctx',
-    numCtx,
-    { onSaved },
-  );
-
-  const prevTokenRef = useRef(resyncToken);
-
-  if (prevTokenRef.current !== resyncToken) {
-    prevTokenRef.current = resyncToken;
-    if (!minFocusedRef.current) {
-      setInactivityMin(config.inference.keep_warm_inactivity_minutes);
-      setRawMin(String(config.inference.keep_warm_inactivity_minutes));
-      resetMin(config.inference.keep_warm_inactivity_minutes);
-    }
-    const nextCtx = config.inference.num_ctx;
-    setNumCtx(nextCtx);
-    setCtxPos(ctxToPos(nextCtx));
-    setCtxChip(String(nextCtx));
-    resetNumCtx(nextCtx);
-    if (!ollamaUrlFocusedRef.current) {
-      setOllamaUrl(ollamaBaseUrl);
-    }
-  }
-
-  function commitCtx(v: number) {
-    setNumCtx(v);
-    setCtxPos(ctxToPos(v));
-    setCtxChip(String(v));
-  }
-
-  function handleEject() {
-    setEjecting(true);
-    invoke('evict_model')
-      .then(() => {
-        setTimeout(() => setEjecting(false), EJECT_RESET_MS);
-      })
-      .catch(() => setEjecting(false));
-  }
-
-  function commitOllamaUrl() {
-    const next = ollamaUrl.trim();
-    if (next === ollamaBaseUrl) return;
-    void invoke<RawAppConfig>('set_ollama_url', { baseUrl: next })
-      .then((cfg) => onSaved(cfg))
-      .catch(() => {
-        // Save failed: revert the field to the persisted value so the input
-        // never shows a URL the backend did not accept.
-        setOllamaUrl(ollamaBaseUrl);
-      });
-  }
-
-  function selectProvider(id: string) {
-    // Radios only fire onChange when the selection actually changes, so no
-    // same-provider guard is needed here.
-    void invoke<RawAppConfig>('set_active_provider', { providerId: id })
-      .then((cfg) => onSaved(cfg))
-      .catch(() => {
-        // Switching failed (e.g. config write error): the radio re-seeds
-        // from config on the next render.
-      });
-  }
-
-  function handleEngineEject() {
-    void invoke('evict_model').catch(() => {
-      // The engine:status event stream is the source of truth; a failed
-      // eviction simply leaves the residency line unchanged.
-    });
-  }
-
-  function providerCardClass(active: boolean): string {
-    return active
-      ? `${styles.providerCard} ${styles.providerCardActive}`
-      : styles.providerCard;
-  }
-
-  const modelValue =
-    activeModel && availableModels.includes(activeModel)
-      ? activeModel
-      : (availableModels[0] ?? '');
-
-  const ctxTurns = Math.round(numCtx / TOKENS_PER_TURN_ESTIMATE);
-  const fillPct = `${ctxPos / 10}%`;
+  const goToDiscover = () => setView('discover');
 
   return (
     <>
@@ -294,476 +35,22 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
       </div>
 
       {view === 'library' ? (
-        <div className={styles.modelsPlaceholder}>
-          Your installed models will appear here.
-        </div>
+        <LibraryPane
+          config={config}
+          onSaved={onSaved}
+          onAddModel={goToDiscover}
+        />
       ) : null}
 
-      {view === 'discover' ? (
-        <div className={styles.modelsPlaceholder}>
-          Browse and download Hugging Face models here.
-        </div>
-      ) : null}
+      {view === 'discover' ? <DiscoverPane onSaved={onSaved} /> : null}
 
       {view === 'providers' ? (
-        <>
-          <Section heading="Providers">
-            <div
-              className={providerCardClass(activeKind === 'builtin')}
-              data-provider-card="builtin"
-            >
-              <label className={styles.providerSelectRow}>
-                <input
-                  type="radio"
-                  className={styles.providerRadio}
-                  name="active-provider"
-                  aria-label="Use Built-in (Thuki)"
-                  checked={activeKind === 'builtin'}
-                  onChange={() =>
-                    selectProvider(builtinProvider?.id ?? 'builtin')
-                  }
-                />
-                <span className={styles.providerName}>
-                  {builtinProvider?.label ?? 'Built-in (Thuki)'}
-                </span>
-              </label>
-              <BuiltinProviderCard config={config} onSaved={onSaved} />
-            </div>
-
-            <div
-              className={providerCardClass(activeKind === 'ollama')}
-              data-provider-card="ollama"
-            >
-              <label className={styles.providerSelectRow}>
-                <input
-                  type="radio"
-                  className={styles.providerRadio}
-                  name="active-provider"
-                  aria-label="Use Ollama"
-                  checked={activeKind === 'ollama'}
-                  onChange={() => selectProvider('ollama')}
-                />
-                <span className={styles.providerName}>Ollama</span>
-              </label>
-              <SettingRow
-                label="Ollama URL"
-                helper={configHelp('inference', 'ollama_base_url')}
-              >
-                <input
-                  type="text"
-                  className={styles.input}
-                  value={ollamaUrl}
-                  aria-label="Ollama URL"
-                  spellCheck={false}
-                  autoComplete="off"
-                  autoCorrect="off"
-                  autoCapitalize="off"
-                  placeholder="http://127.0.0.1:11434"
-                  onFocus={() => {
-                    ollamaUrlFocusedRef.current = true;
-                  }}
-                  onChange={(e) => setOllamaUrl(e.target.value)}
-                  onBlur={() => {
-                    ollamaUrlFocusedRef.current = false;
-                    commitOllamaUrl();
-                  }}
-                  onKeyDown={(e) => {
-                    if (e.key === 'Enter')
-                      (e.target as HTMLInputElement).blur();
-                  }}
-                />
-              </SettingRow>
-              {isNonLocalUrl(ollamaUrl) && (
-                <p className={styles.providerWarning} role="alert">
-                  This points Thuki at a non-local Ollama server. You are
-                  responsible for securing it: prefer a VPN/Tailscale or SSH
-                  tunnel over exposing the port directly.
-                </p>
-              )}
-              {/* get_model_picker_state is scoped to the ACTIVE provider, so this
-              inventory only describes Ollama while Ollama is active. Hide the
-              row otherwise to avoid listing another provider's models here. */}
-              {activeKind === 'ollama' ? (
-                <SettingRow label="Model">
-                  {availableModels.length > 0 ? (
-                    <Dropdown
-                      value={modelValue}
-                      options={availableModels}
-                      onChange={(m) => void setActiveModel(m)}
-                      ariaLabel="Active Ollama model"
-                    />
-                  ) : (
-                    <span className={styles.providerHint}>
-                      No models installed
-                    </span>
-                  )}
-                </SettingRow>
-              ) : null}
-            </div>
-
-            {/* The OpenAI-compatible provider KIND is gated behind a
-            compile-time, dev-only flag (off in shipped builds): both the
-            management card and the "add a server" affordance are the only UI
-            paths to create or manage one, so hiding them keeps the kind out of
-            reach of end users. The shared /v1 backend stays live for the
-            built-in engine regardless. */}
-            {openaiProviderEnabled ? (
-              openaiProvider ? (
-                <div
-                  className={providerCardClass(activeKind === 'openai')}
-                  data-provider-card="openai"
-                >
-                  <label className={styles.providerSelectRow}>
-                    <input
-                      type="radio"
-                      className={styles.providerRadio}
-                      name="active-provider"
-                      aria-label="Use OpenAI-compatible server"
-                      checked={activeKind === 'openai'}
-                      onChange={() => selectProvider(openaiProvider.id)}
-                    />
-                    <span className={styles.providerName}>
-                      {openaiProvider.label}
-                    </span>
-                  </label>
-                  <OpenAiProviderCard
-                    provider={openaiProvider}
-                    resyncToken={resyncToken}
-                    onSaved={onSaved}
-                  />
-                </div>
-              ) : (
-                <AddOpenAiProvider onSaved={onSaved} />
-              )
-            ) : null}
-          </Section>
-
-          {/* Unified residency control: one Keep Warm knob bound to
-          keep_warm_inactivity_minutes, shown for both local providers
-          (built-in engine and Ollama) and hidden for OpenAI (Thuki does not
-          manage a remote server's residency). The status row branches by
-          kind: the built-in engine reports its sidecar lifecycle, Ollama
-          reports the model resident in VRAM. */}
-          {activeKind === 'builtin' || activeKind === 'ollama' ? (
-            <Section heading="Keep Warm">
-              {/* Row 1: label + [?] on left | Release after [N] min on right */}
-              <div className={styles.keepWarmRow1}>
-                <div className={styles.keepWarmLabelLine}>
-                  <span className={styles.keepWarmLabel}>
-                    Keep active model in memory
-                  </span>
-                  <Tooltip label={KEEP_WARM_TOOLTIP} multiline>
-                    <button
-                      type="button"
-                      className={styles.infoBtn}
-                      aria-label="About Keep active model in memory"
-                    >
-                      ?
-                    </button>
-                  </Tooltip>
-                </div>
-                <div className={styles.keepWarmTimerGroup}>
-                  <span className={styles.keepWarmBarFieldLabel}>
-                    Release after
-                  </span>
-                  <input
-                    type="number"
-                    className={styles.keepWarmNumberInput}
-                    value={rawMin}
-                    min={-1}
-                    max={1440}
-                    aria-label="Release after N minutes"
-                    onFocus={() => {
-                      minFocusedRef.current = true;
-                    }}
-                    onChange={(e) => {
-                      const n = parseInt(e.target.value, 10);
-                      if (Number.isNaN(n)) {
-                        setRawMin(e.target.value);
-                      } else {
-                        const clamped = Math.max(-1, Math.min(1440, n));
-                        setRawMin(String(clamped));
-                        setInactivityMin(clamped);
-                      }
-                    }}
-                    onBlur={() => {
-                      minFocusedRef.current = false;
-                      if (Number.isNaN(parseInt(rawMin, 10))) {
-                        setRawMin('0');
-                        setInactivityMin(0);
-                      }
-                    }}
-                  />
-                  <span className={styles.keepWarmUnit}>min</span>
-                </div>
-              </div>
-
-              {/* Row 2: residency status on left | Unload now on right. */}
-              {activeKind === 'builtin' ? (
-                <div className={styles.keepWarmStatusRow}>
-                  <span className={styles.engineStatusLine}>
-                    Engine: {engineState}
-                  </span>
-                  <button
-                    type="button"
-                    className={styles.keepWarmEjectPill}
-                    aria-label="Unload now"
-                    disabled={engineState !== 'loaded'}
-                    onClick={handleEngineEject}
-                  >
-                    Unload now
-                  </button>
-                </div>
-              ) : (
-                <div className={styles.keepWarmStatusRow}>
-                  <div className={styles.keepWarmStatusLeft}>
-                    {loadedModel !== null ? (
-                      <div className={styles.keepWarmVramSubtitle}>
-                        <span
-                          className={styles.keepWarmVramDot}
-                          data-testid="vram-status-dot"
-                          aria-hidden="true"
-                        />
-                        <span className={styles.keepWarmVramModelName}>
-                          {loadedModel}
-                        </span>
-                        <span>&nbsp;· in VRAM</span>
-                      </div>
-                    ) : (
-                      <span className={styles.keepWarmNoModel}>
-                        No model loaded
-                      </span>
-                    )}
-                  </div>
-
-                  <button
-                    type="button"
-                    className={styles.keepWarmEjectPill}
-                    aria-label="Unload now"
-                    disabled={ejecting || loadedModel === null}
-                    data-ejecting={ejecting}
-                    onClick={handleEject}
-                  >
-                    {ejecting ? (
-                      <DrawCheckIcon />
-                    ) : (
-                      <svg
-                        viewBox="0 0 16 16"
-                        width="11"
-                        height="11"
-                        fill="currentColor"
-                        aria-hidden="true"
-                      >
-                        <polygon points="8,2 14,11 2,11" />
-                        <rect x="2" y="12.5" width="12" height="2" rx="1" />
-                      </svg>
-                    )}
-                    Unload now
-                  </button>
-                </div>
-              )}
-            </Section>
-          ) : null}
-
-          <Section heading="Context Window">
-            <div className={styles.ctxBlock}>
-              {/* Label row: "Context window" left + editable token chip right */}
-              <div className={styles.ctxTopRow}>
-                <span className={styles.ctxLabel}>Context window</span>
-                <div className={styles.ctxChipGroup}>
-                  <input
-                    type="number"
-                    className={styles.ctxChipInput}
-                    value={ctxChip}
-                    min={CTX_MIN}
-                    max={CTX_MAX}
-                    aria-label="Context window tokens"
-                    onChange={(e) => setCtxChip(e.target.value)}
-                    onBlur={() => {
-                      const n = parseInt(ctxChip, 10);
-                      if (!Number.isNaN(n) && n >= CTX_MIN) {
-                        // Clamp upper bound so the UI mirrors the backend
-                        // BOUNDS_NUM_CTX cap and the slider stays in sync.
-                        commitCtx(Math.min(n, CTX_MAX));
-                      } else {
-                        setCtxChip(String(numCtx));
-                      }
-                    }}
-                    onKeyDown={(e) => {
-                      if (e.key === 'Enter')
-                        (e.target as HTMLInputElement).blur();
-                    }}
-                  />
-                  <span className={styles.ctxChipUnit}>tokens</span>
-                </div>
-              </div>
-
-              {/* Log-scale slider — fill percentage tracked via CSS custom property */}
-              <input
-                type="range"
-                className={styles.ctxSlider}
-                style={{ '--fill': fillPct } as React.CSSProperties}
-                min={0}
-                max={1000}
-                step={1}
-                value={ctxPos}
-                aria-label="Context window tokens"
-                aria-valuemin={CTX_MIN}
-                aria-valuemax={CTX_MAX}
-                aria-valuenow={numCtx}
-                aria-valuetext={`${numCtx} tokens`}
-                onChange={(e) => {
-                  ctxDraggingRef.current = true;
-                  const pos = Number(e.target.value);
-                  setCtxPos(pos);
-                  setCtxChip(String(posToCtx(pos)));
-                }}
-                onMouseUp={() => {
-                  ctxDraggingRef.current = false;
-                  commitCtx(posToCtx(ctxPos));
-                }}
-                onTouchEnd={() => {
-                  ctxDraggingRef.current = false;
-                  commitCtx(posToCtx(ctxPos));
-                }}
-                onKeyUp={() => {
-                  if (!ctxDraggingRef.current) commitCtx(posToCtx(ctxPos));
-                }}
-              />
-
-              <div className={styles.ctxTickRow} aria-hidden="true">
-                {CTX_TICKS.map((label, i) => (
-                  <span
-                    key={label}
-                    className={styles.ctxTick}
-                    style={{ left: `${(i / (CTX_TICKS.length - 1)) * 100}%` }}
-                  >
-                    {label}
-                  </span>
-                ))}
-              </div>
-
-              {activeKind === 'builtin' &&
-              (engineState === 'starting' || engineState === 'stopping') ? (
-                <div className={styles.ctxApplyingHint} role="status">
-                  Applying… the engine restarts with the new context on your
-                  next message.
-                </div>
-              ) : null}
-
-              <div className={styles.ctxHelper}>
-                ~{ctxTurns.toLocaleString()} turns of context
-                {' · '}
-                {activeKind === 'builtin'
-                  ? 'Passed to the engine as --ctx-size at start; changing it restarts the engine.'
-                  : activeKind === 'openai'
-                    ? 'Informational only; your server controls the actual context.'
-                    : "Ollama caps to your model's trained maximum."}
-              </div>
-
-              <div className={styles.ctxVramNote}>
-                <span className={styles.ctxVramIcon} aria-hidden="true">
-                  ⚠
-                </span>
-                <span>
-                  The KV cache scales linearly with context length, so doubling
-                  the context roughly doubles its memory footprint (model
-                  weights stay the same). Benchmark with your hardware before
-                  pushing it high.{' '}
-                  <button
-                    type="button"
-                    className={styles.ctxVramLink}
-                    onClick={() => {
-                      void invoke('open_url', {
-                        url: 'https://github.com/quiet-node/thuki/blob/main/docs/tuning-context-window.md#the-5-minute-benchmark-recipe',
-                      });
-                    }}
-                  >
-                    Learn how to tune Context Window in 5 minute ↗
-                  </button>
-                </span>
-              </div>
-            </div>
-          </Section>
-
-          <Section heading="Prompt">
-            <SaveField
-              section="prompt"
-              fieldKey="system"
-              label="System prompt"
-              helper={configHelp('prompt', 'system')}
-              vertical
-              initialValue={config.prompt.system}
-              resyncToken={resyncToken}
-              onSaved={onSaved}
-              render={(value, setValue) => (
-                <>
-                  <Textarea
-                    value={value}
-                    onChange={setValue}
-                    placeholder="Persona prompt…"
-                    maxLength={PROMPT_MAX_CHARS}
-                    ariaLabel="System prompt"
-                    rows={PROMPT_TEXTAREA_ROWS}
-                  />
-                  <div className={styles.charCounter}>
-                    {value.length} / {PROMPT_MAX_CHARS}
-                  </div>
-                </>
-              )}
-            />
-          </Section>
-
-          <div className={styles.devSection}>
-            <button
-              type="button"
-              className={styles.devTrigger}
-              aria-expanded={devOpen}
-              aria-controls="dev-diagnostics"
-              onClick={() => setDevOpen((o) => !o)}
-            >
-              <span className={styles.devTriggerLabel}>Diagnostics</span>
-              <span className={styles.devTag}>DEV</span>
-              <svg
-                className={`${styles.devChevron} ${devOpen ? styles.devChevronOpen : ''}`}
-                viewBox="0 0 10 10"
-                fill="currentColor"
-                aria-hidden
-              >
-                <path
-                  d="M3 2l4 3-4 3"
-                  stroke="currentColor"
-                  strokeWidth="1.5"
-                  strokeLinecap="round"
-                  strokeLinejoin="round"
-                  fill="none"
-                />
-              </svg>
-            </button>
-            {devOpen && (
-              <div id="dev-diagnostics">
-                <SaveField
-                  section="debug"
-                  fieldKey="trace_enabled"
-                  label="Trace recording"
-                  helper={configHelp('debug', 'trace_enabled')}
-                  initialValue={config.debug.trace_enabled}
-                  resyncToken={resyncToken}
-                  onSaved={onSaved}
-                  tooltipPlacement="top"
-                  rightAlign
-                  render={(value, setValue) => (
-                    <Toggle
-                      checked={value}
-                      onChange={setValue}
-                      ariaLabel="Enable trace recording"
-                    />
-                  )}
-                />
-              </div>
-            )}
-          </div>
-        </>
+        <ProvidersPane
+          config={config}
+          resyncToken={resyncToken}
+          onSaved={onSaved}
+          onAddModel={goToDiscover}
+        />
       ) : null}
     </>
   );
diff --git a/src/settings/tabs/ProviderCards.test.tsx b/src/settings/tabs/ProviderCards.test.tsx
index d45b58f3..1aab46d1 100644
--- a/src/settings/tabs/ProviderCards.test.tsx
+++ b/src/settings/tabs/ProviderCards.test.tsx
@@ -24,13 +24,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { invoke } from '@tauri-apps/api/core';
 
-import {
-  AddOpenAiProvider,
-  BuiltinProviderCard,
-  OpenAiProviderCard,
-} from './ProviderCards';
+import { AddOpenAiProvider, OpenAiProviderCard } from './ProviderCards';
 import type { RawAppConfig, RawProvider } from '../types';
-import type { InstalledModel, StarterOption } from '../../types/starter';
 
 const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
 
@@ -95,59 +90,6 @@ const NEW_CONFIG: RawAppConfig = {
   prompt: { system: 'updated' },
 };
 
-function makeConfig(builtinModel: string): RawAppConfig {
-  return {
-    ...BASE_CONFIG,
-    inference: {
-      ...BASE_CONFIG.inference,
-      providers: [
-        { ...BASE_CONFIG.inference.providers[0], model: builtinModel },
-        BASE_CONFIG.inference.providers[1],
-      ],
-    },
-  };
-}
-
-const INSTALLED: InstalledModel[] = [
-  {
-    id: 'org/gemma:gemma.gguf',
-    display_name: 'gemma',
-    size_bytes: 2_489_757_856,
-    quant: 'Q4_K_M',
-  },
-  {
-    id: 'org/qwen:qwen.gguf',
-    display_name: 'qwen',
-    size_bytes: 9_000_000_000,
-    quant: '',
-  },
-];
-
-const STARTER_OPTION: StarterOption = {
-  starter: {
-    tier: 'balanced',
-    display_name: 'Gemma 4',
-    repo: 'org/gemma',
-    revision: 'abc123',
-    file_name: 'gemma.gguf',
-    sha256: 'sha-balanced',
-    size_bytes: 5_000_000_000,
-    quant: 'Q4_K_M',
-    vision: false,
-    thinking: false,
-    mmproj_file: null,
-    mmproj_sha256: null,
-    mmproj_bytes: 0,
-    est_runtime_gb: 6,
-    license_note: '',
-    origin: 'Google',
-    origin_repo: 'google/gemma-4-12B-it',
-  },
-  fit: 'fits',
-  installed: false,
-  partial_bytes: null,
-};
-
 const OPENAI_PROVIDER: RawProvider = {
   id: 'openai',
   kind: 'openai',
@@ -186,37 +128,19 @@ function StatefulOpenAiCard() {
   );
 }
 
-/**
- * Wraps the builtin card the way ModelTab does: `onSaved` lifts the returned
- * config snapshot so a backend-side model clear reaches the dropdown.
- */
-function StatefulBuiltinCard({ initialModel }: { initialModel: string }) {
-  const [config, setConfig] = useState<RawAppConfig>(() =>
-    makeConfig(initialModel),
-  );
-  return <BuiltinProviderCard config={config} onSaved={setConfig} />;
-}
-
-type MockChannel = { simulateMessage: (msg: unknown) => void };
-
 /** Marks a command response as a rejection in `mockCommands`. */
 class Reject {
   constructor(public readonly value: unknown) {}
 }
 
-let lastChannel: MockChannel | null = null;
-
 /**
  * Routes `invoke` by command name. Values: `Reject` throws its payload,
  * functions are called with the invoke args (for stateful sequences), and
- * anything else resolves as-is. Channels passed via `onEvent` are captured.
+ * anything else resolves as-is.
  */
 function mockCommands(responses: Record<string, unknown>) {
   invokeMock.mockImplementation(
     async (cmd: string, args?: Record<string, unknown>) => {
-      if (args && 'onEvent' in args) {
-        lastChannel = args.onEvent as unknown as MockChannel;
-      }
       if (Object.prototype.hasOwnProperty.call(responses, cmd)) {
         const v = responses[cmd];
         if (v instanceof Reject) throw v.value;
@@ -230,17 +154,6 @@ function mockCommands(responses: Record<string, unknown>) {
   );
 }
 
-/** Default backend for the builtin card: two installed models, one starter. */
-function builtinResponses(overrides: Record<string, unknown> = {}) {
-  return {
-    list_installed_models: INSTALLED,
-    get_starter_options: [STARTER_OPTION],
-    get_models_dir_free_bytes: 50_000_000_000,
-    get_config: NEW_CONFIG,
-    ...overrides,
-  };
-}
-
 async function flush() {
   await act(async () => {
     await Promise.resolve();
@@ -272,515 +185,6 @@ function deferredQueue<T>() {
 
 beforeEach(() => {
   invokeMock.mockReset();
-  lastChannel = null;
-});
-
-// ─── BuiltinProviderCard ─────────────────────────────────────────────────────
-
-describe('BuiltinProviderCard', () => {
-  async function renderCard(
-    builtinModel = '',
-    onSaved: (next: RawAppConfig) => void = () => {},
-  ) {
-    const view = render(
-      <BuiltinProviderCard
-        config={makeConfig(builtinModel)}
-        onSaved={onSaved}
-      />,
-    );
-    await flush();
-    return view;
-  }
-
-  it('renders installed models with a Choose placeholder when none is selected', async () => {
-    mockCommands(builtinResponses());
-    await renderCard('');
-    const select = screen.getByRole('combobox', {
-      name: 'Built-in model',
-    }) as HTMLSelectElement;
-    expect(select.value).toBe('');
-    expect(screen.getByText('Choose a model')).toBeInTheDocument();
-    expect(screen.getByText('gemma · Q4_K_M')).toBeInTheDocument();
-    expect(screen.getByText('qwen')).toBeInTheDocument();
-  });
-
-  it('selects the persisted builtin model and omits the placeholder', async () => {
-    mockCommands(builtinResponses());
-    await renderCard('org/gemma:gemma.gguf');
-    const select = screen.getByRole('combobox', {
-      name: 'Built-in model',
-    }) as HTMLSelectElement;
-    expect(select.value).toBe('org/gemma:gemma.gguf');
-    expect(screen.queryByText('Choose a model')).not.toBeInTheDocument();
-  });
-
-  it('committing a model invokes update_provider_field and lifts the config', async () => {
-    mockCommands(builtinResponses({ update_provider_field: NEW_CONFIG }));
-    const onSaved = vi.fn();
-    await renderCard('', onSaved);
-    fireEvent.change(screen.getByRole('combobox', { name: 'Built-in model' }), {
-      target: { value: 'org/qwen:qwen.gguf' },
-    });
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
-      providerId: 'builtin',
-      field: 'model',
-      value: 'org/qwen:qwen.gguf',
-    });
-    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
-  });
-
-  it('swallows an update_provider_field failure on model commit', async () => {
-    mockCommands(
-      builtinResponses({
-        update_provider_field: new Reject(new Error('write failed')),
-      }),
-    );
-    const onSaved = vi.fn();
-    await renderCard('', onSaved);
-    fireEvent.change(screen.getByRole('combobox', { name: 'Built-in model' }), {
-      target: { value: 'org/qwen:qwen.gguf' },
-    });
-    await flush();
-    expect(onSaved).not.toHaveBeenCalled();
-    expect(
-      screen.getByRole('combobox', { name: 'Built-in model' }),
-    ).toBeInTheDocument();
-  });
-
-  it('shows the no-models hint when the manifest is empty', async () => {
-    mockCommands(builtinResponses({ list_installed_models: [] }));
-    await renderCard();
-    expect(screen.getByText('No models downloaded yet')).toBeInTheDocument();
-  });
-
-  it('treats a non-array list_installed_models payload as empty', async () => {
-    mockCommands(builtinResponses({ list_installed_models: null }));
-    await renderCard();
-    expect(screen.getByText('No models downloaded yet')).toBeInTheDocument();
-  });
-
-  it('falls back to empty state when the manifest and disk probes reject', async () => {
-    mockCommands(
-      builtinResponses({
-        list_installed_models: new Reject(new Error('manifest unreadable')),
-        get_models_dir_free_bytes: new Reject(new Error('statfs failed')),
-      }),
-    );
-    await renderCard();
-    expect(screen.getByText('No models downloaded yet')).toBeInTheDocument();
-  });
-
-  it('keeps the download kit hidden until starter options resolve', async () => {
-    mockCommands(
-      builtinResponses({ get_starter_options: new Promise(() => {}) }),
-    );
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    expect(
-      screen.queryByRole('button', { name: 'Look up' }),
-    ).not.toBeInTheDocument();
-  });
-
-  it('toggles the download kit open and closed', async () => {
-    mockCommands(builtinResponses());
-    await renderCard();
-    const trigger = screen.getByRole('button', { name: 'Download a model' });
-    fireEvent.click(trigger);
-    expect(screen.getByText('Gemma 4')).toBeInTheDocument();
-    expect(screen.getByRole('button', { name: 'Look up' })).toBeInTheDocument();
-    fireEvent.click(trigger);
-    expect(screen.queryByText('Gemma 4')).not.toBeInTheDocument();
-  });
-
-  it('walks the confirm flow and lifts the config when the download finishes', async () => {
-    mockCommands(builtinResponses());
-    const onSaved = vi.fn();
-    await renderCard('', onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    // Row-level Download opens the confirm card.
-    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
-    expect(screen.getByText('5.0 GB download.')).toBeInTheDocument();
-    expect(screen.getByText('50.0 GB free on this disk.')).toBeInTheDocument();
-    // Two Download buttons now: the picker row's and the confirm card's.
-    const confirmBtn = screen.getAllByRole('button', { name: 'Download' })[1];
-    fireEvent.click(confirmBtn);
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith(
-      'download_starter',
-      expect.objectContaining({ tier: 'balanced' }),
-    );
-    act(() => {
-      lastChannel?.simulateMessage({ type: 'AllDone' });
-    });
-    await waitFor(() => expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG));
-  });
-
-  it('returns to the picker once the Ready card dwell elapses', async () => {
-    vi.useFakeTimers();
-    try {
-      mockCommands(builtinResponses());
-      await renderCard();
-      fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-      fireEvent.click(screen.getByRole('button', { name: 'Download' }));
-      fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[1]);
-      await flush();
-      act(() => {
-        lastChannel?.simulateMessage({ type: 'AllDone' });
-      });
-      await flush();
-      // Success card up, starter rows hidden.
-      expect(screen.getByText('Ready')).toBeInTheDocument();
-      expect(
-        screen.queryByRole('button', { name: 'Download' }),
-      ).not.toBeInTheDocument();
-
-      await act(async () => {
-        vi.advanceTimersByTime(2500);
-      });
-      expect(screen.queryByText('Ready')).not.toBeInTheDocument();
-      expect(
-        screen.getByRole('button', { name: 'Download' }),
-      ).toBeInTheDocument();
-    } finally {
-      vi.useRealTimers();
-    }
-  });
-
-  it('Choose a different model on the failed card returns to the picker', async () => {
-    mockCommands(builtinResponses());
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[1]);
-    await flush();
-    act(() => {
-      lastChannel?.simulateMessage({
-        type: 'Failed',
-        data: { kind: 'disk_full', message: 'no space left' },
-      });
-    });
-    expect(
-      screen.getByText('Not enough disk space. Free up space and retry.'),
-    ).toBeInTheDocument();
-    expect(
-      screen.queryByRole('button', { name: 'Download' }),
-    ).not.toBeInTheDocument();
-
-    fireEvent.click(
-      screen.getByRole('button', { name: 'Choose a different model' }),
-    );
-    expect(
-      screen.getByRole('button', { name: 'Download' }),
-    ).toBeInTheDocument();
-  });
-
-  it('leaves the lift to the focus resync when get_config fails post-download', async () => {
-    mockCommands(
-      builtinResponses({ get_config: new Reject(new Error('read failed')) }),
-    );
-    const onSaved = vi.fn();
-    await renderCard('', onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[1]);
-    await flush();
-    act(() => {
-      lastChannel?.simulateMessage({ type: 'AllDone' });
-    });
-    await flush();
-    expect(onSaved).not.toHaveBeenCalled();
-  });
-
-  it('hides the free-disk line when the free-bytes probe returns a non-number', async () => {
-    mockCommands(builtinResponses({ get_models_dir_free_bytes: null }));
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
-    expect(screen.getByText('5.0 GB download.')).toBeInTheDocument();
-    expect(screen.queryByText(/free on this disk/)).not.toBeInTheDocument();
-    // Cancel returns to the plain picker.
-    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
-    expect(screen.queryByText('5.0 GB download.')).not.toBeInTheDocument();
-  });
-
-  it('cancels an in-flight download and retries after a failure', async () => {
-    mockCommands(builtinResponses());
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[1]);
-    await flush();
-    expect(screen.getByText('Downloading model')).toBeInTheDocument();
-    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
-    act(() => {
-      lastChannel?.simulateMessage({
-        type: 'Failed',
-        data: { kind: 'other', message: 'socket closed' },
-      });
-    });
-    expect(screen.getByText('socket closed')).toBeInTheDocument();
-    fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
-    await flush();
-    const starts = invokeMock.mock.calls.filter(
-      (c: unknown[]) => c[0] === 'download_starter',
-    );
-    expect(starts).toHaveLength(2);
-  });
-
-  it('enters resume_pending for an interrupted partial and resumes from it', async () => {
-    mockCommands(
-      builtinResponses({
-        get_starter_options: [
-          { ...STARTER_OPTION, partial_bytes: 1_000_000_000 },
-        ],
-      }),
-    );
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    await flush();
-    fireEvent.click(screen.getByRole('button', { name: /Resume download/ }));
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith(
-      'download_starter',
-      expect.objectContaining({ tier: 'balanced' }),
-    );
-  });
-
-  it('discards an interrupted partial and refreshes the starter options', async () => {
-    mockCommands(
-      builtinResponses({
-        get_starter_options: [
-          { ...STARTER_OPTION, partial_bytes: 1_000_000_000 },
-        ],
-      }),
-    );
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    await flush();
-    fireEvent.click(screen.getByRole('button', { name: 'Discard' }));
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith('discard_partial_download', {
-      sha256: 'sha-balanced',
-    });
-  });
-
-  it('looks up a pasted repo and downloads the chosen GGUF file', async () => {
-    mockCommands(
-      builtinResponses({
-        list_hf_repo_ggufs: [
-          { file: 'a.gguf', size_bytes: 2_000_000_000 },
-          { file: 'b.gguf', size_bytes: 3_000_000_000 },
-        ],
-      }),
-    );
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    const lookupBtn = screen.getByRole('button', { name: 'Look up' });
-    expect(lookupBtn).toBeDisabled();
-    fireEvent.change(screen.getByLabelText('Hugging Face repo id'), {
-      target: { value: '  owner/repo  ' },
-    });
-    expect(lookupBtn).toBeEnabled();
-    fireEvent.click(lookupBtn);
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith('list_hf_repo_ggufs', {
-      repo: 'owner/repo',
-    });
-    const fileSelect = screen.getByRole('combobox', {
-      name: 'GGUF file',
-    }) as HTMLSelectElement;
-    expect(fileSelect.value).toBe('a.gguf');
-    expect(screen.getByText('a.gguf · 2.0 GB')).toBeInTheDocument();
-    fireEvent.change(fileSelect, { target: { value: 'b.gguf' } });
-    // The repo Download sits after the picker row's Download button.
-    const downloads = screen.getAllByRole('button', { name: 'Download' });
-    fireEvent.click(downloads[downloads.length - 1]);
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith(
-      'download_repo_model',
-      expect.objectContaining({ repo: 'owner/repo', file: 'b.gguf' }),
-    );
-  });
-
-  it('shows the empty-repo hint when the lookup finds no GGUF files', async () => {
-    mockCommands(builtinResponses({ list_hf_repo_ggufs: [] }));
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    fireEvent.change(screen.getByLabelText('Hugging Face repo id'), {
-      target: { value: 'owner/empty' },
-    });
-    fireEvent.click(screen.getByRole('button', { name: 'Look up' }));
-    await flush();
-    expect(
-      screen.getByText('No GGUF files found in this repo.'),
-    ).toBeInTheDocument();
-  });
-
-  it('treats a non-array lookup payload as an empty file list', async () => {
-    mockCommands(builtinResponses({ list_hf_repo_ggufs: 'nope' }));
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    fireEvent.change(screen.getByLabelText('Hugging Face repo id'), {
-      target: { value: 'owner/odd' },
-    });
-    fireEvent.click(screen.getByRole('button', { name: 'Look up' }));
-    await flush();
-    expect(
-      screen.getByText('No GGUF files found in this repo.'),
-    ).toBeInTheDocument();
-  });
-
-  it('surfaces a lookup failure as an inline error', async () => {
-    mockCommands(
-      builtinResponses({
-        list_hf_repo_ggufs: new Reject('repo not found'),
-      }),
-    );
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
-    fireEvent.change(screen.getByLabelText('Hugging Face repo id'), {
-      target: { value: 'owner/missing' },
-    });
-    fireEvent.click(screen.getByRole('button', { name: 'Look up' }));
-    await flush();
-    expect(screen.getByRole('alert')).toHaveTextContent('repo not found');
-  });
-
-  it('lists each installed model with size, quant, and a delete affordance', async () => {
-    mockCommands(builtinResponses());
-    await renderCard();
-    expect(screen.getByText('gemma · 2.5 GB · Q4_K_M')).toBeInTheDocument();
-    // Empty quant omits the trailing separator.
-    expect(screen.getByText('qwen · 9.0 GB')).toBeInTheDocument();
-    expect(
-      screen.getByRole('button', { name: 'Delete gemma' }),
-    ).toBeInTheDocument();
-    expect(
-      screen.getByRole('button', { name: 'Delete qwen' }),
-    ).toBeInTheDocument();
-  });
-
-  it('delete asks for confirmation and Cancel backs out without deleting', async () => {
-    mockCommands(builtinResponses());
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    expect(
-      screen.getByText('Delete gemma? Its files are removed from disk.'),
-    ).toBeInTheDocument();
-    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
-    expect(
-      screen.queryByText('Delete gemma? Its files are removed from disk.'),
-    ).not.toBeInTheDocument();
-    expect(
-      screen.getByRole('button', { name: 'Delete gemma' }),
-    ).toBeInTheDocument();
-    expect(invokeMock).not.toHaveBeenCalledWith(
-      'delete_installed_model',
-      expect.anything(),
-    );
-  });
-
-  it('confirmed delete invokes delete_installed_model and refreshes the rows', async () => {
-    let deleted = false;
-    mockCommands(
-      builtinResponses({
-        list_installed_models: () => (deleted ? [INSTALLED[1]] : INSTALLED),
-        delete_installed_model: () => {
-          deleted = true;
-          return undefined;
-        },
-      }),
-    );
-    const onSaved = vi.fn();
-    await renderCard('', onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
-      id: 'org/gemma:gemma.gguf',
-    });
-    expect(
-      screen.queryByText('gemma · 2.5 GB · Q4_K_M'),
-    ).not.toBeInTheDocument();
-    expect(screen.getByText('qwen · 9.0 GB')).toBeInTheDocument();
-    // The deletion also re-fetches the starter rows (an installed starter
-    // flips back to downloadable) and lifts the fresh config snapshot.
-    expect(invokeMock).toHaveBeenCalledWith('get_starter_options');
-    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
-  });
-
-  it('deleting the active model clears the selection and shows the picker affordance', async () => {
-    let deleted = false;
-    mockCommands(
-      builtinResponses({
-        list_installed_models: () => (deleted ? [INSTALLED[1]] : INSTALLED),
-        delete_installed_model: () => {
-          deleted = true;
-          return undefined;
-        },
-        // The backend cleared the builtin provider's model field itself.
-        get_config: () => makeConfig(''),
-      }),
-    );
-    render(<StatefulBuiltinCard initialModel="org/gemma:gemma.gguf" />);
-    await flush();
-    const select = screen.getByRole('combobox', {
-      name: 'Built-in model',
-    }) as HTMLSelectElement;
-    expect(select.value).toBe('org/gemma:gemma.gguf');
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
-    await flush();
-    expect(select.value).toBe('');
-    expect(screen.getByText('Choose a model')).toBeInTheDocument();
-  });
-
-  it('surfaces a delete failure and keeps the row', async () => {
-    mockCommands(
-      builtinResponses({
-        delete_installed_model: new Reject('file busy'),
-      }),
-    );
-    await renderCard();
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
-    await flush();
-    expect(screen.getByRole('alert')).toHaveTextContent('file busy');
-    expect(screen.getByText('gemma · 2.5 GB · Q4_K_M')).toBeInTheDocument();
-    expect(invokeMock).not.toHaveBeenCalledWith('get_config');
-    // A later successful delete clears the stale error.
-    mockCommands(
-      builtinResponses({
-        list_installed_models: [INSTALLED[1]],
-        delete_installed_model: undefined,
-      }),
-    );
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
-    await flush();
-    expect(screen.queryByRole('alert')).not.toBeInTheDocument();
-  });
-
-  it('leaves the lift to the focus resync when get_config fails post-delete', async () => {
-    mockCommands(
-      builtinResponses({
-        delete_installed_model: undefined,
-        get_config: new Reject(new Error('read failed')),
-      }),
-    );
-    const onSaved = vi.fn();
-    await renderCard('', onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Delete qwen' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
-    await flush();
-    expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
-      id: 'org/qwen:qwen.gguf',
-    });
-    expect(onSaved).not.toHaveBeenCalled();
-  });
 });
 
 // ─── OpenAiProviderCard ──────────────────────────────────────────────────────
diff --git a/src/settings/tabs/ProviderCards.tsx b/src/settings/tabs/ProviderCards.tsx
index 92150397..bc5dfcf3 100644
--- a/src/settings/tabs/ProviderCards.tsx
+++ b/src/settings/tabs/ProviderCards.tsx
@@ -1,8 +1,6 @@
 /**
- * Provider card bodies for the AI tab's Providers panel.
+ * Provider card bodies for the Providers pane's OpenAI-compatible provider.
  *
- * - `BuiltinProviderCard`: installed-model picker plus the shared download
- *   kit (starter picker + paste-a-repo) for the built-in engine.
  * - `OpenAiProviderCard`: editable label/base URL/model for the single
  *   OpenAI-compatible provider, write-only API key (Keychain), manual vision
  *   toggle, and removal with confirm.
@@ -10,8 +8,7 @@
  *   OpenAI-compatible provider exists.
  *
  * The cards lift every config write back through `onSaved` so the parent's
- * `RawAppConfig` snapshot stays in lock-step with disk, mirroring how the
- * Ollama URL field in ModelTab behaves.
+ * `RawAppConfig` snapshot stays in lock-step with disk.
  */
 
 import { useCallback, useEffect, useRef, useState } from 'react';
@@ -21,32 +18,8 @@ import { SettingRow, Toggle } from '../components';
 import { configHelp } from '../configHelpers';
 import { describeConfigError } from '../types';
 import { isNonLocalUrl } from '../../utils/isNonLocalUrl';
-import {
-  StarterPicker,
-  useStarterOptions,
-} from '../../components/StarterPicker';
-import { DownloadProgress } from '../../components/DownloadProgress';
-import { useDownloadModel } from '../../hooks/useDownloadModel';
-import { buildConfirmInfo } from '../../view/onboarding/ModelCheckStep';
 import styles from '../../styles/settings.module.css';
 import type { RawAppConfig, RawProvider } from '../types';
-import type {
-  HfGgufFile,
-  InstalledModel,
-  StarterTier,
-} from '../../types/starter';
-
-/** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
-function gb(bytes: number): string {
-  return (bytes / 1e9).toFixed(1);
-}
-
-/**
- * How long the post-download "Ready" card stays up before the kit returns
- * to the picker. Long enough to read, short enough to need no dismiss
- * affordance; mirrors the eject button's 2.5 s confirmation in ModelTab.
- */
-const READY_CARD_DWELL_MS = 2500;
 
 /** Shared remote-URL caution, same mechanism as the Ollama URL warning. */
 function NonLocalWarning() {
@@ -58,338 +31,6 @@ function NonLocalWarning() {
   );
 }
 
-// ─── Built-in (Thuki) card body ──────────────────────────────────────────────
-
-interface BuiltinProviderCardProps {
-  config: RawAppConfig;
-  onSaved: (next: RawAppConfig) => void;
-}
-
-export function BuiltinProviderCard({
-  config,
-  onSaved,
-}: BuiltinProviderCardProps) {
-  const builtinModel =
-    config.inference.providers.find((p) => p.kind === 'builtin')?.model ?? '';
-
-  const [installed, setInstalled] = useState<InstalledModel[]>([]);
-  const [confirmingDelete, setConfirmingDelete] = useState<string | null>(null);
-  const [deleteError, setDeleteError] = useState<string | null>(null);
-  const [downloadOpen, setDownloadOpen] = useState(false);
-  const [selected, setSelected] = useState<StarterTier>('balanced');
-  const [freeDiskBytes, setFreeDiskBytes] = useState<number | null>(null);
-
-  // Paste-a-repo flow: id input -> Look up -> file dropdown -> Download.
-  const [repoId, setRepoId] = useState('');
-  const [repoFiles, setRepoFiles] = useState<HfGgufFile[] | null>(null);
-  const [repoFile, setRepoFile] = useState('');
-  const [repoError, setRepoError] = useState<string | null>(null);
-
-  const { options, refresh } = useStarterOptions();
-  const {
-    state,
-    progress,
-    etaSeconds,
-    beginConfirm,
-    cancelConfirm,
-    start,
-    startRepo,
-    cancel,
-    retry,
-    resume,
-    discard,
-    enterResumePending,
-    reset,
-  } = useDownloadModel();
-
-  const refreshInstalled = useCallback(async () => {
-    try {
-      const rows = await invoke<InstalledModel[]>('list_installed_models');
-      setInstalled(Array.isArray(rows) ? rows : []);
-    } catch {
-      setInstalled([]);
-    }
-  }, []);
-
-  useEffect(() => {
-    void refreshInstalled();
-    void invoke<number | null>('get_models_dir_free_bytes')
-      .then((bytes) => {
-        setFreeDiskBytes(typeof bytes === 'number' ? bytes : null);
-      })
-      .catch(() => {
-        // Unknown free space hides the disk line; never blocks the download.
-      });
-  }, [refreshInstalled]);
-
-  // An interrupted earlier download leaves a resumable partial: surface the
-  // per-card Resume/Discard pair instead of the plain Download button.
-  useEffect(() => {
-    if (
-      downloadOpen &&
-      state.phase === 'idle' &&
-      options !== null &&
-      options.some((o) => o.partial_bytes !== null)
-    ) {
-      enterResumePending();
-    }
-  }, [downloadOpen, state.phase, options, enterResumePending]);
-
-  // Download finished: the backend already wrote the builtin provider's
-  // model field, so refresh the rows and lift the new config snapshot.
-  // After a short dwell the Ready card has served its purpose; reset to
-  // idle so the starter rows (now marked Installed) come back without a
-  // tab remount.
-  useEffect(() => {
-    if (state.phase !== 'ready') return;
-    void (async () => {
-      await refresh();
-      await refreshInstalled();
-      try {
-        onSaved(await invoke<RawAppConfig>('get_config'));
-      } catch {
-        // The focus-driven resync picks the change up on next activation.
-      }
-    })();
-    const timer = window.setTimeout(reset, READY_CARD_DWELL_MS);
-    return () => window.clearTimeout(timer);
-  }, [state.phase, refresh, refreshInstalled, onSaved, reset]);
-
-  function commitModel(id: string) {
-    void invoke<RawAppConfig>('update_provider_field', {
-      providerId: 'builtin',
-      field: 'model',
-      value: id,
-    })
-      .then(onSaved)
-      .catch(() => {
-        // The dropdown re-seeds from config on the next resync.
-      });
-  }
-
-  // Deletion is refcounted server-side (shared blobs survive); the backend
-  // also clears the builtin provider's model field when the deleted model
-  // was the selected one, so the lifted snapshot is the source of truth.
-  async function handleDelete(id: string) {
-    setConfirmingDelete(null);
-    try {
-      await invoke('delete_installed_model', { id });
-    } catch (err) {
-      setDeleteError(String(err));
-      return;
-    }
-    setDeleteError(null);
-    // A deleted starter flips back to downloadable in the picker rows.
-    await refresh();
-    await refreshInstalled();
-    try {
-      onSaved(await invoke<RawAppConfig>('get_config'));
-    } catch {
-      // The focus-driven resync picks the change up on next activation.
-    }
-  }
-
-  async function handleLookup() {
-    setRepoError(null);
-    setRepoFiles(null);
-    try {
-      const rows = await invoke<HfGgufFile[]>('list_hf_repo_ggufs', {
-        repo: repoId.trim(),
-      });
-      const files = Array.isArray(rows) ? rows : [];
-      setRepoFiles(files);
-      setRepoFile(files[0]?.file ?? '');
-    } catch (err) {
-      setRepoError(String(err));
-    }
-  }
-
-  const modelValue = installed.some((m) => m.id === builtinModel)
-    ? builtinModel
-    : '';
-  const pickerVisible =
-    state.phase === 'idle' ||
-    state.phase === 'confirming' ||
-    state.phase === 'resume_pending';
-
-  return (
-    <>
-      <SettingRow
-        label="Model"
-        helper={configHelp('inference', 'builtin_model')}
-      >
-        {installed.length > 0 ? (
-          <select
-            className={styles.dropdown}
-            aria-label="Built-in model"
-            value={modelValue}
-            onChange={(e) => commitModel(e.target.value)}
-          >
-            {modelValue === '' ? (
-              <option value="" disabled>
-                Choose a model
-              </option>
-            ) : null}
-            {installed.map((m) => (
-              <option key={m.id} value={m.id}>
-                {m.display_name}
-                {m.quant !== '' ? ` · ${m.quant}` : ''}
-              </option>
-            ))}
-          </select>
-        ) : (
-          <span className={styles.providerHint}>No models downloaded yet</span>
-        )}
-      </SettingRow>
-
-      {installed.map((m) => (
-        <div className={styles.providerInlineRow} key={m.id}>
-          <span className={styles.providerHint}>
-            {m.display_name} · {gb(m.size_bytes)} GB
-            {m.quant !== '' ? ` · ${m.quant}` : ''}
-          </span>
-          {confirmingDelete === m.id ? (
-            <>
-              <span className={styles.providerHint}>
-                Delete {m.display_name}? Its files are removed from disk.
-              </span>
-              <button
-                type="button"
-                className={`${styles.button} ${styles.buttonDestructive}`}
-                onClick={() => void handleDelete(m.id)}
-              >
-                Delete
-              </button>
-              <button
-                type="button"
-                className={`${styles.button} ${styles.buttonGhost}`}
-                onClick={() => setConfirmingDelete(null)}
-              >
-                Cancel
-              </button>
-            </>
-          ) : (
-            <button
-              type="button"
-              className={`${styles.button} ${styles.buttonGhost}`}
-              aria-label={`Delete ${m.display_name}`}
-              onClick={() => setConfirmingDelete(m.id)}
-            >
-              Delete
-            </button>
-          )}
-        </div>
-      ))}
-      {deleteError !== null ? (
-        <p className={styles.providerError} role="alert">
-          {deleteError}
-        </p>
-      ) : null}
-
-      <button
-        type="button"
-        className={`${styles.button} ${styles.buttonGhost}`}
-        aria-expanded={downloadOpen}
-        onClick={() => setDownloadOpen((o) => !o)}
-      >
-        Download a model
-      </button>
-
-      {downloadOpen && options !== null ? (
-        <div style={{ marginTop: 10 }}>
-          {pickerVisible ? (
-            <StarterPicker
-              options={options}
-              selected={selected}
-              onSelect={setSelected}
-              onDownload={(tier) => {
-                setSelected(tier);
-                beginConfirm(tier);
-              }}
-              onResume={(tier) => {
-                setSelected(tier);
-                void resume(tier);
-              }}
-              onDiscard={(sha256) => {
-                void discard(sha256).then(refresh);
-              }}
-            />
-          ) : null}
-          <DownloadProgress
-            state={state}
-            progress={progress}
-            etaSeconds={etaSeconds}
-            confirmInfo={buildConfirmInfo(state, options, freeDiskBytes)}
-            onConfirm={() => void start(selected)}
-            onCancelConfirm={cancelConfirm}
-            onCancel={() => void cancel()}
-            onRetry={() => void retry()}
-            // Same trap-avoidance as onboarding: a terminal failure must
-            // leave a path back to the starter rows, not just Retry.
-            onChooseAnother={reset}
-          />
-
-          <div className={styles.providerInlineRow}>
-            <input
-              type="text"
-              className={styles.input}
-              aria-label="Hugging Face repo id"
-              placeholder="owner/repo"
-              spellCheck={false}
-              autoComplete="off"
-              autoCorrect="off"
-              autoCapitalize="off"
-              value={repoId}
-              onChange={(e) => setRepoId(e.target.value)}
-            />
-            <button
-              type="button"
-              className={styles.button}
-              disabled={repoId.trim() === ''}
-              onClick={() => void handleLookup()}
-            >
-              Look up
-            </button>
-          </div>
-          {repoError !== null ? (
-            <p className={styles.providerError} role="alert">
-              {repoError}
-            </p>
-          ) : null}
-          {repoFiles !== null && repoFiles.length === 0 ? (
-            <p className={styles.providerHint}>
-              No GGUF files found in this repo.
-            </p>
-          ) : null}
-          {repoFiles !== null && repoFiles.length > 0 ? (
-            <div className={styles.providerInlineRow}>
-              <select
-                className={styles.dropdown}
-                aria-label="GGUF file"
-                value={repoFile}
-                onChange={(e) => setRepoFile(e.target.value)}
-              >
-                {repoFiles.map((f) => (
-                  <option key={f.file} value={f.file}>
-                    {f.file} · {gb(f.size_bytes)} GB
-                  </option>
-                ))}
-              </select>
-              <button
-                type="button"
-                className={styles.button}
-                onClick={() => void startRepo(repoId.trim(), repoFile)}
-              >
-                Download
-              </button>
-            </div>
-          ) : null}
-        </div>
-      ) : null}
-    </>
-  );
-}
-
 // ─── OpenAI-compatible card body ─────────────────────────────────────────────
 
 interface OpenAiProviderCardProps {
diff --git a/src/settings/tabs/models/DiscoverPane.module.css b/src/settings/tabs/models/DiscoverPane.module.css
new file mode 100644
index 00000000..bed658fd
--- /dev/null
+++ b/src/settings/tabs/models/DiscoverPane.module.css
@@ -0,0 +1,274 @@
+/*
+ * Discover pane: the in-app Hugging Face GGUF browser. Structure, spacing,
+ * and color come straight from the model-settings redesign mockup
+ * (panel-sidebar.html). The premium design tokens (--accent, --t1, --hair,
+ * etc.) are defined on the Settings window root and cascade down, so this
+ * module references them via var() rather than redeclaring them.
+ *
+ * No global box-sizing reset exists in this codebase, so any class that pairs
+ * width:100% with padding sets box-sizing: border-box itself.
+ */
+
+.pane {
+  display: flex;
+  flex-direction: column;
+  min-height: 0;
+}
+
+/* ── Search field ─────────────────────────────────────────────────────── */
+
+.search {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  background: var(--elev-1);
+  border: 1px solid var(--hair);
+  border-radius: 11px;
+  padding: 10px 12px;
+  box-sizing: border-box;
+  width: 100%;
+}
+
+.search svg {
+  width: 15px;
+  height: 15px;
+  stroke: var(--t3);
+  stroke-width: 1.7;
+  fill: none;
+  flex: none;
+}
+
+.searchInput {
+  flex: 1;
+  min-width: 0;
+  background: transparent;
+  border: none;
+  outline: none;
+  color: var(--t1);
+  font-family: inherit;
+  font-size: 13.5px;
+  letter-spacing: inherit;
+}
+
+.searchInput::placeholder {
+  color: var(--t3);
+}
+
+.kbd {
+  margin-left: auto;
+  flex: none;
+  font-size: 11px;
+  color: var(--t3);
+  border: 1px solid var(--hair);
+  border-radius: 5px;
+  padding: 2px 6px;
+}
+
+/* ── Family filter chips ──────────────────────────────────────────────── */
+
+.chips {
+  display: flex;
+  gap: 6px;
+  margin-top: 11px;
+  flex-wrap: wrap;
+}
+
+.chip {
+  font-size: 11.5px;
+  color: var(--t2);
+  padding: 5px 10px;
+  border-radius: var(--radius-pill);
+  border: 1px solid var(--hair-soft);
+  background: transparent;
+  font-family: inherit;
+  cursor: pointer;
+}
+
+.chipOn {
+  color: var(--t1);
+  border-color: transparent;
+  background: var(--elev-2);
+}
+
+/* ── Sub-bar: result count + sort label ───────────────────────────────── */
+
+.subbar {
+  display: flex;
+  align-items: center;
+  margin-top: 11px;
+  padding-bottom: 11px;
+  border-bottom: 1px solid var(--hair-soft);
+}
+
+.count {
+  font-size: 11.5px;
+  color: var(--t3);
+}
+
+.count b {
+  color: var(--t2);
+  font-weight: 590;
+}
+
+.sort {
+  margin-left: auto;
+  font-size: 11.5px;
+  color: var(--t2);
+}
+
+/* ── Result list ──────────────────────────────────────────────────────── */
+
+.list {
+  flex: 1;
+  overflow: auto;
+  padding: 4px 0 8px;
+}
+
+.rowWrap + .rowWrap {
+  box-shadow: 0 -1px 0 var(--hair-soft);
+}
+
+.row {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+  padding: 10px 11px;
+  border-radius: 9px;
+}
+
+.row:hover {
+  background: var(--elev-1);
+}
+
+.av {
+  width: 28px;
+  height: 28px;
+  border-radius: 7px;
+  flex: none;
+  display: grid;
+  place-items: center;
+  background: var(--elev-2);
+  font-weight: 700;
+  color: var(--t2);
+  font-size: 11px;
+  text-transform: uppercase;
+}
+
+.mid {
+  flex: 1;
+  min-width: 0;
+}
+
+.nm {
+  font-weight: 540;
+  font-size: 12.5px;
+  color: var(--t1);
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.org {
+  font-size: 11px;
+  color: var(--t3);
+  margin-top: 2px;
+}
+
+.gatedBadge {
+  display: inline-flex;
+  align-items: center;
+  font-size: 10px;
+  font-weight: 560;
+  padding: 2px 6px;
+  border-radius: var(--radius-pill);
+  margin-left: 6px;
+  color: var(--tight);
+  background: rgba(230, 181, 107, 0.1);
+}
+
+.get {
+  flex: none;
+  font-size: 11.5px;
+  font-weight: 540;
+  color: var(--t2);
+  border: 1px solid var(--hair);
+  border-radius: var(--radius-control);
+  padding: 5px 12px;
+  background: transparent;
+  font-family: inherit;
+  cursor: pointer;
+}
+
+.get:hover:not(:disabled) {
+  color: var(--t1);
+  border-color: rgba(255, 141, 92, 0.5);
+  background: var(--accent-soft);
+}
+
+.get:disabled {
+  opacity: 0.45;
+  cursor: default;
+}
+
+/* ── Accordion: per-repo quant choices ────────────────────────────────── */
+
+.expand {
+  padding: 4px 11px 12px;
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+}
+
+.quantRow {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+}
+
+.quantName {
+  flex: 1;
+  min-width: 0;
+  font-size: 11.5px;
+  color: var(--t2);
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.quantSize {
+  flex: none;
+  font-size: 11px;
+  color: var(--t3);
+}
+
+.download {
+  flex: none;
+  font-size: 11.5px;
+  font-weight: 540;
+  color: #16110d;
+  border: none;
+  border-radius: var(--radius-control);
+  padding: 5px 12px;
+  background: var(--accent);
+  font-family: inherit;
+  cursor: pointer;
+}
+
+/* ── Inline notes (empty repo, list error) and list-level states ──────── */
+
+.note {
+  font-size: 11px;
+  color: var(--t3);
+}
+
+.error {
+  font-size: 11px;
+  color: var(--tight);
+}
+
+.state {
+  padding: 18px 11px;
+  font-size: 12px;
+  color: var(--t3);
+  text-align: center;
+}
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
new file mode 100644
index 00000000..ec5cb988
--- /dev/null
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -0,0 +1,454 @@
+/**
+ * Unit tests for the Discover pane: the in-app Hugging Face GGUF browser.
+ *
+ * Covers the search field wiring, family filter chips, the result list rows
+ * (avatar/org parsing, gated rows), the per-row quant accordion (expand,
+ * empty repo, list error), and the download flow (start, progress, ready ->
+ * onSaved + collapse, cancel, retry). The download channel is captured the
+ * same way ProviderCards.test.tsx does it: `onEvent` is grabbed off the
+ * invoke args and driven with `simulateMessage`.
+ */
+
+import {
+  act,
+  fireEvent,
+  render,
+  screen,
+  waitFor,
+  within,
+} from '@testing-library/react';
+import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+
+import { DiscoverPane } from './DiscoverPane';
+import { HF_SEARCH_DEBOUNCE_MS } from './useHfSearch';
+import type { HfModelSummary } from '../../../types/hf';
+import type { HfGgufFile } from '../../../types/starter';
+import type { RawAppConfig } from '../../types';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+type MockChannel = { simulateMessage: (msg: unknown) => void };
+let lastChannel: MockChannel | null = null;
+
+/** Wrapper telling `mockCommands` to throw `value` instead of resolving it. */
+class Reject {
+  constructor(public readonly value: unknown) {}
+}
+
+/**
+ * Installs an `invoke` mock that answers by command name from `responses`.
+ * A `Reject` entry throws its payload, a function entry is called with the
+ * invoke args, and any other entry resolves as-is; commands missing from the
+ * table resolve to `undefined`. Whenever the args carry `onEvent`, that
+ * channel is remembered in `lastChannel` for download-event simulation.
+ */
+function mockCommands(responses: Record<string, unknown>) {
+  invokeMock.mockImplementation(
+    async (cmd: string, args?: Record<string, unknown>) => {
+      if (args && 'onEvent' in args) {
+        lastChannel = args.onEvent as unknown as MockChannel;
+      }
+      const known = Object.prototype.hasOwnProperty.call(responses, cmd);
+      if (!known) return undefined;
+      const reply = responses[cmd];
+      if (reply instanceof Reject) throw reply.value;
+      if (typeof reply !== 'function') return reply;
+      type Handler = (a?: Record<string, unknown>) => unknown;
+      return (reply as Handler)(args);
+    },
+  );
+}
+
+const RESULTS: HfModelSummary[] = [
+  { id: 'google/gemma-4-12b-it-GGUF', downloads: 1_200_000, gated: false },
+  { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
+  { id: 'meta-llama/Llama-3-8B-GGUF', downloads: 9_000, gated: true },
+];
+
+const GGUFS: HfGgufFile[] = [
+  { file: 'gemma-q4.gguf', size_bytes: 5_000_000_000 },
+  { file: 'gemma-q8.gguf', size_bytes: 9_000_000_000 },
+];
+
+const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
+
+/**
+ * Backend table used by default: search yields RESULTS, a repo lookup yields
+ * GGUFS, get_config yields the post-install snapshot; overrides win on clash.
+ */
+function discoverResponses(overrides: Record<string, unknown> = {}) {
+  const defaults = {
+    search_hf_models: RESULTS,
+    list_hf_repo_ggufs: GGUFS,
+    get_config: CONFIG_AFTER_INSTALL,
+  };
+  return { ...defaults, ...overrides };
+}
+
+async function flush() {
+  await act(async () => {
+    await Promise.resolve(); // yield one microtask turn inside act
+    await Promise.resolve(); // and a second one before act returns
+  });
+}
+
+beforeEach(() => {
+  invokeMock.mockReset();
+  lastChannel = null;
+});
+
+afterEach(() => {
+  vi.useRealTimers();
+});
+
+/** Mounts the pane on the mocked backend and waits out the mount search. */
+async function renderPane(
+  onSaved: (next: RawAppConfig) => void = () => {},
+  overrides: Record<string, unknown> = {},
+) {
+  mockCommands(discoverResponses(overrides));
+  const mounted = render(<DiscoverPane onSaved={onSaved} />);
+  await waitFor(() => {
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', { query: '' });
+  });
+  await flush();
+  return mounted;
+}
+
+describe('DiscoverPane', () => {
+  it('renders a row per search result with parsed avatar and org line', async () => {
+    await renderPane();
+    expect(screen.getByText('google/gemma-4-12b-it-GGUF')).toBeInTheDocument();
+    // Avatar: org's first letter, lowercase in the DOM (CSS uppercases it).
+    expect(screen.getByText('g', { selector: '*' })).toBeTruthy();
+    // Org + formatted downloads sub-line.
+    expect(
+      screen.getByText('google · 1,200,000 downloads'),
+    ).toBeInTheDocument();
+    expect(screen.getByText('unsloth · 410,000 downloads')).toBeInTheDocument();
+  });
+
+  it('shows the result count in the sub-bar', async () => {
+    await renderPane();
+    expect(screen.getByText(/GGUF models/)).toHaveTextContent('3 GGUF models');
+  });
+
+  it('renders the avatar from the full id when it has no org segment', async () => {
+    await renderPane(() => {}, {
+      search_hf_models: [
+        { id: 'standalone-repo', downloads: 12, gated: false },
+      ],
+    });
+    expect(screen.getByText('standalone-repo')).toBeInTheDocument();
+    expect(
+      screen.getByText('standalone-repo · 12 downloads'),
+    ).toBeInTheDocument();
+  });
+
+  it('typing in the search drives a debounced fetch and re-renders results', async () => {
+    vi.useFakeTimers();
+    mockCommands(discoverResponses());
+    render(<DiscoverPane onSaved={() => {}} />);
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    invokeMock.mockClear(); // forget the mount search call
+    mockCommands(
+      discoverResponses({
+        search_hf_models: [
+          { id: 'qwen/Qwen3-GGUF', downloads: 50, gated: false },
+        ],
+      }),
+    );
+    fireEvent.change(screen.getByRole('searchbox'), {
+      target: { value: 'qwen' },
+    });
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: 'qwen',
+    });
+  });
+
+  it('clicking a family chip sets the query to that family', async () => {
+    vi.useFakeTimers();
+    mockCommands(discoverResponses());
+    render(<DiscoverPane onSaved={() => {}} />);
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    invokeMock.mockClear();
+    fireEvent.click(screen.getByRole('button', { name: 'Llama' }));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: 'Llama',
+    });
+    expect(screen.getByRole('button', { name: 'Llama' })).toHaveAttribute(
+      'aria-pressed',
+      'true',
+    );
+  });
+
+  it('the All chip clears the query and is active by default', async () => {
+    vi.useFakeTimers();
+    mockCommands(discoverResponses());
+    render(<DiscoverPane onSaved={() => {}} />);
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    // All is the active chip while the query is empty.
+    expect(screen.getByRole('button', { name: 'All' })).toHaveAttribute(
+      'aria-pressed',
+      'true',
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Gemma' }));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(screen.getByRole('button', { name: 'All' })).toHaveAttribute(
+      'aria-pressed',
+      'false',
+    );
+    invokeMock.mockClear();
+    fireEvent.click(screen.getByRole('button', { name: 'All' }));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', { query: '' });
+  });
+
+  it('renders every family chip', async () => {
+    await renderPane();
+    for (const family of [
+      'All',
+      'Qwen',
+      'Llama',
+      'Gemma',
+      'gpt-oss',
+      'DeepSeek',
+      'Phi',
+    ]) {
+      expect(screen.getByRole('button', { name: family })).toBeInTheDocument();
+    }
+  });
+
+  it('disables Get and shows a gated note for a gated repo', async () => {
+    await renderPane();
+    const gatedRow = screen
+      .getByText('meta-llama/Llama-3-8B-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    expect(
+      within(gatedRow).getByRole('button', { name: 'Get' }),
+    ).toBeDisabled();
+    expect(within(gatedRow).getByText('gated')).toBeInTheDocument();
+  });
+
+  it('expanding a row lists each GGUF file with its size', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('list_hf_repo_ggufs', {
+      repo: 'google/gemma-4-12b-it-GGUF',
+    });
+    expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument();
+    expect(screen.getByText('5.0 GB')).toBeInTheDocument();
+    expect(screen.getByText('gemma-q8.gguf')).toBeInTheDocument();
+    expect(screen.getByText('9.0 GB')).toBeInTheDocument();
+  });
+
+  it('collapses an expanded row when Get is clicked again', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument();
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    expect(screen.queryByText('gemma-q4.gguf')).not.toBeInTheDocument();
+  });
+
+  it('shows an empty-repo note when the lookup finds no GGUF files', async () => {
+    await renderPane(() => {}, { list_hf_repo_ggufs: [] });
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    expect(screen.getByText('No GGUF files in this repo.')).toBeInTheDocument();
+  });
+
+  it('treats a non-array gguf payload as an empty file list', async () => {
+    await renderPane(() => {}, { list_hf_repo_ggufs: 'nope' });
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    expect(screen.getByText('No GGUF files in this repo.')).toBeInTheDocument();
+  });
+
+  it('surfaces a lookup failure as an inline error', async () => {
+    await renderPane(() => {}, {
+      list_hf_repo_ggufs: new Reject('repo unavailable'),
+    });
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    expect(screen.getByText(/repo unavailable/)).toBeInTheDocument();
+  });
+
+  it('downloads a chosen quant, progresses, and on ready lifts config and collapses', async () => {
+    const onSaved = vi.fn();
+    await renderPane(onSaved);
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    // Download the second quant.
+    const downloadButtons = screen.getAllByRole('button', {
+      name: 'Download',
+    });
+    fireEvent.click(downloadButtons[1]);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_repo_model',
+      expect.objectContaining({
+        repo: 'google/gemma-4-12b-it-GGUF',
+        file: 'gemma-q8.gguf',
+      }),
+    );
+    // Progress is shown via DownloadProgress.
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Started',
+        data: {
+          file: 'gemma-q8.gguf',
+          total_bytes: 9_000_000_000,
+          resumed_from: 0,
+        },
+      });
+    });
+    expect(screen.getByText('Downloading model')).toBeInTheDocument();
+    // Completion lifts the fresh config and collapses the accordion.
+    act(() => {
+      lastChannel?.simulateMessage({ type: 'AllDone' });
+    });
+    await flush();
+    expect(onSaved).toHaveBeenCalledWith(CONFIG_AFTER_INSTALL);
+    await waitFor(() =>
+      expect(screen.queryByText('gemma-q4.gguf')).not.toBeInTheDocument(),
+    );
+  });
+
+  it('leaves the lift to a later resync when get_config fails post-download', async () => {
+    const onSaved = vi.fn();
+    await renderPane(onSaved, {
+      get_config: new Reject(new Error('config read failed')),
+    });
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+    act(() => {
+      lastChannel?.simulateMessage({ type: 'AllDone' });
+    });
+    await flush();
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+
+  it('cancels an in-flight download', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+    expect(screen.getByText('Downloading model')).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
+  });
+
+  it('retries after a failure and offers a path back to the quant list', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    await flush();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'other', message: 'connection dropped' },
+      });
+    });
+    expect(screen.getByText('connection dropped')).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
+    await flush();
+    const repoDownloads = invokeMock.mock.calls.filter(
+      (c: unknown[]) => c[0] === 'download_repo_model',
+    );
+    expect(repoDownloads).toHaveLength(2);
+    // Choose another returns to the quant list.
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'other', message: 'again' },
+      });
+    });
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Choose a different model' }),
+    );
+    expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument();
+  });
+
+  it('shows a loading hint while the search is in flight', async () => {
+    let resolveSearch!: (value: HfModelSummary[]) => void;
+    const pending = new Promise<HfModelSummary[]>((res) => {
+      resolveSearch = res;
+    });
+    mockCommands(discoverResponses({ search_hf_models: pending }));
+    render(<DiscoverPane onSaved={() => {}} />);
+    await flush();
+    expect(screen.getByText('Searching…')).toBeInTheDocument();
+    await act(async () => {
+      resolveSearch(RESULTS);
+      await Promise.resolve();
+    });
+    await waitFor(() =>
+      expect(screen.queryByText('Searching…')).not.toBeInTheDocument(),
+    );
+  });
+
+  it('shows a no-results message when the search returns nothing', async () => {
+    await renderPane(() => {}, { search_hf_models: [] });
+    expect(screen.getByText('No models found.')).toBeInTheDocument();
+    expect(screen.getByText(/GGUF models/)).toHaveTextContent('0 GGUF models');
+  });
+});
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
new file mode 100644
index 00000000..94951991
--- /dev/null
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -0,0 +1,253 @@
+/**
+ * Discover pane: the in-app Hugging Face GGUF model browser.
+ *
+ * A search field (driven by {@link useHfSearch}) plus a row of family filter
+ * chips feed one debounced backend query. The result list renders one lean
+ * row per repo: the search payload carries no size or capability data, so a
+ * row shows only the avatar, the repo id, an org + downloads sub-line, and a
+ * gated indicator. "Get" expands the row into a quant accordion that lists the
+ * repo's `.gguf` files (`list_hf_repo_ggufs`) and downloads the chosen one
+ * through the shared {@link useDownloadModel} kit. A finished install lifts a
+ * fresh config snapshot through `onSaved` and collapses the row.
+ */
+
+import { useCallback, useEffect, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+
+import { DownloadProgress } from '../../../components/DownloadProgress';
+import { useDownloadModel } from '../../../hooks/useDownloadModel';
+import { useHfSearch } from './useHfSearch';
+import styles from './DiscoverPane.module.css';
+import type { HfModelSummary } from '../../../types/hf';
+import type { HfGgufFile } from '../../../types/starter';
+import type { RawAppConfig } from '../../types';
+
+/**
+ * Family filter chips. Clicking a chip sets the search query to its name and
+ * `All` clears it back to the empty browse-popular query. A chip reads as
+ * pressed only while the query equals its target exactly; no extra backend.
+ */
+const FAMILIES = [
+  'All',
+  'Qwen',
+  'Llama',
+  'Gemma',
+  'gpt-oss',
+  'DeepSeek',
+  'Phi',
+] as const;
+
+/** Formats a byte count as decimal gigabytes, one fractional digit ("8.2"). */
+function gb(bytes: number): string {
+  return (bytes / 1_000_000_000).toFixed(1);
+}
+
+/** Owner part of an `owner/repo` id; an id without a slash is returned whole. */
+function orgOf(id: string): string {
+  const cut = id.indexOf('/');
+  return cut < 0 ? id : id.substring(0, cut);
+}
+
+interface DiscoverPaneProps {
+  /** Lift a fresh config snapshot after a successful install. */
+  onSaved: (next: RawAppConfig) => void;
+}
+
+/** Search field, family chips, result count, and one row per found repo. */
+export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
+  const { query, setQuery, results, loading } = useHfSearch();
+  const isEmpty = !loading && results.length === 0;
+
+  return (
+    <div className={styles.pane}>
+      <div className={styles.search}>
+        <svg viewBox="0 0 24 24" aria-hidden="true">
+          <circle cx="11" cy="11" r="7" />
+          <path d="m20 20-3.5-3.5" />
+        </svg>
+        <input
+          type="search"
+          className={styles.searchInput}
+          aria-label="Search Hugging Face models"
+          placeholder="Search Hugging Face models"
+          spellCheck={false}
+          autoComplete="off"
+          autoCorrect="off"
+          autoCapitalize="off"
+          value={query}
+          onChange={(event) => setQuery(event.target.value)}
+        />
+        <span className={styles.kbd}>⌘K</span>
+      </div>
+
+      <div className={styles.chips}>
+        {FAMILIES.map((label) => {
+          const chipQuery = label === 'All' ? '' : label;
+          const pressed = query === chipQuery;
+          return (
+            <button
+              key={label}
+              type="button"
+              aria-pressed={pressed}
+              className={`${styles.chip} ${pressed ? styles.chipOn : ''}`}
+              onClick={() => setQuery(chipQuery)}
+            >
+              {label}
+            </button>
+          );
+        })}
+      </div>
+
+      <div className={styles.subbar}>
+        <span className={styles.count}>
+          <b>{results.length}</b> GGUF models
+        </span>
+        <span className={styles.sort}>Most downloaded</span>
+      </div>
+
+      <div className={styles.list}>
+        {loading ? <p className={styles.state}>Searching…</p> : null}
+        {isEmpty ? <p className={styles.state}>No models found.</p> : null}
+        {results.map((repo) => (
+          <DiscoverRow key={repo.id} model={repo} onSaved={onSaved} />
+        ))}
+      </div>
+    </div>
+  );
+}
+
+interface DiscoverRowProps {
+  model: HfModelSummary;
+  onSaved: (next: RawAppConfig) => void;
+}
+
+/**
+ * One repo row plus its quant accordion. The GGUF file list is fetched on
+ * expand (and refetched on every re-expand); the download state machine is
+ * local to the row so two rows cannot share an in-flight download.
+ */
+function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
+  const [expanded, setExpanded] = useState(false);
+  const [files, setFiles] = useState<HfGgufFile[] | null>(null);
+  const [listError, setListError] = useState<string | null>(null);
+
+  const { state, progress, etaSeconds, startRepo, cancel, retry, reset } =
+    useDownloadModel();
+
+  const org = orgOf(model.id);
+
+  const loadFiles = useCallback(async () => {
+    setListError(null);
+    setFiles(null);
+    try {
+      const rows = await invoke<HfGgufFile[]>('list_hf_repo_ggufs', {
+        repo: model.id,
+      });
+      setFiles(Array.isArray(rows) ? rows : []);
+    } catch (err) {
+      setListError(String(err));
+    }
+  }, [model.id]);
+
+  function toggle() {
+    if (expanded) {
+      setExpanded(false);
+      return;
+    }
+    setExpanded(true);
+    void loadFiles();
+  }
+
+  // A finished install: the backend already wrote the builtin provider's
+  // model field, so lift the fresh config snapshot and collapse the row.
+  useEffect(() => {
+    if (state.phase !== 'ready') return;
+    void (async () => {
+      try {
+        onSaved(await invoke<RawAppConfig>('get_config'));
+      } catch {
+        // The focus-driven resync picks the change up on next activation.
+      }
+      reset();
+      setExpanded(false);
+    })();
+  }, [state.phase, onSaved, reset]);
+
+  const showProgress = state.phase !== 'idle';
+
+  return (
+    <div className={styles.rowWrap} data-row>
+      <div className={styles.row}>
+        <div className={styles.av} aria-hidden="true">
+          {org.charAt(0)}
+        </div>
+        <div className={styles.mid}>
+          <div className={styles.nm}>
+            {model.id}
+            {model.gated ? (
+              <span className={styles.gatedBadge}>gated</span>
+            ) : null}
+          </div>
+          <div className={styles.org}>
+            {org} · {model.downloads.toLocaleString()} downloads
+          </div>
+        </div>
+        <button
+          type="button"
+          className={styles.get}
+          aria-expanded={expanded}
+          disabled={model.gated}
+          onClick={toggle}
+        >
+          Get
+        </button>
+      </div>
+
+      {expanded ? (
+        <div className={styles.expand}>
+          {listError !== null ? (
+            <p className={styles.error} role="alert">{listError}</p>
+          ) : null}
+          {files !== null && files.length === 0 && listError === null ? (
+            <p className={styles.note}>No GGUF files in this repo.</p>
+          ) : null}
+          {!showProgress && files !== null && files.length > 0
+            ? files.map((f) => (
+                <div className={styles.quantRow} key={f.file}>
+                  <span className={styles.quantName}>{f.file}</span>
+                  <span className={styles.quantSize}>
+                    {gb(f.size_bytes)} GB
+                  </span>
+                  <button
+                    type="button"
+                    className={styles.download}
+                    onClick={() => void startRepo(model.id, f.file)}
+                  >
+                    Download
+                  </button>
+                </div>
+              ))
+            : null}
+          {showProgress ? (
+            <DownloadProgress
+              state={state}
+              progress={progress}
+              etaSeconds={etaSeconds}
+              // The repo download flow has no pre-flight confirm step (only
+              // the starter picker does), so the confirm card never renders;
+              // these required props point at the same covered handlers as
+              // their respective cards rather than dead no-op literals.
+              onConfirm={reset}
+              onCancelConfirm={reset}
+              onCancel={() => void cancel()}
+              onRetry={() => void retry()}
+              // A terminal failure must leave a path back to the quant list,
+              // not just Retry; reset returns to the file rows.
+              onChooseAnother={reset}
+            />
+          ) : null}
+        </div>
+      ) : null}
+    </div>
+  );
+}
diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
new file mode 100644
index 00000000..9495daaa
--- /dev/null
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -0,0 +1,222 @@
+/*
+ * Styles for the Library pane of the Models surface. Tokens (--accent*,
+ * --vis*, --rea*, --hair*, --t1..--t3, --elev-*, --radius-card) cascade from
+ * the Settings window, so they are referenced directly here.
+ */
+
+.pane {
+  display: flex;
+  flex-direction: column;
+  min-height: 0;
+}
+
+.bar {
+  display: flex;
+  align-items: center;
+  padding: 0 4px 12px;
+}
+
+.addButton {
+  margin-left: auto;
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  font-size: 12px;
+  font-weight: 560;
+  color: #16110d;
+  background: var(--accent);
+  border: none;
+  border-radius: 8px;
+  padding: 7px 12px;
+  cursor: pointer;
+}
+
+.list {
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+}
+
+.card {
+  border: 1px solid var(--hair-soft);
+  border-radius: var(--radius-card);
+  background: var(--elev-1);
+  overflow: hidden;
+}
+
+.cardActive {
+  border-color: rgba(255, 141, 92, 0.4);
+}
+
+.row {
+  display: flex;
+  align-items: center;
+  gap: 13px;
+  padding: 13px 14px;
+}
+
+.avatar {
+  width: 32px;
+  height: 32px;
+  border-radius: 8px;
+  flex: none;
+  display: grid;
+  place-items: center;
+  background: var(--elev-2);
+  font-weight: 700;
+  color: var(--t2);
+  font-size: 12px;
+}
+
+.mid {
+  flex: 1;
+  min-width: 0;
+}
+
+.name {
+  font-weight: 550;
+  font-size: 13px;
+  color: var(--t1);
+  display: flex;
+  align-items: center;
+  gap: 8px;
+}
+
+.org {
+  font-size: 11px;
+  color: var(--t3);
+  margin-top: 3px;
+}
+
+.badge {
+  display: inline-flex;
+  align-items: center;
+  font-size: 10.5px;
+  font-weight: 560;
+  padding: 2px 8px;
+  border-radius: 999px;
+}
+
+.badgeActive {
+  color: var(--accent);
+  background: var(--accent-soft);
+}
+
+.badgeVision {
+  color: var(--vis);
+  background: var(--vis-bg);
+}
+
+.badgeReason {
+  color: var(--rea);
+  background: var(--rea-bg);
+}
+
+.actions {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  flex: none;
+}
+
+.useButton {
+  font-size: 12px;
+  font-weight: 540;
+  color: var(--accent);
+  border: 1px solid rgba(255, 141, 92, 0.45);
+  background: var(--accent-soft);
+  border-radius: 8px;
+  padding: 6px 12px;
+  cursor: pointer;
+}
+
+.manageButton {
+  width: 30px;
+  height: 30px;
+  border-radius: 8px;
+  border: 1px solid var(--hair);
+  background: transparent;
+  display: grid;
+  place-items: center;
+  color: var(--t3);
+  font-size: 16px;
+  line-height: 1;
+  cursor: pointer;
+}
+
+.manageRow {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  padding: 0 14px 13px;
+  flex-wrap: wrap;
+}
+
+.confirmText {
+  font-size: 11px;
+  color: var(--t2);
+}
+
+.deleteButton {
+  font-size: 12px;
+  font-weight: 540;
+  color: #ff6b6b;
+  border: 1px solid rgba(255, 107, 107, 0.4);
+  background: rgba(255, 107, 107, 0.1);
+  border-radius: 8px;
+  padding: 6px 12px;
+  cursor: pointer;
+}
+
+.ghostButton {
+  font-size: 12px;
+  font-weight: 540;
+  color: var(--t2);
+  border: 1px solid var(--hair);
+  background: transparent;
+  border-radius: 8px;
+  padding: 6px 12px;
+  cursor: pointer;
+}
+
+.empty {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 12px;
+  padding: 40px 14px;
+}
+
+.emptyText {
+  font-size: 13px;
+  color: var(--t2);
+}
+
+.browseButton {
+  font-size: 12px;
+  font-weight: 560;
+  color: #16110d;
+  background: var(--accent);
+  border: none;
+  border-radius: 8px;
+  padding: 7px 14px;
+  cursor: pointer;
+}
+
+.error {
+  font-size: 11px;
+  color: #ff6b6b;
+  margin: 8px 4px 0;
+}
+
+.footer {
+  box-sizing: border-box;
+  width: 100%;
+  margin-top: 12px;
+  padding: 11px 4px 0;
+  border-top: 1px solid var(--hair-soft);
+  font-size: 11px;
+  color: var(--t3);
+  display: flex;
+  justify-content: space-between;
+}
diff --git a/src/settings/tabs/models/LibraryPane.test.tsx b/src/settings/tabs/models/LibraryPane.test.tsx
new file mode 100644
index 00000000..863f742f
--- /dev/null
+++ b/src/settings/tabs/models/LibraryPane.test.tsx
@@ -0,0 +1,488 @@
+/**
+ * Unit tests for the Models surface's Library pane.
+ *
+ * Covers the installed-model list (active + non-active cards, capability
+ * badges), the Use action, the Delete confirm/cancel/success/error flow,
+ * the empty state, the free-disk footer, and the defensive guards around
+ * the manifest and disk probes.
+ *
+ * `invoke` comes from the global Tauri mock; capabilities are fetched
+ * through the same `get_model_capabilities` command the hook reads.
+ */
+
+import {
+  act,
+  fireEvent,
+  render,
+  screen,
+  waitFor,
+} from '@testing-library/react';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+
+import { LibraryPane } from './LibraryPane';
+import type { RawAppConfig, RawProvider } from '../../types';
+import type { InstalledModel } from '../../../types/starter';
+import type { ModelCapabilitiesMap } from '../../../types/model';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+const BASE_CONFIG: RawAppConfig = {
+  inference: {
+    active_provider: 'builtin',
+    keep_warm_inactivity_minutes: 0,
+    num_ctx: 16384,
+    providers: [
+      {
+        id: 'builtin',
+        kind: 'builtin',
+        label: 'Built-in (Thuki)',
+        base_url: '',
+        model: '',
+        vision: false,
+      },
+      {
+        id: 'ollama',
+        kind: 'ollama',
+        label: 'Ollama',
+        base_url: 'http://127.0.0.1:11434',
+        model: '',
+        vision: false,
+      },
+    ],
+  },
+  prompt: { system: 'hello' },
+  window: {
+    overlay_width: 600,
+    max_chat_height: 648,
+    max_images: 3,
+    text_base_px: 15,
+    text_line_height: 1.5,
+    text_letter_spacing_px: 0,
+    text_font_weight: 500,
+  },
+  quote: {
+    max_display_lines: 4,
+    max_display_chars: 300,
+    max_context_length: 4096,
+  },
+  behavior: { auto_replace: false, auto_close: false },
+  search: {
+    searxng_url: 'http://127.0.0.1:25017',
+    reader_url: 'http://127.0.0.1:25018',
+    max_iterations: 3,
+    top_k_urls: 10,
+    searxng_max_results: 10,
+    search_timeout_s: 20,
+    reader_per_url_timeout_s: 10,
+    reader_batch_timeout_s: 30,
+    judge_timeout_s: 30,
+    router_timeout_s: 45,
+  },
+  debug: { trace_enabled: false },
+};
+
+/** Distinct snapshot so onSaved assertions cannot pass by referential luck. */
+const NEW_CONFIG: RawAppConfig = {
+  ...BASE_CONFIG,
+  prompt: { system: 'updated' },
+};
+
+/** BASE_CONFIG with the builtin provider's model set to `builtinModel`. */
+function makeConfig(builtinModel: string): RawAppConfig {
+  const builtin: RawProvider = {
+    ...BASE_CONFIG.inference.providers[0],
+    model: builtinModel,
+  };
+  return {
+    ...BASE_CONFIG,
+    inference: {
+      ...BASE_CONFIG.inference,
+      providers: [builtin, BASE_CONFIG.inference.providers[1]],
+    },
+  };
+}
+
+const GEMMA: InstalledModel = {
+  id: 'org/gemma:gemma.gguf',
+  display_name: 'gemma',
+  size_bytes: 2_489_757_856,
+  quant: 'Q4_K_M',
+};
+
+const QWEN: InstalledModel = {
+  id: 'org/qwen:qwen.gguf',
+  display_name: 'qwen',
+  size_bytes: 9_000_000_000,
+  quant: '',
+};
+
+const INSTALLED: InstalledModel[] = [GEMMA, QWEN];
+
+const CAPS: ModelCapabilitiesMap = {
+  'org/gemma:gemma.gguf': { vision: true, thinking: false },
+  'org/qwen:qwen.gguf': { vision: false, thinking: true },
+};
+
+/** Marks a command response as a rejection in `mockCommands`. */
+class Reject {
+  constructor(public readonly value: unknown) {}
+}
+
+/**
+ * Routes `invoke` by command name. A `Reject` throws its payload, a function
+ * is called with the invoke args (for stateful sequences), anything else
+ * resolves as-is. Unmapped commands resolve to `undefined`.
+ */
+function mockCommands(responses: Record<string, unknown>) {
+  invokeMock.mockImplementation(
+    async (cmd: string, args?: Record<string, unknown>) => {
+      if (Object.prototype.hasOwnProperty.call(responses, cmd)) {
+        const v = responses[cmd];
+        if (v instanceof Reject) throw v.value;
+        if (typeof v === 'function') {
+          return (v as (a?: Record<string, unknown>) => unknown)(args);
+        }
+        return v;
+      }
+      return undefined;
+    },
+  );
+}
+
+/** Default backend: two installed models, capability map, known free disk. */
+function libraryResponses(overrides: Record<string, unknown> = {}) {
+  return {
+    list_installed_models: INSTALLED,
+    get_model_capabilities: CAPS,
+    get_models_dir_free_bytes: 30_400_000_000,
+    get_config: NEW_CONFIG,
+    ...overrides,
+  };
+}
+
+async function flush() {
+  await act(async () => {
+    await Promise.resolve();
+    await Promise.resolve();
+  });
+}
+
+beforeEach(() => {
+  invokeMock.mockReset();
+});
+
+async function renderPane(
+  config: RawAppConfig = makeConfig(''),
+  onSaved: (next: RawAppConfig) => void = () => {},
+  onAddModel: () => void = () => {},
+) {
+  const view = render(
+    <LibraryPane config={config} onSaved={onSaved} onAddModel={onAddModel} />,
+  );
+  await flush();
+  return view;
+}
+
+describe('LibraryPane', () => {
+  it('lists each installed model with its org line, size, and quant', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    expect(screen.getByText('gemma')).toBeInTheDocument();
+    expect(screen.getByText('org/gemma · Q4_K_M · 2.5 GB')).toBeInTheDocument();
+    // Empty quant drops out of the org line.
+    expect(screen.getByText('org/qwen · 9.0 GB')).toBeInTheDocument();
+  });
+
+  it('renders the uppercased first character as each avatar', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    expect(screen.getByText('G')).toBeInTheDocument();
+    expect(screen.getByText('Q')).toBeInTheDocument();
+  });
+
+  it('marks the active model with an Active badge and no Use button', async () => {
+    mockCommands(libraryResponses());
+    await renderPane(makeConfig('org/gemma:gemma.gguf'));
+    expect(screen.getByText('Active')).toBeInTheDocument();
+    // The active model offers no Use button; the non-active one does.
+    expect(
+      screen.getByRole('button', { name: 'Use qwen' }),
+    ).toBeInTheDocument();
+    expect(
+      screen.queryByRole('button', { name: 'Use gemma' }),
+    ).not.toBeInTheDocument();
+  });
+
+  it('shows a Vision badge only for vision-capable models', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    const vision = screen.getByText('Vision');
+    expect(vision).toBeInTheDocument();
+    // Only gemma is vision-capable, so exactly one Vision badge.
+    expect(screen.getAllByText('Vision')).toHaveLength(1);
+  });
+
+  it('shows a Reasoning badge only for thinking-capable models', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    expect(screen.getByText('Reasoning')).toBeInTheDocument();
+    expect(screen.getAllByText('Reasoning')).toHaveLength(1);
+  });
+
+  it('omits capability badges when no map entry exists for a model', async () => {
+    mockCommands(libraryResponses({ get_model_capabilities: {} }));
+    await renderPane();
+    expect(screen.queryByText('Vision')).not.toBeInTheDocument();
+    expect(screen.queryByText('Reasoning')).not.toBeInTheDocument();
+  });
+
+  it('Use commits the model, lifts the config, and refreshes', async () => {
+    mockCommands(libraryResponses({ update_provider_field: undefined }));
+    const onSaved = vi.fn();
+    await renderPane(makeConfig('org/gemma:gemma.gguf'), onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Use qwen' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'builtin',
+      field: 'model',
+      value: 'org/qwen:qwen.gguf',
+    });
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+  });
+
+  it('leaves the lift to the focus resync when Use cannot read the config', async () => {
+    mockCommands(
+      libraryResponses({
+        update_provider_field: undefined,
+        get_config: new Reject(new Error('read failed')),
+      }),
+    );
+    const onSaved = vi.fn();
+    await renderPane(makeConfig('org/gemma:gemma.gguf'), onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Use qwen' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'builtin',
+      field: 'model',
+      value: 'org/qwen:qwen.gguf',
+    });
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+
+  it('swallows an update_provider_field failure on Use', async () => {
+    mockCommands(
+      libraryResponses({
+        update_provider_field: new Reject(new Error('write failed')),
+      }),
+    );
+    const onSaved = vi.fn();
+    await renderPane(makeConfig('org/gemma:gemma.gguf'), onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Use qwen' }));
+    await flush();
+    expect(onSaved).not.toHaveBeenCalled();
+    expect(screen.getByText('qwen')).toBeInTheDocument();
+  });
+
+  it('Delete asks for confirmation and Cancel backs out without deleting', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    expect(
+      screen.getByText('Delete gemma? Its files are removed from disk.'),
+    ).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    expect(
+      screen.queryByText('Delete gemma? Its files are removed from disk.'),
+    ).not.toBeInTheDocument();
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'delete_installed_model',
+      expect.anything(),
+    );
+  });
+
+  it('confirmed Delete invokes delete_installed_model, refreshes, and lifts the config', async () => {
+    let deleted = false;
+    mockCommands(
+      libraryResponses({
+        list_installed_models: () => (deleted ? [QWEN] : INSTALLED),
+        delete_installed_model: () => {
+          deleted = true;
+          return undefined;
+        },
+      }),
+    );
+    const onSaved = vi.fn();
+    await renderPane(makeConfig(''), onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
+      id: 'org/gemma:gemma.gguf',
+    });
+    expect(screen.queryByText('gemma')).not.toBeInTheDocument();
+    expect(screen.getByText('qwen')).toBeInTheDocument();
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+  });
+
+  it('leaves the lift to the focus resync when get_config fails post-delete', async () => {
+    mockCommands(
+      libraryResponses({
+        delete_installed_model: undefined,
+        get_config: new Reject(new Error('read failed')),
+      }),
+    );
+    const onSaved = vi.fn();
+    await renderPane(makeConfig(''), onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Manage qwen' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete qwen' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
+      id: 'org/qwen:qwen.gguf',
+    });
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+
+  it('surfaces a delete failure as an alert and keeps the row', async () => {
+    mockCommands(
+      libraryResponses({ delete_installed_model: new Reject('file busy') }),
+    );
+    await renderPane();
+    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    await flush();
+    expect(screen.getByRole('alert')).toHaveTextContent('file busy');
+    expect(screen.getByText('gemma')).toBeInTheDocument();
+    expect(invokeMock).not.toHaveBeenCalledWith('get_config');
+  });
+
+  it('renders the empty state and routes both add affordances to onAddModel', async () => {
+    mockCommands(libraryResponses({ list_installed_models: [] }));
+    const onAddModel = vi.fn();
+    await renderPane(makeConfig(''), () => {}, onAddModel);
+    expect(screen.getByText('No models downloaded yet.')).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Browse Discover' }));
+    expect(onAddModel).toHaveBeenCalledTimes(1);
+    // The top-right Add model button is present in the empty state too.
+    fireEvent.click(screen.getByRole('button', { name: 'Add model' }));
+    expect(onAddModel).toHaveBeenCalledTimes(2);
+  });
+
+  it('treats a non-array manifest payload as empty', async () => {
+    mockCommands(libraryResponses({ list_installed_models: null }));
+    await renderPane();
+    expect(screen.getByText('No models downloaded yet.')).toBeInTheDocument();
+  });
+
+  it('falls back to the empty state when the manifest probe rejects', async () => {
+    mockCommands(
+      libraryResponses({
+        list_installed_models: new Reject(new Error('manifest unreadable')),
+      }),
+    );
+    await renderPane();
+    expect(screen.getByText('No models downloaded yet.')).toBeInTheDocument();
+  });
+
+  it('shows the free-disk footer and the model count when both are known', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    expect(screen.getByText('30.4 GB free on disk')).toBeInTheDocument();
+    expect(
+      screen.getByText('2 models · capabilities detected automatically'),
+    ).toBeInTheDocument();
+  });
+
+  it('hides the free-disk line when the probe returns a non-number', async () => {
+    mockCommands(libraryResponses({ get_models_dir_free_bytes: null }));
+    await renderPane();
+    expect(screen.queryByText(/free on disk/)).not.toBeInTheDocument();
+    expect(
+      screen.getByText('2 models · capabilities detected automatically'),
+    ).toBeInTheDocument();
+  });
+
+  it('hides the free-disk line when the disk probe rejects', async () => {
+    mockCommands(
+      libraryResponses({
+        get_models_dir_free_bytes: new Reject(new Error('statfs failed')),
+      }),
+    );
+    await renderPane();
+    expect(screen.queryByText(/free on disk/)).not.toBeInTheDocument();
+  });
+
+  it('renders the top-right Add model button and routes it to onAddModel', async () => {
+    mockCommands(libraryResponses());
+    const onAddModel = vi.fn();
+    await renderPane(makeConfig(''), () => {}, onAddModel);
+    fireEvent.click(screen.getByRole('button', { name: 'Add model' }));
+    expect(onAddModel).toHaveBeenCalledTimes(1);
+  });
+
+  it('treats every model as non-active when no builtin provider exists', async () => {
+    mockCommands(libraryResponses());
+    // A config whose only provider is Ollama: the builtin lookup misses and
+    // the active model falls back to "", so no card is Active and both get Use.
+    const ollamaOnly: RawAppConfig = {
+      ...BASE_CONFIG,
+      inference: {
+        ...BASE_CONFIG.inference,
+        providers: [BASE_CONFIG.inference.providers[1]],
+      },
+    };
+    await renderPane(ollamaOnly);
+    expect(screen.queryByText('Active')).not.toBeInTheDocument();
+    expect(
+      screen.getByRole('button', { name: 'Use gemma' }),
+    ).toBeInTheDocument();
+    expect(
+      screen.getByRole('button', { name: 'Use qwen' }),
+    ).toBeInTheDocument();
+  });
+
+  it('toggles the Manage menu closed when its own button is clicked again', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    const manage = screen.getByRole('button', { name: 'Manage gemma' });
+    fireEvent.click(manage);
+    expect(
+      screen.getByRole('button', { name: 'Delete gemma' }),
+    ).toBeInTheDocument();
+    // A second click on the same Manage button collapses the row.
+    fireEvent.click(manage);
+    expect(
+      screen.queryByRole('button', { name: 'Delete gemma' }),
+    ).not.toBeInTheDocument();
+  });
+
+  it('clears a stale delete error once a later delete succeeds', async () => {
+    mockCommands(
+      libraryResponses({ delete_installed_model: new Reject('file busy') }),
+    );
+    await renderPane();
+    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    await flush();
+    expect(screen.getByRole('alert')).toHaveTextContent('file busy');
+
+    mockCommands(
+      libraryResponses({
+        list_installed_models: [QWEN],
+        delete_installed_model: undefined,
+      }),
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    await waitFor(() =>
+      expect(screen.queryByRole('alert')).not.toBeInTheDocument(),
+    );
+  });
+});
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
new file mode 100644
index 00000000..c436d618
--- /dev/null
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -0,0 +1,251 @@
+/**
+ * Library pane of the Models surface: the user's installed local models.
+ *
+ * Each downloaded model shows as a card with its name, capability badges
+ * (Vision / Reasoning, detected automatically), and its Hugging Face repo,
+ * quantisation, and size. The currently selected built-in model is marked
+ * Active; any other model offers a Use button that makes it the active one.
+ * A per-card Manage menu reveals an inline Delete confirm that removes the
+ * model from disk. When nothing is installed the pane invites the user over
+ * to Discover; a footer reports free disk space and the model count.
+ */
+
+import { useCallback, useEffect, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+
+import { useModelCapabilities } from '../../../hooks/useModelCapabilities';
+import styles from './LibraryPane.module.css';
+import type { RawAppConfig } from '../../types';
+import type { InstalledModel } from '../../../types/starter';
+
+/** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
+function gb(bytes: number): string {
+  return (bytes / 1e9).toFixed(1);
+}
+
+interface LibraryPaneProps {
+  config: RawAppConfig;
+  /** Lift a fresh config after a Use or Delete writes to disk. */
+  onSaved: (next: RawAppConfig) => void;
+  /** Navigate to the Discover view to download a new model. */
+  onAddModel: () => void;
+}
+
+export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
+  const activeModel =
+    config.inference.providers.find((p) => p.kind === 'builtin')?.model ?? '';
+
+  const [installed, setInstalled] = useState<InstalledModel[]>([]);
+  const [freeDiskBytes, setFreeDiskBytes] = useState<number | null>(null);
+  const [managing, setManaging] = useState<string | null>(null);
+  const [confirmingDelete, setConfirmingDelete] = useState<string | null>(null);
+  const [deleteError, setDeleteError] = useState<string | null>(null);
+  const { capabilities } = useModelCapabilities();
+  const countNoun = installed.length === 1 ? 'model' : 'models';
+
+  const refreshInstalled = useCallback(async () => {
+    try {
+      const rows = await invoke<InstalledModel[]>('list_installed_models');
+      setInstalled(Array.isArray(rows) ? rows : []);
+    } catch {
+      setInstalled([]);
+    }
+  }, []);
+
+  useEffect(() => {
+    void refreshInstalled();
+    void invoke<number | null>('get_models_dir_free_bytes')
+      .then((bytes) => {
+        setFreeDiskBytes(typeof bytes === 'number' ? bytes : null);
+      })
+      .catch(() => {
+        // Unknown free space just hides the disk line.
+      });
+  }, [refreshInstalled]);
+
+  // The backend writes the builtin provider's model field; lift the fresh
+  // snapshot so the active card moves without a tab remount.
+  function selectModel(id: string) {
+    void invoke('update_provider_field', {
+      providerId: 'builtin',
+      field: 'model',
+      value: id,
+    })
+      .then(async () => {
+        await refreshInstalled();
+        onSaved(await invoke<RawAppConfig>('get_config'));
+      })
+      .catch(() => {
+        // The focus-driven resync picks the change up on next activation.
+      });
+  }
+
+  // Deletion is refcounted server-side; the backend also clears the builtin
+  // provider's model field when the deleted model was the selected one, so
+  // the lifted snapshot is the source of truth.
+  async function handleDelete(id: string) {
+    setConfirmingDelete(null);
+    setManaging(null);
+    try {
+      await invoke('delete_installed_model', { id });
+    } catch (err) {
+      setDeleteError(String(err));
+      return;
+    }
+    setDeleteError(null);
+    await refreshInstalled();
+    try {
+      onSaved(await invoke<RawAppConfig>('get_config'));
+    } catch {
+      // The focus-driven resync picks the change up on next activation.
+    }
+  }
+
+  return (
+    <div className={styles.pane}>
+      <div className={styles.bar}>
+        <button type="button" className={styles.addButton} onClick={onAddModel}>
+          Add model
+        </button>
+      </div>
+
+      {installed.length === 0 ? (
+        <div className={styles.empty}>
+          <p className={styles.emptyText}>No models downloaded yet.</p>
+          <button
+            type="button"
+            className={styles.browseButton}
+            onClick={onAddModel}
+          >
+            Browse Discover
+          </button>
+        </div>
+      ) : (
+        <div className={styles.list}>
+          {installed.map((m) => {
+            const active = m.id === activeModel;
+            const caps = capabilities[m.id];
+            const repo = m.id.split(':')[0];
+            return (
+              <div
+                key={m.id}
+                className={`${styles.card} ${active ? styles.cardActive : ''}`}
+              >
+                <div className={styles.row}>
+                  <div className={styles.avatar}>
+                    {m.display_name.charAt(0).toUpperCase()}
+                  </div>
+                  <div className={styles.mid}>
+                    <div className={styles.name}>
+                      {m.display_name}
+                      {active ? (
+                        <span
+                          className={`${styles.badge} ${styles.badgeActive}`}
+                        >
+                          Active
+                        </span>
+                      ) : null}
+                      {caps?.vision ? (
+                        <span
+                          className={`${styles.badge} ${styles.badgeVision}`}
+                        >
+                          Vision
+                        </span>
+                      ) : null}
+                      {caps?.thinking ? (
+                        <span
+                          className={`${styles.badge} ${styles.badgeReason}`}
+                        >
+                          Reasoning
+                        </span>
+                      ) : null}
+                    </div>
+                    <div className={styles.org}>
+                      {repo}
+                      {m.quant !== '' ? ` · ${m.quant}` : ''} ·{' '}
+                      {gb(m.size_bytes)} GB
+                    </div>
+                  </div>
+                  <div className={styles.actions}>
+                    {active ? null : (
+                      <button
+                        type="button"
+                        className={styles.useButton}
+                        aria-label={`Use ${m.display_name}`}
+                        onClick={() => selectModel(m.id)}
+                      >
+                        Use
+                      </button>
+                    )}
+                    <button
+                      type="button"
+                      className={styles.manageButton}
+                      aria-label={`Manage ${m.display_name}`}
+                      onClick={() =>
+                        setManaging((cur) => (cur === m.id ? null : m.id))
+                      }
+                    >
+                      ⋮
+                    </button>
+                  </div>
+                </div>
+
+                {managing === m.id ? (
+                  <div className={styles.manageRow}>
+                    {confirmingDelete === m.id ? (
+                      <>
+                        <span className={styles.confirmText}>
+                          Delete {m.display_name}? Its files are removed from
+                          disk.
+                        </span>
+                        <button
+                          type="button"
+                          className={styles.deleteButton}
+                          aria-label="Confirm delete"
+                          onClick={() => void handleDelete(m.id)}
+                        >
+                          Delete
+                        </button>
+                        <button
+                          type="button"
+                          className={styles.ghostButton}
+                          onClick={() => setConfirmingDelete(null)}
+                        >
+                          Cancel
+                        </button>
+                      </>
+                    ) : (
+                      <button
+                        type="button"
+                        className={styles.deleteButton}
+                        aria-label={`Delete ${m.display_name}`}
+                        onClick={() => setConfirmingDelete(m.id)}
+                      >
+                        Delete
+                      </button>
+                    )}
+                  </div>
+                ) : null}
+              </div>
+            );
+          })}
+        </div>
+      )}
+
+      {deleteError !== null ? (
+        <p className={styles.error} role="alert">
+          {deleteError}
+        </p>
+      ) : null}
+
+      <div className={styles.footer}>
+        <span>
+          {freeDiskBytes !== null ? `${gb(freeDiskBytes)} GB free on disk` : ''}
+        </span>
+        <span>
+          {installed.length} {countNoun} · capabilities detected automatically
+        </span>
+      </div>
+    </div>
+  );
+}
diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
new file mode 100644
index 00000000..a606617a
--- /dev/null
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -0,0 +1,653 @@
+import {
+  render,
+  screen,
+  fireEvent,
+  act,
+  waitFor,
+} from '@testing-library/react';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+import {
+  emitTauriEvent,
+  clearEventHandlers,
+} from '../../../testUtils/mocks/tauri';
+
+import { ProvidersPane } from './ProvidersPane';
+import type { RawAppConfig, RawProvider } from '../../types';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+const BUILTIN: RawProvider = {
+  id: 'builtin',
+  kind: 'builtin',
+  label: 'Built-in (Thuki)',
+  base_url: '',
+  model: '',
+  vision: false,
+};
+const OLLAMA: RawProvider = {
+  id: 'ollama',
+  kind: 'ollama',
+  label: 'Ollama',
+  base_url: 'http://127.0.0.1:11434',
+  model: '',
+  vision: false,
+};
+const OPENAI: RawProvider = {
+  id: 'openai',
+  kind: 'openai',
+  label: 'LM Studio',
+  base_url: 'http://127.0.0.1:1234',
+  model: '',
+  vision: false,
+};
+
+const INSTALLED = [
+  {
+    id: 'org/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf',
+    display_name: 'Qwen3.5 9B',
+    size_bytes: 6_600_000_000,
+    quant: 'Q4_K_M',
+  },
+];
+
+function makeConfig(
+  activeProvider: string, // stored as inference.active_provider
+  providers: RawProvider[],
+  over: Partial<RawAppConfig['inference']> = {}, // per-test inference overrides
+): RawAppConfig {
+  return {
+    inference: {
+      active_provider: activeProvider,
+      keep_warm_inactivity_minutes: 0,
+      num_ctx: 16384,
+      providers,
+      ...over, // spread last, so overrides replace the defaults above
+    },
+    prompt: { system: 'hello' }, // the sections from here on are fixed literals
+    window: {
+      overlay_width: 600,
+      max_chat_height: 648,
+      max_images: 3,
+      text_base_px: 15,
+      text_line_height: 1.5,
+      text_letter_spacing_px: 0,
+      text_font_weight: 500,
+    },
+    quote: {
+      max_display_lines: 4,
+      max_display_chars: 300,
+      max_context_length: 4096,
+    },
+    behavior: { auto_replace: false, auto_close: false },
+    search: {
+      searxng_url: '',
+      reader_url: '',
+      max_iterations: 3,
+      top_k_urls: 10,
+      searxng_max_results: 10,
+      search_timeout_s: 20,
+      reader_per_url_timeout_s: 10,
+      reader_batch_timeout_s: 30,
+      judge_timeout_s: 30,
+      router_timeout_s: 45,
+    },
+    debug: { trace_enabled: false },
+  };
+}
+
+function engineStatus(state: string) {
+  return { state, model_path: '', port: null, error: null }; // only `state` varies
+}
+
+/**
+ * Installs the default `invoke` mock. `over` maps a command name to the value
+ * it resolves with; an Error value makes that command reject instead.
+ */
+function mockInvoke(over: Record<string, unknown> = {}) {
+  invokeMock.mockImplementation(async (cmd: string) => {
+    // Per-test overrides take precedence over every default below.
+    const overridden = Object.prototype.hasOwnProperty.call(over, cmd);
+    if (overridden && over[cmd] instanceof Error) throw over[cmd];
+    if (overridden) return over[cmd];
+    // Defaults: nothing installed, engine stopped, no model loaded.
+    if (cmd === 'list_installed_models') return [];
+    if (cmd === 'get_engine_status') return engineStatus('stopped');
+    if (cmd === 'get_loaded_model') return null;
+    if (cmd === 'get_model_picker_state') {
+      return { active: null, all: [], ollamaReachable: true };
+    }
+    // Any other command resolves to a full config.
+    return makeConfig('ollama', [BUILTIN, OLLAMA]);
+  });
+}
+
+/**
+ * Renders the pane with inert defaults; `props` is spread last and wins.
+ */
+function renderPane(config: RawAppConfig, props: Record<string, unknown> = {}) {
+  const defaults = {
+    resyncToken: 0,
+    onSaved: () => {},
+    onAddModel: () => {},
+  };
+  return render(<ProvidersPane config={config} {...defaults} {...props} />);
+}
+
+beforeEach(() => {
+  vi.stubEnv('VITE_ENABLE_OPENAI_PROVIDER', 'true');
+  invokeMock.mockReset();
+  clearEventHandlers();
+  mockInvoke();
+});
+
+afterEach(() => {
+  vi.useRealTimers();
+  clearEventHandlers();
+});
+
+describe('ProvidersPane active hero', () => {
+  it('shows the active Ollama provider in the hero', async () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(screen.getByText('Active provider')).toBeInTheDocument();
+    expect(screen.getByText('Ollama')).toBeInTheDocument();
+    expect(screen.getByText('http://127.0.0.1:11434')).toBeInTheDocument();
+    expect(screen.getByText('Active')).toBeInTheDocument();
+  });
+
+  it('falls back to Ollama labelling when the active id matches no provider', () => {
+    renderPane(makeConfig('ghost', [BUILTIN, OLLAMA]));
+    // The hero name falls back to "Ollama" and the subtitle to the generic copy.
+    expect(screen.getAllByText('Ollama').length).toBeGreaterThan(0);
+    expect(screen.getByText('Local or remote Ollama')).toBeInTheDocument();
+  });
+
+  it('lists installed models in the built-in hero and commits a pick', async () => {
+    const builtin = { ...BUILTIN, model: INSTALLED[0].id };
+    mockInvoke({ list_installed_models: INSTALLED });
+    const onSaved = vi.fn();
+    renderPane(makeConfig('builtin', [builtin, OLLAMA]), { onSaved });
+    const select = await screen.findByRole('combobox', {
+      name: 'Built-in model',
+    });
+    expect(select).toHaveValue(INSTALLED[0].id);
+    fireEvent.change(select, { target: { value: INSTALLED[0].id } });
+    await waitFor(() =>
+      expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+        providerId: 'builtin',
+        field: 'model',
+        value: INSTALLED[0].id,
+      }),
+    );
+  });
+
+  it('shows a Choose-a-model option when the built-in model is not installed', async () => {
+    mockInvoke({ list_installed_models: INSTALLED });
+    renderPane(makeConfig('builtin', [{ ...BUILTIN, model: 'gone' }, OLLAMA]));
+    const select = await screen.findByRole('combobox', {
+      name: 'Built-in model',
+    });
+    expect(select).toHaveValue('');
+    expect(screen.getByText('Choose a model')).toBeInTheDocument();
+  });
+
+  it('offers a Discover link when no built-in models are installed', async () => {
+    const onAddModel = vi.fn();
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]), { onAddModel });
+    const link = await screen.findByRole('button', {
+      name: /Download a model in Discover/,
+    });
+    fireEvent.click(link);
+    expect(onAddModel).toHaveBeenCalled();
+  });
+
+  it('commit of the built-in model swallows a backend error', async () => {
+    const builtin = { ...BUILTIN, model: INSTALLED[0].id };
+    mockInvoke({
+      list_installed_models: INSTALLED,
+      update_provider_field: new Error('nope'),
+    });
+    renderPane(makeConfig('builtin', [builtin, OLLAMA]));
+    const select = await screen.findByRole('combobox', {
+      name: 'Built-in model',
+    });
+    fireEvent.change(select, { target: { value: INSTALLED[0].id } });
+    // No throw.
+    await Promise.resolve();
+  });
+
+  it('renders the Ollama endpoint field and model dropdown', async () => {
+    mockInvoke({
+      get_model_picker_state: {
+        active: 'llama3.1:8b',
+        all: ['llama3.1:8b'],
+        ollamaReachable: true,
+      },
+    });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(screen.getByRole('textbox', { name: 'Ollama URL' })).toHaveValue(
+      'http://127.0.0.1:11434',
+    );
+    const select = await screen.findByRole('combobox', {
+      name: 'Active Ollama model',
+    });
+    expect(select).toHaveValue('llama3.1:8b');
+    fireEvent.change(select, { target: { value: 'llama3.1:8b' } });
+    await waitFor(() =>
+      expect(invokeMock).toHaveBeenCalledWith('set_active_model', {
+        model: 'llama3.1:8b',
+      }),
+    );
+  });
+
+  it('shows a no-models hint when Ollama has none', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(screen.getByText('No models installed')).toBeInTheDocument();
+  });
+
+  it('warns when the Ollama URL is non-local', () => {
+    renderPane(
+      makeConfig('ollama', [
+        BUILTIN,
+        { ...OLLAMA, base_url: 'http://example.com:11434' },
+      ]),
+    );
+    expect(screen.getByRole('alert')).toHaveTextContent(/non-local Ollama/);
+  });
+
+  it('commits an edited Ollama URL on blur and lifts the config', async () => {
+    const onSaved = vi.fn();
+    const nextConfig = makeConfig('ollama', [BUILTIN, OLLAMA]);
+    mockInvoke({ set_ollama_url: nextConfig });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]), { onSaved });
+    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
+    fireEvent.focus(input);
+    fireEvent.change(input, { target: { value: 'http://127.0.0.1:9999' } });
+    fireEvent.blur(input);
+    await waitFor(() =>
+      expect(invokeMock).toHaveBeenCalledWith('set_ollama_url', {
+        baseUrl: 'http://127.0.0.1:9999',
+      }),
+    );
+    await waitFor(() => expect(onSaved).toHaveBeenCalledWith(nextConfig));
+  });
+
+  it('does not commit the Ollama URL when it is unchanged', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
+    fireEvent.blur(input);
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'set_ollama_url',
+      expect.anything(),
+    );
+  });
+
+  it('reverts the Ollama URL field when the commit fails', async () => {
+    mockInvoke({ set_ollama_url: new Error('bad') });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
+    fireEvent.change(input, { target: { value: 'http://127.0.0.1:9999' } });
+    fireEvent.blur(input);
+    await waitFor(() => expect(input).toHaveValue('http://127.0.0.1:11434'));
+  });
+
+  it('commits the Ollama URL on Enter', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
+    fireEvent.keyDown(input, { key: 'Enter' });
+    // blur fires; unchanged -> no commit, but the keydown branch is covered.
+    fireEvent.keyDown(input, { key: 'a' });
+  });
+
+  it('renders the OpenAI card in the hero when openai is active and enabled', () => {
+    renderPane(makeConfig('openai', [BUILTIN, OLLAMA, OPENAI]));
+    expect(
+      screen.getByRole('textbox', { name: 'Provider label' }),
+    ).toBeInTheDocument();
+  });
+});
+
+describe('ProvidersPane other providers', () => {
+  it('lists non-active providers with a Switch and switches on click', async () => {
+    const onSaved = vi.fn();
+    const next = makeConfig('builtin', [BUILTIN, OLLAMA]);
+    mockInvoke({ set_active_provider: next });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]), { onSaved });
+    const switches = screen.getAllByRole('button', { name: 'Switch' });
+    fireEvent.click(switches[0]);
+    await waitFor(() =>
+      expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
+        providerId: 'builtin',
+      }),
+    );
+    await waitFor(() => expect(onSaved).toHaveBeenCalledWith(next));
+  });
+
+  it('swallows a failed provider switch', async () => {
+    mockInvoke({ set_active_provider: new Error('x') });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    fireEvent.click(screen.getAllByRole('button', { name: 'Switch' })[0]);
+    await Promise.resolve();
+  });
+
+  it('hides the openai row when the dev flag is off', () => {
+    vi.stubEnv('VITE_ENABLE_OPENAI_PROVIDER', 'false');
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA, OPENAI]));
+    expect(screen.queryByText('LM Studio')).toBeNull();
+  });
+
+  it('shows the add-a-provider affordance when enabled and no openai exists', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(
+      screen.getByRole('button', { name: /Add OpenAI-compatible server/ }),
+    ).toBeInTheDocument();
+  });
+
+  it('shows the openai row in others when enabled, present, and not active', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA, OPENAI]));
+    expect(screen.getByText('LM Studio')).toBeInTheDocument();
+  });
+});
+
+describe('ProvidersPane generation', () => {
+  it('commits a context-window change on mouse up', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const slider = screen.getByRole('slider', {
+      name: 'Context window tokens',
+    });
+    fireEvent.change(slider, { target: { value: '800' } });
+    fireEvent.mouseUp(slider);
+    expect(screen.getByText(/tokens ·/)).toBeInTheDocument();
+  });
+
+  it('commits a context-window change via touch and keyboard', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const slider = screen.getByRole('slider', {
+      name: 'Context window tokens',
+    });
+    fireEvent.change(slider, { target: { value: '600' } });
+    fireEvent.touchEnd(slider);
+    fireEvent.keyUp(slider);
+  });
+
+  it('clamps the keep-warm minutes and handles non-numeric input', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('spinbutton', {
+      name: 'Release after N minutes',
+    });
+    fireEvent.focus(input);
+    fireEvent.change(input, { target: { value: '5000' } });
+    expect(input).toHaveValue(1440);
+    fireEvent.change(input, { target: { value: 'abc' } });
+    fireEvent.blur(input);
+    expect(input).toHaveValue(0);
+  });
+
+  it('shows the built-in engine state and gates Unload until loaded', async () => {
+    mockInvoke({ get_engine_status: engineStatus('loaded') });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    await waitFor(() => {
+      expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
+      expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled();
+    });
+  });
+
+  it('disables Unload while the built-in engine is stopped', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
+  });
+
+  it('ejects the model on Unload click when loaded', async () => {
+    mockInvoke({ get_engine_status: engineStatus('loaded') });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    await waitFor(() =>
+      expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled(),
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Unload now' }));
+    await waitFor(() => expect(invokeMock).toHaveBeenCalledWith('evict_model'));
+  });
+
+  it('swallows a failed eject', async () => {
+    mockInvoke({
+      get_engine_status: engineStatus('loaded'),
+      evict_model: new Error('no'), // an Error value makes the command reject
+    });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    await waitFor(() =>
+      expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled(),
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Unload now' }));
+    await waitFor(() => expect(invokeMock).toHaveBeenCalledWith('evict_model'));
+  });
+
+  it('shows the Ollama VRAM model line when one is loaded', async () => {
+    mockInvoke({ get_loaded_model: 'llama3.1:8b' });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    await waitFor(() =>
+      expect(screen.getByText('llama3.1:8b in VRAM')).toBeInTheDocument(),
+    );
+  });
+
+  it('shows no-model-loaded for Ollama when nothing is resident', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(screen.getByText('No model loaded')).toBeInTheDocument();
+  });
+
+  it('reflects warmup load + evict events', async () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    // Let the mount-time get_loaded_model settle so the event is not clobbered.
+    await act(async () => {
+      await Promise.resolve();
+    });
+    await act(async () => {
+      emitTauriEvent('warmup:model-loaded', 'phi4');
+    });
+    expect(screen.getByText('phi4 in VRAM')).toBeInTheDocument();
+    await act(async () => {
+      emitTauriEvent('warmup:model-evicted', null);
+    });
+    expect(screen.getByText('No model loaded')).toBeInTheDocument();
+  });
+
+  it('opens and closes the system prompt editor', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    fireEvent.click(screen.getByRole('button', { name: /Edit/ }));
+    expect(
+      screen.getByRole('textbox', { name: 'System prompt' }),
+    ).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Done' }));
+    expect(screen.queryByRole('textbox', { name: 'System prompt' })).toBeNull();
+  });
+
+  it('opens the diagnostics section with the trace toggle', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    fireEvent.click(screen.getByRole('button', { name: /Diagnostics/ }));
+    expect(
+      screen.getByRole('switch', { name: 'Enable trace recording' }),
+    ).toBeInTheDocument();
+  });
+});
+
+describe('ProvidersPane robustness', () => {
+  it('treats a non-array installed payload as empty', async () => {
+    mockInvoke({ list_installed_models: null });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    expect(
+      await screen.findByRole('button', {
+        name: /Download a model in Discover/,
+      }),
+    ).toBeInTheDocument();
+  });
+
+  it('survives failed installed/engine/loaded reads', async () => {
+    mockInvoke({
+      list_installed_models: new Error('a'),
+      get_engine_status: new Error('b'),
+      get_loaded_model: new Error('c'),
+    });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    await waitFor(() =>
+      expect(screen.getByText('Active provider')).toBeInTheDocument(),
+    );
+  });
+
+  it('re-seeds local state on a resync token bump (unfocused)', () => {
+    const { rerender } = renderPane(
+      makeConfig('ollama', [BUILTIN, OLLAMA], {
+        keep_warm_inactivity_minutes: 0,
+        num_ctx: 16384,
+      }),
+    );
+    rerender(
+      <ProvidersPane
+        config={makeConfig('ollama', [BUILTIN, OLLAMA], {
+          keep_warm_inactivity_minutes: 30,
+          num_ctx: 32768,
+        })}
+        resyncToken={1}
+        onSaved={() => {}}
+        onAddModel={() => {}}
+      />,
+    );
+    expect(
+      screen.getByRole('spinbutton', { name: 'Release after N minutes' }),
+    ).toHaveValue(30);
+  });
+
+  it('keeps focused fields unchanged across a resync', () => {
+    const { rerender } = renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    const min = screen.getByRole('spinbutton', {
+      name: 'Release after N minutes',
+    });
+    fireEvent.focus(min);
+    const url = screen.getByRole('textbox', { name: 'Ollama URL' });
+    fireEvent.focus(url);
+    rerender(
+      <ProvidersPane
+        config={makeConfig('ollama', [BUILTIN, OLLAMA], {
+          keep_warm_inactivity_minutes: 99,
+        })}
+        resyncToken={2}
+        onSaved={() => {}}
+        onAddModel={() => {}}
+      />,
+    );
+    // Focused fields are not clobbered: still the original values.
+    expect(min).toHaveValue(0);
+    expect(url).toHaveValue('http://127.0.0.1:11434');
+  });
+
+  it('shows the built-in active size in the footnote', async () => {
+    const builtin = { ...BUILTIN, model: INSTALLED[0].id };
+    mockInvoke({ list_installed_models: INSTALLED }); // 6_600_000_000 bytes
+    renderPane(makeConfig('builtin', [builtin, OLLAMA]));
+    await waitFor(() =>
+      expect(screen.getByText(/built-in active: 6\.6 GB/)).toBeInTheDocument(),
+    );
+  });
+
+  it('pluralises the installed count in the footnote', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(screen.getByText(/0 installed models/)).toBeInTheDocument();
+  });
+
+  it('singularises one installed model', async () => {
+    mockInvoke({ list_installed_models: INSTALLED }); // exactly one model
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    await waitFor(() =>
+      expect(screen.getByText(/\b1 installed model\b/)).toBeInTheDocument(),
+    );
+  });
+
+  it('reflects the engine:status event stream for the built-in engine', async () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    await act(async () => {
+      await Promise.resolve();
+    });
+    await act(async () => {
+      emitTauriEvent('engine:status', engineStatus('loaded'));
+    });
+    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
+  });
+
+  it('falls back to the first Ollama model when the active one is not listed', async () => {
+    mockInvoke({
+      get_model_picker_state: {
+        active: 'not-installed',
+        all: ['m1', 'm2'],
+        ollamaReachable: true,
+      },
+    });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    const select = await screen.findByRole('combobox', {
+      name: 'Active Ollama model',
+    });
+    expect(select).toHaveValue('m1');
+  });
+
+  it('uses generic subtitles when provider URLs are empty', () => {
+    renderPane(
+      makeConfig('builtin', [
+        BUILTIN,
+        { ...OLLAMA, base_url: '' },
+        { ...OPENAI, base_url: '' },
+      ]),
+    );
+    expect(screen.getByText('Local or remote Ollama')).toBeInTheDocument();
+    expect(screen.getByText('OpenAI-compatible server')).toBeInTheDocument();
+  });
+
+  it('tolerates a config with no built-in provider', () => {
+    renderPane(makeConfig('ollama', [OLLAMA]));
+    expect(screen.getByText(/0 installed models/)).toBeInTheDocument();
+  });
+
+  it('renders no openai card in the hero when the dev flag is off', () => {
+    vi.stubEnv('VITE_ENABLE_OPENAI_PROVIDER', 'false');
+    renderPane(makeConfig('openai', [BUILTIN, OLLAMA, OPENAI]));
+    // The hero shows the openai provider name but not its editable card.
+    expect(screen.getByText('LM Studio')).toBeInTheDocument();
+    expect(
+      screen.queryByRole('textbox', { name: 'Provider label' }),
+    ).toBeNull();
+  });
+
+  it('does not commit a context change on keyup while still dragging', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const slider = screen.getByRole('slider', {
+      name: 'Context window tokens',
+    });
+    fireEvent.change(slider, { target: { value: '700' } });
+    // dragging is still true (no mouse/touch up), so keyup must not commit.
+    fireEvent.keyUp(slider);
+  });
+
+  it('keeps a valid keep-warm value untouched on blur', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('spinbutton', {
+      name: 'Release after N minutes',
+    });
+    fireEvent.focus(input);
+    fireEvent.change(input, { target: { value: '60' } });
+    fireEvent.blur(input);
+    expect(input).toHaveValue(60);
+  });
+
+  it('renders a built-in model option without a quant suffix', async () => {
+    const noQuant = { ...INSTALLED[0], quant: '' };
+    mockInvoke({ list_installed_models: [noQuant] });
+    renderPane(
+      makeConfig('builtin', [{ ...BUILTIN, model: noQuant.id }, OLLAMA]),
+    );
+    const select = await screen.findByRole('combobox', {
+      name: 'Built-in model',
+    });
+    expect(select).toHaveValue(noQuant.id);
+  });
+
+  it('handles a config with no Ollama provider', () => {
+    renderPane(makeConfig('builtin', [BUILTIN]));
+    expect(screen.getByText('Active provider')).toBeInTheDocument();
+  });
+});
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
new file mode 100644
index 00000000..3a692749
--- /dev/null
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -0,0 +1,629 @@
+/**
+ * Providers pane (the "Active Hero" layout).
+ *
+ * Whichever provider is active occupies a prominent hero block at the top
+ * (its name, a one-line description, an Active marker, and a Model row that
+ * lets you pick the model that provider answers with). The remaining
+ * providers are compact rows under "Other providers", each with a Switch.
+ *
+ * Below the provider list sits the shared "Generation" section: the context
+ * window, keep-warm timer, and system prompt are GLOBAL settings that apply
+ * to whichever provider is active, so they live in their own section rather
+ * than inside any one provider card.
+ *
+ * Model downloads live in the Discover pane and per-model deletion lives in
+ * Library; this pane only selects the active provider and its model.
+ */
+
+import { useEffect, useRef, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+import { listen } from '@tauri-apps/api/event';
+
+import { Textarea, Toggle } from '../../components';
+import { SaveField } from '../../components/SaveField';
+import { OpenAiProviderCard, AddOpenAiProvider } from '../ProviderCards';
+import { useDebouncedSave } from '../../hooks/useDebouncedSave';
+import { useModelSelection } from '../../../hooks/useModelSelection';
+import { isNonLocalUrl } from '../../../utils/isNonLocalUrl';
+import { configHelp } from '../../configHelpers';
+import { Tooltip } from '../../../components/Tooltip';
+import styles from '../../../styles/settings.module.css';
+import type { RawAppConfig, RawProvider } from '../../types';
+import type { EngineStatus, InstalledModel } from '../../../types/starter';
+
+interface ProvidersPaneProps {
+  config: RawAppConfig; // source of the provider list and inference settings
+  resyncToken: number; // a change re-seeds local edit state from `config`
+  onSaved: (next: RawAppConfig) => void; // gets the config a save returns
+  /** Navigate to the Discover view (used by the no-model-installed hint). */
+  onAddModel: () => void;
+}
+
+const PROMPT_MAX_CHARS = 32000;
+const PROMPT_TEXTAREA_ROWS = 12;
+const TOKENS_PER_TURN_ESTIMATE = 400;
+
+const KEEP_WARM_TOOLTIP =
+  'Keep Warm holds your active model resident in memory after each use, ' +
+  'for both the built-in engine and Ollama. ' +
+  'The timer sets how long before it auto-releases; use -1 to keep it indefinitely. ' +
+  'Unload now releases it immediately. ' +
+  'If set to 0, each provider uses its natural short default (about 5 minutes).';
+
+// Log-scale context window slider: slider pos [0..1000] maps to a token count.
+const CTX_MIN = 2048;
+const CTX_MAX = 1_048_576;
+const CTX_LOG_RATIO = Math.log(CTX_MAX / CTX_MIN);
+
+/** Token count -> slider pos, clamped so bad input cannot leave the track. */
+function ctxToPos(v: number): number {
+  const pos = Math.round((1000 * Math.log(v / CTX_MIN)) / CTX_LOG_RATIO);
+  return Math.min(1000, Math.max(0, pos || 0)); // `|| 0` absorbs NaN
+}
+function posToCtx(pos: number): number {
+  const tokens = CTX_MIN * Math.pow(CTX_MAX / CTX_MIN, pos / 1000);
+  return Math.round(tokens / 1024) * 1024; // snap to a multiple of 1024
+}
+const CTX_TICKS = ['2K', '8K', '32K', '128K', '512K', '1M'];
+
+/** Formats a byte count as decimal gigabytes, rounded to one decimal. */
+function gb(bytes: number): string {
+  return (bytes / 1_000_000_000).toFixed(1);
+}
+
+/** One-line subtitle under a provider's name: its URL, else generic copy. */
+function providerSubtitle(p: RawProvider): string {
+  if (p.kind === 'builtin') return "Thuki's bundled llama.cpp engine";
+  const generic = p.kind === 'ollama' ? 'Local or remote Ollama' : null;
+  return p.base_url || (generic ?? 'OpenAI-compatible server');
+}
+
+export function ProvidersPane({
+  config,
+  resyncToken,
+  onSaved,
+  onAddModel,
+}: ProvidersPaneProps) {
+  const providers = config.inference.providers;
+  const activeId = config.inference.active_provider;
+  const activeProvider = providers.find((p) => p.id === activeId);
+  const activeKind = activeProvider?.kind ?? 'ollama';
+  const builtinProvider = providers.find((p) => p.kind === 'builtin');
+  const openaiProvider = providers.find((p) => p.kind === 'openai');
+
+  // The OpenAI-compatible provider kind is gated behind a compile-time,
+  // dev-only env flag, off by default and tree-shaken from shipped builds.
+  const openaiProviderEnabled =
+    import.meta.env.VITE_ENABLE_OPENAI_PROVIDER === 'true';
+
+  // Installed models drive the built-in hero's model picker; refreshed when
+  // the selected built-in model id changes (a switch lifts a new config).
+  const [installed, setInstalled] = useState<InstalledModel[]>([]);
+  const builtinModelId = builtinProvider?.model ?? '';
+  useEffect(() => {
+    void invoke<InstalledModel[]>('list_installed_models')
+      .then((rows) => setInstalled(Array.isArray(rows) ? rows : []))
+      .catch(() => setInstalled([]));
+  }, [builtinModelId]);
+
+  // Engine lifecycle + Ollama VRAM residency for the keep-warm status line.
+  const [engineState, setEngineState] =
+    useState<EngineStatus['state']>('stopped');
+  const [loadedModel, setLoadedModel] = useState<string | null>(null);
+  useEffect(() => {
+    invoke<EngineStatus>('get_engine_status')
+      .then((s) => setEngineState(s.state))
+      .catch(() => {});
+    invoke<string | null>('get_loaded_model')
+      .then(setLoadedModel)
+      .catch(() => {});
+    const unlistenStatus = listen<EngineStatus>('engine:status', (e) =>
+      setEngineState(e.payload.state),
+    );
+    const unlistenLoaded = listen<string>('warmup:model-loaded', (e) =>
+      setLoadedModel(e.payload),
+    );
+    const unlistenEvicted = listen<null>('warmup:model-evicted', () =>
+      setLoadedModel(null),
+    );
+    return () => {
+      void unlistenStatus.then((fn) => fn());
+      void unlistenLoaded.then((fn) => fn());
+      void unlistenEvicted.then((fn) => fn());
+    };
+  }, []);
+
+  // Keep-warm minutes (debounced save).
+  const [inactivityMin, setInactivityMin] = useState(
+    config.inference.keep_warm_inactivity_minutes,
+  );
+  const [rawMin, setRawMin] = useState(
+    String(config.inference.keep_warm_inactivity_minutes),
+  );
+  const minFocusedRef = useRef(false);
+  const { resetTo: resetMin } = useDebouncedSave(
+    'inference',
+    'keep_warm_inactivity_minutes',
+    inactivityMin,
+    { onSaved },
+  );
+
+  // Context window (debounced save); local slider pos updates live on drag.
+  const [numCtx, setNumCtx] = useState(config.inference.num_ctx);
+  const [ctxPos, setCtxPos] = useState(() =>
+    ctxToPos(config.inference.num_ctx),
+  );
+  const [ctxChip, setCtxChip] = useState(String(config.inference.num_ctx));
+  const ctxDraggingRef = useRef(false);
+  const { resetTo: resetNumCtx } = useDebouncedSave(
+    'inference',
+    'num_ctx',
+    numCtx,
+    { onSaved },
+  );
+
+  // Ollama URL (committed on blur / Enter via the dedicated command).
+  const ollamaBaseUrl =
+    providers.find((p) => p.kind === 'ollama')?.base_url ?? '';
+  const [ollamaUrl, setOllamaUrl] = useState(ollamaBaseUrl);
+  const ollamaUrlFocusedRef = useRef(false);
+
+  const [promptOpen, setPromptOpen] = useState(false);
+  const [devOpen, setDevOpen] = useState(false);
+
+  const { activeModel, availableModels, setActiveModel } = useModelSelection();
+
+  // Re-seed local editable state from a resync without scheduling saves.
+  const prevTokenRef = useRef(resyncToken);
+  if (prevTokenRef.current !== resyncToken) {
+    prevTokenRef.current = resyncToken;
+    if (!minFocusedRef.current) {
+      setInactivityMin(config.inference.keep_warm_inactivity_minutes);
+      setRawMin(String(config.inference.keep_warm_inactivity_minutes));
+      resetMin(config.inference.keep_warm_inactivity_minutes);
+    }
+    const nextCtx = config.inference.num_ctx;
+    setNumCtx(nextCtx);
+    setCtxPos(ctxToPos(nextCtx));
+    setCtxChip(String(nextCtx));
+    resetNumCtx(nextCtx);
+    if (!ollamaUrlFocusedRef.current) setOllamaUrl(ollamaBaseUrl);
+  }
+
+  function commitCtx(v: number) {
+    setNumCtx(v);
+    setCtxPos(ctxToPos(v));
+    setCtxChip(String(v));
+  }
+
+  function commitOllamaUrl() {
+    const next = ollamaUrl.trim();
+    if (next === ollamaBaseUrl) return;
+    void invoke<RawAppConfig>('set_ollama_url', { baseUrl: next })
+      .then((cfg) => onSaved(cfg))
+      .catch(() => setOllamaUrl(ollamaBaseUrl));
+  }
+
+  function selectProvider(id: string) {
+    void invoke<RawAppConfig>('set_active_provider', { providerId: id })
+      .then((cfg) => onSaved(cfg))
+      .catch(() => {});
+  }
+
+  function commitBuiltinModel(id: string) {
+    void invoke<RawAppConfig>('update_provider_field', {
+      providerId: 'builtin',
+      field: 'model',
+      value: id,
+    })
+      .then(onSaved)
+      .catch(() => {});
+  }
+
+  function handleEngineEject() {
+    void invoke('evict_model').catch(() => {});
+  }
+
+  const ctxTurns = Math.round(numCtx / TOKENS_PER_TURN_ESTIMATE);
+  const fillPct = `${ctxPos / 10}%`;
+
+  // The active Ollama model value, constrained to the installed list.
+  const ollamaModelValue =
+    activeModel && availableModels.includes(activeModel)
+      ? activeModel
+      : (availableModels[0] ?? '');
+  const builtinModelValue = installed.some((m) => m.id === builtinModelId)
+    ? builtinModelId
+    : '';
+  const activeBuiltin = installed.find((m) => m.id === builtinModelValue);
+
+  // Providers other than the active one, in a stable order.
+  const otherProviders = providers.filter((p) => p.id !== activeId);
+
+  return (
+    <>
+      <div className={styles.shead}>Active provider</div>
+      <div className={styles.hero}>
+        <div className={styles.heroHead}>
+          <div>
+            <div className={styles.heroName}>
+              {activeProvider?.label ?? 'Ollama'}
+            </div>
+            <div className={styles.heroSub}>
+              {activeProvider
+                ? providerSubtitle(activeProvider)
+                : 'Local or remote Ollama'}
+            </div>
+          </div>
+          <span className={styles.heroActive}>
+            <span className={styles.heroLiveDot} aria-hidden />
+            Active
+          </span>
+        </div>
+
+        {activeKind === 'builtin' ? (
+          <div className={styles.heroModel}>
+            <span className={styles.heroModelLabel}>Model</span>
+            {installed.length > 0 ? (
+              <select
+                className={styles.dropdown}
+                aria-label="Built-in model"
+                value={builtinModelValue}
+                onChange={(e) => commitBuiltinModel(e.target.value)}
+              >
+                {builtinModelValue === '' ? (
+                  <option value="" disabled>
+                    Choose a model
+                  </option>
+                ) : null}
+                {installed.map((m) => (
+                  <option key={m.id} value={m.id}>
+                    {m.display_name}
+                    {m.quant !== '' ? ` · ${m.quant}` : ''}
+                  </option>
+                ))}
+              </select>
+            ) : (
+              <button
+                type="button"
+                className={styles.heroModelLink}
+                onClick={onAddModel}
+              >
+                Download a model in Discover ›
+              </button>
+            )}
+          </div>
+        ) : null}
+
+        {activeKind === 'ollama' ? (
+          <>
+            <div className={styles.heroModel}>
+              <span className={styles.heroModelLabel}>Endpoint</span>
+              <input
+                type="text"
+                className={styles.input}
+                value={ollamaUrl}
+                aria-label="Ollama URL"
+                spellCheck={false}
+                autoComplete="off"
+                autoCorrect="off"
+                autoCapitalize="off"
+                placeholder="http://127.0.0.1:11434"
+                onFocus={() => {
+                  ollamaUrlFocusedRef.current = true;
+                }}
+                onChange={(e) => setOllamaUrl(e.target.value)}
+                onBlur={() => {
+                  ollamaUrlFocusedRef.current = false;
+                  commitOllamaUrl();
+                }}
+                onKeyDown={(e) => {
+                  if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
+                }}
+              />
+            </div>
+            {isNonLocalUrl(ollamaUrl) ? (
+              <p className={styles.providerWarning} role="alert">
+                This points Thuki at a non-local Ollama server. You are
+                responsible for securing it: prefer a VPN/Tailscale or SSH
+                tunnel over exposing the port directly.
+              </p>
+            ) : null}
+            <div className={styles.heroModel}>
+              <span className={styles.heroModelLabel}>Model</span>
+              {availableModels.length > 0 ? (
+                <select
+                  className={styles.dropdown}
+                  aria-label="Active Ollama model"
+                  value={ollamaModelValue}
+                  onChange={(e) => void setActiveModel(e.target.value)}
+                >
+                  {availableModels.map((m) => (
+                    <option key={m} value={m}>
+                      {m}
+                    </option>
+                  ))}
+                </select>
+              ) : (
+                <span className={styles.providerHint}>No models installed</span>
+              )}
+            </div>
+          </>
+        ) : null}
+
+        {activeProvider?.kind === 'openai' && openaiProviderEnabled ? (
+          <OpenAiProviderCard
+            provider={activeProvider}
+            resyncToken={resyncToken}
+            onSaved={onSaved}
+          />
+        ) : null}
+      </div>
+
+      <div className={styles.shead}>Other providers</div>
+      <div className={styles.listcard}>
+        {otherProviders.map((p) =>
+          p.kind === 'openai' && !openaiProviderEnabled ? null : (
+            <div className={styles.providerRow} key={p.id}>
+              <span className={styles.providerRowName}>{p.label}</span>
+              <span className={styles.providerRowSub}>
+                {providerSubtitle(p)}
+              </span>
+              <span className={styles.grow} />
+              <button
+                type="button"
+                className={styles.switchBtn}
+                onClick={() => selectProvider(p.id)}
+              >
+                Switch
+              </button>
+            </div>
+          ),
+        )}
+        {openaiProviderEnabled && !openaiProvider ? (
+          <div className={styles.providerRow}>
+            <AddOpenAiProvider onSaved={onSaved} />
+          </div>
+        ) : null}
+      </div>
+
+      <div className={styles.shead}>
+        Generation
+        <span className={styles.sheadNote}>
+          {' '}
+          · applies to whichever provider is active
+        </span>
+      </div>
+      <div className={styles.listcard}>
+        {/* Context window */}
+        <div className={styles.genRow}>
+          <div className={styles.genLabel}>
+            <div className={styles.genName}>Context window</div>
+            <div className={styles.genHelp}>
+              How much conversation the model remembers
+            </div>
+          </div>
+          <div className={styles.genCtxControl}>
+            <input
+              type="range"
+              className={styles.ctxSlider}
+              style={{ '--fill': fillPct } as React.CSSProperties}
+              min={0}
+              max={1000}
+              step={1}
+              value={ctxPos}
+              aria-label="Context window tokens"
+              aria-valuemin={CTX_MIN}
+              aria-valuemax={CTX_MAX}
+              aria-valuenow={numCtx}
+              aria-valuetext={`${numCtx} tokens`}
+              onChange={(e) => {
+                ctxDraggingRef.current = true;
+                const pos = Number(e.target.value);
+                setCtxPos(pos);
+                setCtxChip(String(posToCtx(pos)));
+              }}
+              onMouseUp={() => {
+                ctxDraggingRef.current = false;
+                commitCtx(posToCtx(ctxPos));
+              }}
+              onTouchEnd={() => {
+                ctxDraggingRef.current = false;
+                commitCtx(posToCtx(ctxPos));
+              }}
+              onKeyUp={() => {
+                if (!ctxDraggingRef.current) commitCtx(posToCtx(ctxPos));
+              }}
+            />
+            <div className={styles.ctxTickRow} aria-hidden="true">
+              {CTX_TICKS.map((label, i) => (
+                <span
+                  key={label}
+                  className={styles.ctxTick}
+                  style={{ left: `${(i / (CTX_TICKS.length - 1)) * 100}%` }}
+                >
+                  {label}
+                </span>
+              ))}
+            </div>
+            <div className={styles.genCtxValue}>
+              {Number(ctxChip).toLocaleString()} tokens ·{' '}
+              {ctxTurns.toLocaleString()} turns
+            </div>
+          </div>
+        </div>
+
+        {/* Keep model warm */}
+        <div className={styles.genRow}>
+          <div className={styles.genLabel}>
+            <div className={styles.genName}>
+              Keep model warm
+              <Tooltip label={KEEP_WARM_TOOLTIP} multiline>
+                <button
+                  type="button"
+                  className={styles.infoBtn}
+                  aria-label="About Keep model warm"
+                >
+                  ?
+                </button>
+              </Tooltip>
+            </div>
+            <div className={styles.genHelp}>
+              {activeKind === 'builtin'
+                ? `Engine: ${engineState}`
+                : loadedModel !== null
+                  ? `${loadedModel} in VRAM`
+                  : 'No model loaded'}
+            </div>
+          </div>
+          <div className={styles.genWarmControl}>
+            <input
+              type="number"
+              className={styles.keepWarmNumberInput}
+              value={rawMin}
+              min={-1}
+              max={1440}
+              aria-label="Release after N minutes"
+              onFocus={() => {
+                minFocusedRef.current = true;
+              }}
+              onChange={(e) => {
+                const n = parseInt(e.target.value, 10);
+                if (Number.isNaN(n)) {
+                  setRawMin(e.target.value);
+                } else {
+                  const clamped = Math.max(-1, Math.min(1440, n));
+                  setRawMin(String(clamped));
+                  setInactivityMin(clamped);
+                }
+              }}
+              onBlur={() => {
+                minFocusedRef.current = false;
+                if (Number.isNaN(parseInt(rawMin, 10))) {
+                  setRawMin('0');
+                  setInactivityMin(0);
+                }
+              }}
+            />
+            <span className={styles.keepWarmUnit}>min</span>
+            <button
+              type="button"
+              className={styles.switchBtn}
+              aria-label="Unload now"
+              disabled={activeKind === 'builtin' && engineState !== 'loaded'}
+              onClick={handleEngineEject}
+            >
+              Unload
+            </button>
+          </div>
+        </div>
+
+        {/* System prompt */}
+        <div className={styles.genRow}>
+          <div className={styles.genLabel}>
+            <div className={styles.genName}>System prompt</div>
+            <div className={styles.genHelp}>
+              Persona sent at the start of every chat
+            </div>
+          </div>
+          <button
+            type="button"
+            className={styles.heroModelLink}
+            aria-expanded={promptOpen}
+            onClick={() => setPromptOpen((o) => !o)}
+          >
+            {promptOpen ? 'Done' : 'Edit ›'}
+          </button>
+        </div>
+        {promptOpen ? (
+          <div className={styles.genPromptEditor}>
+            <SaveField
+              section="prompt"
+              fieldKey="system"
+              label="System prompt"
+              helper={configHelp('prompt', 'system')}
+              vertical
+              initialValue={config.prompt.system}
+              resyncToken={resyncToken}
+              onSaved={onSaved}
+              render={(value, setValue) => (
+                <>
+                  <Textarea
+                    value={value}
+                    onChange={setValue}
+                    placeholder="Persona prompt…"
+                    maxLength={PROMPT_MAX_CHARS}
+                    ariaLabel="System prompt"
+                    rows={PROMPT_TEXTAREA_ROWS}
+                  />
+                  <div className={styles.charCounter}>
+                    {value.length} / {PROMPT_MAX_CHARS}
+                  </div>
+                </>
+              )}
+            />
+          </div>
+        ) : null}
+      </div>
+
+      {/* A small free-disk + count footer mirrors the other panes. */}
+      <div className={styles.genFootnote}>
+        {installed.length} installed{' '}
+        {installed.length === 1 ? 'model' : 'models'}
+        {builtinProvider && activeBuiltin
+          ? ` · built-in active: ${gb(activeBuiltin.size_bytes)} GB`
+          : ''}
+      </div>
+
+      <div className={styles.devSection}>
+        <button
+          type="button"
+          className={styles.devTrigger}
+          aria-expanded={devOpen}
+          aria-controls="dev-diagnostics"
+          onClick={() => setDevOpen((o) => !o)}
+        >
+          <span className={styles.devTriggerLabel}>Diagnostics</span>
+          <span className={styles.devTag}>DEV</span>
+          <svg
+            className={`${styles.devChevron} ${devOpen ? styles.devChevronOpen : ''}`}
+            viewBox="0 0 10 10"
+            fill="currentColor"
+            aria-hidden
+          >
+            <path
+              d="M3 2l4 3-4 3"
+              stroke="currentColor"
+              strokeWidth="1.5"
+              strokeLinecap="round"
+              strokeLinejoin="round"
+              fill="none"
+            />
+          </svg>
+        </button>
+        {devOpen && (
+          <div id="dev-diagnostics">
+            <SaveField
+              section="debug"
+              fieldKey="trace_enabled"
+              label="Trace recording"
+              helper={configHelp('debug', 'trace_enabled')}
+              initialValue={config.debug.trace_enabled}
+              resyncToken={resyncToken}
+              onSaved={onSaved}
+              tooltipPlacement="top"
+              rightAlign
+              render={(value, setValue) => (
+                <Toggle
+                  checked={value}
+                  onChange={setValue}
+                  ariaLabel="Enable trace recording"
+                />
+              )}
+            />
+          </div>
+        )}
+      </div>
+    </>
+  );
+}
diff --git a/src/settings/tabs/models/useHfSearch.test.ts b/src/settings/tabs/models/useHfSearch.test.ts
new file mode 100644
index 00000000..9809d6c9
--- /dev/null
+++ b/src/settings/tabs/models/useHfSearch.test.ts
@@ -0,0 +1,254 @@
+/**
+ * Unit tests for {@link useHfSearch}.
+ *
+ * The hook debounces the query, serializes overlapping fetches with a
+ * monotonic token, drops post-unmount resolutions, and guards the IPC
+ * payload at runtime. The tests drive the debounce with fake timers and
+ * control resolution order with externally-settled promises so the
+ * stale-token path is exercised deterministically.
+ */
+
+import { act, renderHook, waitFor } from '@testing-library/react';
+import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+
+import { useHfSearch, HF_SEARCH_DEBOUNCE_MS } from './useHfSearch';
+import type { HfModelSummary } from '../../../types/hf';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+const POPULAR: HfModelSummary[] = [
+  { id: 'google/gemma-popular-GGUF', downloads: 1_000_000, gated: false },
+];
+
+const GEMMA: HfModelSummary[] = [
+  { id: 'google/gemma-4-12b-it-GGUF', downloads: 1_200_000, gated: false },
+  { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
+];
+
+beforeEach(() => {
+  invokeMock.mockReset();
+});
+
+afterEach(() => {
+  vi.useRealTimers();
+});
+
+/** Promise the test settles by hand through the returned `resolve`/`reject`. */
+function deferred<T>() {
+  type Handles = { resolve(value: T): void; reject(reason: unknown): void };
+  const handles = {} as Handles;
+  const promise = new Promise<T>((resolve, reject) => {
+    handles.resolve = resolve;
+    handles.reject = reject;
+  });
+  return { promise, ...handles };
+}
+
+describe('useHfSearch', () => {
+  it('fetches the popular browse list on mount with an empty query', async () => {
+    invokeMock.mockResolvedValue(POPULAR);
+    const { result } = renderHook(() => useHfSearch());
+
+    await waitFor(() => expect(result.current.loading).toBe(false));
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', { query: '' });
+    expect(result.current.results).toEqual(POPULAR);
+    expect(result.current.query).toBe('');
+  });
+
+  it('sets the query immediately but debounces the fetch', async () => {
+    vi.useFakeTimers();
+    invokeMock.mockResolvedValue(POPULAR);
+    const { result } = renderHook(() => useHfSearch());
+    // Flush mount effects; the debounced mount fetch has not fired yet.
+    await act(async () => {
+      await Promise.resolve();
+    });
+    invokeMock.mockClear();
+    invokeMock.mockResolvedValue(GEMMA);
+
+    act(() => result.current.setQuery('gemma'));
+    // Query is visible immediately; no fetch has fired yet.
+    expect(result.current.query).toBe('gemma');
+    expect(invokeMock).not.toHaveBeenCalled();
+
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: 'gemma',
+    });
+    expect(result.current.results).toEqual(GEMMA);
+  });
+
+  it('coalesces rapid input into a single fetch', async () => {
+    vi.useFakeTimers();
+    invokeMock.mockResolvedValue(POPULAR);
+    const { result } = renderHook(() => useHfSearch());
+    await act(async () => {
+      await Promise.resolve();
+    });
+    invokeMock.mockClear();
+    invokeMock.mockResolvedValue(GEMMA);
+
+    act(() => {
+      result.current.setQuery('g');
+      result.current.setQuery('ge');
+      result.current.setQuery('gem');
+    });
+    // Each keystroke restarts the timer; nothing has fired between them.
+    act(() => vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS - 1));
+    expect(invokeMock).not.toHaveBeenCalled();
+
+    await act(async () => {
+      vi.advanceTimersByTime(1);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledTimes(1);
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: 'gem',
+    });
+  });
+
+  it('drops a stale response that resolves after a newer one', async () => {
+    vi.useFakeTimers();
+    const first = deferred<HfModelSummary[]>();
+    const second = deferred<HfModelSummary[]>();
+    // Mount fetch resolves immediately so the two we care about are #2 and #3.
+    invokeMock.mockResolvedValueOnce(POPULAR);
+    invokeMock.mockReturnValueOnce(first.promise);
+    invokeMock.mockReturnValueOnce(second.promise);
+    const { result } = renderHook(() => useHfSearch());
+    // Fire and drain the debounced mount fetch so it consumes POPULAR; the
+    // two requests we care about are the next two (first, second).
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+
+    act(() => result.current.setQuery('a'));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+    });
+    act(() => result.current.setQuery('ab'));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+    });
+
+    // Resolve the NEWER request first, then the older one.
+    await act(async () => {
+      second.resolve(GEMMA);
+      await Promise.resolve();
+    });
+    expect(result.current.results).toEqual(GEMMA);
+    await act(async () => {
+      first.resolve(POPULAR);
+      await Promise.resolve();
+    });
+    // The stale (older) response must not overwrite the newer result.
+    expect(result.current.results).toEqual(GEMMA);
+  });
+
+  it('drops a resolution that lands after unmount', async () => {
+    vi.useFakeTimers();
+    const pending = deferred<HfModelSummary[]>();
+    invokeMock.mockReturnValue(pending.promise);
+    const { result, unmount } = renderHook(() => useHfSearch());
+    // Expire the mount debounce so a request is actually in flight.
+    act(() => vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS));
+    expect(invokeMock).toHaveBeenCalledTimes(1);
+    unmount();
+    // The late resolution must hit the mounted guard, not update state.
+    await act(async () => pending.resolve(POPULAR));
+    expect(result.current.results).toEqual([]);
+  });
+
+  it('treats a malformed payload as an empty result', async () => {
+    invokeMock.mockResolvedValue({ not: 'an array' });
+    const { result } = renderHook(() => useHfSearch());
+    await waitFor(() => expect(result.current.loading).toBe(false));
+    expect(result.current.results).toEqual([]);
+  });
+
+  it('treats an array with a malformed item as an empty result', async () => {
+    invokeMock.mockResolvedValue([
+      { id: 'ok/repo', downloads: 1, gated: false },
+      { id: 5 },
+    ]);
+    const { result } = renderHook(() => useHfSearch());
+    await waitFor(() => expect(result.current.loading).toBe(false));
+    expect(result.current.results).toEqual([]);
+  });
+
+  it('treats an array containing a null item as an empty result', async () => {
+    invokeMock.mockResolvedValue([null]);
+    const { result } = renderHook(() => useHfSearch());
+    await waitFor(() => expect(result.current.loading).toBe(false));
+    expect(result.current.results).toEqual([]);
+  });
+
+  it('drops a stale rejection that lands after a newer success', async () => {
+    vi.useFakeTimers();
+    const first = deferred<HfModelSummary[]>();
+    const second = deferred<HfModelSummary[]>();
+    invokeMock.mockResolvedValueOnce(POPULAR);
+    invokeMock.mockReturnValueOnce(first.promise);
+    invokeMock.mockReturnValueOnce(second.promise);
+    const { result } = renderHook(() => useHfSearch());
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+
+    act(() => result.current.setQuery('a'));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+    });
+    act(() => result.current.setQuery('ab'));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+    });
+
+    // Newer request succeeds first; the older one then rejects.
+    await act(async () => {
+      second.resolve(GEMMA);
+      await Promise.resolve();
+    });
+    expect(result.current.results).toEqual(GEMMA);
+    await act(async () => {
+      first.reject(new Error('stale failure'));
+      await Promise.resolve();
+    });
+    // The stale rejection must not clear the newer result.
+    expect(result.current.results).toEqual(GEMMA);
+  });
+
+  it('falls back to an empty result when the fetch rejects', async () => {
+    invokeMock.mockRejectedValue(new Error('network down'));
+    const { result } = renderHook(() => useHfSearch());
+    await waitFor(() => expect(result.current.loading).toBe(false));
+    expect(result.current.results).toEqual([]);
+  });
+
+  it('passes a non-empty query verbatim', async () => {
+    vi.useFakeTimers();
+    invokeMock.mockResolvedValue(POPULAR);
+    const { result } = renderHook(() => useHfSearch());
+    await act(async () => {
+      await Promise.resolve();
+    });
+    invokeMock.mockClear();
+    invokeMock.mockResolvedValue(GEMMA);
+
+    act(() => result.current.setQuery('llama'));
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: 'llama',
+    });
+  });
+});
diff --git a/src/settings/tabs/models/useHfSearch.ts b/src/settings/tabs/models/useHfSearch.ts
new file mode 100644
index 00000000..80585d88
--- /dev/null
+++ b/src/settings/tabs/models/useHfSearch.ts
@@ -0,0 +1,117 @@
+/**
+ * Search hook for the Discover pane's Hugging Face GGUF browser.
+ *
+ * Mirrors the request-serialization discipline of `useModelSelection`: a
+ * monotonic token drops stale/out-of-order responses, a mounted flag drops
+ * post-unmount resolutions, and a runtime guard validates the IPC payload
+ * before it is trusted. On top of that, the query input is debounced so a
+ * burst of keystrokes makes one backend call, not one per keystroke.
+ *
+ * The backend command `search_hf_models` returns the most-downloaded GGUF
+ * repos for a blank query (a "browse popular" list), so the hook fetches once
+ * on mount with an empty query and again on every debounced query change.
+ */
+
+import { useCallback, useEffect, useRef, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+import type { HfModelSummary } from '../../../types/hf';
+
+/** Debounce window before a query change triggers a backend fetch. */
+export const HF_SEARCH_DEBOUNCE_MS = 300;
+
+/**
+ * Type guard for the `search_hf_models` IPC payload.
+ *
+ * Passes only an array in which every element is a non-null object carrying
+ * a string `id`, a number `downloads`, and a boolean `gated`. An empty array
+ * passes. Hand-rolled so the hook needs no schema library; when it returns
+ * false the caller substitutes an empty result list.
+ */
+function isHfModelSummaryArray(value: unknown): value is HfModelSummary[] {
+  if (!Array.isArray(value)) return false;
+  // True for any element that cannot be an `HfModelSummary`.
+  const malformed = (entry: unknown): boolean => {
+    if (entry === null || typeof entry !== 'object') return true;
+    const fields = entry as Record<string, unknown>;
+    return (
+      typeof fields.id !== 'string' ||
+      typeof fields.downloads !== 'number' ||
+      typeof fields.gated !== 'boolean'
+    );
+  };
+  // One bad element rejects the whole payload.
+  return !value.some(malformed);
+}
+
+
+/** Shape returned by {@link useHfSearch}. */
+export interface UseHfSearchResult {
+  /** The current query text, updated synchronously on every keystroke. */
+  query: string;
+  /** Set the query. Updates immediately; the backend fetch is debounced. */
+  setQuery: (q: string) => void;
+  /** The most recent (validated) search results, or `[]` on any failure. */
+  results: HfModelSummary[];
+  /** True until the first fetch settles, then while a fetch is in flight. */
+  loading: boolean;
+}
+
+/**
+ * Hook backing the Discover pane's repo search box.
+ *
+ * The query text is plain state. Every change to it, including the initial
+ * empty value at mount, arms a `HF_SEARCH_DEBOUNCE_MS` timer; only a timer
+ * that reaches expiry invokes `search_hf_models`. Each invocation claims the
+ * next value of a ref-held counter, and a settled invocation may write state
+ * only while its value is still the newest and the hook is still mounted.
+ */
+export function useHfSearch(): UseHfSearchResult {
+  const [query, setQuery] = useState('');
+  const [results, setResults] = useState<HfModelSummary[]>([]);
+  const [loading, setLoading] = useState(true);
+
+  const aliveRef = useRef(true);
+  const newestTokenRef = useRef(0);
+
+  useEffect(() => {
+    aliveRef.current = true;
+    return () => {
+      aliveRef.current = false;
+    };
+  }, []);
+
+  const isLatest = useCallback(
+    (token: number): boolean =>
+      aliveRef.current && token === newestTokenRef.current,
+    [],
+  );
+
+  const runSearch = useCallback(
+    async (q: string): Promise<void> => {
+      const token = newestTokenRef.current + 1;
+      newestTokenRef.current = token;
+      setLoading(true);
+      let next: HfModelSummary[] = [];
+      try {
+        const payload = await invoke<unknown>('search_hf_models', { query: q });
+        if (isHfModelSummaryArray(payload)) next = payload;
+      } catch {
+        // Swallowed on purpose: `next` already holds the empty fallback.
+      }
+      if (!isLatest(token)) return;
+      setResults(next);
+      setLoading(false);
+    },
+    [isLatest],
+  );
+
+  // Arm the debounce timer for the current query; the cleanup disarms it when
+  // the query changes again first or the hook unmounts.
+  useEffect(() => {
+    const fire = (): void => void runSearch(query);
+    const timer = window.setTimeout(fire, HF_SEARCH_DEBOUNCE_MS);
+    return () => window.clearTimeout(timer);
+  }, [query, runSearch]);
+
+  return { query, setQuery, results, loading };
+}
diff --git a/src/settings/tabs/tabs.test.tsx b/src/settings/tabs/tabs.test.tsx
index 2924c987..353df96e 100644
--- a/src/settings/tabs/tabs.test.tsx
+++ b/src/settings/tabs/tabs.test.tsx
@@ -19,11 +19,7 @@ import {
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { invoke } from '@tauri-apps/api/core';
-import { listen } from '@tauri-apps/api/event';
-import {
-  clearEventHandlers,
-  emitTauriEvent,
-} from '../../testUtils/mocks/tauri';
+import { clearEventHandlers } from '../../testUtils/mocks/tauri';
 
 import { ModelTab } from './ModelTab';
 import { DisplayTab } from './DisplayTab';
@@ -94,31 +90,6 @@ const CONFIG: RawAppConfig = {
   },
 };
 
-/** CONFIG with the built-in provider active (Keep Warm shows the engine-status row). */
-const BUILTIN_ACTIVE_CONFIG: RawAppConfig = {
-  ...CONFIG,
-  inference: { ...CONFIG.inference, active_provider: 'builtin' },
-};
-
-/** CONFIG plus the single OpenAI-compatible provider record. */
-const OPENAI_CONFIG: RawAppConfig = {
-  ...CONFIG,
-  inference: {
-    ...CONFIG.inference,
-    providers: [
-      ...CONFIG.inference.providers,
-      {
-        id: 'openai',
-        kind: 'openai',
-        label: 'LM Studio',
-        base_url: 'http://127.0.0.1:1234',
-        model: '',
-        vision: false,
-      },
-    ],
-  },
-};
-
 /** Full engine lifecycle payload for `engine:status` emissions. */
 function engineStatus(
   state: 'stopped' | 'starting' | 'loaded' | 'stopping' | 'failed',
@@ -167,1288 +138,63 @@ async function renderModelTab() {
   return view;
 }
 
-describe('ModelTab', () => {
-  it('renders Providers and Prompt sections with the expected labels', async () => {
-    await renderModelTab();
-    // `selector: 'div'` targets the section heading, not the same-named
-    // segmented-control tab button.
-    expect(
-      screen.getByText('Providers', { selector: 'div' }),
-    ).toBeInTheDocument();
-    expect(screen.getByText('Built-in (Thuki)')).toBeInTheDocument();
-    // Built-in is selectable (no more "upcoming version" badge); Ollama is
-    // the active provider in this config.
-    expect(
-      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
-    ).not.toBeChecked();
-    expect(screen.getByRole('radio', { name: 'Use Ollama' })).toBeChecked();
-    expect(screen.getByText('Prompt')).toBeInTheDocument();
-    expect(screen.getByText('Ollama URL')).toBeInTheDocument();
-    expect(screen.getByText('System prompt')).toBeInTheDocument();
-  });
-
-  it('defaults to the Providers view', async () => {
+describe('ModelTab (router)', () => {
+  it('defaults to the Providers view and renders the active provider hero', async () => {
     await renderModelTab();
     expect(screen.getByRole('tab', { name: 'Providers' })).toHaveAttribute(
       'aria-selected',
       'true',
     );
+    expect(screen.getByText('Active provider')).toBeInTheDocument();
   });
 
-  it('switches to the Discover view via the segmented control', async () => {
-    await renderModelTab();
-    fireEvent.click(screen.getByRole('tab', { name: 'Discover' }));
-    expect(
-      screen.getByText(/Browse and download Hugging Face/),
-    ).toBeInTheDocument();
-    // Providers content is unmounted while Discover is showing.
-    expect(
-      screen.queryByRole('radio', { name: 'Use Built-in (Thuki)' }),
-    ).toBeNull();
-  });
-
-  it('switches to the Library view via the segmented control', async () => {
-    await renderModelTab();
-    fireEvent.click(screen.getByRole('tab', { name: 'Library' }));
-    expect(
-      screen.getByText(/installed models will appear/),
-    ).toBeInTheDocument();
-  });
-
-  it('renders the Ollama URL field seeded from the active provider base_url', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('textbox', {
-      name: 'Ollama URL',
-    }) as HTMLInputElement;
-    expect(input.value).toBe('http://127.0.0.1:11434');
-  });
-
-  it('committing a changed Ollama URL invokes set_ollama_url and lifts the config', async () => {
-    let savedUrl: unknown;
-    const onSaved = vi.fn();
-    invokeMock.mockImplementation((cmd: string, args?: unknown) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state')
-        return Promise.resolve({
-          active: null,
-          all: [],
-          ollamaReachable: false,
-        });
-      if (cmd === 'set_ollama_url') {
-        savedUrl = (args as { baseUrl: string }).baseUrl;
-        return Promise.resolve(CONFIG);
-      }
-      return Promise.resolve(CONFIG);
-    });
-    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={onSaved} />);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
-    fireEvent.focus(input);
-    fireEvent.change(input, { target: { value: 'http://10.0.0.2:11434' } });
-    fireEvent.blur(input);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(savedUrl).toBe('http://10.0.0.2:11434');
-    expect(onSaved).toHaveBeenCalledWith(CONFIG);
-  });
-
-  it('committing an unchanged Ollama URL does not invoke set_ollama_url', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
-    fireEvent.focus(input);
-    fireEvent.blur(input);
-    expect(invokeMock).not.toHaveBeenCalledWith(
-      'set_ollama_url',
-      expect.anything(),
-    );
-  });
-
-  it('Enter in the Ollama URL field commits via blur', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state')
-        return Promise.resolve({
-          active: null,
-          all: [],
-          ollamaReachable: false,
-        });
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
-    fireEvent.focus(input);
-    fireEvent.change(input, { target: { value: 'http://10.0.0.9:11434' } });
-    fireEvent.keyDown(input, { key: 'Enter' });
-    // Programmatic blur() only fires when the element is focused.
-    fireEvent.blur(input);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(invokeMock).toHaveBeenCalledWith('set_ollama_url', {
-      baseUrl: 'http://10.0.0.9:11434',
-    });
-  });
-
-  it('a non-Enter keydown in the Ollama URL field does not commit', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
-    fireEvent.focus(input);
-    fireEvent.change(input, { target: { value: 'http://10.0.0.4:11434' } });
-    fireEvent.keyDown(input, { key: 'Tab' });
-    expect(invokeMock).not.toHaveBeenCalledWith(
-      'set_ollama_url',
-      expect.anything(),
-    );
-  });
-
-  it('swallows a set_ollama_url failure without crashing', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state')
-        return Promise.resolve({
-          active: null,
-          all: [],
-          ollamaReachable: false,
-        });
-      if (cmd === 'set_ollama_url')
-        return Promise.reject(new Error('write failed'));
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
-    fireEvent.change(input, { target: { value: 'http://10.0.0.3:11434' } });
-    fireEvent.blur(input);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    // Field still rendered; no throw.
-    expect(
-      screen.getByRole('textbox', { name: 'Ollama URL' }),
-    ).toBeInTheDocument();
-  });
-
-  it('shows the non-local warning for a remote URL and hides it for localhost', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('textbox', { name: 'Ollama URL' });
-    expect(screen.queryByRole('alert')).not.toBeInTheDocument();
-    fireEvent.focus(input);
-    fireEvent.change(input, { target: { value: 'http://example.com:11434' } });
-    expect(screen.getByRole('alert')).toHaveTextContent(
-      /responsible for securing it/,
-    );
-    fireEvent.change(input, { target: { value: 'http://127.0.0.1:11434' } });
-    expect(screen.queryByRole('alert')).not.toBeInTheDocument();
-  });
-
-  it('renders the model dropdown with installed models and switches on change', async () => {
-    let switched: unknown;
-    invokeMock.mockImplementation((cmd: string, args?: unknown) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state') {
-        return Promise.resolve({
-          active: 'llama3.1:8b',
-          all: ['llama3.1:8b', 'phi4:14b'],
-          ollamaReachable: true,
-        });
-      }
-      if (cmd === 'set_active_model') {
-        switched = (args as { model: string }).model;
-        return Promise.resolve(undefined);
-      }
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    const dropdown = screen.getByRole('combobox', {
-      name: 'Active Ollama model',
-    }) as HTMLSelectElement;
-    expect(dropdown.value).toBe('llama3.1:8b');
-    fireEvent.change(dropdown, { target: { value: 'phi4:14b' } });
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(switched).toBe('phi4:14b');
-  });
-
-  it('falls back to the first installed model when none is active', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state') {
-        return Promise.resolve({
-          active: null,
-          all: ['gemma3:12b', 'phi4:14b'],
-          ollamaReachable: true,
-        });
-      }
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    const dropdown = screen.getByRole('combobox', {
-      name: 'Active Ollama model',
-    }) as HTMLSelectElement;
-    expect(dropdown.value).toBe('gemma3:12b');
-  });
-
-  it('shows a no-models hint when the provider reports no installed models', async () => {
-    await renderModelTab();
-    expect(screen.getByText('No models installed')).toBeInTheDocument();
-    expect(
-      screen.queryByRole('combobox', { name: 'Active Ollama model' }),
-    ).not.toBeInTheDocument();
-  });
-
-  it('hides the Ollama model row entirely when the built-in provider is active', async () => {
-    // get_model_picker_state is scoped to the ACTIVE provider, so with the
-    // built-in active it returns builtin manifest ids. The Ollama card must
-    // not render that inventory (or the no-models hint) as its own.
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state') {
-        return Promise.resolve({
-          active: 'thuki-starter-4b',
-          all: ['thuki-starter-4b'],
-          ollamaReachable: true,
-        });
-      }
-      return Promise.resolve(BUILTIN_ACTIVE_CONFIG);
-    });
-    render(
-      <ModelTab
-        config={BUILTIN_ACTIVE_CONFIG}
-        resyncToken={0}
-        onSaved={() => {}}
-      />,
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(
-      screen.queryByRole('combobox', { name: 'Active Ollama model' }),
-    ).not.toBeInTheDocument();
-    expect(screen.queryByText('No models installed')).not.toBeInTheDocument();
-    // The rest of the Ollama card stays.
-    expect(
-      screen.getByRole('textbox', { name: 'Ollama URL' }),
-    ).toBeInTheDocument();
-  });
-
-  it('shows an empty Ollama URL when no Ollama provider is configured', async () => {
-    const builtinOnly: RawAppConfig = {
-      ...CONFIG,
-      inference: {
-        ...CONFIG.inference,
-        providers: [CONFIG.inference.providers[0]],
-      },
-    };
-    render(
-      <ModelTab config={builtinOnly} resyncToken={0} onSaved={() => {}} />,
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    const input = screen.getByRole('textbox', {
-      name: 'Ollama URL',
-    }) as HTMLInputElement;
-    expect(input.value).toBe('');
-  });
-
-  it('does not overwrite the Ollama URL on resync while the field is focused', async () => {
-    const { rerender } = await renderModelTab();
-    const input = screen.getByRole('textbox', {
-      name: 'Ollama URL',
-    }) as HTMLInputElement;
-    fireEvent.focus(input);
-    fireEvent.change(input, { target: { value: 'http://typing.in/progress' } });
-    const updatedConfig: RawAppConfig = {
-      ...CONFIG,
-      inference: {
-        ...CONFIG.inference,
-        providers: [
-          CONFIG.inference.providers[0],
-          {
-            ...CONFIG.inference.providers[1],
-            base_url: 'http://10.0.0.8:11434',
-          },
-        ],
-      },
-    };
-    rerender(
-      <ModelTab config={updatedConfig} resyncToken={1} onSaved={() => {}} />,
-    );
-    expect(input.value).toBe('http://typing.in/progress');
-  });
-
-  it('resyncs the Ollama URL field when resyncToken changes', async () => {
-    const { rerender } = await renderModelTab();
-    const input = screen.getByRole('textbox', {
-      name: 'Ollama URL',
-    }) as HTMLInputElement;
-    expect(input.value).toBe('http://127.0.0.1:11434');
-    const updatedConfig: RawAppConfig = {
-      ...CONFIG,
-      inference: {
-        ...CONFIG.inference,
-        providers: [
-          CONFIG.inference.providers[0],
-          {
-            ...CONFIG.inference.providers[1],
-            base_url: 'http://10.0.0.7:11434',
-          },
-        ],
-      },
-    };
-    rerender(
-      <ModelTab config={updatedConfig} resyncToken={1} onSaved={() => {}} />,
-    );
-    expect(input.value).toBe('http://10.0.0.7:11434');
-  });
-
-  it('no longer renders the auto-replace toggle (moved to the Behavior tab)', async () => {
-    await renderModelTab();
-    expect(screen.queryByText('Text Replacement')).not.toBeInTheDocument();
-    expect(screen.queryByText('Rewrite')).not.toBeInTheDocument();
-    expect(
-      screen.queryByRole('switch', {
-        name: /Auto-replace selected text after \/rewrite or \/refine/,
-      }),
-    ).not.toBeInTheDocument();
-  });
-
-  it('renders the live char counter for the prompt textarea', async () => {
-    await renderModelTab();
-    expect(screen.getByText(/5 \/ 32000/)).toBeInTheDocument();
-  });
-
-  it('renders the prompt textarea with the configured persona text and a tall default size', async () => {
-    await renderModelTab();
-    const ta = screen.getByRole('textbox', {
-      name: 'System prompt',
-    }) as HTMLTextAreaElement;
-    expect(ta.value).toBe('hello');
-    // Default rows must be larger than the generic 4-row Textarea so the
-    // seeded built-in prompt body is visible without manual resizing.
-    expect(ta.rows).toBeGreaterThanOrEqual(8);
-  });
-
-  it('typing into the prompt textarea schedules a save with the typed text', async () => {
-    vi.useFakeTimers();
-    let savedValue: unknown = undefined;
-    invokeMock.mockImplementation((cmd: string, args?: unknown) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'set_config_field') {
-        savedValue = (args as { value: unknown }).value;
-        return Promise.resolve(CONFIG);
-      }
-      return Promise.resolve(CONFIG);
-    });
-    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={() => {}} />);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    const ta = screen.getByRole('textbox', {
-      name: 'System prompt',
-    }) as HTMLTextAreaElement;
-    fireEvent.change(ta, { target: { value: 'new prompt body' } });
-    await act(async () => {
-      vi.advanceTimersByTime(300);
-      await Promise.resolve();
-    });
-    expect(savedValue).toBe('new prompt body');
-  });
-
-  it('renders the Keep Warm section with Release after input and Unload now button', async () => {
-    await renderModelTab();
-    expect(screen.getByText('Keep Warm')).toBeInTheDocument();
-    expect(screen.getByText('Keep active model in memory')).toBeInTheDocument();
-    expect(screen.getByText('Release after')).toBeInTheDocument();
-    expect(
-      screen.getByRole('button', { name: 'Unload now' }),
-    ).toBeInTheDocument();
-  });
-
-  it('Unload now button invokes evict_model', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      return Promise.resolve(undefined);
-    });
-    await renderModelTab();
-    fireEvent.click(screen.getByRole('button', { name: 'Unload now' }));
-    await waitFor(() => expect(invokeMock).toHaveBeenCalledWith('evict_model'));
-  });
-
-  it('Unload now button is disabled while ejecting and stays disabled after model unloads', async () => {
-    vi.useFakeTimers();
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      return Promise.resolve(undefined);
-    });
-    await renderModelTab();
-    const btn = screen.getByRole('button', { name: 'Unload now' });
-    expect(btn).not.toBeDisabled();
-    fireEvent.click(btn);
-    expect(btn).toBeDisabled(); // disabled from ejecting state
-    // Flush microtasks so evict_model resolves, then backend emits model-evicted.
-    await act(async () => {
-      await Promise.resolve();
-    });
-    act(() => {
-      emitTauriEvent('warmup:model-evicted', null);
-    });
-    act(() => {
-      vi.advanceTimersByTime(2500); // ejecting clears
-    });
-    // Button stays disabled because loadedModel is now null.
-    expect(btn).toBeDisabled();
-  });
-
-  it('Unload now button resets immediately when evict_model rejects', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      if (cmd === 'evict_model')
-        return Promise.reject(new Error('connection refused'));
-      return Promise.resolve(undefined);
-    });
-    await renderModelTab();
-    const btn = screen.getByRole('button', { name: 'Unload now' });
-    expect(btn).not.toBeDisabled();
-    fireEvent.click(btn);
-    expect(btn).toBeDisabled();
-    await act(async () => {
-      await Promise.resolve();
-    });
-    // Ejecting cleared; loadedModel still set (eject failed), button re-enabled.
-    expect(btn).not.toBeDisabled();
-  });
-
-  it('Unload now button is disabled when no model is loaded in VRAM', async () => {
-    await renderModelTab();
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
-  });
-
-  it('Unload now button is enabled when a model is loaded in VRAM', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    expect(
-      screen.getByRole('button', { name: 'Unload now' }),
-    ).not.toBeDisabled();
-  });
-
-  it('shows VRAM subtitle with model name and dot when a model is loaded', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    expect(screen.getByText('llama3.2:3b')).toBeInTheDocument();
-    expect(screen.getByTestId('vram-status-dot')).toBeInTheDocument();
-  });
-
-  it('hides VRAM subtitle when no model is loaded', async () => {
-    await renderModelTab();
-    expect(screen.queryByTestId('vram-status-dot')).not.toBeInTheDocument();
-  });
-
-  it('handles get_loaded_model failure gracefully and leaves button disabled', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model')
-        return Promise.reject(new Error('network error'));
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
-    expect(screen.queryByTestId('vram-status-dot')).not.toBeInTheDocument();
-  });
-
-  it('clears VRAM subtitle and keeps button disabled after successful eject', async () => {
-    vi.useFakeTimers();
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      return Promise.resolve(undefined);
-    });
-    await renderModelTab();
-    expect(screen.getByText('llama3.2:3b')).toBeInTheDocument();
-    fireEvent.click(screen.getByRole('button', { name: 'Unload now' }));
-    // Flush microtasks so evict_model resolves, then backend emits model-evicted.
-    await act(async () => {
-      await Promise.resolve();
-    });
-    act(() => {
-      emitTauriEvent('warmup:model-evicted', null);
-    });
-    expect(screen.queryByText('llama3.2:3b')).not.toBeInTheDocument();
-    expect(screen.queryByTestId('vram-status-dot')).not.toBeInTheDocument();
-    // Button disabled: ejecting still true (timer not yet fired).
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
-    act(() => {
-      vi.advanceTimersByTime(2500);
-    });
-    // After timer: ejecting clears but loadedModel=null keeps button disabled.
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
-  });
-
-  it('changing the inactivity minutes input updates its value', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    });
-    fireEvent.change(input, { target: { value: '60' } });
-    expect((input as HTMLInputElement).value).toBe('60');
-  });
-
-  it('allows empty inactivity input mid-edit; blur defaults to 0', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    });
-    fireEvent.change(input, { target: { value: '' } });
-    expect((input as HTMLInputElement).value).toBe('');
-    fireEvent.blur(input);
-    expect((input as HTMLInputElement).value).toBe('0');
-  });
-
-  it('blur with a valid inactivity value does not reset the field', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    });
-    fireEvent.change(input, { target: { value: '60' } });
-    fireEvent.blur(input);
-    expect((input as HTMLInputElement).value).toBe('60');
-  });
-
-  it('clamps below-range inactivity input to -1 immediately', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    });
-    fireEvent.change(input, { target: { value: '-99' } });
-    expect((input as HTMLInputElement).value).toBe('-1');
-  });
-
-  it('clamps above-range inactivity input to 1440 immediately', async () => {
-    await renderModelTab();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    });
-    fireEvent.change(input, { target: { value: '9999' } });
-    expect((input as HTMLInputElement).value).toBe('1440');
-  });
-
-  it('updates VRAM subtitle when warmup:model-loaded event fires', async () => {
-    await renderModelTab();
-    expect(screen.queryByTestId('vram-status-dot')).not.toBeInTheDocument();
-    act(() => {
-      emitTauriEvent('warmup:model-loaded', 'phi3:mini');
-    });
-    expect(screen.getByText('phi3:mini')).toBeInTheDocument();
-    expect(screen.getByTestId('vram-status-dot')).toBeInTheDocument();
-  });
-
-  it('clears VRAM subtitle when warmup:model-evicted event fires', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    expect(screen.getByText('llama3.2:3b')).toBeInTheDocument();
-    act(() => {
-      emitTauriEvent('warmup:model-evicted', null);
-    });
-    expect(screen.queryByText('llama3.2:3b')).not.toBeInTheDocument();
-    expect(screen.queryByTestId('vram-status-dot')).not.toBeInTheDocument();
-  });
-
-  it('re-queries get_loaded_model when visibilitychange fires and panel is visible', async () => {
-    await renderModelTab();
-    // Initially no model loaded.
-    expect(screen.queryByTestId('vram-status-dot')).not.toBeInTheDocument();
-
-    // Switch mock: now a model is loaded in VRAM.
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve('llama3.2:3b');
-      return Promise.resolve(CONFIG);
-    });
-
-    // Simulate settings panel becoming visible (document.hidden is false in happy-dom).
-    await act(async () => {
-      document.dispatchEvent(new Event('visibilitychange'));
-      await Promise.resolve();
-    });
-
-    expect(screen.getByTestId('vram-status-dot')).toBeInTheDocument();
-    expect(screen.getByText('llama3.2:3b')).toBeInTheDocument();
-  });
-
-  it('handles get_loaded_model failure gracefully on visibilitychange', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.reject(new Error('fail'));
-      return Promise.resolve(CONFIG);
-    });
-    await renderModelTab();
-    // Fires visibilitychange with a rejecting get_loaded_model — covers the .catch path.
-    await act(async () => {
-      document.dispatchEvent(new Event('visibilitychange'));
-      await Promise.resolve();
-    });
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
-  });
-
-  it('skips get_loaded_model when visibilitychange fires while document is hidden', async () => {
+  it('switches to the Discover view', async () => {
     await renderModelTab();
-
-    invokeMock.mockClear();
-
-    Object.defineProperty(document, 'hidden', {
-      configurable: true,
-      get: () => true,
-    });
-
     await act(async () => {
-      document.dispatchEvent(new Event('visibilitychange'));
+      fireEvent.click(screen.getByRole('tab', { name: 'Discover' }));
       await Promise.resolve();
     });
-
-    Object.defineProperty(document, 'hidden', {
-      configurable: true,
-      get: () => false,
-    });
-
-    expect(invokeMock).not.toHaveBeenCalledWith('get_loaded_model');
-  });
-
-  it('resyncs inactivity minutes when resyncToken changes', async () => {
-    const { rerender } = await renderModelTab();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    });
-    expect((input as HTMLInputElement).value).toBe('0');
-
-    const updatedConfig: RawAppConfig = {
-      ...CONFIG,
-      inference: { ...CONFIG.inference, keep_warm_inactivity_minutes: 60 },
-    };
-    rerender(
-      <ModelTab config={updatedConfig} resyncToken={1} onSaved={() => {}} />,
-    );
-    expect((input as HTMLInputElement).value).toBe('60');
-  });
-
-  it('resync does not overwrite rawMin while input is focused', async () => {
-    const { rerender } = await renderModelTab();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    });
-    fireEvent.focus(input);
-    fireEvent.change(input, { target: { value: '' } });
-    expect((input as HTMLInputElement).value).toBe('');
-
-    const updatedConfig: RawAppConfig = {
-      ...CONFIG,
-      inference: { ...CONFIG.inference, keep_warm_inactivity_minutes: 60 },
-    };
-    rerender(
-      <ModelTab config={updatedConfig} resyncToken={1} onSaved={() => {}} />,
+    expect(screen.getByRole('tab', { name: 'Discover' })).toHaveAttribute(
+      'aria-selected',
+      'true',
     );
-    expect((input as HTMLInputElement).value).toBe('');
-  });
-
-  it('renders Context Window section with label, slider, chip, tick marks, and VRAM note', async () => {
-    await renderModelTab();
-    expect(screen.getByText('Context Window')).toBeInTheDocument();
-    expect(screen.getByText('Context window')).toBeInTheDocument();
-    expect(
-      screen.getByRole('slider', { name: 'Context window tokens' }),
-    ).toBeInTheDocument();
-    expect(
-      screen.getByRole('spinbutton', { name: 'Context window tokens' }),
-    ).toBeInTheDocument();
-    // Tick marks
-    expect(screen.getByText('8K')).toBeInTheDocument();
-    expect(screen.getByText('16K')).toBeInTheDocument();
-    expect(screen.getByText('1M')).toBeInTheDocument();
-    // VRAM note
-    expect(
-      screen.getByText(
-        /doubling the context roughly doubles its memory footprint/,
-      ),
-    ).toBeInTheDocument();
-    // Embedded button opens the tuning doc on GitHub via open_url so the
-    // link works inside the Tauri webview (target="_blank" is a no-op here).
-    const tuneButton = screen.getByRole('button', {
-      name: /how to tune Context Window/i,
-    });
-    fireEvent.click(tuneButton);
-    expect(invokeMock).toHaveBeenCalledWith('open_url', {
-      url: 'https://github.com/quiet-node/thuki/blob/main/docs/tuning-context-window.md#the-5-minute-benchmark-recipe',
-    });
-  });
-
-  it('typing a valid value in the chip and blurring commits it', async () => {
-    await renderModelTab();
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    fireEvent.change(chip, { target: { value: '32768' } });
-    fireEvent.blur(chip);
-    expect(chip.value).toBe('32768');
-  });
-
-  it('typing an invalid value in the chip and blurring reverts to committed value', async () => {
-    await renderModelTab();
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    fireEvent.change(chip, { target: { value: 'abc' } });
-    fireEvent.blur(chip);
-    expect(chip.value).toBe('16384');
-  });
-
-  it('typing a value below CTX_MIN and blurring reverts to committed value', async () => {
-    await renderModelTab();
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    fireEvent.change(chip, { target: { value: '512' } });
-    fireEvent.blur(chip);
-    expect(chip.value).toBe('16384');
-  });
-
-  it('typing a value above CTX_MAX and blurring clamps to CTX_MAX', async () => {
-    await renderModelTab();
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    fireEvent.change(chip, { target: { value: '99999999' } });
-    fireEvent.blur(chip);
-    expect(chip.value).toBe('1048576');
-  });
-
-  it('Enter key in chip commits by blurring', async () => {
-    await renderModelTab();
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    fireEvent.change(chip, { target: { value: '131072' } });
-    fireEvent.keyDown(chip, { key: 'Enter' });
-    expect(chip.value).toBe('131072');
-  });
-
-  it('non-Enter keyDown in chip does not commit', async () => {
-    await renderModelTab();
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    fireEvent.change(chip, { target: { value: '32768' } });
-    fireEvent.keyDown(chip, { key: 'Tab' });
-    // No blur triggered, so the chip still shows the in-progress text.
-    expect(chip.value).toBe('32768');
-  });
-
-  it('slider onChange updates chip text via posToCtx', async () => {
-    await renderModelTab();
-    const slider = screen.getByRole('slider', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    // pos=556 → 2048 * 512^(556/1000) ≈ 64K (65536) with CTX_MAX=1M
-    fireEvent.change(slider, { target: { value: '556' } });
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    expect(chip.value).toBe('65536');
-  });
-
-  it('slider onMouseUp commits the current slider position', async () => {
-    await renderModelTab();
-    const slider = screen.getByRole('slider', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    // pos=444 → 2048 * 512^(444/1000) ≈ 32K (32768) with CTX_MAX=1M
-    fireEvent.change(slider, { target: { value: '444' } });
-    fireEvent.mouseUp(slider);
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    expect(chip.value).toBe('32768');
+    // The Providers hero is unmounted while Discover is showing.
+    expect(screen.queryByText('Active provider')).toBeNull();
   });
 
-  it('slider onTouchEnd commits the current slider position', async () => {
+  it('switches to the Library view', async () => {
     await renderModelTab();
-    const slider = screen.getByRole('slider', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    // pos=667 → 2048 * 512^(667/1000) ≈ 128K (131072) with CTX_MAX=1M
-    fireEvent.change(slider, { target: { value: '667' } });
-    fireEvent.touchEnd(slider);
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    expect(chip.value).toBe('131072');
-  });
-
-  it('slider onKeyUp commits when not in a drag sequence', async () => {
-    await renderModelTab();
-    const slider = screen.getByRole('slider', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    // No preceding onChange, so ctxDraggingRef is false → onKeyUp commits.
-    fireEvent.keyUp(slider);
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    // No position change yet; committed value stays 16384.
-    expect(chip.value).toBe('16384');
-  });
-
-  it('slider onKeyUp does not commit when a drag is in progress', async () => {
-    await renderModelTab();
-    const slider = screen.getByRole('slider', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    // onChange sets ctxDraggingRef to true; wrap in act so React flushes the
-    // setCtxPos/setCtxChip state updates before the keyUp fires.
-    act(() => {
-      fireEvent.change(slider, { target: { value: '556' } });
-    });
-    // onKeyUp while dragging: skips commitCtx, chip still shows intermediate.
-    fireEvent.keyUp(slider);
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    // pos=556 → 64K (65536); numCtx unchanged, chip shows the intermediate value.
-    expect(chip.value).toBe('65536');
-  });
-
-  it('resyncs context window chip and slider when resyncToken changes', async () => {
-    const { rerender } = await renderModelTab();
-    const chip = screen.getByRole('spinbutton', {
-      name: 'Context window tokens',
-    }) as HTMLInputElement;
-    expect(chip.value).toBe('16384');
-
-    const updatedConfig: RawAppConfig = {
-      ...CONFIG,
-      inference: { ...CONFIG.inference, num_ctx: 65536 },
-    };
-    rerender(
-      <ModelTab config={updatedConfig} resyncToken={1} onSaved={() => {}} />,
-    );
-    expect(chip.value).toBe('65536');
-  });
-
-  it('renders the collapsed Diagnostics trigger and hides its content by default', () => {
-    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={() => {}} />);
-    expect(
-      screen.getByRole('button', { name: /Diagnostics/i }),
-    ).toBeInTheDocument();
-    expect(screen.queryByText('Trace recording')).not.toBeInTheDocument();
-  });
-
-  it('expands the Diagnostics section and reveals the trace toggle when clicked', () => {
-    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={() => {}} />);
-    fireEvent.click(screen.getByRole('button', { name: /Diagnostics/i }));
-    expect(screen.getByText('Trace recording')).toBeInTheDocument();
-    const toggle = screen.getByRole('switch', {
-      name: 'Enable trace recording',
-    });
-    expect(toggle).toHaveAttribute('aria-checked', 'false');
-  });
-
-  it('reflects trace_enabled=true from config when the section is expanded', () => {
-    const configOn: RawAppConfig = {
-      ...CONFIG,
-      debug: { trace_enabled: true },
-    };
-    render(<ModelTab config={configOn} resyncToken={0} onSaved={() => {}} />);
-    fireEvent.click(screen.getByRole('button', { name: /Diagnostics/i }));
-    const toggle = screen.getByRole('switch', {
-      name: 'Enable trace recording',
-    });
-    expect(toggle).toHaveAttribute('aria-checked', 'true');
-  });
-
-  // ─── Providers panel: radio selection ───────────────────────────────────
-
-  it('selecting the Built-in radio invokes set_active_provider and lifts the config', async () => {
-    const onSaved = vi.fn();
-    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={onSaved} />);
     await act(async () => {
+      fireEvent.click(screen.getByRole('tab', { name: 'Library' }));
       await Promise.resolve();
     });
-    fireEvent.click(
-      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
+    expect(screen.getByRole('tab', { name: 'Library' })).toHaveAttribute(
+      'aria-selected',
+      'true',
     );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
-      providerId: 'builtin',
-    });
-    expect(onSaved).toHaveBeenCalledWith(CONFIG);
+    expect(screen.queryByText('Active provider')).toBeNull();
   });
 
-  it('falls back to the literal builtin id and label when no builtin provider is configured', async () => {
-    const noBuiltin: RawAppConfig = {
+  it('navigates to Discover from the built-in no-model hint', async () => {
+    const builtinActive: RawAppConfig = {
       ...CONFIG,
-      inference: {
-        ...CONFIG.inference,
-        providers: [CONFIG.inference.providers[1]],
-      },
+      inference: { ...CONFIG.inference, active_provider: 'builtin' },
     };
-    render(<ModelTab config={noBuiltin} resyncToken={0} onSaved={() => {}} />);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(screen.getByText('Built-in (Thuki)')).toBeInTheDocument();
-    fireEvent.click(
-      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
-      providerId: 'builtin',
-    });
-  });
-
-  it('selecting the Ollama radio invokes set_active_provider with the ollama id', async () => {
-    const onSaved = vi.fn();
-    render(
-      <ModelTab
-        config={BUILTIN_ACTIVE_CONFIG}
-        resyncToken={0}
-        onSaved={onSaved}
-      />,
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    fireEvent.click(screen.getByRole('radio', { name: 'Use Ollama' }));
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
-      providerId: 'ollama',
-    });
-    expect(onSaved).toHaveBeenCalledWith(CONFIG);
-  });
-
-  it('swallows a set_active_provider failure without crashing', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state')
-        return Promise.resolve({
-          active: null,
-          all: [],
-          ollamaReachable: false,
-        });
-      if (cmd === 'set_active_provider')
-        return Promise.reject(new Error('write failed'));
-      return Promise.resolve(CONFIG);
-    });
-    const onSaved = vi.fn();
-    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={onSaved} />);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    fireEvent.click(
-      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(onSaved).not.toHaveBeenCalled();
-    expect(
-      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
-    ).toBeInTheDocument();
-  });
-
-  it('renders the OpenAI-compatible card when configured and selects it via its radio', async () => {
     render(
-      <ModelTab config={OPENAI_CONFIG} resyncToken={0} onSaved={() => {}} />,
+      <ModelTab config={builtinActive} resyncToken={0} onSaved={() => {}} />,
     );
     await act(async () => {
       await Promise.resolve();
     });
-    expect(screen.getByText('LM Studio')).toBeInTheDocument();
-    expect(
-      screen.queryByRole('button', { name: 'Add OpenAI-compatible server' }),
-    ).not.toBeInTheDocument();
     fireEvent.click(
-      screen.getByRole('radio', { name: 'Use OpenAI-compatible server' }),
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
-      providerId: 'openai',
-    });
-  });
-
-  it('hides every OpenAI-compatible affordance when the dev flag is disabled', async () => {
-    vi.stubEnv('VITE_ENABLE_OPENAI_PROVIDER', 'false');
-    // No openai provider configured: the "add a server" affordance is gone.
-    const { unmount } = render(
-      <ModelTab config={CONFIG} resyncToken={0} onSaved={() => {}} />,
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(
-      screen.queryByRole('button', { name: 'Add OpenAI-compatible server' }),
-    ).not.toBeInTheDocument();
-    unmount();
-
-    // An openai provider hand-edited into config: its management card and
-    // radio stay hidden too (the backend still honors it).
-    render(
-      <ModelTab config={OPENAI_CONFIG} resyncToken={0} onSaved={() => {}} />,
-    );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(screen.queryByText('LM Studio')).not.toBeInTheDocument();
-    expect(
-      screen.queryByRole('radio', { name: 'Use OpenAI-compatible server' }),
-    ).not.toBeInTheDocument();
-  });
-
-  // ─── Keep Warm with the built-in provider active ────────────────────────
-
-  async function renderBuiltinActive(
-    onSaved: (next: RawAppConfig) => void = () => {},
-  ) {
-    const view = render(
-      <ModelTab
-        config={BUILTIN_ACTIVE_CONFIG}
-        resyncToken={0}
-        onSaved={onSaved}
-      />,
+      await screen.findByRole('button', {
+        name: /Download a model in Discover/,
+      }),
     );
-    await act(async () => {
-      await Promise.resolve();
-    });
-    return view;
-  }
-
-  it('renders the unified Keep Warm control with the engine-status row when the built-in provider is active', async () => {
-    await renderBuiltinActive();
-    // Same single Keep Warm section as Ollama, but the built-in status row
-    // reports the sidecar lifecycle instead of the VRAM slug.
-    expect(screen.getByText('Keep Warm')).toBeInTheDocument();
-    expect(screen.getByText('Keep active model in memory')).toBeInTheDocument();
-    expect(screen.queryByText('Idle Unload')).not.toBeInTheDocument();
-    expect(screen.queryByTestId('vram-status-dot')).not.toBeInTheDocument();
-    expect(screen.getByText('Engine: stopped')).toBeInTheDocument();
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
-  });
-
-  it('clamps the Keep Warm input to the -1..1440 range while built-in is active', async () => {
-    await renderBuiltinActive();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    }) as HTMLInputElement;
-    fireEvent.change(input, { target: { value: '45' } });
-    expect(input.value).toBe('45');
-    fireEvent.change(input, { target: { value: '-5' } });
-    expect(input.value).toBe('-1');
-    fireEvent.change(input, { target: { value: '99999' } });
-    expect(input.value).toBe('1440');
-  });
-
-  it('resync does not overwrite the Keep Warm input while focused (built-in active)', async () => {
-    const { rerender } = await renderBuiltinActive();
-    const input = screen.getByRole('spinbutton', {
-      name: 'Release after N minutes',
-    }) as HTMLInputElement;
-    fireEvent.focus(input);
-    fireEvent.change(input, { target: { value: '25' } });
-    const updatedConfig: RawAppConfig = {
-      ...BUILTIN_ACTIVE_CONFIG,
-      inference: {
-        ...BUILTIN_ACTIVE_CONFIG.inference,
-        keep_warm_inactivity_minutes: 90,
-      },
-    };
-    rerender(
-      <ModelTab config={updatedConfig} resyncToken={1} onSaved={() => {}} />,
+    // The onAddModel callback flips the view: the Providers hero unmounts.
+    await waitFor(() =>
+      expect(screen.queryByText('Active provider')).toBeNull(),
     );
-    expect(input.value).toBe('25');
-  });
-
-  it('engine:status loaded enables Unload now and clicking invokes evict_model', async () => {
-    await renderBuiltinActive();
-    act(() => {
-      emitTauriEvent('engine:status', engineStatus('loaded'));
-    });
-    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
-    const btn = screen.getByRole('button', { name: 'Unload now' });
-    expect(btn).toBeEnabled();
-    fireEvent.click(btn);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(invokeMock).toHaveBeenCalledWith('evict_model');
-  });
-
-  it('swallows an evict_model failure from the engine Unload now button', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state')
-        return Promise.resolve({
-          active: null,
-          all: [],
-          ollamaReachable: false,
-        });
-      if (cmd === 'evict_model')
-        return Promise.reject(new Error('no engine running'));
-      return Promise.resolve(CONFIG);
-    });
-    await renderBuiltinActive();
-    act(() => {
-      emitTauriEvent('engine:status', engineStatus('loaded'));
-    });
-    fireEvent.click(screen.getByRole('button', { name: 'Unload now' }));
-    await act(async () => {
-      await Promise.resolve();
-    });
-    // The residency line is event-driven, so a failed eviction changes nothing.
-    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
-  });
-
-  // ─── Context slider "Applying" hint ─────────────────────────────────────
-
-  it('shows the Applying hint while the engine starts or stops and hides it otherwise', async () => {
-    await renderBuiltinActive();
-    expect(screen.queryByRole('status')).not.toBeInTheDocument();
-    act(() => {
-      emitTauriEvent('engine:status', engineStatus('starting'));
-    });
-    expect(screen.getByRole('status')).toHaveTextContent(/Applying/);
-    act(() => {
-      emitTauriEvent('engine:status', engineStatus('stopping'));
-    });
-    expect(screen.getByRole('status')).toHaveTextContent(/Applying/);
-    act(() => {
-      emitTauriEvent('engine:status', engineStatus('loaded'));
-    });
-    expect(screen.queryByRole('status')).not.toBeInTheDocument();
-  });
-
-  // ─── Engine status mount seeding + listener cleanup ─────────────────────
-
-  it('seeds the residency line from get_engine_status on mount', async () => {
-    // The backend emits engine:status only on transitions; an engine that
-    // is already loaded must be reflected (and Unload now enabled) without
-    // waiting for the next event.
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_engine_status') {
-        return Promise.resolve(engineStatus('loaded'));
-      }
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state') {
-        return Promise.resolve({
-          active: null,
-          all: [],
-          ollamaReachable: false,
-        });
-      }
-      return Promise.resolve(CONFIG);
-    });
-    await renderBuiltinActive();
-    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled();
-  });
-
-  it('keeps the stopped default when the get_engine_status seed rejects', async () => {
-    invokeMock.mockImplementation((cmd: string) => {
-      if (cmd === 'get_engine_status') {
-        return Promise.reject(new Error('runner not managed'));
-      }
-      if (cmd === 'get_loaded_model') return Promise.resolve(null);
-      if (cmd === 'get_model_picker_state') {
-        return Promise.resolve({
-          active: null,
-          all: [],
-          ollamaReachable: false,
-        });
-      }
-      return Promise.resolve(CONFIG);
-    });
-    await renderBuiltinActive();
-    expect(screen.getByText('Engine: stopped')).toBeInTheDocument();
-    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
-  });
-
-  it('detaches every listener even when unmount races the listen promise', async () => {
-    // Regression for the leak where cleanup ran before listen() resolved
-    // and the captured unlisten was still null, leaving the handler
-    // registered forever. The promise-chained cleanup must detach all of
-    // them once the registrations resolve.
-    const listenMock = listen as unknown as ReturnType<typeof vi.fn>;
-    const original = listenMock.getMockImplementation();
-    let removed = 0;
-    listenMock.mockImplementation(async () => () => {
-      removed += 1;
-    });
-    try {
-      const before = listenMock.mock.calls.length;
-      const view = render(
-        <ModelTab
-          config={BUILTIN_ACTIVE_CONFIG}
-          resyncToken={0}
-          onSaved={() => {}}
-        />,
-      );
-      const registered = listenMock.mock.calls.length - before;
-      expect(registered).toBe(3); // engine:status + the warmup pair
-      // Unmount before the listen promises are flushed.
-      view.unmount();
-      await act(async () => {
-        await Promise.resolve();
-      });
-      expect(removed).toBe(registered);
-    } finally {
-      listenMock.mockImplementation(original!);
-    }
-  });
-
-  // ─── Context Window helper copy per provider kind ────────────────────────
-
-  it('shows the builtin ctx helper while the built-in provider is active', async () => {
-    await renderBuiltinActive();
-    expect(
-      screen.getByText(/--ctx-size at start; changing it restarts the engine/),
-    ).toBeInTheDocument();
-    expect(screen.queryByText(/Ollama caps/)).not.toBeInTheDocument();
-  });
-
-  it('shows the server-controlled ctx helper for an openai provider', async () => {
-    const cfg: RawAppConfig = {
-      ...OPENAI_CONFIG,
-      inference: { ...OPENAI_CONFIG.inference, active_provider: 'openai' },
-    };
-    render(<ModelTab config={cfg} resyncToken={0} onSaved={() => {}} />);
-    await act(async () => {
-      await Promise.resolve();
-    });
-    expect(
-      screen.getByText(
-        /Informational only; your server controls the actual context/,
-      ),
-    ).toBeInTheDocument();
-    expect(screen.queryByText(/Ollama caps/)).not.toBeInTheDocument();
-  });
-
-  it('keeps the Ollama ctx helper for the ollama provider', async () => {
-    await renderModelTab();
-    expect(
-      screen.getByText(/Ollama caps to your model's trained maximum\./),
-    ).toBeInTheDocument();
   });
 });
 
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 4cd15136..d63e0cf3 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -332,19 +332,226 @@
   background: var(--accent);
 }
 
-/* Row holding the segmented control (and, on Library, the Add-model action). */
+/* Row holding the segmented control, centered across the Models surface. */
 .barrow {
   display: flex;
   align-items: center;
+  justify-content: center;
   margin-bottom: 18px;
 }
 
-/* Interim copy for the Library / Discover panes while they are being built. */
-.modelsPlaceholder {
-  padding: 48px 8px;
+/* ─── Providers pane (Active Hero) ───────────────────────────────────────── */
+
+.shead {
+  display: flex;
+  align-items: baseline;
+  margin: 18px 2px 9px;
+  font-size: 10.5px;
+  font-weight: 600;
+  letter-spacing: 0.06em;
+  text-transform: uppercase;
+  color: var(--t3);
+}
+.shead:first-child {
+  margin-top: 0;
+}
+.sheadNote {
+  text-transform: none;
+  letter-spacing: 0;
+  font-weight: 400;
+  color: var(--t3);
+  opacity: 0.85;
+}
+
+/* Active provider hero. */
+.hero {
+  box-sizing: border-box;
+  padding: 18px;
+  border: 1px solid rgba(255, 141, 92, 0.3);
+  border-radius: 14px;
+  background: linear-gradient(
+    180deg,
+    rgba(255, 141, 92, 0.06),
+    rgba(255, 141, 92, 0.02)
+  );
+}
+.heroHead {
+  display: flex;
+  align-items: flex-start;
+}
+.heroName {
+  font-size: 15.5px;
+  font-weight: 650;
+  color: var(--t1);
+}
+.heroSub {
+  margin-top: 4px;
+  font-size: 12px;
+  color: var(--t3);
+}
+.heroActive {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+  margin-left: auto;
+  font-size: 11px;
+  font-weight: 580;
+  color: var(--accent);
+}
+.heroLiveDot {
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
+  background: var(--accent);
+  box-shadow: 0 0 7px var(--accent);
+}
+.heroModel {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+  margin-top: 14px;
+  padding-top: 13px;
+  border-top: 1px solid var(--hair-soft);
+}
+.heroModel .dropdown,
+.heroModel .input {
+  flex: 1;
+  min-width: 0;
+}
+.heroModelLabel {
+  flex: none;
+  width: 64px;
+  font-size: 12.5px;
   color: var(--t2);
+}
+.heroModelLink {
+  border: none;
+  background: transparent;
+  color: var(--accent);
+  font-family: inherit;
+  font-size: 12.5px;
+  font-weight: 540;
+  cursor: pointer;
+  padding: 0;
+}
+
+/* Grouped list card (Other providers + Generation). */
+.listcard {
+  box-sizing: border-box;
+  overflow: hidden;
+  border: 1px solid var(--hair-soft);
+  border-radius: 12px;
+  background: var(--elev-1);
+}
+
+.providerRow {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  padding: 14px 16px;
+}
+.providerRow + .providerRow {
+  box-shadow: 0 -1px 0 var(--hair-soft);
+}
+.providerRowName {
   font-size: 13px;
-  text-align: center;
+  font-weight: 550;
+  color: var(--t1);
+}
+.providerRowSub {
+  font-size: 11.5px;
+  color: var(--t3);
+}
+.grow {
+  flex: 1;
+}
+.switchBtn {
+  flex: none;
+  padding: 5px 11px;
+  border: 1px solid var(--hair);
+  border-radius: 7px;
+  background: transparent;
+  color: var(--t2);
+  font-family: inherit;
+  font-size: 11.5px;
+  font-weight: 540;
+  cursor: pointer;
+  transition:
+    color 140ms ease,
+    border-color 140ms ease,
+    background 140ms ease;
+}
+.switchBtn:hover:not(:disabled) {
+  color: var(--t1);
+  border-color: rgba(255, 141, 92, 0.5);
+  background: var(--accent-soft);
+}
+.switchBtn:disabled {
+  opacity: 0.4;
+  cursor: default;
+}
+
+/* Generation rows. */
+.genRow {
+  display: flex;
+  align-items: center;
+  gap: 14px;
+  padding: 14px 16px;
+}
+.genRow + .genRow {
+  box-shadow: 0 -1px 0 var(--hair-soft);
+}
+.genLabel {
+  flex: 1;
+  min-width: 0;
+}
+.genName {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+  font-size: 12.5px;
+  font-weight: 500;
+  color: var(--t1);
+}
+.genHelp {
+  margin-top: 3px;
+  font-size: 11px;
+  color: var(--t3);
+}
+.genCtxControl {
+  flex: none;
+  width: 280px;
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+}
+.genCtxControl .ctxSlider {
+  width: 100%;
+}
+.genCtxControl .ctxTickRow {
+  position: relative;
+  height: 12px;
+}
+.genCtxValue {
+  font-size: 11px;
+  color: var(--t3);
+  text-align: right;
+}
+.genWarmControl {
+  flex: none;
+  display: flex;
+  align-items: center;
+  gap: 8px;
+}
+.genPromptEditor {
+  padding: 0 16px 14px;
+  box-shadow: 0 -1px 0 var(--hair-soft);
+}
+.genFootnote {
+  margin-top: 12px;
+  padding: 0 2px;
+  font-size: 11px;
+  color: var(--t3);
 }
 
 /* ─── Body (scrolling content) ──────────────────────────────────────────── */
diff --git a/src/types/hf.ts b/src/types/hf.ts
new file mode 100644
index 00000000..addd917d
--- /dev/null
+++ b/src/types/hf.ts
@@ -0,0 +1,26 @@
+/* v8 ignore file -- type-only declarations, no runtime code */
+
+/**
+ * IPC shapes for the in-app Hugging Face GGUF model browser (the Discover
+ * pane). Mirrors the serde output of the Rust `search_hf_models` command,
+ * which serializes its `HfModelSummary` struct as snake_case.
+ */
+
+/**
+ * One repo row from `search_hf_models`. The search payload is deliberately
+ * lean: it carries only what the Discover list needs to render a row and to
+ * decide whether anonymous download is allowed.
+ *
+ * - `id` is the canonical `owner/repo` slug.
+ * - `downloads` is Hugging Face's all-time download count for the repo.
+ * - `gated` is true when the repo requires accepting terms or auth; an
+ *   anonymous download fails, so the Discover row disables "Get" for it.
+ */
+export interface HfModelSummary {
+  /** Canonical `owner/repo` slug. */
+  id: string;
+  /** All-time Hugging Face download count for the repo. */
+  downloads: number;
+  /** True when the repo is gated; anonymous downloads fail. */
+  gated: boolean;
+}

From 55db7c7f56d43e114c7705f5892da08bc8112054 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 02:17:30 -0500
Subject: [PATCH 06/89] style: reskin the standard settings tabs to the premium
 tokens

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/styles/settings.module.css | 132 ++++++++++++++++-----------------
 1 file changed, 66 insertions(+), 66 deletions(-)

diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index d63e0cf3..4d2861a4 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -103,7 +103,7 @@
   border-top-color: rgba(230, 156, 5, 0.32);
   border-radius: 10px;
   font-size: 12.5px;
-  color: var(--color-text-primary);
+  color: var(--t1);
   flex-shrink: 0;
   /* Banner content (corrupt-config path, file name) must be selectable
    * so users can copy the path. The window-wide `user-select: none` on
@@ -115,7 +115,7 @@
 }
 .bannerIcon {
   flex-shrink: 0;
-  color: var(--color-tertiary);
+  color: var(--t3); /* NOTE(review): replaces --color-tertiary (not --color-text-tertiary); confirm --t3 matches the old icon color */
 }
 .bannerText {
   flex: 1;
@@ -605,7 +605,7 @@
   font-weight: 600;
   letter-spacing: 0.18em;
   text-transform: uppercase;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   margin-bottom: 14px;
 }
 
@@ -638,7 +638,7 @@
 .rowLabel {
   font-size: 13px;
   font-weight: 500;
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 .infoBtn {
   display: inline-flex;
@@ -649,7 +649,7 @@
   border-radius: 50%;
   border: 1px solid rgba(255, 255, 255, 0.14);
   background: transparent;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   font-size: 10px;
   font-weight: 600;
   font-family: inherit;
@@ -663,11 +663,11 @@
 }
 .infoBtn:hover {
   background: rgba(255, 141, 92, 0.12);
-  color: var(--color-primary);
+  color: var(--accent);
   border-color: rgba(255, 141, 92, 0.4);
 }
 .infoBtn:focus-visible {
-  outline: 2px solid var(--color-primary);
+  outline: 2px solid var(--accent);
   outline-offset: 2px;
 }
 .rowControl {
@@ -682,13 +682,13 @@
 }
 .charCounter {
   font-size: 11px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   text-align: right;
   letter-spacing: 0.02em;
 }
 .rowHelper {
   font-size: 11.5px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   line-height: 1.45;
 }
 .rowError {
@@ -706,7 +706,7 @@
   border: 1px solid rgba(255, 255, 255, 0.06);
   border-top-color: rgba(255, 141, 92, 0.1);
   border-radius: 8px;
-  color: var(--color-text-primary);
+  color: var(--t1);
   font-family: inherit;
   font-size: 13px;
   padding: 0 12px;
@@ -755,7 +755,7 @@
   border-top-color: rgba(255, 141, 92, 0.1);
   border-radius: 8px;
   padding: 0 10px 0 12px;
-  color: var(--color-text-primary);
+  color: var(--t1);
   font-family: inherit;
   font-size: 13px;
   cursor: pointer;
@@ -779,7 +779,7 @@
 
 .button {
   background: rgba(36, 30, 26, 0.95);
-  color: var(--color-text-primary);
+  color: var(--t1);
   border: 1px solid rgba(255, 255, 255, 0.06);
   border-top-color: rgba(255, 141, 92, 0.1);
   border-radius: 8px;
@@ -813,7 +813,7 @@
   background: transparent;
   border-color: rgba(255, 255, 255, 0.06);
   border-top-color: rgba(255, 255, 255, 0.06);
-  color: var(--color-text-primary);
+  color: var(--t1);
   box-shadow: none;
 }
 .buttonGhost:hover {
@@ -882,7 +882,7 @@
 }
 .valChip {
   background: rgba(36, 30, 26, 0.95);
-  color: var(--color-text-primary);
+  color: var(--t1);
   border: 1px solid rgba(255, 255, 255, 0.06);
   border-top-color: rgba(255, 141, 92, 0.1);
   border-radius: 6px;
@@ -907,7 +907,7 @@
 }
 .ctxLabel {
   font-size: 13px;
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 .ctxChipGroup {
   display: flex;
@@ -918,7 +918,7 @@
   background: rgba(36, 30, 26, 0.95);
   border: 1px solid rgba(255, 141, 92, 0.3);
   border-radius: 6px;
-  color: var(--color-primary);
+  color: var(--accent);
   font-size: 13px;
   font-weight: 600;
   font-variant-numeric: tabular-nums;
@@ -927,7 +927,7 @@
   text-align: right;
   width: 82px;
   outline: none;
-  caret-color: var(--color-primary);
+  caret-color: var(--accent);
   /* hide the native number spinners — the slider + pills are the affordance */
   -moz-appearance: textfield;
 }
@@ -937,12 +937,12 @@
   margin: 0;
 }
 .ctxChipInput:focus {
-  border-color: var(--color-primary);
+  border-color: var(--accent);
   box-shadow: 0 0 0 2.5px rgba(255, 141, 92, 0.18);
 }
 .ctxChipUnit {
   font-size: 11px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
 }
 /* Full-width log-scale slider. Fill percentage is controlled via the
  * --fill CSS custom property set inline from React state. */
@@ -958,8 +958,8 @@
    * colored portion lines up with the thumb regardless of track width. */
   background: linear-gradient(
     to right,
-    var(--color-primary) 0%,
-    var(--color-primary) calc(8px + var(--fill) * (100% - 16px) / 100%),
+    var(--accent) 0%,
+    var(--accent) calc(8px + var(--fill) * (100% - 16px) / 100%),
     rgba(255, 255, 255, 0.1) calc(8px + var(--fill) * (100% - 16px) / 100%),
     rgba(255, 255, 255, 0.1) 100%
   );
@@ -1010,7 +1010,7 @@
   position: absolute;
   transform: translateX(-50%);
   font-size: 9.5px;
-  color: var(--color-text-tertiary);
+  color: var(--t3);
   font-variant-numeric: tabular-nums;
   white-space: nowrap;
 }
@@ -1027,7 +1027,7 @@
   padding: 3px 8px;
   font-size: 10.5px;
   font-variant-numeric: tabular-nums;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   cursor: pointer;
   font-family: inherit;
   transition:
@@ -1038,7 +1038,7 @@
 .ctxPill:hover {
   background: rgba(255, 141, 92, 0.1);
   border-color: rgba(255, 141, 92, 0.22);
-  color: var(--color-primary);
+  color: var(--accent);
 }
 .ctxPill:focus-visible {
   outline: none;
@@ -1047,11 +1047,11 @@
 .ctxPillActive {
   background: rgba(255, 141, 92, 0.14);
   border-color: rgba(255, 141, 92, 0.3);
-  color: var(--color-primary);
+  color: var(--accent);
 }
 .ctxHelper {
   font-size: 11px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   line-height: 1.5;
   margin-bottom: 10px;
 }
@@ -1117,7 +1117,7 @@
   height: 32px;
   background: transparent;
   border: none;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   font-size: 14px;
   font-weight: 500;
   cursor: pointer;
@@ -1128,12 +1128,12 @@
 }
 .stepperBtn:hover:not(:disabled) {
   background: rgba(255, 255, 255, 0.04);
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 .stepperBtn:focus-visible {
   outline: none;
   background: rgba(255, 141, 92, 0.14);
-  color: var(--color-primary);
+  color: var(--accent);
 }
 .stepperBtn:disabled {
   opacity: 0.3;
@@ -1145,7 +1145,7 @@
   text-align: center;
   font-variant-numeric: tabular-nums;
   font-size: 13px;
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 
 /* ─── Saved chip (warm gradient like user bubble, but small) ────────────── */
@@ -1161,7 +1161,7 @@
     rgba(255, 141, 92, 0.22) 0%,
     rgba(224, 107, 48, 0.18) 100%
   );
-  color: var(--color-primary);
+  color: var(--accent);
   border: 1px solid rgba(255, 141, 92, 0.45);
   border-radius: 999px;
   padding: 5px 13px;
@@ -1185,7 +1185,7 @@
 .resetLink {
   background: none;
   border: none;
-  color: var(--color-primary);
+  color: var(--accent);
   font-size: 12px;
   cursor: pointer;
   padding: 0;
@@ -1235,12 +1235,12 @@
 .dialogTitle {
   font-size: 15px;
   font-weight: 600;
-  color: var(--color-text-primary);
+  color: var(--t1);
   margin: 0 0 8px;
 }
 .dialogMessage {
   font-size: 13px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   line-height: 1.55;
   margin: 0 0 20px;
 }
@@ -1304,7 +1304,7 @@
   font-size: 20px;
   font-weight: 600;
   letter-spacing: 0.01em;
-  color: var(--color-text-primary);
+  color: var(--t1);
   margin-bottom: 2px;
 }
 .aboutHeroVersion {
@@ -1317,7 +1317,7 @@
   font-size: 11px;
   font-weight: 500;
   letter-spacing: 0.06em;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   font-variant-numeric: tabular-nums;
   cursor: pointer;
   text-decoration: underline transparent;
@@ -1327,17 +1327,17 @@
     text-decoration-color 150ms ease;
 }
 .aboutHeroVersion:hover {
-  color: var(--color-primary);
-  text-decoration-color: var(--color-primary);
+  color: var(--accent);
+  text-decoration-color: var(--accent);
 }
 .aboutHeroVersion:focus-visible {
   outline: none;
-  color: var(--color-primary);
-  text-decoration-color: var(--color-primary);
+  color: var(--accent);
+  text-decoration-color: var(--accent);
 }
 .aboutHeroTagline {
   font-size: 13px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   line-height: 1.5;
   margin-bottom: 14px;
   max-width: 340px;
@@ -1359,7 +1359,7 @@
   padding: 4px;
   border: none;
   background: transparent;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   cursor: pointer;
   font-family: inherit;
   transition:
@@ -1367,22 +1367,22 @@
     transform 150ms ease;
 }
 .iconLinkBtn:hover {
-  color: var(--color-primary);
+  color: var(--accent);
   transform: translateY(-1px);
 }
 .iconLinkBtn:focus-visible {
   outline: none;
-  color: var(--color-primary);
+  color: var(--accent);
   box-shadow: 0 0 0 2px rgba(255, 141, 92, 0.35);
   border-radius: 6px;
 }
 .aboutInfoLine {
   font-size: 13px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   line-height: 1.6;
 }
 .aboutInfoLine strong {
-  color: var(--color-text-primary);
+  color: var(--t1);
   font-weight: 600;
 }
 .aboutLinkRow {
@@ -1431,7 +1431,7 @@
     border-color 180ms ease;
 }
 .toggleOn {
-  background: var(--color-primary);
+  background: var(--accent);
   border-color: rgba(255, 141, 92, 0.5);
 }
 .toggle:focus-visible {
@@ -1473,7 +1473,7 @@
 .keepWarmLabel {
   font-size: 13px;
   font-weight: 500;
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 /* Right-side group: "Release after" + number input + "min" */
 .keepWarmTimerGroup {
@@ -1500,13 +1500,13 @@
   align-items: center;
   gap: 5px;
   font-size: 11px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   font-weight: 400;
   line-height: 1;
 }
 .keepWarmVramModelName {
   font-weight: 500;
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 .keepWarmNoModel {
   font-size: 11px;
@@ -1536,7 +1536,7 @@
 
 .keepWarmBarFieldLabel {
   font-size: 12px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   font-weight: 500;
   white-space: nowrap;
   flex-shrink: 0;
@@ -1546,7 +1546,7 @@
   background: transparent;
   border: none;
   border-bottom: 1px solid rgba(255, 255, 255, 0.22);
-  color: var(--color-text-primary);
+  color: var(--t1);
   font-family: inherit;
   font-size: 14px;
   font-weight: 600;
@@ -1568,7 +1568,7 @@
 }
 .keepWarmUnit {
   font-size: 12px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   margin-left: -3px;
 }
 /* Thin pipe separator between inline group and eject pill */
@@ -1588,7 +1588,7 @@
   border-radius: 8px;
   border: 1px solid rgba(255, 141, 92, 0.18);
   background: transparent;
-  color: var(--color-primary);
+  color: var(--accent);
   font-size: 12px;
   font-weight: 500;
   font-family: inherit;
@@ -1688,7 +1688,7 @@
 }
 
 .devTrigger:hover .devTriggerLabel {
-  color: var(--color-text-secondary);
+  color: var(--t2);
 }
 
 .devTrigger:focus-visible {
@@ -1778,7 +1778,7 @@
   width: 9px;
   height: 9px;
   border-radius: 50%;
-  background: var(--color-tertiary);
+  background: var(--t3);
   box-shadow: 0 0 0 4px rgba(230, 156, 5, 0.18);
 }
 
@@ -1789,15 +1789,15 @@
 .updateBannerTitle {
   font-size: 12.5px;
   font-weight: 600;
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 .updateBannerMeta {
   font-size: 11px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   margin-top: 2px;
 }
 .updateBannerMeta a {
-  color: var(--color-primary);
+  color: var(--accent);
   text-decoration: underline;
   cursor: pointer;
 }
@@ -1814,7 +1814,7 @@
   font-weight: 500;
   background: rgba(255, 141, 92, 0.16);
   border: 1px solid rgba(255, 141, 92, 0.3);
-  color: var(--color-primary);
+  color: var(--accent);
   cursor: pointer;
 }
 .updateBannerLater {
@@ -1823,7 +1823,7 @@
   font-size: 11.5px;
   background: transparent;
   border: 1px solid transparent;
-  color: var(--color-text-secondary);
+  color: var(--t2);
   cursor: pointer;
 }
 
@@ -1851,10 +1851,10 @@
   gap: 8px;
   font-size: 13px;
   font-weight: 600;
-  color: var(--color-text-primary);
+  color: var(--t1);
 }
 .updateHeroStatus[data-state='available'] {
-  color: var(--color-tertiary);
+  color: var(--t3);
 }
 
 .updateHeroCheckMark {
@@ -1879,13 +1879,13 @@
   position: absolute;
   inset: 4px;
   border-radius: 50%;
-  background: var(--color-tertiary);
+  background: var(--t3);
   box-shadow: 0 0 8px rgba(230, 156, 5, 0.6);
 }
 
 .updateHeroMeta {
   font-size: 11px;
-  color: var(--color-text-secondary);
+  color: var(--t2);
 }
 
 .updateHeroBtn {
@@ -1897,7 +1897,7 @@
   border-radius: 8px;
   border: 1px solid rgba(255, 141, 92, 0.3);
   background: rgba(255, 141, 92, 0.12);
-  color: var(--color-primary);
+  color: var(--accent);
   font-size: 12px;
   font-weight: 500;
   font-family: inherit;

From f58f6f12bdfb06399697971a7fcd9edd24e2b6e7 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 14:17:17 -0500
Subject: [PATCH 07/89] feat: HF search text-gen default, paginated Load-more,
 and RAM-fit annotations

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 docs/configurations.md           |   5 +-
 src-tauri/src/config/defaults.rs |  26 ++-
 src-tauri/src/models/mod.rs      | 334 +++++++++++++++++++++++++++++--
 3 files changed, 345 insertions(+), 20 deletions(-)

diff --git a/docs/configurations.md b/docs/configurations.md
index 3858ba33..4d05a804 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -190,7 +190,10 @@ The table below also lists the baked-in safety limits that govern Thuki's commun
 | `MAX_HF_API_BODY_BYTES`                     | `4 MiB`  | No       | Defense-in-depth bound on attacker-controlled data from a remote service, mirroring `MAX_OLLAMA_TAGS_BODY_BYTES`. | —      | The largest Hugging Face API response body (repo file listings) Thuki will accept while resolving a model to download. Larger responses are rejected mid-stream and the request returns an error. |
 | `HF_API_TIMEOUT_SECS`                       | `15 s`   | No       | Protocol cap on a hung remote service so the download UI cannot stall on metadata resolution; 15 s is generous for a small metadata call over the internet. | —      | How long Thuki waits for a Hugging Face API metadata call (repo file listing) to respond before giving up. Applies to resolving pasted repo ids and listing a repo's GGUF files, not to the model download itself. |
 | `HF_BASE_URL`                               | `https://huggingface.co` | No | Single origin for model metadata and downloads. Provenance comes from the pinned repo revisions in the curated starter registry, and those pins are only meaningful against the canonical Hub; an arbitrary mirror could serve different content under the same revision ids. | — | The Hugging Face origin Thuki uses for all model metadata calls and blob downloads. Every starter in the registry pins a repo at an exact revision and carries a compiled-in sha256 digest checked after download; the digest catches truncation, bit rot, and resume corruption, while the pinned revision on the canonical Hub is what fixes which content is fetched. |
-| `HF_SEARCH_LIMIT`                           | `30`     | No       | A fixed page size for the in-app model search: the most-downloaded N results cover the discovery need, and cursor pagination beyond it is out of scope until the browse UI requires it. | —      | How many GGUF model repos a single in-app Hugging Face search returns, most-downloaded first. |
+| `HF_SEARCH_LIMIT`                           | `30`     | No       | The per-page step for the in-app model browser. The "Load more" control raises the requested page size in multiples of this value, so it is a layout step rather than a user preference. | —      | How many GGUF model repos the first page of an in-app Hugging Face search returns, most-downloaded first. |
+| `HF_SEARCH_LIMIT_MAX`                        | `120`    | No       | Defense-in-depth bound on request size: "Load more" grows the requested page size in `HF_SEARCH_LIMIT` steps, and this caps the largest single request so a runaway page count cannot ask the Hub for an unbounded result set. | —      | The largest page size a single in-app Hugging Face search request may ask for, regardless of how many times "Load more" was pressed. |
+| `RUNTIME_OVERHEAD_GB`                        | `2.0`    | No       | Feeds the approximate RAM-fit hint shown in Library and Discover only; the authoritative per-starter memory estimates live in the model registry. A user-tunable overhead would imply a precision the hint does not claim. | —      | Resident-memory overhead added on top of a model's weights size (KV cache plus runtime buffers) when estimating whether it fits in this Mac's RAM. |
+| `PARAM_GB_PER_BILLION`                       | `0.65`   | No       | Feeds the approximate RAM-fit hint for Discover search rows only, where no file size is available and the footprint is estimated from the parameter count in the repo id. The hint never gates a download. | —      | Approximate resident GiB per billion parameters for a 4-bit quantised GGUF, used to estimate a search row's footprint. |
 | `MAX_HF_SEARCH_QUERY_LEN`                   | `200 bytes` | No    | Defense-in-depth bound on attacker-influenced input: the query reaches the fixed Hub host (no SSRF) and is percent-encoded by the client, but an unbounded string is still rejected to cap request size. | —      | The longest search string Thuki sends to the Hugging Face model search. A longer query is rejected before any network call. |
 | `OPENAI_MODELS_TIMEOUT_SECS`                | `5 s`    | No       | Protocol cap on a hung server so the Settings model dropdown cannot stall; the OpenAI-compatible server is local or LAN-hosted in the common case, so 5 s is generous. | —      | How long Thuki waits for an OpenAI-compatible server's `/v1/models` listing to respond before giving up. Applies to the Settings model dropdown for that provider, not to chat requests. |
 | `MAX_SSE_LINE_BYTES`                        | `1 MiB`  | No       | Defense-in-depth bound on attacker-controlled stream data. A malicious or broken chat server could otherwise grow a single stream line without limit and exhaust memory. | —      | The longest single Server-Sent-Events line Thuki accepts while streaming a chat response from an OpenAI-compatible (`/v1`) server. A stream line exceeding this aborts the response with an error. |
diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs
index f8dd374f..e76e12e7 100644
--- a/src-tauri/src/config/defaults.rs
+++ b/src-tauri/src/config/defaults.rs
@@ -404,12 +404,30 @@ pub const OPENAI_MODELS_TIMEOUT_SECS: u64 = 5;
 /// the integrity guarantees that make the curated starter registry safe.
 pub const HF_BASE_URL: &str = "https://huggingface.co";
 
-/// Page size for the in-app Hugging Face GGUF model search. Baked-in: a fixed
-/// number of most-downloaded results per query is enough for the browser;
-/// cursor pagination beyond this is intentionally out of scope until the UI
-/// needs it.
+/// Page size for the in-app Hugging Face GGUF model search. The Discover
+/// "Load more" control raises the requested limit in multiples of this value.
+/// Baked-in: the per-page step for the browser, not a user preference.
 pub const HF_SEARCH_LIMIT: usize = 30;
 
+/// Hard cap on a single Hugging Face search request's page size. "Load more"
+/// grows the requested limit in [`HF_SEARCH_LIMIT`] steps; this bounds the
+/// largest single request so a runaway page count cannot ask the Hub for an
+/// unbounded result set. Baked-in: defense-in-depth bound on request size.
+pub const HF_SEARCH_LIMIT_MAX: usize = 120;
+
+/// Approximate resident-memory overhead in GiB added on top of a model's
+/// weights size when estimating whether it fits in this Mac's RAM (the KV
+/// cache at the default context plus runtime buffers). Baked-in: feeds the
+/// RAM-fit *hint* in Library/Discover only; the authoritative per-starter
+/// estimates live in the model registry.
+pub const RUNTIME_OVERHEAD_GB: f64 = 2.0;
+
+/// Approximate GiB of resident memory per billion parameters for a 4-bit
+/// quantised GGUF, used to estimate a Discover search row's footprint from the
+/// parameter count parsed out of its repo id (no file size is available at
+/// search time). Baked-in: feeds the RAM-fit *hint* only.
+pub const PARAM_GB_PER_BILLION: f64 = 0.65;
+
 /// Maximum accepted byte length for a Hugging Face search query before it is
 /// sent upstream. Defense-in-depth bound on attacker-influenced input: the
 /// query reaches the fixed Hub host (no SSRF) and is percent-encoded by the
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 29c21c56..f5aa773b 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -29,10 +29,11 @@ use tauri::Manager;
 
 use crate::config::defaults::{
     DEFAULT_OLLAMA_SHOW_REQUEST_TIMEOUT_SECS, DEFAULT_OLLAMA_TAGS_REQUEST_TIMEOUT_SECS,
-    HF_API_TIMEOUT_SECS, HF_BASE_URL, HF_SEARCH_LIMIT, MAX_HF_API_BODY_BYTES,
+    HF_API_TIMEOUT_SECS, HF_BASE_URL, HF_SEARCH_LIMIT_MAX, MAX_HF_API_BODY_BYTES,
     MAX_HF_SEARCH_QUERY_LEN, MAX_MODEL_SLUG_LEN, MAX_OLLAMA_SHOW_BODY_BYTES,
-    MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS, PROVIDER_ID_BUILTIN,
-    PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI,
+    MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS, PARAM_GB_PER_BILLION,
+    PROVIDER_ID_BUILTIN, PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI,
+    RUNTIME_OVERHEAD_GB,
 };
 use crate::config::AppConfig;
 
@@ -1575,6 +1576,152 @@ pub fn parse_search_results(body: &[u8]) -> Result<Vec<HfModelSummary>, String>
         .collect())
 }
 
+// ─── RAM-fit estimation + annotated view rows ────────────────────────────────
+//
+// The model-settings UI surfaces a "will this fit in your Mac's RAM" hint in
+// both Discover and Library. The authoritative per-starter estimate lives in
+// the registry; for arbitrary downloaded/searched models there is no curated
+// number, so these helpers estimate the resident footprint (weights + a fixed
+// KV/runtime overhead) and reuse `registry::ram_fit` for the threshold. They
+// are deliberately approximate: the result is a hint, never a hard gate.
+
+/// A Hugging Face search row with a best-effort RAM-fit hint for the host.
+/// `summary` is flattened, so the Hub facts serialize beside the two hints.
+/// `est_runtime_gb` and `fit` are estimated from the parameter count parsed
+/// out of the repo id (no file size is available at search time). Both are
+/// `None` without a `<number>B` token; `fit` is also `None` if RAM is unknown.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct HfModelRow {
+    #[serde(flatten)]
+    pub summary: HfModelSummary,
+    pub est_runtime_gb: Option<f64>,
+    pub fit: Option<registry::RamFit>,
+}
+
+/// A repo `.gguf` file with a per-quant RAM-fit hint estimated from its real
+/// file size; `file` is flattened into the serialized row. `fit` is `None`
+/// when host RAM or the file size is unknown (both are needed to judge fit).
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct HfGgufFileRow {
+    #[serde(flatten)]
+    pub file: HfGgufFile,
+    pub fit: Option<registry::RamFit>,
+}
+
+/// An installed model with its RAM-fit hint on this host, estimated from the
+/// recorded weights size. `fit` is `None` when host RAM or the size is unknown.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct InstalledModelView {
+    #[serde(flatten)]
+    pub model: manifest::InstalledModel,
+    pub fit: Option<registry::RamFit>,
+}
+
+/// Extracts the parameter count, in billions, from a model repo id, e.g.
+/// `unsloth/Qwen3.5-9B-GGUF` -> `9.0` and `org/Model-3.8B-it` -> `3.8`.
+///
+/// The id is cut on `/`, `-`, `_` and space; `.` is kept so a fractional count
+/// survives. A piece qualifies when it ends in `B` (either case) and the rest
+/// parses as a finite, positive number. Pieces such as `A22B` do not qualify
+/// because `A22` is not numeric.
+/// Returns the last qualifying piece, or `None` when there is none.
+pub fn parse_param_billions(id: &str) -> Option<f64> {
+    // Walking right-to-left and stopping at the first hit yields the same
+    // answer as scanning left-to-right and remembering the last hit.
+    id.rsplit(['/', '-', '_', ' ']).find_map(|piece| {
+        let number = match piece.strip_suffix('B') {
+            Some(rest) => rest,
+            None => piece.strip_suffix('b')?,
+        };
+        number
+            .parse::<f64>()
+            .ok()
+            .filter(|count| count.is_finite() && *count > 0.0)
+    })
+}
+
+/// Approximates the resident memory, in GiB, of a 4-bit GGUF with `params_b`
+/// billion parameters: [`PARAM_GB_PER_BILLION`] GiB of weights per billion
+/// plus the flat [`RUNTIME_OVERHEAD_GB`] for KV cache and runtime buffers.
+pub fn estimate_runtime_gb_from_params(params_b: f64) -> f64 {
+    RUNTIME_OVERHEAD_GB + PARAM_GB_PER_BILLION * params_b
+}
+
+/// Approximates the resident memory, in GiB, of a GGUF weights blob that is
+/// `size_bytes` on disk: the file size plus the flat [`RUNTIME_OVERHEAD_GB`].
+pub fn estimate_runtime_gb_from_bytes(size_bytes: u64) -> f64 {
+    RUNTIME_OVERHEAD_GB + size_bytes as f64 / (1u64 << 30) as f64
+}
+
+/// Bounds a requested search page size to `1..=`[`HF_SEARCH_LIMIT_MAX`], so
+/// repeated "Load more" requests can never ask for an unbounded result set.
+pub fn clamp_search_limit(limit: usize) -> usize {
+    limit.clamp(1, HF_SEARCH_LIMIT_MAX)
+}
+
+/// Pairs every search summary with a RAM-fit hint estimated from the parameter
+/// count in its repo id. A `ram_bytes` of 0 means host RAM is unknown: the size
+/// estimate is kept but `fit` stays `None`.
+pub fn annotate_search_rows(summaries: Vec<HfModelSummary>, ram_bytes: u64) -> Vec<HfModelRow> {
+    let ram_known = ram_bytes > 0;
+    let mut rows = Vec::with_capacity(summaries.len());
+    for summary in summaries {
+        // No usable `<number>B` token in the id: no estimate, hence no verdict.
+        let params_b = parse_param_billions(&summary.id);
+        let est_runtime_gb = params_b.map(estimate_runtime_gb_from_params);
+        let fit = est_runtime_gb
+            .filter(|_| ram_known)
+            .map(|est| registry::ram_fit(est, ram_bytes));
+        rows.push(HfModelRow {
+            summary,
+            est_runtime_gb,
+            fit,
+        });
+    }
+    rows
+}
+
+/// Pairs every repo `.gguf` file with a per-quant RAM-fit hint estimated from
+/// its real size. `fit` is `None` when host RAM or the file size is 0, since
+/// both are needed to judge fit.
+pub fn annotate_gguf_rows(files: Vec<HfGgufFile>, ram_bytes: u64) -> Vec<HfGgufFileRow> {
+    let mut rows = Vec::with_capacity(files.len());
+    for file in files {
+        // Copied out so `file` can be moved into the row afterwards.
+        let size_bytes = file.size_bytes;
+        // Zero stands for "unknown" on either side, so no verdict is given.
+        let judgeable = ram_bytes > 0 && size_bytes > 0;
+        let fit = judgeable.then(|| {
+            let est_runtime_gb = estimate_runtime_gb_from_bytes(size_bytes);
+            registry::ram_fit(est_runtime_gb, ram_bytes)
+        });
+        rows.push(HfGgufFileRow { file, fit });
+    }
+    rows
+}
+
+/// Pairs every installed model with its RAM-fit hint on this host, estimated
+/// from the recorded weights size. `fit` is `None` when host RAM or the size
+/// is 0, since both are needed to judge fit.
+pub fn build_installed_views(
+    models: Vec<manifest::InstalledModel>,
+    ram_bytes: u64,
+) -> Vec<InstalledModelView> {
+    let mut views = Vec::with_capacity(models.len());
+    for model in models {
+        // Copied out so `model` can be moved into the view afterwards.
+        let size_bytes = model.size_bytes;
+        // Zero stands for "unknown" on either side, so no verdict is given.
+        let judgeable = ram_bytes > 0 && size_bytes > 0;
+        let fit = judgeable.then(|| {
+            let est_runtime_gb = estimate_runtime_gb_from_bytes(size_bytes);
+            registry::ram_fit(est_runtime_gb, ram_bytes)
+        });
+        views.push(InstalledModelView { model, fit });
+    }
+    views
+}
+
 /// Validates the query length, runs the Hugging Face GGUF model search against
 /// `base_url`, and parses the result. `base_url` is parameterized so tests
 /// point at a mock server; production passes [`HF_BASE_URL`].
@@ -1582,6 +1729,7 @@ pub async fn fetch_hf_search(
     client: &reqwest::Client,
     base_url: &str,
     query: &str,
+    limit: usize,
 ) -> Result<Vec<HfModelSummary>, String> {
     let query = query.trim();
     if query.len() > MAX_HF_SEARCH_QUERY_LEN {
@@ -1595,7 +1743,7 @@ pub async fn fetch_hf_search(
         query,
         std::time::Duration::from_secs(HF_API_TIMEOUT_SECS),
         MAX_HF_API_BODY_BYTES,
-        HF_SEARCH_LIMIT,
+        limit,
     )
     .await?;
     parse_search_results(&body)
@@ -1617,8 +1765,13 @@ async fn fetch_hf_search_inner(
 ) -> Result<Vec<u8>, String> {
     let endpoint = format!("{}/api/models", base_url.trim_end_matches('/'));
     let limit = limit.to_string();
+    // `pipeline_tag=text-generation` keeps the results to chat/instruct models;
+    // without it an empty query returns the most-downloaded GGUF repos overall,
+    // which are dominated by embedding/reranker repos (sentence-transformers,
+    // BERT) that Thuki cannot run as a chat model.
     let mut params: Vec<(&str, &str)> = vec![
         ("library", "gguf"),
+        ("pipeline_tag", "text-generation"),
         ("sort", "downloads"),
         ("direction", "-1"),
         ("limit", &limit),
@@ -2002,8 +2155,9 @@ pub async fn download_repo_model(
 pub async fn list_hf_repo_ggufs(
     repo: String,
     client: tauri::State<'_, reqwest::Client>,
-) -> Result<Vec<HfGgufFile>, String> {
-    fetch_repo_gguf_listing(&client, HF_BASE_URL, &repo).await
+) -> Result<Vec<HfGgufFileRow>, String> {
+    let files = fetch_repo_gguf_listing(&client, HF_BASE_URL, &repo).await?;
+    Ok(annotate_gguf_rows(files, system_ram_bytes()))
 }
 
 /// Searches Hugging Face for GGUF model repos matching `query`, most-downloaded
@@ -2013,9 +2167,11 @@ pub async fn list_hf_repo_ggufs(
 #[cfg_attr(not(coverage), tauri::command)]
 pub async fn search_hf_models(
     query: String,
+    limit: usize,
     client: tauri::State<'_, reqwest::Client>,
-) -> Result<Vec<HfModelSummary>, String> {
-    fetch_hf_search(&client, HF_BASE_URL, &query).await
+) -> Result<Vec<HfModelRow>, String> {
+    let summaries = fetch_hf_search(&client, HF_BASE_URL, &query, clamp_search_limit(limit)).await?;
+    Ok(annotate_search_rows(summaries, system_ram_bytes()))
 }
 
 /// Lists the models served by the configured OpenAI-compatible provider via
@@ -2058,9 +2214,10 @@ pub fn discard_partial_download(
 #[cfg_attr(not(coverage), tauri::command)]
 pub fn list_installed_models(
     db: tauri::State<'_, crate::history::Database>,
-) -> Result<Vec<manifest::InstalledModel>, String> {
+) -> Result<Vec<InstalledModelView>, String> {
     let conn = db.0.lock().map_err(|e| e.to_string())?;
-    manifest::list(&conn).map_err(|e| e.to_string())
+    let models = manifest::list(&conn).map_err(|e| e.to_string())?;
+    Ok(build_installed_views(models, system_ram_bytes()))
 }
 
 /// Deletes an installed model: manifest row, orphaned blobs, and (when it was
@@ -4466,6 +4623,151 @@ mod tests {
         );
     }
 
+    // ── RAM-fit estimation + annotated views ─────────────────────────────────
+
+    #[test]
+    fn parse_param_billions_reads_last_b_token() {
+        assert_eq!(parse_param_billions("unsloth/Qwen3.5-9B-GGUF"), Some(9.0));
+        assert_eq!(parse_param_billions("org/Model-3.8B-it"), Some(3.8));
+        assert_eq!(
+            parse_param_billions("bartowski/Llama-3.3-8B-Instruct-GGUF"),
+            Some(8.0)
+        );
+        // Lowercase trailing b is accepted.
+        assert_eq!(parse_param_billions("org/qwen-9b-gguf"), Some(9.0));
+        // Multiple B tokens: `A22B` is skipped (`A22` is not numeric), so 235B wins.
+        assert_eq!(parse_param_billions("org/Qwen3-235B-A22B"), Some(235.0));
+    }
+
+    #[test]
+    fn parse_param_billions_returns_none_without_a_param_token() {
+        assert_eq!(parse_param_billions("google/bert-base-uncased"), None);
+        assert_eq!(parse_param_billions(""), None);
+        // A zero count is not a usable estimate.
+        assert_eq!(parse_param_billions("org/0B-weird"), None);
+        // A non-numeric prefix before B does not parse.
+        assert_eq!(parse_param_billions("org/Model-AxB"), None);
+    }
+
+    #[test]
+    fn estimate_runtime_gb_helpers_add_overhead() {
+        // 8B * 0.65 + 2.0 overhead.
+        assert!((estimate_runtime_gb_from_params(8.0) - 7.2).abs() < 1e-9);
+        // 1 GiB weights + 2.0 overhead.
+        assert!((estimate_runtime_gb_from_bytes(1 << 30) - 3.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn clamp_search_limit_bounds_the_page_size() {
+        assert_eq!(clamp_search_limit(0), 1);
+        assert_eq!(clamp_search_limit(50), 50);
+        assert_eq!(clamp_search_limit(10_000), HF_SEARCH_LIMIT_MAX);
+    }
+
+    #[test]
+    fn annotate_search_rows_estimates_fit_and_handles_unknowns() {
+        let summaries = vec![
+            HfModelSummary {
+                id: "org/Tiny-1B-GGUF".to_string(),
+                downloads: 10,
+                gated: false,
+            },
+            HfModelSummary {
+                id: "org/no-param-token".to_string(),
+                downloads: 5,
+                gated: false,
+            },
+        ];
+        // 64 GiB host: the 1B model fits, the param-less row stays unannotated.
+        let rows = annotate_search_rows(summaries.clone(), 64 << 30);
+        assert_eq!(rows[0].fit, Some(registry::RamFit::Fits));
+        assert!(rows[0].est_runtime_gb.is_some());
+        assert_eq!(rows[1].est_runtime_gb, None);
+        assert_eq!(rows[1].fit, None);
+        // Unknown host RAM keeps the size estimate but drops the fit verdict.
+        let rows = annotate_search_rows(summaries, 0);
+        assert!(rows[0].est_runtime_gb.is_some());
+        assert_eq!(rows[0].fit, None);
+    }
+
+    #[test]
+    fn annotate_gguf_rows_uses_real_sizes() {
+        let files = vec![
+            HfGgufFile {
+                file: "a.gguf".to_string(),
+                size_bytes: 1 << 30,
+            },
+            HfGgufFile {
+                file: "b.gguf".to_string(),
+                size_bytes: 0,
+            },
+        ];
+        let rows = annotate_gguf_rows(files.clone(), 64 << 30);
+        assert_eq!(rows[0].fit, Some(registry::RamFit::Fits));
+        // A zero size cannot be judged.
+        assert_eq!(rows[1].fit, None);
+        // Unknown host RAM drops every verdict.
+        let rows = annotate_gguf_rows(files, 0);
+        assert_eq!(rows[0].fit, None);
+    }
+
+    #[test]
+    fn build_installed_views_annotates_fit() {
+        let model = manifest::InstalledModel {
+            id: "org/Repo:weights.gguf".to_string(),
+            display_name: "Repo".to_string(),
+            repo: "org/Repo".to_string(),
+            revision: "0".repeat(40),
+            file_name: "weights.gguf".to_string(),
+            sha256: "a".repeat(64),
+            size_bytes: 1 << 30,
+            quant: "Q4_K_M".to_string(),
+            vision: false,
+            thinking: false,
+            mmproj_file: None,
+            mmproj_sha256: None,
+        };
+        let views = build_installed_views(vec![model.clone()], 64 << 30);
+        assert_eq!(views[0].fit, Some(registry::RamFit::Fits));
+        // Unknown host RAM drops the verdict.
+        let views = build_installed_views(vec![model], 0);
+        assert_eq!(views[0].fit, None);
+    }
+
+    #[test]
+    fn view_rows_serialize_with_flattened_base_and_fit() {
+        let row = HfModelRow {
+            summary: HfModelSummary {
+                id: "o/r".to_string(),
+                downloads: 3,
+                gated: false,
+            },
+            est_runtime_gb: Some(7.0),
+            fit: Some(registry::RamFit::Tight),
+        };
+        assert_eq!(
+            serde_json::to_value(row).unwrap(),
+            serde_json::json!({
+                "id": "o/r",
+                "downloads": 3,
+                "gated": false,
+                "est_runtime_gb": 7.0,
+                "fit": "tight",
+            })
+        );
+        let file_row = HfGgufFileRow {
+            file: HfGgufFile {
+                file: "w.gguf".to_string(),
+                size_bytes: 42,
+            },
+            fit: None,
+        };
+        assert_eq!(
+            serde_json::to_value(file_row).unwrap(),
+            serde_json::json!({"file": "w.gguf", "size_bytes": 42, "fit": serde_json::Value::Null})
+        );
+    }
+
     #[tokio::test]
     async fn fetch_hf_search_returns_rows_and_sends_filtered_query() {
         let mut server = mockito::Server::new_async().await;
@@ -4473,8 +4775,10 @@ mod tests {
             .mock("GET", "/api/models")
             .match_query(mockito::Matcher::AllOf(vec![
                 mockito::Matcher::UrlEncoded("library".into(), "gguf".into()),
+                mockito::Matcher::UrlEncoded("pipeline_tag".into(), "text-generation".into()),
                 mockito::Matcher::UrlEncoded("search".into(), "qwen".into()),
                 mockito::Matcher::UrlEncoded("sort".into(), "downloads".into()),
+                mockito::Matcher::UrlEncoded("limit".into(), "60".into()),
             ]))
             .with_status(200)
             .with_header("content-type", "application/json")
@@ -4482,7 +4786,7 @@ mod tests {
             .create_async()
             .await;
         let client = reqwest::Client::new();
-        let rows = fetch_hf_search(&client, &server.url(), "qwen")
+        let rows = fetch_hf_search(&client, &server.url(), "qwen", 60)
             .await
             .unwrap();
         mock.assert_async().await;
@@ -4502,7 +4806,7 @@ mod tests {
             .await;
         let client = reqwest::Client::new();
         // Whitespace-only query trims to empty and the search param is dropped.
-        let rows = fetch_hf_search(&client, &server.url(), "   ")
+        let rows = fetch_hf_search(&client, &server.url(), "   ", crate::config::defaults::HF_SEARCH_LIMIT)
             .await
             .unwrap();
         assert!(rows.is_empty());
@@ -4518,7 +4822,7 @@ mod tests {
             .create_async()
             .await;
         let client = reqwest::Client::new();
-        let err = fetch_hf_search(&client, &server.url(), "q")
+        let err = fetch_hf_search(&client, &server.url(), "q", crate::config::defaults::HF_SEARCH_LIMIT)
             .await
             .unwrap_err();
         assert!(err.contains("503"), "got: {err}");
@@ -4527,7 +4831,7 @@ mod tests {
     #[tokio::test]
     async fn fetch_hf_search_maps_transport_error() {
         let client = reqwest::Client::new();
-        let err = fetch_hf_search(&client, "http://127.0.0.1:1", "q")
+        let err = fetch_hf_search(&client, "http://127.0.0.1:1", "q", crate::config::defaults::HF_SEARCH_LIMIT)
             .await
             .unwrap_err();
         assert!(err.contains("failed to reach Hugging Face"), "got: {err}");
@@ -4537,7 +4841,7 @@ mod tests {
     async fn fetch_hf_search_rejects_overlong_query() {
         let client = reqwest::Client::new();
         let long = "x".repeat(crate::config::defaults::MAX_HF_SEARCH_QUERY_LEN + 1);
-        let err = fetch_hf_search(&client, "http://127.0.0.1:9", &long)
+        let err = fetch_hf_search(&client, "http://127.0.0.1:9", &long, crate::config::defaults::HF_SEARCH_LIMIT)
             .await
             .unwrap_err();
         assert!(err.contains("maximum length"), "got: {err}");

From 9a855008807f2259e2ae174bb334dee54c8292cb Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 14:19:29 -0500
Subject: [PATCH 08/89] style: restyle the Models segmented control to
 icon-above-label tabs

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/ModelsSegmented.test.tsx      |  7 +++
 src/settings/tabs/models/ModelsSegmented.tsx  | 38 +++++++++++++--
 src/styles/settings.module.css                | 47 ++++++++++++++-----
 3 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/src/settings/tabs/models/ModelsSegmented.test.tsx b/src/settings/tabs/models/ModelsSegmented.test.tsx
index 8ef0d9bb..15862a5f 100644
--- a/src/settings/tabs/models/ModelsSegmented.test.tsx
+++ b/src/settings/tabs/models/ModelsSegmented.test.tsx
@@ -11,6 +11,13 @@ describe('ModelsSegmented', () => {
     expect(screen.getByRole('tab', { name: 'Providers' })).toBeInTheDocument();
   });
 
+  it('renders a decorative icon in each tab', () => {
+    const { container } = render(
+      <ModelsSegmented value="library" onChange={() => {}} />,
+    );
+    expect(container.querySelectorAll('svg')).toHaveLength(3);
+  });
+
   it('marks the active view as selected', () => {
     render(<ModelsSegmented value="discover" onChange={() => {}} />);
     expect(screen.getByRole('tab', { name: 'Discover' })).toHaveAttribute(
diff --git a/src/settings/tabs/models/ModelsSegmented.tsx b/src/settings/tabs/models/ModelsSegmented.tsx
index 2d2deeb6..3fabb3e0 100644
--- a/src/settings/tabs/models/ModelsSegmented.tsx
+++ b/src/settings/tabs/models/ModelsSegmented.tsx
@@ -9,14 +9,41 @@
  * sidebar's section tabs.
  */
 
+import type { ReactNode } from 'react';
+
 import styles from '../../../styles/settings.module.css';
 
 export type ModelsSubview = 'library' | 'discover' | 'providers';
 
-const VIEWS: ReadonlyArray<{ id: ModelsSubview; label: string }> = [
-  { id: 'library', label: 'Library' },
-  { id: 'discover', label: 'Discover' },
-  { id: 'providers', label: 'Providers' },
+// Line-art icons in the same family as the sidebar section tabs (1.6 rounded
+// stroke, currentColor): Library = stacked layers, Discover = compass,
+// Providers = server stack. Decorative, so each is aria-hidden and the button's
+// text label remains the accessible name.
+const LIBRARY_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M12 3l9 4.8-9 4.8-9-4.8 9-4.8z" />
+    <path d="M3 12.2l9 4.8 9-4.8" />
+    <path d="M3 16.6l9 4.8 9-4.8" />
+  </svg>
+);
+const DISCOVER_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <circle cx="12" cy="12" r="9" />
+    <path d="M15.6 8.4l-2.3 5.2-5.2 2.3 2.3-5.2 5.2-2.3z" />
+  </svg>
+);
+const PROVIDERS_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <rect x="3" y="4.5" width="18" height="6.5" rx="1.8" />
+    <rect x="3" y="13" width="18" height="6.5" rx="1.8" />
+    <path d="M6.5 7.75h.01M6.5 16.25h.01" />
+  </svg>
+);
+
+const VIEWS: ReadonlyArray<{ id: ModelsSubview; label: string; icon: ReactNode }> = [
+  { id: 'library', label: 'Library', icon: LIBRARY_ICON },
+  { id: 'discover', label: 'Discover', icon: DISCOVER_ICON },
+  { id: 'providers', label: 'Providers', icon: PROVIDERS_ICON },
 ];
 
 interface ModelsSegmentedProps {
@@ -51,7 +78,8 @@ export function ModelsSegmented({ value, onChange }: ModelsSegmentedProps) {
               }
             }}
           >
-            {view.label}
+            {view.icon}
+            <span className={styles.segItemLabel}>{view.label}</span>
           </button>
         );
       })}
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 4d2861a4..c3732a1c 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -295,19 +295,28 @@
 
 /* ─── Models surface (segmented Library / Discover / Providers) ──────────── */
 
+/* Icon-above-label tabs in the same family as the sidebar section nav: the
+ * active view carries the accent on its icon + label inside a softly elevated
+ * bordered box (no loud filled pill). */
 .seg {
   display: inline-flex;
   box-sizing: border-box;
-  padding: 3px;
+  gap: 6px;
+  padding: 5px;
   border: 1px solid var(--hair-soft);
-  border-radius: 9px;
+  border-radius: 13px;
   background: var(--elev-1);
 }
 .segItem {
   box-sizing: border-box;
-  padding: 6px 14px;
-  border: none;
-  border-radius: 7px;
+  display: inline-flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 7px;
+  width: 96px;
+  padding: 11px 0 9px;
+  border: 1px solid transparent;
+  border-radius: 10px;
   background: transparent;
   color: var(--t2);
   font-family: inherit;
@@ -315,8 +324,19 @@
   font-weight: 540;
   cursor: pointer;
   transition:
-    color 140ms ease,
-    background 140ms ease;
+    color 150ms ease,
+    background 150ms ease,
+    border-color 150ms ease;
+}
+.segItem svg {
+  width: 20px;
+  height: 20px;
+  stroke: currentColor;
+  stroke-width: 1.6;
+  fill: none;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+  opacity: 0.92;
 }
 .segItem:hover:not(.segItemActive) {
   color: var(--t1);
@@ -325,11 +345,16 @@
   outline: none;
   box-shadow: 0 0 0 2px var(--accent-soft);
 }
-/* Active view: filled accent pill with dark text (the one accent fill on the
- * surface, matching the locked design). */
 .segItemActive {
-  color: #16110d;
-  background: var(--accent);
+  color: var(--accent);
+  background: var(--elev-2);
+  border-color: var(--hair-soft);
+}
+.segItemActive svg {
+  opacity: 1;
+}
+.segItemLabel {
+  color: inherit;
 }
 
 /* Row holding the segmented control, centered across the Models surface. */

From cce84eedadba828bada2a219f133bad695ea6e11 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 14:27:17 -0500
Subject: [PATCH 09/89] feat: redesign the Library pane with quiet rows, a
 popover menu, and RAM-fit

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/LibraryPane.module.css        | 247 ++++++++++++------
 src/settings/tabs/models/LibraryPane.test.tsx | 187 ++++++++-----
 src/settings/tabs/models/LibraryPane.tsx      | 224 +++++++++-------
 src/styles/settings.module.css                |   5 +
 src/types/starter.ts                          |   7 +-
 5 files changed, 435 insertions(+), 235 deletions(-)

diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index 9495daaa..7ba6b05c 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -1,7 +1,7 @@
 /*
- * Styles for the Library pane of the Models surface. Tokens (--accent,
- * --vis, --rea, --hair, --t1..--t3, --elev-*) cascade from the Settings
- * window, so they are referenced directly here.
+ * Styles for the Library pane of the Models surface. Tokens (--accent, --vis,
+ * --rea, --ok, --tight, --heavy, --danger, --hair, --t1..--t3, --elev-*)
+ * cascade from the Settings window, so they are referenced directly here.
  */
 
 .pane {
@@ -13,22 +13,41 @@
 .bar {
   display: flex;
   align-items: center;
-  padding: 0 4px 12px;
+  padding: 0 2px 14px;
 }
 
+/* Quiet ghost button: the Library is a place you manage what you have, so the
+ * add affordance is present but not the loud accent block it used to be. */
 .addButton {
   margin-left: auto;
   display: inline-flex;
   align-items: center;
-  gap: 6px;
+  gap: 7px;
   font-size: 12px;
-  font-weight: 560;
-  color: #16110d;
-  background: var(--accent);
-  border: none;
+  font-weight: 540;
+  color: var(--t2);
+  background: transparent;
+  border: 1px solid var(--hair);
   border-radius: 8px;
-  padding: 7px 12px;
+  padding: 7px 13px;
   cursor: pointer;
+  transition:
+    color 150ms ease,
+    border-color 150ms ease,
+    background 150ms ease;
+}
+.addButton:hover {
+  color: var(--accent);
+  border-color: rgba(255, 141, 92, 0.45);
+  background: var(--accent-soft);
+}
+.addIcon {
+  width: 13px;
+  height: 13px;
+  stroke: currentColor;
+  stroke-width: 2;
+  fill: none;
+  stroke-linecap: round;
 }
 
 .list {
@@ -38,34 +57,38 @@
 }
 
 .card {
+  position: relative;
   border: 1px solid var(--hair-soft);
   border-radius: var(--radius-card);
   background: var(--elev-1);
-  overflow: hidden;
 }
 
 .cardActive {
   border-color: rgba(255, 141, 92, 0.4);
+  background: linear-gradient(
+    180deg,
+    rgba(255, 141, 92, 0.05),
+    rgba(255, 141, 92, 0.015)
+  );
+}
+
+/* Accent edge marks the active model: one signal, no avatar tile. */
+.activeEdge {
+  position: absolute;
+  left: 0;
+  top: 12px;
+  bottom: 12px;
+  width: 2.5px;
+  border-radius: 2px;
+  background: var(--accent);
+  box-shadow: 0 0 8px var(--accent);
 }
 
 .row {
   display: flex;
   align-items: center;
-  gap: 13px;
-  padding: 13px 14px;
-}
-
-.avatar {
-  width: 32px;
-  height: 32px;
-  border-radius: 8px;
-  flex: none;
-  display: grid;
-  place-items: center;
-  background: var(--elev-2);
-  font-weight: 700;
-  color: var(--t2);
-  font-size: 12px;
+  gap: 14px;
+  padding: 14px 14px 14px 16px;
 }
 
 .mid {
@@ -74,60 +97,74 @@
 }
 
 .name {
-  font-weight: 550;
-  font-size: 13px;
+  font-weight: 580;
+  font-size: 13.5px;
   color: var(--t1);
   display: flex;
   align-items: center;
-  gap: 8px;
+  gap: 9px;
 }
 
 .org {
   font-size: 11px;
   color: var(--t3);
   margin-top: 3px;
+  font-variant-numeric: tabular-nums;
 }
 
-.badge {
+.activeBadge {
   display: inline-flex;
   align-items: center;
   font-size: 10.5px;
   font-weight: 560;
   padding: 2px 8px;
-  border-radius: 999px;
-}
-
-.badgeActive {
+  border-radius: var(--radius-pill);
   color: var(--accent);
   background: var(--accent-soft);
 }
 
-.badgeVision {
-  color: var(--vis);
-  background: var(--vis-bg);
+.right {
+  display: flex;
+  align-items: center;
+  gap: 14px;
+  flex: none;
 }
 
-.badgeReason {
+/* Capability tags are text-only (no pill), per the locked design. */
+.tagVision {
+  font-size: 11px;
+  font-weight: 540;
+  color: var(--vis);
+}
+.tagReason {
+  font-size: 11px;
+  font-weight: 540;
   color: var(--rea);
-  background: var(--rea-bg);
 }
 
-.actions {
-  display: flex;
+/* RAM-fit hint: a coloured dot + label reusing the onboarding fit palette. */
+.fit {
+  display: inline-flex;
   align-items: center;
-  gap: 8px;
-  flex: none;
+  gap: 5px;
+  font-size: 11px;
+  font-weight: 560;
 }
-
-.useButton {
-  font-size: 12px;
-  font-weight: 540;
-  color: var(--accent);
-  border: 1px solid rgba(255, 141, 92, 0.45);
-  background: var(--accent-soft);
-  border-radius: 8px;
-  padding: 6px 12px;
-  cursor: pointer;
+.fit::before {
+  content: '';
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
+  background: currentColor;
+}
+.fitOk {
+  color: var(--ok);
+}
+.fitTight {
+  color: var(--tight);
+}
+.fitHeavy {
+  color: var(--heavy);
 }
 
 .manageButton {
@@ -142,41 +179,81 @@
   font-size: 16px;
   line-height: 1;
   cursor: pointer;
+  transition:
+    color 150ms ease,
+    border-color 150ms ease;
 }
-
-.manageRow {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-  padding: 0 14px 13px;
-  flex-wrap: wrap;
+.manageButton:hover {
+  color: var(--t1);
+  border-color: rgba(255, 141, 92, 0.4);
 }
 
-.confirmText {
-  font-size: 11px;
-  color: var(--t2);
+/* ── Popover menu (replaces the old inline card expand) ─────────────────── */
+
+@keyframes menuIn {
+  from {
+    opacity: 0;
+    transform: scale(0.94) translateY(-4px);
+  }
+  to {
+    opacity: 1;
+    transform: scale(1) translateY(0);
+  }
 }
 
-.deleteButton {
-  font-size: 12px;
-  font-weight: 540;
-  color: #ff6b6b;
-  border: 1px solid rgba(255, 107, 107, 0.4);
-  background: rgba(255, 107, 107, 0.1);
-  border-radius: 8px;
-  padding: 6px 12px;
-  cursor: pointer;
+.menuWrap {
+  position: relative;
 }
 
-.ghostButton {
-  font-size: 12px;
-  font-weight: 540;
-  color: var(--t2);
+.menu {
+  position: absolute;
+  right: 0;
+  top: 36px;
+  z-index: 20;
+  min-width: 208px;
+  padding: 6px;
+  background: rgba(28, 23, 20, 0.98);
   border: 1px solid var(--hair);
-  background: transparent;
+  border-top-color: rgba(255, 141, 92, 0.18);
+  border-radius: 12px;
+  box-shadow:
+    0 14px 34px -10px rgba(0, 0, 0, 0.7),
+    0 0 0 1px rgba(255, 141, 92, 0.08),
+    inset 0 1px 0 rgba(255, 255, 255, 0.05);
+  transform-origin: top right;
+  animation: menuIn 130ms cubic-bezier(0.2, 0.8, 0.3, 1);
+}
+
+.menuItem {
+  display: flex;
+  align-items: center;
+  width: 100%;
+  padding: 8px 10px;
+  border: none;
   border-radius: 8px;
-  padding: 6px 12px;
+  background: transparent;
+  color: var(--t1);
+  font-family: inherit;
+  font-size: 12.5px;
+  font-weight: 500;
+  text-align: left;
   cursor: pointer;
+  transition: background 120ms ease;
+}
+.menuItem:hover {
+  background: var(--elev-2);
+}
+.menuItemDanger {
+  color: var(--danger);
+}
+.menuItemDanger:hover {
+  background: rgba(255, 122, 110, 0.12);
+}
+
+.menuSep {
+  height: 1px;
+  margin: 5px 6px;
+  background: var(--hair-soft);
 }
 
 .empty {
@@ -205,7 +282,7 @@
 
 .error {
   font-size: 11px;
-  color: #ff6b6b;
+  color: var(--danger);
   margin: 8px 4px 0;
 }
 
@@ -219,4 +296,16 @@
   color: var(--t3);
   display: flex;
   justify-content: space-between;
+  font-variant-numeric: tabular-nums;
+}
+
+@media (prefers-reduced-motion: reduce) {
+  .menu {
+    animation: none;
+  }
+  .addButton,
+  .manageButton,
+  .menuItem {
+    transition: none;
+  }
 }
diff --git a/src/settings/tabs/models/LibraryPane.test.tsx b/src/settings/tabs/models/LibraryPane.test.tsx
index 863f742f..732841d1 100644
--- a/src/settings/tabs/models/LibraryPane.test.tsx
+++ b/src/settings/tabs/models/LibraryPane.test.tsx
@@ -1,10 +1,11 @@
 /**
  * Unit tests for the Models surface's Library pane.
  *
- * Covers the installed-model list (active + non-active cards, capability
- * badges), the Use action, the Delete confirm/cancel/success/error flow,
- * the empty state, the free-disk footer, and the defensive guards around
- * the manifest and disk probes.
+ * Covers the installed-model list (active + non-active rows, capability text
+ * tags, RAM-fit hint), the popover menu (Set as active / View on Hugging Face
+ * / Delete), the delete confirm/cancel/success/error flow, menu dismissal
+ * (outside click + Escape), the empty state, the footer, and the defensive
+ * guards around the manifest and disk probes.
  *
  * `invoke` comes from the global Tauri mock; capabilities are fetched
  * through the same `get_model_capabilities` command the hook reads.
@@ -109,8 +110,10 @@ const GEMMA: InstalledModel = {
   display_name: 'gemma',
   size_bytes: 2_489_757_856,
   quant: 'Q4_K_M',
+  fit: 'fits',
 };
 
+// No `fit` here: exercises the "RAM unknown" branch (no fit hint renders).
 const QWEN: InstalledModel = {
   id: 'org/qwen:qwen.gguf',
   display_name: 'qwen',
@@ -185,6 +188,11 @@ async function renderPane(
   return view;
 }
 
+/** Opens the popover menu for the named model. */
+function openMenu(name: string) {
+  fireEvent.click(screen.getByRole('button', { name: `Manage ${name}` }));
+}
+
 describe('LibraryPane', () => {
   it('lists each installed model with its org line, size, and quant', async () => {
     mockCommands(libraryResponses());
@@ -195,54 +203,57 @@ describe('LibraryPane', () => {
     expect(screen.getByText('org/qwen · 9.0 GB')).toBeInTheDocument();
   });
 
-  it('renders the uppercased first character as each avatar', async () => {
+  it('shows the RAM-fit hint only when the backend provides one', async () => {
     mockCommands(libraryResponses());
     await renderPane();
-    expect(screen.getByText('G')).toBeInTheDocument();
-    expect(screen.getByText('Q')).toBeInTheDocument();
+    // gemma carries fit: 'fits'; qwen has no fit, so only one hint renders.
+    expect(screen.getByText('Comfortable')).toBeInTheDocument();
+    expect(screen.getAllByText('Comfortable')).toHaveLength(1);
   });
 
-  it('marks the active model with an Active badge and no Use button', async () => {
+  it('marks the active model and offers Set as active only on the rest', async () => {
     mockCommands(libraryResponses());
     await renderPane(makeConfig('org/gemma:gemma.gguf'));
     expect(screen.getByText('Active')).toBeInTheDocument();
-    // The active model offers no Use button; the non-active one does.
+    // The non-active model's menu offers Set as active.
+    openMenu('qwen');
     expect(
-      screen.getByRole('button', { name: 'Use qwen' }),
+      screen.getByRole('menuitem', { name: 'Set as active' }),
     ).toBeInTheDocument();
+    // The active model's menu does not.
+    openMenu('gemma');
     expect(
-      screen.queryByRole('button', { name: 'Use gemma' }),
+      screen.queryByRole('menuitem', { name: 'Set as active' }),
     ).not.toBeInTheDocument();
   });
 
-  it('shows a Vision badge only for vision-capable models', async () => {
+  it('shows a Vision tag only for vision-capable models', async () => {
     mockCommands(libraryResponses());
     await renderPane();
-    const vision = screen.getByText('Vision');
-    expect(vision).toBeInTheDocument();
-    // Only gemma is vision-capable, so exactly one Vision badge.
+    expect(screen.getByText('Vision')).toBeInTheDocument();
     expect(screen.getAllByText('Vision')).toHaveLength(1);
   });
 
-  it('shows a Reasoning badge only for thinking-capable models', async () => {
+  it('shows a Reasoning tag only for thinking-capable models', async () => {
     mockCommands(libraryResponses());
     await renderPane();
     expect(screen.getByText('Reasoning')).toBeInTheDocument();
     expect(screen.getAllByText('Reasoning')).toHaveLength(1);
   });
 
-  it('omits capability badges when no map entry exists for a model', async () => {
+  it('omits capability tags when no map entry exists for a model', async () => {
     mockCommands(libraryResponses({ get_model_capabilities: {} }));
     await renderPane();
     expect(screen.queryByText('Vision')).not.toBeInTheDocument();
     expect(screen.queryByText('Reasoning')).not.toBeInTheDocument();
   });
 
-  it('Use commits the model, lifts the config, and refreshes', async () => {
+  it('Set as active commits the model, lifts the config, and refreshes', async () => {
     mockCommands(libraryResponses({ update_provider_field: undefined }));
     const onSaved = vi.fn();
     await renderPane(makeConfig('org/gemma:gemma.gguf'), onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Use qwen' }));
+    openMenu('qwen');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Set as active' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
       providerId: 'builtin',
@@ -252,7 +263,7 @@ describe('LibraryPane', () => {
     expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
   });
 
-  it('leaves the lift to the focus resync when Use cannot read the config', async () => {
+  it('leaves the lift to the focus resync when Set as active cannot read the config', async () => {
     mockCommands(
       libraryResponses({
         update_provider_field: undefined,
@@ -261,7 +272,8 @@ describe('LibraryPane', () => {
     );
     const onSaved = vi.fn();
     await renderPane(makeConfig('org/gemma:gemma.gguf'), onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Use qwen' }));
+    openMenu('qwen');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Set as active' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
       providerId: 'builtin',
@@ -271,7 +283,7 @@ describe('LibraryPane', () => {
     expect(onSaved).not.toHaveBeenCalled();
   });
 
-  it('swallows an update_provider_field failure on Use', async () => {
+  it('swallows an update_provider_field failure on Set as active', async () => {
     mockCommands(
       libraryResponses({
         update_provider_field: new Reject(new Error('write failed')),
@@ -279,24 +291,33 @@ describe('LibraryPane', () => {
     );
     const onSaved = vi.fn();
     await renderPane(makeConfig('org/gemma:gemma.gguf'), onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Use qwen' }));
+    openMenu('qwen');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Set as active' }));
     await flush();
     expect(onSaved).not.toHaveBeenCalled();
     expect(screen.getByText('qwen')).toBeInTheDocument();
   });
 
+  it('View on Hugging Face opens the repo page in the system browser', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    openMenu('gemma');
+    fireEvent.click(
+      screen.getByRole('menuitem', { name: 'View on Hugging Face' }),
+    );
+    expect(invokeMock).toHaveBeenCalledWith('open_url', {
+      url: 'https://huggingface.co/org/gemma',
+    });
+  });
+
   it('Delete asks for confirmation and Cancel backs out without deleting', async () => {
     mockCommands(libraryResponses());
     await renderPane();
-    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    expect(
-      screen.getByText('Delete gemma? Its files are removed from disk.'),
-    ).toBeInTheDocument();
+    openMenu('gemma');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    expect(screen.getByText('Delete gemma?')).toBeInTheDocument();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
-    expect(
-      screen.queryByText('Delete gemma? Its files are removed from disk.'),
-    ).not.toBeInTheDocument();
+    expect(screen.queryByText('Delete gemma?')).not.toBeInTheDocument();
     expect(invokeMock).not.toHaveBeenCalledWith(
       'delete_installed_model',
       expect.anything(),
@@ -316,9 +337,9 @@ describe('LibraryPane', () => {
     );
     const onSaved = vi.fn();
     await renderPane(makeConfig(''), onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    openMenu('gemma');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
       id: 'org/gemma:gemma.gguf',
@@ -337,9 +358,9 @@ describe('LibraryPane', () => {
     );
     const onSaved = vi.fn();
     await renderPane(makeConfig(''), onSaved);
-    fireEvent.click(screen.getByRole('button', { name: 'Manage qwen' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete qwen' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    openMenu('qwen');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete qwen' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
       id: 'org/qwen:qwen.gguf',
@@ -352,9 +373,9 @@ describe('LibraryPane', () => {
       libraryResponses({ delete_installed_model: new Reject('file busy') }),
     );
     await renderPane();
-    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    openMenu('gemma');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(screen.getByRole('alert')).toHaveTextContent('file busy');
     expect(screen.getByText('gemma')).toBeInTheDocument();
@@ -392,19 +413,21 @@ describe('LibraryPane', () => {
   it('shows the free-disk footer and the model count when both are known', async () => {
     mockCommands(libraryResponses());
     await renderPane();
-    expect(screen.getByText('30.4 GB free on disk')).toBeInTheDocument();
-    expect(
-      screen.getByText('2 models · capabilities detected automatically'),
-    ).toBeInTheDocument();
+    expect(screen.getByText('30.4 GB free')).toBeInTheDocument();
+    expect(screen.getByText('2 models installed')).toBeInTheDocument();
+  });
+
+  it('singularises the model count for a single install', async () => {
+    mockCommands(libraryResponses({ list_installed_models: [GEMMA] }));
+    await renderPane();
+    expect(screen.getByText('1 model installed')).toBeInTheDocument();
   });
 
   it('hides the free-disk line when the probe returns a non-number', async () => {
     mockCommands(libraryResponses({ get_models_dir_free_bytes: null }));
     await renderPane();
-    expect(screen.queryByText(/free on disk/)).not.toBeInTheDocument();
-    expect(
-      screen.getByText('2 models · capabilities detected automatically'),
-    ).toBeInTheDocument();
+    expect(screen.queryByText(/free/)).not.toBeInTheDocument();
+    expect(screen.getByText('2 models installed')).toBeInTheDocument();
   });
 
   it('hides the free-disk line when the disk probe rejects', async () => {
@@ -414,7 +437,7 @@ describe('LibraryPane', () => {
       }),
     );
     await renderPane();
-    expect(screen.queryByText(/free on disk/)).not.toBeInTheDocument();
+    expect(screen.queryByText(/free/)).not.toBeInTheDocument();
   });
 
   it('renders the top-right Add model button and routes it to onAddModel', async () => {
@@ -428,7 +451,7 @@ describe('LibraryPane', () => {
   it('treats every model as non-active when no builtin provider exists', async () => {
     mockCommands(libraryResponses());
     // A config whose only provider is Ollama: the builtin lookup misses and
-    // the active model falls back to "", so no card is Active and both get Use.
+    // the active model falls back to "", so no row is Active.
     const ollamaOnly: RawAppConfig = {
       ...BASE_CONFIG,
       inference: {
@@ -438,37 +461,73 @@ describe('LibraryPane', () => {
     };
     await renderPane(ollamaOnly);
     expect(screen.queryByText('Active')).not.toBeInTheDocument();
+    openMenu('gemma');
     expect(
-      screen.getByRole('button', { name: 'Use gemma' }),
-    ).toBeInTheDocument();
-    expect(
-      screen.getByRole('button', { name: 'Use qwen' }),
+      screen.getByRole('menuitem', { name: 'Set as active' }),
     ).toBeInTheDocument();
   });
 
-  it('toggles the Manage menu closed when its own button is clicked again', async () => {
+  it('toggles the popover closed when its own button is clicked again', async () => {
     mockCommands(libraryResponses());
     await renderPane();
     const manage = screen.getByRole('button', { name: 'Manage gemma' });
     fireEvent.click(manage);
     expect(
-      screen.getByRole('button', { name: 'Delete gemma' }),
+      screen.getByRole('menuitem', { name: 'Delete gemma' }),
     ).toBeInTheDocument();
-    // A second click on the same Manage button collapses the row.
     fireEvent.click(manage);
     expect(
-      screen.queryByRole('button', { name: 'Delete gemma' }),
+      screen.queryByRole('menuitem', { name: 'Delete gemma' }),
+    ).not.toBeInTheDocument();
+  });
+
+  it('closes the popover on an outside click', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    openMenu('gemma');
+    expect(
+      screen.getByRole('menuitem', { name: 'Delete gemma' }),
+    ).toBeInTheDocument();
+    fireEvent.mouseDown(document.body);
+    expect(
+      screen.queryByRole('menuitem', { name: 'Delete gemma' }),
+    ).not.toBeInTheDocument();
+  });
+
+  it('closes the popover on Escape but ignores other keys', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    openMenu('gemma');
+    fireEvent.keyDown(document.body, { key: 'a' });
+    expect(
+      screen.getByRole('menuitem', { name: 'Delete gemma' }),
+    ).toBeInTheDocument();
+    fireEvent.keyDown(document.body, { key: 'Escape' });
+    expect(
+      screen.queryByRole('menuitem', { name: 'Delete gemma' }),
     ).not.toBeInTheDocument();
   });
 
+  it('keeps the popover open when clicking inside it', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    openMenu('gemma');
+    fireEvent.mouseDown(
+      screen.getByRole('menuitem', { name: 'View on Hugging Face' }),
+    );
+    expect(
+      screen.getByRole('menuitem', { name: 'View on Hugging Face' }),
+    ).toBeInTheDocument();
+  });
+
   it('clears a stale delete error once a later delete succeeds', async () => {
     mockCommands(
       libraryResponses({ delete_installed_model: new Reject('file busy') }),
     );
     await renderPane();
-    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    openMenu('gemma');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(screen.getByRole('alert')).toHaveTextContent('file busy');
 
@@ -478,9 +537,9 @@ describe('LibraryPane', () => {
         delete_installed_model: undefined,
       }),
     );
-    fireEvent.click(screen.getByRole('button', { name: 'Manage gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
-    fireEvent.click(screen.getByRole('button', { name: 'Confirm delete' }));
+    openMenu('gemma');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await waitFor(() =>
       expect(screen.queryByRole('alert')).not.toBeInTheDocument(),
     );
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index c436d618..8dce99ed 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -1,22 +1,39 @@
 /**
  * Library pane of the Models surface: the user's installed local models.
  *
- * Each downloaded model shows as a card with its name, capability badges
- * (Vision / Reasoning, detected automatically), and its Hugging Face repo,
- * quantisation, and size. The currently selected built-in model is marked
- * Active; any other model offers a Use button that makes it the active one.
- * A per-card Manage menu reveals an inline Delete confirm that removes the
- * model from disk. When nothing is installed the pane invites the user over
- * to Discover; a footer reports free disk space and the model count.
+ * Each downloaded model shows as a quiet row: its name, an Active state, the
+ * Hugging Face repo / quantisation / size, capability text tags (Vision /
+ * Reasoning, detected automatically), and a RAM-fit hint for this Mac. A ⋮
+ * button opens a floating popover (Set as active / View on Hugging Face /
+ * Delete) instead of expanding the card. Delete routes through a confirm
+ * dialog. When nothing is installed the pane invites the user over to
+ * Discover; a footer reports the model count and free disk space.
  */
 
 import { useCallback, useEffect, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 
 import { useModelCapabilities } from '../../../hooks/useModelCapabilities';
+import { ConfirmDialog } from '../../components';
 import styles from './LibraryPane.module.css';
 import type { RawAppConfig } from '../../types';
-import type { InstalledModel } from '../../../types/starter';
+import type { InstalledModel, RamFit } from '../../../types/starter';
+
+const HF_BASE_URL = 'https://huggingface.co';
+
+/** RAM-fit hint label shown next to a model. */
+const FIT_LABEL: Record<RamFit, string> = {
+  fits: 'Comfortable',
+  tight: 'Tight',
+  too_big: 'Heavy',
+};
+
+/** CSS-module class that colours the RAM-fit hint for each fit level. */
+const FIT_CLASS: Record<RamFit, string> = {
+  fits: styles.fitOk,
+  tight: styles.fitTight,
+  too_big: styles.fitHeavy,
+};
 
 /** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
 function gb(bytes: number): string {
@@ -37,8 +54,8 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
 
   const [installed, setInstalled] = useState<InstalledModel[]>([]);
   const [freeDiskBytes, setFreeDiskBytes] = useState<number | null>(null);
-  const [managing, setManaging] = useState<string | null>(null);
-  const [confirmingDelete, setConfirmingDelete] = useState<string | null>(null);
+  const [openMenu, setOpenMenu] = useState<string | null>(null);
+  const [confirmDelete, setConfirmDelete] = useState<string | null>(null);
   const [deleteError, setDeleteError] = useState<string | null>(null);
 
   const { capabilities } = useModelCapabilities();
@@ -63,9 +80,30 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
       });
   }, [refreshInstalled]);
 
+  // Close the popover on an outside click or Escape so it behaves like a real
+  // menu rather than a sticky panel. Non-Element targets count as outside.
+  useEffect(() => {
+    if (openMenu === null) return;
+    const onDown = (e: MouseEvent) => {
+      const t = e.target;
+      if (t instanceof Element && t.closest('[data-menu-root]')) return;
+      setOpenMenu(null);
+    };
+    const onKey = (e: KeyboardEvent) => {
+      if (e.key === 'Escape') setOpenMenu(null);
+    };
+    document.addEventListener('mousedown', onDown);
+    document.addEventListener('keydown', onKey);
+    return () => {
+      document.removeEventListener('mousedown', onDown);
+      document.removeEventListener('keydown', onKey);
+    };
+  }, [openMenu]);
+
   // The backend writes the builtin provider's model field; lift the fresh
-  // snapshot so the active card moves without a tab remount.
+  // snapshot so the active row moves without a tab remount.
   function selectModel(id: string) {
+    setOpenMenu(null);
     void invoke('update_provider_field', {
       providerId: 'builtin',
       field: 'model',
@@ -80,12 +118,16 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
       });
   }
 
+  function openHuggingFace(id: string) {
+    setOpenMenu(null);
+    void invoke('open_url', { url: `${HF_BASE_URL}/${id.split(':')[0]}` });
+  }
+
   // Deletion is refcounted server-side; the backend also clears the builtin
   // provider's model field when the deleted model was the selected one, so
   // the lifted snapshot is the source of truth.
   async function handleDelete(id: string) {
-    setConfirmingDelete(null);
-    setManaging(null);
+    setConfirmDelete(null);
     try {
       await invoke('delete_installed_model', { id });
     } catch (err) {
@@ -101,10 +143,15 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
     }
   }
 
+  const confirmModel = installed.find((m) => m.id === confirmDelete);
+
   return (
     <div className={styles.pane}>
       <div className={styles.bar}>
         <button type="button" className={styles.addButton} onClick={onAddModel}>
+          <svg viewBox="0 0 24 24" aria-hidden="true" className={styles.addIcon}>
+            <path d="M12 5v14M5 12h14" />
+          </svg>
           Add model
         </button>
       </div>
@@ -131,33 +178,13 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                 key={m.id}
                 className={`${styles.card} ${active ? styles.cardActive : ''}`}
               >
+                {active ? <span className={styles.activeEdge} /> : null}
                 <div className={styles.row}>
-                  <div className={styles.avatar}>
-                    {m.display_name.charAt(0).toUpperCase()}
-                  </div>
                   <div className={styles.mid}>
                     <div className={styles.name}>
                       {m.display_name}
                       {active ? (
-                        <span
-                          className={`${styles.badge} ${styles.badgeActive}`}
-                        >
-                          Active
-                        </span>
-                      ) : null}
-                      {caps?.vision ? (
-                        <span
-                          className={`${styles.badge} ${styles.badgeVision}`}
-                        >
-                          Vision
-                        </span>
-                      ) : null}
-                      {caps?.thinking ? (
-                        <span
-                          className={`${styles.badge} ${styles.badgeReason}`}
-                        >
-                          Reasoning
-                        </span>
+                        <span className={styles.activeBadge}>Active</span>
                       ) : null}
                     </div>
                     <div className={styles.org}>
@@ -166,66 +193,69 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                       {gb(m.size_bytes)} GB
                     </div>
                   </div>
-                  <div className={styles.actions}>
-                    {active ? null : (
+                  <div className={styles.right}>
+                    {m.fit ? (
+                      <span className={`${styles.fit} ${FIT_CLASS[m.fit]}`}>
+                        {FIT_LABEL[m.fit]}
+                      </span>
+                    ) : null}
+                    {caps?.vision ? (
+                      <span className={styles.tagVision}>Vision</span>
+                    ) : null}
+                    {caps?.thinking ? (
+                      <span className={styles.tagReason}>Reasoning</span>
+                    ) : null}
+                    <div className={styles.menuWrap} data-menu-root>
                       <button
                         type="button"
-                        className={styles.useButton}
-                        aria-label={`Use ${m.display_name}`}
-                        onClick={() => selectModel(m.id)}
+                        className={styles.manageButton}
+                        aria-label={`Manage ${m.display_name}`}
+                        aria-haspopup="menu"
+                        aria-expanded={openMenu === m.id}
+                        onClick={() =>
+                          setOpenMenu((cur) => (cur === m.id ? null : m.id))
+                        }
                       >
-                        Use
+                        ⋮
                       </button>
-                    )}
-                    <button
-                      type="button"
-                      className={styles.manageButton}
-                      aria-label={`Manage ${m.display_name}`}
-                      onClick={() =>
-                        setManaging((cur) => (cur === m.id ? null : m.id))
-                      }
-                    >
-                      ⋮
-                    </button>
+                      {openMenu === m.id ? (
+                        <div className={styles.menu} role="menu">
+                          {active ? null : (
+                            <button
+                              type="button"
+                              role="menuitem"
+                              className={styles.menuItem}
+                              onClick={() => selectModel(m.id)}
+                            >
+                              Set as active
+                            </button>
+                          )}
+                          <button
+                            type="button"
+                            role="menuitem"
+                            className={styles.menuItem}
+                            onClick={() => openHuggingFace(m.id)}
+                          >
+                            View on Hugging Face
+                          </button>
+                          <div className={styles.menuSep} />
+                          <button
+                            type="button"
+                            role="menuitem"
+                            className={`${styles.menuItem} ${styles.menuItemDanger}`}
+                            aria-label={`Delete ${m.display_name}`}
+                            onClick={() => {
+                              setOpenMenu(null);
+                              setConfirmDelete(m.id);
+                            }}
+                          >
+                            Delete
+                          </button>
+                        </div>
+                      ) : null}
+                    </div>
                   </div>
                 </div>
-
-                {managing === m.id ? (
-                  <div className={styles.manageRow}>
-                    {confirmingDelete === m.id ? (
-                      <>
-                        <span className={styles.confirmText}>
-                          Delete {m.display_name}? Its files are removed from
-                          disk.
-                        </span>
-                        <button
-                          type="button"
-                          className={styles.deleteButton}
-                          aria-label="Confirm delete"
-                          onClick={() => void handleDelete(m.id)}
-                        >
-                          Delete
-                        </button>
-                        <button
-                          type="button"
-                          className={styles.ghostButton}
-                          onClick={() => setConfirmingDelete(null)}
-                        >
-                          Cancel
-                        </button>
-                      </>
-                    ) : (
-                      <button
-                        type="button"
-                        className={styles.deleteButton}
-                        aria-label={`Delete ${m.display_name}`}
-                        onClick={() => setConfirmingDelete(m.id)}
-                      >
-                        Delete
-                      </button>
-                    )}
-                  </div>
-                ) : null}
               </div>
             );
           })}
@@ -240,12 +270,24 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
 
       <div className={styles.footer}>
         <span>
-          {freeDiskBytes !== null ? `${gb(freeDiskBytes)} GB free on disk` : ''}
+          {installed.length} model{installed.length === 1 ? '' : 's'} installed
         </span>
         <span>
-          {installed.length} models · capabilities detected automatically
+          {freeDiskBytes !== null ? `${gb(freeDiskBytes)} GB free` : ''}
         </span>
       </div>
+
+      {confirmModel ? (
+        <ConfirmDialog
+          open
+          title={`Delete ${confirmModel.display_name}?`}
+          message="Its files are removed from disk."
+          confirmLabel="Delete"
+          destructive
+          onConfirm={() => void handleDelete(confirmModel.id)}
+          onCancel={() => setConfirmDelete(null)}
+        />
+      ) : null}
     </div>
   );
 }
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index c3732a1c..43f2053b 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -46,6 +46,11 @@
   --rea-bg: rgba(185, 164, 240, 0.1);
   --ok: #79c08e;
   --tight: #e6b56b;
+  /* RAM-fit "Heavy" (model larger than this Mac comfortably holds) and the
+   * single destructive accent (Delete). One token each so the red is not
+   * re-spelled as a literal across the model panes. */
+  --heavy: #ef6b6b;
+  --danger: #ff7a6e;
   --radius-card: 10px;
   --radius-control: 8px;
   --radius-pill: 999px;
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 094ab21e..bb1506a7 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -83,12 +83,17 @@ export interface InstalledModel {
   size_bytes: number;
   /** Quantisation label (e.g. "Q4_K_M"); empty when unknown. */
   quant: string;
+  /** RAM-fit on this Mac, computed by the backend from the recorded size.
+   * `null`/absent when host RAM or the size is unknown. */
+  fit?: RamFit | null;
 }
 
-/** One `.gguf` row from `list_hf_repo_ggufs`, for the paste-a-repo browser. */
+/** One `.gguf` row from `list_hf_repo_ggufs`, for the paste-a-repo browser.
+ * `fit` is the accurate per-quant RAM-fit computed from the real file size. */
 export interface HfGgufFile {
   file: string;
   size_bytes: number;
+  fit?: RamFit | null;
 }
 
 /** Engine lifecycle snapshot published on the `engine:status` event. */

From 60ad330150a85448adcbbbc566804b1b64207217 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 14:34:58 -0500
Subject: [PATCH 10/89] feat: redesign the Discover pane with RAM-fit, HF
 links, icon download, and Load more

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/DiscoverPane.module.css       | 138 +++++++++++++-----
 .../tabs/models/DiscoverPane.test.tsx         | 119 +++++++++++----
 src/settings/tabs/models/DiscoverPane.tsx     |  89 ++++++++---
 src/settings/tabs/models/useHfSearch.test.ts  |  54 ++++++-
 src/settings/tabs/models/useHfSearch.ts       |  44 +++++-
 src/types/hf.ts                               |  19 ++-
 6 files changed, 368 insertions(+), 95 deletions(-)

diff --git a/src/settings/tabs/models/DiscoverPane.module.css b/src/settings/tabs/models/DiscoverPane.module.css
index bed658fd..ace090fb 100644
--- a/src/settings/tabs/models/DiscoverPane.module.css
+++ b/src/settings/tabs/models/DiscoverPane.module.css
@@ -54,16 +54,6 @@
   color: var(--t3);
 }
 
-.kbd {
-  margin-left: auto;
-  flex: none;
-  font-size: 11px;
-  color: var(--t3);
-  border: 1px solid var(--hair);
-  border-radius: 5px;
-  padding: 2px 6px;
-}
-
 /* ── Family filter chips ──────────────────────────────────────────────── */
 
 .chips {
@@ -140,20 +130,6 @@
   background: var(--elev-1);
 }
 
-.av {
-  width: 28px;
-  height: 28px;
-  border-radius: 7px;
-  flex: none;
-  display: grid;
-  place-items: center;
-  background: var(--elev-2);
-  font-weight: 700;
-  color: var(--t2);
-  font-size: 11px;
-  text-transform: uppercase;
-}
-
 .mid {
   flex: 1;
   min-width: 0;
@@ -186,30 +162,122 @@
   background: rgba(230, 181, 107, 0.1);
 }
 
+/* RAM-fit hint (estimated on the row, accurate per-quant in the accordion). */
+.fit {
+  flex: none;
+  display: inline-flex;
+  align-items: center;
+  gap: 5px;
+  font-size: 11px;
+  font-weight: 560;
+}
+.fit::before {
+  content: '';
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
+  background: currentColor;
+}
+.fitOk {
+  color: var(--ok);
+}
+.fitTight {
+  color: var(--tight);
+}
+.fitHeavy {
+  color: var(--heavy);
+}
+
+/* Icon-only link out to the repo's Hugging Face page. */
+.extlink {
+  flex: none;
+  width: 28px;
+  height: 28px;
+  display: grid;
+  place-items: center;
+  border: none;
+  border-radius: 7px;
+  background: transparent;
+  color: var(--t3);
+  cursor: pointer;
+  transition:
+    color 140ms ease,
+    background 140ms ease;
+}
+.extlink svg {
+  width: 14px;
+  height: 14px;
+  stroke: currentColor;
+  stroke-width: 1.7;
+  fill: none;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+}
+.extlink:hover {
+  color: var(--accent);
+  background: var(--elev-1);
+}
+
+/* Icon-only download / open-quants button. */
 .get {
   flex: none;
-  font-size: 11.5px;
-  font-weight: 540;
-  color: var(--t2);
-  border: 1px solid var(--hair);
+  width: 30px;
+  height: 30px;
+  display: grid;
+  place-items: center;
+  color: var(--accent);
+  border: 1px solid rgba(255, 141, 92, 0.4);
   border-radius: var(--radius-control);
-  padding: 5px 12px;
   background: transparent;
-  font-family: inherit;
   cursor: pointer;
+  transition:
+    color 140ms ease,
+    border-color 140ms ease,
+    background 140ms ease;
+}
+.get svg {
+  width: 15px;
+  height: 15px;
+  stroke: currentColor;
+  stroke-width: 1.7;
+  fill: none;
+  stroke-linecap: round;
+  stroke-linejoin: round;
 }
-
 .get:hover:not(:disabled) {
-  color: var(--t1);
-  border-color: rgba(255, 141, 92, 0.5);
+  border-color: rgba(255, 141, 92, 0.6);
   background: var(--accent-soft);
 }
-
 .get:disabled {
-  opacity: 0.45;
+  color: var(--t3);
+  border-color: var(--hair-soft);
+  opacity: 0.6;
   cursor: default;
 }
 
+/* "Load more" pages past the first batch of results. */
+.loadMore {
+  display: block;
+  width: 100%;
+  margin-top: 10px;
+  padding: 10px;
+  border: 1px dashed var(--hair);
+  border-radius: 10px;
+  background: transparent;
+  color: var(--t2);
+  font-family: inherit;
+  font-size: 12px;
+  font-weight: 540;
+  cursor: pointer;
+  transition:
+    color 140ms ease,
+    border-color 140ms ease;
+}
+.loadMore:hover {
+  color: var(--accent);
+  border-color: rgba(255, 141, 92, 0.4);
+}
+
 /* ── Accordion: per-repo quant choices ────────────────────────────────── */
 
 .expand {
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index ec5cb988..ce77df18 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -1,12 +1,13 @@
 /**
  * Unit tests for the Discover pane: the in-app Hugging Face GGUF browser.
  *
- * Covers the search field wiring, family filter chips, the result list rows
- * (avatar/org parsing, gated rows), the per-row quant accordion (expand,
- * empty repo, list error), and the download flow (start, progress, ready ->
- * onSaved + collapse, cancel, retry). The download channel is captured the
- * same way ProviderCards.test.tsx does it: `onEvent` is grabbed off the
- * invoke args and driven with `simulateMessage`.
+ * Covers the search field wiring, family filter chips, the result rows (org
+ * parsing, gated rows, RAM-fit hint, the Hugging Face link), pagination (Load
+ * more), the per-row quant accordion (expand, per-quant fit, empty repo, list
+ * error), and the download flow (start, progress, ready -> onSaved + collapse,
+ * cancel, retry). The download channel is captured the same way
+ * ProviderCards.test.tsx does it: `onEvent` is grabbed off the invoke args and
+ * driven with `simulateMessage`.
  */
 
 import {
@@ -22,7 +23,7 @@ import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
 import { invoke } from '@tauri-apps/api/core';
 
 import { DiscoverPane } from './DiscoverPane';
-import { HF_SEARCH_DEBOUNCE_MS } from './useHfSearch';
+import { HF_SEARCH_DEBOUNCE_MS, HF_PAGE_SIZE } from './useHfSearch';
 import type { HfModelSummary } from '../../../types/hf';
 import type { HfGgufFile } from '../../../types/starter';
 import type { RawAppConfig } from '../../types';
@@ -62,13 +63,19 @@ function mockCommands(responses: Record<string, unknown>) {
 }
 
 const RESULTS: HfModelSummary[] = [
-  { id: 'google/gemma-4-12b-it-GGUF', downloads: 1_200_000, gated: false },
+  {
+    id: 'google/gemma-4-12b-it-GGUF',
+    downloads: 1_200_000,
+    gated: false,
+    est_runtime_gb: 9.5,
+    fit: 'fits',
+  },
   { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
   { id: 'meta-llama/Llama-3-8B-GGUF', downloads: 9_000, gated: true },
 ];
 
 const GGUFS: HfGgufFile[] = [
-  { file: 'gemma-q4.gguf', size_bytes: 5_000_000_000 },
+  { file: 'gemma-q4.gguf', size_bytes: 5_000_000_000, fit: 'tight' },
   { file: 'gemma-q8.gguf', size_bytes: 9_000_000_000 },
 ];
 
@@ -111,19 +118,19 @@ async function renderPane(
   mockCommands(discoverResponses(overrides));
   const view = render(<DiscoverPane onSaved={onSaved} />);
   await waitFor(() =>
-    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', { query: '' }),
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: '',
+      limit: HF_PAGE_SIZE,
+    }),
   );
   await flush();
   return view;
 }
 
 describe('DiscoverPane', () => {
-  it('renders a row per search result with parsed avatar and org line', async () => {
+  it('renders a row per search result with the repo id and org line', async () => {
     await renderPane();
     expect(screen.getByText('google/gemma-4-12b-it-GGUF')).toBeInTheDocument();
-    // Avatar is the first letter of the org segment.
-    expect(screen.getByText('g', { selector: '*' })).toBeTruthy();
-    // Org + formatted downloads sub-line.
     expect(
       screen.getByText('google · 1,200,000 downloads'),
     ).toBeInTheDocument();
@@ -132,10 +139,17 @@ describe('DiscoverPane', () => {
 
   it('shows the result count in the sub-bar', async () => {
     await renderPane();
-    expect(screen.getByText(/GGUF models/)).toHaveTextContent('3 GGUF models');
+    expect(screen.getByText(/chat models/)).toHaveTextContent('3 chat models');
   });
 
-  it('renders the avatar from the full id when it has no org segment', async () => {
+  it('shows the estimated RAM-fit on a row when the backend provides one', async () => {
+    await renderPane();
+    // Only the first result carries a fit estimate.
+    expect(screen.getByText('Comfortable')).toBeInTheDocument();
+    expect(screen.getAllByText('Comfortable')).toHaveLength(1);
+  });
+
+  it('parses the org line from the full id when it has no org segment', async () => {
     await renderPane(() => {}, {
       search_hf_models: [
         { id: 'standalone-repo', downloads: 12, gated: false },
@@ -172,6 +186,7 @@ describe('DiscoverPane', () => {
     });
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
       query: 'qwen',
+      limit: HF_PAGE_SIZE,
     });
   });
 
@@ -191,6 +206,7 @@ describe('DiscoverPane', () => {
     });
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
       query: 'Llama',
+      limit: HF_PAGE_SIZE,
     });
     expect(screen.getByRole('button', { name: 'Llama' })).toHaveAttribute(
       'aria-pressed',
@@ -206,7 +222,6 @@ describe('DiscoverPane', () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
     });
-    // All is the active chip while the query is empty.
     expect(screen.getByRole('button', { name: 'All' })).toHaveAttribute(
       'aria-pressed',
       'true',
@@ -226,7 +241,10 @@ describe('DiscoverPane', () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
     });
-    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', { query: '' });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: '',
+      limit: HF_PAGE_SIZE,
+    });
   });
 
   it('renders every family chip', async () => {
@@ -244,7 +262,19 @@ describe('DiscoverPane', () => {
     }
   });
 
-  it('disables Get and shows a gated note for a gated repo', async () => {
+  it('opens the repo on Hugging Face from the row link', async () => {
+    await renderPane();
+    fireEvent.click(
+      screen.getByRole('button', {
+        name: 'View google/gemma-4-12b-it-GGUF on Hugging Face',
+      }),
+    );
+    expect(invokeMock).toHaveBeenCalledWith('open_url', {
+      url: 'https://huggingface.co/google/gemma-4-12b-it-GGUF',
+    });
+  });
+
+  it('disables the download button and shows a Gated badge for a gated repo', async () => {
     await renderPane();
     const gatedRow = screen
       .getByText('meta-llama/Llama-3-8B-GGUF')
@@ -252,10 +282,10 @@ describe('DiscoverPane', () => {
     expect(
       within(gatedRow).getByRole('button', { name: 'Get' }),
     ).toBeDisabled();
-    expect(within(gatedRow).getByText('gated')).toBeInTheDocument();
+    expect(within(gatedRow).getByText('Gated')).toBeInTheDocument();
   });
 
-  it('expanding a row lists each GGUF file with its size', async () => {
+  it('expanding a row lists each GGUF file with its size and per-quant fit', async () => {
     await renderPane();
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
@@ -267,11 +297,13 @@ describe('DiscoverPane', () => {
     });
     expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument();
     expect(screen.getByText('5.0 GB')).toBeInTheDocument();
+    // The first quant carries an accurate per-quant fit; the second does not.
+    expect(within(row).getByText('Tight')).toBeInTheDocument();
     expect(screen.getByText('gemma-q8.gguf')).toBeInTheDocument();
     expect(screen.getByText('9.0 GB')).toBeInTheDocument();
   });
 
-  it('collapses an expanded row when Get is clicked again', async () => {
+  it('collapses an expanded row when the download button is clicked again', async () => {
     await renderPane();
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
@@ -323,7 +355,6 @@ describe('DiscoverPane', () => {
       .closest('[data-row]') as HTMLElement;
     fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
     await flush();
-    // Download the second quant.
     const downloadButtons = screen.getAllByRole('button', {
       name: 'Download',
     });
@@ -336,7 +367,6 @@ describe('DiscoverPane', () => {
         file: 'gemma-q8.gguf',
       }),
     );
-    // Progress is shown via DownloadProgress.
     act(() => {
       lastChannel?.simulateMessage({
         type: 'Started',
@@ -348,7 +378,6 @@ describe('DiscoverPane', () => {
       });
     });
     expect(screen.getByText('Downloading model')).toBeInTheDocument();
-    // Completion lifts the fresh config and collapses the accordion.
     act(() => {
       lastChannel?.simulateMessage({ type: 'AllDone' });
     });
@@ -415,7 +444,6 @@ describe('DiscoverPane', () => {
       (c: unknown[]) => c[0] === 'download_repo_model',
     );
     expect(repoDownloads).toHaveLength(2);
-    // Choose another returns to the quant list.
     act(() => {
       lastChannel?.simulateMessage({
         type: 'Failed',
@@ -449,6 +477,43 @@ describe('DiscoverPane', () => {
   it('shows a no-results message when the search returns nothing', async () => {
     await renderPane(() => {}, { search_hf_models: [] });
     expect(screen.getByText('No models found.')).toBeInTheDocument();
-    expect(screen.getByText(/GGUF models/)).toHaveTextContent('0 GGUF models');
+    expect(screen.getByText(/chat models/)).toHaveTextContent('0 chat models');
+  });
+
+  it('offers Load more on a full page and pages to the next batch', async () => {
+    vi.useFakeTimers();
+    const full = (n: number): HfModelSummary[] =>
+      Array.from({ length: n }, (_, i) => ({
+        id: `org/repo-${i}-GGUF`,
+        downloads: n - i,
+        gated: false,
+      }));
+    mockCommands(discoverResponses({ search_hf_models: full(HF_PAGE_SIZE) }));
+    render(<DiscoverPane onSaved={() => {}} />);
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    const loadMore = screen.getByRole('button', { name: 'Load more' });
+    invokeMock.mockClear();
+    mockCommands(
+      discoverResponses({ search_hf_models: full(HF_PAGE_SIZE + 5) }),
+    );
+    fireEvent.click(loadMore);
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: '',
+      limit: HF_PAGE_SIZE * 2,
+    });
+  });
+
+  it('hides Load more when the page is not full', async () => {
+    await renderPane();
+    expect(
+      screen.queryByRole('button', { name: 'Load more' }),
+    ).not.toBeInTheDocument();
   });
 });
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
index 94951991..38b7d010 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -2,13 +2,14 @@
  * Discover pane: the in-app Hugging Face GGUF model browser.
  *
  * A search field (driven by {@link useHfSearch}) plus a row of family filter
- * chips feed one debounced backend query. The result list renders one lean
- * row per repo: the search payload carries no size or capability data, so a
- * row shows only the avatar, the repo id, an org + downloads sub-line, and a
- * gated indicator. "Get" expands the row into a quant accordion that lists the
- * repo's `.gguf` files (`list_hf_repo_ggufs`) and downloads the chosen one
- * through the shared {@link useDownloadModel} kit. A finished install lifts a
- * fresh config snapshot through `onSaved` and collapses the row.
+ * chips feed one debounced backend query that returns chat/text-generation
+ * GGUF repos. Each lean row shows the repo id, an org + downloads sub-line, an
+ * approximate RAM-fit hint, a link out to the repo on Hugging Face, and an
+ * icon-only download button. That button expands a quant accordion listing the
+ * repo's `.gguf` files (`list_hf_repo_ggufs`, each with an accurate per-quant
+ * RAM-fit) and downloads the chosen one through the shared
+ * {@link useDownloadModel} kit. A "Load more" control pages past the first
+ * batch. A finished install lifts a fresh config snapshot and collapses the row.
  */
 
 import { useCallback, useEffect, useState } from 'react';
@@ -19,9 +20,25 @@ import { useDownloadModel } from '../../../hooks/useDownloadModel';
 import { useHfSearch } from './useHfSearch';
 import styles from './DiscoverPane.module.css';
 import type { HfModelSummary } from '../../../types/hf';
-import type { HfGgufFile } from '../../../types/starter';
+import type { HfGgufFile, RamFit } from '../../../types/starter';
 import type { RawAppConfig } from '../../types';
 
+const HF_BASE_URL = 'https://huggingface.co';
+
+/** RAM-fit hint label (shared vocabulary with the Library pane). */
+const FIT_LABEL: Record<RamFit, string> = {
+  fits: 'Comfortable',
+  tight: 'Tight',
+  too_big: 'Heavy',
+};
+
+/** RAM-fit hint colour class on this pane's stylesheet. */
+const FIT_CLASS: Record<RamFit, string> = {
+  fits: styles.fitOk,
+  tight: styles.fitTight,
+  too_big: styles.fitHeavy,
+};
+
 /**
  * Family filter chips. Clicking a chip sets the search query to its name;
  * `All` (empty query) is the browse-popular default. No backend beyond the
@@ -48,13 +65,25 @@ function orgOf(id: string): string {
   return slash === -1 ? id : id.slice(0, slash);
 }
 
+const DOWNLOAD_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M12 4v11M7 11l5 5 5-5M5 20h14" />
+  </svg>
+);
+const HF_LINK_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M14 3h7v7M21 3l-9 9M19 14v5a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V7a2 2 0 0 1 2-2h5" />
+  </svg>
+);
+
 interface DiscoverPaneProps {
   /** Lift a fresh config snapshot after a successful install. */
   onSaved: (next: RawAppConfig) => void;
 }
 
 export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
-  const { query, setQuery, results, loading } = useHfSearch();
+  const { query, setQuery, results, loading, loadMore, canLoadMore } =
+    useHfSearch();
 
   return (
     <div className={styles.pane}>
@@ -75,7 +104,6 @@ export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
           value={query}
           onChange={(e) => setQuery(e.target.value)}
         />
-        <span className={styles.kbd}>⌘K</span>
       </div>
 
       <div className={styles.chips}>
@@ -98,7 +126,7 @@ export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
 
       <div className={styles.subbar}>
         <span className={styles.count}>
-          <b>{results.length}</b> GGUF models
+          <b>{results.length}</b> chat models
         </span>
         <span className={styles.sort}>Most downloaded</span>
       </div>
@@ -111,6 +139,11 @@ export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
         {results.map((model) => (
           <DiscoverRow key={model.id} model={model} onSaved={onSaved} />
         ))}
+        {canLoadMore ? (
+          <button type="button" className={styles.loadMore} onClick={loadMore}>
+            Load more
+          </button>
+        ) : null}
       </div>
     </div>
   );
@@ -158,6 +191,10 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
     void loadFiles();
   }
 
+  function openHuggingFace() {
+    void invoke('open_url', { url: `${HF_BASE_URL}/${model.id}` });
+  }
+
   // A finished install: the backend already wrote the builtin provider's
   // model field, so lift the fresh config snapshot and collapse the row.
   useEffect(() => {
@@ -178,28 +215,39 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
   return (
     <div className={styles.rowWrap} data-row>
       <div className={styles.row}>
-        <div className={styles.av} aria-hidden="true">
-          {org.charAt(0)}
-        </div>
         <div className={styles.mid}>
           <div className={styles.nm}>
             {model.id}
             {model.gated ? (
-              <span className={styles.gatedBadge}>gated</span>
+              <span className={styles.gatedBadge}>Gated</span>
             ) : null}
           </div>
           <div className={styles.org}>
             {org} · {model.downloads.toLocaleString()} downloads
           </div>
         </div>
+        {model.fit ? (
+          <span className={`${styles.fit} ${FIT_CLASS[model.fit]}`}>
+            {FIT_LABEL[model.fit]}
+          </span>
+        ) : null}
+        <button
+          type="button"
+          className={styles.extlink}
+          aria-label={`View ${model.id} on Hugging Face`}
+          onClick={openHuggingFace}
+        >
+          {HF_LINK_ICON}
+        </button>
         <button
           type="button"
           className={styles.get}
+          aria-label="Get"
           aria-expanded={expanded}
           disabled={model.gated}
           onClick={toggle}
         >
-          Get
+          {DOWNLOAD_ICON}
         </button>
       </div>
 
@@ -215,9 +263,12 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
             ? files.map((f) => (
                 <div className={styles.quantRow} key={f.file}>
                   <span className={styles.quantName}>{f.file}</span>
-                  <span className={styles.quantSize}>
-                    {gb(f.size_bytes)} GB
-                  </span>
+                  {f.fit ? (
+                    <span className={`${styles.fit} ${FIT_CLASS[f.fit]}`}>
+                      {FIT_LABEL[f.fit]}
+                    </span>
+                  ) : null}
+                  <span className={styles.quantSize}>{gb(f.size_bytes)} GB</span>
                   <button
                     type="button"
                     className={styles.download}
diff --git a/src/settings/tabs/models/useHfSearch.test.ts b/src/settings/tabs/models/useHfSearch.test.ts
index 9809d6c9..ead36ddd 100644
--- a/src/settings/tabs/models/useHfSearch.test.ts
+++ b/src/settings/tabs/models/useHfSearch.test.ts
@@ -13,7 +13,11 @@ import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
 
 import { invoke } from '@tauri-apps/api/core';
 
-import { useHfSearch, HF_SEARCH_DEBOUNCE_MS } from './useHfSearch';
+import {
+  useHfSearch,
+  HF_SEARCH_DEBOUNCE_MS,
+  HF_PAGE_SIZE,
+} from './useHfSearch';
 import type { HfModelSummary } from '../../../types/hf';
 
 const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
@@ -52,7 +56,10 @@ describe('useHfSearch', () => {
     const { result } = renderHook(() => useHfSearch());
 
     await waitFor(() => expect(result.current.loading).toBe(false));
-    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', { query: '' });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: '',
+      limit: HF_PAGE_SIZE,
+    });
     expect(result.current.results).toEqual(POPULAR);
     expect(result.current.query).toBe('');
   });
@@ -79,6 +86,7 @@ describe('useHfSearch', () => {
     });
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
       query: 'gemma',
+      limit: HF_PAGE_SIZE,
     });
     expect(result.current.results).toEqual(GEMMA);
   });
@@ -109,6 +117,7 @@ describe('useHfSearch', () => {
     expect(invokeMock).toHaveBeenCalledTimes(1);
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
       query: 'gem',
+      limit: HF_PAGE_SIZE,
     });
   });
 
@@ -249,6 +258,47 @@ describe('useHfSearch', () => {
     });
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
       query: 'llama',
+      limit: HF_PAGE_SIZE,
+    });
+  });
+
+  it('does not offer Load more when the page is not full', async () => {
+    invokeMock.mockResolvedValue(POPULAR); // one row, far below a full page
+    const { result } = renderHook(() => useHfSearch());
+    await waitFor(() => expect(result.current.loading).toBe(false));
+    expect(result.current.canLoadMore).toBe(false);
+  });
+
+  it('Load more requests the next page and clears canLoadMore when it runs dry', async () => {
+    vi.useFakeTimers();
+    const full = (n: number): HfModelSummary[] =>
+      Array.from({ length: n }, (_, i) => ({
+        id: `org/repo-${i}-GGUF`,
+        downloads: n - i,
+        gated: false,
+      }));
+    invokeMock.mockResolvedValueOnce(full(HF_PAGE_SIZE)); // mount fills page 1
+    const { result } = renderHook(() => useHfSearch());
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(result.current.results).toHaveLength(HF_PAGE_SIZE);
+    expect(result.current.canLoadMore).toBe(true);
+
+    invokeMock.mockClear();
+    // The grown request (HF_PAGE_SIZE * 2) comes back short: the Hub ran dry.
+    invokeMock.mockResolvedValueOnce(full(HF_PAGE_SIZE + 15));
+    act(() => result.current.loadMore());
+    await act(async () => {
+      vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
+      query: '',
+      limit: HF_PAGE_SIZE * 2,
     });
+    expect(result.current.results).toHaveLength(HF_PAGE_SIZE + 15);
+    expect(result.current.canLoadMore).toBe(false);
   });
 });
diff --git a/src/settings/tabs/models/useHfSearch.ts b/src/settings/tabs/models/useHfSearch.ts
index 80585d88..5ed5d8e4 100644
--- a/src/settings/tabs/models/useHfSearch.ts
+++ b/src/settings/tabs/models/useHfSearch.ts
@@ -19,6 +19,10 @@ import type { HfModelSummary } from '../../../types/hf';
 /** Debounce window before a query change triggers a backend fetch. */
 export const HF_SEARCH_DEBOUNCE_MS = 300;
 
+/** How many more results each "Load more" press requests. Mirrors the backend
+ * page step (`HF_SEARCH_LIMIT`); the backend clamps the total to its own max. */
+export const HF_PAGE_SIZE = 30;
+
 /**
  * Runtime guard for the IPC boundary. The Rust backend is trusted, but this
  * keeps the hook robust against shape drift (schema changes, legacy builds,
@@ -48,12 +52,17 @@ function isHfModelSummaryArray(value: unknown): value is HfModelSummary[] {
 export interface UseHfSearchResult {
   /** The current query text, updated synchronously on every keystroke. */
   query: string;
-  /** Set the query. Updates immediately; the backend fetch is debounced. */
+  /** Set the query. Updates immediately; the backend fetch is debounced.
+   * A new query resets pagination back to the first page. */
   setQuery: (q: string) => void;
   /** The most recent (validated) search results, or `[]` on any failure. */
   results: HfModelSummary[];
   /** True while a debounced fetch is in flight. */
   loading: boolean;
+  /** Request the next page (one more {@link HF_PAGE_SIZE} of results). */
+  loadMore: () => void;
+  /** True when idle and the last response filled the limit, so more may exist. */
+  canLoadMore: boolean;
 }
 
 /**
@@ -66,7 +75,8 @@ export interface UseHfSearchResult {
  * are also dropped.
  */
 export function useHfSearch(): UseHfSearchResult {
-  const [query, setQuery] = useState('');
+  const [query, setQueryState] = useState('');
+  const [limit, setLimit] = useState(HF_PAGE_SIZE);
   const [results, setResults] = useState<HfModelSummary[]>([]);
   const [loading, setLoading] = useState(true);
 
@@ -84,13 +94,27 @@ export function useHfSearch(): UseHfSearchResult {
     return mountedRef.current && token === latestTokenRef.current;
   }, []);
 
+  // A new query starts over at the first page; growing `limit` mid-query is
+  // what "Load more" does.
+  const setQuery = useCallback((q: string) => {
+    setQueryState(q);
+    setLimit(HF_PAGE_SIZE);
+  }, []);
+
+  const loadMore = useCallback(() => {
+    setLimit((current) => current + HF_PAGE_SIZE);
+  }, []);
+
   const runSearch = useCallback(
-    async (q: string): Promise<void> => {
+    async (q: string, lim: number): Promise<void> => {
       latestTokenRef.current += 1;
       const token = latestTokenRef.current;
       setLoading(true);
       try {
-        const payload = await invoke<unknown>('search_hf_models', { query: q });
+        const payload = await invoke<unknown>('search_hf_models', {
+          query: q,
+          limit: lim,
+        });
         if (!isLatest(token)) return;
         setResults(isHfModelSummaryArray(payload) ? payload : []);
       } catch {
@@ -105,13 +129,17 @@ export function useHfSearch(): UseHfSearchResult {
 
   // Debounced fetch: a query change schedules a fetch, and any further change
   // within the window cancels and reschedules it, so a burst of keystrokes
-  // makes a single call. The empty-query mount fetch rides the same path.
+  // makes a single call. The empty-query mount fetch and "Load more" (a
+  // `limit` bump) ride the same path.
   useEffect(() => {
     const timer = window.setTimeout(() => {
-      void runSearch(query);
+      void runSearch(query, limit);
     }, HF_SEARCH_DEBOUNCE_MS);
     return () => window.clearTimeout(timer);
-  }, [query, runSearch]);
+  }, [query, limit, runSearch]);
+
+  // The last response filled the page, so the Hub may hold more rows.
+  const canLoadMore = !loading && results.length >= limit;
 
-  return { query, setQuery, results, loading };
+  return { query, setQuery, results, loading, loadMore, canLoadMore };
 }
diff --git a/src/types/hf.ts b/src/types/hf.ts
index addd917d..f6d893f3 100644
--- a/src/types/hf.ts
+++ b/src/types/hf.ts
@@ -3,18 +3,25 @@
 /**
  * IPC shapes for the in-app Hugging Face GGUF model browser (the Discover
  * pane). Mirrors the serde output of the Rust `search_hf_models` command,
- * which serializes its `HfModelSummary` struct as snake_case.
+ * which serializes its `HfModelRow` struct (a flattened `HfModelSummary` plus
+ * an estimated RAM-fit) as snake_case.
  */
 
+import type { RamFit } from './starter';
+
 /**
  * One repo row from `search_hf_models`. The search payload is deliberately
- * lean: it carries only what the Discover list needs to render a row and to
- * decide whether anonymous download is allowed.
+ * lean: it carries what the Discover list needs to render a row, decide
+ * whether anonymous download is allowed, and show an approximate RAM-fit.
  *
  * - `id` is the canonical `owner/repo` slug.
  * - `downloads` is Hugging Face's all-time download count for the repo.
  * - `gated` is true when the repo requires accepting terms or auth; an
- *   anonymous download fails, so the Discover row disables "Get" for it.
+ *   anonymous download fails, so the Discover row disables download for it.
+ * - `est_runtime_gb` / `fit` are estimated from the parameter count in the
+ *   repo id (no file size is available at search time); both are `null` when
+ *   the id carries no `<number>B` token, and `fit` is also `null` when host
+ *   RAM is unknown. Accurate per-quant fit arrives at the expand step.
  */
 export interface HfModelSummary {
   /** Canonical `owner/repo` slug. */
@@ -23,4 +30,8 @@ export interface HfModelSummary {
   downloads: number;
   /** True when the repo is gated; anonymous downloads fail. */
   gated: boolean;
+  /** Estimated resident footprint in GiB, or `null` when not derivable. */
+  est_runtime_gb?: number | null;
+  /** Estimated RAM-fit for this Mac, or `null` when not derivable. */
+  fit?: RamFit | null;
 }

From a1dbcd302c03391ec8bdcfed2d065c72f23e436a Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 14:42:20 -0500
Subject: [PATCH 11/89] fix: Providers pane row padding, switch confirmation,
 single prompt header, footer

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/ProvidersPane.test.tsx        | 23 +++--
 src/settings/tabs/models/ProvidersPane.tsx    | 99 ++++++++++++-------
 src/styles/settings.module.css                |  9 +-
 3 files changed, 76 insertions(+), 55 deletions(-)

diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index a606617a..5c2b296d 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -316,6 +316,8 @@ describe('ProvidersPane other providers', () => {
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]), { onSaved });
     const switches = screen.getAllByRole('button', { name: 'Switch' });
     fireEvent.click(switches[0]);
+    // The switch is confirmed in a dialog before it takes effect.
+    fireEvent.click(screen.getByRole('button', { name: /^Switch to / }));
     await waitFor(() =>
       expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
         providerId: 'builtin',
@@ -324,10 +326,22 @@ describe('ProvidersPane other providers', () => {
     await waitFor(() => expect(onSaved).toHaveBeenCalledWith(next));
   });
 
+  it('cancels a provider switch without changing the active provider', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    fireEvent.click(screen.getAllByRole('button', { name: 'Switch' })[0]);
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    expect(screen.queryByRole('button', { name: /^Switch to / })).toBeNull();
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'set_active_provider',
+      expect.anything(),
+    );
+  });
+
   it('swallows a failed provider switch', async () => {
     mockInvoke({ set_active_provider: new Error('x') });
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
     fireEvent.click(screen.getAllByRole('button', { name: 'Switch' })[0]);
+    fireEvent.click(screen.getByRole('button', { name: /^Switch to / }));
     await Promise.resolve();
   });
 
@@ -538,15 +552,6 @@ describe('ProvidersPane robustness', () => {
     expect(url).toHaveValue('http://127.0.0.1:11434');
   });
 
-  it('shows the built-in active size in the footnote', async () => {
-    const builtin = { ...BUILTIN, model: INSTALLED[0].id };
-    mockInvoke({ list_installed_models: INSTALLED });
-    renderPane(makeConfig('builtin', [builtin, OLLAMA]));
-    await waitFor(() =>
-      expect(screen.getByText(/built-in active: 6.6 GB/)).toBeInTheDocument(),
-    );
-  });
-
   it('pluralises the installed count in the footnote', () => {
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
     expect(screen.getByText(/0 installed models/)).toBeInTheDocument();
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index 3a692749..4321d298 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -19,7 +19,7 @@ import { useEffect, useRef, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 import { listen } from '@tauri-apps/api/event';
 
-import { Textarea, Toggle } from '../../components';
+import { ConfirmDialog, Textarea, Toggle } from '../../components';
 import { SaveField } from '../../components/SaveField';
 import { OpenAiProviderCard, AddOpenAiProvider } from '../ProviderCards';
 import { useDebouncedSave } from '../../hooks/useDebouncedSave';
@@ -66,11 +66,6 @@ function posToCtx(pos: number): number {
 }
 const CTX_TICKS = ['2K', '8K', '32K', '128K', '512K', '1M'];
 
-/** Bytes rendered as decimal gigabytes with one decimal. */
-function gb(bytes: number): string {
-  return (bytes / 1e9).toFixed(1);
-}
-
 /** One-line description shown under a provider's name. */
 function providerSubtitle(p: RawProvider): string {
   if (p.kind === 'builtin') return "Thuki's bundled llama.cpp engine";
@@ -168,8 +163,20 @@ export function ProvidersPane({
   const [ollamaUrl, setOllamaUrl] = useState(ollamaBaseUrl);
   const ollamaUrlFocusedRef = useRef(false);
 
+  // System prompt (debounced save); the editor mounts inline under the
+  // Generation list with a single header, so it does not use SaveField's row.
+  const [promptValue, setPromptValue] = useState(config.prompt.system);
+  const { resetTo: resetPrompt } = useDebouncedSave(
+    'prompt',
+    'system',
+    promptValue,
+    { onSaved },
+  );
+
   const [promptOpen, setPromptOpen] = useState(false);
   const [devOpen, setDevOpen] = useState(false);
+  // A provider switch is confirmed before it takes effect.
+  const [pendingSwitch, setPendingSwitch] = useState<RawProvider | null>(null);
 
   const { activeModel, availableModels, setActiveModel } = useModelSelection();
 
@@ -187,6 +194,8 @@ export function ProvidersPane({
     setCtxPos(ctxToPos(nextCtx));
     setCtxChip(String(nextCtx));
     resetNumCtx(nextCtx);
+    setPromptValue(config.prompt.system);
+    resetPrompt(config.prompt.system);
     if (!ollamaUrlFocusedRef.current) setOllamaUrl(ollamaBaseUrl);
   }
 
@@ -235,7 +244,6 @@ export function ProvidersPane({
   const builtinModelValue = installed.some((m) => m.id === builtinModelId)
     ? builtinModelId
     : '';
-  const activeBuiltin = installed.find((m) => m.id === builtinModelValue);
 
   // Providers other than the active one, in a stable order.
   const otherProviders = providers.filter((p) => p.id !== activeId);
@@ -373,7 +381,7 @@ export function ProvidersPane({
               <button
                 type="button"
                 className={styles.switchBtn}
-                onClick={() => selectProvider(p.id)}
+                onClick={() => setPendingSwitch(p)}
               >
                 Switch
               </button>
@@ -518,10 +526,26 @@ export function ProvidersPane({
           </div>
         </div>
 
-        {/* System prompt */}
+        {/* System prompt: one header (with the ? help), Edit/Done toggles the
+            inline editor below it. */}
         <div className={styles.genRow}>
           <div className={styles.genLabel}>
-            <div className={styles.genName}>System prompt</div>
+            <div className={styles.genName}>
+              System prompt
+              <Tooltip
+                label={configHelp('prompt', 'system')}
+                multiline
+                placement="top"
+              >
+                <button
+                  type="button"
+                  className={styles.infoBtn}
+                  aria-label="About System prompt"
+                >
+                  ?
+                </button>
+              </Tooltip>
+            </div>
             <div className={styles.genHelp}>
               Persona sent at the start of every chat
             </div>
@@ -537,42 +561,27 @@ export function ProvidersPane({
         </div>
         {promptOpen ? (
           <div className={styles.genPromptEditor}>
-            <SaveField
-              section="prompt"
-              fieldKey="system"
-              label="System prompt"
-              helper={configHelp('prompt', 'system')}
-              vertical
-              initialValue={config.prompt.system}
-              resyncToken={resyncToken}
-              onSaved={onSaved}
-              render={(value, setValue) => (
-                <>
-                  <Textarea
-                    value={value}
-                    onChange={setValue}
-                    placeholder="Persona prompt…"
-                    maxLength={PROMPT_MAX_CHARS}
-                    ariaLabel="System prompt"
-                    rows={PROMPT_TEXTAREA_ROWS}
-                  />
-                  <div className={styles.charCounter}>
-                    {value.length} / {PROMPT_MAX_CHARS}
-                  </div>
-                </>
-              )}
+            <Textarea
+              value={promptValue}
+              onChange={setPromptValue}
+              placeholder="Persona prompt…"
+              maxLength={PROMPT_MAX_CHARS}
+              ariaLabel="System prompt"
+              rows={PROMPT_TEXTAREA_ROWS}
             />
+            <div className={styles.charCounter}>
+              {promptValue.length} / {PROMPT_MAX_CHARS}
+            </div>
           </div>
         ) : null}
       </div>
 
-      {/* A small free-disk + count footer mirrors the other panes. */}
+      {/* A small installed-count footer mirrors the other panes. The active
+          model's identity already lives in the hero and the Running footer, so
+          this stays a neutral count rather than restating it. */}
       <div className={styles.genFootnote}>
         {installed.length} installed{' '}
         {installed.length === 1 ? 'model' : 'models'}
-        {builtinProvider && activeBuiltin
-          ? ` · built-in active: ${gb(activeBuiltin.size_bytes)} GB`
-          : ''}
       </div>
 
       <div className={styles.devSection}>
@@ -624,6 +633,20 @@ export function ProvidersPane({
           </div>
         )}
       </div>
+
+      {pendingSwitch ? (
+        <ConfirmDialog
+          open
+          title={`Switch to ${pendingSwitch.label}?`}
+          message={`New chats will be answered by ${pendingSwitch.label}. The model currently held in memory is released to free up RAM.`}
+          confirmLabel={`Switch to ${pendingSwitch.label}`}
+          onConfirm={() => {
+            selectProvider(pendingSwitch.id);
+            setPendingSwitch(null);
+          }}
+          onCancel={() => setPendingSwitch(null)}
+        />
+      ) : null}
     </>
   );
 }
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 43f2053b..a16f954d 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -1957,14 +1957,7 @@
   background: rgba(94, 201, 138, 0.06);
 }
 
-/* ─── Providers section (AI tab) ─────────────────────────────────────────── */
-.providerRow {
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  gap: 8px;
-  padding: 6px 0;
-}
+/* ─── Providers section (legacy OpenAI card) ─────────────────────────────── */
 .providerName {
   font-size: 13px;
   font-weight: 600;

From 96151f9697a49c765d10932764f07e737a8034b2 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 14:46:34 -0500
Subject: [PATCH 12/89] style: apply prettier and rustfmt, and align the search
 hook setter name

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                  | 52 +++++++++++++-------
 src/settings/tabs/models/DiscoverPane.tsx    |  4 +-
 src/settings/tabs/models/LibraryPane.tsx     |  6 ++-
 src/settings/tabs/models/ModelsSegmented.tsx |  6 ++-
 src/settings/tabs/models/useHfSearch.ts      | 17 +++++--
 5 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index f5aa773b..495a8731 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1625,10 +1625,7 @@ pub struct InstalledModelView {
 pub fn parse_param_billions(id: &str) -> Option<f64> {
     let mut found = None;
     for token in id.split(['/', '-', '_', ' ']) {
-        let Some(stripped) = token
-            .strip_suffix('B')
-            .or_else(|| token.strip_suffix('b'))
-        else {
+        let Some(stripped) = token.strip_suffix('B').or_else(|| token.strip_suffix('b')) else {
             continue;
         };
         if let Ok(v) = stripped.parse::<f64>() {
@@ -2170,7 +2167,8 @@ pub async fn search_hf_models(
     limit: usize,
     client: tauri::State<'_, reqwest::Client>,
 ) -> Result<Vec<HfModelRow>, String> {
-    let summaries = fetch_hf_search(&client, HF_BASE_URL, &query, clamp_search_limit(limit)).await?;
+    let summaries =
+        fetch_hf_search(&client, HF_BASE_URL, &query, clamp_search_limit(limit)).await?;
     Ok(annotate_search_rows(summaries, system_ram_bytes()))
 }
 
@@ -4806,9 +4804,14 @@ mod tests {
             .await;
         let client = reqwest::Client::new();
         // Whitespace-only query trims to empty and the search param is dropped.
-        let rows = fetch_hf_search(&client, &server.url(), "   ", crate::config::defaults::HF_SEARCH_LIMIT)
-            .await
-            .unwrap();
+        let rows = fetch_hf_search(
+            &client,
+            &server.url(),
+            "   ",
+            crate::config::defaults::HF_SEARCH_LIMIT,
+        )
+        .await
+        .unwrap();
         assert!(rows.is_empty());
     }
 
@@ -4822,18 +4825,28 @@ mod tests {
             .create_async()
             .await;
         let client = reqwest::Client::new();
-        let err = fetch_hf_search(&client, &server.url(), "q", crate::config::defaults::HF_SEARCH_LIMIT)
-            .await
-            .unwrap_err();
+        let err = fetch_hf_search(
+            &client,
+            &server.url(),
+            "q",
+            crate::config::defaults::HF_SEARCH_LIMIT,
+        )
+        .await
+        .unwrap_err();
         assert!(err.contains("503"), "got: {err}");
     }
 
     #[tokio::test]
     async fn fetch_hf_search_maps_transport_error() {
         let client = reqwest::Client::new();
-        let err = fetch_hf_search(&client, "http://127.0.0.1:1", "q", crate::config::defaults::HF_SEARCH_LIMIT)
-            .await
-            .unwrap_err();
+        let err = fetch_hf_search(
+            &client,
+            "http://127.0.0.1:1",
+            "q",
+            crate::config::defaults::HF_SEARCH_LIMIT,
+        )
+        .await
+        .unwrap_err();
         assert!(err.contains("failed to reach Hugging Face"), "got: {err}");
     }
 
@@ -4841,9 +4854,14 @@ mod tests {
     async fn fetch_hf_search_rejects_overlong_query() {
         let client = reqwest::Client::new();
         let long = "x".repeat(crate::config::defaults::MAX_HF_SEARCH_QUERY_LEN + 1);
-        let err = fetch_hf_search(&client, "http://127.0.0.1:9", &long, crate::config::defaults::HF_SEARCH_LIMIT)
-            .await
-            .unwrap_err();
+        let err = fetch_hf_search(
+            &client,
+            "http://127.0.0.1:9",
+            &long,
+            crate::config::defaults::HF_SEARCH_LIMIT,
+        )
+        .await
+        .unwrap_err();
         assert!(err.contains("maximum length"), "got: {err}");
     }
 
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
index 38b7d010..ccf8a7f0 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -268,7 +268,9 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
                       {FIT_LABEL[f.fit]}
                     </span>
                   ) : null}
-                  <span className={styles.quantSize}>{gb(f.size_bytes)} GB</span>
+                  <span className={styles.quantSize}>
+                    {gb(f.size_bytes)} GB
+                  </span>
                   <button
                     type="button"
                     className={styles.download}
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index 8dce99ed..7314d04c 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -149,7 +149,11 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
     <div className={styles.pane}>
       <div className={styles.bar}>
         <button type="button" className={styles.addButton} onClick={onAddModel}>
-          <svg viewBox="0 0 24 24" aria-hidden="true" className={styles.addIcon}>
+          <svg
+            viewBox="0 0 24 24"
+            aria-hidden="true"
+            className={styles.addIcon}
+          >
             <path d="M12 5v14M5 12h14" />
           </svg>
           Add model
diff --git a/src/settings/tabs/models/ModelsSegmented.tsx b/src/settings/tabs/models/ModelsSegmented.tsx
index 3fabb3e0..8ece1f75 100644
--- a/src/settings/tabs/models/ModelsSegmented.tsx
+++ b/src/settings/tabs/models/ModelsSegmented.tsx
@@ -40,7 +40,11 @@ const PROVIDERS_ICON = (
   </svg>
 );
 
-const VIEWS: ReadonlyArray<{ id: ModelsSubview; label: string; icon: ReactNode }> = [
+const VIEWS: ReadonlyArray<{
+  id: ModelsSubview;
+  label: string;
+  icon: ReactNode;
+}> = [
   { id: 'library', label: 'Library', icon: LIBRARY_ICON },
   { id: 'discover', label: 'Discover', icon: DISCOVER_ICON },
   { id: 'providers', label: 'Providers', icon: PROVIDERS_ICON },
diff --git a/src/settings/tabs/models/useHfSearch.ts b/src/settings/tabs/models/useHfSearch.ts
index 5ed5d8e4..7b6c2950 100644
--- a/src/settings/tabs/models/useHfSearch.ts
+++ b/src/settings/tabs/models/useHfSearch.ts
@@ -75,7 +75,7 @@ export interface UseHfSearchResult {
  * are also dropped.
  */
 export function useHfSearch(): UseHfSearchResult {
-  const [query, setQueryState] = useState('');
+  const [queryText, setQueryText] = useState('');
   const [limit, setLimit] = useState(HF_PAGE_SIZE);
   const [results, setResults] = useState<HfModelSummary[]>([]);
   const [loading, setLoading] = useState(true);
@@ -97,7 +97,7 @@ export function useHfSearch(): UseHfSearchResult {
   // A new query starts over at the first page; growing `limit` mid-query is
   // what "Load more" does.
   const setQuery = useCallback((q: string) => {
-    setQueryState(q);
+    setQueryText(q);
     setLimit(HF_PAGE_SIZE);
   }, []);
 
@@ -133,13 +133,20 @@ export function useHfSearch(): UseHfSearchResult {
   // `limit` bump) ride the same path.
   useEffect(() => {
     const timer = window.setTimeout(() => {
-      void runSearch(query, limit);
+      void runSearch(queryText, limit);
     }, HF_SEARCH_DEBOUNCE_MS);
     return () => window.clearTimeout(timer);
-  }, [query, limit, runSearch]);
+  }, [queryText, limit, runSearch]);
 
   // The last response filled the page, so the Hub may hold more rows.
   const canLoadMore = !loading && results.length >= limit;
 
-  return { query, setQuery, results, loading, loadMore, canLoadMore };
+  return {
+    query: queryText,
+    setQuery,
+    results,
+    loading,
+    loadMore,
+    canLoadMore,
+  };
 }

From f4aa41f5c9611fd10b78d530ba53743644f0e4dc Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 15:08:04 -0500
Subject: [PATCH 13/89] refactor: drop unused est_runtime_gb from search rows
 and share the RAM-fit label map

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 31 +++++++------------
 .../tabs/models/DiscoverPane.test.tsx         |  1 -
 src/settings/tabs/models/DiscoverPane.tsx     | 14 +++------
 src/settings/tabs/models/LibraryPane.tsx      | 12 ++-----
 src/types/hf.ts                               | 10 +++---
 src/utils/ramFit.ts                           | 15 +++++++++
 6 files changed, 37 insertions(+), 46 deletions(-)
 create mode 100644 src/utils/ramFit.ts

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 495a8731..01ca4c4b 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1586,15 +1586,14 @@ pub fn parse_search_results(body: &[u8]) -> Result<Vec<HfModelSummary>, String>
 // are deliberately approximate: the result is a hint, never a hard gate.
 
 /// A Hugging Face search row annotated with a best-effort RAM-fit hint for the
-/// host. The base summary carries the Hub facts; `est_runtime_gb` and `fit`
-/// are estimated from the parameter count parsed out of the repo id (no file
-/// size is available at search time). Both are `None` when the id carries no
-/// `<number>B` token; `fit` is additionally `None` when host RAM is unknown.
+/// host. The base summary carries the Hub facts; `fit` is estimated from the
+/// parameter count parsed out of the repo id (no file size is available at
+/// search time) and is `None` when the id carries no `<number>B` token or when
+/// host RAM is unknown.
 #[derive(Debug, Clone, PartialEq, Serialize)]
 pub struct HfModelRow {
     #[serde(flatten)]
     pub summary: HfModelSummary,
-    pub est_runtime_gb: Option<f64>,
     pub fit: Option<registry::RamFit>,
 }
 
@@ -1663,17 +1662,14 @@ pub fn annotate_search_rows(summaries: Vec<HfModelSummary>, ram_bytes: u64) -> V
     summaries
         .into_iter()
         .map(|summary| {
-            let est_runtime_gb =
-                parse_param_billions(&summary.id).map(estimate_runtime_gb_from_params);
-            let fit = match est_runtime_gb {
-                Some(est) if ram_bytes > 0 => Some(registry::ram_fit(est, ram_bytes)),
+            let fit = match parse_param_billions(&summary.id) {
+                Some(params_b) if ram_bytes > 0 => Some(registry::ram_fit(
+                    estimate_runtime_gb_from_params(params_b),
+                    ram_bytes,
+                )),
                 _ => None,
             };
-            HfModelRow {
-                summary,
-                est_runtime_gb,
-                fit,
-            }
+            HfModelRow { summary, fit }
         })
         .collect()
 }
@@ -4679,12 +4675,9 @@ mod tests {
         // 64 GiB host: the 1B model fits, the param-less row stays unannotated.
         let rows = annotate_search_rows(summaries.clone(), 64 << 30);
         assert_eq!(rows[0].fit, Some(registry::RamFit::Fits));
-        assert!(rows[0].est_runtime_gb.is_some());
-        assert_eq!(rows[1].est_runtime_gb, None);
         assert_eq!(rows[1].fit, None);
-        // Unknown host RAM keeps the size estimate but drops the fit verdict.
+        // Unknown host RAM drops the fit verdict even when params parse.
         let rows = annotate_search_rows(summaries, 0);
-        assert!(rows[0].est_runtime_gb.is_some());
         assert_eq!(rows[0].fit, None);
     }
 
@@ -4740,7 +4733,6 @@ mod tests {
                 downloads: 3,
                 gated: false,
             },
-            est_runtime_gb: Some(7.0),
             fit: Some(registry::RamFit::Tight),
         };
         assert_eq!(
@@ -4749,7 +4741,6 @@ mod tests {
                 "id": "o/r",
                 "downloads": 3,
                 "gated": false,
-                "est_runtime_gb": 7.0,
                 "fit": "tight",
             })
         );
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index ce77df18..d75ffd6c 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -67,7 +67,6 @@ const RESULTS: HfModelSummary[] = [
     id: 'google/gemma-4-12b-it-GGUF',
     downloads: 1_200_000,
     gated: false,
-    est_runtime_gb: 9.5,
     fit: 'fits',
   },
   { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
index ccf8a7f0..c8752e63 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -18,6 +18,7 @@ import { invoke } from '@tauri-apps/api/core';
 import { DownloadProgress } from '../../../components/DownloadProgress';
 import { useDownloadModel } from '../../../hooks/useDownloadModel';
 import { useHfSearch } from './useHfSearch';
+import { RAM_FIT_LABEL } from '../../../utils/ramFit';
 import styles from './DiscoverPane.module.css';
 import type { HfModelSummary } from '../../../types/hf';
 import type { HfGgufFile, RamFit } from '../../../types/starter';
@@ -25,14 +26,7 @@ import type { RawAppConfig } from '../../types';
 
 const HF_BASE_URL = 'https://huggingface.co';
 
-/** RAM-fit hint label (shared vocabulary with the Library pane). */
-const FIT_LABEL: Record<RamFit, string> = {
-  fits: 'Comfortable',
-  tight: 'Tight',
-  too_big: 'Heavy',
-};
-
-/** RAM-fit hint colour class on this pane's stylesheet. */
+/** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
   fits: styles.fitOk,
   tight: styles.fitTight,
@@ -228,7 +222,7 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
         </div>
         {model.fit ? (
           <span className={`${styles.fit} ${FIT_CLASS[model.fit]}`}>
-            {FIT_LABEL[model.fit]}
+            {RAM_FIT_LABEL[model.fit]}
           </span>
         ) : null}
         <button
@@ -265,7 +259,7 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
                   <span className={styles.quantName}>{f.file}</span>
                   {f.fit ? (
                     <span className={`${styles.fit} ${FIT_CLASS[f.fit]}`}>
-                      {FIT_LABEL[f.fit]}
+                      {RAM_FIT_LABEL[f.fit]}
                     </span>
                   ) : null}
                   <span className={styles.quantSize}>
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index 7314d04c..52262c1b 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -15,20 +15,14 @@ import { invoke } from '@tauri-apps/api/core';
 
 import { useModelCapabilities } from '../../../hooks/useModelCapabilities';
 import { ConfirmDialog } from '../../components';
+import { RAM_FIT_LABEL } from '../../../utils/ramFit';
 import styles from './LibraryPane.module.css';
 import type { RawAppConfig } from '../../types';
 import type { InstalledModel, RamFit } from '../../../types/starter';
 
 const HF_BASE_URL = 'https://huggingface.co';
 
-/** RAM-fit hint label shown next to a model. */
-const FIT_LABEL: Record<RamFit, string> = {
-  fits: 'Comfortable',
-  tight: 'Tight',
-  too_big: 'Heavy',
-};
-
-/** RAM-fit hint colour class on this pane's stylesheet. */
+/** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
   fits: styles.fitOk,
   tight: styles.fitTight,
@@ -200,7 +194,7 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                   <div className={styles.right}>
                     {m.fit ? (
                       <span className={`${styles.fit} ${FIT_CLASS[m.fit]}`}>
-                        {FIT_LABEL[m.fit]}
+                        {RAM_FIT_LABEL[m.fit]}
                       </span>
                     ) : null}
                     {caps?.vision ? (
diff --git a/src/types/hf.ts b/src/types/hf.ts
index f6d893f3..f214e09a 100644
--- a/src/types/hf.ts
+++ b/src/types/hf.ts
@@ -18,10 +18,10 @@ import type { RamFit } from './starter';
  * - `downloads` is Hugging Face's all-time download count for the repo.
  * - `gated` is true when the repo requires accepting terms or auth; an
  *   anonymous download fails, so the Discover row disables download for it.
- * - `est_runtime_gb` / `fit` are estimated from the parameter count in the
- *   repo id (no file size is available at search time); both are `null` when
- *   the id carries no `<number>B` token, and `fit` is also `null` when host
- *   RAM is unknown. Accurate per-quant fit arrives at the expand step.
+ * - `fit` is the estimated RAM-fit for this Mac, derived from the parameter
+ *   count in the repo id (no file size is available at search time); it is
+ *   `null` when the id carries no `<number>B` token or host RAM is unknown.
+ *   Accurate per-quant fit arrives at the expand step.
  */
 export interface HfModelSummary {
   /** Canonical `owner/repo` slug. */
@@ -30,8 +30,6 @@ export interface HfModelSummary {
   downloads: number;
   /** True when the repo is gated; anonymous downloads fail. */
   gated: boolean;
-  /** Estimated resident footprint in GiB, or `null` when not derivable. */
-  est_runtime_gb?: number | null;
   /** Estimated RAM-fit for this Mac, or `null` when not derivable. */
   fit?: RamFit | null;
 }
diff --git a/src/utils/ramFit.ts b/src/utils/ramFit.ts
new file mode 100644
index 00000000..6c469e1c
--- /dev/null
+++ b/src/utils/ramFit.ts
@@ -0,0 +1,15 @@
+/**
+ * User-facing labels for the backend's RAM-fit verdict, shared by every
+ * surface that shows the hint (the Library and Discover panes). Colour classes
+ * stay per-component because they reference each pane's own CSS-module classes;
+ * only the wording is shared here so the vocabulary cannot drift between panes.
+ */
+
+import type { RamFit } from '../types/starter';
+
+/** Maps a RAM-fit verdict to the word shown next to a model. */
+export const RAM_FIT_LABEL: Record<RamFit, string> = {
+  fits: 'Comfortable',
+  tight: 'Tight',
+  too_big: 'Heavy',
+};

From f82b5ed6a2ae62c0760005edfd3474809772a64b Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 15:50:43 -0500
Subject: [PATCH 14/89] fix: Discover search returns downloadable GGUF chat
 repos and add reveal-in-Finder

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/lib.rs        |  2 ++
 src-tauri/src/models/mod.rs | 39 +++++++++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 8295f7b4..f41afe41 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -2273,6 +2273,8 @@ pub fn run() {
             #[cfg(not(coverage))]
             models::delete_installed_model,
             #[cfg(not(coverage))]
+            models::reveal_model_in_finder,
+            #[cfg(not(coverage))]
             history::save_conversation,
             #[cfg(not(coverage))]
             history::persist_message,
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 01ca4c4b..48ccf74f 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1758,12 +1758,13 @@ async fn fetch_hf_search_inner(
 ) -> Result<Vec<u8>, String> {
     let endpoint = format!("{}/api/models", base_url.trim_end_matches('/'));
     let limit = limit.to_string();
-    // `pipeline_tag=text-generation` keeps the results to chat/instruct models;
-    // without it an empty query returns the most-downloaded GGUF repos overall,
-    // which are dominated by embedding/reranker repos (sentence-transformers,
-    // BERT) that Thuki cannot run as a chat model.
+    // `filter=gguf` matches repos *tagged* gguf (the dedicated quant repos that
+    // actually ship `.gguf` files), and `pipeline_tag=text-generation` keeps
+    // them to chat/instruct models. `library=gguf` is deliberately NOT used: it
+    // also matches base repos that merely link to GGUF quants elsewhere, so the
+    // rows would have no downloadable `.gguf` files of their own.
     let mut params: Vec<(&str, &str)> = vec![
-        ("library", "gguf"),
+        ("filter", "gguf"),
         ("pipeline_tag", "text-generation"),
         ("sort", "downloads"),
         ("direction", "-1"),
@@ -2238,6 +2239,32 @@ pub fn delete_installed_model(
     Ok(())
 }
 
+/// Reveals an installed model's weights blob in Finder by spawning `open -R`
+/// on its content-addressed path. Returns `Err` when the DB lock fails, `id`
+/// is not in the manifest, or the spawn fails. Thin wrapper (excluded from
+/// coverage) mirroring [`crate::settings_commands::reveal_config_in_finder`];
+/// the lookup is covered through `manifest::get` and `ModelStore::blob_path`.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+pub fn reveal_model_in_finder(
+    id: String,
+    db: tauri::State<'_, crate::history::Database>,
+    store: tauri::State<'_, storage::ModelStore>,
+) -> Result<(), String> {
+    let model = {
+        let conn = db.0.lock().map_err(|e| e.to_string())?;
+        manifest::get(&conn, &id)
+            .map_err(|e| e.to_string())?
+            .ok_or_else(|| format!("model not installed: {id}"))?
+    }; // `conn` (the lock guard) is dropped here, before the spawn
+    std::process::Command::new("open")
+        .arg("-R") // reveal the file in Finder instead of opening it
+        .arg(store.blob_path(&model.sha256))
+        .spawn() // not awaited; only a failure to launch is reported
+        .map(|_| ())
+        .map_err(|e| e.to_string())
+}
+
 /// Maps the `finalize_install` outcome onto the terminal download event:
 /// `AllDone` once the install is recorded, `Failed` otherwise. AllDone is
 /// emitted here (after finalize) rather than from `run_download` so the
@@ -4763,7 +4790,7 @@ mod tests {
         let mock = server
             .mock("GET", "/api/models")
             .match_query(mockito::Matcher::AllOf(vec![
-                mockito::Matcher::UrlEncoded("library".into(), "gguf".into()),
+                mockito::Matcher::UrlEncoded("filter".into(), "gguf".into()),
                 mockito::Matcher::UrlEncoded("pipeline_tag".into(), "text-generation".into()),
                 mockito::Matcher::UrlEncoded("search".into(), "qwen".into()),
                 mockito::Matcher::UrlEncoded("sort".into(), "downloads".into()),

From 7ae917a96281a1d4dc8bfefe918f0d95e45fbb5d Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 15:50:50 -0500
Subject: [PATCH 15/89] fix: refresh the provider model dropdown on switch,
 consistent names, premium switch dialog

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/components/index.tsx             | 12 +++-
 .../tabs/models/ProvidersPane.test.tsx        | 60 +++++++++++++++++++
 src/settings/tabs/models/ProvidersPane.tsx    | 28 ++++++++-
 src/styles/settings.module.css                | 14 +++++
 4 files changed, 111 insertions(+), 3 deletions(-)

diff --git a/src/settings/components/index.tsx b/src/settings/components/index.tsx
index 6ca8eedb..6c22d43b 100644
--- a/src/settings/components/index.tsx
+++ b/src/settings/components/index.tsx
@@ -408,6 +408,7 @@ export function ConfirmDialog({
   confirmLabel,
   cancelLabel = 'Cancel',
   destructive = false,
+  primary = false,
   onConfirm,
   onCancel,
 }: {
@@ -417,6 +418,9 @@ export function ConfirmDialog({
   confirmLabel: string;
   cancelLabel?: string;
   destructive?: boolean;
+  /** Accent-fill the confirm button (the affirmative primary action). Ignored
+   * when `destructive` is set, which takes visual precedence. */
+  primary?: boolean;
   onConfirm: () => void;
   onCancel: () => void;
 }) {
@@ -455,7 +459,13 @@ export function ConfirmDialog({
           </button>
           <button
             type="button"
-            className={`${styles.button} ${destructive ? styles.buttonDestructive : ''}`}
+            className={`${styles.button} ${
+              destructive
+                ? styles.buttonDestructive
+                : primary
+                  ? styles.buttonPrimary
+                  : ''
+            }`}
             onClick={onConfirm}
             autoFocus
           >
diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index 5c2b296d..252b08d7 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -182,6 +182,66 @@ describe('ProvidersPane active hero', () => {
     );
   });
 
+  it('re-fetches the picker state and shows the new provider model on switch', async () => {
+    // Built-in active first: the picker returns the built-in model id.
+    mockInvoke({
+      list_installed_models: INSTALLED,
+      get_model_picker_state: {
+        active: INSTALLED[0].id,
+        all: [INSTALLED[0].id],
+        ollamaReachable: true,
+      },
+    });
+    const builtin = { ...BUILTIN, model: INSTALLED[0].id };
+    const view = renderPane(makeConfig('builtin', [builtin, OLLAMA]));
+    await waitFor(() =>
+      expect(invokeMock).toHaveBeenCalledWith('get_model_picker_state'),
+    );
+
+    // Now Ollama is active and its tags are different from the built-in id.
+    mockInvoke({
+      list_installed_models: INSTALLED,
+      get_model_picker_state: {
+        active: 'gemma4:e4b',
+        all: ['gemma4:e4b'],
+        ollamaReachable: true,
+      },
+    });
+    view.rerender(
+      <ProvidersPane
+        config={makeConfig('ollama', [builtin, OLLAMA])}
+        resyncToken={0}
+        onSaved={() => {}}
+        onAddModel={() => {}}
+      />,
+    );
+    // The provider-change refetch replaces the stale built-in id with the
+    // live Ollama model rather than leaving the built-in id in the dropdown.
+    const select = await screen.findByRole('combobox', {
+      name: 'Active Ollama model',
+    });
+    await waitFor(() => expect(select).toHaveValue('gemma4:e4b'));
+  });
+
+  it('appends the quant only to disambiguate duplicate display names', async () => {
+    const dupes = [
+      { ...INSTALLED[0], id: 'org/x:q4.gguf', quant: 'Q4_K_M' },
+      { ...INSTALLED[0], id: 'org/x:q8.gguf', quant: 'Q8_0' },
+    ];
+    mockInvoke({ list_installed_models: dupes });
+    renderPane(
+      makeConfig('builtin', [{ ...BUILTIN, model: 'org/x:q4.gguf' }, OLLAMA]),
+    );
+    await screen.findByRole('combobox', { name: 'Built-in model' });
+    // Shared display name -> each option disambiguates with its quant.
+    expect(
+      screen.getByRole('option', { name: 'Qwen3.5 9B · Q4_K_M' }),
+    ).toBeInTheDocument();
+    expect(
+      screen.getByRole('option', { name: 'Qwen3.5 9B · Q8_0' }),
+    ).toBeInTheDocument();
+  });
+
   it('shows a Choose-a-model option when the built-in model is not installed', async () => {
     mockInvoke({ list_installed_models: INSTALLED });
     renderPane(makeConfig('builtin', [{ ...BUILTIN, model: 'gone' }, OLLAMA]));
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index 4321d298..c41df0f9 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -178,7 +178,20 @@ export function ProvidersPane({
   // A provider switch is confirmed before it takes effect.
   const [pendingSwitch, setPendingSwitch] = useState<RawProvider | null>(null);
 
-  const { activeModel, availableModels, setActiveModel } = useModelSelection();
+  const { activeModel, availableModels, setActiveModel, refreshModels } =
+    useModelSelection();
+
+  // The picker hook fetches once on mount; re-fetch whenever the active
+  // provider changes so the hero's Model dropdown reflects the newly-active
+  // provider's inventory instead of the previous provider's cached list.
+  // Without this, switching Built-in -> Ollama would keep showing the built-in
+  // model id (the stale `availableModels`/`activeModel` from before the switch).
+  const lastProviderRef = useRef(activeId);
+  useEffect(() => {
+    if (lastProviderRef.current === activeId) return;
+    lastProviderRef.current = activeId;
+    void refreshModels();
+  }, [activeId, refreshModels]);
 
   // Re-seed local editable state from a resync without scheduling saves.
   const prevTokenRef = useRef(resyncToken);
@@ -244,6 +257,14 @@ export function ProvidersPane({
   const builtinModelValue = installed.some((m) => m.id === builtinModelId)
     ? builtinModelId
     : '';
+  // Display names match the picker / Library / running footer (friendly name,
+  // no quant). The quant is appended only to disambiguate two installs that
+  // share a display name, so the common case reads consistently everywhere.
+  const duplicateDisplayNames = new Set(
+    installed
+      .map((m) => m.display_name)
+      .filter((name, i, all) => all.indexOf(name) !== i),
+  );
 
   // Providers other than the active one, in a stable order.
   const otherProviders = providers.filter((p) => p.id !== activeId);
@@ -287,7 +308,9 @@ export function ProvidersPane({
                 {installed.map((m) => (
                   <option key={m.id} value={m.id}>
                     {m.display_name}
-                    {m.quant !== '' ? ` · ${m.quant}` : ''}
+                    {duplicateDisplayNames.has(m.display_name) && m.quant !== ''
+                      ? ` · ${m.quant}`
+                      : ''}
                   </option>
                 ))}
               </select>
@@ -637,6 +660,7 @@ export function ProvidersPane({
       {pendingSwitch ? (
         <ConfirmDialog
           open
+          primary
           title={`Switch to ${pendingSwitch.label}?`}
           message={`New chats will be answered by ${pendingSwitch.label}. The model currently held in memory is released to free up RAM.`}
           confirmLabel={`Switch to ${pendingSwitch.label}`}
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index a16f954d..f08c6f43 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -860,6 +860,20 @@
   background: rgba(48, 34, 32, 0.95);
   border-color: rgba(255, 138, 128, 0.5);
 }
+/* Affirmative primary action (e.g. the confirm in a Switch dialog): the one
+ * accent fill, matching the segmented-control active + Add-model treatment. */
+.buttonPrimary {
+  background: var(--accent);
+  border-color: rgba(255, 141, 92, 0.5);
+  border-top-color: rgba(255, 141, 92, 0.5); /* presumably overrides a base top-edge tint; confirm */
+  color: #16110d; /* near-black label on the accent fill */
+  font-weight: 600;
+  box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.18);
+}
+.buttonPrimary:hover {
+  background: #ff9d72;
+  border-color: rgba(255, 141, 92, 0.7); /* stronger border on hover (0.5 -> 0.7 alpha) */
+}
 
 /* ─── Slider (hairline rail with warm gradient thumb) ───────────────────── */
 

From ab09401ff7989a1adad1e03bda937f82d94a00f4 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 15:50:57 -0500
Subject: [PATCH 16/89] feat: RAM-fit tooltips, capability pills, clickable
 Discover titles, and search caching

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/DiscoverPane.module.css       |  56 +++++----
 .../tabs/models/DiscoverPane.test.tsx         |  27 +++--
 src/settings/tabs/models/DiscoverPane.tsx     |  47 ++++----
 .../tabs/models/LibraryPane.module.css        |  51 ++++----
 src/settings/tabs/models/LibraryPane.test.tsx |  58 ++++------
 src/settings/tabs/models/LibraryPane.tsx      | 109 ++++++++++++------
 src/settings/tabs/models/useHfSearch.test.ts  |  21 ++++
 src/settings/tabs/models/useHfSearch.ts       |  56 ++++++++-
 src/utils/ramFit.ts                           |   8 ++
 9 files changed, 278 insertions(+), 155 deletions(-)

diff --git a/src/settings/tabs/models/DiscoverPane.module.css b/src/settings/tabs/models/DiscoverPane.module.css
index ace090fb..d9169f99 100644
--- a/src/settings/tabs/models/DiscoverPane.module.css
+++ b/src/settings/tabs/models/DiscoverPane.module.css
@@ -136,12 +136,38 @@
 }
 
 .nm {
+  display: flex;
+  align-items: center;
+  min-width: 0;
+}
+
+/* The repo title doubles as the link to its Hugging Face page. */
+.nmLink {
+  border: none;
+  background: transparent;
+  padding: 0;
+  font-family: inherit;
   font-weight: 540;
   font-size: 12.5px;
   color: var(--t1);
   white-space: nowrap;
   overflow: hidden;
   text-overflow: ellipsis;
+  text-align: left;
+  min-width: 0;
+  cursor: pointer;
+  transition: color 140ms ease;
+}
+.nmLink:hover {
+  color: var(--accent);
+  text-decoration: underline;
+  text-underline-offset: 2px;
+}
+.nmLink:focus-visible {
+  outline: none;
+  color: var(--accent);
+  text-decoration: underline;
+  text-underline-offset: 2px;
 }
 
 .org {
@@ -188,36 +214,6 @@
   color: var(--heavy);
 }
 
-/* Icon-only link out to the repo's Hugging Face page. */
-.extlink {
-  flex: none;
-  width: 28px;
-  height: 28px;
-  display: grid;
-  place-items: center;
-  border: none;
-  border-radius: 7px;
-  background: transparent;
-  color: var(--t3);
-  cursor: pointer;
-  transition:
-    color 140ms ease,
-    background 140ms ease;
-}
-.extlink svg {
-  width: 14px;
-  height: 14px;
-  stroke: currentColor;
-  stroke-width: 1.7;
-  fill: none;
-  stroke-linecap: round;
-  stroke-linejoin: round;
-}
-.extlink:hover {
-  color: var(--accent);
-  background: var(--elev-1);
-}
-
 /* Icon-only download / open-quants button. */
 .get {
   flex: none;
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index d75ffd6c..a4a62ef1 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -23,7 +23,11 @@ import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
 import { invoke } from '@tauri-apps/api/core';
 
 import { DiscoverPane } from './DiscoverPane';
-import { HF_SEARCH_DEBOUNCE_MS, HF_PAGE_SIZE } from './useHfSearch';
+import {
+  HF_SEARCH_DEBOUNCE_MS,
+  HF_PAGE_SIZE,
+  clearHfSearchCache,
+} from './useHfSearch';
 import type { HfModelSummary } from '../../../types/hf';
 import type { HfGgufFile } from '../../../types/starter';
 import type { RawAppConfig } from '../../types';
@@ -103,6 +107,7 @@ async function flush() {
 beforeEach(() => {
   invokeMock.mockReset();
   lastChannel = null;
+  clearHfSearchCache();
 });
 
 afterEach(() => {
@@ -240,10 +245,16 @@ describe('DiscoverPane', () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
     });
-    expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
-      query: '',
-      limit: HF_PAGE_SIZE,
-    });
+    // The empty query was cached by the mount fetch, so returning to All is
+    // served from cache without another Hub call.
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'search_hf_models',
+      expect.anything(),
+    );
+    expect(screen.getByRole('button', { name: 'All' })).toHaveAttribute(
+      'aria-pressed',
+      'true',
+    );
   });
 
   it('renders every family chip', async () => {
@@ -261,12 +272,10 @@ describe('DiscoverPane', () => {
     }
   });
 
-  it('opens the repo on Hugging Face from the row link', async () => {
+  it('opens the repo on Hugging Face when the title is clicked', async () => {
     await renderPane();
     fireEvent.click(
-      screen.getByRole('button', {
-        name: 'View google/gemma-4-12b-it-GGUF on Hugging Face',
-      }),
+      screen.getByRole('button', { name: 'google/gemma-4-12b-it-GGUF' }),
     );
     expect(invokeMock).toHaveBeenCalledWith('open_url', {
       url: 'https://huggingface.co/google/gemma-4-12b-it-GGUF',
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
index c8752e63..3073abf8 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -18,7 +18,8 @@ import { invoke } from '@tauri-apps/api/core';
 import { DownloadProgress } from '../../../components/DownloadProgress';
 import { useDownloadModel } from '../../../hooks/useDownloadModel';
 import { useHfSearch } from './useHfSearch';
-import { RAM_FIT_LABEL } from '../../../utils/ramFit';
+import { Tooltip } from '../../../components/Tooltip';
+import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
 import styles from './DiscoverPane.module.css';
 import type { HfModelSummary } from '../../../types/hf';
 import type { HfGgufFile, RamFit } from '../../../types/starter';
@@ -64,12 +65,6 @@ const DOWNLOAD_ICON = (
     <path d="M12 4v11M7 11l5 5 5-5M5 20h14" />
   </svg>
 );
-const HF_LINK_ICON = (
-  <svg viewBox="0 0 24 24" aria-hidden="true">
-    <path d="M14 3h7v7M21 3l-9 9M19 14v5a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V7a2 2 0 0 1 2-2h5" />
-  </svg>
-);
-
 interface DiscoverPaneProps {
   /** Lift a fresh config snapshot after a successful install. */
   onSaved: (next: RawAppConfig) => void;
@@ -211,7 +206,15 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
       <div className={styles.row}>
         <div className={styles.mid}>
           <div className={styles.nm}>
-            {model.id}
+            {/* The title opens the repo on Hugging Face, so the row needs no
+                separate link icon. */}
+            <button
+              type="button"
+              className={styles.nmLink}
+              onClick={openHuggingFace}
+            >
+              {model.id}
+            </button>
             {model.gated ? (
               <span className={styles.gatedBadge}>Gated</span>
             ) : null}
@@ -221,18 +224,12 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
           </div>
         </div>
         {model.fit ? (
-          <span className={`${styles.fit} ${FIT_CLASS[model.fit]}`}>
-            {RAM_FIT_LABEL[model.fit]}
-          </span>
+          <Tooltip label={RAM_FIT_TOOLTIP[model.fit]} multiline placement="top">
+            <span className={`${styles.fit} ${FIT_CLASS[model.fit]}`}>
+              {RAM_FIT_LABEL[model.fit]}
+            </span>
+          </Tooltip>
         ) : null}
-        <button
-          type="button"
-          className={styles.extlink}
-          aria-label={`View ${model.id} on Hugging Face`}
-          onClick={openHuggingFace}
-        >
-          {HF_LINK_ICON}
-        </button>
         <button
           type="button"
           className={styles.get}
@@ -258,9 +255,15 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
                 <div className={styles.quantRow} key={f.file}>
                   <span className={styles.quantName}>{f.file}</span>
                   {f.fit ? (
-                    <span className={`${styles.fit} ${FIT_CLASS[f.fit]}`}>
-                      {RAM_FIT_LABEL[f.fit]}
-                    </span>
+                    <Tooltip
+                      label={RAM_FIT_TOOLTIP[f.fit]}
+                      multiline
+                      placement="top"
+                    >
+                      <span className={`${styles.fit} ${FIT_CLASS[f.fit]}`}>
+                        {RAM_FIT_LABEL[f.fit]}
+                      </span>
+                    </Tooltip>
                   ) : null}
                   <span className={styles.quantSize}>
                     {gb(f.size_bytes)} GB
diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index 7ba6b05c..7b845f77 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -130,16 +130,23 @@
   flex: none;
 }
 
-/* Capability tags are text-only (no pill), per the locked design. */
-.tagVision {
-  font-size: 11px;
-  font-weight: 540;
+/* Capability pills sit in the name line, right after the model name. */
+.pillVision,
+.pillReason {
+  display: inline-flex;
+  align-items: center;
+  font-size: 10.5px;
+  font-weight: 560;
+  padding: 2px 8px;
+  border-radius: var(--radius-pill);
+}
+.pillVision {
   color: var(--vis);
+  background: var(--vis-bg);
 }
-.tagReason {
-  font-size: 11px;
-  font-weight: 540;
+.pillReason {
   color: var(--rea);
+  background: var(--rea-bg);
 }
 
 /* RAM-fit hint: a coloured dot + label reusing the onboarding fit palette. */
@@ -227,6 +234,7 @@
 .menuItem {
   display: flex;
   align-items: center;
+  gap: 10px;
   width: 100%;
   padding: 8px 10px;
   border: none;
@@ -240,6 +248,22 @@
   cursor: pointer;
   transition: background 120ms ease;
 }
+.menuItem svg {
+  width: 14px;
+  height: 14px;
+  stroke: currentColor;
+  stroke-width: 1.7;
+  fill: none;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+  opacity: 0.8;
+  flex: none;
+}
+.menuExt {
+  margin-left: auto;
+  font-size: 12px;
+  opacity: 0.4;
+}
 .menuItem:hover {
   background: var(--elev-2);
 }
@@ -286,19 +310,6 @@
   margin: 8px 4px 0;
 }
 
-.footer {
-  box-sizing: border-box;
-  width: 100%;
-  margin-top: 12px;
-  padding: 11px 4px 0;
-  border-top: 1px solid var(--hair-soft);
-  font-size: 11px;
-  color: var(--t3);
-  display: flex;
-  justify-content: space-between;
-  font-variant-numeric: tabular-nums;
-}
-
 @media (prefers-reduced-motion: reduce) {
   .menu {
     animation: none;
diff --git a/src/settings/tabs/models/LibraryPane.test.tsx b/src/settings/tabs/models/LibraryPane.test.tsx
index 732841d1..b98f3b08 100644
--- a/src/settings/tabs/models/LibraryPane.test.tsx
+++ b/src/settings/tabs/models/LibraryPane.test.tsx
@@ -314,7 +314,7 @@ describe('LibraryPane', () => {
     mockCommands(libraryResponses());
     await renderPane();
     openMenu('gemma');
-    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete model' }));
     expect(screen.getByText('Delete gemma?')).toBeInTheDocument();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
     expect(screen.queryByText('Delete gemma?')).not.toBeInTheDocument();
@@ -338,7 +338,7 @@ describe('LibraryPane', () => {
     const onSaved = vi.fn();
     await renderPane(makeConfig(''), onSaved);
     openMenu('gemma');
-    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete model' }));
     fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
@@ -359,7 +359,7 @@ describe('LibraryPane', () => {
     const onSaved = vi.fn();
     await renderPane(makeConfig(''), onSaved);
     openMenu('qwen');
-    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete qwen' }));
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete model' }));
     fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
@@ -374,7 +374,7 @@ describe('LibraryPane', () => {
     );
     await renderPane();
     openMenu('gemma');
-    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete model' }));
     fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(screen.getByRole('alert')).toHaveTextContent('file busy');
@@ -410,34 +410,26 @@ describe('LibraryPane', () => {
     expect(screen.getByText('No models downloaded yet.')).toBeInTheDocument();
   });
 
-  it('shows the free-disk footer and the model count when both are known', async () => {
+  it('reveals the model in Finder from the popover', async () => {
     mockCommands(libraryResponses());
     await renderPane();
-    expect(screen.getByText('30.4 GB free')).toBeInTheDocument();
-    expect(screen.getByText('2 models installed')).toBeInTheDocument();
-  });
-
-  it('singularises the model count for a single install', async () => {
-    mockCommands(libraryResponses({ list_installed_models: [GEMMA] }));
-    await renderPane();
-    expect(screen.getByText('1 model installed')).toBeInTheDocument();
-  });
-
-  it('hides the free-disk line when the probe returns a non-number', async () => {
-    mockCommands(libraryResponses({ get_models_dir_free_bytes: null }));
-    await renderPane();
-    expect(screen.queryByText(/free/)).not.toBeInTheDocument();
-    expect(screen.getByText('2 models installed')).toBeInTheDocument();
+    openMenu('gemma');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Reveal in Finder' }));
+    expect(invokeMock).toHaveBeenCalledWith('reveal_model_in_finder', {
+      id: 'org/gemma:gemma.gguf',
+    });
   });
 
-  it('hides the free-disk line when the disk probe rejects', async () => {
+  it('swallows a reveal-in-Finder failure', async () => {
     mockCommands(
-      libraryResponses({
-        get_models_dir_free_bytes: new Reject(new Error('statfs failed')),
-      }),
+      libraryResponses({ reveal_model_in_finder: new Reject('no blob') }),
     );
     await renderPane();
-    expect(screen.queryByText(/free/)).not.toBeInTheDocument();
+    openMenu('gemma');
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Reveal in Finder' }));
+    await flush();
+    // The row is untouched; the failure is best-effort and silent.
+    expect(screen.getByText('gemma')).toBeInTheDocument();
   });
 
   it('renders the top-right Add model button and routes it to onAddModel', async () => {
@@ -473,11 +465,11 @@ describe('LibraryPane', () => {
     const manage = screen.getByRole('button', { name: 'Manage gemma' });
     fireEvent.click(manage);
     expect(
-      screen.getByRole('menuitem', { name: 'Delete gemma' }),
+      screen.getByRole('menuitem', { name: 'Delete model' }),
     ).toBeInTheDocument();
     fireEvent.click(manage);
     expect(
-      screen.queryByRole('menuitem', { name: 'Delete gemma' }),
+      screen.queryByRole('menuitem', { name: 'Delete model' }),
     ).not.toBeInTheDocument();
   });
 
@@ -486,11 +478,11 @@ describe('LibraryPane', () => {
     await renderPane();
     openMenu('gemma');
     expect(
-      screen.getByRole('menuitem', { name: 'Delete gemma' }),
+      screen.getByRole('menuitem', { name: 'Delete model' }),
     ).toBeInTheDocument();
     fireEvent.mouseDown(document.body);
     expect(
-      screen.queryByRole('menuitem', { name: 'Delete gemma' }),
+      screen.queryByRole('menuitem', { name: 'Delete model' }),
     ).not.toBeInTheDocument();
   });
 
@@ -500,11 +492,11 @@ describe('LibraryPane', () => {
     openMenu('gemma');
     fireEvent.keyDown(document.body, { key: 'a' });
     expect(
-      screen.getByRole('menuitem', { name: 'Delete gemma' }),
+      screen.getByRole('menuitem', { name: 'Delete model' }),
     ).toBeInTheDocument();
     fireEvent.keyDown(document.body, { key: 'Escape' });
     expect(
-      screen.queryByRole('menuitem', { name: 'Delete gemma' }),
+      screen.queryByRole('menuitem', { name: 'Delete model' }),
     ).not.toBeInTheDocument();
   });
 
@@ -526,7 +518,7 @@ describe('LibraryPane', () => {
     );
     await renderPane();
     openMenu('gemma');
-    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete model' }));
     fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await flush();
     expect(screen.getByRole('alert')).toHaveTextContent('file busy');
@@ -538,7 +530,7 @@ describe('LibraryPane', () => {
       }),
     );
     openMenu('gemma');
-    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('menuitem', { name: 'Delete model' }));
     fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
     await waitFor(() =>
       expect(screen.queryByRole('alert')).not.toBeInTheDocument(),
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index 52262c1b..50ff451b 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -1,13 +1,13 @@
 /**
  * Library pane of the Models surface: the user's installed local models.
  *
- * Each downloaded model shows as a quiet row: its name, an Active state, the
- * Hugging Face repo / quantisation / size, capability text tags (Vision /
- * Reasoning, detected automatically), and a RAM-fit hint for this Mac. A ⋮
- * button opens a floating popover (Set as active / View on Hugging Face /
- * Delete) instead of expanding the card. Delete routes through a confirm
- * dialog. When nothing is installed the pane invites the user over to
- * Discover; a footer reports the model count and free disk space.
+ * Each downloaded model shows as a quiet row: its name with capability pills
+ * (Vision / Reasoning) and an Active marker, the Hugging Face repo /
+ * quantisation / size, and a RAM-fit hint (hover for a one-line explanation).
+ * A ⋮ button opens a floating popover (Set as active / View on Hugging Face /
+ * Reveal in Finder / Delete) instead of expanding the card; Delete routes
+ * through a confirm dialog. When nothing is installed the pane invites the
+ * user over to Discover.
  */
 
 import { useCallback, useEffect, useState } from 'react';
@@ -15,7 +15,8 @@ import { invoke } from '@tauri-apps/api/core';
 
 import { useModelCapabilities } from '../../../hooks/useModelCapabilities';
 import { ConfirmDialog } from '../../components';
-import { RAM_FIT_LABEL } from '../../../utils/ramFit';
+import { Tooltip } from '../../../components/Tooltip';
+import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
 import styles from './LibraryPane.module.css';
 import type { RawAppConfig } from '../../types';
 import type { InstalledModel, RamFit } from '../../../types/starter';
@@ -29,6 +30,28 @@ const FIT_CLASS: Record<RamFit, string> = {
   too_big: styles.fitHeavy,
 };
 
+// Popover icons (line-art, currentColor), matching the locked menu layout.
+const SET_ACTIVE_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M5 13l4 4L19 7" />
+  </svg>
+);
+const HF_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M14 3h7v7M21 3l-9 9M19 14v5a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V7a2 2 0 0 1 2-2h5" />
+  </svg>
+);
+const FINDER_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M3 7h6l2 2h10v9a2 2 0 0 1-2 2H3z" />
+  </svg>
+);
+const TRASH_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M3 6h18M8 6V4h8v2M6 6l1 14h10l1-14" />
+  </svg>
+);
+
 /** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
 function gb(bytes: number): string {
   return (bytes / 1e9).toFixed(1);
@@ -47,7 +70,6 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
     config.inference.providers.find((p) => p.kind === 'builtin')?.model ?? '';
 
   const [installed, setInstalled] = useState<InstalledModel[]>([]);
-  const [freeDiskBytes, setFreeDiskBytes] = useState<number | null>(null);
   const [openMenu, setOpenMenu] = useState<string | null>(null);
   const [confirmDelete, setConfirmDelete] = useState<string | null>(null);
   const [deleteError, setDeleteError] = useState<string | null>(null);
@@ -65,13 +87,6 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
 
   useEffect(() => {
     void refreshInstalled();
-    void invoke<number | null>('get_models_dir_free_bytes')
-      .then((bytes) => {
-        setFreeDiskBytes(typeof bytes === 'number' ? bytes : null);
-      })
-      .catch(() => {
-        // Unknown free space just hides the disk line.
-      });
   }, [refreshInstalled]);
 
   // Close the popover on an outside click or Escape so it behaves like a real
@@ -117,6 +132,13 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
     void invoke('open_url', { url: `${HF_BASE_URL}/${id.split(':')[0]}` });
   }
 
+  function revealInFinder(id: string) {
+    setOpenMenu(null);
+    void invoke('reveal_model_in_finder', { id }).catch(() => {
+      // Best-effort: a missing blob just means nothing to reveal.
+    });
+  }
+
   // Deletion is refcounted server-side; the backend also clears the builtin
   // provider's model field when the deleted model was the selected one, so
   // the lifted snapshot is the source of truth.
@@ -181,6 +203,12 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                   <div className={styles.mid}>
                     <div className={styles.name}>
                       {m.display_name}
+                      {caps?.vision ? (
+                        <span className={styles.pillVision}>Vision</span>
+                      ) : null}
+                      {caps?.thinking ? (
+                        <span className={styles.pillReason}>Reasoning</span>
+                      ) : null}
                       {active ? (
                         <span className={styles.activeBadge}>Active</span>
                       ) : null}
@@ -193,15 +221,15 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                   </div>
                   <div className={styles.right}>
                     {m.fit ? (
-                      <span className={`${styles.fit} ${FIT_CLASS[m.fit]}`}>
-                        {RAM_FIT_LABEL[m.fit]}
-                      </span>
-                    ) : null}
-                    {caps?.vision ? (
-                      <span className={styles.tagVision}>Vision</span>
-                    ) : null}
-                    {caps?.thinking ? (
-                      <span className={styles.tagReason}>Reasoning</span>
+                      <Tooltip
+                        label={RAM_FIT_TOOLTIP[m.fit]}
+                        multiline
+                        placement="top"
+                      >
+                        <span className={`${styles.fit} ${FIT_CLASS[m.fit]}`}>
+                          {RAM_FIT_LABEL[m.fit]}
+                        </span>
+                      </Tooltip>
                     ) : null}
                     <div className={styles.menuWrap} data-menu-root>
                       <button
@@ -225,7 +253,8 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                               className={styles.menuItem}
                               onClick={() => selectModel(m.id)}
                             >
-                              Set as active
+                              {SET_ACTIVE_ICON}
+                              <span>Set as active</span>
                             </button>
                           )}
                           <button
@@ -234,20 +263,33 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                             className={styles.menuItem}
                             onClick={() => openHuggingFace(m.id)}
                           >
-                            View on Hugging Face
+                            {HF_ICON}
+                            <span>View on Hugging Face</span>
+                            <span className={styles.menuExt} aria-hidden="true">
+                              ↗
+                            </span>
+                          </button>
+                          <button
+                            type="button"
+                            role="menuitem"
+                            className={styles.menuItem}
+                            onClick={() => revealInFinder(m.id)}
+                          >
+                            {FINDER_ICON}
+                            <span>Reveal in Finder</span>
                           </button>
                           <div className={styles.menuSep} />
                           <button
                             type="button"
                             role="menuitem"
                             className={`${styles.menuItem} ${styles.menuItemDanger}`}
-                            aria-label={`Delete ${m.display_name}`}
                             onClick={() => {
                               setOpenMenu(null);
                               setConfirmDelete(m.id);
                             }}
                           >
-                            Delete
+                            {TRASH_ICON}
+                            <span>Delete model</span>
                           </button>
                         </div>
                       ) : null}
@@ -266,15 +308,6 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
         </p>
       ) : null}
 
-      <div className={styles.footer}>
-        <span>
-          {installed.length} model{installed.length === 1 ? '' : 's'} installed
-        </span>
-        <span>
-          {freeDiskBytes !== null ? `${gb(freeDiskBytes)} GB free` : ''}
-        </span>
-      </div>
-
       {confirmModel ? (
         <ConfirmDialog
           open
diff --git a/src/settings/tabs/models/useHfSearch.test.ts b/src/settings/tabs/models/useHfSearch.test.ts
index ead36ddd..c9de39fb 100644
--- a/src/settings/tabs/models/useHfSearch.test.ts
+++ b/src/settings/tabs/models/useHfSearch.test.ts
@@ -17,6 +17,7 @@ import {
   useHfSearch,
   HF_SEARCH_DEBOUNCE_MS,
   HF_PAGE_SIZE,
+  clearHfSearchCache,
 } from './useHfSearch';
 import type { HfModelSummary } from '../../../types/hf';
 
@@ -33,6 +34,7 @@ const GEMMA: HfModelSummary[] = [
 
 beforeEach(() => {
   invokeMock.mockReset();
+  clearHfSearchCache();
 });
 
 afterEach(() => {
@@ -262,6 +264,25 @@ describe('useHfSearch', () => {
     });
   });
 
+  it('serves a repeated query from cache without re-fetching', async () => {
+    invokeMock.mockResolvedValue(POPULAR);
+    const first = renderHook(() => useHfSearch());
+    await waitFor(() => expect(first.result.current.loading).toBe(false));
+    expect(invokeMock).toHaveBeenCalledTimes(1);
+    first.unmount();
+
+    // A fresh mount (a Discover tab revisit) seeds from cache: results are
+    // present immediately, there is no loading flash, and no new call fires.
+    invokeMock.mockClear();
+    const second = renderHook(() => useHfSearch());
+    expect(second.result.current.loading).toBe(false);
+    expect(second.result.current.results).toEqual(POPULAR);
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(invokeMock).not.toHaveBeenCalled();
+  });
+
   it('does not offer Load more when the page is not full', async () => {
     invokeMock.mockResolvedValue(POPULAR); // one row, far below a full page
     const { result } = renderHook(() => useHfSearch());
diff --git a/src/settings/tabs/models/useHfSearch.ts b/src/settings/tabs/models/useHfSearch.ts
index 7b6c2950..bf100224 100644
--- a/src/settings/tabs/models/useHfSearch.ts
+++ b/src/settings/tabs/models/useHfSearch.ts
@@ -23,6 +23,29 @@ export const HF_SEARCH_DEBOUNCE_MS = 300;
  * page step (`HF_SEARCH_LIMIT`); the backend clamps the total to its own max. */
 export const HF_PAGE_SIZE = 30;
 
+/**
+ * Session-scoped cache of search results, keyed by `query::limit`. Switching to
+ * another tab unmounts the Discover pane, so without this every return trip
+ * would re-hit the Hub and flash "Searching…"; serving an already-seen query
+ * from cache makes the tab feel instant and avoids the redundant call. Lives
+ * for the app session (cleared on reload), since Hub rankings barely move on
+ * that timescale.
+ */
+const searchCache = new Map<string, HfModelSummary[]>();
+
+function cacheKey(query: string, limit: number): string {
+  return `${query}::${limit}`;
+}
+
+/**
+ * Clears the session search cache. Exposed for tests, which need a clean cache
+ * between cases; production never evicts (the cache is bounded by the small set
+ * of queries a user types in one session).
+ */
+export function clearHfSearchCache(): void {
+  searchCache.clear();
+}
+
 /**
  * Runtime guard for the IPC boundary. The Rust backend is trusted, but this
  * keeps the hook robust against shape drift (schema changes, legacy builds,
@@ -77,8 +100,14 @@ export interface UseHfSearchResult {
 export function useHfSearch(): UseHfSearchResult {
   const [queryText, setQueryText] = useState('');
   const [limit, setLimit] = useState(HF_PAGE_SIZE);
-  const [results, setResults] = useState<HfModelSummary[]>([]);
-  const [loading, setLoading] = useState(true);
+  // Seed straight from the cache so a remount (tab switch) paints the last
+  // results with no loading flash; a cold first run still starts in `loading`.
+  const [results, setResults] = useState<HfModelSummary[]>(
+    () => searchCache.get(cacheKey('', HF_PAGE_SIZE)) ?? [],
+  );
+  const [loading, setLoading] = useState(
+    () => !searchCache.has(cacheKey('', HF_PAGE_SIZE)),
+  );
 
   const mountedRef = useRef(true);
   const latestTokenRef = useRef(0);
@@ -107,8 +136,18 @@ export function useHfSearch(): UseHfSearchResult {
 
   const runSearch = useCallback(
     async (q: string, lim: number): Promise<void> => {
+      const key = cacheKey(q, lim);
       latestTokenRef.current += 1;
       const token = latestTokenRef.current;
+      // Cache hit: serve immediately, no network, no spinner. This lives in the
+      // callback rather than the effect body; note it still runs synchronously
+      // when the effect calls runSearch (async bodies run up to the first await).
+      const cached = searchCache.get(key);
+      if (cached) {
+        setResults(cached);
+        setLoading(false);
+        return;
+      }
       setLoading(true);
       try {
         const payload = await invoke<unknown>('search_hf_models', {
@@ -116,7 +155,12 @@ export function useHfSearch(): UseHfSearchResult {
           limit: lim,
         });
         if (!isLatest(token)) return;
-        setResults(isHfModelSummaryArray(payload) ? payload : []);
+        if (isHfModelSummaryArray(payload)) {
+          searchCache.set(key, payload);
+          setResults(payload);
+        } else {
+          setResults([]);
+        }
       } catch {
         if (!isLatest(token)) return;
         setResults([]);
@@ -132,6 +176,12 @@ export function useHfSearch(): UseHfSearchResult {
   // makes a single call. The empty-query mount fetch and "Load more" (a
   // `limit` bump) ride the same path.
   useEffect(() => {
+    // A cache hit serves instantly (runSearch short-circuits to the cache); a
+    // miss is debounced so a burst of keystrokes makes one network call.
+    if (searchCache.has(cacheKey(queryText, limit))) {
+      void runSearch(queryText, limit);
+      return;
+    }
     const timer = window.setTimeout(() => {
       void runSearch(queryText, limit);
     }, HF_SEARCH_DEBOUNCE_MS);
diff --git a/src/utils/ramFit.ts b/src/utils/ramFit.ts
index 6c469e1c..a30c4513 100644
--- a/src/utils/ramFit.ts
+++ b/src/utils/ramFit.ts
@@ -13,3 +13,11 @@ export const RAM_FIT_LABEL: Record<RamFit, string> = {
   tight: 'Tight',
   too_big: 'Heavy',
 };
+
+/** One-line explanation shown when hovering a RAM-fit label, so the verdict
+ * is not cryptic. Phrased around this Mac's memory, not raw numbers. */
+export const RAM_FIT_TOOLTIP: Record<RamFit, string> = {
+  fits: 'Runs with memory to spare on this Mac.',
+  tight: 'Runs, but close to this Mac’s memory limit.',
+  too_big: 'Larger than this Mac’s memory comfortably holds; expect slowdowns.',
+};

From d29e16a7eaceb112282504bb526f7e642cc74080 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 17:56:27 -0500
Subject: [PATCH 17/89] fix: lift config on Ollama model switch so Running card
 updates

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/ProvidersPane.test.tsx        | 53 +++++++++++++++++++
 src/settings/tabs/models/ProvidersPane.tsx    | 13 ++++-
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index 252b08d7..35a682d2 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -301,6 +301,59 @@ describe('ProvidersPane active hero', () => {
     );
   });
 
+  it('lifts a fresh config after selecting a different Ollama model', async () => {
+    const lifted = makeConfig('ollama', [
+      BUILTIN,
+      { ...OLLAMA, model: 'llama3.2:3b' },
+    ]);
+    mockInvoke({
+      get_model_picker_state: {
+        active: 'gemma4:e4b',
+        all: ['gemma4:e4b', 'llama3.2:3b'],
+        ollamaReachable: true,
+      },
+      get_config: lifted,
+    });
+    const onSaved = vi.fn();
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]), { onSaved });
+    const select = await screen.findByRole('combobox', {
+      name: 'Active Ollama model',
+    });
+    fireEvent.change(select, { target: { value: 'llama3.2:3b' } });
+    await waitFor(() =>
+      expect(invokeMock).toHaveBeenCalledWith('set_active_model', {
+        model: 'llama3.2:3b',
+      }),
+    );
+    // The lifted config (carrying the newly-selected Ollama model) reaches the
+    // parent so the Running footer re-renders with the new name.
+    await waitFor(() => expect(onSaved).toHaveBeenCalledWith(lifted));
+  });
+
+  it('swallows a failed Ollama model selection without lifting config', async () => {
+    const onSaved = vi.fn();
+    mockInvoke({
+      get_model_picker_state: {
+        active: 'gemma4:e4b',
+        all: ['gemma4:e4b', 'llama3.2:3b'],
+        ollamaReachable: true,
+      },
+      set_active_model: new Error('nope'),
+    });
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]), { onSaved });
+    const select = await screen.findByRole('combobox', {
+      name: 'Active Ollama model',
+    });
+    fireEvent.change(select, { target: { value: 'llama3.2:3b' } });
+    await waitFor(() =>
+      expect(invokeMock).toHaveBeenCalledWith('set_active_model', {
+        model: 'llama3.2:3b',
+      }),
+    );
+    await Promise.resolve();
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+
   it('shows a no-models hint when Ollama has none', () => {
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
     expect(screen.getByText('No models installed')).toBeInTheDocument();
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index c41df0f9..2bdf0bf6 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -242,6 +242,17 @@ export function ProvidersPane({
       .catch(() => {});
   }
 
+  // Ollama selection persists onto the active provider's model field via
+  // set_active_model; lift the fresh config so the Running footer (and the
+  // hero) re-render with the newly-selected model instead of the old name.
+  function commitOllamaModel(model: string) {
+    void setActiveModel(model)
+      .then(async () => onSaved(await invoke<RawAppConfig>('get_config')))
+      .catch(() => {
+        // The focus-driven resync picks the change up on next activation.
+      });
+  }
+
   function handleEngineEject() {
     void invoke('evict_model').catch(() => {});
   }
@@ -367,7 +378,7 @@ export function ProvidersPane({
                   className={styles.dropdown}
                   aria-label="Active Ollama model"
                   value={ollamaModelValue}
-                  onChange={(e) => void setActiveModel(e.target.value)}
+                  onChange={(e) => commitOllamaModel(e.target.value)}
                 >
                   {availableModels.map((m) => (
                     <option key={m} value={m}>

From ac5adcc186d7f366ec6f58028af190791d2f616d Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 18:03:44 -0500
Subject: [PATCH 18/89] refactor: drop unreliable row-level RAM-fit, keep
 accurate per-quant fit

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 docs/configurations.md                        |   1 -
 src-tauri/src/config/defaults.rs              |   6 -
 src-tauri/src/models/mod.rs                   | 139 +-----------------
 .../tabs/models/DiscoverPane.test.tsx         |  17 +--
 src/settings/tabs/models/DiscoverPane.tsx     |  17 +--
 src/types/hf.ts                               |  17 +--
 6 files changed, 23 insertions(+), 174 deletions(-)

diff --git a/docs/configurations.md b/docs/configurations.md
index 4d05a804..341b7ca1 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -193,7 +193,6 @@ The table below also lists the baked-in safety limits that govern Thuki's commun
 | `HF_SEARCH_LIMIT`                           | `30`     | No       | The per-page step for the in-app model browser. The "Load more" control raises the requested page size in multiples of this value, so it is a layout step rather than a user preference. | —      | How many GGUF model repos the first page of an in-app Hugging Face search returns, most-downloaded first. |
 | `HF_SEARCH_LIMIT_MAX`                        | `120`    | No       | Defense-in-depth bound on request size: "Load more" grows the requested page size in `HF_SEARCH_LIMIT` steps, and this caps the largest single request so a runaway page count cannot ask the Hub for an unbounded result set. | —      | The largest page size a single in-app Hugging Face search request may ask for, regardless of how many times "Load more" was pressed. |
 | `RUNTIME_OVERHEAD_GB`                        | `2.0`    | No       | Feeds the approximate RAM-fit hint shown in Library and Discover only; the authoritative per-starter memory estimates live in the model registry. A user-tunable overhead would imply a precision the hint does not claim. | —      | Resident-memory overhead added on top of a model's weights size (KV cache plus runtime buffers) when estimating whether it fits in this Mac's RAM. |
-| `PARAM_GB_PER_BILLION`                       | `0.65`   | No       | Feeds the approximate RAM-fit hint for Discover search rows only, where no file size is available and the footprint is estimated from the parameter count in the repo id. The hint never gates a download. | —      | Approximate resident GiB per billion parameters for a 4-bit quantised GGUF, used to estimate a search row's footprint. |
 | `MAX_HF_SEARCH_QUERY_LEN`                   | `200 bytes` | No    | Defense-in-depth bound on attacker-influenced input: the query reaches the fixed Hub host (no SSRF) and is percent-encoded by the client, but an unbounded string is still rejected to cap request size. | —      | The longest search string Thuki sends to the Hugging Face model search. A longer query is rejected before any network call. |
 | `OPENAI_MODELS_TIMEOUT_SECS`                | `5 s`    | No       | Protocol cap on a hung server so the Settings model dropdown cannot stall; the OpenAI-compatible server is local or LAN-hosted in the common case, so 5 s is generous. | —      | How long Thuki waits for an OpenAI-compatible server's `/v1/models` listing to respond before giving up. Applies to the Settings model dropdown for that provider, not to chat requests. |
 | `MAX_SSE_LINE_BYTES`                        | `1 MiB`  | No       | Defense-in-depth bound on attacker-controlled stream data. A malicious or broken chat server could otherwise grow a single stream line without limit and exhaust memory. | —      | The longest single Server-Sent-Events line Thuki accepts while streaming a chat response from an OpenAI-compatible (`/v1`) server. A stream line exceeding this aborts the response with an error. |
diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs
index e76e12e7..d8430d21 100644
--- a/src-tauri/src/config/defaults.rs
+++ b/src-tauri/src/config/defaults.rs
@@ -422,12 +422,6 @@ pub const HF_SEARCH_LIMIT_MAX: usize = 120;
 /// estimates live in the model registry.
 pub const RUNTIME_OVERHEAD_GB: f64 = 2.0;
 
-/// Approximate GiB of resident memory per billion parameters for a 4-bit
-/// quantised GGUF, used to estimate a Discover search row's footprint from the
-/// parameter count parsed out of its repo id (no file size is available at
-/// search time). Baked-in: feeds the RAM-fit *hint* only.
-pub const PARAM_GB_PER_BILLION: f64 = 0.65;
-
 /// Maximum accepted byte length for a Hugging Face search query before it is
 /// sent upstream. Defense-in-depth bound on attacker-influenced input: the
 /// query reaches the fixed Hub host (no SSRF) and is percent-encoded by the
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 48ccf74f..e9d7f7c5 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -31,9 +31,8 @@ use crate::config::defaults::{
     DEFAULT_OLLAMA_SHOW_REQUEST_TIMEOUT_SECS, DEFAULT_OLLAMA_TAGS_REQUEST_TIMEOUT_SECS,
     HF_API_TIMEOUT_SECS, HF_BASE_URL, HF_SEARCH_LIMIT_MAX, MAX_HF_API_BODY_BYTES,
     MAX_HF_SEARCH_QUERY_LEN, MAX_MODEL_SLUG_LEN, MAX_OLLAMA_SHOW_BODY_BYTES,
-    MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS, PARAM_GB_PER_BILLION,
-    PROVIDER_ID_BUILTIN, PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI,
-    RUNTIME_OVERHEAD_GB,
+    MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS, PROVIDER_ID_BUILTIN,
+    PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI, RUNTIME_OVERHEAD_GB,
 };
 use crate::config::AppConfig;
 
@@ -1585,18 +1584,6 @@ pub fn parse_search_results(body: &[u8]) -> Result<Vec<HfModelSummary>, String>
 // KV/runtime overhead) and reuse `registry::ram_fit` for the threshold. They
 // are deliberately approximate: the result is a hint, never a hard gate.
 
-/// A Hugging Face search row annotated with a best-effort RAM-fit hint for the
-/// host. The base summary carries the Hub facts; `fit` is estimated from the
-/// parameter count parsed out of the repo id (no file size is available at
-/// search time) and is `None` when the id carries no `<number>B` token or when
-/// host RAM is unknown.
-#[derive(Debug, Clone, PartialEq, Serialize)]
-pub struct HfModelRow {
-    #[serde(flatten)]
-    pub summary: HfModelSummary,
-    pub fit: Option<registry::RamFit>,
-}
-
 /// A repo `.gguf` file annotated with the accurate per-quant RAM-fit computed
 /// from its real file size. `fit` is `None` when host RAM or the file size is
 /// unknown (both are required to judge fit).
@@ -1616,33 +1603,6 @@ pub struct InstalledModelView {
     pub fit: Option<registry::RamFit>,
 }
 
-/// Parses the parameter count in billions from a model repo id by reading the
-/// last `<number>B` token (e.g. `unsloth/Qwen3.5-9B-GGUF` -> `9.0`,
-/// `org/Model-3.8B-it` -> `3.8`). Splits on `/ - _ space` (keeping `.` so a
-/// fractional count survives) and is case-insensitive on the trailing `B`.
-/// Returns `None` when no positive `<number>B` token is present.
-pub fn parse_param_billions(id: &str) -> Option<f64> {
-    let mut found = None;
-    for token in id.split(['/', '-', '_', ' ']) {
-        let Some(stripped) = token.strip_suffix('B').or_else(|| token.strip_suffix('b')) else {
-            continue;
-        };
-        if let Ok(v) = stripped.parse::<f64>() {
-            if v.is_finite() && v > 0.0 {
-                found = Some(v);
-            }
-        }
-    }
-    found
-}
-
-/// Estimated resident memory (GiB) for a 4-bit GGUF of `params_b` billion
-/// parameters: weights (~[`PARAM_GB_PER_BILLION`]/B) plus the fixed
-/// [`RUNTIME_OVERHEAD_GB`].
-pub fn estimate_runtime_gb_from_params(params_b: f64) -> f64 {
-    params_b * PARAM_GB_PER_BILLION + RUNTIME_OVERHEAD_GB
-}
-
 /// Estimated resident memory (GiB) for a GGUF weights blob of `size_bytes`:
 /// the on-disk size plus the fixed [`RUNTIME_OVERHEAD_GB`].
 pub fn estimate_runtime_gb_from_bytes(size_bytes: u64) -> f64 {
@@ -1655,25 +1615,6 @@ pub fn clamp_search_limit(limit: usize) -> usize {
     limit.clamp(1, HF_SEARCH_LIMIT_MAX)
 }
 
-/// Annotates search summaries with an estimated RAM-fit derived from the
-/// parameter count in each repo id. `ram_bytes == 0` (host RAM unknown) leaves
-/// `fit` as `None` even when the size could be estimated.
-pub fn annotate_search_rows(summaries: Vec<HfModelSummary>, ram_bytes: u64) -> Vec<HfModelRow> {
-    summaries
-        .into_iter()
-        .map(|summary| {
-            let fit = match parse_param_billions(&summary.id) {
-                Some(params_b) if ram_bytes > 0 => Some(registry::ram_fit(
-                    estimate_runtime_gb_from_params(params_b),
-                    ram_bytes,
-                )),
-                _ => None,
-            };
-            HfModelRow { summary, fit }
-        })
-        .collect()
-}
-
 /// Annotates repo `.gguf` rows with the accurate per-quant RAM-fit from each
 /// file's real size. A row gets `None` when host RAM or the file size is 0.
 pub fn annotate_gguf_rows(files: Vec<HfGgufFile>, ram_bytes: u64) -> Vec<HfGgufFileRow> {
@@ -2163,10 +2104,8 @@ pub async fn search_hf_models(
     query: String,
     limit: usize,
     client: tauri::State<'_, reqwest::Client>,
-) -> Result<Vec<HfModelRow>, String> {
-    let summaries =
-        fetch_hf_search(&client, HF_BASE_URL, &query, clamp_search_limit(limit)).await?;
-    Ok(annotate_search_rows(summaries, system_ram_bytes()))
+) -> Result<Vec<HfModelSummary>, String> {
+    fetch_hf_search(&client, HF_BASE_URL, &query, clamp_search_limit(limit)).await
 }
 
 /// Lists the models served by the configured OpenAI-compatible provider via
@@ -4647,33 +4586,7 @@ mod tests {
     // ── RAM-fit estimation + annotated views ─────────────────────────────────
 
     #[test]
-    fn parse_param_billions_reads_last_b_token() {
-        assert_eq!(parse_param_billions("unsloth/Qwen3.5-9B-GGUF"), Some(9.0));
-        assert_eq!(parse_param_billions("org/Model-3.8B-it"), Some(3.8));
-        assert_eq!(
-            parse_param_billions("bartowski/Llama-3.3-8B-Instruct-GGUF"),
-            Some(8.0)
-        );
-        // Lowercase trailing b is accepted.
-        assert_eq!(parse_param_billions("org/qwen-9b-gguf"), Some(9.0));
-        // Multiple B tokens: the rightmost positive one wins.
-        assert_eq!(parse_param_billions("org/Qwen3-235B-A22B"), Some(235.0));
-    }
-
-    #[test]
-    fn parse_param_billions_returns_none_without_a_param_token() {
-        assert_eq!(parse_param_billions("google/bert-base-uncased"), None);
-        assert_eq!(parse_param_billions(""), None);
-        // A zero count is not a usable estimate.
-        assert_eq!(parse_param_billions("org/0B-weird"), None);
-        // A non-numeric prefix before B does not parse.
-        assert_eq!(parse_param_billions("org/Model-AxB"), None);
-    }
-
-    #[test]
-    fn estimate_runtime_gb_helpers_add_overhead() {
-        // 8B * 0.65 + 2.0 overhead.
-        assert!((estimate_runtime_gb_from_params(8.0) - 7.2).abs() < 1e-9);
+    fn estimate_runtime_gb_from_bytes_adds_overhead() {
         // 1 GiB weights + 2.0 overhead.
         assert!((estimate_runtime_gb_from_bytes(1 << 30) - 3.0).abs() < 1e-9);
     }
@@ -4685,29 +4598,6 @@ mod tests {
         assert_eq!(clamp_search_limit(10_000), HF_SEARCH_LIMIT_MAX);
     }
 
-    #[test]
-    fn annotate_search_rows_estimates_fit_and_handles_unknowns() {
-        let summaries = vec![
-            HfModelSummary {
-                id: "org/Tiny-1B-GGUF".to_string(),
-                downloads: 10,
-                gated: false,
-            },
-            HfModelSummary {
-                id: "org/no-param-token".to_string(),
-                downloads: 5,
-                gated: false,
-            },
-        ];
-        // 64 GiB host: the 1B model fits, the param-less row stays unannotated.
-        let rows = annotate_search_rows(summaries.clone(), 64 << 30);
-        assert_eq!(rows[0].fit, Some(registry::RamFit::Fits));
-        assert_eq!(rows[1].fit, None);
-        // Unknown host RAM drops the fit verdict even when params parse.
-        let rows = annotate_search_rows(summaries, 0);
-        assert_eq!(rows[0].fit, None);
-    }
-
     #[test]
     fn annotate_gguf_rows_uses_real_sizes() {
         let files = vec![
@@ -4753,24 +4643,7 @@ mod tests {
     }
 
     #[test]
-    fn view_rows_serialize_with_flattened_base_and_fit() {
-        let row = HfModelRow {
-            summary: HfModelSummary {
-                id: "o/r".to_string(),
-                downloads: 3,
-                gated: false,
-            },
-            fit: Some(registry::RamFit::Tight),
-        };
-        assert_eq!(
-            serde_json::to_value(row).unwrap(),
-            serde_json::json!({
-                "id": "o/r",
-                "downloads": 3,
-                "gated": false,
-                "fit": "tight",
-            })
-        );
+    fn gguf_file_row_serializes_with_flattened_base_and_fit() {
         let file_row = HfGgufFileRow {
             file: HfGgufFile {
                 file: "w.gguf".to_string(),
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index a4a62ef1..bf323d0d 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -67,12 +67,7 @@ function mockCommands(responses: Record<string, unknown>) {
 }
 
 const RESULTS: HfModelSummary[] = [
-  {
-    id: 'google/gemma-4-12b-it-GGUF',
-    downloads: 1_200_000,
-    gated: false,
-    fit: 'fits',
-  },
+  { id: 'google/gemma-4-12b-it-GGUF', downloads: 1_200_000, gated: false },
   { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
   { id: 'meta-llama/Llama-3-8B-GGUF', downloads: 9_000, gated: true },
 ];
@@ -146,11 +141,13 @@ describe('DiscoverPane', () => {
     expect(screen.getByText(/chat models/)).toHaveTextContent('3 chat models');
   });
 
-  it('shows the estimated RAM-fit on a row when the backend provides one', async () => {
+  it('does not show a RAM-fit hint on the collapsed model row', async () => {
     await renderPane();
-    // Only the first result carries a fit estimate.
-    expect(screen.getByText('Comfortable')).toBeInTheDocument();
-    expect(screen.getAllByText('Comfortable')).toHaveLength(1);
+    // The row-level fit was an unreliable repo-id estimate and is gone; fit
+    // shows only on the per-quant rows once a row is expanded.
+    expect(screen.queryByText('Comfortable')).not.toBeInTheDocument();
+    expect(screen.queryByText('Tight')).not.toBeInTheDocument();
+    expect(screen.queryByText('Heavy')).not.toBeInTheDocument();
   });
 
   it('parses the org line from the full id when it has no org segment', async () => {
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
index 3073abf8..e87b726c 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -3,11 +3,11 @@
  *
  * A search field (driven by {@link useHfSearch}) plus a row of family filter
  * chips feed one debounced backend query that returns chat/text-generation
- * GGUF repos. Each lean row shows the repo id, an org + downloads sub-line, an
- * approximate RAM-fit hint, a link out to the repo on Hugging Face, and an
- * icon-only download button. That button expands a quant accordion listing the
- * repo's `.gguf` files (`list_hf_repo_ggufs`, each with an accurate per-quant
- * RAM-fit) and downloads the chosen one through the shared
+ * GGUF repos. Each lean row shows the repo id, an org + downloads sub-line, a
+ * link out to the repo on Hugging Face, and an icon-only download button. That
+ * button expands a quant accordion listing the repo's `.gguf` files
+ * (`list_hf_repo_ggufs`, each with an accurate per-quant RAM-fit, the only
+ * place fit is shown) and downloads the chosen one through the shared
  * {@link useDownloadModel} kit. A "Load more" control pages past the first
  * batch. A finished install lifts a fresh config snapshot and collapses the row.
  */
@@ -223,13 +223,6 @@ function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
             {org} · {model.downloads.toLocaleString()} downloads
           </div>
         </div>
-        {model.fit ? (
-          <Tooltip label={RAM_FIT_TOOLTIP[model.fit]} multiline placement="top">
-            <span className={`${styles.fit} ${FIT_CLASS[model.fit]}`}>
-              {RAM_FIT_LABEL[model.fit]}
-            </span>
-          </Tooltip>
-        ) : null}
         <button
           type="button"
           className={styles.get}
diff --git a/src/types/hf.ts b/src/types/hf.ts
index f214e09a..33a9a895 100644
--- a/src/types/hf.ts
+++ b/src/types/hf.ts
@@ -3,25 +3,20 @@
 /**
  * IPC shapes for the in-app Hugging Face GGUF model browser (the Discover
  * pane). Mirrors the serde output of the Rust `search_hf_models` command,
- * which serializes its `HfModelRow` struct (a flattened `HfModelSummary` plus
- * an estimated RAM-fit) as snake_case.
+ * which serializes its `HfModelSummary` struct as snake_case.
  */
 
-import type { RamFit } from './starter';
-
 /**
  * One repo row from `search_hf_models`. The search payload is deliberately
- * lean: it carries what the Discover list needs to render a row, decide
- * whether anonymous download is allowed, and show an approximate RAM-fit.
+ * lean: it carries what the Discover list needs to render a row and decide
+ * whether anonymous download is allowed. RAM-fit is not carried here; it shows
+ * only on the per-quant rows (where a real file size makes it accurate), which
+ * arrive at the expand step.
  *
  * - `id` is the canonical `owner/repo` slug.
  * - `downloads` is Hugging Face's all-time download count for the repo.
  * - `gated` is true when the repo requires accepting terms or auth; an
  *   anonymous download fails, so the Discover row disables download for it.
- * - `fit` is the estimated RAM-fit for this Mac, derived from the parameter
- *   count in the repo id (no file size is available at search time); it is
- *   `null` when the id carries no `<number>B` token or host RAM is unknown.
- *   Accurate per-quant fit arrives at the expand step.
  */
 export interface HfModelSummary {
   /** Canonical `owner/repo` slug. */
@@ -30,6 +25,4 @@ export interface HfModelSummary {
   downloads: number;
   /** True when the repo is gated; anonymous downloads fail. */
   gated: boolean;
-  /** Estimated RAM-fit for this Mac, or `null` when not derivable. */
-  fit?: RamFit | null;
 }

From b179d9c7635142477a989087f6525d91f69edbfb Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 18:04:41 -0500
Subject: [PATCH 19/89] polish: shorten RAM-fit hover tooltips to one clean
 line each

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/utils/__tests__/ramFit.test.ts | 23 +++++++++++++++++++++++
 src/utils/ramFit.ts                | 10 +++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)
 create mode 100644 src/utils/__tests__/ramFit.test.ts

diff --git a/src/utils/__tests__/ramFit.test.ts b/src/utils/__tests__/ramFit.test.ts
new file mode 100644
index 00000000..ded1bc9c
--- /dev/null
+++ b/src/utils/__tests__/ramFit.test.ts
@@ -0,0 +1,23 @@
+import { describe, expect, it } from 'vitest';
+
+import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../ramFit';
+import type { RamFit } from '../../types/starter';
+
+const VERDICTS: RamFit[] = ['fits', 'tight', 'too_big'];
+
+describe('RAM-fit copy', () => {
+  it('keeps each tooltip a single short, clean sentence', () => {
+    for (const verdict of VERDICTS) {
+      const tip = RAM_FIT_TOOLTIP[verdict];
+      // Short and clean: at most 30 characters, ending in a period, no semicolons.
+      expect(tip.length).toBeLessThanOrEqual(30);
+      expect(tip).toMatch(/^[^;]+\.$/);
+    }
+  });
+
+  it('exposes a label for every verdict', () => {
+    for (const verdict of VERDICTS) {
+      expect(RAM_FIT_LABEL[verdict]).toBeTruthy();
+    }
+  });
+});
diff --git a/src/utils/ramFit.ts b/src/utils/ramFit.ts
index a30c4513..0ccf1025 100644
--- a/src/utils/ramFit.ts
+++ b/src/utils/ramFit.ts
@@ -14,10 +14,10 @@ export const RAM_FIT_LABEL: Record<RamFit, string> = {
   too_big: 'Heavy',
 };
 
-/** One-line explanation shown when hovering a RAM-fit label, so the verdict
- * is not cryptic. Phrased around this Mac's memory, not raw numbers. */
+/** Short hover explanation for a RAM-fit label, so the verdict is not cryptic.
+ * A single clean sentence each, no clauses or numbers. */
 export const RAM_FIT_TOOLTIP: Record<RamFit, string> = {
-  fits: 'Runs with memory to spare on this Mac.',
-  tight: 'Runs, but close to this Mac’s memory limit.',
-  too_big: 'Larger than this Mac’s memory comfortably holds; expect slowdowns.',
+  fits: 'Fits comfortably.',
+  tight: 'Close to the memory limit.',
+  too_big: 'Too big for this Mac.',
 };

From 868b6d990c393836fe8956384824c1570e3600df Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 18:07:19 -0500
Subject: [PATCH 20/89] polish: calm capability pills, add Text, rename
 Reasoning to Thinking, drop Active pill

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/LibraryPane.module.css        | 43 ++++++++++---------
 src/settings/tabs/models/LibraryPane.test.tsx | 27 +++++++++---
 src/settings/tabs/models/LibraryPane.tsx      | 21 ++++++---
 src/styles/settings.module.css                |  2 -
 4 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index 7b845f77..f96d074b 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -112,17 +112,6 @@
   font-variant-numeric: tabular-nums;
 }
 
-.activeBadge {
-  display: inline-flex;
-  align-items: center;
-  font-size: 10.5px;
-  font-weight: 560;
-  padding: 2px 8px;
-  border-radius: var(--radius-pill);
-  color: var(--accent);
-  background: var(--accent-soft);
-}
-
 .right {
   display: flex;
   align-items: center;
@@ -130,23 +119,35 @@
   flex: none;
 }
 
-/* Capability pills sit in the name line, right after the model name. */
-.pillVision,
-.pillReason {
+/* Capability pills sit in the name line, right after the model name. One calm,
+ * cohesive chrome for all of them (neutral text + a faint shared background);
+ * a small colour dot is the only accent, so Text / Vision / Thinking read as a
+ * quiet family rather than a saturated rainbow. */
+.pill {
   display: inline-flex;
   align-items: center;
+  gap: 5px;
   font-size: 10.5px;
-  font-weight: 560;
+  font-weight: 540;
   padding: 2px 8px;
   border-radius: var(--radius-pill);
+  color: var(--t2);
+  background: rgba(255, 255, 255, 0.05);
+}
+.pill::before {
+  content: '';
+  width: 5px;
+  height: 5px;
+  border-radius: 50%;
+}
+.pillText::before {
+  background: var(--t3);
 }
-.pillVision {
-  color: var(--vis);
-  background: var(--vis-bg);
+.pillVision::before {
+  background: var(--vis);
 }
-.pillReason {
-  color: var(--rea);
-  background: var(--rea-bg);
+.pillThinking::before {
+  background: var(--rea);
 }
 
 /* RAM-fit hint: a coloured dot + label reusing the onboarding fit palette. */
diff --git a/src/settings/tabs/models/LibraryPane.test.tsx b/src/settings/tabs/models/LibraryPane.test.tsx
index b98f3b08..d2c28457 100644
--- a/src/settings/tabs/models/LibraryPane.test.tsx
+++ b/src/settings/tabs/models/LibraryPane.test.tsx
@@ -214,7 +214,6 @@ describe('LibraryPane', () => {
   it('marks the active model and offers Set as active only on the rest', async () => {
     mockCommands(libraryResponses());
     await renderPane(makeConfig('org/gemma:gemma.gguf'));
-    expect(screen.getByText('Active')).toBeInTheDocument();
     // The non-active model's menu offers Set as active.
     openMenu('qwen');
     expect(
@@ -227,6 +226,13 @@ describe('LibraryPane', () => {
     ).not.toBeInTheDocument();
   });
 
+  it('shows a Text pill on every model', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    // Text is the baseline capability, so it shows on both rows.
+    expect(screen.getAllByText('Text')).toHaveLength(2);
+  });
+
   it('shows a Vision tag only for vision-capable models', async () => {
     mockCommands(libraryResponses());
     await renderPane();
@@ -234,18 +240,27 @@ describe('LibraryPane', () => {
     expect(screen.getAllByText('Vision')).toHaveLength(1);
   });
 
-  it('shows a Reasoning tag only for thinking-capable models', async () => {
+  it('shows a Thinking tag only for thinking-capable models', async () => {
     mockCommands(libraryResponses());
     await renderPane();
-    expect(screen.getByText('Reasoning')).toBeInTheDocument();
-    expect(screen.getAllByText('Reasoning')).toHaveLength(1);
+    expect(screen.getByText('Thinking')).toBeInTheDocument();
+    expect(screen.getAllByText('Thinking')).toHaveLength(1);
+  });
+
+  it('marks the active model with an edge, not an Active pill', async () => {
+    mockCommands(libraryResponses());
+    await renderPane(makeConfig('org/gemma:gemma.gguf'));
+    // The accent edge is the only active signal; the textual pill is gone.
+    expect(screen.queryByText('Active')).not.toBeInTheDocument();
   });
 
-  it('omits capability tags when no map entry exists for a model', async () => {
+  it('omits Vision and Thinking tags when no map entry exists, keeping Text', async () => {
     mockCommands(libraryResponses({ get_model_capabilities: {} }));
     await renderPane();
     expect(screen.queryByText('Vision')).not.toBeInTheDocument();
-    expect(screen.queryByText('Reasoning')).not.toBeInTheDocument();
+    expect(screen.queryByText('Thinking')).not.toBeInTheDocument();
+    // Text is unconditional, so it survives a missing capability map.
+    expect(screen.getAllByText('Text')).toHaveLength(2);
   });
 
   it('Set as active commits the model, lifts the config, and refreshes', async () => {
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index 50ff451b..ab2fd267 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -2,8 +2,9 @@
  * Library pane of the Models surface: the user's installed local models.
  *
  * Each downloaded model shows as a quiet row: its name with capability pills
- * (Vision / Reasoning) and an Active marker, the Hugging Face repo /
- * quantisation / size, and a RAM-fit hint (hover for a one-line explanation).
+ * (Text always, plus Vision / Thinking when applicable), the Hugging Face repo
+ * / quantisation / size, and a RAM-fit hint (hover for a one-line explanation).
+ * The active model is marked by the accent edge alone, not a textual pill.
  * A ⋮ button opens a floating popover (Set as active / View on Hugging Face /
  * Reveal in Finder / Delete) instead of expanding the card; Delete routes
  * through a confirm dialog. When nothing is installed the pane invites the
@@ -203,14 +204,20 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                   <div className={styles.mid}>
                     <div className={styles.name}>
                       {m.display_name}
+                      <span className={`${styles.pill} ${styles.pillText}`}>
+                        Text
+                      </span>
                       {caps?.vision ? (
-                        <span className={styles.pillVision}>Vision</span>
+                        <span className={`${styles.pill} ${styles.pillVision}`}>
+                          Vision
+                        </span>
                       ) : null}
                       {caps?.thinking ? (
-                        <span className={styles.pillReason}>Reasoning</span>
-                      ) : null}
-                      {active ? (
-                        <span className={styles.activeBadge}>Active</span>
+                        <span
+                          className={`${styles.pill} ${styles.pillThinking}`}
+                        >
+                          Thinking
+                        </span>
                       ) : null}
                     </div>
                     <div className={styles.org}>
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index f08c6f43..b11c027f 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -41,9 +41,7 @@
   --accent: #ff8d5c;
   --accent-soft: rgba(255, 141, 92, 0.14);
   --vis: #7fd1a6;
-  --vis-bg: rgba(127, 209, 166, 0.1);
   --rea: #b9a4f0;
-  --rea-bg: rgba(185, 164, 240, 0.1);
   --ok: #79c08e;
   --tight: #e6b56b;
   /* RAM-fit "Heavy" (model larger than this Mac comfortably holds) and the

From 1e08d348e958e96f0c46a742d307037a75e2a5b0 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 18:08:18 -0500
Subject: [PATCH 21/89] polish: restyle Models tabs to match Settings nav, drop
 the container box

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/styles/settings.module.css | 39 +++++++++++++---------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index b11c027f..058e2dfd 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -298,48 +298,42 @@
 
 /* ─── Models surface (segmented Library / Discover / Providers) ──────────── */
 
-/* Icon-above-label tabs in the same family as the sidebar section nav: the
- * active view carries the accent on its icon + label inside a softly elevated
- * bordered box (no loud filled pill). */
+/* Icon-above-label tabs that mirror the original Settings section nav: a plain
+ * centered row with no container box, the active view carrying the accent on
+ * its icon + label over a subtle dark inset (not a loud elevated pill). */
 .seg {
   display: inline-flex;
   box-sizing: border-box;
-  gap: 6px;
-  padding: 5px;
-  border: 1px solid var(--hair-soft);
-  border-radius: 13px;
-  background: var(--elev-1);
+  gap: 4px;
 }
 .segItem {
   box-sizing: border-box;
   display: inline-flex;
   flex-direction: column;
   align-items: center;
-  gap: 7px;
-  width: 96px;
-  padding: 11px 0 9px;
-  border: 1px solid transparent;
+  gap: 6px;
+  min-width: 72px;
+  padding: 8px 14px;
+  border: none;
   border-radius: 10px;
   background: transparent;
   color: var(--t2);
   font-family: inherit;
-  font-size: 12px;
-  font-weight: 540;
+  font-size: 11px;
+  font-weight: 500;
   cursor: pointer;
   transition:
     color 150ms ease,
-    background 150ms ease,
-    border-color 150ms ease;
+    background 150ms ease;
 }
 .segItem svg {
-  width: 20px;
-  height: 20px;
+  width: 22px;
+  height: 22px;
   stroke: currentColor;
   stroke-width: 1.6;
   fill: none;
   stroke-linecap: round;
   stroke-linejoin: round;
-  opacity: 0.92;
 }
 .segItem:hover:not(.segItemActive) {
   color: var(--t1);
@@ -350,11 +344,8 @@
 }
 .segItemActive {
   color: var(--accent);
-  background: var(--elev-2);
-  border-color: var(--hair-soft);
-}
-.segItemActive svg {
-  opacity: 1;
+  background: rgba(0, 0, 0, 0.28);
+  box-shadow: inset 0 0 0 1px rgba(255, 141, 92, 0.1);
 }
 .segItemLabel {
   color: inherit;

From 3d831a6861986b71f6f0433358fd54e2107d6904 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 01:31:27 -0500
Subject: [PATCH 22/89] fix: make built-in reasoning opt-in via /think with
 honest thinking UX

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/commands.rs                    |  12 ++
 src-tauri/src/history.rs                     |   4 +
 src-tauri/src/models/mod.rs                  |  50 +++++-
 src-tauri/src/models/registry.rs             |  16 +-
 src-tauri/src/openai.rs                      | 172 ++++++++++++++++++-
 src-tauri/src/search/llm.rs                  |   3 +-
 src-tauri/src/search/pipeline.rs             |   4 +
 src/view/ConversationView.tsx                |  10 +-
 src/view/__tests__/ConversationView.test.tsx |  24 +++
 9 files changed, 274 insertions(+), 21 deletions(-)

diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
index 0fb7e5e5..9d5a46c9 100644
--- a/src-tauri/src/commands.rs
+++ b/src-tauri/src/commands.rs
@@ -280,10 +280,12 @@ async fn fetch_builtin_vision(client: &reqwest::Client, base_url: &str) -> bool
 ///
 /// Returns the accumulated assistant content (empty on the error paths) so
 /// the caller's persistence tail treats every route identically.
+#[allow(clippy::too_many_arguments)]
 pub(crate) async fn stream_builtin_chat(
     engine: &crate::engine::runner::EngineHandle,
     target: crate::engine::state::Target,
     model_id: String,
+    think: bool,
     mut messages: Vec<ChatMessage>,
     client: &reqwest::Client,
     cancel_token: CancellationToken,
@@ -326,6 +328,7 @@ pub(crate) async fn stream_builtin_chat(
                     messages,
                     api_key: None,
                     flavor: crate::openai::V1Flavor::Builtin,
+                    enable_thinking: think,
                 },
                 client,
                 cancel_token,
@@ -1181,6 +1184,7 @@ pub async fn ask_model(
                         &engine,
                         target,
                         model_id,
+                        think,
                         messages,
                         &client,
                         cancel_token.clone(),
@@ -1206,6 +1210,9 @@ pub async fn ask_model(
                     messages,
                     api_key,
                     flavor: crate::openai::V1Flavor::Remote,
+                    // `/think` reasoning control is built-in only; a remote
+                    // OpenAI-compatible server uses its own server-side defaults.
+                    enable_thinking: false,
                 },
                 &client,
                 cancel_token.clone(),
@@ -3362,6 +3369,7 @@ mod tests {
             &engine,
             engine_target(),
             "org/repo:m.gguf".to_string(),
+            false,
             vec![],
             &client,
             CancellationToken::new(),
@@ -3399,6 +3407,7 @@ mod tests {
                     &engine,
                     engine_target(),
                     "org/repo:m.gguf".to_string(),
+                    false,
                     vec![],
                     &client,
                     CancellationToken::new(),
@@ -3452,6 +3461,7 @@ mod tests {
                     &engine,
                     engine_target(),
                     "org/repo:m.gguf".to_string(),
+                    false,
                     vec![],
                     &client,
                     cancel_token,
@@ -3496,6 +3506,7 @@ mod tests {
             &engine,
             engine_target(),
             "org/repo:m.gguf".to_string(),
+            false,
             vec![],
             &client,
             CancellationToken::new(),
@@ -3612,6 +3623,7 @@ mod tests {
             &engine,
             engine_target(),
             "org/repo:m.gguf".to_string(),
+            false,
             image_message(),
             &client,
             CancellationToken::new(),
diff --git a/src-tauri/src/history.rs b/src-tauri/src/history.rs
index a4c8a104..54d5dc27 100644
--- a/src-tauri/src/history.rs
+++ b/src-tauri/src/history.rs
@@ -296,6 +296,10 @@ pub(crate) async fn generate_title_text(
                     messages: title_messages,
                     api_key: api_key.clone(),
                     flavor: *flavor,
+                    // Title generation answers directly; the built-in engine
+                    // must not run a thinking pass (it would burn the token
+                    // budget before producing the title).
+                    enable_thinking: false,
                 },
                 client,
                 cancel_token,
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index e9d7f7c5..79a78d82 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -988,20 +988,29 @@ pub async fn get_model_capabilities(
     }
 }
 
-/// Capability map for the built-in provider, derived from the installed-model
-/// manifest. Each row carries the curated vision/thinking flags recorded at
-/// download time; `max_images` stays `None` because llama-server imposes no
-/// fixed per-request image cap.
+/// Capability map for the built-in provider. For a curated starter the flags
+/// come from the current registry, not the manifest row: the row freezes the
+/// flags recorded at download time, so a later flag correction (e.g. a
+/// reasoning model previously recorded as non-thinking) would otherwise stay
+/// wrong for already-installed models. Reading the registry heals those rows on
+/// every read with no manifest migration. A pasted (non-curated) repo has no
+/// registry entry and keeps the flags its row recorded. `max_images` stays
+/// `None` because llama-server imposes no fixed per-request image cap.
 pub(crate) fn builtin_capabilities_from_manifest(
     rows: &[manifest::InstalledModel],
 ) -> HashMap<String, Capabilities> {
     rows.iter()
         .map(|row| {
+            let (vision, thinking) = registry::STARTERS
+                .iter()
+                .find(|s| s.repo == row.repo && s.file_name == row.file_name)
+                .map(|s| (s.vision, s.thinking))
+                .unwrap_or((row.vision, row.thinking));
             (
                 row.id.clone(),
                 Capabilities {
-                    vision: row.vision,
-                    thinking: row.thinking,
+                    vision,
+                    thinking,
                     max_images: None,
                 },
             )
@@ -3959,6 +3968,35 @@ mod tests {
         assert!(caps.values().all(|c| c.max_images.is_none()));
     }
 
+    /// A curated starter installed before its `thinking` flag was corrected
+    /// still carries the stale flag in its manifest row. The capability view
+    /// heals it from the current registry, so the model is no longer wrongly
+    /// told it "does not emit thinking tokens" without a manifest migration.
+    #[test]
+    fn builtin_capabilities_heal_curated_flags_from_registry() {
+        let fast = registry::STARTERS
+            .iter()
+            .find(|s| s.tier == registry::Tier::Fast)
+            .unwrap();
+        // Simulate a row written before the flag fix: capabilities recorded
+        // as the old, wrong values.
+        let mut stale = registry::to_installed_model(fast);
+        stale.thinking = false;
+        stale.vision = false;
+
+        let caps = builtin_capabilities_from_manifest(&[stale]);
+
+        let healed = &caps[&registry::to_installed_model(fast).id];
+        assert!(
+            healed.thinking,
+            "registry heals the corrected reasoning flag"
+        );
+        assert!(
+            healed.vision,
+            "registry capabilities win for curated models"
+        );
+    }
+
     #[test]
     fn builtin_capabilities_empty_manifest_yields_empty_map() {
         assert!(builtin_capabilities_from_manifest(&[]).is_empty());
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index e591765a..214ed225 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -83,7 +83,7 @@ pub const STARTERS: &[Starter] = &[
         size_bytes: 5_680_522_464,
         quant: "Q4_K_M",
         vision: true,
-        thinking: false,
+        thinking: true,
         mmproj_file: Some("mmproj-BF16.gguf"),
         mmproj_sha256: Some("853698ce7aa6c7ba732478bad280240969ddf7b0fcbf93900046f63903a83383"),
         mmproj_bytes: 921_705_024,
@@ -121,7 +121,7 @@ pub const STARTERS: &[Starter] = &[
         size_bytes: 12_109_566_560,
         quant: "MXFP4",
         vision: false,
-        thinking: false,
+        thinking: true,
         mmproj_file: None,
         mmproj_sha256: None,
         mmproj_bytes: 0,
@@ -238,6 +238,18 @@ mod tests {
         assert_eq!(smartest.mmproj_bytes, 0);
     }
 
+    /// The `thinking` flag is the passive "this model reasons" badge: it drives
+    /// the picker tag, the `/think` capability gate, and the earlier-turn
+    /// reasoning strip. It must match each curated model's real behavior, or a
+    /// reasoning model is wrongly told it "does not emit thinking tokens".
+    /// Qwen3.5 and gpt-oss are reasoning models; Gemma 4 is not.
+    #[test]
+    fn thinking_flag_per_tier() {
+        assert!(starter(Tier::Fast).thinking, "Qwen3.5 reasons");
+        assert!(!starter(Tier::Balanced).thinking, "Gemma 4 does not reason");
+        assert!(starter(Tier::Smartest).thinking, "gpt-oss reasons");
+    }
+
     #[test]
     fn all_revisions_are_40_hex() {
         for s in STARTERS {
diff --git a/src-tauri/src/openai.rs b/src-tauri/src/openai.rs
index 6c6dae50..051e21b3 100644
--- a/src-tauri/src/openai.rs
+++ b/src-tauri/src/openai.rs
@@ -39,6 +39,11 @@ pub struct OpenAiChatParams {
     pub api_key: Option<String>,
     /// Picks the user-facing error copy for this request.
     pub flavor: V1Flavor,
+    /// Whether the model should run a reasoning pass before answering.
+    /// Reasoning is opt-in (the `/think` command); a plain message answers
+    /// directly. Honored only on the built-in engine via
+    /// [`reasoning_template_kwargs`]; remote `/v1` servers ignore it.
+    pub enable_thinking: bool,
 }
 
 /// Error returned by [`request_openai_json`]. Mirrors the classification the
@@ -211,6 +216,45 @@ fn oversize_sse_line_error() -> EngineError {
     }
 }
 
+// ─── Reasoning control ───────────────────────────────────────────────────────
+
+/// The per-request reasoning switch to merge into a `/v1` body as
+/// `chat_template_kwargs`, or `None` when the request must carry no such field.
+///
+/// llama.cpp honors `chat_template_kwargs.enable_thinking` per request
+/// (verified against the pinned `b9590` sidecar with Qwen3.5): `false` answers
+/// directly, `true` runs a thinking pass first. Only the bundled engine
+/// ([`V1Flavor::Builtin`]) receives it; the field is llama.cpp-specific and an
+/// arbitrary OpenAI-compatible server may reject an unknown body key, so remote
+/// providers get nothing.
+fn reasoning_template_kwargs(flavor: V1Flavor, enable_thinking: bool) -> Option<serde_json::Value> {
+    match flavor {
+        V1Flavor::Builtin => Some(serde_json::json!({ "enable_thinking": enable_thinking })),
+        V1Flavor::Remote => None,
+    }
+}
+
+/// Builds the streaming `/v1/chat/completions` request body. Pulled out of
+/// [`stream_openai_chat`] so the reasoning-control wiring is unit-tested
+/// without a live server. No sampling parameters are sent: the server and
+/// model defaults apply.
+pub(crate) fn chat_request_body(
+    model: &str,
+    messages: &[ChatMessage],
+    flavor: V1Flavor,
+    enable_thinking: bool,
+) -> serde_json::Value {
+    let mut body = serde_json::json!({
+        "model": model,
+        "messages": messages.iter().map(to_openai_message).collect::<Vec<_>>(),
+        "stream": true,
+    });
+    if let Some(kwargs) = reasoning_template_kwargs(flavor, enable_thinking) {
+        body["chat_template_kwargs"] = kwargs;
+    }
+    body
+}
+
 // ─── Streaming chat ──────────────────────────────────────────────────────────
 
 /// Streams a `/v1/chat/completions` request (`stream: true`) and emits the
@@ -235,12 +279,9 @@ pub async fn stream_openai_chat(
         messages,
         api_key,
         flavor,
+        enable_thinking,
     } = params;
-    let body = serde_json::json!({
-        "model": model,
-        "messages": messages.iter().map(to_openai_message).collect::<Vec<_>>(),
-        "stream": true,
-    });
+    let body = chat_request_body(&model, &messages, flavor, enable_thinking);
     let mut request = client
         .post(format!("{base_url}/v1/chat/completions"))
         .json(&body);
@@ -362,8 +403,9 @@ pub(crate) fn json_request_body(
     messages: &[ChatMessage],
     schema: serde_json::Value,
     max_tokens: i32,
+    flavor: V1Flavor,
 ) -> serde_json::Value {
-    serde_json::json!({
+    let mut body = serde_json::json!({
         "model": model,
         "messages": messages.iter().map(to_openai_message).collect::<Vec<_>>(),
         "stream": false,
@@ -373,7 +415,14 @@ pub(crate) fn json_request_body(
             "type": "json_schema",
             "json_schema": {"name": "out", "strict": true, "schema": schema},
         },
-    })
+    });
+    // Structured output must never reason on the built-in engine: a thinking
+    // pass would consume the `max_tokens` budget before any JSON is emitted,
+    // yielding empty content. Force the switch off (remote servers get nothing).
+    if let Some(kwargs) = reasoning_template_kwargs(flavor, false) {
+        body["chat_template_kwargs"] = kwargs;
+    }
+    body
 }
 
 /// Sends a single non-streaming `/v1/chat/completions` request with a strict
@@ -391,9 +440,10 @@ pub async fn request_openai_json(
     api_key: Option<&str>,
     timeout_secs: u64,
     max_tokens: i32,
+    flavor: V1Flavor,
     cancel_token: &CancellationToken,
 ) -> Result<String, OpenAiError> {
-    let body = json_request_body(model, &messages, schema, max_tokens);
+    let body = json_request_body(model, &messages, schema, max_tokens, flavor);
     let mut request = client
         .post(format!("{base_url}/v1/chat/completions"))
         .json(&body)
@@ -460,6 +510,7 @@ mod tests {
             messages: vec![user_message("hi")],
             api_key: None,
             flavor: V1Flavor::Remote,
+            enable_thinking: false,
         }
     }
 
@@ -1089,6 +1140,7 @@ mod tests {
             Some("sk-json"),
             5,
             256,
+            V1Flavor::Remote,
             &CancellationToken::new(),
         )
         .await;
@@ -1116,6 +1168,7 @@ mod tests {
             None,
             5,
             64,
+            V1Flavor::Remote,
             &CancellationToken::new(),
         )
         .await;
@@ -1141,6 +1194,7 @@ mod tests {
             None,
             5,
             64,
+            V1Flavor::Remote,
             &token,
         )
         .await;
@@ -1174,6 +1228,7 @@ mod tests {
             None,
             1,
             64,
+            V1Flavor::Remote,
             &CancellationToken::new(),
         )
         .await;
@@ -1213,6 +1268,7 @@ mod tests {
             None,
             5,
             64,
+            V1Flavor::Remote,
             &CancellationToken::new(),
         )
         .await;
@@ -1240,6 +1296,7 @@ mod tests {
             None,
             5,
             64,
+            V1Flavor::Remote,
             &CancellationToken::new(),
         )
         .await;
@@ -1269,6 +1326,7 @@ mod tests {
             None,
             5,
             64,
+            V1Flavor::Remote,
             &CancellationToken::new(),
         )
         .await;
@@ -1303,6 +1361,7 @@ mod tests {
             None,
             5,
             64,
+            V1Flavor::Remote,
             &CancellationToken::new(),
         )
         .await;
@@ -1312,4 +1371,101 @@ mod tests {
         assert_eq!(requests.len(), 1);
         assert!(!requests[0].headers.contains_key("authorization"));
     }
+
+    // ── reasoning control (chat_template_kwargs.enable_thinking) ─────────────
+
+    /// Built-in chat carries the llama.cpp per-request reasoning switch. With
+    /// reasoning opted out (the default), the body sets
+    /// `chat_template_kwargs.enable_thinking = false` so the model answers
+    /// directly instead of running a thinking pass.
+    #[test]
+    fn builtin_chat_body_disables_thinking_by_default() {
+        let body = chat_request_body("m", &[user_message("hi")], V1Flavor::Builtin, false);
+        assert_eq!(
+            body["chat_template_kwargs"]["enable_thinking"],
+            serde_json::json!(false)
+        );
+        assert_eq!(body["stream"], serde_json::json!(true));
+    }
+
+    /// Built-in chat with `/think` opts in: `enable_thinking = true`.
+    #[test]
+    fn builtin_chat_body_enables_thinking_when_opted_in() {
+        let body = chat_request_body("m", &[user_message("hi")], V1Flavor::Builtin, true);
+        assert_eq!(
+            body["chat_template_kwargs"]["enable_thinking"],
+            serde_json::json!(true)
+        );
+    }
+
+    /// Remote `/v1` servers never receive the llama.cpp-specific
+    /// `chat_template_kwargs` field: an arbitrary OpenAI-compatible server may
+    /// reject an unknown body key, and the `/think` opt-in is built-in only.
+    #[test]
+    fn remote_chat_body_omits_thinking_kwargs() {
+        let body = chat_request_body("m", &[user_message("hi")], V1Flavor::Remote, true);
+        assert!(body.get("chat_template_kwargs").is_none());
+    }
+
+    /// Structured-output calls (search judges, title generation) must never
+    /// reason on the built-in engine: a thinking pass would consume the
+    /// `max_tokens` budget before any JSON is emitted. The builtin structured
+    /// body forces `enable_thinking = false`.
+    #[test]
+    fn builtin_structured_body_disables_thinking() {
+        let body = json_request_body(
+            "m",
+            &[user_message("q")],
+            serde_json::json!({}),
+            64,
+            V1Flavor::Builtin,
+        );
+        assert_eq!(
+            body["chat_template_kwargs"]["enable_thinking"],
+            serde_json::json!(false)
+        );
+        assert_eq!(body["stream"], serde_json::json!(false));
+    }
+
+    /// Remote structured-output bodies stay clean of the llama.cpp kwarg.
+    #[test]
+    fn remote_structured_body_omits_thinking_kwargs() {
+        let body = json_request_body(
+            "m",
+            &[user_message("q")],
+            serde_json::json!({}),
+            64,
+            V1Flavor::Remote,
+        );
+        assert!(body.get("chat_template_kwargs").is_none());
+    }
+
+    /// End to end: a built-in streaming chat actually sends the reasoning
+    /// switch on the wire, locking `stream_openai_chat` to `chat_request_body`.
+    #[tokio::test]
+    async fn builtin_stream_sends_enable_thinking_on_the_wire() {
+        let server = MockServer::start().await;
+        mount_sse(&server, b"data: [DONE]\n".to_vec()).await;
+
+        let client = reqwest::Client::new();
+        let (_, callback) = collect_chunks();
+        stream_openai_chat(
+            OpenAiChatParams {
+                flavor: V1Flavor::Builtin,
+                enable_thinking: false,
+                ..chat_params(server.uri())
+            },
+            &client,
+            CancellationToken::new(),
+            callback,
+        )
+        .await;
+
+        let requests = server.received_requests().await.unwrap();
+        let sent: serde_json::Value = serde_json::from_slice(&requests[0].body).unwrap();
+        assert_eq!(
+            sent["chat_template_kwargs"]["enable_thinking"],
+            serde_json::json!(false)
+        );
+    }
 }
diff --git a/src-tauri/src/search/llm.rs b/src-tauri/src/search/llm.rs
index 52829d10..9c02d138 100644
--- a/src-tauri/src/search/llm.rs
+++ b/src-tauri/src/search/llm.rs
@@ -386,7 +386,7 @@ async fn request_json_v1(
     // Build the trace body via the same helper request_openai_json uses so
     // the recorded body always mirrors the actual wire shape.
     let request_body_value =
-        crate::openai::json_request_body(model, &messages, format.clone(), num_predict);
+        crate::openai::json_request_body(model, &messages, format.clone(), num_predict, flavor);
     let started = std::time::Instant::now();
     let result = crate::openai::request_openai_json(
         base_url,
@@ -397,6 +397,7 @@ async fn request_json_v1(
         api_key,
         timeout_secs,
         num_predict,
+        flavor,
         cancel_token,
     )
     .await;
diff --git a/src-tauri/src/search/pipeline.rs b/src-tauri/src/search/pipeline.rs
index 6a557465..635374bb 100644
--- a/src-tauri/src/search/pipeline.rs
+++ b/src-tauri/src/search/pipeline.rs
@@ -516,6 +516,10 @@ async fn run_streaming_branch(
                     messages,
                     api_key: api_key.clone(),
                     flavor: *flavor,
+                    // Search synthesis answers directly; reasoning is opt-in
+                    // chat only, and on the built-in engine a thinking pass
+                    // would burn the token budget before the answer.
+                    enable_thinking: false,
                 },
                 client,
                 cancel_token,
diff --git a/src/view/ConversationView.tsx b/src/view/ConversationView.tsx
index 213c2a4c..3d55376f 100644
--- a/src/view/ConversationView.tsx
+++ b/src/view/ConversationView.tsx
@@ -290,11 +290,13 @@ export function ConversationView({
               errorKind={msg.errorKind}
               thinkingContent={msg.thinkingContent}
               isThinkingPending={isThinkingPending}
+              // "Still thinking" reflects the real stream state, not whether
+              // /think was used: thinking tokens have arrived, the answer has
+              // not started, and the turn is still generating (isLastAssistant
+              // already implies isGenerating). This keeps the Done indicator
+              // honest even if a model reasons without an explicit /think.
               isThinking={
-                isLastAssistant &&
-                msg.fromThink === true &&
-                !msg.content &&
-                !!msg.thinkingContent
+                isLastAssistant && !msg.content && !!msg.thinkingContent
               }
               searchSources={msg.searchSources}
               searchWarnings={msg.searchWarnings}
diff --git a/src/view/__tests__/ConversationView.test.tsx b/src/view/__tests__/ConversationView.test.tsx
index fefa79d0..602a8fc4 100644
--- a/src/view/__tests__/ConversationView.test.tsx
+++ b/src/view/__tests__/ConversationView.test.tsx
@@ -558,6 +558,30 @@ describe('ConversationView', () => {
       );
     });
 
+    it('shows the live "Thinking..." indicator while reasoning streams even without /think', () => {
+      // A reasoning model may emit thinking tokens without an explicit
+      // /think (e.g. it ignored the off switch). The indicator must reflect
+      // the real stream state: still thinking, not a premature "Done".
+      render(
+        <ConversationView
+          messages={[
+            {
+              id: '1',
+              role: 'assistant' as const,
+              content: '',
+              // No fromThink flag: this turn was not an explicit /think.
+              thinkingContent: 'Reasoning in progress...',
+            },
+          ]}
+          isGenerating={true}
+          onClose={vi.fn()}
+        />,
+      );
+      expect(screen.getByTestId('loading-label').textContent).toBe(
+        'Thinking...',
+      );
+    });
+
     it('does not show TypingIndicator when assistant has thinkingContent but no content', () => {
       const { container } = render(
         <ConversationView

From bf14fdc2c474f97ccec19c24e3bd188162b1ecbd Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 01:49:21 -0500
Subject: [PATCH 23/89] fix: drop reasoning output when thinking is off for a
 model-agnostic policy

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/commands.rs | 66 ++++++++++++++++++++++++++++++++++++---
 src-tauri/src/openai.rs   | 66 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
index 9d5a46c9..28906073 100644
--- a/src-tauri/src/commands.rs
+++ b/src-tauri/src/commands.rs
@@ -848,11 +848,17 @@ pub async fn stream_ollama_chat(
                                             serde_json::from_str::<OllamaChatResponse>(trimmed)
                                         {
                                             if let Some(ref msg) = json.message {
-                                                if let Some(ref thinking) = msg.thinking {
-                                                    if !thinking.is_empty() {
-                                                        on_chunk(StreamChunk::ThinkingToken(
-                                                            thinking.clone(),
-                                                        ));
+                                                // Reasoning is opt-in: drop the
+                                                // thinking channel when off, so no
+                                                // thinking block is ever shown
+                                                // (universal display policy).
+                                                if think {
+                                                    if let Some(ref thinking) = msg.thinking {
+                                                        if !thinking.is_empty() {
+                                                            on_chunk(StreamChunk::ThinkingToken(
+                                                                thinking.clone(),
+                                                            ));
+                                                        }
                                                     }
                                                 }
                                                 if let Some(ref token) = msg.content {
@@ -2616,6 +2622,56 @@ mod tests {
         assert_eq!(accumulated, "Hello");
     }
 
+    /// Reasoning is opt-in on the Ollama path too: with thinking off, a
+    /// `thinking` field in the response is dropped (no ThinkingToken). Ollama
+    /// already honors `think:false`, so this is the belt-and-suspenders display
+    /// guarantee matching the built-in path's universal policy.
+    #[tokio::test]
+    async fn stream_ollama_chat_drops_thinking_when_off() {
+        let mut server = mockito::Server::new_async().await;
+        let body = format!(
+            "{}{}{}",
+            chat_line_with_thinking("leaked reasoning", "", false),
+            chat_line_with_thinking("", "Hello", false),
+            chat_line_with_thinking("", "", true),
+        );
+        let mock = server
+            .mock("POST", "/api/chat")
+            .with_body(body)
+            .create_async()
+            .await;
+
+        let client = reqwest::Client::new();
+        let token = CancellationToken::new();
+        let (chunks, callback) = collect_chunks();
+
+        let accumulated = stream_ollama_chat(
+            OllamaChatParams {
+                endpoint: format!("{}/api/chat", server.url()),
+                model: "test-model".to_string(),
+                messages: vec![],
+                think: false,
+                keep_alive: None,
+                num_ctx: DEFAULT_NUM_CTX,
+            },
+            &client,
+            token,
+            callback,
+        )
+        .await;
+
+        mock.assert_async().await;
+        let chunks = chunks.lock().unwrap();
+        assert!(
+            !chunks
+                .iter()
+                .any(|c| matches!(c, StreamChunk::ThinkingToken(_))),
+            "reasoning must be dropped when thinking is off"
+        );
+        assert!(matches!(&chunks[0], StreamChunk::Token(t) if t == "Hello"));
+        assert_eq!(accumulated, "Hello");
+    }
+
     #[tokio::test]
     async fn stream_ollama_chat_sends_think_true_in_request() {
         let mut server = mockito::Server::new_async().await;
diff --git a/src-tauri/src/openai.rs b/src-tauri/src/openai.rs
index 051e21b3..6aea06f1 100644
--- a/src-tauri/src/openai.rs
+++ b/src-tauri/src/openai.rs
@@ -350,13 +350,20 @@ pub async fn stream_openai_chat(
                                 continue;
                             };
                             if let Some(choice) = event.choices.first() {
-                                if let Some(thinking) = choice
-                                    .delta
-                                    .reasoning_content
-                                    .as_deref()
-                                    .filter(|s| !s.is_empty())
-                                {
-                                    on_chunk(StreamChunk::ThinkingToken(thinking.to_string()));
+                                // Reasoning is opt-in: when thinking is off, drop
+                                // the model's reasoning channel entirely so no
+                                // thinking block is ever shown, regardless of
+                                // whether the model honored the request-level
+                                // switch. The universal display guarantee.
+                                if enable_thinking {
+                                    if let Some(thinking) = choice
+                                        .delta
+                                        .reasoning_content
+                                        .as_deref()
+                                        .filter(|s| !s.is_empty())
+                                    {
+                                        on_chunk(StreamChunk::ThinkingToken(thinking.to_string()));
+                                    }
                                 }
                                 if let Some(token) =
                                     choice.delta.content.as_deref().filter(|s| !s.is_empty())
@@ -624,8 +631,12 @@ mod tests {
 
         let client = reqwest::Client::new();
         let (chunks, callback) = collect_chunks();
+        // Thinking is opted in, so the reasoning channel is surfaced.
         let accumulated = stream_openai_chat(
-            chat_params(server.uri()),
+            OpenAiChatParams {
+                enable_thinking: true,
+                ..chat_params(server.uri())
+            },
             &client,
             CancellationToken::new(),
             callback,
@@ -642,6 +653,45 @@ mod tests {
         );
     }
 
+    /// Universal display guarantee: when thinking is off, the model's reasoning
+    /// channel is dropped (no `ThinkingToken`), so the user never sees a
+    /// thinking block no matter what the model emits. This holds even for a
+    /// model that ignored the request-level reasoning switch and reasoned
+    /// anyway.
+    #[tokio::test]
+    async fn reasoning_dropped_when_thinking_off() {
+        let server = MockServer::start().await;
+        let body = format!(
+            "data: {{\"choices\":[{{\"delta\":{{\"reasoning_content\":\"secret thoughts\"}}}}]}}\n\n{}data: [DONE]\n",
+            sse_content_line("answer"),
+        );
+        mount_sse(&server, body.into_bytes()).await;
+
+        let client = reqwest::Client::new();
+        let (chunks, callback) = collect_chunks();
+        let accumulated = stream_openai_chat(
+            OpenAiChatParams {
+                enable_thinking: false,
+                ..chat_params(server.uri())
+            },
+            &client,
+            CancellationToken::new(),
+            callback,
+        )
+        .await;
+
+        let chunks = chunks.lock().unwrap();
+        assert!(
+            !chunks
+                .iter()
+                .any(|c| matches!(c, StreamChunk::ThinkingToken(_))),
+            "reasoning must be dropped when thinking is off"
+        );
+        assert!(matches!(&chunks[0], StreamChunk::Token(t) if t == "answer"));
+        assert!(matches!(&chunks[1], StreamChunk::Done));
+        assert_eq!(accumulated, "answer");
+    }
+
     /// `data: [DONE]` terminates the stream immediately: anything the server
     /// sends afterwards is never parsed and no second terminal chunk appears.
     #[tokio::test]

From d4363f15a92cf86d8e9ca229ab8c14df963d649f Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 02:20:07 -0500
Subject: [PATCH 24/89] fix: extend reasoning-off to all kwarg-controllable
 model families

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/openai.rs | 61 +++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/src-tauri/src/openai.rs b/src-tauri/src/openai.rs
index 6aea06f1..bebe3c97 100644
--- a/src-tauri/src/openai.rs
+++ b/src-tauri/src/openai.rs
@@ -221,15 +221,31 @@ fn oversize_sse_line_error() -> EngineError {
 /// The per-request reasoning switch to merge into a `/v1` body as
 /// `chat_template_kwargs`, or `None` when the request must carry no such field.
 ///
-/// llama.cpp honors `chat_template_kwargs.enable_thinking` per request
-/// (verified against the pinned `b9590` sidecar with Qwen3.5): `false` answers
-/// directly, `true` runs a thinking pass first. Only the bundled engine
-/// ([`V1Flavor::Builtin`]) receives it; the field is llama.cpp-specific and an
-/// arbitrary OpenAI-compatible server may reject an unknown body key, so remote
-/// providers get nothing.
+/// llama.cpp injects these into the model's chat template and a template
+/// silently ignores any kwarg it does not read (verified against the pinned
+/// `b9590` sidecar with Qwen3.5: the full set below suppresses reasoning with
+/// no error). So one harmless "blast" covers every reasoning family that
+/// exposes a template-level switch, with no per-family detection:
+/// `enable_thinking` (Qwen3/3.5, GLM, Hunyuan, Gemma), `thinking` (IBM Granite,
+/// DeepSeek-V3.x), and `thinking_budget` (`0` = off / `-1` = unrestricted, for
+/// ByteDance Seed-OSS). `false`/`0` answers directly; `true`/`-1` reasons.
+///
+/// Families with no template switch (DeepSeek-R1 + distills, QwQ, gpt-oss
+/// Harmony, MiniMax, EXAONE, Phi-4-reasoning, ...) reason regardless; their
+/// output is dropped at the stream layer when thinking is off (see
+/// [`stream_openai_chat`]), so nothing is ever shown even though the compute
+/// cannot be stopped on this engine.
+///
+/// Only the bundled engine ([`V1Flavor::Builtin`]) receives the kwargs; the
+/// fields are llama.cpp-specific and an arbitrary OpenAI-compatible server may
+/// reject an unknown body key, so remote providers get nothing.
 fn reasoning_template_kwargs(flavor: V1Flavor, enable_thinking: bool) -> Option<serde_json::Value> {
     match flavor {
-        V1Flavor::Builtin => Some(serde_json::json!({ "enable_thinking": enable_thinking })),
+        V1Flavor::Builtin => Some(serde_json::json!({
+            "enable_thinking": enable_thinking,
+            "thinking": enable_thinking,
+            "thinking_budget": if enable_thinking { -1 } else { 0 },
+        })),
         V1Flavor::Remote => None,
     }
 }
@@ -1431,21 +1447,26 @@ mod tests {
     #[test]
     fn builtin_chat_body_disables_thinking_by_default() {
         let body = chat_request_body("m", &[user_message("hi")], V1Flavor::Builtin, false);
-        assert_eq!(
-            body["chat_template_kwargs"]["enable_thinking"],
-            serde_json::json!(false)
-        );
+        // The OFF blast covers every reasoning family that honors a template
+        // kwarg, in one harmless payload: `enable_thinking` (Qwen/GLM/Hunyuan/
+        // Gemma), `thinking` (Granite/DeepSeek-V3.x), `thinking_budget` 0
+        // (Seed-OSS). Templates ignore the kwargs they do not read.
+        let kwargs = &body["chat_template_kwargs"];
+        assert_eq!(kwargs["enable_thinking"], serde_json::json!(false));
+        assert_eq!(kwargs["thinking"], serde_json::json!(false));
+        assert_eq!(kwargs["thinking_budget"], serde_json::json!(0));
         assert_eq!(body["stream"], serde_json::json!(true));
     }
 
-    /// Built-in chat with `/think` opts in: `enable_thinking = true`.
+    /// Built-in chat with `/think` opts in: the ON blast sets every kwarg to
+    /// the reasoning-enabled value (`thinking_budget` -1 = unrestricted).
     #[test]
     fn builtin_chat_body_enables_thinking_when_opted_in() {
         let body = chat_request_body("m", &[user_message("hi")], V1Flavor::Builtin, true);
-        assert_eq!(
-            body["chat_template_kwargs"]["enable_thinking"],
-            serde_json::json!(true)
-        );
+        let kwargs = &body["chat_template_kwargs"];
+        assert_eq!(kwargs["enable_thinking"], serde_json::json!(true));
+        assert_eq!(kwargs["thinking"], serde_json::json!(true));
+        assert_eq!(kwargs["thinking_budget"], serde_json::json!(-1));
     }
 
     /// Remote `/v1` servers never receive the llama.cpp-specific
@@ -1470,10 +1491,10 @@ mod tests {
             64,
             V1Flavor::Builtin,
         );
-        assert_eq!(
-            body["chat_template_kwargs"]["enable_thinking"],
-            serde_json::json!(false)
-        );
+        let kwargs = &body["chat_template_kwargs"];
+        assert_eq!(kwargs["enable_thinking"], serde_json::json!(false));
+        assert_eq!(kwargs["thinking"], serde_json::json!(false));
+        assert_eq!(kwargs["thinking_budget"], serde_json::json!(0));
         assert_eq!(body["stream"], serde_json::json!(false));
     }
 

From 891bf9ba58ee99c506acc2ad1eb99921ac282361 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 13:25:22 -0500
Subject: [PATCH 25/89] fix: show model reasoning instead of hiding it when
 thinking is off

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/commands.rs | 72 ++++++---------------------------------
 src-tauri/src/openai.rs   | 72 ++++++++-------------------------------
 2 files changed, 25 insertions(+), 119 deletions(-)

diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
index 28906073..864d1360 100644
--- a/src-tauri/src/commands.rs
+++ b/src-tauri/src/commands.rs
@@ -848,17 +848,11 @@ pub async fn stream_ollama_chat(
                                             serde_json::from_str::<OllamaChatResponse>(trimmed)
                                         {
                                             if let Some(ref msg) = json.message {
-                                                // Reasoning is opt-in: drop the
-                                                // thinking channel when off, so no
-                                                // thinking block is ever shown
-                                                // (universal display policy).
-                                                if think {
-                                                    if let Some(ref thinking) = msg.thinking {
-                                                        if !thinking.is_empty() {
-                                                            on_chunk(StreamChunk::ThinkingToken(
-                                                                thinking.clone(),
-                                                            ));
-                                                        }
+                                                if let Some(ref thinking) = msg.thinking {
+                                                    if !thinking.is_empty() {
+                                                        on_chunk(StreamChunk::ThinkingToken(
+                                                            thinking.clone(),
+                                                        ));
                                                     }
                                                 }
                                                 if let Some(ref token) = msg.content {
@@ -2622,56 +2616,6 @@ mod tests {
         assert_eq!(accumulated, "Hello");
     }
 
-    /// Reasoning is opt-in on the Ollama path too: with thinking off, a
-    /// `thinking` field in the response is dropped (no ThinkingToken). Ollama
-    /// already honors `think:false`, so this is the belt-and-suspenders display
-    /// guarantee matching the built-in path's universal policy.
-    #[tokio::test]
-    async fn stream_ollama_chat_drops_thinking_when_off() {
-        let mut server = mockito::Server::new_async().await;
-        let body = format!(
-            "{}{}{}",
-            chat_line_with_thinking("leaked reasoning", "", false),
-            chat_line_with_thinking("", "Hello", false),
-            chat_line_with_thinking("", "", true),
-        );
-        let mock = server
-            .mock("POST", "/api/chat")
-            .with_body(body)
-            .create_async()
-            .await;
-
-        let client = reqwest::Client::new();
-        let token = CancellationToken::new();
-        let (chunks, callback) = collect_chunks();
-
-        let accumulated = stream_ollama_chat(
-            OllamaChatParams {
-                endpoint: format!("{}/api/chat", server.url()),
-                model: "test-model".to_string(),
-                messages: vec![],
-                think: false,
-                keep_alive: None,
-                num_ctx: DEFAULT_NUM_CTX,
-            },
-            &client,
-            token,
-            callback,
-        )
-        .await;
-
-        mock.assert_async().await;
-        let chunks = chunks.lock().unwrap();
-        assert!(
-            !chunks
-                .iter()
-                .any(|c| matches!(c, StreamChunk::ThinkingToken(_))),
-            "reasoning must be dropped when thinking is off"
-        );
-        assert!(matches!(&chunks[0], StreamChunk::Token(t) if t == "Hello"));
-        assert_eq!(accumulated, "Hello");
-    }
-
     #[tokio::test]
     async fn stream_ollama_chat_sends_think_true_in_request() {
         let mut server = mockito::Server::new_async().await;
@@ -2819,6 +2763,7 @@ mod tests {
         let caps = Capabilities {
             vision: false,
             thinking: false,
+            reasoning_always: false,
             max_images: None,
         };
         let stats = apply_capability_filter(&mut messages, &caps);
@@ -2836,6 +2781,7 @@ mod tests {
         let caps = Capabilities {
             vision: true,
             thinking: false,
+            reasoning_always: false,
             max_images: None,
         };
         let stats = apply_capability_filter(&mut messages, &caps);
@@ -2857,6 +2803,7 @@ mod tests {
         let caps = Capabilities {
             vision: true,
             thinking: false,
+            reasoning_always: false,
             max_images: Some(1),
         };
         let stats = apply_capability_filter(&mut messages, &caps);
@@ -2872,6 +2819,7 @@ mod tests {
         let caps = Capabilities {
             vision: true,
             thinking: false,
+            reasoning_always: false,
             max_images: Some(2),
         };
         let stats = apply_capability_filter(&mut messages, &caps);
@@ -2885,6 +2833,7 @@ mod tests {
         let caps = Capabilities {
             vision: false,
             thinking: false,
+            reasoning_always: false,
             max_images: None,
         };
         let stats = apply_capability_filter(&mut messages, &caps);
@@ -2904,6 +2853,7 @@ mod tests {
         let caps = Capabilities {
             vision: true,
             thinking: false,
+            reasoning_always: false,
             max_images: Some(1),
         };
         let stats = apply_capability_filter(&mut messages, &caps);
diff --git a/src-tauri/src/openai.rs b/src-tauri/src/openai.rs
index bebe3c97..09420ec1 100644
--- a/src-tauri/src/openai.rs
+++ b/src-tauri/src/openai.rs
@@ -366,20 +366,19 @@ pub async fn stream_openai_chat(
                                 continue;
                             };
                             if let Some(choice) = event.choices.first() {
-                                // Reasoning is opt-in: when thinking is off, drop
-                                // the model's reasoning channel entirely so no
-                                // thinking block is ever shown, regardless of
-                                // whether the model honored the request-level
-                                // switch. The universal display guarantee.
-                                if enable_thinking {
-                                    if let Some(thinking) = choice
-                                        .delta
-                                        .reasoning_content
-                                        .as_deref()
-                                        .filter(|s| !s.is_empty())
-                                    {
-                                        on_chunk(StreamChunk::ThinkingToken(thinking.to_string()));
-                                    }
+                                // Whatever reasoning the model emits is always
+                                // shown (never hidden): an `Optional` model that
+                                // honored the OFF blast emits none, while a
+                                // model that always reasons gets its thinking
+                                // surfaced cleanly in the thinking block rather
+                                // than running invisibly.
+                                if let Some(thinking) = choice
+                                    .delta
+                                    .reasoning_content
+                                    .as_deref()
+                                    .filter(|s| !s.is_empty())
+                                {
+                                    on_chunk(StreamChunk::ThinkingToken(thinking.to_string()));
                                 }
                                 if let Some(token) =
                                     choice.delta.content.as_deref().filter(|s| !s.is_empty())
@@ -647,12 +646,8 @@ mod tests {
 
         let client = reqwest::Client::new();
         let (chunks, callback) = collect_chunks();
-        // Thinking is opted in, so the reasoning channel is surfaced.
         let accumulated = stream_openai_chat(
-            OpenAiChatParams {
-                enable_thinking: true,
-                ..chat_params(server.uri())
-            },
+            chat_params(server.uri()),
             &client,
             CancellationToken::new(),
             callback,
@@ -669,45 +664,6 @@ mod tests {
         );
     }
 
-    /// Universal display guarantee: when thinking is off, the model's reasoning
-    /// channel is dropped (no `ThinkingToken`), so the user never sees a
-    /// thinking block no matter what the model emits. This holds even for a
-    /// model that ignored the request-level reasoning switch and reasoned
-    /// anyway.
-    #[tokio::test]
-    async fn reasoning_dropped_when_thinking_off() {
-        let server = MockServer::start().await;
-        let body = format!(
-            "data: {{\"choices\":[{{\"delta\":{{\"reasoning_content\":\"secret thoughts\"}}}}]}}\n\n{}data: [DONE]\n",
-            sse_content_line("answer"),
-        );
-        mount_sse(&server, body.into_bytes()).await;
-
-        let client = reqwest::Client::new();
-        let (chunks, callback) = collect_chunks();
-        let accumulated = stream_openai_chat(
-            OpenAiChatParams {
-                enable_thinking: false,
-                ..chat_params(server.uri())
-            },
-            &client,
-            CancellationToken::new(),
-            callback,
-        )
-        .await;
-
-        let chunks = chunks.lock().unwrap();
-        assert!(
-            !chunks
-                .iter()
-                .any(|c| matches!(c, StreamChunk::ThinkingToken(_))),
-            "reasoning must be dropped when thinking is off"
-        );
-        assert!(matches!(&chunks[0], StreamChunk::Token(t) if t == "answer"));
-        assert!(matches!(&chunks[1], StreamChunk::Done));
-        assert_eq!(accumulated, "answer");
-    }
-
     /// `data: [DONE]` terminates the stream immediately: anything the server
     /// sends afterwards is never parsed and no second terminal chunk appears.
     #[tokio::test]

From 58f897c8ad391c994f2f831c6367f499a1d24f40 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 13:25:23 -0500
Subject: [PATCH 26/89] feat: badge models whose reasoning cannot be turned off

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 38 +++++++++++++++++--
 src-tauri/src/models/registry.rs              | 26 +++++++++++++
 src/components/ModelPickerPanel.tsx           | 24 ++++++++++++
 .../__tests__/ModelPickerPanel.test.tsx       | 12 ++++++
 src/types/model.ts                            |  8 ++++
 src/types/starter.ts                          |  3 ++
 6 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 79a78d82..25c1c5ed 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -734,6 +734,12 @@ pub struct Capabilities {
     /// ThinkingBlock UI.
     #[serde(default)]
     pub thinking: bool,
+    /// Reasoning is structural and cannot be turned off (gpt-oss/Harmony,
+    /// DeepSeek-R1, QwQ, ...). Thuki still shows such a model's reasoning
+    /// cleanly and marks it in the picker so the user is not surprised by the
+    /// latency. `false` when reasoning is optional (off by default) or absent.
+    #[serde(default)]
+    pub reasoning_always: bool,
     /// Maximum number of images the model accepts in a single request, when
     /// known. `None` means "unknown / unbounded by Thuki" and the gate lets
     /// the request through. Today this is keyed off the model architecture
@@ -1001,16 +1007,20 @@ pub(crate) fn builtin_capabilities_from_manifest(
 ) -> HashMap<String, Capabilities> {
     rows.iter()
         .map(|row| {
-            let (vision, thinking) = registry::STARTERS
+            // Curated starters carry `reasoning_always` in the registry too;
+            // pasted repos default to not-always until runtime detection marks
+            // them (a follow-up). `thinking`/`vision` heal as before.
+            let (vision, thinking, reasoning_always) = registry::STARTERS
                 .iter()
                 .find(|s| s.repo == row.repo && s.file_name == row.file_name)
-                .map(|s| (s.vision, s.thinking))
-                .unwrap_or((row.vision, row.thinking));
+                .map(|s| (s.vision, s.thinking, s.reasoning_always))
+                .unwrap_or((row.vision, row.thinking, false));
             (
                 row.id.clone(),
                 Capabilities {
                     vision,
                     thinking,
+                    reasoning_always,
                     max_images: None,
                 },
             )
@@ -1032,6 +1042,7 @@ pub(crate) fn openai_capabilities(model: &str, vision: bool) -> HashMap<String,
         Capabilities {
             vision,
             thinking: false,
+            reasoning_always: false,
             max_images: None,
         },
     )])
@@ -3384,6 +3395,7 @@ mod tests {
         let caps = Capabilities {
             vision: true,
             thinking: false,
+            reasoning_always: false,
             max_images: Some(1),
         };
         let v = serde_json::to_value(&caps).unwrap();
@@ -3392,6 +3404,7 @@ mod tests {
             serde_json::json!({
                 "vision": true,
                 "thinking": false,
+                "reasoningAlways": false,
                 "maxImages": 1,
             })
         );
@@ -3402,6 +3415,7 @@ mod tests {
         let caps = Capabilities {
             vision: true,
             thinking: false,
+            reasoning_always: false,
             max_images: None,
         };
         let v = serde_json::to_value(&caps).unwrap();
@@ -3997,6 +4011,24 @@ mod tests {
         );
     }
 
+    /// gpt-oss (curated Smartest) reasons unstoppably; its `reasoning_always`
+    /// capability is healed from the registry so the picker can badge it. A
+    /// pasted (non-curated) row defaults to not-always (runtime detection is a
+    /// follow-up).
+    #[test]
+    fn builtin_capabilities_reasoning_always_from_registry() {
+        let smartest = registry::STARTERS
+            .iter()
+            .find(|s| s.tier == registry::Tier::Smartest)
+            .unwrap();
+        let caps = builtin_capabilities_from_manifest(&[registry::to_installed_model(smartest)]);
+        assert!(caps[&registry::to_installed_model(smartest).id].reasoning_always);
+
+        let pasted =
+            builtin_capabilities_from_manifest(&[manifest_row("org/repo:x.gguf", false, true)]);
+        assert!(!pasted["org/repo:x.gguf"].reasoning_always);
+    }
+
     #[test]
     fn builtin_capabilities_empty_manifest_yields_empty_map() {
         assert!(builtin_capabilities_from_manifest(&[]).is_empty());
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 214ed225..6fa5ef75 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -49,6 +49,10 @@ pub struct Starter {
     pub vision: bool,
     /// Whether the model emits a thinking/scratchpad token stream.
     pub thinking: bool,
+    /// Whether the model's reasoning cannot be turned off (it always reasons).
+    /// `true` only for structurally-always-on families (e.g. gpt-oss/Harmony);
+    /// `false` when reasoning is optional (the default-off path) or absent.
+    pub reasoning_always: bool,
     /// Vision projection file name, when the model is multimodal.
     pub mmproj_file: Option<&'static str>,
     /// Lowercase hex SHA-256 of the mmproj blob, when present.
@@ -84,6 +88,7 @@ pub const STARTERS: &[Starter] = &[
         quant: "Q4_K_M",
         vision: true,
         thinking: true,
+        reasoning_always: false,
         mmproj_file: Some("mmproj-BF16.gguf"),
         mmproj_sha256: Some("853698ce7aa6c7ba732478bad280240969ddf7b0fcbf93900046f63903a83383"),
         mmproj_bytes: 921_705_024,
@@ -103,6 +108,7 @@ pub const STARTERS: &[Starter] = &[
         quant: "Q4_0",
         vision: true,
         thinking: false,
+        reasoning_always: false,
         mmproj_file: Some("mmproj-gemma-4-12b-it-qat-q4_0.gguf"),
         mmproj_sha256: Some("e70b0e5cd80323d5d588b4ed06780356b7b1ba03995a4b8164c6ae9db0ff5989"),
         mmproj_bytes: 175_115_264,
@@ -122,6 +128,7 @@ pub const STARTERS: &[Starter] = &[
         quant: "MXFP4",
         vision: false,
         thinking: true,
+        reasoning_always: true,
         mmproj_file: None,
         mmproj_sha256: None,
         mmproj_bytes: 0,
@@ -250,6 +257,25 @@ mod tests {
         assert!(starter(Tier::Smartest).thinking, "gpt-oss reasons");
     }
 
+    /// `reasoning_always` marks models whose reasoning cannot be turned off.
+    /// Only gpt-oss (Harmony) is structurally always-on; Qwen3.5's reasoning is
+    /// optional (off by default via the kwarg blast) and Gemma does not reason.
+    #[test]
+    fn reasoning_always_flag_per_tier() {
+        assert!(
+            starter(Tier::Smartest).reasoning_always,
+            "gpt-oss always reasons"
+        );
+        assert!(
+            !starter(Tier::Fast).reasoning_always,
+            "Qwen3.5 reasoning is optional"
+        );
+        assert!(
+            !starter(Tier::Balanced).reasoning_always,
+            "Gemma does not force reasoning"
+        );
+    }
+
     #[test]
     fn all_revisions_are_40_hex() {
         for s in STARTERS {
diff --git a/src/components/ModelPickerPanel.tsx b/src/components/ModelPickerPanel.tsx
index d163e004..c7fbd520 100644
--- a/src/components/ModelPickerPanel.tsx
+++ b/src/components/ModelPickerPanel.tsx
@@ -22,6 +22,16 @@ export const OLLAMA_LIBRARY_URL = 'https://ollama.com/library';
 export const OLLAMA_PILL_TOOLTIP =
   'Browse and pull any model on Ollama. Thuki auto-detects it.';
 
+/**
+ * Pill shown on models whose reasoning cannot be turned off (capability
+ * `reasoningAlways`). Positive, non-alarming framing per industry practice
+ * (Anthropic/OpenAI/Gemini never present reasoning as a caveat): the goal is
+ * to set expectations, not warn. `/think` is a no-op for these models.
+ */
+export const ALWAYS_THINKS_LABEL = 'Always thinks';
+export const ALWAYS_THINKS_TOOLTIP =
+  'This model reasons before every answer, so expect a brief pause. Its thinking shows in a collapsible block above each reply.';
+
 const CHECK_ICON_PATH = (
   <path
     d="M3 8l3.5 3.5L13 5"
@@ -290,6 +300,8 @@ export function ModelPickerPanel({
             const active = model === activeModel;
             const highlighted = index === safeHighlightedIndex;
             const capLabel = formatCapabilityLabel(capabilities, model);
+            const alwaysThinks =
+              capabilities?.[model]?.reasoningAlways === true;
             return (
               <button
                 key={model}
@@ -322,6 +334,18 @@ export function ModelPickerPanel({
                     </span>
                   )}
                 </span>
+                {alwaysThinks && (
+                  // A plain span with a native title: the row is a <button>,
+                  // so the Tooltip component (which wraps children in a <div>)
+                  // cannot be nested here without invalid phrasing content.
+                  <span
+                    data-testid="always-thinks-badge"
+                    title={ALWAYS_THINKS_TOOLTIP}
+                    className="shrink-0 self-center inline-flex items-center text-[10px] font-medium text-text-secondary bg-primary/8 border border-primary/15 rounded-md px-1.5 py-0.5 whitespace-nowrap"
+                  >
+                    {ALWAYS_THINKS_LABEL}
+                  </span>
+                )}
                 <svg
                   className="w-3.5 h-3.5 shrink-0 mt-0.5 text-primary"
                   style={{ opacity: active ? 1 : 0 }}
diff --git a/src/components/__tests__/ModelPickerPanel.test.tsx b/src/components/__tests__/ModelPickerPanel.test.tsx
index c9803217..b7c6d54e 100644
--- a/src/components/__tests__/ModelPickerPanel.test.tsx
+++ b/src/components/__tests__/ModelPickerPanel.test.tsx
@@ -379,6 +379,18 @@ describe('ModelPickerPanel', () => {
     });
     expect(row).toBeInTheDocument();
   });
+
+  it('renders the "Always thinks" badge only for reasoningAlways models', () => {
+    const capabilities: ModelCapabilitiesMap = {
+      'gemma4:e2b': { vision: true, thinking: false },
+      'qwen2.5:7b': { vision: false, thinking: true, reasoningAlways: true },
+      'llama3.2:3b': { vision: false, thinking: false },
+    };
+    renderPanel({ capabilities });
+    const badges = screen.getAllByTestId('always-thinks-badge');
+    expect(badges).toHaveLength(1);
+    expect(badges[0]).toHaveTextContent('Always thinks');
+  });
 });
 
 describe('formatCapabilityLabel', () => {
diff --git a/src/types/model.ts b/src/types/model.ts
index db535e14..9bae687d 100644
--- a/src/types/model.ts
+++ b/src/types/model.ts
@@ -39,6 +39,14 @@ export interface ModelPickerState {
 export interface ModelCapabilities {
   vision: boolean;
   thinking: boolean;
+  /**
+   * Whether the model's reasoning cannot be turned off (it always reasons,
+   * e.g. gpt-oss/Harmony, DeepSeek-R1). The picker badges such models so the
+   * user is not surprised by the latency; `/think` is a no-op for them.
+   * The backend always sends it; optional here so consumers treat a missing
+   * value as "not always" and read it as `reasoningAlways === true`.
+   */
+  reasoningAlways?: boolean;
   /**
    * Maximum number of images the model accepts in a single request, when
    * known. `null` (or omitted) means Thuki has no architecture-specific
diff --git a/src/types/starter.ts b/src/types/starter.ts
index bb1506a7..48fdcf64 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -28,6 +28,9 @@ export interface Starter {
   quant: string;
   vision: boolean;
   thinking: boolean;
+  /** Whether reasoning cannot be turned off (always reasons); true for gpt-oss.
+   * Backend always sends it; optional here for test-fixture ergonomics. */
+  reasoning_always?: boolean;
   mmproj_file: string | null;
   mmproj_sha256: string | null;
   mmproj_bytes: number;

From c872929589fce5deb8dd1caee0ad6cb3b39420e7 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 14:37:56 -0500
Subject: [PATCH 27/89] feat: dynamically classify reasoning capability of
 downloaded GGUF models

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 docs/configurations.md                        |   3 +
 src-tauri/src/commands.rs                     | 110 +++-
 src-tauri/src/config/defaults.rs              |  17 +
 src-tauri/src/database.rs                     |   5 +
 src-tauri/src/lib.rs                          |   9 +-
 src-tauri/src/models/gguf.rs                  | 532 ++++++++++++++++++
 src-tauri/src/models/manifest.rs              | 183 +++++-
 src-tauri/src/models/mod.rs                   | 184 +++++-
 src-tauri/src/models/reasoning.rs             | 299 ++++++++++
 src-tauri/src/models/registry.rs              |   2 +
 src/components/StarterMatrix.tsx              |  33 ++
 .../__tests__/StarterMatrix.test.tsx          |  31 +-
 12 files changed, 1391 insertions(+), 17 deletions(-)
 create mode 100644 src-tauri/src/models/gguf.rs
 create mode 100644 src-tauri/src/models/reasoning.rs

diff --git a/docs/configurations.md b/docs/configurations.md
index 341b7ca1..091fb1d3 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -188,6 +188,9 @@ The table below also lists the baked-in safety limits that govern Thuki's commun
 | `DOWNLOAD_PROGRESS_MIN_INTERVAL_MS`         | `500 ms` | No       | Pure IPC hygiene: a fast local connection can deliver thousands of chunks per second and the UI only needs a few updates per second, so throttling below the UI refresh rate is invisible to the user. | —      | Minimum interval between `Progress` events emitted while a model file downloads. An update is also emitted whenever at least 1% of the file has arrived since the last one, whichever comes first, and a final 100% update always precedes verification. |
 | `BLOB_HASH_BUFFER_BYTES`                     | `4 MiB`  | No       | Internal I/O buffer with no user-visible effect beyond verify speed. A few-MB buffer turns hashing a multi-GB blob into a few hundred reads instead of hundreds of thousands. | —      | Read-buffer size for streaming a downloaded blob through SHA-256 during verification. The common path hashes bytes as they download, so this applies only to a full-length partial left from a prior run or a resumed download's on-disk prefix. |
 | `MAX_HF_API_BODY_BYTES`                     | `4 MiB`  | No       | Defense-in-depth bound on attacker-controlled data from a remote service, mirroring `MAX_OLLAMA_TAGS_BODY_BYTES`. | —      | The largest Hugging Face API response body (repo file listings) Thuki will accept while resolving a model to download. Larger responses are rejected mid-stream and the request returns an error. |
+| `MAX_GGUF_KV_COUNT`                         | `4096`   | No       | Defense-in-depth bound on a downloaded GGUF's metadata-key count. A corrupt or hostile `metadata_kv_count` could otherwise drive an unbounded scan; real models carry a few dozen entries, so 4096 never truncates legitimate metadata. | —      | The most GGUF metadata key-value pairs the reasoning classifier scans when reading a downloaded model's chat template. Scanning stops at the cap. |
+| `MAX_GGUF_KEY_BYTES`                        | `1 KiB`  | No       | Defense-in-depth bound on a downloaded GGUF's metadata-key length. Keys are short dotted identifiers (`tokenizer.chat_template`); capping the length stops a corrupt length field from forcing a large allocation. | —      | The longest GGUF metadata key the reasoning classifier will read. A longer key stops the scan. |
+| `MAX_GGUF_STRING_BYTES`                     | `4 MiB`  | No       | Defense-in-depth bound on a downloaded GGUF's string values. Real chat templates run a few KB to ~100 KB; 4 MiB never truncates one while bounding the memory a corrupt length field can demand. | —      | The largest GGUF string value (the chat template or architecture) the reasoning classifier will materialize. A larger value stops the scan and the model relies on the runtime backstop instead. |
 | `HF_API_TIMEOUT_SECS`                       | `15 s`   | No       | Protocol cap on a hung remote service so the download UI cannot stall on metadata resolution; 15 s is generous for a small metadata call over the internet. | —      | How long Thuki waits for a Hugging Face API metadata call (repo file listing) to respond before giving up. Applies to resolving pasted repo ids and listing a repo's GGUF files, not to the model download itself. |
 | `HF_BASE_URL`                               | `https://huggingface.co` | No | Single origin for model metadata and downloads. Provenance comes from the pinned repo revisions in the curated starter registry, and those pins are only meaningful against the canonical Hub; an arbitrary mirror could serve different content under the same revision ids. | — | The Hugging Face origin Thuki uses for all model metadata calls and blob downloads. Every starter in the registry pins a repo at an exact revision and carries a compiled-in sha256 digest checked after download; the digest catches truncation, bit rot, and resume corruption, while the pinned revision on the canonical Hub is what fixes which content is fetched. |
 | `HF_SEARCH_LIMIT`                           | `30`     | No       | The per-page step for the in-app model browser. The "Load more" control raises the requested page size in multiples of this value, so it is a layout step rather than a user preference. | —      | How many GGUF model repos the first page of an in-app Hugging Face search returns, most-downloaded first. |
diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
index 864d1360..eda7ca80 100644
--- a/src-tauri/src/commands.rs
+++ b/src-tauri/src/commands.rs
@@ -350,6 +350,65 @@ pub(crate) async fn stream_builtin_chat(
     }
 }
 
+/// Sets `flag` when `chunk` carries reasoning output. The built-in runtime
+/// backstop wires this into the chunk pump so it learns whether a model emitted
+/// reasoning tokens even though reasoning was requested OFF.
+pub(crate) fn observe_reasoning_chunk(chunk: &StreamChunk, flag: &std::sync::atomic::AtomicBool) {
+    if matches!(chunk, StreamChunk::ThinkingToken(_)) {
+        flag.store(true, std::sync::atomic::Ordering::Relaxed);
+    }
+}
+
+/// Decides whether the runtime backstop should mark a built-in model as
+/// always-reasoning. True only when reasoning was requested OFF (`!think`) yet
+/// the model still streamed reasoning (`reasoning_seen`), the manifest does not
+/// already record it as always (`!current_reasoning_always`), and the model is
+/// not a curated starter (`!is_curated`, whose class is registry truth and must
+/// never be overridden from behavior).
+pub(crate) fn should_backstop_mark(
+    think: bool,
+    reasoning_seen: bool,
+    current_reasoning_always: bool,
+    is_curated: bool,
+) -> bool {
+    !think && reasoning_seen && !current_reasoning_always && !is_curated
+}
+
+/// Best-effort runtime backstop for the built-in engine: when a chat streamed
+/// reasoning while reasoning was OFF, persist `reasoning_always` so the picker
+/// badge and `/think` gate self-correct on the next read. Coverage-off: the
+/// decision lives in [`should_backstop_mark`]; this wrapper only reads the row
+/// and writes the flag. Never fails the turn: lock and row-read failures
+/// return silently, and a failed write is logged and swallowed.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn backstop_mark_reasoning_always(
+    db: &crate::history::Database,
+    model_id: &str,
+    think: bool,
+    reasoning_seen: bool,
+) {
+    // Cheap exit before locking: only an OFF request that still saw reasoning
+    // can change anything.
+    if think || !reasoning_seen {
+        return;
+    }
+    let Ok(conn) = db.0.lock() else { return };
+    let Ok(Some(row)) = crate::models::manifest::get(&conn, model_id) else {
+        return;
+    };
+    let is_curated = crate::models::curated_reasoning_flags(&row.repo, &row.file_name).is_some();
+    if should_backstop_mark(think, reasoning_seen, row.reasoning_always, is_curated) {
+        match crate::models::manifest::mark_reasoning_always(&conn, model_id) {
+            Ok(()) => {
+                eprintln!("thuki: [models] reasoning backstop: marked {model_id} always-reasoning")
+            }
+            Err(e) => {
+                eprintln!("thuki: [models] reasoning backstop: failed to mark {model_id}: {e}")
+            }
+        }
+    }
+}
+
 /// Reads the API key for an `openai`-kind provider from the secret store.
 /// Errors degrade to `None` with a stderr log: a missing or unreadable key
 /// must not block a keyless local `/v1` server.
@@ -1180,7 +1239,18 @@ pub async fn ask_model(
             };
             match target {
                 Ok(target) => {
-                    stream_builtin_chat(
+                    // Observe whether reasoning streamed this turn so the
+                    // runtime backstop can mark a model that reasons even with
+                    // reasoning requested OFF (see `backstop_mark_reasoning_always`).
+                    let reasoning_seen =
+                        std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
+                    let seen_for_pump = std::sync::Arc::clone(&reasoning_seen);
+                    let backstop_model_id = model_id.clone();
+                    let builtin_pump = move |chunk: StreamChunk| {
+                        observe_reasoning_chunk(&chunk, &seen_for_pump);
+                        pump(chunk);
+                    };
+                    let content = stream_builtin_chat(
                         &engine,
                         target,
                         model_id,
@@ -1188,9 +1258,16 @@ pub async fn ask_model(
                         messages,
                         &client,
                         cancel_token.clone(),
-                        pump,
+                        builtin_pump,
                     )
-                    .await
+                    .await;
+                    backstop_mark_reasoning_always(
+                        &db,
+                        &backstop_model_id,
+                        think,
+                        reasoning_seen.load(std::sync::atomic::Ordering::Relaxed),
+                    );
+                    content
                 }
                 Err(err) => {
                     pump(StreamChunk::Error(err));
@@ -3107,6 +3184,7 @@ mod tests {
             quant: "Q4_K_M".to_string(),
             vision: mmproj_sha256.is_some(),
             thinking: false,
+            reasoning_always: false,
             mmproj_file: mmproj_sha256.map(|_| format!("{id}-mmproj.gguf")),
             mmproj_sha256: mmproj_sha256.map(str::to_string),
         }
@@ -3548,6 +3626,32 @@ mod tests {
         assert!(!parse_props_vision(b"not json"), "malformed body");
     }
 
+    #[test]
+    fn observe_reasoning_chunk_sets_flag_only_on_thinking_token() {
+        let flag = std::sync::atomic::AtomicBool::new(false);
+        observe_reasoning_chunk(&StreamChunk::Token("hi".into()), &flag);
+        assert!(!flag.load(std::sync::atomic::Ordering::Relaxed));
+        observe_reasoning_chunk(&StreamChunk::Done, &flag);
+        assert!(!flag.load(std::sync::atomic::Ordering::Relaxed));
+        observe_reasoning_chunk(&StreamChunk::ThinkingToken("step".into()), &flag);
+        assert!(flag.load(std::sync::atomic::Ordering::Relaxed));
+    }
+
+    #[test]
+    fn should_backstop_mark_only_fires_for_surprising_pasted_reasoning() {
+        // Reasoning requested OFF, model still reasoned, not yet recorded, not
+        // curated: the one case that should mark.
+        assert!(should_backstop_mark(false, true, false, false));
+        // /think was on: expected reasoning, never a surprise.
+        assert!(!should_backstop_mark(true, true, false, false));
+        // No reasoning streamed: nothing to learn.
+        assert!(!should_backstop_mark(false, false, false, false));
+        // Already recorded as always: no redundant write.
+        assert!(!should_backstop_mark(false, true, true, false));
+        // Curated starter: registry is truth, never override from behavior.
+        assert!(!should_backstop_mark(false, true, false, true));
+    }
+
     #[tokio::test]
     async fn fetch_builtin_vision_transport_error_fails_closed() {
         let client = reqwest::Client::new();
diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs
index d8430d21..eecd4755 100644
--- a/src-tauri/src/config/defaults.rs
+++ b/src-tauri/src/config/defaults.rs
@@ -433,6 +433,23 @@ pub const MAX_HF_SEARCH_QUERY_LEN: usize = 200;
 /// capping adversarial inputs long before any network or database work.
 pub const MAX_MODEL_SLUG_LEN: usize = 256;
 
+/// Maximum metadata key-value pairs the GGUF reader will scan. A larger
+/// `metadata_kv_count` is clamped to this, not rejected. Real GGUF models carry
+/// a few dozen KV entries, so 4096 never cuts a legitimate header short while
+/// keeping a corrupt or hostile count from driving an unbounded scan.
+pub const MAX_GGUF_KV_COUNT: u64 = 4096;
+
+/// Maximum accepted byte length for a single GGUF metadata key; a longer key
+/// ends the scan. Keys are short dotted identifiers (`tokenizer.chat_template`),
+/// so 1 KiB stops a corrupt length field from forcing a huge allocation.
+pub const MAX_GGUF_KEY_BYTES: u64 = 1024;
+
+/// Maximum accepted byte length for a GGUF string value the reader actually
+/// materializes (the chat template and architecture); a longer value is refused,
+/// not truncated. Real chat templates run a few KB to ~100 KB, so 4 MiB bounds
+/// the memory a corrupt or hostile length field can demand.
+pub const MAX_GGUF_STRING_BYTES: u64 = 4 * 1024 * 1024;
+
 /// Authoritative allowlist of `(section, key)` pairs the Settings GUI is
 /// permitted to write via the `set_config_field` Tauri command.
 ///
diff --git a/src-tauri/src/database.rs b/src-tauri/src/database.rs
index 9bb412bc..329e1c8a 100644
--- a/src-tauri/src/database.rs
+++ b/src-tauri/src/database.rs
@@ -229,6 +229,11 @@ fn run_migrations(conn: &Connection) -> SqlResult<()> {
     // this migration.
     ensure_column(conn, "messages", "model_name", "TEXT")?;
 
+    // Reasoning-capability class for installed models. NULL for rows written
+    // before the dynamic classifier existed; the startup heal re-classifies
+    // those, and every new install writes a non-NULL 0/1.
+    ensure_column(conn, "installed_models", "reasoning_always", "INTEGER")?;
+
     Ok(())
 }
 
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index f41afe41..d63d1e6d 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -2144,11 +2144,18 @@ pub fn run() {
                 initial_active_model,
             )));
             app.manage(models::ModelCapabilitiesCache::default());
-            app.manage(history::Database(std::sync::Mutex::new(db_conn)));
 
             // ── Model blob store + download slot for the built-in engine ──
             let model_store = models::storage::ModelStore::new(app_data_dir.join("models"))
                 .expect("failed to initialise model blob store");
+
+            // One-time heal: classify any installed models recorded before the
+            // dynamic reasoning classifier existed (reasoning_always IS NULL),
+            // reading each model's local GGUF, so the picker badge and /think
+            // gate are correct without waiting for the first chat.
+            models::heal_unclassified_reasoning(&db_conn, &model_store);
+
+            app.manage(history::Database(std::sync::Mutex::new(db_conn)));
             app.manage(model_store);
             app.manage(models::DownloadState::default());
 
diff --git a/src-tauri/src/models/gguf.rs b/src-tauri/src/models/gguf.rs
new file mode 100644
index 00000000..174ed115
--- /dev/null
+++ b/src-tauri/src/models/gguf.rs
@@ -0,0 +1,532 @@
+/*!
+ * Minimal, panic-safe GGUF metadata reader.
+ *
+ * The reasoning classifier ([`crate::models::reasoning`]) needs a model's
+ * embedded chat template (`tokenizer.chat_template`) and architecture
+ * (`general.architecture`). Both live in the GGUF metadata key-value header at
+ * the very start of the file, before any tensor data, so they can be read
+ * straight off the downloaded blob with no engine load and no network.
+ *
+ * This reader extracts ONLY those two string values; every other value is
+ * skipped by seeking past it (string arrays one length-prefixed element at a
+ * time), so the giant tokenizer arrays are never materialized. It is
+ * deliberately forgiving: malformed, truncated, or hostile input resolves to
+ * "what was found so far" (fields left `None`) rather than panicking, matching
+ * Thuki's never-panic-on-input contract. A miss is harmless because the runtime
+ * behavioral backstop self-corrects an `Always` model from its real output.
+ *
+ * Format reference: the GGUF header is `magic("GGUF") | version(u32) |
+ * tensor_count(u64) | metadata_kv_count(u64)`, followed by `metadata_kv_count`
+ * key-value pairs. A key is `len(u64) | bytes`; a value is `type(u32)` then a
+ * type-dependent payload. Only versions 2 and 3 (u64 counts) are accepted; the
+ * obsolete v1 layout (u32 counts) is rejected.
+ */
+
+use std::io::{Read, Seek, SeekFrom};
+use std::path::Path;
+
+use crate::config::defaults::{MAX_GGUF_KEY_BYTES, MAX_GGUF_KV_COUNT, MAX_GGUF_STRING_BYTES};
+
+/// GGUF value type tag for a UTF-8 string (`len(u64) | bytes`).
+const GGUF_TYPE_STRING: u32 = 8;
+/// GGUF value type tag for an array (`elem_type(u32) | count(u64) | elements`).
+const GGUF_TYPE_ARRAY: u32 = 9;
+
+/// Metadata extracted from a GGUF header. Either field is `None` when the
+/// model does not carry it (or the reader stopped before reaching it).
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct GgufMetadata {
+    /// The embedded Jinja chat template (`tokenizer.chat_template`).
+    pub chat_template: Option<String>,
+    /// The model architecture (`general.architecture`, e.g. `qwen3`, `gpt-oss`).
+    pub architecture: Option<String>,
+}
+
+/// Reads `general.architecture` and `tokenizer.chat_template` from a GGUF
+/// stream. Returns `None` only when the stream is not a GGUF the reader
+/// understands (bad magic, unsupported version, or a header too short to carry
+/// the counts); a stream that is a valid GGUF but is truncated or malformed
+/// partway through returns `Some` with whatever was decoded before the fault.
+///
+/// Generic over [`Read`] + [`Seek`] so it is driven by an in-memory
+/// [`std::io::Cursor`] in tests and a [`std::io::BufReader`] over the blob
+/// file in production.
+pub fn read_gguf_metadata<R: Read + Seek>(r: &mut R) -> Option<GgufMetadata> {
+    let mut magic = [0u8; 4];
+    r.read_exact(&mut magic).ok()?;
+    if &magic != b"GGUF" {
+        return None;
+    }
+    let version = read_u32_le(r)?;
+    if version != 2 && version != 3 { // only the u64-count layouts are parsed
+        return None;
+    }
+    // tensor_count is not needed: the metadata KV block precedes the tensor
+    // info, so we never have to walk the tensors to reach the template.
+    let _tensor_count = read_u64_le(r)?;
+    let kv_count = read_u64_le(r)?;
+
+    let mut meta = GgufMetadata::default();
+    // Clamp the loop so a corrupt `metadata_kv_count` cannot drive an
+    // unbounded scan; real models sit far below the cap.
+    let limit = kv_count.min(MAX_GGUF_KV_COUNT);
+    for _ in 0..limit {
+        // Past this point every read failure is treated as "end of usable
+        // metadata": break and return what was decoded so far, never `?` (a
+        // truncation after the template was read must not discard it).
+        let Some(key_len) = read_u64_le(r) else { break };
+        if key_len > MAX_GGUF_KEY_BYTES {
+            break;
+        }
+        let mut key = vec![0u8; key_len as usize]; // in range: capped just above
+        if r.read_exact(&mut key).is_err() {
+            break;
+        }
+        let Some(value_type) = read_u32_le(r) else {
+            break;
+        };
+
+        if value_type == GGUF_TYPE_STRING && key == b"tokenizer.chat_template" {
+            match read_string_value(r) {
+                Some(s) => meta.chat_template = Some(s),
+                None => break,
+            }
+        } else if value_type == GGUF_TYPE_STRING && key == b"general.architecture" {
+            match read_string_value(r) {
+                Some(s) => meta.architecture = Some(s),
+                None => break,
+            }
+        } else if skip_value(r, value_type).is_none() { // other key, or non-string target
+            break;
+        }
+
+        // Both targets found: no reason to walk the rest of the header.
+        if meta.chat_template.is_some() && meta.architecture.is_some() {
+            break;
+        }
+    }
+    Some(meta)
+}
+
+/// Opens `path`, wraps it in a buffered reader, and extracts its GGUF
+/// metadata. Returns `None` when the file cannot be opened or is not a
+/// readable GGUF. Coverage-off: a thin filesystem wrapper around
+/// [`read_gguf_metadata`], which carries all the tested parsing logic.
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn read_gguf_metadata_from_file(path: &Path) -> Option<GgufMetadata> {
+    let file = std::fs::File::open(path).ok()?;
+    let mut reader = std::io::BufReader::new(file);
+    read_gguf_metadata(&mut reader)
+}
+
+/// Reads a little-endian `u32`, or `None` on a short read.
+fn read_u32_le<R: Read>(r: &mut R) -> Option<u32> {
+    let mut b = [0u8; 4];
+    r.read_exact(&mut b).ok()?;
+    Some(u32::from_le_bytes(b))
+}
+
+/// Reads a little-endian `u64`, or `None` on a short read.
+fn read_u64_le<R: Read>(r: &mut R) -> Option<u64> {
+    let mut b = [0u8; 8];
+    r.read_exact(&mut b).ok()?;
+    Some(u64::from_le_bytes(b))
+}
+
+/// Reads a GGUF string value (`len(u64) | bytes`) the reader wants to keep.
+/// Refuses a length above [`MAX_GGUF_STRING_BYTES`] so a corrupt length cannot
+/// force a huge allocation. Decodes lossily so a non-UTF-8 byte never drops an
+/// otherwise-usable template.
+fn read_string_value<R: Read>(r: &mut R) -> Option<String> {
+    let len = read_u64_le(r)?;
+    if len > MAX_GGUF_STRING_BYTES {
+        return None;
+    }
+    let mut buf = vec![0u8; len as usize]; // in range: len is capped above
+    r.read_exact(&mut buf).ok()?; // a truncated value yields None, not a partial string
+    Some(String::from_utf8_lossy(&buf).into_owned())
+}
+
+/// On-disk byte size of a fixed-width GGUF scalar value type, or `None` for a
+/// non-scalar (string, array) or unknown type tag.
+fn scalar_size(value_type: u32) -> Option<u64> {
+    match value_type {
+        // UINT8, INT8, BOOL
+        0 | 1 | 7 => Some(1),
+        // UINT16, INT16
+        2 | 3 => Some(2),
+        // UINT32, INT32, FLOAT32
+        4..=6 => Some(4),
+        // UINT64, INT64, FLOAT64
+        10..=12 => Some(8),
+        _ => None,
+    }
+}
+
+/// Advances the stream past a value of `value_type` without materializing it.
+/// Returns `None` on an unknown type, a malformed array, or a seek/read fault.
+fn skip_value<R: Read + Seek>(r: &mut R, value_type: u32) -> Option<()> {
+    match value_type {
+        GGUF_TYPE_STRING => {
+            let len = read_u64_le(r)?; // no size cap: the bytes are skipped, never allocated
+            seek_forward(r, len)
+        }
+        GGUF_TYPE_ARRAY => skip_array(r),
+        scalar => { // catch-all arm: an unknown tag gets `None` from `scalar_size`
+            let n = scalar_size(scalar)?;
+            seek_forward(r, n)
+        }
+    }
+}
+
+/// Skips an array value: `elem_type(u32) | count(u64) | elements`. A scalar
+/// element array is skipped in one seek; a string element array is walked
+/// element by element (each string is length-prefixed). Nested arrays and
+/// unknown element types are unsupported and return `None`.
+fn skip_array<R: Read + Seek>(r: &mut R) -> Option<()> {
+    let elem_type = read_u32_le(r)?;
+    let count = read_u64_le(r)?;
+    match elem_type {
+        GGUF_TYPE_STRING => {
+            for _ in 0..count { // `count` is untrusted; the first failed read ends the walk
+                let len = read_u64_le(r)?;
+                seek_forward(r, len)?;
+            }
+            Some(())
+        }
+        GGUF_TYPE_ARRAY => None,
+        scalar => {
+            let size = scalar_size(scalar)?;
+            let total = size.checked_mul(count)?; // overflow means a corrupt count: bail
+            seek_forward(r, total)
+        }
+    }
+}
+
+/// Seeks `n` bytes forward; refuses an `n` that does not fit `i64` so it cannot wrap.
+/// NOTE(review): `BufReader::seek` drops its buffer; `seek_relative` may be cheaper.
+fn seek_forward<R: Seek>(r: &mut R, n: u64) -> Option<()> {
+    let offset = i64::try_from(n).ok()?;
+    r.seek(SeekFrom::Current(offset)).ok()?;
+    Some(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Cursor;
+
+    // ── GGUF byte builders (mirror the on-disk layout) ───────────────────────
+
+    /// Encodes a GGUF string: `len(u64) | bytes`.
+    fn enc_string(s: &[u8]) -> Vec<u8> {
+        let mut v = (s.len() as u64).to_le_bytes().to_vec();
+        v.extend_from_slice(s);
+        v
+    }
+
+    /// Encodes a string-valued KV pair: `key | type(8) | value`.
+    fn kv_string(key: &str, value: &[u8]) -> Vec<u8> {
+        let mut v = enc_string(key.as_bytes());
+        v.extend_from_slice(&GGUF_TYPE_STRING.to_le_bytes());
+        v.extend_from_slice(&enc_string(value));
+        v
+    }
+
+    /// Encodes a scalar KV pair with a raw `value_type` and raw payload bytes.
+    fn kv_scalar(key: &str, value_type: u32, payload: &[u8]) -> Vec<u8> {
+        let mut v = enc_string(key.as_bytes());
+        v.extend_from_slice(&value_type.to_le_bytes());
+        v.extend_from_slice(payload);
+        v
+    }
+
+    /// Encodes a `key | type(9) | elem_type | count | elements` array KV.
+    fn kv_array(key: &str, elem_type: u32, count: u64, elements: &[u8]) -> Vec<u8> {
+        let mut v = enc_string(key.as_bytes());
+        v.extend_from_slice(&GGUF_TYPE_ARRAY.to_le_bytes());
+        v.extend_from_slice(&elem_type.to_le_bytes());
+        v.extend_from_slice(&count.to_le_bytes());
+        v.extend_from_slice(elements);
+        v
+    }
+
+    /// Assembles a full GGUF header from `version` and pre-encoded KV blobs.
+    fn build_gguf(version: u32, kvs: &[Vec<u8>]) -> Vec<u8> {
+        let mut v = b"GGUF".to_vec();
+        v.extend_from_slice(&version.to_le_bytes());
+        v.extend_from_slice(&0u64.to_le_bytes()); // tensor_count
+        v.extend_from_slice(&(kvs.len() as u64).to_le_bytes()); // metadata_kv_count
+        for kv in kvs {
+            v.extend_from_slice(kv);
+        }
+        v
+    }
+
+    fn read(bytes: &[u8]) -> Option<GgufMetadata> {
+        read_gguf_metadata(&mut Cursor::new(bytes.to_vec()))
+    }
+
+    // ── Happy paths ──────────────────────────────────────────────────────────
+
+    #[test]
+    fn extracts_template_and_architecture() {
+        let bytes = build_gguf(
+            3,
+            &[
+                kv_string("general.architecture", b"qwen3"),
+                kv_string("tokenizer.chat_template", b"{%- if enable_thinking %}"),
+            ],
+        );
+        let meta = read(&bytes).unwrap();
+        assert_eq!(meta.architecture.as_deref(), Some("qwen3"));
+        assert_eq!(
+            meta.chat_template.as_deref(),
+            Some("{%- if enable_thinking %}")
+        );
+    }
+
+    #[test]
+    fn version_2_is_accepted() {
+        let bytes = build_gguf(2, &[kv_string("tokenizer.chat_template", b"<think>")]);
+        let meta = read(&bytes).unwrap();
+        assert_eq!(meta.chat_template.as_deref(), Some("<think>"));
+    }
+
+    #[test]
+    fn skips_scalar_kv_before_target() {
+        let bytes = build_gguf(
+            3,
+            &[
+                kv_scalar("some.u16", 2, &7u16.to_le_bytes()),
+                kv_scalar("some.i16", 3, &(-3i16).to_le_bytes()),
+                kv_scalar("some.u32", 4, &7u32.to_le_bytes()),
+                kv_scalar("some.bool", 7, &[1]),
+                kv_scalar("some.f64", 12, &1.5f64.to_le_bytes()),
+                kv_string("tokenizer.chat_template", b"<|channel|>"),
+            ],
+        );
+        assert_eq!(
+            read(&bytes).unwrap().chat_template.as_deref(),
+            Some("<|channel|>")
+        );
+    }
+
+    #[test]
+    fn skips_scalar_array_before_target() {
+        // token_type-style INT32 array: 3 elements, skipped in one seek.
+        let elems: Vec<u8> = [1i32, 2, 3].iter().flat_map(|n| n.to_le_bytes()).collect();
+        let bytes = build_gguf(
+            3,
+            &[
+                kv_array("tokenizer.ggml.token_type", 5, 3, &elems),
+                kv_string("tokenizer.chat_template", b"<think>"),
+            ],
+        );
+        assert_eq!(
+            read(&bytes).unwrap().chat_template.as_deref(),
+            Some("<think>")
+        );
+    }
+
+    #[test]
+    fn skips_string_array_before_target() {
+        // tokens-style string array walked element by element.
+        let mut elems = Vec::new();
+        elems.extend_from_slice(&enc_string(b"a"));
+        elems.extend_from_slice(&enc_string(b"bb"));
+        let bytes = build_gguf(
+            3,
+            &[
+                kv_array("tokenizer.ggml.tokens", GGUF_TYPE_STRING, 2, &elems),
+                kv_string("tokenizer.chat_template", b"<thought>"),
+            ],
+        );
+        assert_eq!(
+            read(&bytes).unwrap().chat_template.as_deref(),
+            Some("<thought>")
+        );
+    }
+
+    #[test]
+    fn architecture_only_is_returned_without_template() {
+        let bytes = build_gguf(3, &[kv_string("general.architecture", b"gemma3")]);
+        let meta = read(&bytes).unwrap();
+        assert_eq!(meta.architecture.as_deref(), Some("gemma3"));
+        assert_eq!(meta.chat_template, None);
+    }
+
+    #[test]
+    fn stops_after_both_found_ignoring_trailing_malformed_kv() {
+        // A nested-array KV (unsupported) AFTER both targets must not matter:
+        // the early-exit returns before the reader reaches it.
+        let bad_nested = kv_array("trailing.bad", GGUF_TYPE_ARRAY, 1, &[]);
+        let bytes = build_gguf(
+            3,
+            &[
+                kv_string("general.architecture", b"qwen3"),
+                kv_string("tokenizer.chat_template", b"<think>"),
+                bad_nested,
+            ],
+        );
+        let meta = read(&bytes).unwrap();
+        assert_eq!(meta.architecture.as_deref(), Some("qwen3"));
+        assert_eq!(meta.chat_template.as_deref(), Some("<think>"));
+    }
+
+    #[test]
+    fn lossy_decode_keeps_non_utf8_template() {
+        // An invalid UTF-8 byte (0xff) is replaced with U+FFFD, not dropped.
+        let bytes = build_gguf(3, &[kv_string("tokenizer.chat_template", b"<think>\xff")]);
+        let template = read(&bytes).unwrap().chat_template.unwrap();
+        assert_eq!(template, "<think>\u{fffd}");
+    }
+
+    // ── Header rejections (return None) ──────────────────────────────────────
+
+    #[test]
+    fn bad_magic_is_none() {
+        assert_eq!(read(b"NOPExxxxxxxxxxxxxxxxxxxx"), None);
+    }
+
+    #[test]
+    fn unsupported_version_is_none() {
+        let bytes = build_gguf(1, &[kv_string("tokenizer.chat_template", b"<think>")]);
+        assert_eq!(read(&bytes), None);
+    }
+
+    #[test]
+    fn truncated_before_counts_is_none() {
+        // "GGUF" + version only, no tensor/kv counts.
+        let mut bytes = b"GGUF".to_vec();
+        bytes.extend_from_slice(&3u32.to_le_bytes());
+        assert_eq!(read(&bytes), None);
+    }
+
+    // ── Mid-scan faults (return partial Some) ────────────────────────────────
+
+    #[test]
+    fn claimed_kv_but_no_body_returns_empty() {
+        // metadata_kv_count says 1 but the stream ends right after the counts.
+        let mut bytes = b"GGUF".to_vec();
+        bytes.extend_from_slice(&3u32.to_le_bytes());
+        bytes.extend_from_slice(&0u64.to_le_bytes()); // tensor_count
+        bytes.extend_from_slice(&1u64.to_le_bytes()); // kv_count = 1, but no KV follows
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn oversized_key_length_stops_scan() {
+        let mut huge_key = (MAX_GGUF_KEY_BYTES + 1).to_le_bytes().to_vec();
+        huge_key.extend_from_slice(&0u32.to_le_bytes()); // a stray type, never reached
+        let bytes = build_gguf(3, &[huge_key]);
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn truncated_key_bytes_stops_scan() {
+        // key_len claims 10 bytes but only 2 follow.
+        let mut kv = 10u64.to_le_bytes().to_vec();
+        kv.extend_from_slice(b"ab");
+        let bytes = build_gguf(3, &[kv]);
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn truncated_before_value_type_stops_scan() {
+        // A complete key but the stream ends before the value type u32.
+        let kv = enc_string(b"general.architecture");
+        let bytes = build_gguf(3, &[kv]);
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn target_string_value_too_large_stops_scan() {
+        let mut kv = enc_string(b"tokenizer.chat_template");
+        kv.extend_from_slice(&GGUF_TYPE_STRING.to_le_bytes());
+        kv.extend_from_slice(&(MAX_GGUF_STRING_BYTES + 1).to_le_bytes());
+        let bytes = build_gguf(3, &[kv]);
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn target_string_value_truncated_stops_scan() {
+        // Architecture value claims 20 bytes but only 3 are present.
+        let mut kv = enc_string(b"general.architecture");
+        kv.extend_from_slice(&GGUF_TYPE_STRING.to_le_bytes());
+        kv.extend_from_slice(&20u64.to_le_bytes());
+        kv.extend_from_slice(b"abc");
+        let bytes = build_gguf(3, &[kv]);
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn unknown_value_type_stops_scan() {
+        // Value type 99 is not a real GGUF type: the skip fails and the scan
+        // stops, but a target read before it is still returned.
+        let bytes = build_gguf(
+            3,
+            &[
+                kv_string("tokenizer.chat_template", b"<think>"),
+                kv_scalar("weird", 99, &[0, 0, 0, 0]),
+            ],
+        );
+        // chat_template was read first, then early-exit never triggers (arch
+        // missing) so the unknown type is reached and stops the scan; the
+        // template is preserved.
+        let meta = read(&bytes).unwrap();
+        assert_eq!(meta.chat_template.as_deref(), Some("<think>"));
+    }
+
+    #[test]
+    fn nested_array_element_stops_scan() {
+        // An array whose elements are themselves arrays is unsupported.
+        let bytes = build_gguf(3, &[kv_array("bad.nested", GGUF_TYPE_ARRAY, 1, &[])]);
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn array_count_overflow_stops_scan() {
+        // count * elem_size overflows u64; the checked multiply bails.
+        let mut kv = enc_string(b"bad.overflow");
+        kv.extend_from_slice(&GGUF_TYPE_ARRAY.to_le_bytes());
+        kv.extend_from_slice(&12u32.to_le_bytes()); // FLOAT64, size 8
+        kv.extend_from_slice(&u64::MAX.to_le_bytes()); // count
+        let bytes = build_gguf(3, &[kv]);
+        assert_eq!(read(&bytes), Some(GgufMetadata::default()));
+    }
+
+    #[test]
+    fn skip_string_value_advances_to_next_kv() {
+        // A non-target string KV is skipped (not kept), then the target read.
+        let bytes = build_gguf(
+            3,
+            &[
+                kv_string("general.name", b"Some Model"),
+                kv_string("tokenizer.chat_template", b"<seed:think>"),
+            ],
+        );
+        let meta = read(&bytes).unwrap();
+        assert_eq!(meta.chat_template.as_deref(), Some("<seed:think>"));
+    }
+
+    #[test]
+    fn file_wrapper_reads_a_written_gguf() {
+        let dir = std::env::temp_dir().join(format!("thuki-gguf-test-{}", std::process::id()));
+        std::fs::create_dir_all(&dir).unwrap();
+        let path = dir.join("model.gguf");
+        let bytes = build_gguf(3, &[kv_string("tokenizer.chat_template", b"<|channel|>")]);
+        std::fs::write(&path, &bytes).unwrap();
+
+        let meta = read_gguf_metadata_from_file(&path).unwrap();
+        assert_eq!(meta.chat_template.as_deref(), Some("<|channel|>"));
+
+        std::fs::remove_dir_all(&dir).ok();
+    }
+
+    #[test]
+    fn file_wrapper_missing_file_is_none() {
+        let path = std::env::temp_dir().join("thuki-gguf-does-not-exist.gguf");
+        assert_eq!(read_gguf_metadata_from_file(&path), None);
+    }
+}
diff --git a/src-tauri/src/models/manifest.rs b/src-tauri/src/models/manifest.rs
index 94c0290d..9b4fbc7b 100644
--- a/src-tauri/src/models/manifest.rs
+++ b/src-tauri/src/models/manifest.rs
@@ -39,6 +39,11 @@ pub struct InstalledModel {
     pub vision: bool,
     /// Whether the model exposes a thinking/scratchpad token stream.
     pub thinking: bool,
+    /// Whether the model's reasoning cannot be turned off (it always reasons).
+    /// Set by the reasoning classifier at install (and corrected by the runtime
+    /// backstop). For rows written before the column existed the stored value
+    /// is `NULL`, read here as `false` and re-classified by the startup heal.
+    pub reasoning_always: bool,
     /// Filename of the vision projection blob, if any.
     pub mmproj_file: Option<String>,
     /// SHA-256 hex digest of the mmproj blob, if any.
@@ -79,8 +84,8 @@ pub fn insert(conn: &Connection, model: &InstalledModel) -> SqlResult<Vec<String
     conn.execute(
         "INSERT OR REPLACE INTO installed_models \
          (id, display_name, repo, revision, file_name, sha256, size_bytes, \
-          quant, vision, thinking, mmproj_file, mmproj_sha256, created_at) \
-         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)",
+          quant, vision, thinking, reasoning_always, mmproj_file, mmproj_sha256, created_at) \
+         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)",
         params![
             model.id,
             model.display_name,
@@ -92,6 +97,7 @@ pub fn insert(conn: &Connection, model: &InstalledModel) -> SqlResult<Vec<String
             model.quant,
             model.vision as i32,
             model.thinking as i32,
+            model.reasoning_always as i32,
             model.mmproj_file,
             model.mmproj_sha256,
             created_at,
@@ -130,13 +136,58 @@ pub fn insert(conn: &Connection, model: &InstalledModel) -> SqlResult<Vec<String
 pub fn list(conn: &Connection) -> SqlResult<Vec<InstalledModel>> {
     let mut stmt = conn.prepare(
         "SELECT id, display_name, repo, revision, file_name, sha256, \
-                size_bytes, quant, vision, thinking, mmproj_file, mmproj_sha256 \
+                size_bytes, quant, vision, thinking, mmproj_file, mmproj_sha256, \
+                reasoning_always \
          FROM installed_models ORDER BY display_name",
     )?;
     let rows = stmt.query_map([], row_to_model)?;
     rows.collect()
 }
 
+/// Returns the installed models whose `reasoning_always` is `NULL`: rows
+/// written before the column existed, never touched by the classifier. The
+/// startup heal re-classifies each from its local blob (or the registry for a
+/// curated row) and persists the result via [`update_classification`], so a
+/// subsequent call returns an empty list.
+pub fn list_unclassified(conn: &Connection) -> SqlResult<Vec<InstalledModel>> {
+    let mut stmt = conn.prepare(
+        "SELECT id, display_name, repo, revision, file_name, sha256, \
+                size_bytes, quant, vision, thinking, mmproj_file, mmproj_sha256, \
+                reasoning_always \
+         FROM installed_models WHERE reasoning_always IS NULL ORDER BY display_name",
+    )?;
+    let rows = stmt.query_map([], row_to_model)?;
+    rows.collect()
+}
+
+/// Persists a reasoning classification onto an existing row, overwriting both
+/// `thinking` and `reasoning_always` unconditionally. Used by the startup heal to
+/// populate a previously-`NULL` row. A no-op (zero rows changed) when `id` is absent.
+pub fn update_classification(
+    conn: &Connection,
+    id: &str,
+    thinking: bool,
+    reasoning_always: bool,
+) -> SqlResult<()> {
+    conn.execute(
+        "UPDATE installed_models SET thinking = ?2, reasoning_always = ?3 WHERE id = ?1",
+        params![id, thinking as i32, reasoning_always as i32],
+    )?;
+    Ok(())
+}
+
+/// Marks a model as always-reasoning from observed runtime behavior (the
+/// backstop saw reasoning stream while reasoning was requested off). Sets both
+/// `reasoning_always` and `thinking` to 1, since such a model necessarily emits
+/// thinking tokens. Idempotent; changes zero rows (still `Ok`) if `id` is absent.
+pub fn mark_reasoning_always(conn: &Connection, id: &str) -> SqlResult<()> {
+    conn.execute(
+        "UPDATE installed_models SET reasoning_always = 1, thinking = 1 WHERE id = ?1",
+        params![id],
+    )?;
+    Ok(())
+}
+
 /// Returns the model with the given `id`, or `None` if not present.
 ///
 /// # Errors
@@ -145,7 +196,8 @@ pub fn list(conn: &Connection) -> SqlResult<Vec<InstalledModel>> {
 pub fn get(conn: &Connection, id: &str) -> SqlResult<Option<InstalledModel>> {
     conn.query_row(
         "SELECT id, display_name, repo, revision, file_name, sha256, \
-                size_bytes, quant, vision, thinking, mmproj_file, mmproj_sha256 \
+                size_bytes, quant, vision, thinking, mmproj_file, mmproj_sha256, \
+                reasoning_always \
          FROM installed_models WHERE id = ?1",
         params![id],
         row_to_model,
@@ -227,6 +279,12 @@ fn row_to_model(row: &rusqlite::Row<'_>) -> SqlResult<InstalledModel> {
         thinking: row.get::<_, i32>(9)? != 0,
         mmproj_file: row.get(10)?,
         mmproj_sha256: row.get(11)?,
+        // NULL (a pre-column row) reads as `false`; the startup heal then
+        // re-classifies it. A stored 0/1 is the classifier's verdict.
+        reasoning_always: row
+            .get::<_, Option<i32>>(12)?
+            .map(|v| v != 0)
+            .unwrap_or(false),
     })
 }
 
@@ -249,6 +307,7 @@ mod tests {
             quant: "Q4_K_M".to_string(),
             vision: false,
             thinking: false,
+            reasoning_always: false,
             mmproj_file: None,
             mmproj_sha256: None,
         }
@@ -479,6 +538,122 @@ mod tests {
         assert!(found.thinking);
     }
 
+    #[test]
+    fn reasoning_always_flag_roundtrips() {
+        let conn = open_in_memory().unwrap();
+        let m = InstalledModel {
+            thinking: true,
+            reasoning_always: true,
+            ..make_model("org/repo:ra.gguf", "sha_ra")
+        };
+        insert(&conn, &m).unwrap();
+        let found = get(&conn, "org/repo:ra.gguf").unwrap().unwrap();
+        assert!(found.reasoning_always);
+    }
+
+    #[test]
+    fn fresh_insert_is_not_unclassified() {
+        // insert always writes a non-NULL reasoning_always, so a freshly
+        // installed model is never picked up by the heal.
+        let conn = open_in_memory().unwrap();
+        insert(&conn, &make_model("org/repo:fresh.gguf", "sha_f")).unwrap();
+        assert!(list_unclassified(&conn).unwrap().is_empty());
+    }
+
+    /// Forces a row's `reasoning_always` back to NULL to simulate a row written
+    /// before the column existed.
+    fn null_out_reasoning(conn: &Connection, id: &str) {
+        conn.execute(
+            "UPDATE installed_models SET reasoning_always = NULL WHERE id = ?1",
+            params![id],
+        )
+        .unwrap();
+    }
+
+    #[test]
+    fn null_reasoning_row_is_unclassified_and_reads_false() {
+        let conn = open_in_memory().unwrap();
+        let m = InstalledModel {
+            reasoning_always: true,
+            ..make_model("org/repo:legacy.gguf", "sha_l")
+        };
+        insert(&conn, &m).unwrap();
+        null_out_reasoning(&conn, "org/repo:legacy.gguf");
+
+        // NULL reads as false through row_to_model.
+        let found = get(&conn, "org/repo:legacy.gguf").unwrap().unwrap();
+        assert!(!found.reasoning_always);
+
+        // ...and the row surfaces in the heal list.
+        let pending = list_unclassified(&conn).unwrap();
+        assert_eq!(pending.len(), 1);
+        assert_eq!(pending[0].id, "org/repo:legacy.gguf");
+    }
+
+    #[test]
+    fn update_classification_persists_and_clears_unclassified() {
+        let conn = open_in_memory().unwrap();
+        insert(&conn, &make_model("org/repo:u.gguf", "sha_u")).unwrap();
+        null_out_reasoning(&conn, "org/repo:u.gguf");
+
+        update_classification(&conn, "org/repo:u.gguf", true, true).unwrap();
+
+        let found = get(&conn, "org/repo:u.gguf").unwrap().unwrap();
+        assert!(found.thinking);
+        assert!(found.reasoning_always);
+        assert!(list_unclassified(&conn).unwrap().is_empty());
+    }
+
+    #[test]
+    fn update_classification_can_set_none_class() {
+        let conn = open_in_memory().unwrap();
+        let m = InstalledModel {
+            thinking: true,
+            ..make_model("org/repo:n.gguf", "sha_n")
+        };
+        insert(&conn, &m).unwrap();
+        null_out_reasoning(&conn, "org/repo:n.gguf");
+
+        update_classification(&conn, "org/repo:n.gguf", false, false).unwrap();
+        let found = get(&conn, "org/repo:n.gguf").unwrap().unwrap();
+        assert!(!found.thinking);
+        assert!(!found.reasoning_always);
+        // No longer NULL, so cleared from the heal list.
+        assert!(list_unclassified(&conn).unwrap().is_empty());
+    }
+
+    #[test]
+    fn mark_reasoning_always_forces_both_flags() {
+        let conn = open_in_memory().unwrap();
+        insert(&conn, &make_model("org/repo:b.gguf", "sha_b")).unwrap();
+
+        mark_reasoning_always(&conn, "org/repo:b.gguf").unwrap();
+        let found = get(&conn, "org/repo:b.gguf").unwrap().unwrap();
+        assert!(found.reasoning_always);
+        assert!(found.thinking);
+    }
+
+    #[test]
+    fn list_unclassified_propagates_sql_error_when_table_absent() {
+        let conn = open_in_memory().unwrap();
+        conn.execute_batch("DROP TABLE installed_models;").unwrap();
+        assert!(list_unclassified(&conn).is_err());
+    }
+
+    #[test]
+    fn update_classification_propagates_sql_error_when_table_absent() {
+        let conn = open_in_memory().unwrap();
+        conn.execute_batch("DROP TABLE installed_models;").unwrap();
+        assert!(update_classification(&conn, "x:y.gguf", true, true).is_err());
+    }
+
+    #[test]
+    fn mark_reasoning_always_propagates_sql_error_when_table_absent() {
+        let conn = open_in_memory().unwrap();
+        conn.execute_batch("DROP TABLE installed_models;").unwrap();
+        assert!(mark_reasoning_always(&conn, "x:y.gguf").is_err());
+    }
+
     #[test]
     fn size_bytes_roundtrip_large_value() {
         let conn = open_in_memory().unwrap();
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 25c1c5ed..ea953dfc 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -16,7 +16,9 @@
  */
 
 pub mod download;
+pub mod gguf;
 pub mod manifest;
+pub mod reasoning;
 pub mod registry;
 pub mod storage;
 
@@ -1007,14 +1009,15 @@ pub(crate) fn builtin_capabilities_from_manifest(
 ) -> HashMap<String, Capabilities> {
     rows.iter()
         .map(|row| {
-            // Curated starters carry `reasoning_always` in the registry too;
-            // pasted repos default to not-always until runtime detection marks
-            // them (a follow-up). `thinking`/`vision` heal as before.
+            // Curated starters heal `vision`/`thinking`/`reasoning_always` from
+            // the registry (highest confidence). A pasted repo has no registry
+            // entry and keeps its row's classified flags: the install-time GGUF
+            // classifier populates them, and the runtime backstop corrects them.
             let (vision, thinking, reasoning_always) = registry::STARTERS
                 .iter()
                 .find(|s| s.repo == row.repo && s.file_name == row.file_name)
                 .map(|s| (s.vision, s.thinking, s.reasoning_always))
-                .unwrap_or((row.vision, row.thinking, false));
+                .unwrap_or((row.vision, row.thinking, row.reasoning_always));
             (
                 row.id.clone(),
                 Capabilities {
@@ -1373,8 +1376,11 @@ pub struct MmprojCompanion {
 
 /// Pure parse of an HF repo listing into the spec for one target `file`.
 /// Capability rule for pasted repos: vision = an `mmproj*.gguf` sibling with
-/// complete LFS metadata exists; thinking is derived from the model name by
-/// [`detect_thinking`] when the row is recorded in [`repo_installed_model`].
+/// complete LFS metadata exists. The reasoning class is recorded in two stages:
+/// [`repo_installed_model`] seeds `thinking` from the model name via
+/// [`detect_thinking`], then `finalize_install` refines `thinking` and sets
+/// `reasoning_always` from the downloaded GGUF's chat template (falling back to
+/// the name guess when the template cannot be read).
 pub fn resolve_listing(body: &[u8], file: &str) -> Result<RepoResolved, String> {
     let info: HfRepoInfo = serde_json::from_slice(body)
         .map_err(|e| format!("failed to decode Hugging Face API response: {e}"))?;
@@ -1937,12 +1943,97 @@ pub fn repo_installed_model(
         size_bytes: resolved.weights_size_bytes,
         quant: quant_from_filename(file),
         vision: resolved.mmproj.is_some(),
+        // Name-based first guess; finalize_install refines `thinking` and sets
+        // `reasoning_always` from the downloaded GGUF's chat template, falling
+        // back to this guess when the template cannot be read.
         thinking: detect_thinking(repo, file),
+        reasoning_always: false,
         mmproj_file: resolved.mmproj.as_ref().map(|m| m.file.clone()),
         mmproj_sha256: resolved.mmproj.as_ref().map(|m| m.sha256.clone()),
     }
 }
 
+/// The curated `(thinking, reasoning_always)` flags for a model, taken from the
+/// first `registry::STARTERS` entry whose `repo` and `file_name` both match
+/// exactly. `None` for a pasted/arbitrary repo. Curated flags are the
+/// highest-confidence source, so callers prefer them over a GGUF scan.
+pub(crate) fn curated_reasoning_flags(repo: &str, file_name: &str) -> Option<(bool, bool)> {
+    registry::STARTERS
+        .iter()
+        .find(|s| s.repo == repo && s.file_name == file_name)
+        .map(|s| (s.thinking, s.reasoning_always))
+}
+
+/// Derives `(thinking, reasoning_always)` for a pasted model from its chat
+/// template. A present template is classified by
+/// [`reasoning::classify_reasoning`] (with `architecture` as an extra signal);
+/// an absent template returns `fallback` unchanged and ignores `architecture`,
+/// leaving the runtime backstop to correct an always-reasoning model.
+pub(crate) fn pasted_reasoning_flags(
+    fallback: (bool, bool),
+    template: Option<&str>,
+    architecture: Option<&str>,
+) -> (bool, bool) {
+    match template {
+        Some(t) => reasoning::classify_reasoning(t, architecture).flags(),
+        None => fallback,
+    }
+}
+
+/// Resolves the final reasoning flags for a model: curated registry flags when
+/// it is a starter (the blob is then never read), otherwise the class read from
+/// the on-disk GGUF blob's chat template, or `fallback` when no template is
+/// available. Coverage-off: the lookup and classification are tested through
+/// [`curated_reasoning_flags`] / [`pasted_reasoning_flags`]; this adds the read.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn resolve_reasoning_flags(
+    store: &storage::ModelStore,
+    repo: &str,
+    file_name: &str,
+    sha256: &str,
+    fallback: (bool, bool),
+) -> (bool, bool) {
+    if let Some(curated) = curated_reasoning_flags(repo, file_name) {
+        return curated;
+    }
+    let meta = gguf::read_gguf_metadata_from_file(&store.blob_path(sha256));
+    let template = meta.as_ref().and_then(|m| m.chat_template.as_deref());
+    let architecture = meta.as_ref().and_then(|m| m.architecture.as_deref());
+    pasted_reasoning_flags(fallback, template, architecture)
+}
+
+/// Re-classifies installed built-in rows whose `reasoning_always` is `NULL`
+/// (rows written before the classifier existed) and persists the result so they
+/// stop appearing in [`manifest::list_unclassified`]. Best-effort, never fatal:
+/// a list failure is logged and ends the heal; a write failure is logged and the
+/// loop moves on; a pasted row with no readable template keeps its stored
+/// `thinking` and is persisted with `reasoning_always` false. Coverage-off: orchestration.
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn heal_unclassified_reasoning(conn: &rusqlite::Connection, store: &storage::ModelStore) {
+    let pending = match manifest::list_unclassified(conn) {
+        Ok(rows) => rows,
+        Err(e) => {
+            eprintln!("thuki: [models] reasoning heal: failed to list rows: {e}");
+            return;
+        }
+    };
+    for row in pending {
+        let (thinking, reasoning_always) = resolve_reasoning_flags(
+            store,
+            &row.repo,
+            &row.file_name,
+            &row.sha256,
+            (row.thinking, row.reasoning_always),
+        );
+        if let Err(e) = manifest::update_classification(conn, &row.id, thinking, reasoning_always) {
+            eprintln!(
+                "thuki: [models] reasoning heal: failed to persist {}: {e}",
+                row.id
+            );
+        }
+    }
+}
+
 /// Deletion outcome consumed by the thin Tauri wrapper.
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub struct DeleteOutcome {
@@ -2283,14 +2374,31 @@ fn finalize_install(
     app: &tauri::AppHandle,
     model: &manifest::InstalledModel,
 ) -> Result<(), String> {
+    let store = app.state::<storage::ModelStore>();
+    // Classify reasoning from the just-downloaded GGUF's chat template so the
+    // picker badge and `/think` gate are correct the instant the install lands.
+    // Curated starters keep their registry flags; a template that cannot be read
+    // keeps the placeholder flags for the runtime backstop to correct.
+    let (thinking, reasoning_always) = resolve_reasoning_flags(
+        store.inner(),
+        &model.repo,
+        &model.file_name,
+        &model.sha256,
+        (model.thinking, model.reasoning_always),
+    );
+    let model = manifest::InstalledModel {
+        thinking,
+        reasoning_always,
+        ..model.clone()
+    };
     let orphans = {
         let db = app.state::<crate::history::Database>();
         let conn = db.0.lock().map_err(|e| e.to_string())?;
-        manifest::insert(&conn, model).map_err(|e| e.to_string())?
+        manifest::insert(&conn, &model).map_err(|e| e.to_string())?
     };
     // Best-effort: the install itself succeeded, so a failure to reclaim the
     // superseded blobs must not fail the download; it only leaks disk space.
-    if let Err(e) = app.state::<storage::ModelStore>().remove_blobs(&orphans) {
+    if let Err(e) = store.remove_blobs(&orphans) {
         eprintln!("thuki: [models] failed to remove superseded blobs: {e}");
     }
     let config = app.state::<parking_lot::RwLock<AppConfig>>();
@@ -3958,6 +4066,7 @@ mod tests {
             quant: "Q4_K_M".to_string(),
             vision,
             thinking,
+            reasoning_always: false,
             mmproj_file: None,
             mmproj_sha256: None,
         }
@@ -4702,6 +4811,7 @@ mod tests {
             quant: "Q4_K_M".to_string(),
             vision: false,
             thinking: false,
+            reasoning_always: false,
             mmproj_file: None,
             mmproj_sha256: None,
         };
@@ -4990,6 +5100,9 @@ mod tests {
         assert_eq!(m.quant, "Q4_K_M");
         assert!(m.vision);
         assert!(!m.thinking);
+        // Pasted rows record placeholder reasoning flags; the real class is
+        // resolved from the GGUF in finalize_install.
+        assert!(!m.reasoning_always);
         assert_eq!(m.mmproj_file.as_deref(), Some("mmproj-model-f16.gguf"));
         assert_eq!(m.mmproj_sha256.as_deref(), Some(&*"b".repeat(64)));
 
@@ -5042,6 +5155,61 @@ mod tests {
         assert!(m.thinking);
     }
 
+    // ── Reasoning-flag resolution helpers ────────────────────────────────────
+
+    #[test]
+    fn curated_reasoning_flags_match_every_starter() {
+        for s in registry::STARTERS {
+            assert_eq!(
+                curated_reasoning_flags(s.repo, s.file_name),
+                Some((s.thinking, s.reasoning_always)),
+                "curated flags must mirror the registry for {}",
+                s.repo
+            );
+        }
+    }
+
+    #[test]
+    fn curated_reasoning_flags_none_for_pasted_repo() {
+        assert_eq!(curated_reasoning_flags("nope/repo", "x.gguf"), None);
+    }
+
+    #[test]
+    fn pasted_reasoning_flags_classify_from_template() {
+        // Optional family: thinking on, no badge.
+        assert_eq!(
+            pasted_reasoning_flags(
+                (false, false),
+                Some("{% if enable_thinking %}"),
+                Some("qwen3")
+            ),
+            (true, false)
+        );
+        // Always family: thinking on, badge.
+        assert_eq!(
+            pasted_reasoning_flags((false, false), Some("<think>"), None),
+            (true, true)
+        );
+        // Non-reasoning: both off.
+        assert_eq!(
+            pasted_reasoning_flags((false, false), Some("plain instruct"), None),
+            (false, false)
+        );
+    }
+
+    #[test]
+    fn pasted_reasoning_flags_fall_back_when_template_absent() {
+        // No readable template: keep the placeholder flags for the backstop.
+        assert_eq!(
+            pasted_reasoning_flags((true, true), None, None),
+            (true, true)
+        );
+        assert_eq!(
+            pasted_reasoning_flags((false, false), None, Some("qwen3")),
+            (false, false)
+        );
+    }
+
     // ── Model library: delete ────────────────────────────────────────────────
 
     #[test]
diff --git a/src-tauri/src/models/reasoning.rs b/src-tauri/src/models/reasoning.rs
new file mode 100644
index 00000000..4fabce4c
--- /dev/null
+++ b/src-tauri/src/models/reasoning.rs
@@ -0,0 +1,299 @@
+/*!
+ * Dynamic reasoning-capability classifier for locally-run GGUF models.
+ *
+ * Thuki must behave correctly for ANY model a user downloads, not just the
+ * three curated starters whose class is baked into the registry. The single
+ * authoritative signal a GGUF carries about whether (and how) it reasons is
+ * its embedded chat template (`tokenizer.chat_template`); the template's
+ * markers tell us which reasoning family a model belongs to.
+ *
+ * This module is the pure, side-effect-free heart of the classifier:
+ * [`classify_reasoning`] maps a chat-template string (plus the optional
+ * `general.architecture`) onto one of three classes. The byte-level template
+ * extraction lives in [`crate::models::gguf`]; persistence and the runtime
+ * behavioral backstop live in [`crate::models`] / [`crate::commands`].
+ *
+ * The three classes mirror the convergent industry taxonomy (OpenRouter
+ * `mandatory`, Ollama `thinking` capability, vLLM per-family parsers):
+ *
+ * - [`ReasoningClass::None`] — not a reasoning model. `/think` is a no-op, no
+ *   thinking block, no badge.
+ * - [`ReasoningClass::Optional`] — reasoning can be turned off. Thuki defaults
+ *   it OFF (the OFF blast in [`crate::openai`] suppresses it) and `/think`
+ *   turns it on per-message. No badge.
+ * - [`ReasoningClass::Always`] — reasoning is structural and cannot be turned
+ *   off. Thuki shows it cleanly and badges the model so the latency is not a
+ *   surprise; `/think` is a harmless no-op.
+ */
+
+/// Marker present in gpt-oss / Harmony templates: reasoning rides the
+/// `analysis` channel, which is structural and cannot be disabled.
+const MARKER_HARMONY_CHANNEL: &str = "<|channel|>";
+
+/// GGUF `general.architecture` value for gpt-oss / Harmony models, matched
+/// case-insensitively as a substring (the spelling `gptoss` is accepted too).
+/// A belt-and-suspenders signal alongside [`MARKER_HARMONY_CHANNEL`], so a
+/// gpt-oss GGUF classifies as `Always` even if its channel markup differs.
+const ARCH_GPT_OSS: &str = "gpt-oss";
+
+/// The literal word that every "reasoning can be disabled" family threads
+/// through its template, whether as a kwarg (`enable_thinking`,
+/// `thinking_budget`) or a bare Jinja variable (`thinking`). Crucially the
+/// always-on tag families spell their tags `<think>` / `<thought>` /
+/// `<seed:think>` (no `ing`), so the presence of the whole word `thinking`
+/// is what separates "has an off switch" from "always reasons".
+const MARKER_THINKING_KWARG: &str = "thinking";
+
+/// Mistral Magistral / Ministral reasoning tags. Reasoning is driven by a
+/// system-prompt instruction rather than a template kwarg, so without Thuki's
+/// (absent) reasoning system prompt these models stay quiet: treated as
+/// `Optional` (default off), not `Always`.
+const MARKER_MISTRAL_THINK_OPEN: &str = "[THINK]";
+const MARKER_MISTRAL_THINK_CLOSE: &str = "[/THINK]";
+
+/// Always-on reasoning tags: a template that contains any of these (opening or
+/// closing form, anywhere in the text) and offers no off switch is classed as
+/// always reasoning (DeepSeek-R1 and distills, QwQ, EXAONE-Deep, MiniMax-M2,
+/// Phi-4-reasoning, Seed-OSS variants without a budget kwarg). Checked only
+/// AFTER the off-switch word, so a family that ships both a tag and a kwarg
+/// (e.g. Seed-OSS `<seed:think>` + `thinking_budget`) is classified `Optional`.
+const ALWAYS_TAGS: &[&str] = &[
+    "<think>",
+    "</think>",
+    "<thought>",
+    "</thought>",
+    "<seed:think>",
+];
+
+/// How a model reasons, derived from its chat template. See the module docs
+/// for the behavior each class drives.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ReasoningClass {
+    /// Not a reasoning model.
+    None,
+    /// Reasoning can be turned off; Thuki defaults it off.
+    Optional,
+    /// Reasoning is structural and cannot be turned off.
+    Always,
+}
+
+impl ReasoningClass {
+    /// Projects the class onto the two manifest capability flags Thuki
+    /// persists and surfaces: `(thinking, reasoning_always)`.
+    ///
+    /// - `None`     -> `(false, false)`: no thinking block, no badge.
+    /// - `Optional` -> `(true,  false)`: thinking available, no badge.
+    /// - `Always`   -> `(true,  true )`: thinking shown, badge.
+    pub fn flags(self) -> (bool, bool) {
+        match self {
+            ReasoningClass::None => (false, false),
+            ReasoningClass::Optional => (true, false),
+            ReasoningClass::Always => (true, true),
+        }
+    }
+}
+
+/// Classifies a model's reasoning capability from its chat template and
+/// optional `general.architecture`. Template markers are case-sensitive
+/// substring matches, tried most-specific first; the first hit wins:
+///
+/// 1. `<|channel|>` in the template, or an architecture containing `gpt-oss` /
+///    `gptoss` (any letter case) -> `Always` (gpt-oss / Harmony).
+/// 2. The off-switch word `thinking` (so also `enable_thinking` and
+///    `thinking_budget`) -> `Optional` (the OFF blast controls it).
+/// 3. Mistral `[THINK]` / `[/THINK]` tags -> `Optional` (system-prompt
+///    driven; quiet without Thuki's reasoning prompt).
+/// 4. An always-on reasoning tag (`<think>` / `<thought>` / `<seed:think>`, or
+///    a closing `</think>` / `</thought>`) with no off switch -> `Always`.
+/// 5. No reasoning markers at all -> `None`.
+///
+/// Never panics: any input (empty, binary garbage decoded as text, a template
+/// from a future family) resolves to one of the three classes. When the scan
+/// is wrong for an `Always` model, the runtime behavioral backstop corrects it
+/// from real output, so this fast path only needs to fit the common families.
+pub fn classify_reasoning(chat_template: &str, architecture: Option<&str>) -> ReasoningClass {
+    let arch_is_gpt_oss = architecture
+        .map(|a| {
+            let lower = a.to_ascii_lowercase();
+            lower.contains(ARCH_GPT_OSS) || lower.contains("gptoss")
+        })
+        .unwrap_or(false);
+
+    // 1. gpt-oss / Harmony: highest-signal, structural reasoning channel.
+    if chat_template.contains(MARKER_HARMONY_CHANNEL) || arch_is_gpt_oss {
+        return ReasoningClass::Always;
+    }
+
+    // 2. Any "off switch" word means the model reads a disable signal and the
+    //    OFF blast already controls it. Covers `enable_thinking`,
+    //    `thinking_budget`, and a bare `thinking` Jinja variable in one check,
+    //    because the always-on tag families never spell the whole word.
+    if chat_template.contains(MARKER_THINKING_KWARG) {
+        return ReasoningClass::Optional;
+    }
+
+    // 3. Mistral reasoning is system-prompt driven, not template-gated, so it
+    //    is quiet by default under Thuki and treated as optional.
+    if chat_template.contains(MARKER_MISTRAL_THINK_OPEN)
+        || chat_template.contains(MARKER_MISTRAL_THINK_CLOSE)
+    {
+        return ReasoningClass::Optional;
+    }
+
+    // 4. A reasoning tag with no off switch: the model always reasons.
+    if ALWAYS_TAGS.iter().any(|tag| chat_template.contains(tag)) {
+        return ReasoningClass::Always;
+    }
+
+    // 5. No markers: not a reasoning model.
+    ReasoningClass::None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn flags_map_each_class() {
+        assert_eq!(ReasoningClass::None.flags(), (false, false));
+        assert_eq!(ReasoningClass::Optional.flags(), (true, false));
+        assert_eq!(ReasoningClass::Always.flags(), (true, true));
+    }
+
+    // ── Always: gpt-oss / Harmony ────────────────────────────────────────────
+
+    #[test]
+    fn gpt_oss_channel_marker_is_always() {
+        let t = "<|start|>system<|message|>...<|channel|>analysis<|message|>...";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Always);
+    }
+
+    #[test]
+    fn gpt_oss_architecture_is_always_even_without_channel_marker() {
+        // A gpt-oss GGUF whose template the scan does not recognize still
+        // classifies Always from the architecture tiebreak.
+        assert_eq!(
+            classify_reasoning("{{ messages }}", Some("gpt-oss")),
+            ReasoningClass::Always
+        );
+        assert_eq!(
+            classify_reasoning("", Some("GptOss")),
+            ReasoningClass::Always
+        );
+    }
+
+    // ── Always: tag families with no off switch ──────────────────────────────
+
+    #[test]
+    fn deepseek_r1_hard_open_think_is_always() {
+        // R1 hard-opens <think> after the assistant marker and reads no kwarg.
+        let t = "{{'<｜Assistant｜>'}}<think>\\n";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Always);
+    }
+
+    #[test]
+    fn qwq_think_tag_qwen2_is_always() {
+        let t = "<|im_start|>assistant\\n<think>\\n";
+        assert_eq!(classify_reasoning(t, Some("qwen2")), ReasoningClass::Always);
+    }
+
+    #[test]
+    fn exaone_deep_thought_tag_is_always() {
+        let t = "<|assistant|>\\n<thought>";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Always);
+    }
+
+    #[test]
+    fn closing_think_tag_alone_is_always() {
+        // Some templates only carry the closing tag in a prefill branch.
+        assert_eq!(
+            classify_reasoning("...</think>...", None),
+            ReasoningClass::Always
+        );
+        assert_eq!(
+            classify_reasoning("...</thought>...", None),
+            ReasoningClass::Always
+        );
+    }
+
+    // ── Optional: off-switch kwarg / variable families ───────────────────────
+
+    #[test]
+    fn qwen3_enable_thinking_is_optional() {
+        let t = "{%- if enable_thinking %}<think>{% endif %}";
+        assert_eq!(
+            classify_reasoning(t, Some("qwen3")),
+            ReasoningClass::Optional
+        );
+    }
+
+    #[test]
+    fn glm_enable_thinking_is_optional() {
+        let t = "<|assistant|>{% if enable_thinking %}...{% endif %}";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Optional);
+    }
+
+    #[test]
+    fn granite_thinking_variable_is_optional() {
+        let t = "<|start_of_role|>{% if thinking %}...{% endif %}";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Optional);
+    }
+
+    #[test]
+    fn deepseek_v31_thinking_branch_is_optional() {
+        let t = "{{'<｜Assistant｜>'}}{% if thinking %}<think>{% else %}</think>{% endif %}";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Optional);
+    }
+
+    #[test]
+    fn seed_oss_budget_kwarg_wins_over_its_tag() {
+        // Seed-OSS ships both <seed:think> AND thinking_budget; the budget
+        // (off switch) must win so it is Optional, not Always.
+        let t = "<seed:think>{{ thinking_budget }}";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Optional);
+    }
+
+    #[test]
+    fn mistral_bracket_think_is_optional() {
+        // Magistral reasoning is system-prompt driven; quiet by default.
+        assert_eq!(
+            classify_reasoning("...[THINK]...", None),
+            ReasoningClass::Optional
+        );
+        assert_eq!(
+            classify_reasoning("...[/THINK]...", None),
+            ReasoningClass::Optional
+        );
+    }
+
+    // ── None: plain instruct models ──────────────────────────────────────────
+
+    #[test]
+    fn gemma_plain_instruct_is_none() {
+        let t = "<start_of_turn>user\\n{{ content }}<end_of_turn>";
+        assert_eq!(classify_reasoning(t, Some("gemma3")), ReasoningClass::None);
+    }
+
+    #[test]
+    fn empty_template_is_none() {
+        assert_eq!(classify_reasoning("", None), ReasoningClass::None);
+    }
+
+    #[test]
+    fn arch_without_markers_does_not_force_a_class() {
+        // A non-gpt-oss architecture with no template markers stays None: the
+        // architecture only tiebreaks the gpt-oss case.
+        assert_eq!(
+            classify_reasoning("{{ messages }}", Some("llama")),
+            ReasoningClass::None
+        );
+    }
+
+    #[test]
+    fn channel_marker_beats_a_later_thinking_word() {
+        // Ordering guard: a Harmony template that also happens to mention the
+        // word "thinking" still classifies Always (channel checked first).
+        let t = "<|channel|>analysis ... enable_thinking";
+        assert_eq!(classify_reasoning(t, None), ReasoningClass::Always);
+    }
+}
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 6fa5ef75..ccb0dfc6 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -197,6 +197,7 @@ pub fn to_installed_model(s: &Starter) -> InstalledModel {
         quant: s.quant.to_string(),
         vision: s.vision,
         thinking: s.thinking,
+        reasoning_always: s.reasoning_always,
         mmproj_file: s.mmproj_file.map(str::to_string),
         mmproj_sha256: s.mmproj_sha256.map(str::to_string),
     }
@@ -421,6 +422,7 @@ mod tests {
         assert_eq!(m.quant, balanced.quant);
         assert_eq!(m.vision, balanced.vision);
         assert_eq!(m.thinking, balanced.thinking);
+        assert_eq!(m.reasoning_always, balanced.reasoning_always);
         assert_eq!(m.mmproj_file.as_deref(), balanced.mmproj_file);
         assert_eq!(m.mmproj_sha256.as_deref(), balanced.mmproj_sha256);
 
diff --git a/src/components/StarterMatrix.tsx b/src/components/StarterMatrix.tsx
index e680256b..04c60255 100644
--- a/src/components/StarterMatrix.tsx
+++ b/src/components/StarterMatrix.tsx
@@ -22,6 +22,7 @@ import {
   type DownloadUiState,
 } from '../hooks/useDownloadModel';
 import type { RamFit, StarterOption, StarterTier } from '../types/starter';
+import { ALWAYS_THINKS_LABEL, ALWAYS_THINKS_TOOLTIP } from './ModelPickerPanel';
 
 const HF_BASE_URL = 'https://huggingface.co';
 
@@ -294,6 +295,7 @@ function LabelColumn() {
       {cell('Speed')}
       {cell('Quality')}
       {cell('Vision')}
+      {cell('Reasoning')}
       {cell('On your Mac')}
       {cell('Origin')}
       {cell('License')}
@@ -406,6 +408,37 @@ function TierColumn({
         )}
       </ValueCell>
 
+      <ValueCell>
+        {starter.reasoning_always ? (
+          // Mirror the model picker's pill so a download-time user is not
+          // surprised by the latency: this tier reasons before every answer.
+          <span
+            data-testid="starter-always-thinks-badge"
+            title={ALWAYS_THINKS_TOOLTIP}
+            style={{
+              display: 'inline-flex',
+              alignItems: 'center',
+              fontSize: 10,
+              fontWeight: 600,
+              color: 'rgba(255,255,255,0.82)',
+              background: 'rgba(255,141,92,0.12)',
+              border: '1px solid rgba(255,141,92,0.25)',
+              borderRadius: 6,
+              padding: '1px 6px',
+              whiteSpace: 'nowrap',
+            }}
+          >
+            {ALWAYS_THINKS_LABEL}
+          </span>
+        ) : starter.thinking ? (
+          <span style={{ color: 'rgba(255,255,255,0.6)', fontWeight: 600 }}>
+            On demand
+          </span>
+        ) : (
+          <span style={{ color: 'rgba(255,255,255,0.28)' }}>&mdash;</span>
+        )}
+      </ValueCell>
+
       <ValueCell>
         <span style={{ color: fitInfo.color, fontWeight: 700 }}>
           {fitInfo.label}
diff --git a/src/components/__tests__/StarterMatrix.test.tsx b/src/components/__tests__/StarterMatrix.test.tsx
index b1c27991..8225ae22 100644
--- a/src/components/__tests__/StarterMatrix.test.tsx
+++ b/src/components/__tests__/StarterMatrix.test.tsx
@@ -1,6 +1,7 @@
 import { render, screen, fireEvent } from '@testing-library/react';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
 import { StarterMatrix } from '../StarterMatrix';
+import { ALWAYS_THINKS_LABEL } from '../ModelPickerPanel';
 import { invoke } from '../../testUtils/mocks/tauri';
 import type { DownloadUiState } from '../../hooks/useDownloadModel';
 import type { Starter, StarterOption, StarterTier } from '../../types/starter';
@@ -125,12 +126,40 @@ describe('StarterMatrix (picker)', () => {
   it('renders Vision yes/no and the On-your-Mac fit copy', () => {
     renderMatrix(THREE_TIERS);
     expect(screen.getAllByText('Yes')).toHaveLength(2); // fast + balanced
-    expect(screen.getByText('—')).toBeInTheDocument(); // smartest text-only
+    // Dashes: 1 Vision cell (smartest text-only) + 3 Reasoning cells (every
+    // fixture has thinking:false, so all three read as "no reasoning").
+    expect(screen.getAllByText('—')).toHaveLength(4);
     expect(screen.getByText('Comfortable')).toBeInTheDocument();
     expect(screen.getByText('Tight')).toBeInTheDocument();
     expect(screen.getByText('Heavy')).toBeInTheDocument();
   });
 
+  it('renders the reasoning class per tier (always badge, on-demand, none)', () => {
+    renderMatrix([
+      makeOption('fast', undefined, {
+        thinking: true,
+        reasoning_always: false,
+      }),
+      makeOption('balanced', undefined, {
+        thinking: false,
+        reasoning_always: false,
+      }),
+      makeOption('smartest', undefined, {
+        thinking: true,
+        reasoning_always: true,
+      }),
+    ]);
+    expect(screen.getByText('Reasoning')).toBeInTheDocument();
+    // smartest: always-reasoning pill.
+    expect(screen.getByTestId('starter-always-thinks-badge')).toHaveTextContent(
+      ALWAYS_THINKS_LABEL,
+    );
+    // fast: optional reasoning reads "On demand".
+    expect(screen.getByText('On demand')).toBeInTheDocument();
+    // balanced: no reasoning -> a dash (a Vision-cell dash would satisfy this too).
+    expect(screen.getAllByText('—').length).toBeGreaterThanOrEqual(1);
+  });
+
   it('opens the Hugging Face repo from the license cell', () => {
     renderMatrix(THREE_TIERS);
     expect(screen.getAllByText('Gemma Terms of Use ↗')).toHaveLength(2);

From 2a0ff7552cc6da310408b42e962159014f9ddeda Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 21:22:22 -0500
Subject: [PATCH 28/89] feat: add family grouping field to the curated starter
 registry

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/registry.rs | 19 +++++++++++++++++++
 src/types/starter.ts             |  4 ++++
 2 files changed, 23 insertions(+)

diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index ccb0dfc6..69581d59 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -31,6 +31,10 @@ pub enum Tier {
 pub struct Starter {
     /// Which speed/quality tier this entry fills.
     pub tier: Tier,
+    /// Model family the Discover staff-picks accordion groups this entry under
+    /// (e.g. "Gemma", "Qwen", "gpt-oss"). Several starters can share a family
+    /// when the catalog offers more than one size of the same model.
+    pub family: &'static str,
     /// Human-readable label shown in the picker (e.g. "Gemma 4 12B").
     pub display_name: &'static str,
     /// Hugging Face repo slug.
@@ -79,6 +83,7 @@ pub struct Starter {
 pub const STARTERS: &[Starter] = &[
     Starter {
         tier: Tier::Fast,
+        family: "Qwen",
         display_name: "Qwen3.5 9B",
         repo: "unsloth/Qwen3.5-9B-GGUF",
         revision: "3885219b6810b007914f3a7950a8d1b469d598a5",
@@ -99,6 +104,7 @@ pub const STARTERS: &[Starter] = &[
     },
     Starter {
         tier: Tier::Balanced,
+        family: "Gemma",
         display_name: "Gemma 4 12B",
         repo: "google/gemma-4-12B-it-qat-q4_0-gguf",
         revision: "f6e7774e6148da3b7f201e42ba37cf084c1db35f",
@@ -119,6 +125,7 @@ pub const STARTERS: &[Starter] = &[
     },
     Starter {
         tier: Tier::Smartest,
+        family: "gpt-oss",
         display_name: "gpt-oss 20B",
         repo: "ggml-org/gpt-oss-20b-GGUF",
         revision: "e1dc459feff949ff451ce107337a2026daa80df8",
@@ -227,6 +234,18 @@ mod tests {
         );
     }
 
+    #[test]
+    fn family_per_tier() {
+        // Pins each tier's family label, then checks no STARTERS entry has an
+        // empty one: the Discover staff-picks accordion groups by this field.
+        assert_eq!(starter(Tier::Fast).family, "Qwen");
+        assert_eq!(starter(Tier::Balanced).family, "Gemma");
+        assert_eq!(starter(Tier::Smartest).family, "gpt-oss");
+        for s in STARTERS {
+            assert!(!s.family.is_empty(), "{}: family is empty", s.repo);
+        }
+    }
+
     #[test]
     fn vision_and_mmproj_per_tier() {
         // Fast (Qwen3.5) and Balanced (Gemma 4) are multimodal and each carries
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 48fdcf64..57105e36 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -19,6 +19,10 @@ export type RamFit = 'fits' | 'tight' | 'too_big';
 /** One curated starter model from the compile-time registry. */
 export interface Starter {
   tier: StarterTier;
+  /** Model family the Discover staff-picks accordion groups this entry under
+   * (e.g. "Gemma", "Qwen", "gpt-oss"). Backend always sends it; optional here
+   * for test-fixture ergonomics. */
+  family?: string;
   display_name: string;
   repo: string;
   revision: string;

From ca582bc522d1bb0c1a18a0986d03c71e127ca535 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 21:24:17 -0500
Subject: [PATCH 29/89] refactor: relocate the raw Hugging Face browser to
 BrowseAllPane

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/tabs/ModelTab.tsx                |  4 ++--
 ...ne.module.css => BrowseAllPane.module.css} |  0
 ...erPane.test.tsx => BrowseAllPane.test.tsx} | 19 ++++++++++---------
 .../{DiscoverPane.tsx => BrowseAllPane.tsx}   | 16 +++++++++-------
 4 files changed, 21 insertions(+), 18 deletions(-)
 rename src/settings/tabs/models/{DiscoverPane.module.css => BrowseAllPane.module.css} (100%)
 rename src/settings/tabs/models/{DiscoverPane.test.tsx => BrowseAllPane.test.tsx} (97%)
 rename src/settings/tabs/models/{DiscoverPane.tsx => BrowseAllPane.tsx} (94%)

diff --git a/src/settings/tabs/ModelTab.tsx b/src/settings/tabs/ModelTab.tsx
index aa1fa9fe..b425ddf9 100644
--- a/src/settings/tabs/ModelTab.tsx
+++ b/src/settings/tabs/ModelTab.tsx
@@ -12,7 +12,7 @@ import { useState } from 'react';
 import { ModelsSegmented, type ModelsSubview } from './models/ModelsSegmented';
 import { ProvidersPane } from './models/ProvidersPane';
 import { LibraryPane } from './models/LibraryPane';
-import { DiscoverPane } from './models/DiscoverPane';
+import { BrowseAllPane } from './models/BrowseAllPane';
 import styles from '../../styles/settings.module.css';
 import type { RawAppConfig } from '../types';
 
@@ -42,7 +42,7 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
         />
       ) : null}
 
-      {view === 'discover' ? <DiscoverPane onSaved={onSaved} /> : null}
+      {view === 'discover' ? <BrowseAllPane onSaved={onSaved} /> : null}
 
       {view === 'providers' ? (
         <ProvidersPane
diff --git a/src/settings/tabs/models/DiscoverPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
similarity index 100%
rename from src/settings/tabs/models/DiscoverPane.module.css
rename to src/settings/tabs/models/BrowseAllPane.module.css
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
similarity index 97%
rename from src/settings/tabs/models/DiscoverPane.test.tsx
rename to src/settings/tabs/models/BrowseAllPane.test.tsx
index bf323d0d..11233798 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -1,5 +1,6 @@
 /**
- * Unit tests for the Discover pane: the in-app Hugging Face GGUF browser.
+ * Unit tests for the Browse-all pane: the in-app Hugging Face GGUF browser
+ * (Discover's advanced pathway).
  *
  * Covers the search field wiring, family filter chips, the result rows (org
  * parsing, gated rows, RAM-fit hint, the Hugging Face link), pagination (Load
@@ -22,7 +23,7 @@ import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
 
 import { invoke } from '@tauri-apps/api/core';
 
-import { DiscoverPane } from './DiscoverPane';
+import { BrowseAllPane } from './BrowseAllPane';
 import {
   HF_SEARCH_DEBOUNCE_MS,
   HF_PAGE_SIZE,
@@ -115,7 +116,7 @@ async function renderPane(
   overrides: Record<string, unknown> = {},
 ) {
   mockCommands(discoverResponses(overrides));
-  const view = render(<DiscoverPane onSaved={onSaved} />);
+  const view = render(<BrowseAllPane onSaved={onSaved} />);
   await waitFor(() =>
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
       query: '',
@@ -126,7 +127,7 @@ async function renderPane(
   return view;
 }
 
-describe('DiscoverPane', () => {
+describe('BrowseAllPane', () => {
   it('renders a row per search result with the repo id and org line', async () => {
     await renderPane();
     expect(screen.getByText('google/gemma-4-12b-it-GGUF')).toBeInTheDocument();
@@ -165,7 +166,7 @@ describe('DiscoverPane', () => {
   it('typing in the search drives a debounced fetch and re-renders results', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<DiscoverPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />);
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -194,7 +195,7 @@ describe('DiscoverPane', () => {
   it('clicking a family chip sets the query to that family', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<DiscoverPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />);
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -218,7 +219,7 @@ describe('DiscoverPane', () => {
   it('the All chip clears the query and is active by default', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<DiscoverPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />);
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -467,7 +468,7 @@ describe('DiscoverPane', () => {
       resolveSearch = res;
     });
     mockCommands(discoverResponses({ search_hf_models: pending }));
-    render(<DiscoverPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />);
     await flush();
     expect(screen.getByText('Searching…')).toBeInTheDocument();
     await act(async () => {
@@ -494,7 +495,7 @@ describe('DiscoverPane', () => {
         gated: false,
       }));
     mockCommands(discoverResponses({ search_hf_models: full(HF_PAGE_SIZE) }));
-    render(<DiscoverPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />);
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
similarity index 94%
rename from src/settings/tabs/models/DiscoverPane.tsx
rename to src/settings/tabs/models/BrowseAllPane.tsx
index e87b726c..ca21e8d8 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -1,5 +1,7 @@
 /**
- * Discover pane: the in-app Hugging Face GGUF model browser.
+ * Browse-all pane: the in-app Hugging Face GGUF model browser, the advanced
+ * pathway of Discover (behind the "Browse all" tab; the curated "Staff picks"
+ * accordion is the default front door).
  *
  * A search field (driven by {@link useHfSearch}) plus a row of family filter
  * chips feed one debounced backend query that returns chat/text-generation
@@ -20,7 +22,7 @@ import { useDownloadModel } from '../../../hooks/useDownloadModel';
 import { useHfSearch } from './useHfSearch';
 import { Tooltip } from '../../../components/Tooltip';
 import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
-import styles from './DiscoverPane.module.css';
+import styles from './BrowseAllPane.module.css';
 import type { HfModelSummary } from '../../../types/hf';
 import type { HfGgufFile, RamFit } from '../../../types/starter';
 import type { RawAppConfig } from '../../types';
@@ -65,12 +67,12 @@ const DOWNLOAD_ICON = (
     <path d="M12 4v11M7 11l5 5 5-5M5 20h14" />
   </svg>
 );
-interface DiscoverPaneProps {
+interface BrowseAllPaneProps {
   /** Lift a fresh config snapshot after a successful install. */
   onSaved: (next: RawAppConfig) => void;
 }
 
-export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
+export function BrowseAllPane({ onSaved }: BrowseAllPaneProps) {
   const { query, setQuery, results, loading, loadMore, canLoadMore } =
     useHfSearch();
 
@@ -126,7 +128,7 @@ export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
           <p className={styles.state}>No models found.</p>
         ) : null}
         {results.map((model) => (
-          <DiscoverRow key={model.id} model={model} onSaved={onSaved} />
+          <BrowseAllRow key={model.id} model={model} onSaved={onSaved} />
         ))}
         {canLoadMore ? (
           <button type="button" className={styles.loadMore} onClick={loadMore}>
@@ -138,7 +140,7 @@ export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
   );
 }
 
-interface DiscoverRowProps {
+interface BrowseAllRowProps {
   model: HfModelSummary;
   onSaved: (next: RawAppConfig) => void;
 }
@@ -148,7 +150,7 @@ interface DiscoverRowProps {
  * the first time the row expands; the download state machine is local to the
  * row so two rows cannot share an in-flight download.
  */
-function DiscoverRow({ model, onSaved }: DiscoverRowProps) {
+function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
   const [expanded, setExpanded] = useState(false);
   const [files, setFiles] = useState<HfGgufFile[] | null>(null);
   const [listError, setListError] = useState<string | null>(null);

From 15d2db163b1949b24b4b7a86e09beacae4b0df4d Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 21:31:40 -0500
Subject: [PATCH 30/89] feat: add curated Staff picks family accordion for
 Discover

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/StaffPicksPane.module.css     | 309 ++++++++++++
 .../tabs/models/StaffPicksPane.test.tsx       | 446 ++++++++++++++++++
 src/settings/tabs/models/StaffPicksPane.tsx   | 420 +++++++++++++++++
 3 files changed, 1175 insertions(+)
 create mode 100644 src/settings/tabs/models/StaffPicksPane.module.css
 create mode 100644 src/settings/tabs/models/StaffPicksPane.test.tsx
 create mode 100644 src/settings/tabs/models/StaffPicksPane.tsx

diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
new file mode 100644
index 00000000..42689a3f
--- /dev/null
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -0,0 +1,309 @@
+/*
+ * Styles for the Staff-picks pane: Discover's curated family accordion. The
+ * premium tokens (--accent, --t1..--t3, --hair, --ok, --tight, --heavy, etc.)
+ * cascade from the Settings window root, so they are referenced via var() here.
+ */
+
+.pane {
+  display: flex;
+  flex-direction: column;
+  min-height: 0;
+}
+
+.hint {
+  font-size: 11.5px;
+  color: var(--t3);
+  margin: 0 2px 13px;
+  line-height: 1.5;
+}
+
+.empty {
+  padding: 36px 14px;
+  text-align: center;
+  font-size: 13px;
+  color: var(--t2);
+}
+
+.list {
+  display: flex;
+  flex-direction: column;
+  gap: 9px;
+}
+
+/* ── Family accordion section ─────────────────────────────────────────────── */
+
+.fam {
+  border: 1px solid var(--hair-soft);
+  border-radius: var(--radius-card);
+  background: var(--elev-1);
+  overflow: hidden;
+}
+
+.famHead {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+  width: 100%;
+  padding: 13px 15px;
+  border: none;
+  background: transparent;
+  color: var(--t1);
+  font-family: inherit;
+  text-align: left;
+  cursor: pointer;
+  transition: background 140ms ease;
+}
+.famHead:hover {
+  background: var(--elev-2);
+}
+
+.famText {
+  flex: 1; /* grow to fill the remaining width of the flex parent */
+  min-width: 0; /* allow shrinking below content width (flex items default to auto) */
+}
+
+.famName {
+  display: block;
+  font-weight: 600;
+  font-size: 13.5px;
+  color: var(--t1);
+}
+
+.famSub {
+  display: block;
+  font-size: 11px;
+  color: var(--t3);
+  margin-top: 3px;
+}
+
+.chevWrap {
+  flex: none;
+  display: grid;
+  place-items: center;
+  color: var(--t3);
+  transition: transform 150ms ease;
+}
+.chevOpen {
+  transform: rotate(90deg);
+}
+.chev {
+  width: 10px;
+  height: 10px;
+  fill: none;
+  stroke: currentColor;
+  stroke-width: 1.6;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+}
+
+.famBody {
+  border-top: 1px solid var(--hair-soft);
+}
+
+/* ── Model row ────────────────────────────────────────────────────────────── */
+
+.row {
+  padding: 0 4px;
+}
+.row + .row {
+  box-shadow: 0 -1px 0 var(--hair-soft); /* 1px rule above each row that follows another row */
+}
+
+.rowMain {
+  display: flex;
+  align-items: flex-start;
+  gap: 14px;
+  padding: 12px 11px;
+}
+
+.mid {
+  flex: 1;
+  min-width: 0;
+}
+
+.name {
+  display: flex;
+  align-items: center;
+  gap: 9px;
+  flex-wrap: wrap;
+  font-weight: 560;
+  font-size: 13px;
+  color: var(--t1);
+}
+
+.recommended {
+  font-size: 10px;
+  font-weight: 640;
+  letter-spacing: 0.04em;
+  text-transform: uppercase;
+  color: var(--accent);
+  background: var(--accent-soft);
+  padding: 2px 7px;
+  border-radius: var(--radius-pill);
+}
+.recommended::before {
+  content: '★ ';
+}
+
+/* Calm capability pills: one cohesive chrome, a small colour dot the only
+ * accent, matching the Library pane's vocabulary. */
+.pills {
+  display: flex;
+  gap: 6px;
+  flex-wrap: wrap;
+  margin-top: 8px;
+}
+.pill {
+  display: inline-flex;
+  align-items: center;
+  gap: 5px;
+  font-size: 10.5px;
+  font-weight: 540;
+  padding: 2px 8px;
+  border-radius: var(--radius-pill);
+  color: var(--t2);
+  background: rgba(255, 255, 255, 0.05);
+}
+.pill::before {
+  content: ''; /* empty box drawn as the dot; .pillText/.pillVision/.pillThinking colour it */
+  width: 5px;
+  height: 5px;
+  border-radius: 50%;
+}
+.pillText::before {
+  background: var(--t3);
+}
+.pillVision::before {
+  background: var(--vis);
+}
+.pillThinking::before {
+  background: var(--rea);
+}
+
+.meta {
+  font-size: 11px;
+  color: var(--t3);
+  margin-top: 8px;
+  font-variant-numeric: tabular-nums;
+}
+
+.hfLink {
+  border: none;
+  background: transparent;
+  padding: 0;
+  font-family: inherit;
+  font-size: 11px;
+  color: var(--t3);
+  cursor: pointer;
+  transition: color 140ms ease;
+}
+.hfLink:hover {
+  color: var(--accent);
+}
+
+.right {
+  display: flex;
+  flex-direction: column;
+  align-items: flex-end;
+  gap: 10px;
+  flex: none;
+}
+
+/* RAM-fit hint: a coloured dot + label reusing the shared fit palette. */
+.fit {
+  display: inline-flex;
+  align-items: center;
+  gap: 5px;
+  font-size: 11px;
+  font-weight: 560;
+  white-space: nowrap;
+}
+.fit::before {
+  content: '';
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
+  background: currentColor; /* dot follows the element's text colour (see .fitOk/.fitTight/.fitHeavy) */
+}
+.fitOk {
+  color: var(--ok);
+}
+.fitTight {
+  color: var(--tight);
+}
+.fitHeavy {
+  color: var(--heavy);
+}
+
+/* ── Row actions ──────────────────────────────────────────────────────────── */
+
+.getBtn {
+  flex: none;
+  font-size: 11.5px;
+  font-weight: 540;
+  color: var(--accent);
+  border: 1px solid rgba(255, 141, 92, 0.4);
+  border-radius: var(--radius-control);
+  padding: 6px 14px;
+  background: transparent;
+  font-family: inherit;
+  cursor: pointer;
+  transition:
+    border-color 140ms ease,
+    background 140ms ease;
+}
+.getBtn:hover {
+  border-color: rgba(255, 141, 92, 0.6);
+  background: var(--accent-soft);
+}
+.getPrimary {
+  color: #16110d;
+  border-color: transparent;
+  background: var(--accent);
+}
+.getPrimary:hover {
+  background: var(--accent); /* keeps the solid fill over .getBtn:hover's --accent-soft */
+  filter: brightness(1.05); /* NOTE(review): border-color from .getBtn:hover still applies if both classes are set; confirm */
+}
+
+.installed {
+  display: inline-flex;
+  align-items: center;
+  font-size: 11.5px;
+  font-weight: 600;
+  color: var(--ok);
+}
+
+.resumeWrap {
+  display: flex;
+  flex-direction: column;
+  align-items: flex-end;
+  gap: 6px;
+}
+.resumeBtn {
+  font-size: 11.5px;
+  font-weight: 540;
+  color: var(--accent);
+  border: 1px solid rgba(255, 141, 92, 0.4);
+  border-radius: var(--radius-control);
+  padding: 6px 12px;
+  background: transparent;
+  font-family: inherit;
+  cursor: pointer;
+}
+.discardBtn {
+  font-size: 10.5px;
+  font-weight: 540;
+  color: var(--t3);
+  border: none;
+  background: transparent;
+  font-family: inherit;
+  cursor: pointer;
+}
+.discardBtn:hover {
+  color: var(--t2);
+}
+
+.progress {
+  padding: 0 11px 13px;
+}
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
new file mode 100644
index 00000000..f040d4f2
--- /dev/null
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -0,0 +1,446 @@
+/**
+ * Unit tests for the Staff-picks pane: Discover's curated front door.
+ *
+ * Covers the family accordion (grouping, default-expanded recommended family,
+ * expand/collapse), the model rows (name, recommended star, capability pills,
+ * quant/size/license meta, RAM-fit hint), and the verified starter download
+ * flow (download -> progress -> ready lifts config + refreshes; installed;
+ * resume/discard of a partial; failure). The download channel is captured the
+ * same way BrowseAllPane.test.tsx does it: `onEvent` is grabbed off the invoke
+ * args and driven with `simulateMessage`.
+ */
+
+import {
+  act,
+  fireEvent,
+  render,
+  screen,
+  waitFor,
+  within,
+} from '@testing-library/react';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+
+import { StaffPicksPane } from './StaffPicksPane';
+import type { RawAppConfig } from '../../types';
+import type { Starter, StarterOption } from '../../../types/starter';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+type MockChannel = { simulateMessage: (msg: unknown) => void };
+let lastChannel: MockChannel | null = null;
+
+/** Wraps a value that `mockCommands` throws, instead of returning, for a command. */
+class Reject {
+  constructor(public readonly value: unknown) {}
+}
+
+function mockCommands(responses: Record<string, unknown>) {
+  // Route every invoke() through the canned table; unknown commands resolve
+  // to undefined. Any `onEvent` channel argument is remembered for the test.
+  const has = (cmd: string) =>
+    Object.prototype.hasOwnProperty.call(responses, cmd);
+  invokeMock.mockImplementation(
+    async (cmd: string, args?: Record<string, unknown>) => {
+      if (args && 'onEvent' in args) {
+        lastChannel = args.onEvent as unknown as MockChannel;
+      }
+      if (!has(cmd)) return undefined;
+      const canned = responses[cmd];
+      if (canned instanceof Reject) throw canned.value;
+      if (typeof canned !== 'function') return canned;
+      return (canned as (a?: Record<string, unknown>) => unknown)(args);
+    },
+  );
+}
+
+function starter(over: Partial<Starter>): Starter {
+  return {
+    tier: 'balanced',
+    family: 'Gemma',
+    display_name: 'Gemma 4 12B',
+    repo: 'google/gemma-4-12B-it-qat-q4_0-gguf',
+    revision: 'a'.repeat(40), // placeholder 40-character revision id
+    file_name: 'gemma-4-12b-it-qat-q4_0.gguf',
+    sha256: 'b'.repeat(64), // placeholder 64-character digest
+    size_bytes: 6_975_877_728,
+    quant: 'Q4_0',
+    vision: true,
+    thinking: false,
+    reasoning_always: false,
+    mmproj_file: 'mmproj.gguf',
+    mmproj_sha256: 'c'.repeat(64),
+    mmproj_bytes: 175_115_264,
+    est_runtime_gb: 9.5,
+    license_note: 'Apache 2.0',
+    origin: 'Google',
+    origin_repo: 'google/gemma-4-12B-it',
+    ...over, // spread last so the caller's fields replace the Gemma defaults
+  };
+}
+
+function option(over: Partial<Starter>, opts: Partial<StarterOption> = {}): StarterOption {
+  return {
+    starter: starter(over), // `over` customises the nested starter only
+    fit: 'fits',
+    installed: false,
+    partial_bytes: null,
+    ...opts, // spread last so the caller's fields replace these defaults
+  };
+}
+
+/** Three single-model families, mirroring the shipped registry. */
+const QWEN = option({
+  tier: 'fast',
+  family: 'Qwen',
+  display_name: 'Qwen3.5 9B',
+  repo: 'unsloth/Qwen3.5-9B-GGUF',
+  file_name: 'Qwen3.5-9B-Q4_K_M.gguf',
+  quant: 'Q4_K_M',
+  vision: true,
+  thinking: true,
+  origin: 'Alibaba',
+});
+const GEMMA = option({}); // no overrides: the balanced Gemma defaults from starter()
+const GPT_OSS = option(
+  {
+    tier: 'smartest',
+    family: 'gpt-oss',
+    display_name: 'gpt-oss 20B',
+    repo: 'ggml-org/gpt-oss-20b-GGUF',
+    file_name: 'gpt-oss-20b-mxfp4.gguf',
+    quant: 'MXFP4',
+    vision: false,
+    thinking: true,
+    reasoning_always: true,
+    mmproj_file: null,
+    mmproj_sha256: null,
+    mmproj_bytes: 0,
+    origin: 'OpenAI',
+  },
+);
+
+const STARTERS: StarterOption[] = [QWEN, GEMMA, GPT_OSS];
+
+const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
+
+function picksResponses(overrides: Record<string, unknown> = {}) {
+  const defaults = {
+    get_starter_options: STARTERS,
+    get_config: CONFIG_AFTER_INSTALL,
+  };
+  return { ...defaults, ...overrides }; // later spread: overrides win
+}
+
+async function flush() {
+  await act(async () => {
+    await Promise.resolve(); // yield one microtask turn inside act()...
+    await Promise.resolve(); // ...and a second, presumably for chained promise callbacks
+  });
+}
+
+beforeEach(() => {
+  invokeMock.mockReset(); // drop the previous test's mock implementation and call log
+  lastChannel = null; // forget the previous test's captured channel
+});
+
+async function renderPane(
+  onSaved: (next: RawAppConfig) => void = () => {},
+  overrides: Record<string, unknown> = {},
+) {
+  mockCommands(picksResponses(overrides)); // install the mocks before the pane mounts
+  const view = render(<StaffPicksPane onSaved={onSaved} />);
+  await waitFor(() =>
+    expect(invokeMock).toHaveBeenCalledWith('get_starter_options'),
+  );
+  await flush(); // then drain pending microtasks inside act()
+  return view;
+}
+
+/** Accordion header button for a family; prefix-matched literally (no RegExp). */
+function familyHeader(name: string): HTMLElement {
+  return screen.getByRole('button', { name: (n) => n.startsWith(name) });
+}
+
+describe('StaffPicksPane', () => {
+  it('renders a section per family with its name', async () => {
+    await renderPane();
+    expect(familyHeader('Qwen')).toBeInTheDocument();
+    expect(familyHeader('Gemma')).toBeInTheDocument();
+    expect(familyHeader('gpt-oss')).toBeInTheDocument();
+  });
+
+  it('expands the recommended family by default and collapses the rest', async () => {
+    await renderPane();
+    // Gemma holds the balanced (recommended) tier, so its model row is shown.
+    expect(screen.getByText('Gemma 4 12B')).toBeInTheDocument();
+    // The other families start collapsed.
+    expect(screen.queryByText('Qwen3.5 9B')).not.toBeInTheDocument();
+    expect(screen.queryByText('gpt-oss 20B')).not.toBeInTheDocument();
+  });
+
+  it('expands a collapsed family on click and collapses it again', async () => {
+    await renderPane();
+    fireEvent.click(familyHeader('Qwen'));
+    expect(screen.getByText('Qwen3.5 9B')).toBeInTheDocument();
+    fireEvent.click(familyHeader('Qwen'));
+    expect(screen.queryByText('Qwen3.5 9B')).not.toBeInTheDocument();
+  });
+
+  it('marks the recommended model and shows its meta and pills', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    expect(within(row).getByText('Recommended')).toBeInTheDocument();
+    expect(within(row).getByText('Text')).toBeInTheDocument();
+    expect(within(row).getByText('Vision')).toBeInTheDocument();
+    expect(within(row).queryByText('Thinking')).not.toBeInTheDocument();
+    expect(within(row).getByText(/Q4_0/)).toBeInTheDocument();
+    expect(within(row).getByText(/7\.2 GB/)).toBeInTheDocument();
+    expect(within(row).getByText('Comfortable')).toBeInTheDocument();
+  });
+
+  it('shows a Thinking pill on a thinking-capable model', async () => {
+    await renderPane();
+    fireEvent.click(familyHeader('Qwen'));
+    const row = screen
+      .getByText('Qwen3.5 9B')
+      .closest('[data-model-row]') as HTMLElement;
+    expect(within(row).getByText('Thinking')).toBeInTheDocument();
+    expect(within(row).getByText('Vision')).toBeInTheDocument();
+  });
+
+  it('omits the Vision pill on a text-only model', async () => {
+    await renderPane();
+    fireEvent.click(familyHeader('gpt-oss'));
+    const row = screen
+      .getByText('gpt-oss 20B')
+      .closest('[data-model-row]') as HTMLElement;
+    expect(within(row).getByText('Text')).toBeInTheDocument();
+    expect(within(row).getByText('Thinking')).toBeInTheDocument();
+    expect(within(row).queryByText('Vision')).not.toBeInTheDocument();
+  });
+
+  it('downloads a model through the verified starter path', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_starter',
+      expect.objectContaining({ tier: 'balanced' }),
+    );
+  });
+
+  it('lifts a fresh config and refreshes when a download completes', async () => {
+    const onSaved = vi.fn();
+    await renderPane(onSaved);
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    await flush();
+    expect(screen.getByText('Downloading model')).toBeInTheDocument();
+    act(() => {
+      lastChannel?.simulateMessage({ type: 'AllDone' });
+    });
+    await flush();
+    expect(onSaved).toHaveBeenCalledWith(CONFIG_AFTER_INSTALL);
+  });
+
+  it('leaves the lift to a later resync when get_config fails post-download', async () => {
+    const onSaved = vi.fn();
+    await renderPane(onSaved, {
+      get_config: new Reject(new Error('read failed')),
+    });
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    await flush();
+    act(() => {
+      lastChannel?.simulateMessage({ type: 'AllDone' });
+    });
+    await flush();
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+
+  it('cancels an in-flight download', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    await flush();
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
+  });
+
+  it('retries after a failed download', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    await flush();
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'other', message: 'boom' },
+      });
+    });
+    expect(screen.getByText('boom')).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
+    await flush();
+    const starts = invokeMock.mock.calls.filter(
+      (c: unknown[]) => c[0] === 'download_starter',
+    );
+    expect(starts).toHaveLength(2);
+  });
+
+  it('returns to the row from a terminal failure via Choose a different model', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    await flush();
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'disk_full', message: 'no space' },
+      });
+    });
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Choose a different model' }),
+    );
+    expect(
+      screen.getByRole('button', { name: 'Download' }),
+    ).toBeInTheDocument();
+  });
+
+  it('shows Installed instead of a download button', async () => {
+    await renderPane(() => {}, {
+      get_starter_options: [{ ...GEMMA, installed: true }, QWEN, GPT_OSS],
+    });
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    expect(within(row).getByText('Installed')).toBeInTheDocument();
+    expect(
+      within(row).queryByRole('button', { name: 'Download' }),
+    ).not.toBeInTheDocument();
+  });
+
+  it('offers Resume and Discard for an interrupted partial', async () => {
+    await renderPane(() => {}, {
+      get_starter_options: [
+        { ...GEMMA, partial_bytes: 2_000_000_000 },
+        QWEN,
+        GPT_OSS,
+      ],
+    });
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: /Resume/ }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_starter',
+      expect.objectContaining({ tier: 'balanced' }),
+    );
+  });
+
+  it('discards an interrupted partial and refreshes', async () => {
+    await renderPane(() => {}, {
+      get_starter_options: [
+        { ...GEMMA, partial_bytes: 2_000_000_000 },
+        QWEN,
+        GPT_OSS,
+      ],
+    });
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Discard' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('discard_partial_download', {
+      sha256: 'b'.repeat(64),
+    });
+  });
+
+  it('opens the model on Hugging Face from its provenance link', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('Gemma 4 12B')
+      .closest('[data-model-row]') as HTMLElement;
+    fireEvent.click(
+      within(row).getByRole('button', { name: /Hugging Face/ }),
+    );
+    expect(invokeMock).toHaveBeenCalledWith('open_url', {
+      url: 'https://huggingface.co/google/gemma-4-12B-it-qat-q4_0-gguf',
+    });
+  });
+
+  it('shows an empty state when no starters are available', async () => {
+    await renderPane(() => {}, { get_starter_options: [] });
+    expect(screen.getByText(/No curated models/)).toBeInTheDocument();
+  });
+
+  it('degrades to the empty state when the probe rejects', async () => {
+    await renderPane(() => {}, {
+      get_starter_options: new Reject(new Error('probe failed')),
+    });
+    expect(screen.getByText(/No curated models/)).toBeInTheDocument();
+  });
+
+  it('groups several sizes of one family under a single section', async () => {
+    const gemma4b = option({
+      tier: 'fast',
+      family: 'Gemma',
+      display_name: 'Gemma 4 4B',
+      file_name: 'gemma-4-4b.gguf',
+    });
+    const gemma12b = option({}); // balanced Gemma 4 12B
+    await renderPane(() => {}, {
+      get_starter_options: [gemma4b, gemma12b],
+    });
+    // One Gemma section (it holds the recommended tier, so it is open) lists
+    // both sizes, and the header counts them.
+    const header = familyHeader('Gemma');
+    expect(header).toHaveTextContent('2 models');
+    expect(screen.getByText('Gemma 4 4B')).toBeInTheDocument();
+    expect(screen.getByText('Gemma 4 12B')).toBeInTheDocument();
+  });
+
+  it('falls back to the maker blurb and display name for an unlabelled family', async () => {
+    const orphan = option({
+      tier: 'balanced',
+      family: undefined,
+      display_name: 'Mystery 7B',
+      origin: 'Acme',
+    });
+    await renderPane(() => {}, { get_starter_options: [orphan] });
+    // No family label: the section is keyed by the model name and its blurb
+    // falls back to the maker.
+    const header = familyHeader('Mystery 7B');
+    expect(header).toHaveTextContent('Acme');
+  });
+
+  it('falls back to expanding the first family when none is recommended', async () => {
+    // A catalog with no balanced tier: the first family expands so the pane is
+    // never fully collapsed.
+    const fastOnly = option({
+      tier: 'fast',
+      family: 'Qwen',
+      display_name: 'Qwen3.5 9B',
+    });
+    await renderPane(() => {}, { get_starter_options: [fastOnly] });
+    expect(screen.getByText('Qwen3.5 9B')).toBeInTheDocument();
+  });
+});
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
new file mode 100644
index 00000000..8bdc2149
--- /dev/null
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -0,0 +1,420 @@
+/**
+ * Staff-picks pane: the curated front door of Discover.
+ *
+ * Thuki hand-picks a short catalog of models, grouped by family. Each family is
+ * a collapsible accordion section; the one holding the recommended pick is open
+ * by default. A model row shows its friendly name, the one quant Thuki chose for
+ * it, size, capability pills (Text always, plus Vision / Thinking), a RAM-fit
+ * hint, and a single Download that runs the VERIFIED starter path
+ * (`download_starter`, pinned revision + sha256), unlike the Browse-all pane's
+ * arbitrary repo downloads. A finished install lifts a fresh config snapshot.
+ *
+ * Data comes from {@link useStarterOptions} (the same rows onboarding's picker
+ * uses); the download state machine is the shared {@link useDownloadModel}, so
+ * the in-flight / failed UI is the same {@link DownloadProgress} card the rest
+ * of the app shows. At most one model downloads at a time (the backend enforces
+ * it too); `activeTier` tracks which row owns the progress card.
+ */
+
+import { useEffect, useMemo, useRef, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+
+import { DownloadProgress } from '../../../components/DownloadProgress';
+import { useDownloadModel } from '../../../hooks/useDownloadModel';
+import { useStarterOptions } from '../../../components/StarterPicker';
+import { Tooltip } from '../../../components/Tooltip';
+import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
+import styles from './StaffPicksPane.module.css';
+import type { RawAppConfig } from '../../types';
+import type { RamFit, StarterOption, StarterTier } from '../../../types/starter';
+
+const HF_BASE_URL = 'https://huggingface.co';
+
+/** The tier marked as the recommended pick (and whose family opens by default). */
+const RECOMMENDED_TIER: StarterTier = 'balanced';
+
+/** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
+const FIT_CLASS: Record<RamFit, string> = {
+  fits: styles.fitOk,
+  tight: styles.fitTight,
+  too_big: styles.fitHeavy,
+};
+
+/** A plain-language line about what a family is good for, falling back to the
+ * model maker when a family has no hand-written blurb. Presentational only. */
+const FAMILY_BLURB: Record<string, string> = {
+  Qwen: 'Fast, capable all-rounder',
+  Gemma: 'Well-rounded, reads images',
+  'gpt-oss': 'Strongest reasoning',
+};
+
+/** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
+function gb(bytes: number): string {
+  return (bytes / 1e9).toFixed(1);
+}
+
+/** Weights + vision companion: the full on-disk cost of one starter. */
+function totalBytes(o: StarterOption): number {
+  return o.starter.size_bytes + o.starter.mmproj_bytes;
+}
+
+/** One family group: its label and the curated models under it, registry order. */
+interface FamilyGroup {
+  family: string;
+  blurb: string;
+  options: StarterOption[];
+}
+
+/** Groups starter rows by family, preserving first-seen (registry) order. */
+function groupByFamily(options: StarterOption[]): FamilyGroup[] {
+  const groups: FamilyGroup[] = [];
+  for (const o of options) {
+    const family = o.starter.family ?? o.starter.display_name;
+    const existing = groups.find((g) => g.family === family);
+    if (existing) {
+      existing.options.push(o);
+    } else {
+      groups.push({
+        family,
+        blurb: FAMILY_BLURB[family] ?? o.starter.origin,
+        options: [o],
+      });
+    }
+  }
+  return groups;
+}
+
+const CHEVRON = (
+  <svg viewBox="0 0 10 10" aria-hidden="true" className={styles.chev}>
+    <path d="M3 2l4 3-4 3" />
+  </svg>
+);
+
+interface StaffPicksPaneProps {
+  /** Lift a fresh config snapshot after a successful install. */
+  onSaved: (next: RawAppConfig) => void;
+}
+
+export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
+  const { options, refresh } = useStarterOptions();
+  const groups = useMemo(() => groupByFamily(options ?? []), [options]);
+
+  // The family holding the recommended tier opens by default; if the catalog
+  // has no recommended tier, the first family opens so the pane is never blank.
+  const defaultOpen = useMemo(() => {
+    const recommended = groups.find((g) =>
+      g.options.some((o) => o.starter.tier === RECOMMENDED_TIER),
+    );
+    const pick = recommended ?? groups[0];
+    return new Set(pick ? [pick.family] : []);
+  }, [groups]);
+
+  // Seed the open set ONCE, when the catalog first resolves (the mount fetch
+  // arrives after the initial empty render). A later refresh must not collapse
+  // families the user opened, so this never re-seeds.
+  const [open, setOpen] = useState<Set<string>>(new Set());
+  const seededRef = useRef(false);
+  useEffect(() => {
+    if (seededRef.current || groups.length === 0) return;
+    seededRef.current = true;
+    setOpen(defaultOpen);
+  }, [groups, defaultOpen]);
+
+  // One download at a time; activeTier names the row that owns the progress card.
+  const [activeTier, setActiveTier] = useState<StarterTier | null>(null);
+  const {
+    state,
+    progress,
+    etaSeconds,
+    start,
+    resume,
+    cancel,
+    retry,
+    reset,
+    discard,
+  } = useDownloadModel();
+
+  // A finished install (phase 'ready') lifts the fresh config, clears the
+  // active row, and refreshes the rows so the new model flips to Installed.
+  // An effect (not a render-time call) so it fires exactly once per transition.
+  useEffect(() => {
+    if (state.phase !== 'ready') return;
+    void (async () => {
+      try {
+        onSaved(await invoke<RawAppConfig>('get_config'));
+      } catch {
+        // The focus-driven resync picks the change up on next activation.
+      }
+      reset();
+      setActiveTier(null);
+      await refresh();
+    })();
+  }, [state.phase, onSaved, reset, refresh]);
+
+  function toggle(family: string) {
+    setOpen((cur) => {
+      const next = new Set(cur);
+      if (next.has(family)) {
+        next.delete(family);
+      } else {
+        next.add(family);
+      }
+      return next;
+    });
+  }
+
+  function startDownload(tier: StarterTier) {
+    setActiveTier(tier);
+    void start(tier);
+  }
+
+  function resumeDownload(tier: StarterTier) {
+    setActiveTier(tier);
+    void resume(tier);
+  }
+
+  async function discardPartial(sha256: string) {
+    await discard(sha256);
+    await refresh();
+  }
+
+  function returnToPicker() {
+    reset();
+    setActiveTier(null);
+  }
+
+  if (options !== null && groups.length === 0) {
+    return (
+      <div className={styles.pane}>
+        <p className={styles.empty}>No curated models are available.</p>
+      </div>
+    );
+  }
+
+  return (
+    <div className={styles.pane}>
+      <p className={styles.hint}>
+        Hand-picked by Thuki, grouped by family. Open a family to choose a size.
+      </p>
+      <div className={styles.list}>
+        {groups.map((group) => {
+          const expanded = open.has(group.family);
+          return (
+            <div className={styles.fam} key={group.family}>
+              <button
+                type="button"
+                className={styles.famHead}
+                aria-expanded={expanded}
+                onClick={() => toggle(group.family)}
+              >
+                <span className={styles.famText}>
+                  <span className={styles.famName}>{group.family}</span>
+                  <span className={styles.famSub}>
+                    {group.blurb} · {group.options.length}{' '}
+                    {group.options.length === 1 ? 'model' : 'models'}
+                  </span>
+                </span>
+                <span
+                  className={`${styles.chevWrap} ${expanded ? styles.chevOpen : ''}`}
+                >
+                  {CHEVRON}
+                </span>
+              </button>
+              {expanded ? (
+                <div className={styles.famBody}>
+                  {group.options.map((o) => (
+                    <ModelRow
+                      key={o.starter.tier}
+                      option={o}
+                      recommended={o.starter.tier === RECOMMENDED_TIER}
+                      active={activeTier === o.starter.tier}
+                      state={state}
+                      progress={progress}
+                      etaSeconds={etaSeconds}
+                      onDownload={startDownload}
+                      onResume={resumeDownload}
+                      onDiscard={discardPartial}
+                      onCancel={() => void cancel()}
+                      onRetry={() => void retry()}
+                      onChooseAnother={returnToPicker}
+                    />
+                  ))}
+                </div>
+              ) : null}
+            </div>
+          );
+        })}
+      </div>
+    </div>
+  );
+}
+
+interface ModelRowProps {
+  option: StarterOption;
+  recommended: boolean;
+  active: boolean;
+  state: ReturnType<typeof useDownloadModel>['state'];
+  progress: ReturnType<typeof useDownloadModel>['progress'];
+  etaSeconds: number | null;
+  onDownload: (tier: StarterTier) => void;
+  onResume: (tier: StarterTier) => void;
+  onDiscard: (sha256: string) => void;
+  onCancel: () => void;
+  onRetry: () => void;
+  onChooseAnother: () => void;
+}
+
+function ModelRow({
+  option,
+  recommended,
+  active,
+  state,
+  progress,
+  etaSeconds,
+  onDownload,
+  onResume,
+  onDiscard,
+  onCancel,
+  onRetry,
+  onChooseAnother,
+}: ModelRowProps) {
+  const { starter, fit, installed, partial_bytes } = option;
+  const showProgress = active && state.phase !== 'idle';
+
+  return (
+    <div className={styles.row} data-model-row data-tier={starter.tier}>
+      <div className={styles.rowMain}>
+        <div className={styles.mid}>
+          <div className={styles.name}>
+            {starter.display_name}
+            {recommended ? (
+              <span className={styles.recommended}>Recommended</span>
+            ) : null}
+          </div>
+          <div className={styles.pills}>
+            <span className={`${styles.pill} ${styles.pillText}`}>Text</span>
+            {starter.vision ? (
+              <span className={`${styles.pill} ${styles.pillVision}`}>
+                Vision
+              </span>
+            ) : null}
+            {starter.thinking ? (
+              <span className={`${styles.pill} ${styles.pillThinking}`}>
+                Thinking
+              </span>
+            ) : null}
+          </div>
+          <div className={styles.meta}>
+            {starter.quant} · {gb(totalBytes(option))} GB ·{' '}
+            <button
+              type="button"
+              className={styles.hfLink}
+              onClick={() =>
+                void invoke('open_url', {
+                  url: `${HF_BASE_URL}/${starter.repo}`,
+                })
+              }
+              aria-label={`View ${starter.display_name} on Hugging Face`}
+            >
+              {starter.license_note} ↗
+            </button>
+          </div>
+        </div>
+        {!showProgress ? (
+          <div className={styles.right}>
+            <Tooltip label={RAM_FIT_TOOLTIP[fit]} multiline placement="top">
+              <span className={`${styles.fit} ${FIT_CLASS[fit]}`}>
+                {RAM_FIT_LABEL[fit]}
+              </span>
+            </Tooltip>
+            <RowAction
+              option={option}
+              recommended={recommended}
+              installed={installed}
+              partialBytes={partial_bytes}
+              onDownload={onDownload}
+              onResume={onResume}
+              onDiscard={onDiscard}
+            />
+          </div>
+        ) : null}
+      </div>
+      {showProgress ? (
+        <div className={styles.progress}>
+          <DownloadProgress
+            state={state}
+            progress={progress}
+            etaSeconds={etaSeconds}
+            // The curated path has no pre-flight confirm card, so onConfirm /
+            // onCancelConfirm never fire; they point at the same covered
+            // handlers rather than dead no-op literals.
+            onConfirm={onChooseAnother}
+            onCancelConfirm={onChooseAnother}
+            onCancel={onCancel}
+            onRetry={onRetry}
+            onChooseAnother={onChooseAnother}
+          />
+        </div>
+      ) : null}
+    </div>
+  );
+}
+
+interface RowActionProps {
+  option: StarterOption;
+  recommended: boolean;
+  installed: boolean;
+  partialBytes: number | null;
+  onDownload: (tier: StarterTier) => void;
+  onResume: (tier: StarterTier) => void;
+  onDiscard: (sha256: string) => void;
+}
+
+/** The per-row affordance: an installed marker, a resume/discard pair when an
+ * interrupted partial exists, or the plain download button. */
+function RowAction({
+  option,
+  recommended,
+  installed,
+  partialBytes,
+  onDownload,
+  onResume,
+  onDiscard,
+}: RowActionProps) {
+  const { starter } = option;
+
+  if (installed) {
+    return <span className={styles.installed}>Installed</span>;
+  }
+
+  if (partialBytes !== null) {
+    return (
+      <span className={styles.resumeWrap}>
+        <button
+          type="button"
+          className={styles.resumeBtn}
+          onClick={() => onResume(starter.tier)}
+        >
+          Resume ({gb(partialBytes)} GB)
+        </button>
+        <button
+          type="button"
+          className={styles.discardBtn}
+          onClick={() => onDiscard(starter.sha256)}
+        >
+          Discard
+        </button>
+      </span>
+    );
+  }
+
+  return (
+    <button
+      type="button"
+      className={`${styles.getBtn} ${recommended ? styles.getPrimary : ''}`}
+      onClick={() => onDownload(starter.tier)}
+    >
+      Download
+    </button>
+  );
+}

From ee1340123f57eccfcf2f6ce993bd598c7466b9db Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 21:35:40 -0500
Subject: [PATCH 31/89] feat: front Discover with a Staff picks and Browse all
 pathway toggle

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/components/StarterPicker.tsx              |   5 +-
 .../__tests__/StarterPicker.test.tsx          |   7 +
 src/settings/tabs/ModelTab.tsx                |   4 +-
 .../tabs/models/DiscoverPane.module.css       |  68 ++++++++++
 .../tabs/models/DiscoverPane.test.tsx         | 123 ++++++++++++++++++
 src/settings/tabs/models/DiscoverPane.tsx     |  92 +++++++++++++
 6 files changed, 296 insertions(+), 3 deletions(-)
 create mode 100644 src/settings/tabs/models/DiscoverPane.module.css
 create mode 100644 src/settings/tabs/models/DiscoverPane.test.tsx
 create mode 100644 src/settings/tabs/models/DiscoverPane.tsx

diff --git a/src/components/StarterPicker.tsx b/src/components/StarterPicker.tsx
index 5fe3cf1c..cd4d5a32 100644
--- a/src/components/StarterPicker.tsx
+++ b/src/components/StarterPicker.tsx
@@ -61,7 +61,10 @@ export function useStarterOptions(): UseStarterOptionsResult {
 
   const refresh = useCallback(async () => {
     try {
-      setOptions(await invoke<StarterOption[]>('get_starter_options'));
+      const rows = await invoke<StarterOption[]>('get_starter_options');
+      // Guard the IPC boundary: a malformed (non-array) payload becomes an
+      // empty list so consumers that iterate the rows never crash.
+      setOptions(Array.isArray(rows) ? rows : []);
     } catch {
       setOptions([]);
     }
diff --git a/src/components/__tests__/StarterPicker.test.tsx b/src/components/__tests__/StarterPicker.test.tsx
index 657994f8..83a73350 100644
--- a/src/components/__tests__/StarterPicker.test.tsx
+++ b/src/components/__tests__/StarterPicker.test.tsx
@@ -273,6 +273,13 @@ describe('useStarterOptions', () => {
     expect(result.current.options).toEqual([]);
   });
 
+  it('coerces a malformed non-array payload to an empty list', async () => {
+    invoke.mockResolvedValueOnce({ not: 'an array' });
+    const { result } = renderHook(() => useStarterOptions());
+    await act(async () => {});
+    expect(result.current.options).toEqual([]);
+  });
+
   it('re-fetches on refresh', async () => {
     invoke.mockResolvedValueOnce([]);
     const { result } = renderHook(() => useStarterOptions());
diff --git a/src/settings/tabs/ModelTab.tsx b/src/settings/tabs/ModelTab.tsx
index b425ddf9..aa1fa9fe 100644
--- a/src/settings/tabs/ModelTab.tsx
+++ b/src/settings/tabs/ModelTab.tsx
@@ -12,7 +12,7 @@ import { useState } from 'react';
 import { ModelsSegmented, type ModelsSubview } from './models/ModelsSegmented';
 import { ProvidersPane } from './models/ProvidersPane';
 import { LibraryPane } from './models/LibraryPane';
-import { BrowseAllPane } from './models/BrowseAllPane';
+import { DiscoverPane } from './models/DiscoverPane';
 import styles from '../../styles/settings.module.css';
 import type { RawAppConfig } from '../types';
 
@@ -42,7 +42,7 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
         />
       ) : null}
 
-      {view === 'discover' ? <BrowseAllPane onSaved={onSaved} /> : null}
+      {view === 'discover' ? <DiscoverPane onSaved={onSaved} /> : null}
 
       {view === 'providers' ? (
         <ProvidersPane
diff --git a/src/settings/tabs/models/DiscoverPane.module.css b/src/settings/tabs/models/DiscoverPane.module.css
new file mode 100644
index 00000000..37342ac2
--- /dev/null
+++ b/src/settings/tabs/models/DiscoverPane.module.css
@@ -0,0 +1,68 @@
+/*
+ * Styles for the Discover host: the pathway tab control over the Staff-picks
+ * and Browse-all panes. Premium tokens cascade from the Settings window root.
+ *
+ * The tabs are a quiet segmented pill (the active pathway gets the accent fill),
+ * deliberately smaller and lighter than the Models section nav above it so the
+ * two tab rows do not compete.
+ */
+
+.host {
+  display: flex;
+  flex-direction: column;
+  min-height: 0;
+}
+
+.tabs {
+  display: inline-flex;
+  align-self: flex-start;
+  gap: 3px;
+  padding: 3px;
+  margin-bottom: 14px;
+  border-radius: var(--radius-pill);
+  border: 1px solid var(--hair-soft);
+  background: var(--elev-1);
+}
+
+.tab {
+  display: inline-flex;
+  align-items: center;
+  gap: 7px;
+  padding: 6px 14px;
+  border: none;
+  border-radius: var(--radius-pill);
+  background: transparent;
+  color: var(--t2);
+  font-family: inherit;
+  font-size: 12px;
+  font-weight: 560;
+  cursor: pointer;
+  transition:
+    color 140ms ease,
+    background 140ms ease;
+}
+.tab svg {
+  width: 13px;
+  height: 13px;
+  fill: none;
+  stroke: currentColor;
+  stroke-width: 1.8;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+}
+.tab:hover:not(.tabActive) {
+  color: var(--t1);
+}
+.tab:focus-visible {
+  outline: none;
+  box-shadow: 0 0 0 2px var(--accent-soft);
+}
+
+.tabActive {
+  color: #16110d;
+  background: var(--accent);
+}
+
+.tabLabel {
+  color: inherit;
+}
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
new file mode 100644
index 00000000..66888e21
--- /dev/null
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -0,0 +1,123 @@
+/**
+ * Unit tests for the Discover host: the two-pathway tab shell that places the
+ * curated Staff-picks accordion as the default front door and the raw Hugging
+ * Face browser behind a "Browse all" advanced tab. The child panes have their
+ * own suites; here we only test the tab control and which pane it shows.
+ */
+
+import { fireEvent, render, screen, waitFor } from '@testing-library/react';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+
+import { DiscoverPane } from './DiscoverPane';
+import { clearHfSearchCache } from './useHfSearch';
+import type { Starter, StarterOption } from '../../../types/starter';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+const STARTER: StarterOption = {
+  starter: {
+    tier: 'balanced',
+    family: 'Gemma',
+    display_name: 'Gemma 4 12B',
+    repo: 'google/gemma',
+    revision: 'a'.repeat(40),
+    file_name: 'gemma.gguf',
+    sha256: 'b'.repeat(64),
+    size_bytes: 7_000_000_000,
+    quant: 'Q4_0',
+    vision: true,
+    thinking: false,
+    reasoning_always: false,
+    mmproj_file: null,
+    mmproj_sha256: null,
+    mmproj_bytes: 0,
+    est_runtime_gb: 9.5,
+    license_note: 'Apache 2.0',
+    origin: 'Google',
+    origin_repo: 'google/gemma',
+  } as Starter,
+  fit: 'fits',
+  installed: false,
+  partial_bytes: null,
+};
+
+beforeEach(() => {
+  invokeMock.mockReset();
+  clearHfSearchCache();
+  invokeMock.mockImplementation(async (cmd: string) => {
+    if (cmd === 'get_starter_options') return [STARTER];
+    if (cmd === 'search_hf_models') return [];
+    return undefined;
+  });
+});
+
+function renderHost() {
+  return render(<DiscoverPane onSaved={() => {}} />);
+}
+
+/** Staff picks is showing when its curated hint is on screen. */
+function staffPicksVisible(): boolean {
+  return screen.queryByText(/Hand-picked by Thuki/) !== null;
+}
+
+/** Browse all is showing when its Hugging Face search box is on screen. */
+function browseAllVisible(): boolean {
+  return screen.queryByRole('searchbox') !== null;
+}
+
+describe('DiscoverPane host', () => {
+  it('shows two pathway tabs', () => {
+    renderHost();
+    expect(
+      screen.getByRole('tab', { name: 'Staff picks' }),
+    ).toBeInTheDocument();
+    expect(screen.getByRole('tab', { name: 'Browse all' })).toBeInTheDocument();
+  });
+
+  it('defaults to the curated Staff-picks pathway', async () => {
+    renderHost();
+    await waitFor(() => expect(staffPicksVisible()).toBe(true));
+    expect(browseAllVisible()).toBe(false);
+    expect(screen.getByRole('tab', { name: 'Staff picks' })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+  });
+
+  it('switches to the Browse-all pathway on click and back again', async () => {
+    renderHost();
+    fireEvent.click(screen.getByRole('tab', { name: 'Browse all' }));
+    await waitFor(() => expect(browseAllVisible()).toBe(true));
+    expect(staffPicksVisible()).toBe(false);
+    expect(screen.getByRole('tab', { name: 'Browse all' })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+
+    fireEvent.click(screen.getByRole('tab', { name: 'Staff picks' }));
+    await waitFor(() => expect(staffPicksVisible()).toBe(true));
+    expect(browseAllVisible()).toBe(false);
+  });
+
+  it('moves between tabs with the arrow keys', async () => {
+    renderHost();
+    const staff = screen.getByRole('tab', { name: 'Staff picks' });
+    fireEvent.keyDown(staff, { key: 'ArrowRight' });
+    await waitFor(() => expect(browseAllVisible()).toBe(true));
+    const browse = screen.getByRole('tab', { name: 'Browse all' });
+    fireEvent.keyDown(browse, { key: 'ArrowLeft' });
+    await waitFor(() => expect(staffPicksVisible()).toBe(true));
+  });
+
+  it('ignores non-arrow keys', () => {
+    renderHost();
+    const staff = screen.getByRole('tab', { name: 'Staff picks' });
+    fireEvent.keyDown(staff, { key: 'Enter' });
+    expect(screen.getByRole('tab', { name: 'Staff picks' })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+  });
+});
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
new file mode 100644
index 00000000..49ed1b22
--- /dev/null
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -0,0 +1,92 @@
+/**
+ * Discover host: the two-pathway shell for finding a model.
+ *
+ * A tab control switches between the curated front door and the advanced
+ * browser:
+ * - "Staff picks" ({@link StaffPicksPane}) is the default: a short catalog Thuki
+ *   hand-picks and groups by family, one chosen quant per model.
+ * - "Browse all" ({@link BrowseAllPane}) is the full Hugging Face GGUF browser,
+ *   the escape hatch for users who want anything beyond the curated set.
+ *
+ * This file only routes between the two panes; each owns its own data and
+ * download flow. The tablist mirrors the Models segmented control's roving
+ * arrow-key pattern.
+ */
+
+import { useState } from 'react';
+
+import { StaffPicksPane } from './StaffPicksPane';
+import { BrowseAllPane } from './BrowseAllPane';
+import styles from './DiscoverPane.module.css';
+import type { RawAppConfig } from '../../types';
+
+type Pathway = 'staff' | 'browse';
+
+const STAR_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M12 2l2.6 6.3L21 9l-5 4.3L17.6 20 12 16.5 6.4 20 8 13.3 3 9l6.4-.7z" />
+  </svg>
+);
+const SEARCH_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <circle cx="11" cy="11" r="7" />
+    <path d="m20 20-3.5-3.5" />
+  </svg>
+);
+
+const TABS: ReadonlyArray<{ id: Pathway; label: string; icon: React.ReactNode }> =
+  [
+    { id: 'staff', label: 'Staff picks', icon: STAR_ICON },
+    { id: 'browse', label: 'Browse all', icon: SEARCH_ICON },
+  ];
+
+interface DiscoverPaneProps {
+  /** Lift a fresh config snapshot after a successful install. */
+  onSaved: (next: RawAppConfig) => void;
+}
+
+export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
+  const [pathway, setPathway] = useState<Pathway>('staff');
+
+  return (
+    <div className={styles.host}>
+      <div className={styles.tabs} role="tablist" aria-label="Discover pathways">
+        {TABS.map((tab) => {
+          const active = tab.id === pathway;
+          return (
+            <button
+              key={tab.id}
+              type="button"
+              role="tab"
+              aria-selected={active}
+              tabIndex={active ? 0 : -1}
+              className={`${styles.tab} ${active ? styles.tabActive : ''}`}
+              onClick={() => setPathway(tab.id)}
+              onKeyDown={(e) => {
+                const isNext = e.key === 'ArrowRight';
+                if (!isNext && e.key !== 'ArrowLeft') return;
+                e.preventDefault();
+                const idx = TABS.findIndex((t) => t.id === pathway);
+                const step = isNext ? 1 : TABS.length - 1;
+                const to = (idx + step) % TABS.length;
+                setPathway(TABS[to].id);
+                // Roving tabindex: move DOM focus with the selection.
+                const el = e.currentTarget.parentElement?.children[to];
+                (el as HTMLElement | undefined)?.focus();
+              }}
+            >
+              {tab.icon}
+              <span className={styles.tabLabel}>{tab.label}</span>
+            </button>
+          );
+        })}
+      </div>
+
+      {pathway === 'staff' ? (
+        <StaffPicksPane onSaved={onSaved} />
+      ) : (
+        <BrowseAllPane onSaved={onSaved} />
+      )}
+    </div>
+  );
+}

From 7afffc12c2f154a32759869196cc551a31e33265 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 21:40:44 -0500
Subject: [PATCH 32/89] refactor: derive Staff picks open state without a
 seeding effect

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/tabs/models/DiscoverPane.tsx     | 19 ++++++---
 .../tabs/models/StaffPicksPane.test.tsx       | 41 +++++++++----------
 src/settings/tabs/models/StaffPicksPane.tsx   | 27 ++++++------
 3 files changed, 46 insertions(+), 41 deletions(-)

diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
index 49ed1b22..2e4ab702 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -34,11 +34,14 @@ const SEARCH_ICON = (
   </svg>
 );
 
-const TABS: ReadonlyArray<{ id: Pathway; label: string; icon: React.ReactNode }> =
-  [
-    { id: 'staff', label: 'Staff picks', icon: STAR_ICON },
-    { id: 'browse', label: 'Browse all', icon: SEARCH_ICON },
-  ];
+const TABS: ReadonlyArray<{
+  id: Pathway;
+  label: string;
+  icon: React.ReactNode;
+}> = [
+  { id: 'staff', label: 'Staff picks', icon: STAR_ICON },
+  { id: 'browse', label: 'Browse all', icon: SEARCH_ICON },
+];
 
 interface DiscoverPaneProps {
   /** Lift a fresh config snapshot after a successful install. */
@@ -50,7 +53,11 @@ export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
 
   return (
     <div className={styles.host}>
-      <div className={styles.tabs} role="tablist" aria-label="Discover pathways">
+      <div
+        className={styles.tabs}
+        role="tablist"
+        aria-label="Discover pathways"
+      >
         {TABS.map((tab) => {
           const active = tab.id === pathway;
           return (
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index f040d4f2..46fc9e8b 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -80,7 +80,10 @@ function starter(over: Partial<Starter>): Starter {
   };
 }
 
-function option(over: Partial<Starter>, opts: Partial<StarterOption> = {}): StarterOption {
+function option(
+  over: Partial<Starter>,
+  opts: Partial<StarterOption> = {},
+): StarterOption {
   return {
     starter: starter(over),
     fit: 'fits',
@@ -103,23 +106,21 @@ const QWEN = option({
   origin: 'Alibaba',
 });
 const GEMMA = option({});
-const GPT_OSS = option(
-  {
-    tier: 'smartest',
-    family: 'gpt-oss',
-    display_name: 'gpt-oss 20B',
-    repo: 'ggml-org/gpt-oss-20b-GGUF',
-    file_name: 'gpt-oss-20b-mxfp4.gguf',
-    quant: 'MXFP4',
-    vision: false,
-    thinking: true,
-    reasoning_always: true,
-    mmproj_file: null,
-    mmproj_sha256: null,
-    mmproj_bytes: 0,
-    origin: 'OpenAI',
-  },
-);
+const GPT_OSS = option({
+  tier: 'smartest',
+  family: 'gpt-oss',
+  display_name: 'gpt-oss 20B',
+  repo: 'ggml-org/gpt-oss-20b-GGUF',
+  file_name: 'gpt-oss-20b-mxfp4.gguf',
+  quant: 'MXFP4',
+  vision: false,
+  thinking: true,
+  reasoning_always: true,
+  mmproj_file: null,
+  mmproj_sha256: null,
+  mmproj_bytes: 0,
+  origin: 'OpenAI',
+});
 
 const STARTERS: StarterOption[] = [QWEN, GEMMA, GPT_OSS];
 
@@ -379,9 +380,7 @@ describe('StaffPicksPane', () => {
     const row = screen
       .getByText('Gemma 4 12B')
       .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(
-      within(row).getByRole('button', { name: /Hugging Face/ }),
-    );
+    fireEvent.click(within(row).getByRole('button', { name: /Hugging Face/ }));
     expect(invokeMock).toHaveBeenCalledWith('open_url', {
       url: 'https://huggingface.co/google/gemma-4-12B-it-qat-q4_0-gguf',
     });
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 8bdc2149..3106172d 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -16,7 +16,7 @@
  * it too); `activeTier` tracks which row owns the progress card.
  */
 
-import { useEffect, useMemo, useRef, useState } from 'react';
+import { useEffect, useMemo, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 
 import { DownloadProgress } from '../../../components/DownloadProgress';
@@ -26,7 +26,11 @@ import { Tooltip } from '../../../components/Tooltip';
 import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
 import styles from './StaffPicksPane.module.css';
 import type { RawAppConfig } from '../../types';
-import type { RamFit, StarterOption, StarterTier } from '../../../types/starter';
+import type {
+  RamFit,
+  StarterOption,
+  StarterTier,
+} from '../../../types/starter';
 
 const HF_BASE_URL = 'https://huggingface.co';
 
@@ -109,16 +113,11 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
     return new Set(pick ? [pick.family] : []);
   }, [groups]);
 
-  // Seed the open set ONCE, when the catalog first resolves (the mount fetch
-  // arrives after the initial empty render). A later refresh must not collapse
-  // families the user opened, so this never re-seeds.
-  const [open, setOpen] = useState<Set<string>>(new Set());
-  const seededRef = useRef(false);
-  useEffect(() => {
-    if (seededRef.current || groups.length === 0) return;
-    seededRef.current = true;
-    setOpen(defaultOpen);
-  }, [groups, defaultOpen]);
+  // `null` means the user has not toggled a family yet, so the recommended
+  // family (defaultOpen) shows open; the first toggle replaces it with the
+  // user's own set, which then sticks across refreshes. No seeding effect.
+  const [open, setOpen] = useState<Set<string> | null>(null);
+  const effectiveOpen = open ?? defaultOpen;
 
   // One download at a time; activeTier names the row that owns the progress card.
   const [activeTier, setActiveTier] = useState<StarterTier | null>(null);
@@ -153,7 +152,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
 
   function toggle(family: string) {
     setOpen((cur) => {
-      const next = new Set(cur);
+      const next = new Set(cur ?? defaultOpen);
       if (next.has(family)) {
         next.delete(family);
       } else {
@@ -198,7 +197,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
       </p>
       <div className={styles.list}>
         {groups.map((group) => {
-          const expanded = open.has(group.family);
+          const expanded = effectiveOpen.has(group.family);
           return (
             <div className={styles.fam} key={group.family}>
               <button

From 4da28ab36efcfb63d92429be68222863955b1f39 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 22:42:06 -0500
Subject: [PATCH 33/89] fix: chevron disclosure on repo rows, download icon on
 quant rows in Browse all

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/BrowseAllPane.module.css      | 65 +++++++++++++------
 .../tabs/models/BrowseAllPane.test.tsx        | 22 +++----
 src/settings/tabs/models/BrowseAllPane.tsx    | 19 ++++--
 3 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index d9169f99..82ee961b 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -214,37 +214,42 @@
   color: var(--heavy);
 }
 
-/* Icon-only download / open-quants button. */
-.get {
+/* Quiet disclosure button on the repo row: it expands the quant list, so it is
+ * a neutral chevron, not the accent download icon. The chevron flips up when
+ * the row is open. */
+.disclose {
   flex: none;
   width: 30px;
   height: 30px;
   display: grid;
   place-items: center;
-  color: var(--accent);
-  border: 1px solid rgba(255, 141, 92, 0.4);
+  color: var(--t3);
+  border: 1px solid var(--hair);
   border-radius: var(--radius-control);
   background: transparent;
   cursor: pointer;
   transition:
     color 140ms ease,
-    border-color 140ms ease,
-    background 140ms ease;
+    border-color 140ms ease;
 }
-.get svg {
-  width: 15px;
-  height: 15px;
+.disclose svg {
+  width: 16px;
+  height: 16px;
   stroke: currentColor;
   stroke-width: 1.7;
   fill: none;
   stroke-linecap: round;
   stroke-linejoin: round;
+  transition: transform 150ms ease;
 }
-.get:hover:not(:disabled) {
-  border-color: rgba(255, 141, 92, 0.6);
-  background: var(--accent-soft);
+.discloseOpen svg {
+  transform: rotate(180deg);
 }
-.get:disabled {
+.disclose:hover:not(:disabled) {
+  color: var(--t1);
+  border-color: rgba(255, 141, 92, 0.4);
+}
+.disclose:disabled {
   color: var(--t3);
   border-color: var(--hair-soft);
   opacity: 0.6;
@@ -305,17 +310,35 @@
   color: var(--t3);
 }
 
-.download {
+/* Icon-only download button on a quant row: the accent download-arrow, the
+ * one control that actually starts a download. */
+.quantGet {
   flex: none;
-  font-size: 11.5px;
-  font-weight: 540;
-  color: #16110d;
-  border: none;
+  width: 30px;
+  height: 30px;
+  display: grid;
+  place-items: center;
+  color: var(--accent);
+  border: 1px solid rgba(255, 141, 92, 0.4);
   border-radius: var(--radius-control);
-  padding: 5px 12px;
-  background: var(--accent);
-  font-family: inherit;
+  background: transparent;
   cursor: pointer;
+  transition:
+    border-color 140ms ease,
+    background 140ms ease;
+}
+.quantGet svg {
+  width: 15px;
+  height: 15px;
+  stroke: currentColor;
+  stroke-width: 1.7;
+  fill: none;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+}
+.quantGet:hover {
+  border-color: rgba(255, 141, 92, 0.6);
+  background: var(--accent-soft);
 }
 
 /* ── Inline notes (gated, loading, empty, error) ──────────────────────── */
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 11233798..386a0bca 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -286,7 +286,7 @@ describe('BrowseAllPane', () => {
       .getByText('meta-llama/Llama-3-8B-GGUF')
       .closest('[data-row]') as HTMLElement;
     expect(
-      within(gatedRow).getByRole('button', { name: 'Get' }),
+      within(gatedRow).getByRole('button', { name: 'Show files' }),
     ).toBeDisabled();
     expect(within(gatedRow).getByText('Gated')).toBeInTheDocument();
   });
@@ -296,7 +296,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('list_hf_repo_ggufs', {
       repo: 'google/gemma-4-12b-it-GGUF',
@@ -314,10 +314,10 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument();
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     expect(screen.queryByText('gemma-q4.gguf')).not.toBeInTheDocument();
   });
 
@@ -326,7 +326,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     expect(screen.getByText('No GGUF files in this repo.')).toBeInTheDocument();
   });
@@ -336,7 +336,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     expect(screen.getByText('No GGUF files in this repo.')).toBeInTheDocument();
   });
@@ -348,7 +348,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     expect(screen.getByText(/repo unavailable/)).toBeInTheDocument();
   });
@@ -359,7 +359,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     const downloadButtons = screen.getAllByRole('button', {
       name: 'Download',
@@ -402,7 +402,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
@@ -418,7 +418,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
@@ -433,7 +433,7 @@ describe('BrowseAllPane', () => {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
       .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Get' }));
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index ca21e8d8..901d7720 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -67,6 +67,14 @@ const DOWNLOAD_ICON = (
     <path d="M12 4v11M7 11l5 5 5-5M5 20h14" />
   </svg>
 );
+// A disclosure chevron for the repo row: it expands the quant list, so it must
+// NOT wear the download icon (which now lives on the rows that actually
+// download). The chevron rotates to point up when the row is open.
+const CHEVRON_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M6 9l6 6 6-6" />
+  </svg>
+);
 interface BrowseAllPaneProps {
   /** Lift a fresh config snapshot after a successful install. */
   onSaved: (next: RawAppConfig) => void;
@@ -227,13 +235,13 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
         </div>
         <button
           type="button"
-          className={styles.get}
-          aria-label="Get"
+          className={`${styles.disclose} ${expanded ? styles.discloseOpen : ''}`}
+          aria-label="Show files"
           aria-expanded={expanded}
           disabled={model.gated}
           onClick={toggle}
         >
-          {DOWNLOAD_ICON}
+          {CHEVRON_ICON}
         </button>
       </div>
 
@@ -265,10 +273,11 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                   </span>
                   <button
                     type="button"
-                    className={styles.download}
+                    className={styles.quantGet}
+                    aria-label="Download"
                     onClick={() => void startRepo(model.id, f.file)}
                   >
-                    Download
+                    {DOWNLOAD_ICON}
                   </button>
                 </div>
               ))

From 57b047ea26ffd51951e9e567e865212cdbd74329 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 22:51:53 -0500
Subject: [PATCH 34/89] feat: flatten Staff picks to an alphabetical list of
 rich model cards

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/StaffPicksPane.module.css     | 143 +++--------
 .../tabs/models/StaffPicksPane.test.tsx       | 233 +++++++-----------
 src/settings/tabs/models/StaffPicksPane.tsx   | 218 ++++++----------
 3 files changed, 196 insertions(+), 398 deletions(-)

diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index 42689a3f..8759aa83 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -1,5 +1,5 @@
 /*
- * Styles for the Staff-picks pane: Discover's curated family accordion. The
+ * Styles for the Staff-picks pane: Discover's flat curated model cards. The
  * premium tokens (--accent, --t1..--t3, --hair, --ok, --tight, --heavy, etc.)
  * cascade from the Settings window root, so they are referenced via var() here.
  */
@@ -27,93 +27,22 @@
 .list {
   display: flex;
   flex-direction: column;
-  gap: 9px;
+  gap: 8px;
 }
 
-/* ── Family accordion section ─────────────────────────────────────────────── */
+/* ── Model card ───────────────────────────────────────────────────────────── */
 
-.fam {
+.card {
   border: 1px solid var(--hair-soft);
   border-radius: var(--radius-card);
   background: var(--elev-1);
-  overflow: hidden;
 }
 
-.famHead {
-  display: flex;
-  align-items: center;
-  gap: 12px;
-  width: 100%;
-  padding: 13px 15px;
-  border: none;
-  background: transparent;
-  color: var(--t1);
-  font-family: inherit;
-  text-align: left;
-  cursor: pointer;
-  transition: background 140ms ease;
-}
-.famHead:hover {
-  background: var(--elev-2);
-}
-
-.famText {
-  flex: 1;
-  min-width: 0;
-}
-
-.famName {
-  display: block;
-  font-weight: 600;
-  font-size: 13.5px;
-  color: var(--t1);
-}
-
-.famSub {
-  display: block;
-  font-size: 11px;
-  color: var(--t3);
-  margin-top: 3px;
-}
-
-.chevWrap {
-  flex: none;
-  display: grid;
-  place-items: center;
-  color: var(--t3);
-  transition: transform 150ms ease;
-}
-.chevOpen {
-  transform: rotate(90deg);
-}
-.chev {
-  width: 10px;
-  height: 10px;
-  fill: none;
-  stroke: currentColor;
-  stroke-width: 1.6;
-  stroke-linecap: round;
-  stroke-linejoin: round;
-}
-
-.famBody {
-  border-top: 1px solid var(--hair-soft);
-}
-
-/* ── Model row ────────────────────────────────────────────────────────────── */
-
-.row {
-  padding: 0 4px;
-}
-.row + .row {
-  box-shadow: 0 -1px 0 var(--hair-soft);
-}
-
-.rowMain {
+.cardMain {
   display: flex;
   align-items: flex-start;
   gap: 14px;
-  padding: 12px 11px;
+  padding: 13px 14px;
 }
 
 .mid {
@@ -122,27 +51,15 @@
 }
 
 .name {
-  display: flex;
-  align-items: center;
-  gap: 9px;
-  flex-wrap: wrap;
-  font-weight: 560;
-  font-size: 13px;
+  font-weight: 580;
+  font-size: 13.5px;
   color: var(--t1);
 }
 
-.recommended {
-  font-size: 10px;
-  font-weight: 640;
-  letter-spacing: 0.04em;
-  text-transform: uppercase;
-  color: var(--accent);
-  background: var(--accent-soft);
-  padding: 2px 7px;
-  border-radius: var(--radius-pill);
-}
-.recommended::before {
-  content: '★ ';
+.maker {
+  font-size: 11px;
+  color: var(--t3);
+  margin-top: 4px;
 }
 
 /* Calm capability pills: one cohesive chrome, a small colour dot the only
@@ -151,7 +68,7 @@
   display: flex;
   gap: 6px;
   flex-wrap: wrap;
-  margin-top: 8px;
+  margin-top: 9px;
 }
 .pill {
   display: inline-flex;
@@ -183,7 +100,7 @@
 .meta {
   font-size: 11px;
   color: var(--t3);
-  margin-top: 8px;
+  margin-top: 9px;
   font-variant-numeric: tabular-nums;
 }
 
@@ -235,36 +152,38 @@
   color: var(--heavy);
 }
 
-/* ── Row actions ──────────────────────────────────────────────────────────── */
+/* ── Card actions ─────────────────────────────────────────────────────────── */
 
+/* Icon-only download button: the accent download-arrow, matching the Browse-all
+ * quant rows so "download" reads the same everywhere. */
 .getBtn {
+  width: 32px;
+  height: 32px;
   flex: none;
-  font-size: 11.5px;
-  font-weight: 540;
+  display: grid;
+  place-items: center;
   color: var(--accent);
   border: 1px solid rgba(255, 141, 92, 0.4);
   border-radius: var(--radius-control);
-  padding: 6px 14px;
   background: transparent;
-  font-family: inherit;
   cursor: pointer;
   transition:
     border-color 140ms ease,
     background 140ms ease;
 }
+.getBtn svg {
+  width: 16px;
+  height: 16px;
+  fill: none;
+  stroke: currentColor;
+  stroke-width: 1.8;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+}
 .getBtn:hover {
   border-color: rgba(255, 141, 92, 0.6);
   background: var(--accent-soft);
 }
-.getPrimary {
-  color: #16110d;
-  border-color: transparent;
-  background: var(--accent);
-}
-.getPrimary:hover {
-  background: var(--accent);
-  filter: brightness(1.05);
-}
 
 .installed {
   display: inline-flex;
@@ -305,5 +224,5 @@
 }
 
 .progress {
-  padding: 0 11px 13px;
+  padding: 0 14px 13px;
 }
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index 46fc9e8b..5e346f2f 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -1,13 +1,13 @@
 /**
  * Unit tests for the Staff-picks pane: Discover's curated front door.
  *
- * Covers the family accordion (grouping, default-expanded recommended family,
- * expand/collapse), the model rows (name, recommended star, capability pills,
- * quant/size/license meta, RAM-fit hint), and the verified starter download
- * flow (download -> progress -> ready lifts config + refreshes; installed;
- * resume/discard of a partial; failure). The download channel is captured the
- * same way BrowseAllPane.test.tsx does it: `onEvent` is grabbed off the invoke
- * args and driven with `simulateMessage`.
+ * A flat, alphabetically-ordered list of rich model cards (no family grouping,
+ * no recommended highlight). Each card shows the model name, its maker and a
+ * one-line blurb, capability pills (Text always, plus Vision / Thinking), the
+ * one quant Thuki chose with its size and license, a RAM-fit hint, and a single
+ * icon download that runs the VERIFIED starter path (`download_starter`, pinned
+ * revision + sha256). The download channel is captured the same way
+ * BrowseAllPane.test.tsx does it.
  */
 
 import {
@@ -93,7 +93,7 @@ function option(
   };
 }
 
-/** Three single-model families, mirroring the shipped registry. */
+/** Three models, mirroring the shipped registry (deliberately NOT alpha order). */
 const QWEN = option({
   tier: 'fast',
   family: 'Qwen',
@@ -159,77 +159,90 @@ async function renderPane(
   return view;
 }
 
-/** The accordion header button for a family. */
-function familyHeader(name: string): HTMLElement {
-  return screen.getByRole('button', { name: new RegExp(`^${name}`) });
+/** The card element wrapping a model name. */
+function cardFor(name: string): HTMLElement {
+  return screen.getByText(name).closest('[data-model-card]') as HTMLElement;
 }
 
 describe('StaffPicksPane', () => {
-  it('renders a section per family with its name', async () => {
+  it('renders every model as a flat card, all visible at once', async () => {
     await renderPane();
-    expect(familyHeader('Qwen')).toBeInTheDocument();
-    expect(familyHeader('Gemma')).toBeInTheDocument();
-    expect(familyHeader('gpt-oss')).toBeInTheDocument();
+    expect(screen.getByText('Gemma 4 12B')).toBeInTheDocument();
+    expect(screen.getByText('Qwen3.5 9B')).toBeInTheDocument();
+    expect(screen.getByText('gpt-oss 20B')).toBeInTheDocument();
   });
 
-  it('expands the recommended family by default and collapses the rest', async () => {
+  it('orders the cards alphabetically by model name', async () => {
     await renderPane();
-    // Gemma holds the balanced (recommended) tier, so its model row is shown.
-    expect(screen.getByText('Gemma 4 12B')).toBeInTheDocument();
-    // The other families start collapsed.
-    expect(screen.queryByText('Qwen3.5 9B')).not.toBeInTheDocument();
-    expect(screen.queryByText('gpt-oss 20B')).not.toBeInTheDocument();
+    const names = screen
+      .getAllByTestId('staff-model-name')
+      .map((el) => el.textContent);
+    expect(names).toEqual(['Gemma 4 12B', 'gpt-oss 20B', 'Qwen3.5 9B']);
   });
 
-  it('expands a collapsed family on click and collapses it again', async () => {
+  it('shows no Recommended badge on any card', async () => {
     await renderPane();
-    fireEvent.click(familyHeader('Qwen'));
-    expect(screen.getByText('Qwen3.5 9B')).toBeInTheDocument();
-    fireEvent.click(familyHeader('Qwen'));
-    expect(screen.queryByText('Qwen3.5 9B')).not.toBeInTheDocument();
+    expect(screen.queryByText(/Recommended/)).not.toBeInTheDocument();
   });
 
-  it('marks the recommended model and shows its meta and pills', async () => {
+  it('shows the maker, blurb, pills, quant, size, license and fit on a card', async () => {
     await renderPane();
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    expect(within(row).getByText('Recommended')).toBeInTheDocument();
-    expect(within(row).getByText('Text')).toBeInTheDocument();
-    expect(within(row).getByText('Vision')).toBeInTheDocument();
-    expect(within(row).queryByText('Thinking')).not.toBeInTheDocument();
-    expect(within(row).getByText(/Q4_0/)).toBeInTheDocument();
-    expect(within(row).getByText(/7\.2 GB/)).toBeInTheDocument();
-    expect(within(row).getByText('Comfortable')).toBeInTheDocument();
+    const card = cardFor('Gemma 4 12B');
+    expect(
+      within(card).getByText(/Google · Well-rounded, reads images/),
+    ).toBeInTheDocument();
+    expect(within(card).getByText('Text')).toBeInTheDocument();
+    expect(within(card).getByText('Vision')).toBeInTheDocument();
+    expect(within(card).queryByText('Thinking')).not.toBeInTheDocument();
+    expect(within(card).getByText(/Q4_0/)).toBeInTheDocument();
+    expect(within(card).getByText(/7\.2 GB/)).toBeInTheDocument();
+    expect(within(card).getByText(/Apache 2\.0/)).toBeInTheDocument();
+    expect(within(card).getByText('Comfortable')).toBeInTheDocument();
   });
 
-  it('shows a Thinking pill on a thinking-capable model', async () => {
+  it('shows a Thinking pill on a thinking model and omits Vision on a text-only one', async () => {
     await renderPane();
-    fireEvent.click(familyHeader('Qwen'));
-    const row = screen
-      .getByText('Qwen3.5 9B')
-      .closest('[data-model-row]') as HTMLElement;
-    expect(within(row).getByText('Thinking')).toBeInTheDocument();
-    expect(within(row).getByText('Vision')).toBeInTheDocument();
+    const qwen = cardFor('Qwen3.5 9B');
+    expect(within(qwen).getByText('Thinking')).toBeInTheDocument();
+    expect(within(qwen).getByText('Vision')).toBeInTheDocument();
+    const oss = cardFor('gpt-oss 20B');
+    expect(within(oss).getByText('Thinking')).toBeInTheDocument();
+    expect(within(oss).queryByText('Vision')).not.toBeInTheDocument();
   });
 
-  it('omits the Vision pill on a text-only model', async () => {
-    await renderPane();
-    fireEvent.click(familyHeader('gpt-oss'));
-    const row = screen
-      .getByText('gpt-oss 20B')
-      .closest('[data-model-row]') as HTMLElement;
-    expect(within(row).getByText('Text')).toBeInTheDocument();
-    expect(within(row).getByText('Thinking')).toBeInTheDocument();
-    expect(within(row).queryByText('Vision')).not.toBeInTheDocument();
+  it('falls back to the maker alone when a model has no blurb', async () => {
+    await renderPane(() => {}, {
+      get_starter_options: [
+        option({
+          family: 'Llama',
+          display_name: 'Llama 3.3 8B',
+          origin: 'Meta',
+        }),
+      ],
+    });
+    const card = cardFor('Llama 3.3 8B');
+    // No blurb for the Llama family: the maker line is just the maker.
+    expect(within(card).getByText('Meta')).toBeInTheDocument();
+  });
+
+  it('shows just the maker when a model carries no family at all', async () => {
+    await renderPane(() => {}, {
+      get_starter_options: [
+        option({
+          family: undefined,
+          display_name: 'Mystery 7B',
+          origin: 'Acme',
+        }),
+      ],
+    });
+    const card = cardFor('Mystery 7B');
+    expect(within(card).getByText('Acme')).toBeInTheDocument();
   });
 
   it('downloads a model through the verified starter path', async () => {
     await renderPane();
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith(
       'download_starter',
@@ -240,10 +253,8 @@ describe('StaffPicksPane', () => {
   it('lifts a fresh config and refreshes when a download completes', async () => {
     const onSaved = vi.fn();
     await renderPane(onSaved);
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
     await flush();
     expect(screen.getByText('Downloading model')).toBeInTheDocument();
     act(() => {
@@ -258,10 +269,8 @@ describe('StaffPicksPane', () => {
     await renderPane(onSaved, {
       get_config: new Reject(new Error('read failed')),
     });
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
     await flush();
     act(() => {
       lastChannel?.simulateMessage({ type: 'AllDone' });
@@ -272,10 +281,8 @@ describe('StaffPicksPane', () => {
 
   it('cancels an in-flight download', async () => {
     await renderPane();
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
     await flush();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
     await flush();
@@ -284,10 +291,8 @@ describe('StaffPicksPane', () => {
 
   it('retries after a failed download', async () => {
     await renderPane();
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
     await flush();
     act(() => {
       lastChannel?.simulateMessage({
@@ -304,12 +309,10 @@ describe('StaffPicksPane', () => {
     expect(starts).toHaveLength(2);
   });
 
-  it('returns to the row from a terminal failure via Choose a different model', async () => {
+  it('returns to the card from a terminal failure via Choose a different model', async () => {
     await renderPane();
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
     await flush();
     act(() => {
       lastChannel?.simulateMessage({
@@ -320,8 +323,9 @@ describe('StaffPicksPane', () => {
     fireEvent.click(
       screen.getByRole('button', { name: 'Choose a different model' }),
     );
+    // The Gemma card is back to its download button, not stuck on the failure.
     expect(
-      screen.getByRole('button', { name: 'Download' }),
+      within(cardFor('Gemma 4 12B')).getByRole('button', { name: 'Download' }),
     ).toBeInTheDocument();
   });
 
@@ -329,12 +333,10 @@ describe('StaffPicksPane', () => {
     await renderPane(() => {}, {
       get_starter_options: [{ ...GEMMA, installed: true }, QWEN, GPT_OSS],
     });
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    expect(within(row).getByText('Installed')).toBeInTheDocument();
+    const card = cardFor('Gemma 4 12B');
+    expect(within(card).getByText('Installed')).toBeInTheDocument();
     expect(
-      within(row).queryByRole('button', { name: 'Download' }),
+      within(card).queryByRole('button', { name: 'Download' }),
     ).not.toBeInTheDocument();
   });
 
@@ -346,10 +348,8 @@ describe('StaffPicksPane', () => {
         GPT_OSS,
       ],
     });
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: /Resume/ }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: /Resume/ }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith(
       'download_starter',
@@ -365,10 +365,8 @@ describe('StaffPicksPane', () => {
         GPT_OSS,
       ],
     });
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Discard' }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: 'Discard' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('discard_partial_download', {
       sha256: 'b'.repeat(64),
@@ -377,10 +375,8 @@ describe('StaffPicksPane', () => {
 
   it('opens the model on Hugging Face from its provenance link', async () => {
     await renderPane();
-    const row = screen
-      .getByText('Gemma 4 12B')
-      .closest('[data-model-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: /Hugging Face/ }));
+    const card = cardFor('Gemma 4 12B');
+    fireEvent.click(within(card).getByRole('button', { name: /Hugging Face/ }));
     expect(invokeMock).toHaveBeenCalledWith('open_url', {
       url: 'https://huggingface.co/google/gemma-4-12B-it-qat-q4_0-gguf',
     });
@@ -397,49 +393,4 @@ describe('StaffPicksPane', () => {
     });
     expect(screen.getByText(/No curated models/)).toBeInTheDocument();
   });
-
-  it('groups several sizes of one family under a single section', async () => {
-    const gemma4b = option({
-      tier: 'fast',
-      family: 'Gemma',
-      display_name: 'Gemma 4 4B',
-      file_name: 'gemma-4-4b.gguf',
-    });
-    const gemma12b = option({}); // balanced Gemma 4 12B
-    await renderPane(() => {}, {
-      get_starter_options: [gemma4b, gemma12b],
-    });
-    // One Gemma section (it holds the recommended tier, so it is open) lists
-    // both sizes, and the header counts them.
-    const header = familyHeader('Gemma');
-    expect(header).toHaveTextContent('2 models');
-    expect(screen.getByText('Gemma 4 4B')).toBeInTheDocument();
-    expect(screen.getByText('Gemma 4 12B')).toBeInTheDocument();
-  });
-
-  it('falls back to the maker blurb and display name for an unlabelled family', async () => {
-    const orphan = option({
-      tier: 'balanced',
-      family: undefined,
-      display_name: 'Mystery 7B',
-      origin: 'Acme',
-    });
-    await renderPane(() => {}, { get_starter_options: [orphan] });
-    // No family label: the section is keyed by the model name and its blurb
-    // falls back to the maker.
-    const header = familyHeader('Mystery 7B');
-    expect(header).toHaveTextContent('Acme');
-  });
-
-  it('falls back to expanding the first family when none is recommended', async () => {
-    // A catalog with no balanced tier: the first family expands so the pane is
-    // never fully collapsed.
-    const fastOnly = option({
-      tier: 'fast',
-      family: 'Qwen',
-      display_name: 'Qwen3.5 9B',
-    });
-    await renderPane(() => {}, { get_starter_options: [fastOnly] });
-    expect(screen.getByText('Qwen3.5 9B')).toBeInTheDocument();
-  });
 });
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 3106172d..1dbc5f72 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -1,19 +1,20 @@
 /**
  * Staff-picks pane: the curated front door of Discover.
  *
- * Thuki hand-picks a short catalog of models, grouped by family. Each family is
- * a collapsible accordion section; the one holding the recommended pick is open
- * by default. A model row shows its friendly name, the one quant Thuki chose for
- * it, size, capability pills (Text always, plus Vision / Thinking), a RAM-fit
- * hint, and a single Download that runs the VERIFIED starter path
- * (`download_starter`, pinned revision + sha256), unlike the Browse-all pane's
- * arbitrary repo downloads. A finished install lifts a fresh config snapshot.
+ * A flat, alphabetically-ordered list of rich model cards. Thuki hand-picks a
+ * short catalog and shows each model directly (no family grouping, no
+ * recommended highlight): its friendly name, maker and a one-line blurb,
+ * capability pills (Text always, plus Vision / Thinking), the one quant Thuki
+ * chose with its size and license, a RAM-fit hint, and a single icon download
+ * that runs the VERIFIED starter path (`download_starter`, pinned revision +
+ * sha256), unlike the Browse-all pane's arbitrary repo downloads. A finished
+ * install lifts a fresh config snapshot.
  *
  * Data comes from {@link useStarterOptions} (the same rows onboarding's picker
  * uses); the download state machine is the shared {@link useDownloadModel}, so
  * the in-flight / failed UI is the same {@link DownloadProgress} card the rest
  * of the app shows. At most one model downloads at a time (the backend enforces
- * it too); `activeTier` tracks which row owns the progress card.
+ * it too); `activeTier` tracks which card owns the progress card.
  */
 
 import { useEffect, useMemo, useState } from 'react';
@@ -34,9 +35,6 @@ import type {
 
 const HF_BASE_URL = 'https://huggingface.co';
 
-/** The tier marked as the recommended pick (and whose family opens by default). */
-const RECOMMENDED_TIER: StarterTier = 'balanced';
-
 /** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
   fits: styles.fitOk,
@@ -44,14 +42,21 @@ const FIT_CLASS: Record<RamFit, string> = {
   too_big: styles.fitHeavy,
 };
 
-/** A plain-language line about what a family is good for, falling back to the
- * model maker when a family has no hand-written blurb. Presentational only. */
-const FAMILY_BLURB: Record<string, string> = {
+/** A plain-language line about what a model is good for, shown after the maker.
+ * Keyed by family so several sizes of one model share it; a model with no entry
+ * shows just its maker. Presentational only. */
+const MODEL_BLURB: Record<string, string> = {
   Qwen: 'Fast, capable all-rounder',
   Gemma: 'Well-rounded, reads images',
   'gpt-oss': 'Strongest reasoning',
 };
 
+const DOWNLOAD_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M12 4v11M7 11l5 5 5-5M5 20h14" />
+  </svg>
+);
+
 /** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
 function gb(bytes: number): string {
   return (bytes / 1e9).toFixed(1);
@@ -62,38 +67,12 @@ function totalBytes(o: StarterOption): number {
   return o.starter.size_bytes + o.starter.mmproj_bytes;
 }
 
-/** One family group: its label and the curated models under it, registry order. */
-interface FamilyGroup {
-  family: string;
-  blurb: string;
-  options: StarterOption[];
+/** The maker line: the maker, plus a blurb when the family has one. */
+function makerLine(o: StarterOption): string {
+  const blurb = o.starter.family ? MODEL_BLURB[o.starter.family] : undefined;
+  return blurb ? `${o.starter.origin} · ${blurb}` : o.starter.origin;
 }
 
-/** Groups starter rows by family, preserving first-seen (registry) order. */
-function groupByFamily(options: StarterOption[]): FamilyGroup[] {
-  const groups: FamilyGroup[] = [];
-  for (const o of options) {
-    const family = o.starter.family ?? o.starter.display_name;
-    const existing = groups.find((g) => g.family === family);
-    if (existing) {
-      existing.options.push(o);
-    } else {
-      groups.push({
-        family,
-        blurb: FAMILY_BLURB[family] ?? o.starter.origin,
-        options: [o],
-      });
-    }
-  }
-  return groups;
-}
-
-const CHEVRON = (
-  <svg viewBox="0 0 10 10" aria-hidden="true" className={styles.chev}>
-    <path d="M3 2l4 3-4 3" />
-  </svg>
-);
-
 interface StaffPicksPaneProps {
   /** Lift a fresh config snapshot after a successful install. */
   onSaved: (next: RawAppConfig) => void;
@@ -101,25 +80,23 @@ interface StaffPicksPaneProps {
 
 export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   const { options, refresh } = useStarterOptions();
-  const groups = useMemo(() => groupByFamily(options ?? []), [options]);
 
-  // The family holding the recommended tier opens by default; if the catalog
-  // has no recommended tier, the first family opens so the pane is never blank.
-  const defaultOpen = useMemo(() => {
-    const recommended = groups.find((g) =>
-      g.options.some((o) => o.starter.tier === RECOMMENDED_TIER),
-    );
-    const pick = recommended ?? groups[0];
-    return new Set(pick ? [pick.family] : []);
-  }, [groups]);
-
-  // `null` means the user has not toggled a family yet, so the recommended
-  // family (defaultOpen) shows open; the first toggle replaces it with the
-  // user's own set, which then sticks across refreshes. No seeding effect.
-  const [open, setOpen] = useState<Set<string> | null>(null);
-  const effectiveOpen = open ?? defaultOpen;
+  // Flat, case-insensitive alphabetical order by model name.
+  const ordered = useMemo(
+    () =>
+      [...(options ?? [])].sort((a, b) =>
+        a.starter.display_name.localeCompare(
+          b.starter.display_name,
+          undefined,
+          {
+            sensitivity: 'base',
+          },
+        ),
+      ),
+    [options],
+  );
 
-  // One download at a time; activeTier names the row that owns the progress card.
+  // One download at a time; activeTier names the card that owns the progress card.
   const [activeTier, setActiveTier] = useState<StarterTier | null>(null);
   const {
     state,
@@ -134,7 +111,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   } = useDownloadModel();
 
   // A finished install (phase 'ready') lifts the fresh config, clears the
-  // active row, and refreshes the rows so the new model flips to Installed.
+  // active card, and refreshes the rows so the new model flips to Installed.
   // An effect (not a render-time call) so it fires exactly once per transition.
   useEffect(() => {
     if (state.phase !== 'ready') return;
@@ -150,18 +127,6 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
     })();
   }, [state.phase, onSaved, reset, refresh]);
 
-  function toggle(family: string) {
-    setOpen((cur) => {
-      const next = new Set(cur ?? defaultOpen);
-      if (next.has(family)) {
-        next.delete(family);
-      } else {
-        next.add(family);
-      }
-      return next;
-    });
-  }
-
   function startDownload(tier: StarterTier) {
     setActiveTier(tier);
     void start(tier);
@@ -182,7 +147,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
     setActiveTier(null);
   }
 
-  if (options !== null && groups.length === 0) {
+  if (options !== null && ordered.length === 0) {
     return (
       <div className={styles.pane}>
         <p className={styles.empty}>No curated models are available.</p>
@@ -193,64 +158,32 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   return (
     <div className={styles.pane}>
       <p className={styles.hint}>
-        Hand-picked by Thuki, grouped by family. Open a family to choose a size.
+        Hand-picked by Thuki and tuned for Apple Silicon.
       </p>
       <div className={styles.list}>
-        {groups.map((group) => {
-          const expanded = effectiveOpen.has(group.family);
-          return (
-            <div className={styles.fam} key={group.family}>
-              <button
-                type="button"
-                className={styles.famHead}
-                aria-expanded={expanded}
-                onClick={() => toggle(group.family)}
-              >
-                <span className={styles.famText}>
-                  <span className={styles.famName}>{group.family}</span>
-                  <span className={styles.famSub}>
-                    {group.blurb} · {group.options.length}{' '}
-                    {group.options.length === 1 ? 'model' : 'models'}
-                  </span>
-                </span>
-                <span
-                  className={`${styles.chevWrap} ${expanded ? styles.chevOpen : ''}`}
-                >
-                  {CHEVRON}
-                </span>
-              </button>
-              {expanded ? (
-                <div className={styles.famBody}>
-                  {group.options.map((o) => (
-                    <ModelRow
-                      key={o.starter.tier}
-                      option={o}
-                      recommended={o.starter.tier === RECOMMENDED_TIER}
-                      active={activeTier === o.starter.tier}
-                      state={state}
-                      progress={progress}
-                      etaSeconds={etaSeconds}
-                      onDownload={startDownload}
-                      onResume={resumeDownload}
-                      onDiscard={discardPartial}
-                      onCancel={() => void cancel()}
-                      onRetry={() => void retry()}
-                      onChooseAnother={returnToPicker}
-                    />
-                  ))}
-                </div>
-              ) : null}
-            </div>
-          );
-        })}
+        {ordered.map((o) => (
+          <ModelCard
+            key={o.starter.tier}
+            option={o}
+            active={activeTier === o.starter.tier}
+            state={state}
+            progress={progress}
+            etaSeconds={etaSeconds}
+            onDownload={startDownload}
+            onResume={resumeDownload}
+            onDiscard={discardPartial}
+            onCancel={() => void cancel()}
+            onRetry={() => void retry()}
+            onChooseAnother={returnToPicker}
+          />
+        ))}
       </div>
     </div>
   );
 }
 
-interface ModelRowProps {
+interface ModelCardProps {
   option: StarterOption;
-  recommended: boolean;
   active: boolean;
   state: ReturnType<typeof useDownloadModel>['state'];
   progress: ReturnType<typeof useDownloadModel>['progress'];
@@ -263,9 +196,8 @@ interface ModelRowProps {
   onChooseAnother: () => void;
 }
 
-function ModelRow({
+function ModelCard({
   option,
-  recommended,
   active,
   state,
   progress,
@@ -276,20 +208,18 @@ function ModelRow({
   onCancel,
   onRetry,
   onChooseAnother,
-}: ModelRowProps) {
+}: ModelCardProps) {
   const { starter, fit, installed, partial_bytes } = option;
   const showProgress = active && state.phase !== 'idle';
 
   return (
-    <div className={styles.row} data-model-row data-tier={starter.tier}>
-      <div className={styles.rowMain}>
+    <div className={styles.card} data-model-card data-tier={starter.tier}>
+      <div className={styles.cardMain}>
         <div className={styles.mid}>
-          <div className={styles.name}>
+          <div className={styles.name} data-testid="staff-model-name">
             {starter.display_name}
-            {recommended ? (
-              <span className={styles.recommended}>Recommended</span>
-            ) : null}
           </div>
+          <div className={styles.maker}>{makerLine(option)}</div>
           <div className={styles.pills}>
             <span className={`${styles.pill} ${styles.pillText}`}>Text</span>
             {starter.vision ? (
@@ -326,9 +256,8 @@ function ModelRow({
                 {RAM_FIT_LABEL[fit]}
               </span>
             </Tooltip>
-            <RowAction
+            <CardAction
               option={option}
-              recommended={recommended}
               installed={installed}
               partialBytes={partial_bytes}
               onDownload={onDownload}
@@ -359,9 +288,8 @@ function ModelRow({
   );
 }
 
-interface RowActionProps {
+interface CardActionProps {
   option: StarterOption;
-  recommended: boolean;
   installed: boolean;
   partialBytes: number | null;
   onDownload: (tier: StarterTier) => void;
@@ -369,17 +297,16 @@ interface RowActionProps {
   onDiscard: (sha256: string) => void;
 }
 
-/** The per-row affordance: an installed marker, a resume/discard pair when an
- * interrupted partial exists, or the plain download button. */
-function RowAction({
+/** The per-card affordance: an installed marker, a resume/discard pair when an
+ * interrupted partial exists, or the icon download button. */
+function CardAction({
   option,
-  recommended,
   installed,
   partialBytes,
   onDownload,
   onResume,
   onDiscard,
-}: RowActionProps) {
+}: CardActionProps) {
   const { starter } = option;
 
   if (installed) {
@@ -410,10 +337,11 @@ function RowAction({
   return (
     <button
       type="button"
-      className={`${styles.getBtn} ${recommended ? styles.getPrimary : ''}`}
+      className={styles.getBtn}
+      aria-label="Download"
       onClick={() => onDownload(starter.tier)}
     >
-      Download
+      {DOWNLOAD_ICON}
     </button>
   );
 }

From e6507f5f33955ad5381730f9a8de7bc352fb4fe4 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 23:26:53 -0500
Subject: [PATCH 35/89] feat: group Staff picks into compact use-case sections,
 drop capability dots

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/registry.rs              |  28 ++-
 .../tabs/models/LibraryPane.module.css        |  27 +--
 src/settings/tabs/models/LibraryPane.tsx      |  14 +-
 .../tabs/models/StaffPicksPane.module.css     | 133 +++++------
 .../tabs/models/StaffPicksPane.test.tsx       | 145 ++++++------
 src/settings/tabs/models/StaffPicksPane.tsx   | 211 +++++++++---------
 src/styles/settings.module.css                |   2 -
 src/types/starter.ts                          |   9 +-
 8 files changed, 270 insertions(+), 299 deletions(-)

diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 69581d59..dd29453c 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -31,10 +31,14 @@ pub enum Tier {
 pub struct Starter {
     /// Which speed/quality tier this entry fills.
     pub tier: Tier,
-    /// Model family the Discover staff-picks accordion groups this entry under
-    /// (e.g. "Gemma", "Qwen", "gpt-oss"). Several starters can share a family
-    /// when the catalog offers more than one size of the same model.
+    /// Model family this entry belongs to (e.g. "Gemma", "Qwen", "gpt-oss").
+    /// Several starters can share a family when the catalog offers more than one
+    /// size of the same model.
     pub family: &'static str,
+    /// Use-case section the Discover staff-picks list groups this entry under
+    /// (e.g. "Everyday chat", "Compact & fast", "Deep reasoning"). Answers
+    /// "what is it for?" in plain words so a non-expert can pick by intent.
+    pub category: &'static str,
     /// Human-readable label shown in the picker (e.g. "Gemma 4 12B").
     pub display_name: &'static str,
     /// Hugging Face repo slug.
@@ -84,6 +88,7 @@ pub const STARTERS: &[Starter] = &[
     Starter {
         tier: Tier::Fast,
         family: "Qwen",
+        category: "Everyday chat",
         display_name: "Qwen3.5 9B",
         repo: "unsloth/Qwen3.5-9B-GGUF",
         revision: "3885219b6810b007914f3a7950a8d1b469d598a5",
@@ -105,6 +110,7 @@ pub const STARTERS: &[Starter] = &[
     Starter {
         tier: Tier::Balanced,
         family: "Gemma",
+        category: "Everyday chat",
         display_name: "Gemma 4 12B",
         repo: "google/gemma-4-12B-it-qat-q4_0-gguf",
         revision: "f6e7774e6148da3b7f201e42ba37cf084c1db35f",
@@ -126,6 +132,7 @@ pub const STARTERS: &[Starter] = &[
     Starter {
         tier: Tier::Smartest,
         family: "gpt-oss",
+        category: "Deep reasoning",
         display_name: "gpt-oss 20B",
         repo: "ggml-org/gpt-oss-20b-GGUF",
         revision: "e1dc459feff949ff451ce107337a2026daa80df8",
@@ -236,8 +243,7 @@ mod tests {
 
     #[test]
     fn family_per_tier() {
-        // The Discover staff-picks accordion groups starters by family, so
-        // every entry carries a non-empty family label.
+        // Each entry carries a non-empty family label.
         assert_eq!(starter(Tier::Fast).family, "Qwen");
         assert_eq!(starter(Tier::Balanced).family, "Gemma");
         assert_eq!(starter(Tier::Smartest).family, "gpt-oss");
@@ -246,6 +252,18 @@ mod tests {
         }
     }
 
+    #[test]
+    fn category_per_tier() {
+        // The Discover staff-picks list groups starters into use-case sections,
+        // so every entry carries a non-empty category label.
+        assert_eq!(starter(Tier::Fast).category, "Everyday chat");
+        assert_eq!(starter(Tier::Balanced).category, "Everyday chat");
+        assert_eq!(starter(Tier::Smartest).category, "Deep reasoning");
+        for s in STARTERS {
+            assert!(!s.category.is_empty(), "{}: category is empty", s.repo);
+        }
+    }
+
     #[test]
     fn vision_and_mmproj_per_tier() {
         // Fast (Qwen3.5) and Balanced (Gemma 4) are multimodal and each carries
diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index f96d074b..5486de99 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -1,7 +1,7 @@
 /*
- * Styles for the Library pane of the Models surface. Tokens (--accent, --vis,
- * --rea, --ok, --tight, --heavy, --danger, --hair, --t1..--t3, --elev-*)
- * cascade from the Settings window, so they are referenced directly here.
+ * Styles for the Library pane of the Models surface. Tokens (--accent, --ok,
+ * --tight, --heavy, --danger, --hair, --t1..--t3, --elev-*) cascade from the
+ * Settings window, so they are referenced directly here.
  */
 
 .pane {
@@ -120,13 +120,11 @@
 }
 
 /* Capability pills sit in the name line, right after the model name. One calm,
- * cohesive chrome for all of them (neutral text + a faint shared background);
- * a small colour dot is the only accent, so Text / Vision / Thinking read as a
- * quiet family rather than a saturated rainbow. */
+ * cohesive chrome for all of them: neutral text on a faint shared background,
+ * no colour dot, so Text / Vision / Thinking read as a quiet family. */
 .pill {
   display: inline-flex;
   align-items: center;
-  gap: 5px;
   font-size: 10.5px;
   font-weight: 540;
   padding: 2px 8px;
@@ -134,21 +132,6 @@
   color: var(--t2);
   background: rgba(255, 255, 255, 0.05);
 }
-.pill::before {
-  content: '';
-  width: 5px;
-  height: 5px;
-  border-radius: 50%;
-}
-.pillText::before {
-  background: var(--t3);
-}
-.pillVision::before {
-  background: var(--vis);
-}
-.pillThinking::before {
-  background: var(--rea);
-}
 
 /* RAM-fit hint: a coloured dot + label reusing the onboarding fit palette. */
 .fit {
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index ab2fd267..9aa4dd98 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -204,20 +204,12 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                   <div className={styles.mid}>
                     <div className={styles.name}>
                       {m.display_name}
-                      <span className={`${styles.pill} ${styles.pillText}`}>
-                        Text
-                      </span>
+                      <span className={styles.pill}>Text</span>
                       {caps?.vision ? (
-                        <span className={`${styles.pill} ${styles.pillVision}`}>
-                          Vision
-                        </span>
+                        <span className={styles.pill}>Vision</span>
                       ) : null}
                       {caps?.thinking ? (
-                        <span
-                          className={`${styles.pill} ${styles.pillThinking}`}
-                        >
-                          Thinking
-                        </span>
+                        <span className={styles.pill}>Thinking</span>
                       ) : null}
                     </div>
                     <div className={styles.org}>
diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index 8759aa83..0490ff89 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -1,5 +1,5 @@
 /*
- * Styles for the Staff-picks pane: Discover's flat curated model cards. The
+ * Styles for the Staff-picks pane: Discover's compact use-case sections. The
  * premium tokens (--accent, --t1..--t3, --hair, --ok, --tight, --heavy, etc.)
  * cascade from the Settings window root, so they are referenced via var() here.
  */
@@ -13,7 +13,7 @@
 .hint {
   font-size: 11.5px;
   color: var(--t3);
-  margin: 0 2px 13px;
+  margin: 0 2px 14px;
   line-height: 1.5;
 }
 
@@ -24,25 +24,42 @@
   color: var(--t2);
 }
 
-.list {
-  display: flex;
-  flex-direction: column;
-  gap: 8px;
+/* ── Use-case section ─────────────────────────────────────────────────────── */
+
+.section {
+  margin-bottom: 16px;
+}
+.section:last-child {
+  margin-bottom: 0;
+}
+
+/* Quiet uppercase label, no icon. */
+.secLabel {
+  font-size: 10.5px;
+  font-weight: 640;
+  letter-spacing: 0.08em;
+  text-transform: uppercase;
+  color: var(--t3);
+  margin: 0 2px 8px;
 }
 
-/* ── Model card ───────────────────────────────────────────────────────────── */
+/* ── Compact model row ────────────────────────────────────────────────────── */
 
-.card {
+.row {
   border: 1px solid var(--hair-soft);
-  border-radius: var(--radius-card);
+  border-radius: 9px;
   background: var(--elev-1);
+  margin-bottom: 6px;
+}
+.row:last-child {
+  margin-bottom: 0;
 }
 
-.cardMain {
+.rowMain {
   display: flex;
-  align-items: flex-start;
-  gap: 14px;
-  padding: 13px 14px;
+  align-items: center;
+  gap: 12px;
+  padding: 9px 12px;
 }
 
 .mid {
@@ -50,79 +67,46 @@
   min-width: 0;
 }
 
-.name {
-  font-weight: 580;
-  font-size: 13.5px;
-  color: var(--t1);
+.top {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  flex-wrap: wrap;
 }
 
-.maker {
-  font-size: 11px;
-  color: var(--t3);
-  margin-top: 4px;
+.name {
+  font-weight: 560;
+  font-size: 12.5px;
+  color: var(--t1);
 }
 
-/* Calm capability pills: one cohesive chrome, a small colour dot the only
- * accent, matching the Library pane's vocabulary. */
+/* Calm capability pills: neutral text chips, no colour dot. */
 .pills {
-  display: flex;
-  gap: 6px;
-  flex-wrap: wrap;
-  margin-top: 9px;
+  display: inline-flex;
+  gap: 5px;
 }
 .pill {
   display: inline-flex;
   align-items: center;
-  gap: 5px;
-  font-size: 10.5px;
+  font-size: 9.5px;
   font-weight: 540;
-  padding: 2px 8px;
+  padding: 2px 7px;
   border-radius: var(--radius-pill);
   color: var(--t2);
   background: rgba(255, 255, 255, 0.05);
 }
-.pill::before {
-  content: '';
-  width: 5px;
-  height: 5px;
-  border-radius: 50%;
-}
-.pillText::before {
-  background: var(--t3);
-}
-.pillVision::before {
-  background: var(--vis);
-}
-.pillThinking::before {
-  background: var(--rea);
-}
 
-.meta {
-  font-size: 11px;
+.sub {
+  font-size: 10.5px;
   color: var(--t3);
-  margin-top: 9px;
+  margin-top: 3px;
   font-variant-numeric: tabular-nums;
 }
 
-.hfLink {
-  border: none;
-  background: transparent;
-  padding: 0;
-  font-family: inherit;
-  font-size: 11px;
-  color: var(--t3);
-  cursor: pointer;
-  transition: color 140ms ease;
-}
-.hfLink:hover {
-  color: var(--accent);
-}
-
 .right {
   display: flex;
-  flex-direction: column;
-  align-items: flex-end;
-  gap: 10px;
+  align-items: center;
+  gap: 12px;
   flex: none;
 }
 
@@ -152,13 +136,13 @@
   color: var(--heavy);
 }
 
-/* ── Card actions ─────────────────────────────────────────────────────────── */
+/* ── Row actions ──────────────────────────────────────────────────────────── */
 
 /* Icon-only download button: the accent download-arrow, matching the Browse-all
  * quant rows so "download" reads the same everywhere. */
 .getBtn {
-  width: 32px;
-  height: 32px;
+  width: 30px;
+  height: 30px;
   flex: none;
   display: grid;
   place-items: center;
@@ -172,8 +156,8 @@
     background 140ms ease;
 }
 .getBtn svg {
-  width: 16px;
-  height: 16px;
+  width: 15px;
+  height: 15px;
   fill: none;
   stroke: currentColor;
   stroke-width: 1.8;
@@ -188,16 +172,15 @@
 .installed {
   display: inline-flex;
   align-items: center;
-  font-size: 11.5px;
+  font-size: 11px;
   font-weight: 600;
   color: var(--ok);
 }
 
 .resumeWrap {
   display: flex;
-  flex-direction: column;
-  align-items: flex-end;
-  gap: 6px;
+  align-items: center;
+  gap: 8px;
 }
 .resumeBtn {
   font-size: 11.5px;
@@ -224,5 +207,5 @@
 }
 
 .progress {
-  padding: 0 14px 13px;
+  padding: 0 12px 12px;
 }
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index 5e346f2f..b6e6f6cb 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -1,11 +1,12 @@
 /**
  * Unit tests for the Staff-picks pane: Discover's curated front door.
  *
- * A flat, alphabetically-ordered list of rich model cards (no family grouping,
- * no recommended highlight). Each card shows the model name, its maker and a
- * one-line blurb, capability pills (Text always, plus Vision / Thinking), the
- * one quant Thuki chose with its size and license, a RAM-fit hint, and a single
- * icon download that runs the VERIFIED starter path (`download_starter`, pinned
+ * Models are grouped into use-case sections (Everyday chat / Compact & fast /
+ * Deep reasoning), known sections first in a fixed order, then any extra
+ * category alphabetically; within a section models are alphabetical. Each
+ * compact row shows the model name, capability pills (Text always, plus Vision
+ * / Thinking), a `size · maker` sub-line, a RAM-fit hint, and a single icon
+ * download that runs the VERIFIED starter path (`download_starter`, pinned
  * revision + sha256). The download channel is captured the same way
  * BrowseAllPane.test.tsx does it.
  */
@@ -59,6 +60,7 @@ function starter(over: Partial<Starter>): Starter {
   return {
     tier: 'balanced',
     family: 'Gemma',
+    category: 'Everyday chat',
     display_name: 'Gemma 4 12B',
     repo: 'google/gemma-4-12B-it-qat-q4_0-gguf',
     revision: 'a'.repeat(40),
@@ -93,10 +95,11 @@ function option(
   };
 }
 
-/** Three models, mirroring the shipped registry (deliberately NOT alpha order). */
+/** Two everyday models + one reasoning model (deliberately NOT alpha order). */
 const QWEN = option({
   tier: 'fast',
   family: 'Qwen',
+  category: 'Everyday chat',
   display_name: 'Qwen3.5 9B',
   repo: 'unsloth/Qwen3.5-9B-GGUF',
   file_name: 'Qwen3.5-9B-Q4_K_M.gguf',
@@ -109,6 +112,7 @@ const GEMMA = option({});
 const GPT_OSS = option({
   tier: 'smartest',
   family: 'gpt-oss',
+  category: 'Deep reasoning',
   display_name: 'gpt-oss 20B',
   repo: 'ggml-org/gpt-oss-20b-GGUF',
   file_name: 'gpt-oss-20b-mxfp4.gguf',
@@ -159,90 +163,89 @@ async function renderPane(
   return view;
 }
 
-/** The card element wrapping a model name. */
-function cardFor(name: string): HTMLElement {
-  return screen.getByText(name).closest('[data-model-card]') as HTMLElement;
+/** The row element wrapping a model name. */
+function rowFor(name: string): HTMLElement {
+  return screen.getByText(name).closest('[data-model-row]') as HTMLElement;
 }
 
 describe('StaffPicksPane', () => {
-  it('renders every model as a flat card, all visible at once', async () => {
+  it('renders a section only for categories that have models', async () => {
     await renderPane();
-    expect(screen.getByText('Gemma 4 12B')).toBeInTheDocument();
-    expect(screen.getByText('Qwen3.5 9B')).toBeInTheDocument();
-    expect(screen.getByText('gpt-oss 20B')).toBeInTheDocument();
+    expect(screen.getByText('Everyday chat')).toBeInTheDocument();
+    expect(screen.getByText('Deep reasoning')).toBeInTheDocument();
+    // No model carries "Compact & fast", so that section never renders.
+    expect(screen.queryByText('Compact & fast')).not.toBeInTheDocument();
   });
 
-  it('orders the cards alphabetically by model name', async () => {
+  it('orders sections by the known order and models alphabetically within', async () => {
     await renderPane();
+    const sections = screen
+      .getAllByTestId('staff-section-label')
+      .map((el) => el.textContent);
+    expect(sections).toEqual(['Everyday chat', 'Deep reasoning']);
     const names = screen
       .getAllByTestId('staff-model-name')
       .map((el) => el.textContent);
-    expect(names).toEqual(['Gemma 4 12B', 'gpt-oss 20B', 'Qwen3.5 9B']);
+    // Everyday: Gemma before Qwen (alpha); then the reasoning section.
+    expect(names).toEqual(['Gemma 4 12B', 'Qwen3.5 9B', 'gpt-oss 20B']);
   });
 
-  it('shows no Recommended badge on any card', async () => {
+  it('shows no Recommended badge on any row', async () => {
     await renderPane();
     expect(screen.queryByText(/Recommended/)).not.toBeInTheDocument();
   });
 
-  it('shows the maker, blurb, pills, quant, size, license and fit on a card', async () => {
+  it('shows the name, pills, size and maker, and fit on a row', async () => {
     await renderPane();
-    const card = cardFor('Gemma 4 12B');
-    expect(
-      within(card).getByText(/Google · Well-rounded, reads images/),
-    ).toBeInTheDocument();
-    expect(within(card).getByText('Text')).toBeInTheDocument();
-    expect(within(card).getByText('Vision')).toBeInTheDocument();
-    expect(within(card).queryByText('Thinking')).not.toBeInTheDocument();
-    expect(within(card).getByText(/Q4_0/)).toBeInTheDocument();
-    expect(within(card).getByText(/7\.2 GB/)).toBeInTheDocument();
-    expect(within(card).getByText(/Apache 2\.0/)).toBeInTheDocument();
-    expect(within(card).getByText('Comfortable')).toBeInTheDocument();
+    const row = rowFor('Gemma 4 12B');
+    expect(within(row).getByText('Text')).toBeInTheDocument();
+    expect(within(row).getByText('Vision')).toBeInTheDocument();
+    expect(within(row).queryByText('Thinking')).not.toBeInTheDocument();
+    expect(within(row).getByText('7.2 GB · Google')).toBeInTheDocument();
+    expect(within(row).getByText('Comfortable')).toBeInTheDocument();
   });
 
   it('shows a Thinking pill on a thinking model and omits Vision on a text-only one', async () => {
     await renderPane();
-    const qwen = cardFor('Qwen3.5 9B');
+    const qwen = rowFor('Qwen3.5 9B');
     expect(within(qwen).getByText('Thinking')).toBeInTheDocument();
     expect(within(qwen).getByText('Vision')).toBeInTheDocument();
-    const oss = cardFor('gpt-oss 20B');
+    const oss = rowFor('gpt-oss 20B');
     expect(within(oss).getByText('Thinking')).toBeInTheDocument();
     expect(within(oss).queryByText('Vision')).not.toBeInTheDocument();
   });
 
-  it('falls back to the maker alone when a model has no blurb', async () => {
+  it('appends an unrecognized category after the known sections', async () => {
     await renderPane(() => {}, {
       get_starter_options: [
+        GEMMA,
         option({
-          family: 'Llama',
-          display_name: 'Llama 3.3 8B',
-          origin: 'Meta',
+          tier: 'fast',
+          category: 'Coding',
+          display_name: 'Qwen3 Coder 7B',
         }),
       ],
     });
-    const card = cardFor('Llama 3.3 8B');
-    // No blurb for the Llama family: the maker line is just the maker.
-    expect(within(card).getByText('Meta')).toBeInTheDocument();
+    const sections = screen
+      .getAllByTestId('staff-section-label')
+      .map((el) => el.textContent);
+    expect(sections).toEqual(['Everyday chat', 'Coding']);
   });
 
-  it('shows just the maker when a model carries no family at all', async () => {
+  it('buckets a model with no category under Other', async () => {
     await renderPane(() => {}, {
       get_starter_options: [
-        option({
-          family: undefined,
-          display_name: 'Mystery 7B',
-          origin: 'Acme',
-        }),
+        option({ category: undefined, display_name: 'Mystery 7B' }),
       ],
     });
-    const card = cardFor('Mystery 7B');
-    expect(within(card).getByText('Acme')).toBeInTheDocument();
+    expect(screen.getByText('Other')).toBeInTheDocument();
+    expect(screen.getByText('Mystery 7B')).toBeInTheDocument();
   });
 
   it('downloads a model through the verified starter path', async () => {
     await renderPane();
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith(
       'download_starter',
@@ -253,8 +256,8 @@ describe('StaffPicksPane', () => {
   it('lifts a fresh config and refreshes when a download completes', async () => {
     const onSaved = vi.fn();
     await renderPane(onSaved);
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
     expect(screen.getByText('Downloading model')).toBeInTheDocument();
     act(() => {
@@ -269,8 +272,8 @@ describe('StaffPicksPane', () => {
     await renderPane(onSaved, {
       get_config: new Reject(new Error('read failed')),
     });
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
     act(() => {
       lastChannel?.simulateMessage({ type: 'AllDone' });
@@ -281,8 +284,8 @@ describe('StaffPicksPane', () => {
 
   it('cancels an in-flight download', async () => {
     await renderPane();
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
     await flush();
@@ -291,8 +294,8 @@ describe('StaffPicksPane', () => {
 
   it('retries after a failed download', async () => {
     await renderPane();
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
     act(() => {
       lastChannel?.simulateMessage({
@@ -309,10 +312,10 @@ describe('StaffPicksPane', () => {
     expect(starts).toHaveLength(2);
   });
 
-  it('returns to the card from a terminal failure via Choose a different model', async () => {
+  it('returns to the row from a terminal failure via Choose a different model', async () => {
     await renderPane();
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: 'Download' }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
     act(() => {
       lastChannel?.simulateMessage({
@@ -323,9 +326,8 @@ describe('StaffPicksPane', () => {
     fireEvent.click(
       screen.getByRole('button', { name: 'Choose a different model' }),
     );
-    // The Gemma card is back to its download button, not stuck on the failure.
     expect(
-      within(cardFor('Gemma 4 12B')).getByRole('button', { name: 'Download' }),
+      within(rowFor('Gemma 4 12B')).getByRole('button', { name: 'Download' }),
     ).toBeInTheDocument();
   });
 
@@ -333,10 +335,10 @@ describe('StaffPicksPane', () => {
     await renderPane(() => {}, {
       get_starter_options: [{ ...GEMMA, installed: true }, QWEN, GPT_OSS],
     });
-    const card = cardFor('Gemma 4 12B');
-    expect(within(card).getByText('Installed')).toBeInTheDocument();
+    const row = rowFor('Gemma 4 12B');
+    expect(within(row).getByText('Installed')).toBeInTheDocument();
     expect(
-      within(card).queryByRole('button', { name: 'Download' }),
+      within(row).queryByRole('button', { name: 'Download' }),
     ).not.toBeInTheDocument();
   });
 
@@ -348,8 +350,8 @@ describe('StaffPicksPane', () => {
         GPT_OSS,
       ],
     });
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: /Resume/ }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: /Resume/ }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith(
       'download_starter',
@@ -365,23 +367,14 @@ describe('StaffPicksPane', () => {
         GPT_OSS,
       ],
     });
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: 'Discard' }));
+    const row = rowFor('Gemma 4 12B');
+    fireEvent.click(within(row).getByRole('button', { name: 'Discard' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('discard_partial_download', {
       sha256: 'b'.repeat(64),
     });
   });
 
-  it('opens the model on Hugging Face from its provenance link', async () => {
-    await renderPane();
-    const card = cardFor('Gemma 4 12B');
-    fireEvent.click(within(card).getByRole('button', { name: /Hugging Face/ }));
-    expect(invokeMock).toHaveBeenCalledWith('open_url', {
-      url: 'https://huggingface.co/google/gemma-4-12B-it-qat-q4_0-gguf',
-    });
-  });
-
   it('shows an empty state when no starters are available', async () => {
     await renderPane(() => {}, { get_starter_options: [] });
     expect(screen.getByText(/No curated models/)).toBeInTheDocument();
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 1dbc5f72..27e351b3 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -1,20 +1,21 @@
 /**
  * Staff-picks pane: the curated front door of Discover.
  *
- * A flat, alphabetically-ordered list of rich model cards. Thuki hand-picks a
- * short catalog and shows each model directly (no family grouping, no
- * recommended highlight): its friendly name, maker and a one-line blurb,
- * capability pills (Text always, plus Vision / Thinking), the one quant Thuki
- * chose with its size and license, a RAM-fit hint, and a single icon download
- * that runs the VERIFIED starter path (`download_starter`, pinned revision +
- * sha256), unlike the Browse-all pane's arbitrary repo downloads. A finished
- * install lifts a fresh config snapshot.
+ * Thuki hand-picks a short catalog and groups it into use-case sections
+ * ("Everyday chat", "Compact & fast", "Deep reasoning", ...) so a non-expert
+ * can pick by intent. Known sections show first in a fixed order, then any
+ * extra category alphabetically; within a section models are alphabetical. Each
+ * compact row shows the model name, capability pills (Text always, plus Vision
+ * / Thinking), a `size · maker` sub-line, a RAM-fit hint, and a single icon
+ * download that runs the VERIFIED starter path (`download_starter`, pinned
+ * revision + sha256), unlike the Browse-all pane's arbitrary repo downloads. A
+ * finished install lifts a fresh config snapshot.
  *
  * Data comes from {@link useStarterOptions} (the same rows onboarding's picker
  * uses); the download state machine is the shared {@link useDownloadModel}, so
  * the in-flight / failed UI is the same {@link DownloadProgress} card the rest
  * of the app shows. At most one model downloads at a time (the backend enforces
- * it too); `activeTier` tracks which card owns the progress card.
+ * it too); `activeTier` tracks which row owns the progress card.
  */
 
 import { useEffect, useMemo, useState } from 'react';
@@ -33,8 +34,6 @@ import type {
   StarterTier,
 } from '../../../types/starter';
 
-const HF_BASE_URL = 'https://huggingface.co';
-
 /** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
   fits: styles.fitOk,
@@ -42,20 +41,12 @@ const FIT_CLASS: Record<RamFit, string> = {
   too_big: styles.fitHeavy,
 };
 
-/** A plain-language line about what a model is good for, shown after the maker.
- * Keyed by family so several sizes of one model share it; a model with no entry
- * shows just its maker. Presentational only. */
-const MODEL_BLURB: Record<string, string> = {
-  Qwen: 'Fast, capable all-rounder',
-  Gemma: 'Well-rounded, reads images',
-  'gpt-oss': 'Strongest reasoning',
-};
+/** The order use-case sections appear in. Categories outside this list follow
+ * it, alphabetically. */
+const CATEGORY_ORDER = ['Everyday chat', 'Compact & fast', 'Deep reasoning'];
 
-const DOWNLOAD_ICON = (
-  <svg viewBox="0 0 24 24" aria-hidden="true">
-    <path d="M12 4v11M7 11l5 5 5-5M5 20h14" />
-  </svg>
-);
+/** Bucket for a model that carries no category. */
+const UNCATEGORIZED = 'Other';
 
 /** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
 function gb(bytes: number): string {
@@ -67,10 +58,38 @@ function totalBytes(o: StarterOption): number {
   return o.starter.size_bytes + o.starter.mmproj_bytes;
 }
 
-/** The maker line: the maker, plus a blurb when the family has one. */
-function makerLine(o: StarterOption): string {
-  const blurb = o.starter.family ? MODEL_BLURB[o.starter.family] : undefined;
-  return blurb ? `${o.starter.origin} · ${blurb}` : o.starter.origin;
+/** One use-case section: its label and the models under it. */
+interface Section {
+  category: string;
+  options: StarterOption[];
+}
+
+/** Groups models into use-case sections: known categories first in their fixed
+ * order, then any extra category alphabetically; models within a section are
+ * alphabetical by name. Both orderings are locale-aware and ignore case. */
+function groupByCategory(options: StarterOption[]): Section[] {
+  const byName = (a: string, b: string): number =>
+    a.localeCompare(b, undefined, { sensitivity: 'base' });
+  const buckets = new Map<string, StarterOption[]>();
+  for (const o of options) {
+    const category = o.starter.category ?? UNCATEGORIZED;
+    const list = buckets.get(category);
+    if (list) {
+      list.push(o);
+    } else {
+      buckets.set(category, [o]);
+    }
+  }
+  const known = CATEGORY_ORDER.filter((c) => buckets.has(c));
+  const extra = [...buckets.keys()]
+    .filter((c) => !CATEGORY_ORDER.includes(c))
+    .sort(byName);
+  return [...known, ...extra].map((category) => ({
+    category,
+    options: (buckets.get(category) as StarterOption[]).sort((a, b) =>
+      byName(a.starter.display_name, b.starter.display_name),
+    ),
+  }));
+}
 
 interface StaffPicksPaneProps {
@@ -80,23 +99,9 @@ interface StaffPicksPaneProps {
 
 export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   const { options, refresh } = useStarterOptions();
+  const sections = useMemo(() => groupByCategory(options ?? []), [options]);
 
-  // Flat, case-insensitive alphabetical order by model name.
-  const ordered = useMemo(
-    () =>
-      [...(options ?? [])].sort((a, b) =>
-        a.starter.display_name.localeCompare(
-          b.starter.display_name,
-          undefined,
-          {
-            sensitivity: 'base',
-          },
-        ),
-      ),
-    [options],
-  );
-
-  // One download at a time; activeTier names the card that owns the progress card.
+  // One download at a time; activeTier names the row that owns the progress card.
   const [activeTier, setActiveTier] = useState<StarterTier | null>(null);
   const {
     state,
@@ -111,7 +116,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   } = useDownloadModel();
 
   // A finished install (phase 'ready') lifts the fresh config, clears the
-  // active card, and refreshes the rows so the new model flips to Installed.
+  // active row, and refreshes the rows so the new model flips to Installed.
   // An effect (not a render-time call) so it fires exactly once per transition.
   useEffect(() => {
     if (state.phase !== 'ready') return;
@@ -147,7 +152,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
     setActiveTier(null);
   }
 
-  if (options !== null && ordered.length === 0) {
+  if (options !== null && sections.length === 0) {
     return (
       <div className={styles.pane}>
         <p className={styles.empty}>No curated models are available.</p>
@@ -158,31 +163,36 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   return (
     <div className={styles.pane}>
       <p className={styles.hint}>
-        Hand-picked by Thuki and tuned for Apple Silicon.
+        Pick by what you want to do. Thuki chose one build of each.
       </p>
-      <div className={styles.list}>
-        {ordered.map((o) => (
-          <ModelCard
-            key={o.starter.tier}
-            option={o}
-            active={activeTier === o.starter.tier}
-            state={state}
-            progress={progress}
-            etaSeconds={etaSeconds}
-            onDownload={startDownload}
-            onResume={resumeDownload}
-            onDiscard={discardPartial}
-            onCancel={() => void cancel()}
-            onRetry={() => void retry()}
-            onChooseAnother={returnToPicker}
-          />
-        ))}
-      </div>
+      {sections.map((section) => (
+        <div className={styles.section} key={section.category}>
+          <div className={styles.secLabel} data-testid="staff-section-label">
+            {section.category}
+          </div>
+          {section.options.map((o) => (
+            <ModelRow
+              key={o.starter.tier}
+              option={o}
+              active={activeTier === o.starter.tier}
+              state={state}
+              progress={progress}
+              etaSeconds={etaSeconds}
+              onDownload={startDownload}
+              onResume={resumeDownload}
+              onDiscard={discardPartial}
+              onCancel={() => void cancel()}
+              onRetry={() => void retry()}
+              onChooseAnother={returnToPicker}
+            />
+          ))}
+        </div>
+      ))}
     </div>
   );
 }
 
-interface ModelCardProps {
+interface ModelRowProps {
   option: StarterOption;
   active: boolean;
   state: ReturnType<typeof useDownloadModel>['state'];
@@ -196,7 +206,7 @@ interface ModelCardProps {
   onChooseAnother: () => void;
 }
 
-function ModelCard({
+function ModelRow({
   option,
   active,
   state,
@@ -208,45 +218,30 @@ function ModelCard({
   onCancel,
   onRetry,
   onChooseAnother,
-}: ModelCardProps) {
+}: ModelRowProps) {
   const { starter, fit, installed, partial_bytes } = option;
   const showProgress = active && state.phase !== 'idle';
 
   return (
-    <div className={styles.card} data-model-card data-tier={starter.tier}>
-      <div className={styles.cardMain}>
+    <div className={styles.row} data-model-row data-tier={starter.tier}>
+      <div className={styles.rowMain}>
         <div className={styles.mid}>
-          <div className={styles.name} data-testid="staff-model-name">
-            {starter.display_name}
-          </div>
-          <div className={styles.maker}>{makerLine(option)}</div>
-          <div className={styles.pills}>
-            <span className={`${styles.pill} ${styles.pillText}`}>Text</span>
-            {starter.vision ? (
-              <span className={`${styles.pill} ${styles.pillVision}`}>
-                Vision
-              </span>
-            ) : null}
-            {starter.thinking ? (
-              <span className={`${styles.pill} ${styles.pillThinking}`}>
-                Thinking
-              </span>
-            ) : null}
+          <div className={styles.top}>
+            <span className={styles.name} data-testid="staff-model-name">
+              {starter.display_name}
+            </span>
+            <span className={styles.pills}>
+              <span className={styles.pill}>Text</span>
+              {starter.vision ? (
+                <span className={styles.pill}>Vision</span>
+              ) : null}
+              {starter.thinking ? (
+                <span className={styles.pill}>Thinking</span>
+              ) : null}
+            </span>
           </div>
-          <div className={styles.meta}>
-            {starter.quant} · {gb(totalBytes(option))} GB ·{' '}
-            <button
-              type="button"
-              className={styles.hfLink}
-              onClick={() =>
-                void invoke('open_url', {
-                  url: `${HF_BASE_URL}/${starter.repo}`,
-                })
-              }
-              aria-label={`View ${starter.display_name} on Hugging Face`}
-            >
-              {starter.license_note} ↗
-            </button>
+          <div className={styles.sub}>
+            {gb(totalBytes(option))} GB · {starter.origin}
           </div>
         </div>
         {!showProgress ? (
@@ -256,7 +251,7 @@ function ModelCard({
                 {RAM_FIT_LABEL[fit]}
               </span>
             </Tooltip>
-            <CardAction
+            <RowAction
               option={option}
               installed={installed}
               partialBytes={partial_bytes}
@@ -288,7 +283,7 @@ function ModelCard({
   );
 }
 
-interface CardActionProps {
+interface RowActionProps {
   option: StarterOption;
   installed: boolean;
   partialBytes: number | null;
@@ -297,16 +292,22 @@ interface CardActionProps {
   onDiscard: (sha256: string) => void;
 }
 
-/** The per-card affordance: an installed marker, a resume/discard pair when an
+const DOWNLOAD_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M12 4v11M7 11l5 5 5-5M5 20h14" />
+  </svg>
+);
+
+/** The per-row affordance: an installed marker, a resume/discard pair when an
  * interrupted partial exists, or the icon download button. */
-function CardAction({
+function RowAction({
   option,
   installed,
   partialBytes,
   onDownload,
   onResume,
   onDiscard,
-}: CardActionProps) {
+}: RowActionProps) {
   const { starter } = option;
 
   if (installed) {
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 058e2dfd..a1f7f503 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -40,8 +40,6 @@
   --t3: rgba(236, 234, 231, 0.34);
   --accent: #ff8d5c;
   --accent-soft: rgba(255, 141, 92, 0.14);
-  --vis: #7fd1a6;
-  --rea: #b9a4f0;
   --ok: #79c08e;
   --tight: #e6b56b;
   /* RAM-fit "Heavy" (model larger than this Mac comfortably holds) and the
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 57105e36..f589c226 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -19,10 +19,13 @@ export type RamFit = 'fits' | 'tight' | 'too_big';
 /** One curated starter model from the compile-time registry. */
 export interface Starter {
   tier: StarterTier;
-  /** Model family the Discover staff-picks accordion groups this entry under
-   * (e.g. "Gemma", "Qwen", "gpt-oss"). Backend always sends it; optional here
-   * for test-fixture ergonomics. */
+  /** Model family this entry belongs to (e.g. "Gemma", "Qwen", "gpt-oss").
+   * Backend always sends it; optional here for test-fixture ergonomics. */
   family?: string;
+  /** Use-case section the Discover staff-picks list groups this entry under
+   * (e.g. "Everyday chat", "Compact & fast", "Deep reasoning"). Backend always
+   * sends it; optional here for test-fixture ergonomics. */
+  category?: string;
   display_name: string;
   repo: string;
   revision: string;

From 35ca94215fa0adb4c3e7514d820877927df671b3 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 23:29:10 -0500
Subject: [PATCH 36/89] test: match the Discover host probe to the new Staff
 picks hint

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/tabs/models/DiscoverPane.test.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index 66888e21..b6781efc 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -59,7 +59,7 @@ function renderHost() {
 
 /** Staff picks is showing when its curated hint is on screen. */
 function staffPicksVisible(): boolean {
-  return screen.queryByText(/Hand-picked by Thuki/) !== null;
+  return screen.queryByText(/Pick by what you want to do/) !== null;
 }
 
 /** Browse all is showing when its Hugging Face search box is on screen. */

From 892601834702a1d6447ee1b74c111352f1089293 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 23:33:45 -0500
Subject: [PATCH 37/89] fix: hover-activate the Settings and update panels
 after defocus

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/lib.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index d63d1e6d..4c5b95ad 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -135,13 +135,55 @@ use _thuki_panel::ThukiPanel;
 #[cfg(target_os = "macos")]
 mod _settings_panel {
     use tauri::Manager;
+    use tauri_nspanel::TrackingAreaOptions;
     tauri_nspanel::tauri_panel! {
         panel!(ThukiSettingsPanel {
             config: {
                 can_become_key_window: true,
                 is_floating_panel: true
             }
+            with: {
+                // Same hover-activate rationale as ThukiPanel. Settings is a
+                // nonactivating panel with hides_on_deactivate(false), so once
+                // it is defocused (the user clicks another app) a plain click
+                // can never regain key on modern macOS and the webview drops
+                // clicks, drag, and hover - the form inputs go dead. An
+                // `active_always` tracking area keeps mouse events flowing while
+                // the app is inactive, and the mouse-entered callback (wired in
+                // `init_settings_panel`) makes the panel key on cursor-enter so
+                // the inputs come back without activating the app.
+                tracking_area: {
+                    options: TrackingAreaOptions::new()
+                        .active_always()
+                        .mouse_entered_and_exited()
+                        .mouse_moved()
+                        .cursor_update(),
+                    auto_resize: true
+                }
+            }
         })
+        panel_event!(ThukiSettingsEventsInner {})
+    }
+
+    /// Builds a mouse-event handler and attaches it to the `"settings"` panel.
+    ///
+    /// Mirrors `attach_overlay_event_handler` for ThukiPanel: on mouse-entered
+    /// the callback looks the panel up again and makes it the key window (see
+    /// the tracking-area comment on the panel for why that is needed).
+    /// Returns without attaching anything when the panel lookup fails.
+    pub fn attach_settings_event_handler(app_handle: tauri::AppHandle) {
+        use tauri_nspanel::ManagerExt;
+        let Ok(panel) = app_handle.get_webview_panel("settings") else {
+            return;
+        };
+        let cb_handle = app_handle.clone();
+        let events = ThukiSettingsEventsInner::new();
+        events.on_mouse_entered(move |_event| {
+            if let Ok(p) = cb_handle.get_webview_panel("settings") {
+                p.make_key_window();
+            }
+        });
+        panel.set_event_handler(Some(events.as_ref()));
+    }
 }
 #[cfg(target_os = "macos")]
@@ -157,13 +199,55 @@ use _settings_panel::ThukiSettingsPanel;
 #[cfg(target_os = "macos")]
 mod _update_panel {
     use tauri::Manager;
+    use tauri_nspanel::TrackingAreaOptions;
     tauri_nspanel::tauri_panel! {
         panel!(ThukiUpdatePanel {
             config: {
                 can_become_key_window: true,
                 is_floating_panel: true
             }
+            with: {
+                // Same hover-activate rationale as ThukiPanel. The update panel
+                // is nonactivating with hides_on_deactivate(false), so after it
+                // is defocused a plain click can never regain key on modern
+                // macOS and the webview drops clicks, drag, and hover - the four
+                // action buttons go dead. An `active_always` tracking area keeps
+                // mouse events flowing while the app is inactive, and the
+                // mouse-entered callback (wired in `init_update_panel`) makes the
+                // panel key on cursor-enter so the buttons come back without
+                // activating the app.
+                tracking_area: {
+                    options: TrackingAreaOptions::new()
+                        .active_always()
+                        .mouse_entered_and_exited()
+                        .mouse_moved()
+                        .cursor_update(),
+                    auto_resize: true
+                }
+            }
         })
+        panel_event!(ThukiUpdateEventsInner {})
+    }
+
+    /// Builds a mouse-event handler and attaches it to the `"update"` panel.
+    ///
+    /// Mirrors `attach_overlay_event_handler` for ThukiPanel: on mouse-entered
+    /// the callback looks the panel up again and makes it the key window (see
+    /// the tracking-area comment on the panel for why that is needed).
+    /// Returns without attaching anything when the panel lookup fails.
+    pub fn attach_update_event_handler(app_handle: tauri::AppHandle) {
+        use tauri_nspanel::ManagerExt;
+        let Ok(panel) = app_handle.get_webview_panel("update") else {
+            return;
+        };
+        let cb_handle = app_handle.clone();
+        let events = ThukiUpdateEventsInner::new();
+        events.on_mouse_entered(move |_event| {
+            if let Ok(p) = cb_handle.get_webview_panel("update") {
+                p.make_key_window();
+            }
+        });
+        panel.set_event_handler(Some(events.as_ref()));
+    }
 }
 #[cfg(target_os = "macos")]
@@ -1470,6 +1554,11 @@ fn init_settings_panel(app_handle: &tauri::AppHandle) {
                     .can_join_all_spaces()
                     .into(),
             );
+            // Hover-activate: take key focus the moment the cursor enters the
+            // Settings overlay, mirroring init_panel. Pairs with the
+            // `active_always` tracking area on ThukiSettingsPanel so a defocused
+            // nonactivating panel regains key without activating the app.
+            _settings_panel::attach_settings_event_handler(app_handle.clone());
         }
         Err(e) => {
             eprintln!("thuki: [settings] NSPanel conversion failed: {e:?}");
@@ -1515,6 +1604,11 @@ fn init_update_panel(app_handle: &tauri::AppHandle) {
                     .can_join_all_spaces()
                     .into(),
             );
+            // Hover-activate: take key focus the moment the cursor enters the
+            // update overlay, mirroring init_panel. Pairs with the
+            // `active_always` tracking area on ThukiUpdatePanel so a defocused
+            // nonactivating panel regains key without activating the app.
+            _update_panel::attach_update_event_handler(app_handle.clone());
         }
         Err(e) => {
             eprintln!("thuki: [update] NSPanel conversion failed: {e:?}");

From f5c33b370b65a297d32d04de77915b4658345503 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 23:30:00 -0500
Subject: [PATCH 38/89] feat: rework the Generation settings rows and provider
 hero

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 docs/configurations.md                        |   2 +-
 src/settings/configHelpers.ts                 |   2 +-
 .../tabs/models/ProvidersPane.test.tsx        | 163 ++++++++++++++--
 src/settings/tabs/models/ProvidersPane.tsx    | 180 +++++++++++++-----
 src/styles/settings.module.css                | 129 +++++++++----
 5 files changed, 373 insertions(+), 103 deletions(-)

diff --git a/docs/configurations.md b/docs/configurations.md
index 091fb1d3..ec6d8577 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -151,7 +151,7 @@ Upgrading from an older version is automatic: a pre-providers config with a flat
 | Constant          | Default    | Tunable? | Bounds              | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
 | :---------------- | :--------- | :------- | :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `active_provider` | `"builtin"` | Yes      | id of a provider    | Which provider receives inference. Must match the `id` of one of the `[[inference.providers]]` entries; an empty or dangling value resets to `builtin`. Exception: a config that predates the providers list is pinned to `ollama` on load, because no working built-in provider existed when that file was written.                                                                                                                                                                                                                                                                                              |
-| `num_ctx`         | `16384`    | Yes      | `[2048, 1048576]`   | Context window size in tokens sent to the active provider with every request. For the built-in engine, the value becomes `--ctx-size` when the `llama-server` process starts, so changing it restarts the engine. For Ollama, warmup and chat share this value so the same runner instance and its cached KV prefix for the system prompt are reused: they must match or Ollama creates a second runner and the warmup saves nothing. Ollama silently clamps this to the model's physical maximum. For OpenAI-compatible providers the value is informational only; the server controls the actual context. Raise to fit longer conversations: each doubling roughly doubles VRAM for the KV cache; lower to reclaim GPU memory. See [Tuning the Context Window](./tuning-context-window.md). |
+| `num_ctx`         | `16384`    | Yes      | `[2048, 1048576]`   | Context window size in tokens sent to the active provider with every request. For the built-in engine, the value becomes `--ctx-size` when the `llama-server` process starts, so changing it restarts the engine. For Ollama, warmup and chat share this value so the same runner instance and its cached KV prefix for the system prompt are reused: they must match or Ollama creates a second runner and the warmup saves nothing. Ollama silently clamps this to the model's physical maximum. For OpenAI-compatible providers the value is informational only; the server controls the actual context. Raise to fit longer conversations: the KV cache grows roughly linearly with the context size (the model weights stay the same), so each doubling roughly doubles its memory footprint; benchmark on your hardware before pushing it high, and lower to reclaim memory. See [Tuning the Context Window](./tuning-context-window.md). |
 | `keep_warm_inactivity_minutes` | `0` | Yes | `-1` or `[0, 1440]` | Minutes of inactivity before Thuki releases the active model from memory. Governs both local providers: the built-in engine stops its sidecar to free RAM, and Ollama is told to release the model from VRAM. Not applicable to a remote OpenAI-compatible server, whose residency Thuki does not manage. `0` uses the provider's natural short default (about 5 minutes): Ollama defers to its own timer, the built-in engine applies its own ~5-minute timer (`DEFAULT_BUILTIN_IDLE_MINUTES`). `-1` keeps the model resident forever. Raise for longer sessions between uses; lower to reclaim memory sooner. |
 
 Each `[[inference.providers]]` block has these fields:
diff --git a/src/settings/configHelpers.ts b/src/settings/configHelpers.ts
index 048b0972..cc0b2777 100644
--- a/src/settings/configHelpers.ts
+++ b/src/settings/configHelpers.ts
@@ -27,7 +27,7 @@ const HELPERS = {
     openai_vision:
       'Whether the selected model accepts image inputs. OpenAI-compatible servers expose no capability probe, so you declare it yourself. Turn it on only if the model truly supports images; otherwise requests with attachments will fail.',
     num_ctx:
-      "The size of the context window in tokens, applied to whichever provider is active. For the built-in engine the value becomes --ctx-size when llama-server starts, so changing it restarts the engine (a few seconds). For Ollama it is sent with every request, shared between warmup and chat so the same runner and its cached system-prompt prefix are reused, and silently capped at the model's trained maximum. For OpenAI-compatible servers it is informational only; the server controls the actual context. Raise to fit longer conversations without the model forgetting early messages; lower to reduce memory use. Valid range: 2048–1048576. The default (16384) comfortably fits the system prompt plus several long turns.",
+      'How much conversation the model keeps in working memory, in tokens. Larger fits longer chats, but the KV cache uses more memory as it grows, so benchmark before pushing it high. For the built-in engine, changing this restarts the engine. Range: 2048–1048576.',
   },
   prompt: {
     system:
diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index 35a682d2..ef960071 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -14,6 +14,7 @@ import {
 } from '../../../testUtils/mocks/tauri';
 
 import { ProvidersPane } from './ProvidersPane';
+import styles from '../../../styles/settings.module.css';
 import type { RawAppConfig, RawProvider } from '../../types';
 
 const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
@@ -153,7 +154,6 @@ describe('ProvidersPane active hero', () => {
     expect(screen.getByText('Active provider')).toBeInTheDocument();
     expect(screen.getByText('Ollama')).toBeInTheDocument();
     expect(screen.getByText('http://127.0.0.1:11434')).toBeInTheDocument();
-    expect(screen.getByText('Active')).toBeInTheDocument();
   });
 
   it('falls back to Ollama labelling when the active id matches no provider', () => {
@@ -485,7 +485,133 @@ describe('ProvidersPane generation', () => {
     });
     fireEvent.change(slider, { target: { value: '800' } });
     fireEvent.mouseUp(slider);
-    expect(screen.getByText(/tokens ·/)).toBeInTheDocument();
+    expect(screen.getByText('tokens')).toBeInTheDocument();
+  });
+
+  it('shows the token value in an editable field with no turns line', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA], { num_ctx: 32768 }));
+    expect(
+      screen.getByRole('spinbutton', { name: 'Context window size in tokens' }),
+    ).toHaveValue(32768);
+    expect(screen.getByText('tokens')).toBeInTheDocument();
+    expect(screen.queryByText(/turns/)).toBeNull();
+  });
+
+  it('commits a typed token value and moves the slider', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('spinbutton', {
+      name: 'Context window size in tokens',
+    });
+    fireEvent.focus(input);
+    fireEvent.change(input, { target: { value: '65536' } });
+    fireEvent.blur(input);
+    expect(input).toHaveValue(65536);
+    expect(
+      screen.getByRole('slider', { name: 'Context window tokens' }),
+    ).toHaveAttribute('aria-valuenow', '65536');
+  });
+
+  it('clamps a typed token value above the maximum', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('spinbutton', {
+      name: 'Context window size in tokens',
+    });
+    fireEvent.change(input, { target: { value: '9999999' } });
+    fireEvent.blur(input);
+    expect(input).toHaveValue(1048576);
+  });
+
+  it('reverts a non-numeric token entry to the current value', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA], { num_ctx: 32768 }));
+    const input = screen.getByRole('spinbutton', {
+      name: 'Context window size in tokens',
+    });
+    fireEvent.change(input, { target: { value: '' } });
+    fireEvent.blur(input);
+    expect(input).toHaveValue(32768);
+  });
+
+  it('commits the token field on Enter and ignores other keys', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const input = screen.getByRole('spinbutton', {
+      name: 'Context window size in tokens',
+    });
+    input.focus(); // blur() only dispatches a blur event on the focused element
+    fireEvent.change(input, { target: { value: '9999999' } });
+    fireEvent.keyDown(input, { key: 'a' });
+    fireEvent.keyDown(input, { key: 'Enter' });
+    expect(input).toHaveValue(1048576); // clamped, so the Enter commit really ran
+  });
+
+  it('keeps the focused token field unchanged across a resync', () => {
+    const { rerender } = renderPane(
+      makeConfig('builtin', [BUILTIN, OLLAMA], { num_ctx: 16384 }),
+    );
+    const input = screen.getByRole('spinbutton', {
+      name: 'Context window size in tokens',
+    });
+    fireEvent.focus(input);
+    fireEvent.change(input, { target: { value: '65536' } });
+    rerender(
+      <ProvidersPane
+        config={makeConfig('builtin', [BUILTIN, OLLAMA], { num_ctx: 32768 })}
+        resyncToken={5}
+        onSaved={() => {}}
+        onAddModel={() => {}}
+      />,
+    );
+    // Focused: the resync must not clobber the in-progress entry.
+    expect(input).toHaveValue(65536);
+  });
+
+  it('opens the tuning guide from the Learn link', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    fireEvent.click(
+      screen.getByRole('button', {
+        name: /Learn how to tune Context Window/,
+      }),
+    );
+    expect(invokeMock).toHaveBeenCalledWith('open_url', {
+      url: 'https://github.com/quiet-node/thuki/blob/main/docs/tuning-context-window.md#the-5-minute-benchmark-recipe',
+    });
+  });
+
+  it('spaces the doubling milestones evenly across the track', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    // Each milestone doubles the last, so on the log track they sit at equal
+    // ~11.1% gaps and the thumb lands on the milestone it reads.
+    const leftOf = (label: string) =>
+      (screen.getByText(label) as HTMLElement).style.left;
+    expect(leftOf('2K')).toBe('0%');
+    expect(leftOf('4K')).toBe('11.1%');
+    expect(leftOf('8K')).toBe('22.2%');
+    expect(leftOf('16K')).toBe('33.3%');
+    expect(leftOf('32K')).toBe('44.4%');
+    expect(leftOf('64K')).toBe('55.6%');
+    expect(leftOf('128K')).toBe('66.7%');
+    expect(leftOf('256K')).toBe('77.8%');
+    expect(leftOf('512K')).toBe('88.9%');
+    expect(leftOf('1M')).toBe('100%');
+  });
+
+  it('explains the context window through a tooltip, not a subtitle', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    expect(
+      screen.getByRole('button', { name: 'About Context window' }),
+    ).toBeInTheDocument();
+    expect(
+      screen.queryByText('How much conversation the model remembers'),
+    ).toBeNull();
+  });
+
+  it('drops the system prompt subtitle in favour of its tooltip', () => {
+    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(
+      screen.getByRole('button', { name: 'About System prompt' }),
+    ).toBeInTheDocument();
+    expect(
+      screen.queryByText('Persona sent at the start of every chat'),
+    ).toBeNull();
   });
 
   it('commits a context-window change via touch and keyboard', () => {
@@ -561,6 +687,23 @@ describe('ProvidersPane generation', () => {
     expect(screen.getByText('No model loaded')).toBeInTheDocument();
   });
 
+  it('greens the status dot when a model is resident', async () => {
+    mockInvoke({ get_engine_status: engineStatus('loaded') });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    await waitFor(() =>
+      expect(screen.getByText('Engine: loaded')).toBeInTheDocument(),
+    );
+    const dot = screen.getByText('Engine: loaded').querySelector('span');
+    expect(dot).toHaveClass(styles.genStatusDotLive);
+  });
+
+  it('dims the status dot when the engine is stopped', () => {
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    const dot = screen.getByText('Engine: stopped').querySelector('span');
+    expect(dot).toHaveClass(styles.genStatusDot);
+    expect(dot).not.toHaveClass(styles.genStatusDotLive);
+  });
+
   it('reflects warmup load + evict events', async () => {
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
     // Let the mount-time get_loaded_model settle so the event is not clobbered.
@@ -665,17 +808,13 @@ describe('ProvidersPane robustness', () => {
     expect(url).toHaveValue('http://127.0.0.1:11434');
   });
 
-  it('pluralises the installed count in the footnote', () => {
-    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
-    expect(screen.getByText(/0 installed models/)).toBeInTheDocument();
-  });
-
-  it('singularises one installed model', async () => {
+  it('does not render an installed-count footnote', async () => {
     mockInvoke({ list_installed_models: INSTALLED });
-    renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
-    await waitFor(() =>
-      expect(screen.getByText(/1 installed model/)).toBeInTheDocument(),
+    renderPane(
+      makeConfig('builtin', [{ ...BUILTIN, model: INSTALLED[0].id }, OLLAMA]),
     );
+    await screen.findByRole('combobox', { name: 'Built-in model' });
+    expect(screen.queryByText(/installed model/)).toBeNull();
   });
 
   it('reflects the engine:status event stream for the built-in engine', async () => {
@@ -718,7 +857,7 @@ describe('ProvidersPane robustness', () => {
 
   it('tolerates a config with no built-in provider', () => {
     renderPane(makeConfig('ollama', [OLLAMA]));
-    expect(screen.getByText(/0 installed models/)).toBeInTheDocument();
+    expect(screen.getByText('Active provider')).toBeInTheDocument();
   });
 
   it('renders no openai card in the hero when the dev flag is off', () => {
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index 2bdf0bf6..9988d962 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -41,7 +41,6 @@ interface ProvidersPaneProps {
 
 const PROMPT_MAX_CHARS = 32000;
 const PROMPT_TEXTAREA_ROWS = 12;
-const TOKENS_PER_TURN_ESTIMATE = 400;
 
 const KEEP_WARM_TOOLTIP =
   'Keep Warm holds your active model resident in memory after each use, ' +
@@ -50,10 +49,25 @@ const KEEP_WARM_TOOLTIP =
   'Unload now releases it immediately. ' +
   'If set to 0, each provider uses its natural short default (about 5 minutes).';
 
-// Log-scale context window slider: slider pos [0..1000] maps to a token count.
+// Context window slider: slider pos [0..1000] maps logarithmically to a token
+// count between CTX_MIN and CTX_MAX. The milestones double each step (2K, 4K,
+// ... 1M), so on the log track they land at equal intervals and the thumb
+// always sits on the milestone it reads.
 const CTX_MIN = 2048;
 const CTX_MAX = 1_048_576;
 const CTX_LOG_RATIO = Math.log(CTX_MAX / CTX_MIN);
+const CTX_TICKS: { label: string; value: number }[] = [
+  { label: '2K', value: 2048 },
+  { label: '4K', value: 4096 },
+  { label: '8K', value: 8192 },
+  { label: '16K', value: 16384 },
+  { label: '32K', value: 32768 },
+  { label: '64K', value: 65536 },
+  { label: '128K', value: 131072 },
+  { label: '256K', value: 262144 },
+  { label: '512K', value: 524288 },
+  { label: '1M', value: 1048576 },
+];
 
 function ctxToPos(v: number): number {
   return Math.round((1000 * Math.log(v / CTX_MIN)) / CTX_LOG_RATIO);
@@ -64,7 +78,10 @@ function posToCtx(pos: number): number {
     1024
   );
 }
-const CTX_TICKS = ['2K', '8K', '32K', '128K', '512K', '1M'];
+
+// Deep link to the 5-minute benchmark recipe, opened via the open_url command.
+const CTX_TUNING_URL =
+  'https://github.com/quiet-node/thuki/blob/main/docs/tuning-context-window.md#the-5-minute-benchmark-recipe';
 
 /** One-line description shown under a provider's name. */
 function providerSubtitle(p: RawProvider): string {
@@ -150,6 +167,7 @@ export function ProvidersPane({
   );
   const [ctxChip, setCtxChip] = useState(String(config.inference.num_ctx));
   const ctxDraggingRef = useRef(false);
+  const ctxInputFocusedRef = useRef(false);
   const { resetTo: resetNumCtx } = useDebouncedSave(
     'inference',
     'num_ctx',
@@ -202,11 +220,13 @@ export function ProvidersPane({
       setRawMin(String(config.inference.keep_warm_inactivity_minutes));
       resetMin(config.inference.keep_warm_inactivity_minutes);
     }
-    const nextCtx = config.inference.num_ctx;
-    setNumCtx(nextCtx);
-    setCtxPos(ctxToPos(nextCtx));
-    setCtxChip(String(nextCtx));
-    resetNumCtx(nextCtx);
+    if (!ctxInputFocusedRef.current) {
+      const nextCtx = config.inference.num_ctx;
+      setNumCtx(nextCtx);
+      setCtxPos(ctxToPos(nextCtx));
+      setCtxChip(String(nextCtx));
+      resetNumCtx(nextCtx);
+    }
     setPromptValue(config.prompt.system);
     resetPrompt(config.prompt.system);
     if (!ollamaUrlFocusedRef.current) setOllamaUrl(ollamaBaseUrl);
@@ -218,6 +238,18 @@ export function ProvidersPane({
     setCtxChip(String(v));
   }
 
+  // Commit the typed token count on blur/Enter, clamped to the valid range.
+  // Blank or non-numeric input reverts; Number() keeps "1e5" from parsing as 1.
+  function commitCtxInput() {
+    ctxInputFocusedRef.current = false;
+    const n = ctxChip.trim() === '' ? NaN : Math.trunc(Number(ctxChip));
+    if (Number.isNaN(n)) {
+      setCtxChip(String(numCtx));
+    } else {
+      commitCtx(Math.max(CTX_MIN, Math.min(CTX_MAX, n)));
+    }
+  }
+
   function commitOllamaUrl() {
     const next = ollamaUrl.trim();
     if (next === ollamaBaseUrl) return;
@@ -257,9 +289,19 @@ export function ProvidersPane({
     void invoke('evict_model').catch(() => {});
   }
 
-  const ctxTurns = Math.round(numCtx / TOKENS_PER_TURN_ESTIMATE);
   const fillPct = `${ctxPos / 10}%`;
 
+  // Keep-warm live status: the text shown beside the name, plus whether a model
+  // is actually resident (drives the status dot color: green when warm).
+  const engineWarm =
+    activeKind === 'builtin' ? engineState === 'loaded' : loadedModel !== null;
+  const warmStatusText =
+    activeKind === 'builtin'
+      ? `Engine: ${engineState}`
+      : loadedModel !== null
+        ? `${loadedModel} in VRAM`
+        : 'No model loaded';
+
   // The active Ollama model value, constrained to the installed list.
   const ollamaModelValue =
     activeModel && availableModels.includes(activeModel)
@@ -295,10 +337,6 @@ export function ProvidersPane({
                 : 'Local or remote Ollama'}
             </div>
           </div>
-          <span className={styles.heroActive}>
-            <span className={styles.heroLiveDot} aria-hidden />
-            Active
-          </span>
         </div>
 
         {activeKind === 'builtin' ? (
@@ -437,15 +475,68 @@ export function ProvidersPane({
         </span>
       </div>
       <div className={styles.listcard}>
-        {/* Context window */}
-        <div className={styles.genRow}>
-          <div className={styles.genLabel}>
-            <div className={styles.genName}>Context window</div>
-            <div className={styles.genHelp}>
-              How much conversation the model remembers
+        {/* Context window: the header carries the label, an info tooltip, a
+            deep link to the tuning guide, and an editable token field; the
+            slider spans the full card width below. */}
+        <div className={`${styles.genRow} ${styles.genRowCtx}`}>
+          <div className={styles.genCtxHead}>
+            <div className={styles.genName}>
+              Context window
+              <Tooltip label={configHelp('inference', 'num_ctx')} multiline>
+                <button
+                  type="button"
+                  className={styles.infoBtn}
+                  aria-label="About Context window"
+                >
+                  ?
+                </button>
+              </Tooltip>
+              <Tooltip label="Learn how to tune Context Window ↗">
+                <button
+                  type="button"
+                  className={`${styles.infoBtn} ${styles.genCtxLearnBtn}`}
+                  aria-label="Learn how to tune Context Window"
+                  onClick={() =>
+                    void invoke('open_url', { url: CTX_TUNING_URL })
+                  }
+                >
+                  <svg
+                    viewBox="0 0 16 16"
+                    fill="none"
+                    stroke="currentColor"
+                    strokeWidth="1.5"
+                    strokeLinecap="round"
+                    strokeLinejoin="round"
+                    aria-hidden="true"
+                  >
+                    <path d="M9 3.5h3.5V7" />
+                    <path d="M12.5 3.5 7.5 8.5" />
+                    <path d="M11 9.5V12a.5.5 0 0 1-.5.5H4a.5.5 0 0 1-.5-.5V5.5A.5.5 0 0 1 4 5h2.5" />
+                  </svg>
+                </button>
+              </Tooltip>
             </div>
+            <span className={styles.genCtxValue}>
+              <input
+                type="number"
+                className={styles.genCtxInput}
+                value={ctxChip}
+                min={CTX_MIN}
+                max={CTX_MAX}
+                aria-label="Context window size in tokens"
+                onFocus={() => {
+                  ctxInputFocusedRef.current = true;
+                }}
+                onChange={(e) => setCtxChip(e.target.value)}
+                onBlur={commitCtxInput}
+                onKeyDown={(e) => {
+                  if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
+                }}
+              />
+              <span className={styles.genCtxValueUnit}>tokens</span>
+            </span>
           </div>
-          <div className={styles.genCtxControl}>
+          <div>
             <input
               type="range"
               className={styles.ctxSlider}
@@ -478,26 +569,23 @@ export function ProvidersPane({
               }}
             />
             <div className={styles.ctxTickRow} aria-hidden="true">
-              {CTX_TICKS.map((label, i) => (
+              {CTX_TICKS.map(({ label, value }) => (
                 <span
                   key={label}
                   className={styles.ctxTick}
-                  style={{ left: `${(i / (CTX_TICKS.length - 1)) * 100}%` }}
+                  style={{ left: `${ctxToPos(value) / 10}%` }}
                 >
                   {label}
                 </span>
               ))}
             </div>
-            <div className={styles.genCtxValue}>
-              {Number(ctxChip).toLocaleString()} tokens ·{' '}
-              {ctxTurns.toLocaleString()} turns
-            </div>
           </div>
         </div>
 
-        {/* Keep model warm */}
-        <div className={styles.genRow}>
-          <div className={styles.genLabel}>
+        {/* Keep model warm: status rides the header line next to the name; the
+            release timer and Unload sit on their own row beneath it. */}
+        <div className={`${styles.genRow} ${styles.genRowWarm}`}>
+          <div className={styles.genWarmHead}>
             <div className={styles.genName}>
               Keep model warm
               <Tooltip label={KEEP_WARM_TOOLTIP} multiline>
@@ -510,15 +598,20 @@ export function ProvidersPane({
                 </button>
               </Tooltip>
             </div>
-            <div className={styles.genHelp}>
-              {activeKind === 'builtin'
-                ? `Engine: ${engineState}`
-                : loadedModel !== null
-                  ? `${loadedModel} in VRAM`
-                  : 'No model loaded'}
-            </div>
+            <span className={styles.genWarmStatus}>
+              <span
+                className={
+                  engineWarm
+                    ? `${styles.genStatusDot} ${styles.genStatusDotLive}`
+                    : styles.genStatusDot
+                }
+                aria-hidden="true"
+              />
+              {warmStatusText}
+            </span>
           </div>
-          <div className={styles.genWarmControl}>
+          <div className={styles.genWarmControls}>
+            <span className={styles.genWarmPrefix}>Release after</span>
             <input
               type="number"
               className={styles.keepWarmNumberInput}
@@ -550,7 +643,7 @@ export function ProvidersPane({
             <span className={styles.keepWarmUnit}>min</span>
             <button
               type="button"
-              className={styles.switchBtn}
+              className={`${styles.switchBtn} ${styles.genWarmUnload}`}
               aria-label="Unload now"
               disabled={activeKind === 'builtin' && engineState !== 'loaded'}
               onClick={handleEngineEject}
@@ -580,9 +673,6 @@ export function ProvidersPane({
                 </button>
               </Tooltip>
             </div>
-            <div className={styles.genHelp}>
-              Persona sent at the start of every chat
-            </div>
           </div>
           <button
             type="button"
@@ -610,14 +700,6 @@ export function ProvidersPane({
         ) : null}
       </div>
 
-      {/* A small installed-count footer mirrors the other panes. The active
-          model's identity already lives in the hero and the Running footer, so
-          this stays a neutral count rather than restating it. */}
-      <div className={styles.genFootnote}>
-        {installed.length} installed{' '}
-        {installed.length === 1 ? 'model' : 'models'}
-      </div>
-
       <div className={styles.devSection}>
         <button
           type="button"
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index a1f7f503..26acaac5 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -406,22 +406,6 @@
   font-size: 12px;
   color: var(--t3);
 }
-.heroActive {
-  display: flex;
-  align-items: center;
-  gap: 6px;
-  margin-left: auto;
-  font-size: 11px;
-  font-weight: 580;
-  color: var(--accent);
-}
-.heroLiveDot {
-  width: 6px;
-  height: 6px;
-  border-radius: 50%;
-  background: var(--accent);
-  box-shadow: 0 0 7px var(--accent);
-}
 .heroModel {
   display: flex;
   align-items: center;
@@ -530,46 +514,111 @@
   font-weight: 500;
   color: var(--t1);
 }
-.genHelp {
-  margin-top: 3px;
-  font-size: 11px;
-  color: var(--t3);
+/* Context window row: a compact header (label left, token readout right) over a
+   full-width slider track. */
+.genRowCtx {
+  flex-direction: column;
+  align-items: stretch;
+  gap: 10px;
 }
-.genCtxControl {
-  flex: none;
-  width: 280px;
+.genCtxHead {
   display: flex;
-  flex-direction: column;
-  gap: 8px;
+  align-items: center;
+  gap: 6px;
 }
-.genCtxControl .ctxSlider {
-  width: 100%;
+.genCtxValue {
+  margin-left: auto;
+  display: inline-flex;
+  align-items: center;
+  gap: 7px;
 }
-.genCtxControl .ctxTickRow {
-  position: relative;
-  height: 12px;
+/* Editable token field: type a count or let the slider drive it. */
+.genCtxInput {
+  width: 86px;
+  height: 30px;
+  text-align: right;
+  background: rgba(36, 30, 26, 0.6);
+  border: 1px solid rgba(255, 141, 92, 0.3);
+  border-radius: 8px;
+  padding: 0 10px;
+  color: var(--accent);
+  font-family: inherit;
+  font-size: 14px;
+  font-weight: 700;
+  font-variant-numeric: tabular-nums;
+  outline: none;
+  -moz-appearance: textfield;
 }
-.genCtxValue {
-  font-size: 11px;
+.genCtxInput::-webkit-inner-spin-button,
+.genCtxInput::-webkit-outer-spin-button {
+  -webkit-appearance: none;
+  margin: 0;
+}
+.genCtxInput:focus-visible {
+  border-color: rgba(255, 141, 92, 0.6);
+  box-shadow: 0 0 0 3px rgba(255, 141, 92, 0.12);
+}
+.genCtxValueUnit {
+  font-size: 12px;
   color: var(--t3);
-  text-align: right;
 }
-.genWarmControl {
+/* Icon-button deep link to the tuning guide, sat next to the info tooltip.
+   The compound selector beats .infoBtn's cursor: help so the link reads as
+   clickable. */
+.infoBtn.genCtxLearnBtn {
+  cursor: pointer;
+}
+.genCtxLearnBtn svg {
+  width: 10px;
+  height: 10px;
+}
+/* Keep model warm row: header line (name + live status) over a controls line
+   (release timer + Unload). */
+.genRowWarm {
+  flex-direction: column;
+  align-items: stretch;
+  gap: 11px;
+}
+.genWarmHead {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+}
+.genWarmStatus {
+  margin-left: auto;
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  font-size: 11.5px;
+  color: var(--t2);
+}
+.genStatusDot {
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
   flex: none;
+  background: var(--t3);
+}
+.genStatusDotLive {
+  background: var(--ok);
+  box-shadow: 0 0 7px var(--ok);
+}
+.genWarmControls {
   display: flex;
   align-items: center;
   gap: 8px;
 }
+.genWarmPrefix {
+  font-size: 11.5px;
+  color: var(--t3);
+}
+.genWarmUnload {
+  margin-left: auto;
+}
 .genPromptEditor {
   padding: 0 16px 14px;
   box-shadow: 0 -1px 0 var(--hair-soft);
 }
-.genFootnote {
-  margin-top: 12px;
-  padding: 0 2px;
-  font-size: 11px;
-  color: var(--t3);
-}
 
 /* ─── Body (scrolling content) ──────────────────────────────────────────── */
 

From ae1e418093a58553c9fece19509a4305f84f6f58 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 23:49:45 -0500
Subject: [PATCH 39/89] polish: underline Discover toggle, drop the staff hint
 and fit dots

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/BrowseAllPane.module.css      | 10 +---
 .../tabs/models/DiscoverPane.module.css       | 51 ++++++++-----------
 .../tabs/models/DiscoverPane.test.tsx         |  4 +-
 src/settings/tabs/models/DiscoverPane.tsx     | 23 ++-------
 .../tabs/models/LibraryPane.module.css        | 10 +---
 .../tabs/models/StaffPicksPane.module.css     | 17 +------
 src/settings/tabs/models/StaffPicksPane.tsx   |  3 --
 7 files changed, 30 insertions(+), 88 deletions(-)

diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index 82ee961b..6c07491b 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -188,22 +188,14 @@
   background: rgba(230, 181, 107, 0.1);
 }
 
-/* RAM-fit hint (estimated on the row, accurate per-quant in the accordion). */
+/* Per-quant RAM-fit hint: a coloured label (no dot). */
 .fit {
   flex: none;
   display: inline-flex;
   align-items: center;
-  gap: 5px;
   font-size: 11px;
   font-weight: 560;
 }
-.fit::before {
-  content: '';
-  width: 6px;
-  height: 6px;
-  border-radius: 50%;
-  background: currentColor;
-}
 .fitOk {
   color: var(--ok);
 }
diff --git a/src/settings/tabs/models/DiscoverPane.module.css b/src/settings/tabs/models/DiscoverPane.module.css
index 37342ac2..7a1edf8a 100644
--- a/src/settings/tabs/models/DiscoverPane.module.css
+++ b/src/settings/tabs/models/DiscoverPane.module.css
@@ -2,9 +2,9 @@
  * Styles for the Discover host: the pathway tab control over the Staff-picks
  * and Browse-all panes. Premium tokens cascade from the Settings window root.
  *
- * The tabs are a quiet segmented pill (the active pathway gets the accent fill),
- * deliberately smaller and lighter than the Models section nav above it so the
- * two tab rows do not compete.
+ * The tabs are a centered, icon-free underline control: text only, the active
+ * pathway carrying a short accent underline. Quieter and more editorial than a
+ * filled pill, and it does not compete with the Models section nav above it.
  */
 
 .host {
@@ -14,52 +14,45 @@
 }
 
 .tabs {
-  display: inline-flex;
-  align-self: flex-start;
-  gap: 3px;
-  padding: 3px;
-  margin-bottom: 14px;
-  border-radius: var(--radius-pill);
-  border: 1px solid var(--hair-soft);
-  background: var(--elev-1);
+  display: flex;
+  justify-content: center;
+  gap: 26px;
+  margin-bottom: 16px;
 }
 
 .tab {
+  position: relative;
   display: inline-flex;
   align-items: center;
-  gap: 7px;
-  padding: 6px 14px;
+  padding: 7px 2px 11px;
   border: none;
-  border-radius: var(--radius-pill);
   background: transparent;
   color: var(--t2);
   font-family: inherit;
-  font-size: 12px;
+  font-size: 13px;
   font-weight: 560;
   cursor: pointer;
-  transition:
-    color 140ms ease,
-    background 140ms ease;
-}
-.tab svg {
-  width: 13px;
-  height: 13px;
-  fill: none;
-  stroke: currentColor;
-  stroke-width: 1.8;
-  stroke-linecap: round;
-  stroke-linejoin: round;
+  transition: color 140ms ease;
 }
 .tab:hover:not(.tabActive) {
   color: var(--t1);
 }
 .tab:focus-visible {
   outline: none;
-  box-shadow: 0 0 0 2px var(--accent-soft);
+  color: var(--t1);
 }
 
 .tabActive {
-  color: #16110d;
+  color: var(--t1);
+}
+.tabActive::after {
+  content: '';
+  position: absolute;
+  left: 0;
+  right: 0;
+  bottom: 0;
+  height: 2px;
+  border-radius: 2px;
   background: var(--accent);
 }
 
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index b6781efc..07d9b9ca 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -57,9 +57,9 @@ function renderHost() {
   return render(<DiscoverPane onSaved={() => {}} />);
 }
 
-/** Staff picks is showing when its curated hint is on screen. */
+/** Staff picks is showing when its curated use-case sections are on screen. */
 function staffPicksVisible(): boolean {
-  return screen.queryByText(/Pick by what you want to do/) !== null;
+  return screen.queryByTestId('staff-section-label') !== null;
 }
 
 /** Browse all is showing when its Hugging Face search box is on screen. */
diff --git a/src/settings/tabs/models/DiscoverPane.tsx b/src/settings/tabs/models/DiscoverPane.tsx
index 2e4ab702..bf40d021 100644
--- a/src/settings/tabs/models/DiscoverPane.tsx
+++ b/src/settings/tabs/models/DiscoverPane.tsx
@@ -22,25 +22,9 @@ import type { RawAppConfig } from '../../types';
 
 type Pathway = 'staff' | 'browse';
 
-const STAR_ICON = (
-  <svg viewBox="0 0 24 24" aria-hidden="true">
-    <path d="M12 2l2.6 6.3L21 9l-5 4.3L17.6 20 12 16.5 6.4 20 8 13.3 3 9l6.4-.7z" />
-  </svg>
-);
-const SEARCH_ICON = (
-  <svg viewBox="0 0 24 24" aria-hidden="true">
-    <circle cx="11" cy="11" r="7" />
-    <path d="m20 20-3.5-3.5" />
-  </svg>
-);
-
-const TABS: ReadonlyArray<{
-  id: Pathway;
-  label: string;
-  icon: React.ReactNode;
-}> = [
-  { id: 'staff', label: 'Staff picks', icon: STAR_ICON },
-  { id: 'browse', label: 'Browse all', icon: SEARCH_ICON },
+const TABS: ReadonlyArray<{ id: Pathway; label: string }> = [
+  { id: 'staff', label: 'Staff picks' },
+  { id: 'browse', label: 'Browse all' },
 ];
 
 interface DiscoverPaneProps {
@@ -82,7 +66,6 @@ export function DiscoverPane({ onSaved }: DiscoverPaneProps) {
                 }
               }}
             >
-              {tab.icon}
               <span className={styles.tabLabel}>{tab.label}</span>
             </button>
           );
diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index 5486de99..c2c7971b 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -133,21 +133,13 @@
   background: rgba(255, 255, 255, 0.05);
 }
 
-/* RAM-fit hint: a coloured dot + label reusing the onboarding fit palette. */
+/* RAM-fit hint: a coloured label (no dot) reusing the onboarding fit palette. */
 .fit {
   display: inline-flex;
   align-items: center;
-  gap: 5px;
   font-size: 11px;
   font-weight: 560;
 }
-.fit::before {
-  content: '';
-  width: 6px;
-  height: 6px;
-  border-radius: 50%;
-  background: currentColor;
-}
 .fitOk {
   color: var(--ok);
 }
diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index 0490ff89..92173b60 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -10,13 +10,6 @@
   min-height: 0;
 }
 
-.hint {
-  font-size: 11.5px;
-  color: var(--t3);
-  margin: 0 2px 14px;
-  line-height: 1.5;
-}
-
 .empty {
   padding: 36px 14px;
   text-align: center;
@@ -110,22 +103,14 @@
   flex: none;
 }
 
-/* RAM-fit hint: a coloured dot + label reusing the shared fit palette. */
+/* RAM-fit hint: a coloured label (no dot) reusing the shared fit palette. */
 .fit {
   display: inline-flex;
   align-items: center;
-  gap: 5px;
   font-size: 11px;
   font-weight: 560;
   white-space: nowrap;
 }
-.fit::before {
-  content: '';
-  width: 6px;
-  height: 6px;
-  border-radius: 50%;
-  background: currentColor;
-}
 .fitOk {
   color: var(--ok);
 }
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 27e351b3..f49fd806 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -162,9 +162,6 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
 
   return (
     <div className={styles.pane}>
-      <p className={styles.hint}>
-        Pick by what you want to do. Thuki chose one build of each.
-      </p>
       {sections.map((section) => (
         <div className={styles.section} key={section.category}>
           <div className={styles.secLabel} data-testid="staff-section-label">

From ea70228ece04d9de390b1482a575c4ba12659cb6 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 00:08:08 -0500
Subject: [PATCH 40/89] polish: warm capability-pill colors, drop the installed
 badge on Discover

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/LibraryPane.module.css        | 16 ++++++++++----
 src/settings/tabs/models/LibraryPane.tsx      | 14 ++++++++++---
 .../tabs/models/StaffPicksPane.module.css     | 21 ++++++++++---------
 .../tabs/models/StaffPicksPane.test.tsx       |  7 +++++--
 src/settings/tabs/models/StaffPicksPane.tsx   | 18 ++++++++++------
 src/styles/settings.module.css                |  6 ++++++
 6 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index c2c7971b..56195c8b 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -119,9 +119,9 @@
   flex: none;
 }
 
-/* Capability pills sit in the name line, right after the model name. One calm,
- * cohesive chrome for all of them: neutral text on a faint shared background,
- * no colour dot, so Text / Vision / Thinking read as a quiet family. */
+/* Capability pills sit in the name line, right after the model name. A neutral
+ * chip with the colour carried in the text only: each capability has its own
+ * warm hue (stone / rose / gold), a quiet family rather than a loud rainbow. */
 .pill {
   display: inline-flex;
   align-items: center;
@@ -129,9 +129,17 @@
   font-weight: 540;
   padding: 2px 8px;
   border-radius: var(--radius-pill);
-  color: var(--t2);
   background: rgba(255, 255, 255, 0.05);
 }
+.pillText {
+  color: var(--cap-text);
+}
+.pillVision {
+  color: var(--cap-vision);
+}
+.pillThinking {
+  color: var(--cap-think);
+}
 
 /* RAM-fit hint: a coloured label (no dot) reusing the onboarding fit palette. */
 .fit {
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index 9aa4dd98..ab2fd267 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -204,12 +204,20 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                   <div className={styles.mid}>
                     <div className={styles.name}>
                       {m.display_name}
-                      <span className={styles.pill}>Text</span>
+                      <span className={`${styles.pill} ${styles.pillText}`}>
+                        Text
+                      </span>
                       {caps?.vision ? (
-                        <span className={styles.pill}>Vision</span>
+                        <span className={`${styles.pill} ${styles.pillVision}`}>
+                          Vision
+                        </span>
                       ) : null}
                       {caps?.thinking ? (
-                        <span className={styles.pill}>Thinking</span>
+                        <span
+                          className={`${styles.pill} ${styles.pillThinking}`}
+                        >
+                          Thinking
+                        </span>
                       ) : null}
                     </div>
                     <div className={styles.org}>
diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index 92173b60..dfc89553 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -73,7 +73,8 @@
   color: var(--t1);
 }
 
-/* Calm capability pills: neutral text chips, no colour dot. */
+/* Calm capability pills: a neutral chip, the colour carried in the text only.
+ * Each capability has its own warm hue (stone / rose / gold). */
 .pills {
   display: inline-flex;
   gap: 5px;
@@ -85,9 +86,17 @@
   font-weight: 540;
   padding: 2px 7px;
   border-radius: var(--radius-pill);
-  color: var(--t2);
   background: rgba(255, 255, 255, 0.05);
 }
+.pillText {
+  color: var(--cap-text);
+}
+.pillVision {
+  color: var(--cap-vision);
+}
+.pillThinking {
+  color: var(--cap-think);
+}
 
 .sub {
   font-size: 10.5px;
@@ -154,14 +163,6 @@
   background: var(--accent-soft);
 }
 
-.installed {
-  display: inline-flex;
-  align-items: center;
-  font-size: 11px;
-  font-weight: 600;
-  color: var(--ok);
-}
-
 .resumeWrap {
   display: flex;
   align-items: center;
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index b6e6f6cb..24c20fb5 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -331,15 +331,18 @@ describe('StaffPicksPane', () => {
     ).toBeInTheDocument();
   });
 
-  it('shows Installed instead of a download button', async () => {
+  it('shows no download button and no label for an installed model', async () => {
     await renderPane(() => {}, {
       get_starter_options: [{ ...GEMMA, installed: true }, QWEN, GPT_OSS],
     });
     const row = rowFor('Gemma 4 12B');
-    expect(within(row).getByText('Installed')).toBeInTheDocument();
+    // Already installed: no download affordance and no "Installed" badge; the
+    // row still shows the model and its fit.
     expect(
       within(row).queryByRole('button', { name: 'Download' }),
     ).not.toBeInTheDocument();
+    expect(within(row).queryByText('Installed')).not.toBeInTheDocument();
+    expect(within(row).getByText('Comfortable')).toBeInTheDocument();
   });
 
   it('offers Resume and Discard for an interrupted partial', async () => {
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index f49fd806..0165f984 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -228,12 +228,16 @@ function ModelRow({
               {starter.display_name}
             </span>
             <span className={styles.pills}>
-              <span className={styles.pill}>Text</span>
+              <span className={`${styles.pill} ${styles.pillText}`}>Text</span>
               {starter.vision ? (
-                <span className={styles.pill}>Vision</span>
+                <span className={`${styles.pill} ${styles.pillVision}`}>
+                  Vision
+                </span>
               ) : null}
               {starter.thinking ? (
-                <span className={styles.pill}>Thinking</span>
+                <span className={`${styles.pill} ${styles.pillThinking}`}>
+                  Thinking
+                </span>
               ) : null}
             </span>
           </div>
@@ -295,8 +299,10 @@ const DOWNLOAD_ICON = (
   </svg>
 );
 
-/** The per-row affordance: an installed marker, a resume/discard pair when an
- * interrupted partial exists, or the icon download button. */
+/** The per-row affordance. An already-installed model shows nothing (no
+ * download button, no badge): it lives in Library, so on this Discover surface
+ * the absence of a download is the signal. A resume/discard pair shows when an
+ * interrupted partial exists; otherwise the icon download button. */
 function RowAction({
   option,
   installed,
@@ -308,7 +314,7 @@ function RowAction({
   const { starter } = option;
 
   if (installed) {
-    return <span className={styles.installed}>Installed</span>;
+    return null;
   }
 
   if (partialBytes !== null) {
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 26acaac5..af6981ee 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -40,6 +40,12 @@
   --t3: rgba(236, 234, 231, 0.34);
   --accent: #ff8d5c;
   --accent-soft: rgba(255, 141, 92, 0.14);
+  /* Capability-pill text colours: a calm warm trio, each distinct and none
+   * reusing a status colour (or purple/blue/green). Text = stone, Vision =
+   * dusty rose, Thinking = wheat gold. */
+  --cap-text: #b8aa9a;
+  --cap-vision: #d59aad;
+  --cap-think: #d9c27e;
   --ok: #79c08e;
   --tight: #e6b56b;
   /* RAM-fit "Heavy" (model larger than this Mac comfortably holds) and the

From ff7f4abc94260be658485eb41b6b99f3af8e38b1 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 00:19:57 -0500
Subject: [PATCH 41/89] polish: remove the Running model sidebar card, hug
 RAM-fit tooltips to content

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/components/Tooltip.tsx                    |   7 +-
 src/components/__tests__/Tooltip.test.tsx     |  13 +-
 src/settings/SettingsWindow.test.tsx          |  48 ---
 src/settings/SettingsWindow.tsx               |   2 -
 .../components/RunningModelFooter.test.tsx    | 289 ------------------
 .../components/RunningModelFooter.tsx         | 113 -------
 src/styles/settings.module.css                |  50 ---
 7 files changed, 12 insertions(+), 510 deletions(-)
 delete mode 100644 src/settings/components/RunningModelFooter.test.tsx
 delete mode 100644 src/settings/components/RunningModelFooter.tsx

diff --git a/src/components/Tooltip.tsx b/src/components/Tooltip.tsx
index e537edda..e871f3ae 100644
--- a/src/components/Tooltip.tsx
+++ b/src/components/Tooltip.tsx
@@ -18,8 +18,9 @@ interface TooltipProps {
   children: React.ReactNode;
   /**
    * When true, the tooltip box preserves newlines in `label` and wraps long
-   * lines at a ~320px max width. Single-line icon tooltips should leave this
-   * off for the tight one-line presentation.
+   * lines, sizing to its content up to a 225px max width (so short hints hug
+   * their text instead of filling a fixed box). Single-line icon tooltips
+   * should leave this off for the tight one-line presentation.
    */
   multiline?: boolean;
   /**
@@ -173,7 +174,7 @@ export function Tooltip({
                     }`}
                   />
                   <div
-                    style={multiline ? { width: 225 } : undefined}
+                    style={multiline ? { maxWidth: 225 } : undefined}
                     className={`relative rounded-lg border border-surface-border bg-surface-base px-2.5 py-1.5 text-[11px] text-text-primary shadow-chat ${
                       multiline
                         ? 'whitespace-pre-line leading-snug'
diff --git a/src/components/__tests__/Tooltip.test.tsx b/src/components/__tests__/Tooltip.test.tsx
index 6aad437a..ea373b44 100644
--- a/src/components/__tests__/Tooltip.test.tsx
+++ b/src/components/__tests__/Tooltip.test.tsx
@@ -150,7 +150,7 @@ describe('Tooltip', () => {
     expect(wrapper?.classList.contains('inline-flex')).toBe(true);
   });
 
-  it('renders multiline tooltips at a fixed 225px width so the box stays directly below the trigger near edges', () => {
+  it('caps multiline tooltips at 225px max-width so short content hugs and long content wraps', () => {
     render(
       <Tooltip label={'Open by design: browse and pull any model.'} multiline>
         <button type="button">Trigger</button>
@@ -164,10 +164,13 @@ describe('Tooltip', () => {
       '[style*="position: fixed"]',
     ) as HTMLElement | null;
     expect(fixedBox).not.toBeNull();
-    // The inner content div (under the fixed-positioned outer + motion
-    // wrapper) carries the explicit 225px width style.
-    const inner = fixedBox?.querySelector('div[style*="width"]');
+    // The inner content div carries a max-width (not a fixed width) so the box
+    // shrinks to short content instead of always being 225px wide.
+    const inner = fixedBox?.querySelector(
+      'div[style*="max-width"]',
+    ) as HTMLElement | null;
     expect(inner).not.toBeNull();
-    expect((inner as HTMLElement).style.width).toBe('225px');
+    expect(inner!.style.maxWidth).toBe('225px');
+    expect(inner!.style.width).toBe('');
   });
 });
diff --git a/src/settings/SettingsWindow.test.tsx b/src/settings/SettingsWindow.test.tsx
index 2093d097..4a8ef9c9 100644
--- a/src/settings/SettingsWindow.test.tsx
+++ b/src/settings/SettingsWindow.test.tsx
@@ -608,52 +608,4 @@ describe('SettingsWindow left sidebar (Phase 3)', () => {
       'true',
     );
   });
-
-  it('shows the running-model footer with the active built-in model and size', async () => {
-    const builtinConfig: RawAppConfig = {
-      ...SAMPLE,
-      inference: {
-        ...SAMPLE.inference,
-        active_provider: 'builtin',
-        providers: SAMPLE.inference.providers.map((p) =>
-          p.kind === 'builtin'
-            ? { ...p, model: 'org/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf' }
-            : p,
-        ),
-      },
-    };
-    invokeMock.mockImplementation(async (cmd: string) => {
-      if (cmd === 'get_config') return builtinConfig;
-      if (cmd === 'list_installed_models') {
-        return [
-          {
-            id: 'org/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf',
-            display_name: 'Qwen3.5 9B',
-            size_bytes: 6_600_000_000,
-            quant: 'Q4_K_M',
-          },
-        ];
-      }
-      if (cmd === 'get_engine_status') {
-        return { state: 'loaded', model_path: '/x', port: 1, error: null };
-      }
-      return defaultInvoke(cmd);
-    });
-
-    render(<SettingsWindow />);
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent('Qwen3.5 9B');
-    expect(footer).toHaveTextContent(/Built-in/);
-    expect(footer).toHaveTextContent(/6\.6 GB/);
-  });
-
-  it('running-model footer shows a placeholder when no model is resolved', async () => {
-    render(<SettingsWindow />);
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent(/No model/i);
-  });
 });
diff --git a/src/settings/SettingsWindow.tsx b/src/settings/SettingsWindow.tsx
index 91f09469..52f37f6b 100644
--- a/src/settings/SettingsWindow.tsx
+++ b/src/settings/SettingsWindow.tsx
@@ -31,7 +31,6 @@ import { SearchTab } from './tabs/SearchTab';
 import { DisplayTab } from './tabs/DisplayTab';
 import { AboutTab } from './tabs/AboutTab';
 import { SavedPill } from './components';
-import { RunningModelFooter } from './components/RunningModelFooter';
 import { WindowControls } from '../components/WindowControls';
 import { UpdateBanner } from '../components/UpdateBanner';
 import { useUpdater } from '../hooks/useUpdater';
@@ -395,7 +394,6 @@ export function SettingsWindow() {
             })}
           </div>
           <div className={styles.sideSpacer} />
-          <RunningModelFooter config={config} />
         </div>
 
         <div className={styles.main}>
diff --git a/src/settings/components/RunningModelFooter.test.tsx b/src/settings/components/RunningModelFooter.test.tsx
deleted file mode 100644
index 34717fff..00000000
--- a/src/settings/components/RunningModelFooter.test.tsx
+++ /dev/null
@@ -1,289 +0,0 @@
-import { render, screen, waitFor, act } from '@testing-library/react';
-import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
-
-import { invoke } from '@tauri-apps/api/core';
-import {
-  emitTauriEvent,
-  clearEventHandlers,
-} from '../../testUtils/mocks/tauri';
-
-import { RunningModelFooter } from './RunningModelFooter';
-import type { RawAppConfig, RawProvider } from '../types';
-
-const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
-
-const BUILTIN: RawProvider = {
-  id: 'builtin',
-  kind: 'builtin',
-  label: 'Built-in',
-  base_url: '',
-  model: '',
-  vision: false,
-};
-const OLLAMA: RawProvider = {
-  id: 'ollama',
-  kind: 'ollama',
-  label: 'Ollama',
-  base_url: 'http://127.0.0.1:11434',
-  model: '',
-  vision: false,
-};
-const OPENAI: RawProvider = {
-  id: 'openai',
-  kind: 'openai',
-  label: 'LM Studio',
-  base_url: 'http://127.0.0.1:1234',
-  model: '',
-  vision: false,
-};
-
-function makeConfig(
-  activeProvider: string,
-  providers: RawProvider[],
-): RawAppConfig {
-  return {
-    inference: {
-      active_provider: activeProvider,
-      keep_warm_inactivity_minutes: 0,
-      num_ctx: 16384,
-      providers,
-    },
-    prompt: { system: '' },
-    window: {
-      overlay_width: 600,
-      max_chat_height: 648,
-      max_images: 3,
-      text_base_px: 15,
-      text_line_height: 1.5,
-      text_letter_spacing_px: 0,
-      text_font_weight: 500,
-    },
-    quote: {
-      max_display_lines: 4,
-      max_display_chars: 300,
-      max_context_length: 4096,
-    },
-    behavior: { auto_replace: false, auto_close: false },
-    search: {
-      searxng_url: '',
-      reader_url: '',
-      max_iterations: 3,
-      top_k_urls: 10,
-      searxng_max_results: 10,
-      search_timeout_s: 20,
-      reader_per_url_timeout_s: 10,
-      reader_batch_timeout_s: 30,
-      judge_timeout_s: 30,
-      router_timeout_s: 45,
-    },
-    debug: { trace_enabled: false },
-  };
-}
-
-const QWEN_ROW = {
-  id: 'org/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf',
-  display_name: 'Qwen3.5 9B',
-  size_bytes: 6_600_000_000,
-  quant: 'Q4_K_M',
-};
-
-function mockInvoke(over: Record<string, unknown> = {}) {
-  invokeMock.mockImplementation(async (cmd: string) => {
-    if (Object.prototype.hasOwnProperty.call(over, cmd)) {
-      const v = over[cmd];
-      if (v instanceof Error) throw v;
-      return v;
-    }
-    switch (cmd) {
-      case 'list_installed_models':
-        return [];
-      case 'get_engine_status':
-        return { state: 'stopped', model_path: '', port: null, error: null };
-      default:
-        return undefined;
-    }
-  });
-}
-
-beforeEach(() => {
-  invokeMock.mockReset();
-  clearEventHandlers();
-  mockInvoke();
-});
-
-afterEach(() => {
-  clearEventHandlers();
-});
-
-describe('RunningModelFooter', () => {
-  it('shows the built-in model name, size, and a live dot when the engine is loaded', async () => {
-    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
-    mockInvoke({
-      list_installed_models: [QWEN_ROW],
-      get_engine_status: {
-        state: 'loaded',
-        model_path: '/x',
-        port: 1,
-        error: null,
-      },
-    });
-
-    render(
-      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
-    );
-
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    await waitFor(() => expect(footer).toHaveTextContent('Qwen3.5 9B'));
-    expect(footer).toHaveTextContent('Built-in · 6.6 GB');
-    expect(footer.querySelector('[class*="runningModelDot"]')).not.toBeNull();
-    // Live dot, not the idle variant.
-    expect(footer.querySelector('[class*="DotIdle"]')).toBeNull();
-  });
-
-  it('shows a placeholder when the active built-in model is not installed', async () => {
-    const builtin = { ...BUILTIN, model: 'org/missing:m.gguf' };
-    mockInvoke({ list_installed_models: [QWEN_ROW] });
-
-    render(
-      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
-    );
-
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    await waitFor(() => expect(footer).toHaveTextContent(/No model/i));
-  });
-
-  it('shows the Ollama model name and label with an idle dot', async () => {
-    const ollama = { ...OLLAMA, model: 'llama3.1:8b' };
-    render(
-      <RunningModelFooter config={makeConfig('ollama', [BUILTIN, ollama])} />,
-    );
-
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent('llama3.1:8b');
-    expect(footer).toHaveTextContent('Ollama');
-    expect(footer.querySelector('[class*="DotIdle"]')).not.toBeNull();
-  });
-
-  it('shows a placeholder when the active Ollama provider has no model', async () => {
-    render(
-      <RunningModelFooter config={makeConfig('ollama', [BUILTIN, OLLAMA])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent(/No model/i);
-  });
-
-  it('shows the OpenAI provider model and label', async () => {
-    const openai = { ...OPENAI, model: 'qwen2.5-coder' };
-    render(
-      <RunningModelFooter config={makeConfig('openai', [BUILTIN, openai])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent('qwen2.5-coder');
-    expect(footer).toHaveTextContent('LM Studio');
-  });
-
-  it('falls back to a placeholder when the active provider id matches nothing', async () => {
-    render(
-      <RunningModelFooter config={makeConfig('ghost', [BUILTIN, OLLAMA])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent(/No model/i);
-  });
-
-  it('tolerates a config with no built-in provider', async () => {
-    const ollama = { ...OLLAMA, model: 'llama3.1:8b' };
-    render(<RunningModelFooter config={makeConfig('ollama', [ollama])} />);
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent('llama3.1:8b');
-  });
-
-  it('treats a non-array installed payload as empty', async () => {
-    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
-    mockInvoke({ list_installed_models: null });
-    render(
-      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    await waitFor(() => expect(footer).toHaveTextContent(/No model/i));
-  });
-
-  it('survives a failed installed-models read', async () => {
-    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
-    mockInvoke({ list_installed_models: new Error('io') });
-    render(
-      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    await waitFor(() => expect(footer).toHaveTextContent(/No model/i));
-  });
-
-  it('survives a failed engine-status read', async () => {
-    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
-    mockInvoke({
-      list_installed_models: [QWEN_ROW],
-      get_engine_status: new Error('engine down'),
-    });
-    render(
-      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    await waitFor(() => expect(footer).toHaveTextContent('Qwen3.5 9B'));
-    // Engine status unknown -> idle dot.
-    expect(footer.querySelector('[class*="DotIdle"]')).not.toBeNull();
-  });
-
-  it('reflects a live engine via the engine:status event stream', async () => {
-    const builtin = { ...BUILTIN, model: QWEN_ROW.id };
-    mockInvoke({ list_installed_models: [QWEN_ROW] });
-    render(
-      <RunningModelFooter config={makeConfig('builtin', [builtin, OLLAMA])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    await waitFor(() => expect(footer).toHaveTextContent('Qwen3.5 9B'));
-    expect(footer.querySelector('[class*="DotIdle"]')).not.toBeNull();
-
-    await act(async () => {
-      emitTauriEvent('engine:status', {
-        state: 'loaded',
-        model_path: '/x',
-        port: 1,
-        error: null,
-      });
-    });
-    expect(footer.querySelector('[class*="DotIdle"]')).toBeNull();
-  });
-
-  it('omits the meta line when the active provider has a model but no label', async () => {
-    const ollama = { ...OLLAMA, model: 'llama3.1:8b', label: '' };
-    render(
-      <RunningModelFooter config={makeConfig('ollama', [BUILTIN, ollama])} />,
-    );
-    const footer = await screen.findByRole('status', {
-      name: /running model/i,
-    });
-    expect(footer).toHaveTextContent('llama3.1:8b');
-    expect(footer.querySelector('[class*="runningModelMeta"]')).toBeNull();
-  });
-});
diff --git a/src/settings/components/RunningModelFooter.tsx b/src/settings/components/RunningModelFooter.tsx
deleted file mode 100644
index fb3a6a6a..00000000
--- a/src/settings/components/RunningModelFooter.tsx
+++ /dev/null
@@ -1,113 +0,0 @@
-/**
- * "Running model" footer pinned to the bottom of the Settings sidebar.
- *
- * Always visible, it names the model the active provider will answer with,
- * adds a size hint for the built-in engine, and shows a live dot that lights
- * when that model is currently resident in memory.
- *
- * Data sources, kept deliberately small:
- * - The active provider, its label, and (for Ollama/OpenAI) its model come
- *   straight from the config snapshot the parent already owns; the active
- *   model persists onto the provider's `model` field.
- * - The built-in engine's display name + on-disk size come from the manifest
- *   (`list_installed_models`), refreshed whenever the selected built-in model
- *   id changes.
- * - Liveness for the built-in engine follows `get_engine_status` plus the
- *   `engine:status` event stream. Ollama/OpenAI residency is not polled here,
- *   so their dot stays idle.
- */
-
-import { useEffect, useState } from 'react';
-import { invoke } from '@tauri-apps/api/core';
-import { listen } from '@tauri-apps/api/event';
-
-import styles from '../../styles/settings.module.css';
-import type { RawAppConfig } from '../types';
-import type { EngineStatus, InstalledModel } from '../../types/starter';
-
-/** Bytes rendered as decimal gigabytes with one decimal (e.g. "6.6"). */
-function gb(bytes: number): string {
-  return (bytes / 1e9).toFixed(1);
-}
-
-interface RunningModelFooterProps {
-  config: RawAppConfig;
-}
-
-export function RunningModelFooter({ config }: RunningModelFooterProps) {
-  const [installed, setInstalled] = useState<InstalledModel[]>([]);
-  const [engineState, setEngineState] =
-    useState<EngineStatus['state']>('stopped');
-
-  const providers = config.inference.providers;
-  const active = providers.find(
-    (p) => p.id === config.inference.active_provider,
-  );
-  const kind = active?.kind ?? 'ollama';
-  const builtinModelId =
-    providers.find((p) => p.kind === 'builtin')?.model ?? '';
-
-  // Manifest read seeds the built-in size/name; re-runs when the selected
-  // built-in model id changes (a download/delete/switch lifts a new config).
-  useEffect(() => {
-    void invoke<InstalledModel[]>('list_installed_models')
-      .then((rows) => setInstalled(Array.isArray(rows) ? rows : []))
-      .catch(() => setInstalled([]));
-  }, [builtinModelId]);
-
-  // Engine lifecycle drives the live dot for the built-in engine. Seed from
-  // the current snapshot (the backend only emits on transitions) then follow
-  // the event stream.
-  useEffect(() => {
-    invoke<EngineStatus>('get_engine_status')
-      .then((status) => setEngineState(status.state))
-      .catch(() => {
-        // Keep the stopped default; the event stream corrects it.
-      });
-    const unlisten = listen<EngineStatus>('engine:status', (e) => {
-      setEngineState(e.payload.state);
-    });
-    return () => {
-      void unlisten.then((fn) => fn());
-    };
-  }, []);
-
-  let name: string | null;
-  let meta: string | null;
-  if (kind === 'builtin') {
-    const row = installed.find((m) => m.id === builtinModelId);
-    name = row ? row.display_name : null;
-    meta = row ? `Built-in · ${gb(row.size_bytes)} GB` : null;
-  } else {
-    name = active && active.model !== '' ? active.model : null;
-    meta = active ? active.label : null;
-  }
-
-  const live = kind === 'builtin' && engineState === 'loaded';
-
-  return (
-    <div
-      className={styles.runningModel}
-      role="status"
-      aria-label="Running model"
-    >
-      <div className={styles.runningModelEyebrow}>Running</div>
-      {name ? (
-        <>
-          <div className={styles.runningModelName}>
-            <span
-              className={
-                live ? styles.runningModelDot : styles.runningModelDotIdle
-              }
-              aria-hidden
-            />
-            {name}
-          </div>
-          {meta ? <div className={styles.runningModelMeta}>{meta}</div> : null}
-        </>
-      ) : (
-        <div className={styles.runningModelMeta}>No model selected</div>
-      )}
-    </div>
-  );
-}
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index af6981ee..06c253eb 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -250,56 +250,6 @@
   flex: 1;
 }
 
-/* Running-model footer (always visible, pinned to the sidebar bottom). The
- * spacer above pushes it down when there is room; this margin guarantees a
- * gap from the last section item even when the spacer collapses. */
-.runningModel {
-  margin-top: 14px;
-  padding: 9px 10px;
-  border: 1px solid var(--hair-soft);
-  border-radius: var(--radius-card);
-  background: var(--elev-1);
-}
-.runningModelEyebrow {
-  font-size: 9.5px;
-  font-weight: 600;
-  letter-spacing: 0.07em;
-  text-transform: uppercase;
-  color: var(--t3);
-}
-.runningModelName {
-  display: flex;
-  align-items: center;
-  gap: 6px;
-  margin-top: 4px;
-  font-size: 12px;
-  font-weight: 580;
-  color: var(--t1);
-}
-.runningModelDot {
-  flex: none;
-  width: 6px;
-  height: 6px;
-  border-radius: 50%;
-  background: var(--accent);
-  box-shadow: 0 0 7px var(--accent);
-}
-.runningModelDotIdle {
-  flex: none;
-  width: 6px;
-  height: 6px;
-  border-radius: 50%;
-  /* The active/selected model, just not resident yet: accent (no glow), never
-   * grey, which reads as "disabled". The glow distinguishes the live state. */
-  background: var(--accent);
-  opacity: 0.85;
-}
-.runningModelMeta {
-  margin-top: 3px;
-  font-size: 10.5px;
-  color: var(--t3);
-}
-
 /* ─── Models surface (segmented Library / Discover / Providers) ──────────── */
 
 /* Icon-above-label tabs that mirror the original Settings section nav: a plain

From 97778ce9b40047acf80a751920f58357d3fd391a Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 00:31:31 -0500
Subject: [PATCH 42/89] polish: render RAM-fit tooltips on a single line

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/tabs/models/BrowseAllPane.tsx  | 6 +-----
 src/settings/tabs/models/LibraryPane.tsx    | 6 +-----
 src/settings/tabs/models/StaffPicksPane.tsx | 2 +-
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 901d7720..7d2c4e94 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -258,11 +258,7 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                 <div className={styles.quantRow} key={f.file}>
                   <span className={styles.quantName}>{f.file}</span>
                   {f.fit ? (
-                    <Tooltip
-                      label={RAM_FIT_TOOLTIP[f.fit]}
-                      multiline
-                      placement="top"
-                    >
+                    <Tooltip label={RAM_FIT_TOOLTIP[f.fit]} placement="top">
                       <span className={`${styles.fit} ${FIT_CLASS[f.fit]}`}>
                         {RAM_FIT_LABEL[f.fit]}
                       </span>
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index ab2fd267..f2fe5fe7 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -228,11 +228,7 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                   </div>
                   <div className={styles.right}>
                     {m.fit ? (
-                      <Tooltip
-                        label={RAM_FIT_TOOLTIP[m.fit]}
-                        multiline
-                        placement="top"
-                      >
+                      <Tooltip label={RAM_FIT_TOOLTIP[m.fit]} placement="top">
                         <span className={`${styles.fit} ${FIT_CLASS[m.fit]}`}>
                           {RAM_FIT_LABEL[m.fit]}
                         </span>
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 0165f984..025a3497 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -247,7 +247,7 @@ function ModelRow({
         </div>
         {!showProgress ? (
           <div className={styles.right}>
-            <Tooltip label={RAM_FIT_TOOLTIP[fit]} multiline placement="top">
+            <Tooltip label={RAM_FIT_TOOLTIP[fit]} placement="top">
               <span className={`${styles.fit} ${FIT_CLASS[fit]}`}>
                 {RAM_FIT_LABEL[fit]}
               </span>

From 6d8418a11104dd7a1d9b2f30187192eb38d74876 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 00:48:01 -0500
Subject: [PATCH 43/89] feat: decouple Staff Picks onto an id-keyed catalog
 over the starter registry

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/lib.rs             |   4 +
 src-tauri/src/models/mod.rs      | 170 +++++++++++++++++++++++++------
 src-tauri/src/models/registry.rs |  80 +++++++++++++--
 3 files changed, 216 insertions(+), 38 deletions(-)

diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 4c5b95ad..4f9c7f8b 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -2350,12 +2350,16 @@ pub fn run() {
             #[cfg(not(coverage))]
             models::get_starter_options,
             #[cfg(not(coverage))]
+            models::get_staff_picks,
+            #[cfg(not(coverage))]
             models::get_system_ram_bytes,
             #[cfg(not(coverage))]
             models::get_models_dir_free_bytes,
             #[cfg(not(coverage))]
             models::download_starter,
             #[cfg(not(coverage))]
+            models::download_staff_pick,
+            #[cfg(not(coverage))]
             models::download_repo_model,
             #[cfg(not(coverage))]
             models::list_hf_repo_ggufs,
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index ea953dfc..720ed0a1 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1192,42 +1192,73 @@ pub struct StarterOption {
     pub partial_bytes: Option<u64>,
 }
 
-/// Builds the starter picker rows from the manifest, the blob store's partial
-/// slots, and the machine's RAM. A manifest read error degrades to "not
-/// installed" rather than failing the whole picker.
+/// Annotates one registry entry with the machine-specific facts the picker
+/// renders next to it: RAM fit, installed state, and resumable-partial size. A
+/// manifest read error degrades to "not installed" rather than failing the row.
+fn annotate_starter(
+    s: &registry::Starter,
+    conn: &rusqlite::Connection,
+    store: &storage::ModelStore,
+    ram_bytes: u64,
+) -> StarterOption {
+    StarterOption {
+        starter: s.clone(),
+        fit: registry::ram_fit(s.est_runtime_gb, ram_bytes),
+        installed: matches!(
+            manifest::get(conn, &registry::to_installed_model(s).id),
+            Ok(Some(_))
+        ),
+        partial_bytes: store.existing_partial_len(s.sha256),
+    }
+}
+
+/// The onboarding starter picker rows: exactly the three tier heroes, annotated
+/// for this machine. Onboarding's 3-up comparison is fixed at one model per
+/// tier, so it draws only the heroes even as the Staff Picks catalog grows.
 pub fn build_starter_options(
     conn: &rusqlite::Connection,
     store: &storage::ModelStore,
     ram_bytes: u64,
+) -> Vec<StarterOption> {
+    registry::onboarding_heroes()
+        .into_iter()
+        .map(|s| annotate_starter(s, conn, store, ram_bytes))
+        .collect()
+}
+
+/// The full Staff Picks catalog: every curated registry entry annotated for
+/// this machine. The frontend groups the rows by `starter.category`; unlike
+/// [`build_starter_options`] this is not capped at one model per tier.
+pub fn build_staff_picks(
+    conn: &rusqlite::Connection,
+    store: &storage::ModelStore,
+    ram_bytes: u64,
 ) -> Vec<StarterOption> {
     registry::STARTERS
         .iter()
-        .map(|s| StarterOption {
-            starter: s.clone(),
-            fit: registry::ram_fit(s.est_runtime_gb, ram_bytes),
-            installed: matches!(
-                manifest::get(conn, &registry::to_installed_model(s).id),
-                Ok(Some(_))
-            ),
-            partial_bytes: store.existing_partial_len(s.sha256),
-        })
+        .map(|s| annotate_starter(s, conn, store, ram_bytes))
         .collect()
 }
 
-/// Maps a frontend tier string (`"fast" | "balanced" | "smartest"`) onto its
-/// curated starter. Every [`registry::Tier`] has exactly one `STARTERS`
-/// entry (asserted by registry tests), so the lookup is total.
+/// Maps a Staff Picks `id` onto its curated registry entry. An unknown id
+/// yields an error rather than a panic, so a stale frontend id can never crash
+/// the download path.
+pub fn starter_for_id(id: &str) -> Result<&'static registry::Starter, String> {
+    registry::by_id(id).ok_or_else(|| format!("unknown staff pick id: {id}"))
+}
+
+/// Maps a frontend tier string (`"fast" | "balanced" | "smartest"`) onto the
+/// onboarding hero for that tier. The hero is resolved by id from
+/// [`registry::ONBOARDING_HERO_IDS`], so adding more models of the same tier to
+/// the Staff Picks catalog never changes which model onboarding downloads.
 pub fn starter_for_tier(tier: &str) -> Result<&'static registry::Starter, String> {
-    let tier = match tier {
-        "fast" => registry::Tier::Fast,
-        "balanced" => registry::Tier::Balanced,
-        "smartest" => registry::Tier::Smartest,
+    let idx = match tier {
+        "fast" => 0,
+        "balanced" => 1,
+        "smartest" => 2,
         other => return Err(format!("unknown starter tier: {other}")),
     };
-    Ok(registry::STARTERS
-        .iter()
-        .find(|s| s.tier == tier)
-        .expect("every tier has a starter"))
+    starter_for_id(registry::ONBOARDING_HERO_IDS[idx])
 }
 
 /// The builtin provider's currently configured model id (empty when none).
@@ -2133,6 +2164,19 @@ pub fn get_starter_options(
     Ok(build_starter_options(&conn, &store, system_ram_bytes()))
 }
 
+/// Returns the full Staff Picks catalog: every curated registry entry annotated
+/// with RAM fit, installed state, and resumable-partial size. The frontend
+/// groups the rows by `starter.category` into use-case sections.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+pub fn get_staff_picks(
+    db: tauri::State<'_, crate::history::Database>,
+    store: tauri::State<'_, storage::ModelStore>,
+) -> Result<Vec<StarterOption>, String> {
+    let conn = db.0.lock().map_err(|e| e.to_string())?;
+    Ok(build_staff_picks(&conn, &store, system_ram_bytes()))
+}
+
 /// Total physical RAM in bytes, for frontend sizing copy.
 #[cfg_attr(coverage_nightly, coverage(off))]
 #[cfg_attr(not(coverage), tauri::command)]
@@ -2171,6 +2215,30 @@ pub fn download_starter(
     Ok(())
 }
 
+/// Starts downloading a Staff Picks catalog entry by its stable `id`. Same
+/// verified path as [`download_starter`] (pinned revision + sha256, manifest
+/// record on success), but keyed by id so a category can hold any number of
+/// models. Progress streams over `on_event`.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+pub fn download_staff_pick(
+    id: String,
+    on_event: tauri::ipc::Channel<download::DownloadEvent>,
+    app: tauri::AppHandle,
+    download_state: tauri::State<'_, DownloadState>,
+) -> Result<(), String> {
+    let starter = starter_for_id(&id)?;
+    let token = claim_download(&download_state)?;
+    spawn_model_download(
+        app,
+        registry::download_specs(starter),
+        registry::to_installed_model(starter),
+        token,
+        on_event,
+    );
+    Ok(())
+}
+
 /// Starts downloading a pasted-repo model after resolving its digest, size,
 /// pinned revision, and optional mmproj companion from the Hugging Face API.
 #[cfg_attr(coverage_nightly, coverage(off))]
@@ -4196,21 +4264,26 @@ mod tests {
     }
 
     #[test]
-    fn build_starter_options_marks_installed_and_partial() {
+    fn build_starter_options_returns_annotated_onboarding_heroes() {
         let conn = crate::database::open_in_memory().unwrap();
         let (_dir, store) = make_store();
 
-        // First starter is installed (manifest row present); second has an
-        // in-flight partial; third is untouched.
-        let starters = registry::STARTERS;
-        manifest::insert(&conn, &registry::to_installed_model(&starters[0])).unwrap();
-        std::fs::write(store.partial_path(starters[1].sha256), [0u8; 10]).unwrap();
+        // Onboarding draws exactly the three tier heroes, in tier order. First
+        // hero is installed (manifest row present); second has an in-flight
+        // partial; third is untouched.
+        let heroes = registry::onboarding_heroes();
+        manifest::insert(&conn, &registry::to_installed_model(heroes[0])).unwrap();
+        std::fs::write(store.partial_path(heroes[1].sha256), [0u8; 10]).unwrap();
 
         const GIB: u64 = 1 << 30;
         let opts = build_starter_options(&conn, &store, 16 * GIB);
 
-        assert_eq!(opts.len(), starters.len());
-        assert_eq!(opts[0].starter, starters[0]);
+        assert_eq!(opts.len(), heroes.len());
+        assert_eq!(
+            opts.iter().map(|o| o.starter.id).collect::<Vec<_>>(),
+            registry::ONBOARDING_HERO_IDS.to_vec()
+        );
+        assert_eq!(&opts[0].starter, heroes[0]);
         assert!(opts[0].installed);
         assert_eq!(opts[0].partial_bytes, None);
         assert!(!opts[1].installed);
@@ -4218,7 +4291,7 @@ mod tests {
         assert!(!opts[2].installed);
         assert_eq!(opts[2].partial_bytes, None);
         // Fit hints come straight from registry::ram_fit at the given RAM.
-        for (opt, s) in opts.iter().zip(starters) {
+        for (opt, s) in opts.iter().zip(heroes) {
             assert_eq!(opt.fit, registry::ram_fit(s.est_runtime_gb, 16 * GIB));
         }
     }
@@ -4265,6 +4338,39 @@ mod tests {
         assert!(starter_for_tier("turbo").is_err());
     }
 
+    #[test]
+    fn starter_for_id_resolves_and_rejects() {
+        // The id-keyed Staff Picks download path resolves a real slug and
+        // rejects an unknown one with an error rather than a panic.
+        assert_eq!(starter_for_id("qwen3.5-9b").unwrap().id, "qwen3.5-9b");
+        assert_eq!(starter_for_id("gpt-oss-20b").unwrap().id, "gpt-oss-20b");
+        assert!(starter_for_id("not-a-real-id").is_err());
+    }
+
+    #[test]
+    fn build_staff_picks_covers_every_registry_entry() {
+        let conn = crate::database::open_in_memory().unwrap();
+        let (_dir, store) = make_store();
+        // Install the first catalog entry; only it must read back as installed.
+        manifest::insert(&conn, &registry::to_installed_model(&registry::STARTERS[0])).unwrap();
+
+        const GIB: u64 = 1 << 30;
+        let opts = build_staff_picks(&conn, &store, 16 * GIB);
+
+        // Every registry entry is present, in registry order.
+        assert_eq!(opts.len(), registry::STARTERS.len());
+        assert_eq!(
+            opts.iter().map(|o| o.starter.id).collect::<Vec<_>>(),
+            registry::STARTERS.iter().map(|s| s.id).collect::<Vec<_>>()
+        );
+        assert!(opts[0].installed);
+        assert!(opts[1..].iter().all(|o| !o.installed));
+        // Fit comes straight from registry::ram_fit at the given RAM.
+        for (opt, s) in opts.iter().zip(registry::STARTERS) {
+            assert_eq!(opt.fit, registry::ram_fit(s.est_runtime_gb, 16 * GIB));
+        }
+    }
+
     // ── Model library: download claim ────────────────────────────────────────
 
     #[test]
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index dd29453c..f6a002e1 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -29,7 +29,14 @@ pub enum Tier {
 /// need, baked in at compile time.
 #[derive(Debug, Clone, serde::Serialize, PartialEq)]
 pub struct Starter {
-    /// Which speed/quality tier this entry fills.
+    /// Stable slug, unique across the registry (e.g. `"gemma-4-12b"`). The
+    /// download key and the React row key for the Staff Picks catalog, where a
+    /// single category can hold many models. Onboarding keys on `tier` instead
+    /// and shows only the three [`ONBOARDING_HERO_IDS`] heroes.
+    pub id: &'static str,
+    /// Coarse speed/quality dial for the model. Onboarding's 3-up comparison
+    /// shows one hero per tier; in the Staff Picks catalog several entries can
+    /// share a tier, so it is a size/speed hint there, not a unique key.
     pub tier: Tier,
     /// Model family this entry belongs to (e.g. "Gemma", "Qwen", "gpt-oss").
     /// Several starters can share a family when the catalog offers more than one
@@ -86,6 +93,7 @@ pub struct Starter {
 /// The curated starters, ordered Fast, Balanced, Smartest.
 pub const STARTERS: &[Starter] = &[
     Starter {
+        id: "qwen3.5-9b",
         tier: Tier::Fast,
         family: "Qwen",
         category: "Everyday chat",
@@ -108,6 +116,7 @@ pub const STARTERS: &[Starter] = &[
         origin_repo: "Qwen/Qwen3.5-9B",
     },
     Starter {
+        id: "gemma-4-12b",
         tier: Tier::Balanced,
         family: "Gemma",
         category: "Everyday chat",
@@ -130,6 +139,7 @@ pub const STARTERS: &[Starter] = &[
         origin_repo: "google/gemma-4-12B-it",
     },
     Starter {
+        id: "gpt-oss-20b",
         tier: Tier::Smartest,
         family: "gpt-oss",
         category: "Deep reasoning",
@@ -153,6 +163,30 @@ pub const STARTERS: &[Starter] = &[
     },
 ];
 
+/// Ids of the three onboarding hero starters, in tier order
+/// (Fast, Balanced, Smartest). Onboarding's 3-up comparison selects exactly
+/// these by id; the Staff Picks catalog may hold any number of other entries
+/// without disturbing the onboarding heroes.
+pub const ONBOARDING_HERO_IDS: [&str; 3] = ["qwen3.5-9b", "gemma-4-12b", "gpt-oss-20b"];
+
+/// The registry entry with this id, if any. The id-keyed download path and the
+/// onboarding-hero lookup both resolve entries through here, so a bad id yields
+/// `None` rather than a panic.
+pub fn by_id(id: &str) -> Option<&'static Starter> {
+    STARTERS.iter().find(|s| s.id == id)
+}
+
+/// The three onboarding hero starters, resolved from [`ONBOARDING_HERO_IDS`] in
+/// tier order. Any id that is absent from the registry is skipped, so the
+/// result is the heroes that actually exist; a registry test asserts all three
+/// resolve, so in practice the list is always length three.
+pub fn onboarding_heroes() -> Vec<&'static Starter> {
+    ONBOARDING_HERO_IDS
+        .iter()
+        .filter_map(|id| by_id(id))
+        .collect()
+}
+
 /// RAM-fit hint rendered as a badge on each starter row.
 #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize)]
 #[serde(rename_all = "snake_case")]
@@ -228,17 +262,51 @@ mod tests {
         s.len() == len && s.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
     }
 
+    /// Resolves the onboarding hero for a tier by id, not by find-first-of-tier:
+    /// the catalog can hold several entries of the same tier, so only the hero
+    /// ids identify the three onboarding models unambiguously.
     fn starter(tier: Tier) -> &'static Starter {
-        STARTERS.iter().find(|s| s.tier == tier).unwrap()
+        let idx = match tier {
+            Tier::Fast => 0,
+            Tier::Balanced => 1,
+            Tier::Smartest => 2,
+        };
+        by_id(ONBOARDING_HERO_IDS[idx]).unwrap()
+    }
+
+    #[test]
+    fn ids_are_present_and_unique() {
+        // The Staff Picks catalog and the id-keyed download path key on `id`,
+        // so every entry needs a non-empty slug and no two may collide.
+        let mut seen = std::collections::HashSet::new();
+        for s in STARTERS {
+            assert!(!s.id.is_empty(), "{}: id is empty", s.repo);
+            assert!(seen.insert(s.id), "duplicate id: {}", s.id);
+        }
     }
 
     #[test]
-    fn three_tiers_present() {
-        assert_eq!(STARTERS.len(), 3);
+    fn by_id_resolves_present_and_misses_unknown() {
+        // by_id finds a present entry and returns None for an unknown slug,
+        // so the lookup never panics on a bad id.
+        assert_eq!(by_id(STARTERS[0].id).unwrap().id, STARTERS[0].id);
+        assert!(by_id("no-such-model").is_none());
+    }
+
+    #[test]
+    fn onboarding_heroes_are_three_in_tier_order() {
+        // The onboarding picker shows exactly three heroes, one per tier, in
+        // Fast/Balanced/Smartest order; each id resolves to a real entry.
+        assert_eq!(ONBOARDING_HERO_IDS.len(), 3);
+        let heroes = onboarding_heroes();
+        assert_eq!(heroes.len(), 3);
         assert_eq!(
-            STARTERS.iter().map(|s| s.tier).collect::<Vec<_>>(),
+            heroes.iter().map(|s| s.tier).collect::<Vec<_>>(),
             vec![Tier::Fast, Tier::Balanced, Tier::Smartest]
         );
+        for id in ONBOARDING_HERO_IDS {
+            assert!(by_id(id).is_some(), "hero id missing from registry: {id}");
+        }
     }
 
     #[test]
@@ -394,7 +462,7 @@ mod tests {
             (32, [RamFit::Fits, RamFit::Fits, RamFit::Fits]),
         ];
         for (ram_gib, expected) in table {
-            for (s, want) in STARTERS.iter().zip(expected) {
+            for (s, want) in onboarding_heroes().iter().zip(expected) {
                 let got = ram_fit(s.est_runtime_gb, ram_gib * GIB);
                 assert_eq!(
                     got, *want,

From 0877bfa884fb829bbd7da06ec422917c281a6c23 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 02:14:11 -0500
Subject: [PATCH 44/89] feat: repoint Staff Picks UI onto the id-keyed catalog
 via useStaffPicks and startById

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/__tests__/App.test.tsx                    |  1 +
 src/components/StarterPicker.tsx              | 39 ++++++++-
 .../__tests__/StarterPicker.test.tsx          | 46 +++++++++-
 src/hooks/__tests__/useDownloadModel.test.tsx | 30 +++++++
 src/hooks/useDownloadModel.ts                 | 17 ++++
 .../tabs/models/DiscoverPane.test.tsx         |  3 +-
 .../tabs/models/StaffPicksPane.test.tsx       | 34 ++++----
 src/settings/tabs/models/StaffPicksPane.tsx   | 86 +++++++++----------
 src/types/starter.ts                          | 11 +++
 9 files changed, 203 insertions(+), 64 deletions(-)

diff --git a/src/__tests__/App.test.tsx b/src/__tests__/App.test.tsx
index b6a0db22..58dc56e5 100644
--- a/src/__tests__/App.test.tsx
+++ b/src/__tests__/App.test.tsx
@@ -63,6 +63,7 @@ function makeDownloadCtx(
     cancelConfirm: vi.fn(),
     start: vi.fn(async () => {}),
     startRepo: vi.fn(async () => {}),
+    startById: vi.fn(async () => {}),
     cancel: vi.fn(async () => {}),
     retry: vi.fn(async () => {}),
     resume: vi.fn(async () => {}),
diff --git a/src/components/StarterPicker.tsx b/src/components/StarterPicker.tsx
index cd4d5a32..891c6bea 100644
--- a/src/components/StarterPicker.tsx
+++ b/src/components/StarterPicker.tsx
@@ -9,7 +9,12 @@
 
 import { useCallback, useEffect, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
-import type { RamFit, StarterOption, StarterTier } from '../types/starter';
+import type {
+  RamFit,
+  StaffPickOption,
+  StarterOption,
+  StarterTier,
+} from '../types/starter';
 
 const HF_BASE_URL = 'https://huggingface.co';
 
@@ -77,6 +82,38 @@ export function useStarterOptions(): UseStarterOptionsResult {
   return { options, refresh };
 }
 
+export interface UseStaffPicksResult {
+  /** The Staff Picks catalog rows; `null` while the first fetch is in flight. */
+  options: StaffPickOption[] | null;
+  /** Re-fetch (e.g. after a cancel kept a resumable partial). */
+  refresh: () => Promise<void>;
+}
+
+/**
+ * Loads the full Staff Picks catalog from the backend. A fetch failure (or a
+ * malformed non-array payload) degrades to an empty list so the pane renders
+ * nothing rather than crashing. Sibling of {@link useStarterOptions}: the
+ * catalog is id-keyed and category-grouped, not capped at one model per tier.
+ */
+export function useStaffPicks(): UseStaffPicksResult {
+  const [options, setOptions] = useState<StaffPickOption[] | null>(null);
+
+  const refresh = useCallback(async () => {
+    try {
+      const rows = await invoke<StaffPickOption[]>('get_staff_picks');
+      setOptions(Array.isArray(rows) ? rows : []);
+    } catch {
+      setOptions([]);
+    }
+  }, []);
+
+  useEffect(() => {
+    void refresh();
+  }, [refresh]);
+
+  return { options, refresh };
+}
+
 export interface StarterPickerProps {
   options: StarterOption[];
   /** The highlighted tier. Consumers default this to 'balanced'. */
diff --git a/src/components/__tests__/StarterPicker.test.tsx b/src/components/__tests__/StarterPicker.test.tsx
index 83a73350..86c59f6d 100644
--- a/src/components/__tests__/StarterPicker.test.tsx
+++ b/src/components/__tests__/StarterPicker.test.tsx
@@ -6,7 +6,11 @@ import {
   act,
 } from '@testing-library/react';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
-import { StarterPicker, useStarterOptions } from '../StarterPicker';
+import {
+  StarterPicker,
+  useStaffPicks,
+  useStarterOptions,
+} from '../StarterPicker';
 import { invoke } from '../../testUtils/mocks/tauri';
 import type { Starter, StarterOption, StarterTier } from '../../types/starter';
 
@@ -291,3 +295,43 @@ describe('useStarterOptions', () => {
     expect(result.current.options).toEqual(THREE_TIERS);
   });
 });
+
+describe('useStaffPicks', () => {
+  beforeEach(() => {
+    invoke.mockReset();
+  });
+
+  it('starts null and loads the catalog from get_staff_picks on mount', async () => {
+    invoke.mockResolvedValueOnce(THREE_TIERS);
+    const { result } = renderHook(() => useStaffPicks());
+    expect(result.current.options).toBeNull();
+    await act(async () => {});
+    expect(result.current.options).toEqual(THREE_TIERS);
+    expect(invoke).toHaveBeenCalledWith('get_staff_picks');
+  });
+
+  it('degrades to an empty list when the fetch rejects', async () => {
+    invoke.mockRejectedValueOnce('backend down');
+    const { result } = renderHook(() => useStaffPicks());
+    await act(async () => {});
+    expect(result.current.options).toEqual([]);
+  });
+
+  it('coerces a malformed non-array payload to an empty list', async () => {
+    invoke.mockResolvedValueOnce({ not: 'an array' });
+    const { result } = renderHook(() => useStaffPicks());
+    await act(async () => {});
+    expect(result.current.options).toEqual([]);
+  });
+
+  it('re-fetches on refresh', async () => {
+    invoke.mockResolvedValueOnce([]);
+    const { result } = renderHook(() => useStaffPicks());
+    await act(async () => {});
+    expect(result.current.options).toEqual([]);
+
+    invoke.mockResolvedValueOnce(THREE_TIERS);
+    await act(() => result.current.refresh());
+    expect(result.current.options).toEqual(THREE_TIERS);
+  });
+});
diff --git a/src/hooks/__tests__/useDownloadModel.test.tsx b/src/hooks/__tests__/useDownloadModel.test.tsx
index 7ba24e82..990d9827 100644
--- a/src/hooks/__tests__/useDownloadModel.test.tsx
+++ b/src/hooks/__tests__/useDownloadModel.test.tsx
@@ -379,6 +379,36 @@ describe('useDownloadModel', () => {
     });
   });
 
+  it('starts a Staff Picks download through download_staff_pick', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.startById('gemma-4-12b'));
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_staff_pick', {
+      id: 'gemma-4-12b',
+      onEvent: expect.anything(),
+    });
+    act(() => channel().simulateMessage({ type: 'AllDone' }));
+    expect(result.current.state).toEqual({ phase: 'ready' });
+  });
+
+  it('retries the last Staff Picks download after a failure', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.startById('gpt-oss-20b'));
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'http', message: 'HTTP 500' },
+      }),
+    );
+
+    await act(() => result.current.retry());
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenLastCalledWith('download_staff_pick', {
+      id: 'gpt-oss-20b',
+      onEvent: expect.anything(),
+    });
+  });
+
   it('reset returns failed to idle and clears the stale progress', async () => {
     const { result } = renderHook(() => useDownloadModel());
     await act(() => result.current.start('smartest'));
diff --git a/src/hooks/useDownloadModel.ts b/src/hooks/useDownloadModel.ts
index 8dc51454..89f45f15 100644
--- a/src/hooks/useDownloadModel.ts
+++ b/src/hooks/useDownloadModel.ts
@@ -153,6 +153,13 @@ export interface UseDownloadModel {
    * with a channel. Same event stream and terminal states as `start`.
    */
   startRepo: (repo: string, file: string) => Promise<void>;
+  /**
+   * idle -> downloading for a Staff Picks catalog entry, keyed by its stable
+   * `id`; invokes `download_staff_pick` with a channel. Same event stream and
+   * terminal states as `start`; `retry` replays it, and a resume is just
+   * calling it again (the backend resumes the partial via Range).
+   */
+  startById: (id: string) => Promise<void>;
   /**
    * Invokes `cancel_model_download`. The state flips back to idle when the
    * backend's Cancelled event lands; the partial is KEPT, so the caller
@@ -379,6 +386,15 @@ export function useDownloadModel(
     [run],
   );
 
+  const startById = useCallback(
+    async (id: string) => {
+      const again = () => run('download_staff_pick', { id });
+      lastStartRef.current = again; // kept so `retry` can replay this id
+      await again();
+    },
+    [run],
+  );
+
   const cancel = useCallback(async () => {
     await invoke('cancel_model_download');
   }, []);
@@ -429,6 +445,7 @@ export function useDownloadModel(
     cancelConfirm,
     start,
     startRepo,
+    startById,
     cancel,
     retry,
     resume: start,
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index 07d9b9ca..2bfb3333 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -18,6 +18,7 @@ const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
 
 const STARTER: StarterOption = {
   starter: {
+    id: 'gemma-4-12b',
     tier: 'balanced',
     family: 'Gemma',
     display_name: 'Gemma 4 12B',
@@ -47,7 +48,7 @@ beforeEach(() => {
   invokeMock.mockReset();
   clearHfSearchCache();
   invokeMock.mockImplementation(async (cmd: string) => {
-    if (cmd === 'get_starter_options') return [STARTER];
+    if (cmd === 'get_staff_picks') return [STARTER];
     if (cmd === 'search_hf_models') return [];
     return undefined;
   });
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index 24c20fb5..c3fffd4d 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -6,7 +6,7 @@
  * category alphabetically; within a section models are alphabetical. Each
  * compact row shows the model name, capability pills (Text always, plus Vision
  * / Thinking), a `size · maker` sub-line, a RAM-fit hint, and a single icon
- * download that runs the VERIFIED starter path (`download_starter`, pinned
+ * download that runs the VERIFIED catalog path (`download_staff_pick`, pinned
  * revision + sha256). The download channel is captured the same way
  * BrowseAllPane.test.tsx does it.
  */
@@ -58,6 +58,7 @@ function mockCommands(responses: Record<string, unknown>) {
 
 function starter(over: Partial<Starter>): Starter {
   return {
+    id: 'gemma-4-12b',
     tier: 'balanced',
     family: 'Gemma',
     category: 'Everyday chat',
@@ -97,6 +98,7 @@ function option(
 
 /** Two everyday models + one reasoning model (deliberately NOT alpha order). */
 const QWEN = option({
+  id: 'qwen3.5-9b',
   tier: 'fast',
   family: 'Qwen',
   category: 'Everyday chat',
@@ -110,6 +112,7 @@ const QWEN = option({
 });
 const GEMMA = option({});
 const GPT_OSS = option({
+  id: 'gpt-oss-20b',
   tier: 'smartest',
   family: 'gpt-oss',
   category: 'Deep reasoning',
@@ -132,7 +135,7 @@ const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
 
 function picksResponses(overrides: Record<string, unknown> = {}) {
   return {
-    get_starter_options: STARTERS,
+    get_staff_picks: STARTERS,
     get_config: CONFIG_AFTER_INSTALL,
     ...overrides,
   };
@@ -157,7 +160,7 @@ async function renderPane(
   mockCommands(picksResponses(overrides));
   const view = render(<StaffPicksPane onSaved={onSaved} />);
   await waitFor(() =>
-    expect(invokeMock).toHaveBeenCalledWith('get_starter_options'),
+    expect(invokeMock).toHaveBeenCalledWith('get_staff_picks'),
   );
   await flush();
   return view;
@@ -217,9 +220,10 @@ describe('StaffPicksPane', () => {
 
   it('appends an unrecognized category after the known sections', async () => {
     await renderPane(() => {}, {
-      get_starter_options: [
+      get_staff_picks: [
         GEMMA,
         option({
+          id: 'qwen3-coder-7b',
           tier: 'fast',
           category: 'Coding',
           display_name: 'Qwen3 Coder 7B',
@@ -234,7 +238,7 @@ describe('StaffPicksPane', () => {
 
   it('buckets a model with no category under Other', async () => {
     await renderPane(() => {}, {
-      get_starter_options: [
+      get_staff_picks: [
         option({ category: undefined, display_name: 'Mystery 7B' }),
       ],
     });
@@ -248,8 +252,8 @@ describe('StaffPicksPane', () => {
     fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith(
-      'download_starter',
-      expect.objectContaining({ tier: 'balanced' }),
+      'download_staff_pick',
+      expect.objectContaining({ id: 'gemma-4-12b' }),
     );
   });
 
@@ -307,7 +311,7 @@ describe('StaffPicksPane', () => {
     fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
     await flush();
     const starts = invokeMock.mock.calls.filter(
-      (c: unknown[]) => c[0] === 'download_starter',
+      (c: unknown[]) => c[0] === 'download_staff_pick',
     );
     expect(starts).toHaveLength(2);
   });
@@ -333,7 +337,7 @@ describe('StaffPicksPane', () => {
 
   it('shows no download button and no label for an installed model', async () => {
     await renderPane(() => {}, {
-      get_starter_options: [{ ...GEMMA, installed: true }, QWEN, GPT_OSS],
+      get_staff_picks: [{ ...GEMMA, installed: true }, QWEN, GPT_OSS],
     });
     const row = rowFor('Gemma 4 12B');
     // Already installed: no download affordance and no "Installed" badge; the
@@ -347,7 +351,7 @@ describe('StaffPicksPane', () => {
 
   it('offers Resume and Discard for an interrupted partial', async () => {
     await renderPane(() => {}, {
-      get_starter_options: [
+      get_staff_picks: [
         { ...GEMMA, partial_bytes: 2_000_000_000 },
         QWEN,
         GPT_OSS,
@@ -357,14 +361,14 @@ describe('StaffPicksPane', () => {
     fireEvent.click(within(row).getByRole('button', { name: /Resume/ }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith(
-      'download_starter',
-      expect.objectContaining({ tier: 'balanced' }),
+      'download_staff_pick',
+      expect.objectContaining({ id: 'gemma-4-12b' }),
     );
   });
 
   it('discards an interrupted partial and refreshes', async () => {
     await renderPane(() => {}, {
-      get_starter_options: [
+      get_staff_picks: [
         { ...GEMMA, partial_bytes: 2_000_000_000 },
         QWEN,
         GPT_OSS,
@@ -379,13 +383,13 @@ describe('StaffPicksPane', () => {
   });
 
   it('shows an empty state when no starters are available', async () => {
-    await renderPane(() => {}, { get_starter_options: [] });
+    await renderPane(() => {}, { get_staff_picks: [] });
     expect(screen.getByText(/No curated models/)).toBeInTheDocument();
   });
 
   it('degrades to the empty state when the probe rejects', async () => {
     await renderPane(() => {}, {
-      get_starter_options: new Reject(new Error('probe failed')),
+      get_staff_picks: new Reject(new Error('probe failed')),
     });
     expect(screen.getByText(/No curated models/)).toBeInTheDocument();
   });
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 025a3497..5afa7c95 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -7,15 +7,17 @@
  * extra category alphabetically; within a section models are alphabetical. Each
  * compact row shows the model name, capability pills (Text always, plus Vision
  * / Thinking), a `size · maker` sub-line, a RAM-fit hint, and a single icon
- * download that runs the VERIFIED starter path (`download_starter`, pinned
- * revision + sha256), unlike the Browse-all pane's arbitrary repo downloads. A
- * finished install lifts a fresh config snapshot.
+ * download that runs the VERIFIED catalog path (`download_staff_pick`, keyed by
+ * the entry's stable id, pinned revision + sha256), unlike the Browse-all
+ * pane's arbitrary repo downloads. A finished install lifts a fresh config
+ * snapshot.
  *
- * Data comes from {@link useStarterOptions} (the same rows onboarding's picker
- * uses); the download state machine is the shared {@link useDownloadModel}, so
- * the in-flight / failed UI is the same {@link DownloadProgress} card the rest
- * of the app shows. At most one model downloads at a time (the backend enforces
- * it too); `activeTier` tracks which row owns the progress card.
+ * Data comes from {@link useStaffPicks}, the id-keyed catalog (decoupled from
+ * onboarding's three tier heroes so a category can hold any number of models);
+ * the download state machine is the shared {@link useDownloadModel}, so the
+ * in-flight / failed UI is the same {@link DownloadProgress} card the rest of
+ * the app shows. At most one model downloads at a time (the backend enforces it
+ * too); `activeId` tracks which row owns the progress card.
  */
 
 import { useEffect, useMemo, useState } from 'react';
@@ -23,16 +25,12 @@ import { invoke } from '@tauri-apps/api/core';
 
 import { DownloadProgress } from '../../../components/DownloadProgress';
 import { useDownloadModel } from '../../../hooks/useDownloadModel';
-import { useStarterOptions } from '../../../components/StarterPicker';
+import { useStaffPicks } from '../../../components/StarterPicker';
 import { Tooltip } from '../../../components/Tooltip';
 import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
 import styles from './StaffPicksPane.module.css';
 import type { RawAppConfig } from '../../types';
-import type {
-  RamFit,
-  StarterOption,
-  StarterTier,
-} from '../../../types/starter';
+import type { RamFit, StaffPickOption } from '../../../types/starter';
 
 /** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
@@ -54,21 +52,21 @@ function gb(bytes: number): string {
 }
 
 /** Weights + vision companion: the full on-disk cost of one starter. */
-function totalBytes(o: StarterOption): number {
+function totalBytes(o: StaffPickOption): number {
   return o.starter.size_bytes + o.starter.mmproj_bytes;
 }
 
 /** One use-case section: its label and the models under it. */
 interface Section {
   category: string;
-  options: StarterOption[];
+  options: StaffPickOption[];
 }
 
 /** Groups models into use-case sections: known categories first in their fixed
  * order, then any extra category alphabetically; models within a section are
  * alphabetical by name. */
-function groupByCategory(options: StarterOption[]): Section[] {
-  const buckets = new Map<string, StarterOption[]>();
+function groupByCategory(options: StaffPickOption[]): Section[] {
+  const buckets = new Map<string, StaffPickOption[]>();
   for (const o of options) {
     const category = o.starter.category ?? UNCATEGORIZED;
     const list = buckets.get(category);
@@ -84,7 +82,7 @@ function groupByCategory(options: StarterOption[]): Section[] {
     .sort();
   return [...known, ...extra].map((category) => ({
     category,
-    options: (buckets.get(category) as StarterOption[]).sort((a, b) =>
+    options: (buckets.get(category) as StaffPickOption[]).sort((a, b) =>
       a.starter.display_name.localeCompare(b.starter.display_name, undefined, {
         sensitivity: 'base',
       }),
@@ -98,17 +96,16 @@ interface StaffPicksPaneProps {
 }
 
 export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
-  const { options, refresh } = useStarterOptions();
+  const { options, refresh } = useStaffPicks();
   const sections = useMemo(() => groupByCategory(options ?? []), [options]);
 
-  // One download at a time; activeTier names the row that owns the progress card.
-  const [activeTier, setActiveTier] = useState<StarterTier | null>(null);
+  // One download at a time; activeId names the row that owns the progress card.
+  const [activeId, setActiveId] = useState<string | null>(null);
   const {
     state,
     progress,
     etaSeconds,
-    start,
-    resume,
+    startById,
     cancel,
     retry,
     reset,
@@ -127,19 +124,16 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
         // The focus-driven resync picks the change up on next activation.
       }
       reset();
-      setActiveTier(null);
+      setActiveId(null);
       await refresh();
     })();
   }, [state.phase, onSaved, reset, refresh]);
 
-  function startDownload(tier: StarterTier) {
-    setActiveTier(tier);
-    void start(tier);
-  }
-
-  function resumeDownload(tier: StarterTier) {
-    setActiveTier(tier);
-    void resume(tier);
+  // Download and resume both run the same id-keyed verified path; the backend
+  // resumes from a kept partial via Range, so resume is just starting again.
+  function startDownload(id: string) {
+    setActiveId(id);
+    void startById(id);
   }
 
   async function discardPartial(sha256: string) {
@@ -149,7 +143,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
 
   function returnToPicker() {
     reset();
-    setActiveTier(null);
+    setActiveId(null);
   }
 
   if (options !== null && sections.length === 0) {
@@ -169,14 +163,14 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
           </div>
           {section.options.map((o) => (
             <ModelRow
-              key={o.starter.tier}
+              key={o.starter.id}
               option={o}
-              active={activeTier === o.starter.tier}
+              active={activeId === o.starter.id}
               state={state}
               progress={progress}
               etaSeconds={etaSeconds}
               onDownload={startDownload}
-              onResume={resumeDownload}
+              onResume={startDownload}
               onDiscard={discardPartial}
               onCancel={() => void cancel()}
               onRetry={() => void retry()}
@@ -190,13 +184,13 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
 }
 
 interface ModelRowProps {
-  option: StarterOption;
+  option: StaffPickOption;
   active: boolean;
   state: ReturnType<typeof useDownloadModel>['state'];
   progress: ReturnType<typeof useDownloadModel>['progress'];
   etaSeconds: number | null;
-  onDownload: (tier: StarterTier) => void;
-  onResume: (tier: StarterTier) => void;
+  onDownload: (id: string) => void;
+  onResume: (id: string) => void;
   onDiscard: (sha256: string) => void;
   onCancel: () => void;
   onRetry: () => void;
@@ -220,7 +214,7 @@ function ModelRow({
   const showProgress = active && state.phase !== 'idle';
 
   return (
-    <div className={styles.row} data-model-row data-tier={starter.tier}>
+    <div className={styles.row} data-model-row data-id={starter.id}>
       <div className={styles.rowMain}>
         <div className={styles.mid}>
           <div className={styles.top}>
@@ -285,11 +279,11 @@ function ModelRow({
 }
 
 interface RowActionProps {
-  option: StarterOption;
+  option: StaffPickOption;
   installed: boolean;
   partialBytes: number | null;
-  onDownload: (tier: StarterTier) => void;
-  onResume: (tier: StarterTier) => void;
+  onDownload: (id: string) => void;
+  onResume: (id: string) => void;
   onDiscard: (sha256: string) => void;
 }
 
@@ -323,7 +317,7 @@ function RowAction({
         <button
           type="button"
           className={styles.resumeBtn}
-          onClick={() => onResume(starter.tier)}
+          onClick={() => onResume(starter.id)}
         >
           Resume ({gb(partialBytes)} GB)
         </button>
@@ -343,7 +337,7 @@ function RowAction({
       type="button"
       className={styles.getBtn}
       aria-label="Download"
-      onClick={() => onDownload(starter.tier)}
+      onClick={() => onDownload(starter.id)}
     >
       {DOWNLOAD_ICON}
     </button>
diff --git a/src/types/starter.ts b/src/types/starter.ts
index f589c226..2713a2a4 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -18,6 +18,10 @@ export type RamFit = 'fits' | 'tight' | 'too_big';
 
 /** One curated starter model from the compile-time registry. */
 export interface Starter {
+  /** Stable slug, unique across the registry; the Staff Picks row key and the
+   * id-keyed download key. Backend always sends it; optional here for
+   * test-fixture ergonomics (onboarding keys on `tier` and never reads it). */
+  id?: string;
   tier: StarterTier;
   /** Model family this entry belongs to (e.g. "Gemma", "Qwen", "gpt-oss").
    * Backend always sends it; optional here for test-fixture ergonomics. */
@@ -57,6 +61,13 @@ export interface StarterOption {
   partial_bytes: number | null;
 }
 
+/** One Staff Picks catalog row. Same shape as {@link StarterOption}, but the
+ * catalog is id-keyed: `starter.id` is always present, so the pane keys rows
+ * and starts downloads by it. */
+export interface StaffPickOption extends StarterOption {
+  starter: Starter & { id: string };
+}
+
 /** Failure category carried by a `Failed` download event. */
 export type DownloadFailKind =
   | 'offline'

From e3274e3ef20ca08e7a576b9cbd48548fe737e61f Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:12:40 -0500
Subject: [PATCH 45/89] feat: expand the Staff Picks catalog with seven
 deeply-vetted models

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/registry.rs | 294 ++++++++++++++++++++++++++++++-
 1 file changed, 286 insertions(+), 8 deletions(-)

diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index f6a002e1..a9c6cf97 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -1,15 +1,21 @@
 /*!
- * Curated starter model registry for the built-in llama.cpp engine.
+ * Curated model registry for the built-in llama.cpp engine.
  *
- * Three tiers (Fast / Balanced / Smartest) cover the RAM spectrum of Apple
- * Silicon Macs. Every entry pins a Hugging Face repo at an exact git revision
- * and carries the SHA-256 of each blob, so a starter download is reproducible
- * and verifiable end to end (the digests feed straight into
- * [`crate::models::download::DownloadSpec`] which verifies them on install).
+ * This is the Staff Picks catalog: a small, deeply-vetted set of models grouped
+ * into use-case sections (Everyday chat / Compact & fast / Deep reasoning).
+ * Three of the entries double as the onboarding heroes (one per tier, see
+ * [`ONBOARDING_HERO_IDS`]); the rest exist only in the catalog. Every entry
+ * pins a Hugging Face repo at an exact git revision and carries the SHA-256 of
+ * each blob, so a download is reproducible and verifiable end to end (the
+ * digests feed straight into [`crate::models::download::DownloadSpec`] which
+ * verifies them on install). Provenance comes from the pinned revision and a
+ * trusted GGUF source (the maker's own repo, `unsloth`, `bartowski`, or
+ * `ggml-org`); the SHA-256 is an integrity check only.
  *
  * Hashes and sizes were read from the Hugging Face tree-at-revision API
- * (`/api/models/<repo>/tree/<revision>`) on 2026-06-17, so each digest
- * matches the pinned commit, not whatever `main` later points to.
+ * (`/api/models/<repo>/tree/<revision>`): the three heroes on 2026-06-17, the
+ * rest of the catalog on 2026-06-20, so each digest matches its pinned commit,
+ * not whatever `main` later points to.
  */
 
 use crate::config::defaults::HF_BASE_URL;
@@ -161,6 +167,170 @@ pub const STARTERS: &[Starter] = &[
         origin: "OpenAI",
         origin_repo: "openai/gpt-oss-20b",
     },
+    // ── Everyday chat ──────────────────────────────────────────────────────
+    Starter {
+        id: "llama-3.1-8b",
+        tier: Tier::Balanced,
+        family: "Llama",
+        category: "Everyday chat",
+        display_name: "Llama 3.1 8B",
+        repo: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+        revision: "bf5b95e96dac0462e2a09145ec66cae9a3f12067",
+        file_name: "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
+        sha256: "7b064f5842bf9532c91456deda288a1b672397a54fa729aa665952863033557c",
+        size_bytes: 4_920_739_232,
+        quant: "Q4_K_M",
+        vision: false,
+        thinking: false,
+        reasoning_always: false,
+        mmproj_file: None,
+        mmproj_sha256: None,
+        mmproj_bytes: 0,
+        est_runtime_gb: 7.0,
+        license_note: "Llama 3.1 Community",
+        origin: "Meta",
+        origin_repo: "meta-llama/Llama-3.1-8B-Instruct",
+    },
+    Starter {
+        id: "mistral-nemo-12b",
+        tier: Tier::Balanced,
+        family: "Mistral",
+        category: "Everyday chat",
+        display_name: "Mistral Nemo 12B",
+        repo: "bartowski/Mistral-Nemo-Instruct-2407-GGUF",
+        revision: "a2dd64a0a76ea1bdb2bb6ab6fa5496b003c7c908",
+        file_name: "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf",
+        sha256: "7c1a10d202d8788dbe5628dc962254d10654c853cae6aaeca0618f05490d4a46",
+        size_bytes: 7_477_208_192,
+        quant: "Q4_K_M",
+        vision: false,
+        thinking: false,
+        reasoning_always: false,
+        mmproj_file: None,
+        mmproj_sha256: None,
+        mmproj_bytes: 0,
+        est_runtime_gb: 9.9,
+        license_note: "Apache 2.0",
+        origin: "Mistral",
+        origin_repo: "mistralai/Mistral-Nemo-Instruct-2407",
+    },
+    // ── Compact & fast ─────────────────────────────────────────────────────
+    Starter {
+        id: "phi-4-mini-3.8b",
+        tier: Tier::Fast,
+        family: "Phi",
+        category: "Compact & fast",
+        display_name: "Phi-4 Mini 3.8B",
+        repo: "unsloth/Phi-4-mini-instruct-GGUF",
+        revision: "78eb92a46fc37e6b524df991ed9aca9bc6aa7b80",
+        file_name: "Phi-4-mini-instruct-Q4_K_M.gguf",
+        sha256: "88c00229914083cd112853aab84ed51b87bdf6b9ce42f532d8c85c7c63b1730a",
+        size_bytes: 2_491_874_272,
+        quant: "Q4_K_M",
+        vision: false,
+        thinking: false,
+        reasoning_always: false,
+        mmproj_file: None,
+        mmproj_sha256: None,
+        mmproj_bytes: 0,
+        est_runtime_gb: 4.7,
+        license_note: "MIT",
+        origin: "Microsoft",
+        origin_repo: "microsoft/Phi-4-mini-instruct",
+    },
+    Starter {
+        id: "llama-3.2-3b",
+        tier: Tier::Fast,
+        family: "Llama",
+        category: "Compact & fast",
+        display_name: "Llama 3.2 3B",
+        repo: "bartowski/Llama-3.2-3B-Instruct-GGUF",
+        revision: "5ab33fa94d1d04e903623ae72c95d1696f09f9e8",
+        file_name: "Llama-3.2-3B-Instruct-Q4_K_M.gguf",
+        sha256: "6c1a2b41161032677be168d354123594c0e6e67d2b9227c84f296ad037c728ff",
+        size_bytes: 2_019_377_696,
+        quant: "Q4_K_M",
+        vision: false,
+        thinking: false,
+        reasoning_always: false,
+        mmproj_file: None,
+        mmproj_sha256: None,
+        mmproj_bytes: 0,
+        est_runtime_gb: 4.0,
+        license_note: "Llama 3.2 Community",
+        origin: "Meta",
+        origin_repo: "meta-llama/Llama-3.2-3B-Instruct",
+    },
+    Starter {
+        id: "gemma-4-e4b",
+        tier: Tier::Fast,
+        family: "Gemma",
+        category: "Compact & fast",
+        display_name: "Gemma 4 E4B",
+        repo: "google/gemma-4-E4B-it-qat-q4_0-gguf",
+        revision: "bb3b92e6f031fa438b409f898dd9f14f499a0cb0",
+        file_name: "gemma-4-E4B_q4_0-it.gguf",
+        sha256: "e8b6a059ba86947a44ace84d6e5679795bc41862c25c30513142588f0e9dba1d",
+        size_bytes: 5_154_939_136,
+        quant: "Q4_0",
+        vision: true,
+        thinking: false,
+        reasoning_always: false,
+        mmproj_file: Some("gemma-4-E4B-it-mmproj.gguf"),
+        mmproj_sha256: Some("c6398448d84a4836fdedf58f9775979e69ae0cc4dfdf4d697b5597693a555b12"),
+        mmproj_bytes: 991_551_904,
+        est_runtime_gb: 7.4,
+        license_note: "Gemma",
+        origin: "Google",
+        origin_repo: "google/gemma-4-E4B-it",
+    },
+    // ── Deep reasoning ─────────────────────────────────────────────────────
+    Starter {
+        id: "phi-4-reasoning-plus-14b",
+        tier: Tier::Smartest,
+        family: "Phi",
+        category: "Deep reasoning",
+        display_name: "Phi-4 Reasoning Plus 14B",
+        repo: "unsloth/Phi-4-reasoning-plus-GGUF",
+        revision: "80fff8542dc7b88dba725b660beefd80e91e80c9",
+        file_name: "Phi-4-reasoning-plus-Q4_K_M.gguf",
+        sha256: "faf720745e20df40f52ee218be14c72b33070f7aacc508b3fbc61d47f32b4ffe",
+        size_bytes: 9_053_117_120,
+        quant: "Q4_K_M",
+        vision: false,
+        thinking: true,
+        reasoning_always: true,
+        mmproj_file: None,
+        mmproj_sha256: None,
+        mmproj_bytes: 0,
+        est_runtime_gb: 12.0,
+        license_note: "MIT",
+        origin: "Microsoft",
+        origin_repo: "microsoft/Phi-4-reasoning-plus",
+    },
+    Starter {
+        id: "deepseek-r1-distill-8b",
+        tier: Tier::Balanced,
+        family: "DeepSeek",
+        category: "Deep reasoning",
+        display_name: "DeepSeek-R1 Distill 8B",
+        repo: "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+        revision: "615f8936e16dfde29dcc00be71145d4d5ce8ed53",
+        file_name: "DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf",
+        sha256: "0addb1339a82385bcd973186cd80d18dcc71885d45eabd899781a118d03827d9",
+        size_bytes: 4_920_737_216,
+        quant: "Q4_K_M",
+        vision: false,
+        thinking: true,
+        reasoning_always: true,
+        mmproj_file: None,
+        mmproj_sha256: None,
+        mmproj_bytes: 0,
+        est_runtime_gb: 7.0,
+        license_note: "MIT",
+        origin: "DeepSeek",
+        origin_repo: "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+    },
 ];
 
 /// Ids of the three onboarding hero starters, in tier order
@@ -309,6 +479,114 @@ mod tests {
         }
     }
 
+    #[test]
+    fn catalog_is_the_vetted_models_grouped_by_category() {
+        // Pins the exact catalog: every id, bucketed under its use-case
+        // category. Adding, removing, or re-categorizing a model fails here
+        // until this expected map is updated in the same reviewed change.
+        use std::collections::BTreeMap;
+        let mut by_cat: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
+        for s in STARTERS {
+            by_cat.entry(s.category).or_default().push(s.id);
+        }
+        for v in by_cat.values_mut() {
+            v.sort_unstable();
+        }
+        let mut expected: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
+        expected.insert(
+            "Everyday chat",
+            vec![
+                "gemma-4-12b",
+                "llama-3.1-8b",
+                "mistral-nemo-12b",
+                "qwen3.5-9b",
+            ],
+        );
+        expected.insert(
+            "Compact & fast",
+            vec!["gemma-4-e4b", "llama-3.2-3b", "phi-4-mini-3.8b"],
+        );
+        expected.insert(
+            "Deep reasoning",
+            vec![
+                "deepseek-r1-distill-8b",
+                "gpt-oss-20b",
+                "phi-4-reasoning-plus-14b",
+            ],
+        );
+        for v in expected.values_mut() {
+            v.sort_unstable();
+        }
+        assert_eq!(by_cat, expected);
+    }
+
+    #[test]
+    fn every_entry_carries_origin_and_license() {
+        for s in STARTERS {
+            assert!(!s.license_note.is_empty(), "{}: empty license", s.id);
+            assert!(!s.origin.is_empty(), "{}: empty origin", s.id);
+            assert!(!s.display_name.is_empty(), "{}: empty display_name", s.id);
+            assert!(!s.family.is_empty(), "{}: empty family", s.id);
+            // origin_repo must be exactly "org/name": two parts, neither empty.
+            let parts: Vec<&str> = s.origin_repo.split('/').collect();
+            assert!(
+                parts.len() == 2 && parts.iter().all(|p| !p.is_empty()),
+                "{}: origin_repo is not org/name: {}",
+                s.id,
+                s.origin_repo
+            );
+        }
+    }
+
+    #[test]
+    fn mmproj_fields_are_internally_consistent() {
+        for s in STARTERS {
+            // Three pairwise checks: the projector file and its digest are both
+            // set or both absent; its byte count is non-zero exactly when the
+            // file is set; and `vision` is true exactly when a projector ships.
+            assert_eq!(
+                s.mmproj_file.is_some(),
+                s.mmproj_sha256.is_some(),
+                "{}: mmproj file/sha presence mismatch",
+                s.id
+            );
+            assert_eq!(
+                s.mmproj_file.is_some(),
+                s.mmproj_bytes > 0,
+                "{}: mmproj file/bytes presence mismatch",
+                s.id
+            );
+            assert_eq!(
+                s.vision,
+                s.mmproj_file.is_some(),
+                "{}: vision/mmproj mismatch",
+                s.id
+            );
+        }
+    }
+
+    #[test]
+    fn reasoning_always_entries_also_emit_thinking() {
+        // `reasoning_always` is the stronger flag: an entry that sets it has to
+        // set `thinking` as well, otherwise the picker badge and the `/think`
+        // gate would disagree about the same model.
+        let always_on = STARTERS.iter().filter(|entry| entry.reasoning_always);
+        for entry in always_on {
+            assert!(entry.thinking, "{}: reasoning_always implies thinking", entry.id);
+        }
+    }
+
+    #[test]
+    fn every_entry_has_a_positive_runtime_estimate() {
+        // Guards against a registry entry whose runtime estimate was left at
+        // zero or typed as a negative number; `> 0.0` also rejects NaN, since
+        // every comparison against NaN is false.
+        for entry in STARTERS {
+            let estimate_gb = entry.est_runtime_gb;
+            assert!(estimate_gb > 0.0, "{}: non-positive est_runtime_gb", entry.id);
+        }
+    }
+
     #[test]
     fn family_per_tier() {
         // Each entry carries a non-empty family label.

From 8f9fb50b3c384b3db006e3a0b717997f136bd05f Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:23:46 -0500
Subject: [PATCH 46/89] refactor: derive the manifest id through a single
 installed_model_id helper

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs      |  2 +-
 src-tauri/src/models/registry.rs | 22 ++++++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 720ed0a1..1d9e6600 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1205,7 +1205,7 @@ fn annotate_starter(
         starter: s.clone(),
         fit: registry::ram_fit(s.est_runtime_gb, ram_bytes),
         installed: matches!(
-            manifest::get(conn, &registry::to_installed_model(s).id),
+            manifest::get(conn, &registry::installed_model_id(s)),
             Ok(Some(_))
         ),
         partial_bytes: store.existing_partial_len(s.sha256),
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index a9c6cf97..9424e108 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -402,10 +402,18 @@ pub fn download_specs(s: &Starter) -> Vec<DownloadSpec> {
     specs
 }
 
-/// Manifest row for an installed starter. id = `"<repo>:<file_name>"`.
+/// The manifest-row id for a starter: `"<repo>:<file_name>"`. The single source
+/// of truth for how a curated entry maps onto its installed-manifest key, so the
+/// installed-state probe can resolve the id without building a whole
+/// [`InstalledModel`] just to read one field.
+pub fn installed_model_id(s: &Starter) -> String {
+    format!("{}:{}", s.repo, s.file_name)
+}
+
+/// Manifest row for an installed starter. id = [`installed_model_id`].
 pub fn to_installed_model(s: &Starter) -> InstalledModel {
     InstalledModel {
-        id: format!("{}:{}", s.repo, s.file_name),
+        id: installed_model_id(s),
         display_name: s.display_name.to_string(),
         repo: s.repo.to_string(),
         revision: s.revision.to_string(),
@@ -791,6 +799,16 @@ mod tests {
         );
     }
 
+    #[test]
+    fn installed_model_id_is_repo_colon_file() {
+        // The manifest-row id is "<repo>:<file_name>"; `installed_model_id` is
+        // its single source of truth, so `to_installed_model` never drifts from
+        // the installed-state probe.
+        let s = &STARTERS[0];
+        assert_eq!(installed_model_id(s), format!("{}:{}", s.repo, s.file_name));
+        assert_eq!(to_installed_model(s).id, installed_model_id(s));
+    }
+
     #[test]
     fn to_installed_model_maps_fields() {
         let balanced = starter(Tier::Balanced);

From 7e4b71c19df7e878905935429fb4155777d730ef Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:26:41 -0500
Subject: [PATCH 47/89] feat: balance the Staff Picks catalog to three models
 per category

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/registry.rs | 56 ++++++++++++++------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 9424e108..2454e8d8 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -168,29 +168,6 @@ pub const STARTERS: &[Starter] = &[
         origin_repo: "openai/gpt-oss-20b",
     },
     // ── Everyday chat ──────────────────────────────────────────────────────
-    Starter {
-        id: "llama-3.1-8b",
-        tier: Tier::Balanced,
-        family: "Llama",
-        category: "Everyday chat",
-        display_name: "Llama 3.1 8B",
-        repo: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-        revision: "bf5b95e96dac0462e2a09145ec66cae9a3f12067",
-        file_name: "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
-        sha256: "7b064f5842bf9532c91456deda288a1b672397a54fa729aa665952863033557c",
-        size_bytes: 4_920_739_232,
-        quant: "Q4_K_M",
-        vision: false,
-        thinking: false,
-        reasoning_always: false,
-        mmproj_file: None,
-        mmproj_sha256: None,
-        mmproj_bytes: 0,
-        est_runtime_gb: 7.0,
-        license_note: "Llama 3.1 Community",
-        origin: "Meta",
-        origin_repo: "meta-llama/Llama-3.1-8B-Instruct",
-    },
     Starter {
         id: "mistral-nemo-12b",
         tier: Tier::Balanced,
@@ -489,9 +466,11 @@ mod tests {
 
     #[test]
     fn catalog_is_the_vetted_models_grouped_by_category() {
-        // The curated Staff Picks catalog: deeply-vetted models, grouped into
-        // the use-case sections the Discover surface renders. Locks the exact
-        // set so a stray add/remove is a deliberate, reviewed change.
+        // The curated Staff Picks catalog: nine deeply-vetted models, exactly
+        // three per use-case section the Discover surface renders. The three
+        // onboarding heroes are among them, so a model downloaded during
+        // onboarding shows up here as Installed with no duplicate row. Locks the
+        // exact set so a stray add/remove is a deliberate, reviewed change.
         use std::collections::BTreeMap;
         let mut by_cat: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
         for s in STARTERS {
@@ -503,12 +482,7 @@ mod tests {
         let mut expected: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
         expected.insert(
             "Everyday chat",
-            vec![
-                "gemma-4-12b",
-                "llama-3.1-8b",
-                "mistral-nemo-12b",
-                "qwen3.5-9b",
-            ],
+            vec!["gemma-4-12b", "mistral-nemo-12b", "qwen3.5-9b"],
         );
         expected.insert(
             "Compact & fast",
@@ -528,6 +502,24 @@ mod tests {
         assert_eq!(by_cat, expected);
     }
 
+    #[test]
+    fn every_category_holds_exactly_three_models() {
+        // The Discover surface is balanced: nine models, exactly three per
+        // use-case section, so no section dwarfs the others.
+        use std::collections::BTreeMap;
+        let mut counts: BTreeMap<&str, usize> = BTreeMap::new();
+        for s in STARTERS {
+            *counts.entry(s.category).or_default() += 1;
+        }
+        assert_eq!(STARTERS.len(), 9, "catalog should hold nine models");
+        for (category, n) in counts {
+            assert_eq!(
+                n, 3,
+                "category {category} should hold exactly three, has {n}"
+            );
+        }
+    }
+
     #[test]
     fn every_entry_carries_origin_and_license() {
         for s in STARTERS {

From 765931e2a0cf8b2e356606e7d893718db3323722 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 22:48:49 -0500
Subject: [PATCH 48/89] fix: word built-in keep-warm status like Ollama (model
 in VRAM / Loading / No model loaded)

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/ProvidersPane.test.tsx        | 56 +++++++++++++++----
 src/settings/tabs/models/ProvidersPane.tsx    | 21 ++++++-
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index ef960071..cf5afcd3 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -53,6 +53,10 @@ const INSTALLED = [
   },
 ];
 
+// A built-in provider whose selected model resolves to INSTALLED[0], so the
+// keep-warm status line can name it (e.g. "Qwen3.5 9B in VRAM").
+const BUILTIN_LOADED: RawProvider = { ...BUILTIN, model: INSTALLED[0].id };
+
 function makeConfig(
   activeProvider: string,
   providers: RawProvider[],
@@ -637,18 +641,42 @@ describe('ProvidersPane generation', () => {
     expect(input).toHaveValue(0);
   });
 
-  it('shows the built-in engine state and gates Unload until loaded', () => {
-    mockInvoke({ get_engine_status: engineStatus('loaded') });
-    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+  it('names the resident built-in model in VRAM and enables Unload when loaded', () => {
+    mockInvoke({
+      get_engine_status: engineStatus('loaded'),
+      list_installed_models: INSTALLED,
+    });
+    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
     return waitFor(() => {
-      expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
+      expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument();
       expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled();
     });
   });
 
-  it('disables Unload while the built-in engine is stopped', () => {
+  it('shows Loading… while the built-in engine is starting', () => {
+    mockInvoke({
+      get_engine_status: engineStatus('starting'),
+      list_installed_models: INSTALLED,
+    });
+    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
+    return waitFor(() => {
+      expect(screen.getByText('Loading…')).toBeInTheDocument();
+      expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
+    });
+  });
+
+  it('falls back to no-model-loaded when the engine is loaded but the model is unknown', () => {
+    mockInvoke({ get_engine_status: engineStatus('loaded') });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    return waitFor(() =>
+      expect(screen.getByText('No model loaded')).toBeInTheDocument(),
+    );
+  });
+
+  it('disables Unload and shows no-model-loaded while the built-in engine is stopped', () => {
     renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
     expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
+    expect(screen.getByText('No model loaded')).toBeInTheDocument();
   });
 
   it('ejects the model on Unload click when loaded', async () => {
@@ -688,18 +716,21 @@ describe('ProvidersPane generation', () => {
   });
 
   it('greens the status dot when a model is resident', async () => {
-    mockInvoke({ get_engine_status: engineStatus('loaded') });
-    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    mockInvoke({
+      get_engine_status: engineStatus('loaded'),
+      list_installed_models: INSTALLED,
+    });
+    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
     await waitFor(() =>
-      expect(screen.getByText('Engine: loaded')).toBeInTheDocument(),
+      expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument(),
     );
-    const dot = screen.getByText('Engine: loaded').querySelector('span');
+    const dot = screen.getByText('Qwen3.5 9B in VRAM').querySelector('span');
     expect(dot).toHaveClass(styles.genStatusDotLive);
   });
 
   it('dims the status dot when the engine is stopped', () => {
     renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
-    const dot = screen.getByText('Engine: stopped').querySelector('span');
+    const dot = screen.getByText('No model loaded').querySelector('span');
     expect(dot).toHaveClass(styles.genStatusDot);
     expect(dot).not.toHaveClass(styles.genStatusDotLive);
   });
@@ -818,14 +849,15 @@ describe('ProvidersPane robustness', () => {
   });
 
   it('reflects the engine:status event stream for the built-in engine', async () => {
-    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    mockInvoke({ list_installed_models: INSTALLED });
+    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
     await act(async () => {
       await Promise.resolve();
     });
     await act(async () => {
       emitTauriEvent('engine:status', engineStatus('loaded'));
     });
-    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
+    expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument();
   });
 
   it('falls back to the first Ollama model when the active one is not listed', async () => {
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index 9988d962..b8605ce0 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -295,9 +295,28 @@ export function ProvidersPane({
   // is actually resident (drives the status dot color: green when warm).
   const engineWarm =
     activeKind === 'builtin' ? engineState === 'loaded' : loadedModel !== null;
+  // Friendly name of the selected built-in model, for the residency line
+  // (matching the built-in model dropdown below).
+  const builtinDisplayName =
+    installed.find((m) => m.id === builtinModelId)?.display_name ?? '';
+  // Built-in residency reads like Ollama's wording, driven by the live engine
+  // state (warmup:* events only fire for Ollama): loaded → "<model> in VRAM"
+  // (unresolved name → "No model loaded"), starting → "Loading…", otherwise →
+  // "No model loaded". "loaded": the llama-server sidecar is up and serving.
+  let builtinResidency: string;
+  if (engineState === 'loaded') {
+    builtinResidency =
+      builtinDisplayName !== ''
+        ? `${builtinDisplayName} in VRAM`
+        : 'No model loaded';
+  } else if (engineState === 'starting') {
+    builtinResidency = 'Loading…';
+  } else {
+    builtinResidency = 'No model loaded';
+  }
   const warmStatusText =
     activeKind === 'builtin'
-      ? `Engine: ${engineState}`
+      ? builtinResidency
       : loadedModel !== null
         ? `${loadedModel} in VRAM`
         : 'No model loaded';

From 1af58d7c3d4780b2c5004dbe579ea8d3faee66b2 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 23:07:56 -0500
Subject: [PATCH 49/89] feat: warm-load the built-in engine on summon and first
 keystroke, matching Ollama

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/lib.rs    | 66 +++++++++++++++++++------------
 src-tauri/src/warmup.rs | 88 ++++++++++++++++++++++++++++++-----------
 2 files changed, 105 insertions(+), 49 deletions(-)

diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 4f9c7f8b..727ae623 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -517,10 +517,9 @@ fn show_overlay(app_handle: &tauri::AppHandle, ctx: crate::context::ActivationCo
     // Pre-load the active model so the user's first message does not pay
     // the cold-start penalty. Fires on all show paths: double-tap, tray,
     // and first-launch auto-show. Branches by the active provider's kind:
-    // Ollama keeps its native /api/chat warmup, the built-in engine gets a
-    // /v1 prime ONLY when it is already serving (summoning the overlay must
-    // never load a model implicitly), and openai providers get no warmup
-    // (nothing local to warm).
+    // Ollama warms via its native /api/chat, the built-in engine starts
+    // (or reuses) its sidecar and primes the KV cache, and openai providers
+    // get no warmup (nothing local to warm).
     let warmup_kind = app_handle
         .state::<parking_lot::RwLock<crate::config::AppConfig>>()
         .read()
@@ -568,29 +567,44 @@ fn show_overlay(app_handle: &tauri::AppHandle, ctx: crate::context::ActivationCo
             }
         }
         crate::config::defaults::PROVIDER_KIND_BUILTIN => {
-            let status = app_handle
-                .state::<engine::runner::EngineHandle>()
-                .status()
-                .borrow()
-                .clone();
-            if let Some(port) = warmup::builtin_prime_port(&status) {
-                let (model, system_prompt) = {
-                    let cfg = app_handle
-                        .state::<parking_lot::RwLock<crate::config::AppConfig>>()
-                        .read()
-                        .clone();
-                    (
-                        cfg.inference.active_provider_model().to_string(),
-                        cfg.prompt.resolved_system.clone(),
-                    )
+            let (model_id, num_ctx, system_prompt) = {
+                let cfg_state = app_handle.state::<parking_lot::RwLock<crate::config::AppConfig>>();
+                let cfg = cfg_state.read();
+                (
+                    cfg.inference.active_provider_model().to_string(),
+                    cfg.inference.num_ctx,
+                    cfg.prompt.resolved_system.clone(),
+                )
+            };
+            if warmup::builtin_should_warm(&model_id) {
+                let store = app_handle.state::<models::storage::ModelStore>();
+                let db = app_handle.state::<history::Database>();
+                // Resolve the manifest row to an engine Target inside a scope so
+                // the connection guard drops before the spawned load. A poisoned
+                // lock is recovered: an unrelated panic does not invalidate it.
+                let target = {
+                    let conn = match db.0.lock() {
+                        Ok(conn) => conn,
+                        Err(poisoned) => poisoned.into_inner(),
+                    };
+                    crate::commands::builtin_target(&conn, &store, &model_id, num_ctx)
                 };
-                let client = app_handle.state::<reqwest::Client>().inner().clone();
-                tauri::async_runtime::spawn(warmup::prime_builtin(
-                    port,
-                    model,
-                    system_prompt,
-                    client,
-                ));
+                // A missing/uninstalled model yields an Err; warmup is
+                // best-effort, so just skip rather than surfacing anything.
+                if let Ok(target) = target {
+                    let engine = app_handle
+                        .state::<engine::runner::EngineHandle>()
+                        .inner()
+                        .clone();
+                    let client = app_handle.state::<reqwest::Client>().inner().clone();
+                    tauri::async_runtime::spawn(warmup::warm_builtin(
+                        engine,
+                        target,
+                        model_id,
+                        system_prompt,
+                        client,
+                    ));
+                }
             }
         }
         _ => {}
diff --git a/src-tauri/src/warmup.rs b/src-tauri/src/warmup.rs
index a90bf33d..857fad27 100644
--- a/src-tauri/src/warmup.rs
+++ b/src-tauri/src/warmup.rs
@@ -99,15 +99,12 @@ pub(crate) fn vram_poll_active(kind: &str) -> bool {
     kind == PROVIDER_KIND_OLLAMA
 }
 
-/// The engine port to prime, when the built-in engine already serves a model.
-/// `None` for every other lifecycle state: summoning the overlay must never
-/// load a model implicitly (loads happen on explicit chat or download).
-pub(crate) fn builtin_prime_port(status: &crate::engine::runner::EngineStatus) -> Option<u16> {
-    if status.state == "loaded" {
-        status.port
-    } else {
-        None
-    }
+/// Whether the built-in engine should warm-load on the chat-intent signal:
+/// only when a model is actually selected. Mirrors the Ollama arm, which also
+/// no-ops without a model. An empty id means no built-in model has been picked
+/// yet, so there is nothing to load.
+pub(crate) fn builtin_should_warm(model_id: &str) -> bool {
+    !model_id.is_empty()
 }
 
 /// Builds the prime request body for the built-in engine: a plain
@@ -146,6 +143,25 @@ pub(crate) async fn prime_builtin(
         .await;
 }
 
+/// Built-in arm of `warm_up_model`: starts (or reuses) the engine so the
+/// selected model is resident by the time the user submits, then primes the
+/// KV cache for the system-prompt prefix. Best-effort: a superseded or failed
+/// load, or a failed prime, is swallowed exactly like the Ollama warmup.
+/// Coverage-off: `ensure_loaded` is covered by the runner tests, `prime_builtin`
+/// by its own; this only sequences them.
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub(crate) async fn warm_builtin(
+    engine: crate::engine::runner::EngineHandle,
+    target: crate::engine::state::Target,
+    model: String,
+    system_prompt: String,
+    client: reqwest::Client,
+) {
+    if let Ok(port) = engine.ensure_loaded(target).await {
+        prime_builtin(port, model, system_prompt, client).await;
+    }
+}
+
 /// Built-in arm of `evict_model`: stops the engine sidecar and resolves once
 /// the process exit is confirmed. The `warmup:model-evicted` emit stays in
 /// the thin Tauri command because it needs an `AppHandle`.
@@ -258,6 +274,8 @@ pub fn warm_up_model(
     config: tauri::State<parking_lot::RwLock<crate::config::AppConfig>>,
     client: tauri::State<reqwest::Client>,
     engine: tauri::State<crate::engine::runner::EngineHandle>,
+    db: tauri::State<crate::history::Database>,
+    store: tauri::State<crate::models::storage::ModelStore>,
 ) {
     let kind = config.read().inference.active_provider_kind().to_string();
     match kind.as_str() {
@@ -292,14 +310,37 @@ pub fn warm_up_model(
             }
         }
         PROVIDER_KIND_BUILTIN => {
-            let status = engine.status().borrow().clone();
-            if let Some(port) = builtin_prime_port(&status) {
+            let (model_id, num_ctx, system_prompt) = {
                 let cfg = config.read();
-                let model = cfg.inference.active_provider_model().to_string();
-                let system_prompt = cfg.prompt.resolved_system.clone();
-                drop(cfg);
-                let client = client.inner().clone();
-                tauri::async_runtime::spawn(prime_builtin(port, model, system_prompt, client));
+                (
+                    cfg.inference.active_provider_model().to_string(),
+                    cfg.inference.num_ctx,
+                    cfg.prompt.resolved_system.clone(),
+                )
+            };
+            if !builtin_should_warm(&model_id) {
+                return;
+            }
+            // Resolve the manifest row to an engine Target inside a scope so the
+            // connection guard drops before the spawned load. A poisoned lock is
+            // recovered: an unrelated panic does not invalidate the connection.
+            let target = {
+                let conn = match db.0.lock() {
+                    Ok(conn) => conn,
+                    Err(poisoned) => poisoned.into_inner(),
+                };
+                crate::commands::builtin_target(&conn, &store, &model_id, num_ctx)
+            };
+            // A missing/uninstalled model yields an Err; warmup is best-effort,
+            // so just skip rather than surfacing anything.
+            if let Ok(target) = target {
+                tauri::async_runtime::spawn(warm_builtin(
+                    engine.inner().clone(),
+                    target,
+                    model_id,
+                    system_prompt,
+                    client.inner().clone(),
+                ));
             }
         }
         _ => {}
@@ -1485,13 +1526,14 @@ mod tests {
     }
 
     #[test]
-    fn prime_skipped_when_engine_not_loaded() {
-        assert_eq!(builtin_prime_port(&engine_status("stopped", None)), None);
-        assert_eq!(builtin_prime_port(&engine_status("starting", None)), None);
-        assert_eq!(builtin_prime_port(&engine_status("failed", None)), None);
-        assert_eq!(
-            builtin_prime_port(&engine_status("loaded", Some(40123))),
-            Some(40123)
+    fn builtin_should_warm_requires_a_selected_model() {
+        assert!(
+            !builtin_should_warm(""),
+            "no picked model means nothing to warm-load"
+        );
+        assert!(
+            builtin_should_warm("org/repo:m.gguf"),
+            "a selected model warms the engine on the chat-intent signal"
         );
     }
 

From 913aae41314700d168408a84ccd214f34fda9bb1 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Fri, 19 Jun 2026 23:54:39 -0500
Subject: [PATCH 50/89] fix: spawn llama-server with --parallel 1 so the
 warm-up prime and first message share one KV slot

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/engine/process.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src-tauri/src/engine/process.rs b/src-tauri/src/engine/process.rs
index 50a4df0f..5c83b904 100644
--- a/src-tauri/src/engine/process.rs
+++ b/src-tauri/src/engine/process.rs
@@ -118,7 +118,7 @@ pub struct TokioEngineProcess {
 }
 
 /// Pure: the `llama-server` command line for one spawn:
-/// `-m <model> [--mmproj <p>] --ctx-size <n> --host 127.0.0.1 --port <p> --no-webui`.
+/// `-m <model> [--mmproj <p>] --ctx-size <n> --host 127.0.0.1 --port <p> --no-webui --parallel 1`.
 fn llama_server_args(args: &SpawnArgs) -> Vec<std::ffi::OsString> {
     let mut argv: Vec<std::ffi::OsString> = vec!["-m".into(), args.model_path.clone().into()];
     if let Some(mmproj) = &args.mmproj_path {
@@ -132,6 +132,14 @@ fn llama_server_args(args: &SpawnArgs) -> Vec<std::ffi::OsString> {
     argv.push("--port".into());
     argv.push(args.port.to_string().into());
     argv.push("--no-webui".into());
+    // Single decode slot. Thuki is single-user, so it never needs parallel
+    // slots, and the default (n_parallel = 4) actively hurts: the summon-time
+    // warm-up prime and the user's first message can land on different KV
+    // slots, so the first message re-does the full system-prompt prefill cold
+    // instead of reusing the prime's cache (slow first turn, fast after). One
+    // slot also gives the conversation the full --ctx-size instead of ctx / 4.
+    argv.push("--parallel".into());
+    argv.push("1".into());
     argv
 }
 
@@ -215,6 +223,8 @@ mod tests {
                 "--port",
                 "4242",
                 "--no-webui",
+                "--parallel",
+                "1",
             ]
         );
     }
@@ -235,6 +245,8 @@ mod tests {
                 "--port",
                 "4242",
                 "--no-webui",
+                "--parallel",
+                "1",
             ]
         );
     }

From 9ea6467156d7a8a5ecc781ac7736af6254cc7d0e Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:05:33 -0500
Subject: [PATCH 51/89] polish: quiet the confirm dialog's primary action to a
 tinted accent with a soft hover bloom

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/styles/settings.module.css | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 06c253eb..69b65eef 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -852,19 +852,30 @@
   background: rgba(48, 34, 32, 0.95);
   border-color: rgba(255, 138, 128, 0.5);
 }
-/* Affirmative primary action (e.g. the confirm in a Switch dialog): the one
- * accent fill, matching the segmented-control active + Add-model treatment. */
+/* Affirmative primary action (e.g. the confirm in a Switch dialog). The accent
+ * reads as a calm tint, not a solid fill, so it names the action without
+ * shouting (restraint over decoration). Hover blooms a soft, slow accent glow:
+ * the override transition below is intentionally a touch longer than the base
+ * button's so the warm ring feels like it eases in rather than snaps. */
 .buttonPrimary {
-  background: var(--accent);
-  border-color: rgba(255, 141, 92, 0.5);
-  border-top-color: rgba(255, 141, 92, 0.5);
-  color: #16110d;
+  background: var(--accent-soft);
+  border-color: rgba(255, 141, 92, 0.38);
+  border-top-color: rgba(255, 141, 92, 0.45);
+  color: var(--accent);
   font-weight: 600;
-  box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.18);
+  box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.05);
+  transition:
+    background 220ms cubic-bezier(0.4, 0, 0.2, 1),
+    border-color 220ms cubic-bezier(0.4, 0, 0.2, 1),
+    box-shadow 260ms cubic-bezier(0.4, 0, 0.2, 1);
 }
 .buttonPrimary:hover {
-  background: #ff9d72;
-  border-color: rgba(255, 141, 92, 0.7);
+  background: rgba(255, 141, 92, 0.22);
+  border-color: rgba(255, 141, 92, 0.6);
+  box-shadow:
+    inset 0 1px 0 rgba(255, 255, 255, 0.08),
+    0 0 0 4px rgba(255, 141, 92, 0.1),
+    0 2px 10px -4px rgba(255, 141, 92, 0.35);
 }
 
 /* ─── Slider (hairline rail with warm gradient thumb) ───────────────────── */

From 51678a2007f38f1cb9c6e278fb7614a5b88daced Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:25:41 -0500
Subject: [PATCH 52/89] fix: evict the Ollama model from VRAM when switching
 away from Ollama, mirroring the built-in sidecar unload

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/settings_commands.rs       | 64 ++++++++++++++++++++++--
 src-tauri/src/settings_commands/tests.rs | 34 ++++++++++++-
 2 files changed, 92 insertions(+), 6 deletions(-)

diff --git a/src-tauri/src/settings_commands.rs b/src-tauri/src/settings_commands.rs
index 4591bc04..453503d9 100644
--- a/src-tauri/src/settings_commands.rs
+++ b/src-tauri/src/settings_commands.rs
@@ -161,6 +161,17 @@ pub(crate) fn builtin_deactivated(prior_kind: &str, resolved: &AppConfig) -> boo
             != crate::config::defaults::PROVIDER_KIND_BUILTIN
 }
 
+/// True when a config write moved the ACTIVE provider away from Ollama
+/// (ollama -> builtin/openai). The mirror of [`builtin_deactivated`]: switching
+/// between non-ollama kinds or onto ollama never matches. Pulled out so the
+/// predicate is covered by tests instead of riding inside the coverage-off
+/// command bodies that fire the Ollama eviction.
+pub(crate) fn ollama_deactivated(prior_kind: &str, resolved: &AppConfig) -> bool {
+    prior_kind == crate::config::defaults::PROVIDER_KIND_OLLAMA
+        && resolved.inference.active_provider_kind()
+            != crate::config::defaults::PROVIDER_KIND_OLLAMA
+}
+
 /// Fires a best-effort engine unload when a config write switched the active
 /// provider away from the built-in engine. Without it, a multi-GB
 /// llama-server stays resident until quit: the eviction UI branches by the
@@ -179,6 +190,45 @@ fn unload_engine_if_builtin_deactivated(app: &AppHandle, prior_kind: &str, resol
     }
 }
 
+/// Fires a best-effort Ollama eviction when a config write switched the active
+/// provider away from Ollama (ollama -> builtin/openai). The mirror of
+/// [`unload_engine_if_builtin_deactivated`]: without it the model Thuki loaded
+/// into Ollama's VRAM lingers for its `keep_alive` TTL after the user has moved
+/// on, holding memory for a provider that is no longer active. Only the model
+/// Thuki was chatting with (the Ollama provider's configured `model`) is
+/// evicted; models other apps loaded are left alone. Spawned so the switch
+/// never blocks on, nor can fail because of, Ollama being unreachable.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn evict_ollama_if_deactivated(app: &AppHandle, prior_kind: &str, resolved: &AppConfig) {
+    if !ollama_deactivated(prior_kind, resolved) {
+        return;
+    }
+    // The provider switch moves only the active_provider pointer; the Ollama
+    // provider entry still carries the model + endpoint Thuki was using.
+    let Some(ollama) = resolved
+        .inference
+        .providers
+        .iter()
+        .find(|p| p.kind == crate::config::defaults::PROVIDER_KIND_OLLAMA)
+    else {
+        return;
+    };
+    let model = ollama.model.clone();
+    if model.is_empty() {
+        return;
+    }
+    let endpoint = format!("{}/api/generate", ollama.base_url.trim_end_matches('/'));
+    let client = app.state::<reqwest::Client>().inner().clone();
+    // Suppress any in-flight warmup that would re-announce the model as loaded
+    // after we evict it, matching the explicit Unload-now path.
+    app.state::<crate::warmup::WarmupState>().mark_evicted();
+    let app_handle = app.clone();
+    tauri::async_runtime::spawn(async move {
+        let _ = crate::warmup::evict_model_request(&endpoint, &model, &client).await;
+        let _ = app_handle.emit("warmup:model-evicted", ());
+    });
+}
+
 // ─── Tauri command surface ──────────────────────────────────────────────────
 
 /// Returns the current resolved `AppConfig` snapshot.
@@ -350,9 +400,13 @@ pub fn set_active_provider(
             *guard = mirror;
         }
     }
-    // Switching away from the built-in engine releases its memory; the
-    // sidecar would otherwise stay resident with no unload affordance.
+    // Switching away from a local provider releases its memory immediately so
+    // the now-inactive provider holds no RAM/VRAM: the built-in engine's
+    // sidecar is killed, and the Ollama model is evicted from VRAM. At most one
+    // fires (the prior kind is builtin, ollama, or openai); openai is remote and
+    // needs neither.
     unload_engine_if_builtin_deactivated(&app, &prior_kind, &resolved);
+    evict_ollama_if_deactivated(&app, &prior_kind, &resolved);
     emit_config_updated(&app);
     Ok(resolved)
 }
@@ -885,9 +939,11 @@ pub fn reload_config_from_disk(
     // Manual edits to `[inference] keep_warm_inactivity_minutes` reach the
     // engine runner through the same refresh path.
     forward_keep_warm_idle_minutes(&app, prior_keep_warm_minutes, &resolved);
-    // A hand-edited `active_provider` that moved away from the built-in
-    // engine releases the sidecar, mirroring the Settings radio path.
+    // A hand-edited `active_provider` that moved away from a local provider
+    // releases its memory (builtin sidecar killed, Ollama model evicted),
+    // mirroring the Settings radio path.
     unload_engine_if_builtin_deactivated(&app, &prior_kind, &resolved);
+    evict_ollama_if_deactivated(&app, &prior_kind, &resolved);
     emit_config_updated(&app);
     Ok(resolved)
 }
diff --git a/src-tauri/src/settings_commands/tests.rs b/src-tauri/src/settings_commands/tests.rs
index 5c31e544..2eb5d66a 100644
--- a/src-tauri/src/settings_commands/tests.rs
+++ b/src-tauri/src/settings_commands/tests.rs
@@ -14,8 +14,8 @@ use toml_edit::DocumentMut;
 use super::{
     add_openai_provider_to_disk, builtin_deactivated, cleanup_provider_secrets,
     coerce_json_to_toml, is_allowed_field, is_allowed_section, is_http_url, json_type_name,
-    json_value_to_toml_item, keep_warm_idle_minutes_changed, patch_document, read_document,
-    remove_openai_provider_from_disk, reset_section_on_disk, trace_enabled_changed,
+    json_value_to_toml_item, keep_warm_idle_minutes_changed, ollama_deactivated, patch_document,
+    read_document, remove_openai_provider_from_disk, reset_section_on_disk, trace_enabled_changed,
     validate_provider_value, write_active_provider_to_disk, write_field_to_disk,
     write_provider_field_to_disk,
 };
@@ -1655,6 +1655,36 @@ fn builtin_deactivated_ignores_non_builtin_transitions_and_no_ops() {
     assert!(!builtin_deactivated("", &config_with_active("ollama")));
 }
 
+// ─── ollama_deactivated ──────────────────────────────────────────────────────
+
+#[test]
+fn ollama_deactivated_detects_switch_away_from_ollama() {
+    // ollama -> builtin and ollama -> openai both free the Ollama model.
+    assert!(ollama_deactivated("ollama", &config_with_active("builtin")));
+    assert!(ollama_deactivated("ollama", &config_with_active("openai")));
+}
+
+#[test]
+fn ollama_deactivated_ignores_switch_onto_ollama() {
+    assert!(!ollama_deactivated(
+        "builtin",
+        &config_with_active("ollama")
+    ));
+}
+
+#[test]
+fn ollama_deactivated_ignores_non_ollama_transitions_and_no_ops() {
+    // ollama -> ollama: nothing changed.
+    assert!(!ollama_deactivated("ollama", &config_with_active("ollama")));
+    // builtin -> builtin: never an Ollama deactivation.
+    assert!(!ollama_deactivated(
+        "builtin",
+        &config_with_active("builtin")
+    ));
+    // Unresolved prior kind (empty) never counts as ollama.
+    assert!(!ollama_deactivated("", &config_with_active("builtin")));
+}
+
 // ─── Helpers ─────────────────────────────────────────────────────────────────
 
 fn matches_type_mismatch(err: &ConfigError, section: &str, key: &str) {

From e2eef814da67c8c2c8d94388c917041b9eb659a7 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:25:41 -0500
Subject: [PATCH 53/89] polish: drop the status dot from the keep-warm line,
 leaving just the residency text

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/ProvidersPane.test.tsx        | 21 -------------------
 src/settings/tabs/models/ProvidersPane.tsx    | 17 ++-------------
 src/styles/settings.module.css                | 14 -------------
 3 files changed, 2 insertions(+), 50 deletions(-)

diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index cf5afcd3..d8fd1a3e 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -14,7 +14,6 @@ import {
 } from '../../../testUtils/mocks/tauri';
 
 import { ProvidersPane } from './ProvidersPane';
-import styles from '../../../styles/settings.module.css';
 import type { RawAppConfig, RawProvider } from '../../types';
 
 const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
@@ -715,26 +714,6 @@ describe('ProvidersPane generation', () => {
     expect(screen.getByText('No model loaded')).toBeInTheDocument();
   });
 
-  it('greens the status dot when a model is resident', async () => {
-    mockInvoke({
-      get_engine_status: engineStatus('loaded'),
-      list_installed_models: INSTALLED,
-    });
-    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
-    await waitFor(() =>
-      expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument(),
-    );
-    const dot = screen.getByText('Qwen3.5 9B in VRAM').querySelector('span');
-    expect(dot).toHaveClass(styles.genStatusDotLive);
-  });
-
-  it('dims the status dot when the engine is stopped', () => {
-    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
-    const dot = screen.getByText('No model loaded').querySelector('span');
-    expect(dot).toHaveClass(styles.genStatusDot);
-    expect(dot).not.toHaveClass(styles.genStatusDotLive);
-  });
-
   it('reflects warmup load + evict events', async () => {
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
     // Let the mount-time get_loaded_model settle so the event is not clobbered.
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index b8605ce0..dce5b836 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -291,10 +291,7 @@ export function ProvidersPane({
 
   const fillPct = `${ctxPos / 10}%`;
 
-  // Keep-warm live status: the text shown beside the name, plus whether a model
-  // is actually resident (drives the status dot color: green when warm).
-  const engineWarm =
-    activeKind === 'builtin' ? engineState === 'loaded' : loadedModel !== null;
+  // Keep-warm live status: the text shown beside the name.
   // Friendly name of the selected built-in model, for the residency line
   // (matching the built-in model dropdown below).
   const builtinDisplayName =
@@ -617,17 +614,7 @@ export function ProvidersPane({
                 </button>
               </Tooltip>
             </div>
-            <span className={styles.genWarmStatus}>
-              <span
-                className={
-                  engineWarm
-                    ? `${styles.genStatusDot} ${styles.genStatusDotLive}`
-                    : styles.genStatusDot
-                }
-                aria-hidden="true"
-              />
-              {warmStatusText}
-            </span>
+            <span className={styles.genWarmStatus}>{warmStatusText}</span>
           </div>
           <div className={styles.genWarmControls}>
             <span className={styles.genWarmPrefix}>Release after</span>
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index 69b65eef..dd405d6b 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -542,23 +542,9 @@
 }
 .genWarmStatus {
   margin-left: auto;
-  display: inline-flex;
-  align-items: center;
-  gap: 6px;
   font-size: 11.5px;
   color: var(--t2);
 }
-.genStatusDot {
-  width: 6px;
-  height: 6px;
-  border-radius: 50%;
-  flex: none;
-  background: var(--t3);
-}
-.genStatusDotLive {
-  background: var(--ok);
-  box-shadow: 0 0 7px var(--ok);
-}
 .genWarmControls {
   display: flex;
   align-items: center;

From ad7c705ce7135c30d8d46becd64898fe67fcdaa6 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 12:06:04 -0500
Subject: [PATCH 54/89] feat: record each catalog model's vetted context window
 in the registry

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/registry.rs | 55 ++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 2454e8d8..948d77cb 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -86,6 +86,12 @@ pub struct Starter {
     /// sliding-window-aware cache). Sanity-check any new entry against a
     /// real load before trusting the estimate.
     pub est_runtime_gb: f64,
+    /// Maximum context window in tokens the model was trained for: its GGUF
+    /// `context_length` metadata (llama.cpp's `n_ctx_train`), vetted against the
+    /// maker's published config. Surfaced in the picker so a user can see how
+    /// much a model can attend to. Display only: the engine loads the user's
+    /// separate, clamped `num_ctx`, never this value.
+    pub context_length: u32,
     /// Short license label surfaced next to the download button.
     pub license_note: &'static str,
     /// Model maker (e.g. "OpenAI"), shown in the picker's Origin row.
@@ -117,6 +123,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: Some("853698ce7aa6c7ba732478bad280240969ddf7b0fcbf93900046f63903a83383"),
         mmproj_bytes: 921_705_024,
         est_runtime_gb: 8.5,
+        context_length: 262_144,
         license_note: "Apache 2.0",
         origin: "Alibaba",
         origin_repo: "Qwen/Qwen3.5-9B",
@@ -140,6 +147,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: Some("e70b0e5cd80323d5d588b4ed06780356b7b1ba03995a4b8164c6ae9db0ff5989"),
         mmproj_bytes: 175_115_264,
         est_runtime_gb: 9.5,
+        context_length: 262_144,
         license_note: "Apache 2.0",
         origin: "Google",
         origin_repo: "google/gemma-4-12B-it",
@@ -163,6 +171,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: None,
         mmproj_bytes: 0,
         est_runtime_gb: 13.3,
+        context_length: 131_072,
         license_note: "Apache 2.0",
         origin: "OpenAI",
         origin_repo: "openai/gpt-oss-20b",
@@ -187,6 +196,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: None,
         mmproj_bytes: 0,
         est_runtime_gb: 9.9,
+        context_length: 131_072,
         license_note: "Apache 2.0",
         origin: "Mistral",
         origin_repo: "mistralai/Mistral-Nemo-Instruct-2407",
@@ -211,6 +221,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: None,
         mmproj_bytes: 0,
         est_runtime_gb: 4.7,
+        context_length: 131_072,
         license_note: "MIT",
         origin: "Microsoft",
         origin_repo: "microsoft/Phi-4-mini-instruct",
@@ -234,6 +245,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: None,
         mmproj_bytes: 0,
         est_runtime_gb: 4.0,
+        context_length: 131_072,
         license_note: "Llama 3.2 Community",
         origin: "Meta",
         origin_repo: "meta-llama/Llama-3.2-3B-Instruct",
@@ -257,6 +269,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: Some("c6398448d84a4836fdedf58f9775979e69ae0cc4dfdf4d697b5597693a555b12"),
         mmproj_bytes: 991_551_904,
         est_runtime_gb: 7.4,
+        context_length: 131_072,
         license_note: "Gemma",
         origin: "Google",
         origin_repo: "google/gemma-4-E4B-it",
@@ -281,6 +294,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: None,
         mmproj_bytes: 0,
         est_runtime_gb: 12.0,
+        context_length: 32_768,
         license_note: "MIT",
         origin: "Microsoft",
         origin_repo: "microsoft/Phi-4-reasoning-plus",
@@ -304,6 +318,7 @@ pub const STARTERS: &[Starter] = &[
         mmproj_sha256: None,
         mmproj_bytes: 0,
         est_runtime_gb: 7.0,
+        context_length: 131_072,
         license_note: "MIT",
         origin: "DeepSeek",
         origin_repo: "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
@@ -502,6 +517,46 @@ mod tests {
         assert_eq!(by_cat, expected);
     }
 
+    #[test]
+    fn context_windows_match_the_vetted_values() {
+        // The model's trained max context (GGUF `context_length`), vetted per
+        // entry against the maker's config; Mistral Nemo is corrected from its
+        // GGUF's inflated 1,024,000 down to its real 131,072.
+        let want: &[(&str, u32)] = &[
+            ("qwen3.5-9b", 262_144),
+            ("gemma-4-12b", 262_144),
+            ("mistral-nemo-12b", 131_072),
+            ("phi-4-mini-3.8b", 131_072),
+            ("llama-3.2-3b", 131_072),
+            ("gemma-4-e4b", 131_072),
+            ("gpt-oss-20b", 131_072),
+            ("phi-4-reasoning-plus-14b", 32_768),
+            ("deepseek-r1-distill-8b", 131_072),
+        ];
+        for (id, ctx) in want {
+            assert_eq!(
+                by_id(id).unwrap().context_length,
+                *ctx,
+                "{id} context window"
+            );
+        }
+    }
+
+    #[test]
+    fn every_entry_has_a_sane_context_window() {
+        // Display-only trained max; a floor/ceiling guards against a typo and
+        // documents that the value is bounded. The real KV allocation is the
+        // user's separate, clamped `num_ctx`, never this number.
+        for s in STARTERS {
+            assert!(
+                (2048..=1_048_576).contains(&s.context_length),
+                "{}: context_length {} out of sane range",
+                s.id,
+                s.context_length
+            );
+        }
+    }
+
     #[test]
     fn every_category_holds_exactly_three_models() {
         // The Discover surface is balanced: nine models, exactly three per

From b0890304869e804bebb789f02c33225e97487cb9 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 12:10:28 -0500
Subject: [PATCH 55/89] feat: show each model's context window as a pill in
 Staff Picks

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/StaffPicksPane.module.css     |  6 ++++
 .../tabs/models/StaffPicksPane.test.tsx       | 20 +++++++++++-
 src/settings/tabs/models/StaffPicksPane.tsx   | 10 ++++++
 src/types/starter.ts                          |  4 +++
 src/utils/__tests__/contextWindow.test.ts     | 32 +++++++++++++++++++
 src/utils/contextWindow.ts                    | 22 +++++++++++++
 6 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 src/utils/__tests__/contextWindow.test.ts
 create mode 100644 src/utils/contextWindow.ts

diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index dfc89553..9b53baac 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -97,6 +97,12 @@
 .pillThinking {
   color: var(--cap-think);
 }
+/* Context window is a spec, not a capability, so it reads in a neutral metadata
+ * tone rather than one of the accent capability colours. */
+.pillContext {
+  color: var(--t3);
+  font-variant-numeric: tabular-nums;
+}
 
 .sub {
   font-size: 10.5px;
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index c3fffd4d..3ee1d1a7 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -100,6 +100,7 @@ function option(
 const QWEN = option({
   id: 'qwen3.5-9b',
   tier: 'fast',
+  context_length: 262_144,
   family: 'Qwen',
   category: 'Everyday chat',
   display_name: 'Qwen3.5 9B',
@@ -110,10 +111,11 @@ const QWEN = option({
   thinking: true,
   origin: 'Alibaba',
 });
-const GEMMA = option({});
+const GEMMA = option({ context_length: 131_072 });
 const GPT_OSS = option({
   id: 'gpt-oss-20b',
   tier: 'smartest',
+  context_length: 131_072,
   family: 'gpt-oss',
   category: 'Deep reasoning',
   display_name: 'gpt-oss 20B',
@@ -208,6 +210,22 @@ describe('StaffPicksPane', () => {
     expect(within(row).getByText('Comfortable')).toBeInTheDocument();
   });
 
+  it('shows the context-window pill, formatted compactly', async () => {
+    await renderPane();
+    expect(within(rowFor('Gemma 4 12B')).getByText('128K')).toBeInTheDocument();
+    expect(within(rowFor('Qwen3.5 9B')).getByText('256K')).toBeInTheDocument();
+  });
+
+  it('omits the context pill for a model with no context window', async () => {
+    await renderPane(() => {}, {
+      get_staff_picks: [
+        option({ context_length: undefined, display_name: 'Mystery 7B' }),
+      ],
+    });
+    const row = rowFor('Mystery 7B');
+    expect(within(row).queryByText(/K$/)).not.toBeInTheDocument();
+  });
+
   it('shows a Thinking pill on a thinking model and omits Vision on a text-only one', async () => {
     await renderPane();
     const qwen = rowFor('Qwen3.5 9B');
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 5afa7c95..aa9d4dcd 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -27,6 +27,7 @@ import { DownloadProgress } from '../../../components/DownloadProgress';
 import { useDownloadModel } from '../../../hooks/useDownloadModel';
 import { useStaffPicks } from '../../../components/StarterPicker';
 import { Tooltip } from '../../../components/Tooltip';
+import { formatContextWindow } from '../../../utils/contextWindow';
 import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
 import styles from './StaffPicksPane.module.css';
 import type { RawAppConfig } from '../../types';
@@ -212,6 +213,8 @@ function ModelRow({
 }: ModelRowProps) {
   const { starter, fit, installed, partial_bytes } = option;
   const showProgress = active && state.phase !== 'idle';
+  // Empty when the model carries no context window, so the pill is skipped.
+  const contextLabel = formatContextWindow(starter.context_length ?? 0);
 
   return (
     <div className={styles.row} data-model-row data-id={starter.id}>
@@ -233,6 +236,13 @@ function ModelRow({
                   Thinking
                 </span>
               ) : null}
+              {contextLabel ? (
+                <Tooltip label="Context window" placement="top">
+                  <span className={`${styles.pill} ${styles.pillContext}`}>
+                    {contextLabel}
+                  </span>
+                </Tooltip>
+              ) : null}
             </span>
           </div>
           <div className={styles.sub}>
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 2713a2a4..72c5269d 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -46,6 +46,10 @@ export interface Starter {
   mmproj_sha256: string | null;
   mmproj_bytes: number;
   est_runtime_gb: number;
+  /** Maximum context window in tokens the model was trained for (its GGUF
+   * `context_length`). Backend always sends it for catalog rows; optional here
+   * for test-fixture ergonomics and for sources that cannot determine it. */
+  context_length?: number;
   license_note: string;
   /** Model maker shown in the Origin row (e.g. "OpenAI"). */
   origin: string;
diff --git a/src/utils/__tests__/contextWindow.test.ts b/src/utils/__tests__/contextWindow.test.ts
new file mode 100644
index 00000000..0807e886
--- /dev/null
+++ b/src/utils/__tests__/contextWindow.test.ts
@@ -0,0 +1,32 @@
+import { describe, expect, it } from 'vitest';
+
+import { formatContextWindow } from '../contextWindow';
+
+describe('formatContextWindow', () => {
+  it('formats the common power-of-two windows as round K labels', () => {
+    expect(formatContextWindow(32_768)).toBe('32K');
+    expect(formatContextWindow(131_072)).toBe('128K');
+    expect(formatContextWindow(262_144)).toBe('256K');
+  });
+
+  it('rounds an odd token count to the nearest K', () => {
+    expect(formatContextWindow(40_000)).toBe('39K');
+    expect(formatContextWindow(8_000)).toBe('8K');
+  });
+
+  it('switches to M at a mebitoken and trims a whole-number decimal', () => {
+    expect(formatContextWindow(1_048_576)).toBe('1M');
+    expect(formatContextWindow(1_572_864)).toBe('1.5M');
+  });
+
+  it('renders a sub-1K count raw, with no unit', () => {
+    expect(formatContextWindow(512)).toBe('512');
+  });
+
+  it('returns an empty string for non-positive or non-finite input so the pill can be skipped', () => {
+    expect(formatContextWindow(0)).toBe('');
+    expect(formatContextWindow(-1)).toBe('');
+    expect(formatContextWindow(Number.NaN)).toBe('');
+    expect(formatContextWindow(Number.POSITIVE_INFINITY)).toBe('');
+  });
+});
diff --git a/src/utils/contextWindow.ts b/src/utils/contextWindow.ts
new file mode 100644
index 00000000..1dd65213
--- /dev/null
+++ b/src/utils/contextWindow.ts
@@ -0,0 +1,22 @@
+/**
+ * Formats a model's context window (in tokens) as a compact, human-readable
+ * label for the model picker, e.g. 131072 -> "128K", 262144 -> "256K",
+ * 1048576 -> "1M". The scale is 1024-based so the common power-of-two windows
+ * read as round numbers, matching how llama.cpp tooling reports them.
+ *
+ * Defensive by design: the input may be an unvetted GGUF `context_length` from
+ * an arbitrary Hugging Face repo, so a non-positive or non-finite value yields
+ * an empty string (the caller skips the pill rather than rendering "NaNK").
+ */
+export function formatContextWindow(tokens: number): string {
+  if (!Number.isFinite(tokens) || tokens <= 0) return '';
+  const K = 1024;
+  if (Math.round(tokens / K) >= K) {
+    // Rounded-K test keeps 1048575 off "1024K"; Number() trims "1.0M" to "1M".
+    return `${Number((tokens / (K * K)).toFixed(1))}M`;
+  }
+  if (tokens >= K) {
+    return `${Math.round(tokens / K)}K`;
+  }
+  return `${Math.round(tokens)}`;
+}

From 9e4fa823794c9cc3197cbf533797d180c75c4e3e Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 12:24:23 -0500
Subject: [PATCH 56/89] feat: surface the context window for Browse-all repos
 from sanitized GGUF metadata

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 docs/configurations.md                        |   1 +
 src-tauri/src/config/defaults.rs              |  10 ++
 src-tauri/src/models/mod.rs                   | 103 ++++++++++++++++--
 .../tabs/models/BrowseAllPane.module.css      |   8 ++
 .../tabs/models/BrowseAllPane.test.tsx        |  34 +++++-
 src/settings/tabs/models/BrowseAllPane.tsx    |  10 ++
 src/types/starter.ts                          |   3 +
 7 files changed, 159 insertions(+), 10 deletions(-)

diff --git a/docs/configurations.md b/docs/configurations.md
index ec6d8577..611345d4 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -195,6 +195,7 @@ The table below also lists the baked-in safety limits that govern Thuki's commun
 | `HF_BASE_URL`                               | `https://huggingface.co` | No | Single origin for model metadata and downloads. Provenance comes from the pinned repo revisions in the curated starter registry, and those pins are only meaningful against the canonical Hub; an arbitrary mirror could serve different content under the same revision ids. | — | The Hugging Face origin Thuki uses for all model metadata calls and blob downloads. Every starter in the registry pins a repo at an exact revision and carries a compiled-in sha256 digest checked after download; the digest catches truncation, bit rot, and resume corruption, while the pinned revision on the canonical Hub is what fixes which content is fetched. |
 | `HF_SEARCH_LIMIT`                           | `30`     | No       | The per-page step for the in-app model browser. The "Load more" control raises the requested page size in multiples of this value, so it is a layout step rather than a user preference. | —      | How many GGUF model repos the first page of an in-app Hugging Face search returns, most-downloaded first. |
 | `HF_SEARCH_LIMIT_MAX`                        | `120`    | No       | Defense-in-depth bound on request size: "Load more" grows the requested page size in `HF_SEARCH_LIMIT` steps, and this caps the largest single request so a runaway page count cannot ask the Hub for an unbounded result set. | —      | The largest page size a single in-app Hugging Face search request may ask for, regardless of how many times "Load more" was pressed. |
+| `MAX_MODEL_CONTEXT_LENGTH`                   | `1 M`    | No       | Defense-in-depth bound on attacker-controlled GGUF metadata: a repo's `context_length` is editable (`gguf_set_metadata.py`) and occasionally inflated, so a value above this sane ceiling is treated as untrustworthy and dropped rather than shown. Mirrors the `num_ctx` upper bound; 1 M tokens covers every current model. | — | The largest model context window Thuki will trust and display from a Browse-all repo's parsed GGUF metadata. A larger declared value is dropped (no context window shown) rather than rendered. Curated Staff Picks models carry a hand-vetted value in the registry instead. |
 | `RUNTIME_OVERHEAD_GB`                        | `2.0`    | No       | Feeds the approximate RAM-fit hint shown in Library and Discover only; the authoritative per-starter memory estimates live in the model registry. A user-tunable overhead would imply a precision the hint does not claim. | —      | Resident-memory overhead added on top of a model's weights size (KV cache plus runtime buffers) when estimating whether it fits in this Mac's RAM. |
 | `MAX_HF_SEARCH_QUERY_LEN`                   | `200 bytes` | No    | Defense-in-depth bound on attacker-influenced input: the query reaches the fixed Hub host (no SSRF) and is percent-encoded by the client, but an unbounded string is still rejected to cap request size. | —      | The longest search string Thuki sends to the Hugging Face model search. A longer query is rejected before any network call. |
 | `OPENAI_MODELS_TIMEOUT_SECS`                | `5 s`    | No       | Protocol cap on a hung server so the Settings model dropdown cannot stall; the OpenAI-compatible server is local or LAN-hosted in the common case, so 5 s is generous. | —      | How long Thuki waits for an OpenAI-compatible server's `/v1/models` listing to respond before giving up. Applies to the Settings model dropdown for that provider, not to chat requests. |
diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs
index eecd4755..db09d3ec 100644
--- a/src-tauri/src/config/defaults.rs
+++ b/src-tauri/src/config/defaults.rs
@@ -67,6 +67,16 @@ pub const DEFAULT_NUM_CTX: u32 = 16384;
 /// current consumer model including the largest 1 M-context variants.
 pub const BOUNDS_NUM_CTX: (u32, u32) = (2048, 1_048_576);
 
+/// Upper bound on a model's context window that Thuki will trust and display
+/// from external GGUF metadata (the `context_length` field of an arbitrary
+/// Hugging Face repo, shown in the Browse-all listing). Defense-in-depth: the
+/// field is attacker-controllable and editable (`gguf_set_metadata.py`), so a
+/// value above this sane ceiling is treated as untrustworthy and dropped rather
+/// than rendered. Mirrors the [`BOUNDS_NUM_CTX`] upper bound: 1 M tokens covers
+/// every current model. Why not tunable: it bounds attacker-controlled data, a
+/// security guard rather than a user preference.
+pub const MAX_MODEL_CONTEXT_LENGTH: u32 = 1_048_576;
+
 /// Accepted range for `keep_warm_inactivity_minutes`.
 /// -1 = keep resident forever, 0 = provider's natural short default (~5 min),
 /// 1..=1440 = explicit timeout. Values below -1 or above 1440 are clamped to
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 1d9e6600..8a460611 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -32,9 +32,10 @@ use tauri::Manager;
 use crate::config::defaults::{
     DEFAULT_OLLAMA_SHOW_REQUEST_TIMEOUT_SECS, DEFAULT_OLLAMA_TAGS_REQUEST_TIMEOUT_SECS,
     HF_API_TIMEOUT_SECS, HF_BASE_URL, HF_SEARCH_LIMIT_MAX, MAX_HF_API_BODY_BYTES,
-    MAX_HF_SEARCH_QUERY_LEN, MAX_MODEL_SLUG_LEN, MAX_OLLAMA_SHOW_BODY_BYTES,
-    MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS, PROVIDER_ID_BUILTIN,
-    PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI, RUNTIME_OVERHEAD_GB,
+    MAX_HF_SEARCH_QUERY_LEN, MAX_MODEL_CONTEXT_LENGTH, MAX_MODEL_SLUG_LEN,
+    MAX_OLLAMA_SHOW_BODY_BYTES, MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS,
+    PROVIDER_ID_BUILTIN, PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI,
+    RUNTIME_OVERHEAD_GB,
 };
 use crate::config::AppConfig;
 
@@ -1343,6 +1344,11 @@ pub struct HfGgufFile {
     pub file: String,
     /// File size in bytes; 0 when the API reports no size.
     pub size_bytes: u64,
+    /// Model's trained context window in tokens, from the repo's parsed GGUF
+    /// `context_length` metadata (same for every quant of a repo). `None` when
+    /// the API does not report it or the value fails the trust check in
+    /// [`sanitize_context_length`].
+    pub context_length: Option<u32>,
 }
 
 /// Subset of the HF `/api/models/<repo>?blobs=true` response Thuki consumes.
@@ -1354,6 +1360,27 @@ struct HfRepoInfo {
     sha: Option<String>,
     #[serde(default)]
     siblings: Vec<HfSibling>,
+    /// HF-parsed GGUF metadata for the repo (present for GGUF repos). Only the
+    /// model's context window is consumed; everything else is ignored.
+    #[serde(default)]
+    gguf: Option<HfGgufMeta>,
+}
+
+/// The slice of HF's parsed `gguf` metadata block Thuki reads: the model's
+/// trained context window. Untrusted external input, sanitized before use.
+#[derive(Deserialize)]
+struct HfGgufMeta {
+    #[serde(default)]
+    context_length: Option<u64>,
+}
+
+/// Trust check for an externally-reported context window. Accepts a positive
+/// value no larger than [`MAX_MODEL_CONTEXT_LENGTH`] and narrows it to `u32`;
+/// anything missing, zero, or implausibly large is dropped to `None` so a
+/// hand-edited or malicious GGUF cannot inject an absurd figure into the UI.
+pub fn sanitize_context_length(raw: Option<u64>) -> Option<u32> {
+    raw.filter(|&n| n >= 1 && n <= MAX_MODEL_CONTEXT_LENGTH as u64)
+        .map(|n| n as u32)
 }
 
 /// One repo file in the HF listing. Only LFS-backed `.gguf` files matter.
@@ -1455,6 +1482,9 @@ pub fn resolve_listing(body: &[u8], file: &str) -> Result<RepoResolved, String>
 pub fn parse_gguf_listing(body: &[u8]) -> Result<Vec<HfGgufFile>, String> {
     let info: HfRepoInfo = serde_json::from_slice(body)
         .map_err(|e| format!("failed to decode Hugging Face API response: {e}"))?;
+    // The context window is a repo-level property (identical across quants), so
+    // it is resolved once and stamped onto every row.
+    let context_length = sanitize_context_length(info.gguf.and_then(|g| g.context_length));
     Ok(info
         .siblings
         .into_iter()
@@ -1464,6 +1494,7 @@ pub fn parse_gguf_listing(body: &[u8]) -> Result<Vec<HfGgufFile>, String> {
             HfGgufFile {
                 file: s.rfilename,
                 size_bytes,
+                context_length,
             }
         })
         .collect())
@@ -4483,6 +4514,7 @@ mod tests {
     fn hf_fixture() -> serde_json::Value {
         serde_json::json!({
             "sha": "c".repeat(40),
+            "gguf": {"context_length": 131072},
             "siblings": [
                 {"rfilename": "README.md", "size": 10},
                 {"rfilename": "model-Q4_K_M.gguf",
@@ -4504,20 +4536,63 @@ mod tests {
             vec![
                 HfGgufFile {
                     file: "model-Q4_K_M.gguf".to_string(),
-                    size_bytes: 1000
+                    size_bytes: 1000,
+                    context_length: Some(131072),
                 },
                 HfGgufFile {
                     file: "extra.gguf".to_string(),
-                    size_bytes: 7
+                    size_bytes: 7,
+                    context_length: Some(131072),
                 },
                 HfGgufFile {
                     file: "bare.gguf".to_string(),
-                    size_bytes: 0
+                    size_bytes: 0,
+                    context_length: Some(131072),
                 },
             ]
         );
     }
 
+    #[test]
+    fn parse_gguf_listing_omits_context_when_absent_or_implausible() {
+        // No `gguf` block at all -> no context window.
+        let body = serde_json::json!({
+            "sha": "c".repeat(40),
+            "siblings": [{"rfilename": "model-Q4_K_M.gguf", "size": 1}],
+        })
+        .to_string();
+        assert_eq!(
+            parse_gguf_listing(body.as_bytes()).unwrap()[0].context_length,
+            None
+        );
+        // An implausibly large declared context is dropped, not shown.
+        let body = serde_json::json!({
+            "sha": "c".repeat(40),
+            "gguf": {"context_length": 9_000_000_000u64},
+            "siblings": [{"rfilename": "model-Q4_K_M.gguf", "size": 1}],
+        })
+        .to_string();
+        assert_eq!(
+            parse_gguf_listing(body.as_bytes()).unwrap()[0].context_length,
+            None
+        );
+    }
+
+    #[test]
+    fn sanitize_context_length_trusts_only_sane_values() {
+        assert_eq!(sanitize_context_length(None), None);
+        assert_eq!(sanitize_context_length(Some(0)), None);
+        assert_eq!(sanitize_context_length(Some(131_072)), Some(131_072));
+        assert_eq!(
+            sanitize_context_length(Some(MAX_MODEL_CONTEXT_LENGTH as u64)),
+            Some(MAX_MODEL_CONTEXT_LENGTH)
+        );
+        assert_eq!(
+            sanitize_context_length(Some(MAX_MODEL_CONTEXT_LENGTH as u64 + 1)),
+            None
+        );
+    }
+
     #[test]
     fn parse_gguf_listing_rejects_invalid_json() {
         let err = parse_gguf_listing(b"not json").unwrap_err();
@@ -4529,9 +4604,13 @@ mod tests {
         let v = serde_json::to_value(HfGgufFile {
             file: "x.gguf".to_string(),
             size_bytes: 5,
+            context_length: Some(8192),
         })
         .unwrap();
-        assert_eq!(v, serde_json::json!({"file": "x.gguf", "size_bytes": 5}));
+        assert_eq!(
+            v,
+            serde_json::json!({"file": "x.gguf", "size_bytes": 5, "context_length": 8192})
+        );
     }
 
     // ── Model library: resolve_listing (pure) ───────────────────────────────
@@ -4889,10 +4968,12 @@ mod tests {
             HfGgufFile {
                 file: "a.gguf".to_string(),
                 size_bytes: 1 << 30,
+                context_length: None,
             },
             HfGgufFile {
                 file: "b.gguf".to_string(),
                 size_bytes: 0,
+                context_length: None,
             },
         ];
         let rows = annotate_gguf_rows(files.clone(), 64 << 30);
@@ -4934,12 +5015,18 @@ mod tests {
             file: HfGgufFile {
                 file: "w.gguf".to_string(),
                 size_bytes: 42,
+                context_length: Some(131072),
             },
             fit: None,
         };
         assert_eq!(
             serde_json::to_value(file_row).unwrap(),
-            serde_json::json!({"file": "w.gguf", "size_bytes": 42, "fit": serde_json::Value::Null})
+            serde_json::json!({
+                "file": "w.gguf",
+                "size_bytes": 42,
+                "context_length": 131072,
+                "fit": serde_json::Value::Null,
+            })
         );
     }
 
diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index 6c07491b..4d241b28 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -339,6 +339,14 @@
   font-size: 11px;
   color: var(--t3);
 }
+/* Repo-level context window, shown once above the quant rows in a neutral
+ * metadata tone. */
+.ctxLine {
+  font-size: 11px;
+  color: var(--t3);
+  padding: 2px 0 4px;
+  font-variant-numeric: tabular-nums;
+}
 
 .error {
   font-size: 11px;
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 386a0bca..ee1f082d 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -74,8 +74,13 @@ const RESULTS: HfModelSummary[] = [
 ];
 
 const GGUFS: HfGgufFile[] = [
-  { file: 'gemma-q4.gguf', size_bytes: 5_000_000_000, fit: 'tight' },
-  { file: 'gemma-q8.gguf', size_bytes: 9_000_000_000 },
+  {
+    file: 'gemma-q4.gguf',
+    size_bytes: 5_000_000_000,
+    fit: 'tight',
+    context_length: 131_072,
+  },
+  { file: 'gemma-q8.gguf', size_bytes: 9_000_000_000, context_length: 131_072 },
 ];
 
 const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
@@ -309,6 +314,31 @@ describe('BrowseAllPane', () => {
     expect(screen.getByText('9.0 GB')).toBeInTheDocument();
   });
 
+  it('shows the repo context window once above the quant list', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
+    await flush();
+    // One context line for the whole repo, not one per quant row.
+    expect(within(row).getByText('128K context window')).toBeInTheDocument();
+  });
+
+  it('omits the context line when the repo reports no context window', async () => {
+    await renderPane(() => {}, {
+      list_hf_repo_ggufs: [
+        { file: 'x.gguf', size_bytes: 1, context_length: null },
+      ],
+    });
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
+    await flush();
+    expect(within(row).queryByText(/context window/)).not.toBeInTheDocument();
+  });
+
   it('collapses an expanded row when the download button is clicked again', async () => {
     await renderPane();
     const row = screen
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 7d2c4e94..740ce659 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -21,6 +21,7 @@ import { DownloadProgress } from '../../../components/DownloadProgress';
 import { useDownloadModel } from '../../../hooks/useDownloadModel';
 import { useHfSearch } from './useHfSearch';
 import { Tooltip } from '../../../components/Tooltip';
+import { formatContextWindow } from '../../../utils/contextWindow';
 import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
 import styles from './BrowseAllPane.module.css';
 import type { HfModelSummary } from '../../../types/hf';
@@ -210,6 +211,9 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
   }, [state.phase, onSaved, reset]);
 
   const showProgress = state.phase !== 'idle';
+  // The context window is a repo-level property (identical across quants), so
+  // it is shown once above the quant list. Empty when unknown, which skips it.
+  const contextLabel = formatContextWindow(files?.[0]?.context_length ?? 0);
 
   return (
     <div className={styles.rowWrap} data-row>
@@ -253,6 +257,12 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
           {files !== null && files.length === 0 && listError === null ? (
             <p className={styles.note}>No GGUF files in this repo.</p>
           ) : null}
+          {!showProgress &&
+          files !== null &&
+          files.length > 0 &&
+          contextLabel ? (
+            <div className={styles.ctxLine}>{contextLabel} context window</div>
+          ) : null}
           {!showProgress && files !== null && files.length > 0
             ? files.map((f) => (
                 <div className={styles.quantRow} key={f.file}>
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 72c5269d..6e1a8131 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -119,6 +119,9 @@ export interface HfGgufFile {
   file: string;
   size_bytes: number;
   fit?: RamFit | null;
+  /** Model's trained context window in tokens, from the repo's parsed GGUF
+   * `context_length` metadata; `null`/absent when unknown or untrusted. */
+  context_length?: number | null;
 }
 
 /** Engine lifecycle snapshot published on the `engine:status` event. */

From 5884ed5d71dee71e1e597996ca86c220e8b546d6 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 14:57:48 -0500
Subject: [PATCH 57/89] feat: move the Staff Picks context window into the size
 and maker sub-line

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/StaffPicksPane.module.css        |  6 ------
 src/settings/tabs/models/StaffPicksPane.test.tsx | 16 +++++++++-------
 src/settings/tabs/models/StaffPicksPane.tsx      | 10 ++--------
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index 9b53baac..dfc89553 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -97,12 +97,6 @@
 .pillThinking {
   color: var(--cap-think);
 }
-/* Context window is a spec, not a capability, so it reads in a neutral metadata
- * tone rather than one of the accent capability colours. */
-.pillContext {
-  color: var(--t3);
-  font-variant-numeric: tabular-nums;
-}
 
 .sub {
   font-size: 10.5px;
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index 3ee1d1a7..e54e5cbf 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -200,30 +200,32 @@ describe('StaffPicksPane', () => {
     expect(screen.queryByText(/Recommended/)).not.toBeInTheDocument();
   });
 
-  it('shows the name, pills, size and maker, and fit on a row', async () => {
+  it('shows the name, pills, the size · context · maker sub-line, and fit', async () => {
     await renderPane();
     const row = rowFor('Gemma 4 12B');
     expect(within(row).getByText('Text')).toBeInTheDocument();
     expect(within(row).getByText('Vision')).toBeInTheDocument();
     expect(within(row).queryByText('Thinking')).not.toBeInTheDocument();
-    expect(within(row).getByText('7.2 GB · Google')).toBeInTheDocument();
+    // Context window sits in the metadata sub-line, between size and maker.
+    expect(within(row).getByText('7.2 GB · 128K · Google')).toBeInTheDocument();
     expect(within(row).getByText('Comfortable')).toBeInTheDocument();
   });
 
-  it('shows the context-window pill, formatted compactly', async () => {
+  it('places the context window between size and maker for each model', async () => {
     await renderPane();
-    expect(within(rowFor('Gemma 4 12B')).getByText('128K')).toBeInTheDocument();
-    expect(within(rowFor('Qwen3.5 9B')).getByText('256K')).toBeInTheDocument();
+    expect(
+      within(rowFor('Qwen3.5 9B')).getByText('7.2 GB · 256K · Alibaba'),
+    ).toBeInTheDocument();
   });
 
-  it('omits the context pill for a model with no context window', async () => {
+  it('falls back to size · maker when a model has no context window', async () => {
     await renderPane(() => {}, {
       get_staff_picks: [
         option({ context_length: undefined, display_name: 'Mystery 7B' }),
       ],
     });
     const row = rowFor('Mystery 7B');
-    expect(within(row).queryByText(/K$/)).not.toBeInTheDocument();
+    expect(within(row).getByText('7.2 GB · Google')).toBeInTheDocument();
   });
 
   it('shows a Thinking pill on a thinking model and omits Vision on a text-only one', async () => {
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index aa9d4dcd..02caed81 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -236,17 +236,11 @@ function ModelRow({
                   Thinking
                 </span>
               ) : null}
-              {contextLabel ? (
-                <Tooltip label="Context window" placement="top">
-                  <span className={`${styles.pill} ${styles.pillContext}`}>
-                    {contextLabel}
-                  </span>
-                </Tooltip>
-              ) : null}
             </span>
           </div>
           <div className={styles.sub}>
-            {gb(totalBytes(option))} GB · {starter.origin}
+            {gb(totalBytes(option))} GB
+            {contextLabel ? ` · ${contextLabel}` : ''} · {starter.origin}
           </div>
         </div>
         {!showProgress ? (

From da8fa4982a97c0c163a7ec461cee6da4be722a32 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 15:05:39 -0500
Subject: [PATCH 58/89] feat: show the Browse-all context window on the repo
 row via the search query

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 94 +++++++------------
 .../tabs/models/BrowseAllPane.module.css      |  8 --
 .../tabs/models/BrowseAllPane.test.tsx        | 49 ++++------
 src/settings/tabs/models/BrowseAllPane.tsx    | 14 +--
 src/types/hf.ts                               |  4 +
 src/types/starter.ts                          |  3 -
 6 files changed, 60 insertions(+), 112 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 8a460611..a70e1bb9 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1344,11 +1344,6 @@ pub struct HfGgufFile {
     pub file: String,
     /// File size in bytes; 0 when the API reports no size.
     pub size_bytes: u64,
-    /// Model's trained context window in tokens, from the repo's parsed GGUF
-    /// `context_length` metadata (same for every quant of a repo). `None` when
-    /// the API does not report it or the value fails the trust check in
-    /// [`sanitize_context_length`].
-    pub context_length: Option<u32>,
 }
 
 /// Subset of the HF `/api/models/<repo>?blobs=true` response Thuki consumes.
@@ -1360,14 +1355,11 @@ struct HfRepoInfo {
     sha: Option<String>,
     #[serde(default)]
     siblings: Vec<HfSibling>,
-    /// HF-parsed GGUF metadata for the repo (present for GGUF repos). Only the
-    /// model's context window is consumed; everything else is ignored.
-    #[serde(default)]
-    gguf: Option<HfGgufMeta>,
 }
 
 /// The slice of HF's parsed `gguf` metadata block Thuki reads: the model's
-/// trained context window. Untrusted external input, sanitized before use.
+/// trained context window. Present on a search row when the query requests
+/// `expand[]=gguf`. Untrusted external input, sanitized before use.
 #[derive(Deserialize)]
 struct HfGgufMeta {
     #[serde(default)]
@@ -1482,9 +1474,6 @@ pub fn resolve_listing(body: &[u8], file: &str) -> Result<RepoResolved, String>
 pub fn parse_gguf_listing(body: &[u8]) -> Result<Vec<HfGgufFile>, String> {
     let info: HfRepoInfo = serde_json::from_slice(body)
         .map_err(|e| format!("failed to decode Hugging Face API response: {e}"))?;
-    // The context window is a repo-level property (identical across quants), so
-    // it is resolved once and stamped onto every row.
-    let context_length = sanitize_context_length(info.gguf.and_then(|g| g.context_length));
     Ok(info
         .siblings
         .into_iter()
@@ -1494,7 +1483,6 @@ pub fn parse_gguf_listing(body: &[u8]) -> Result<Vec<HfGgufFile>, String> {
             HfGgufFile {
                 file: s.rfilename,
                 size_bytes,
-                context_length,
             }
         })
         .collect())
@@ -1615,6 +1603,10 @@ pub struct HfModelSummary {
     /// approval). Gated repos cannot be fetched anonymously, so the UI can flag
     /// them instead of offering a download that would fail.
     pub gated: bool,
+    /// Model's trained context window in tokens, from the repo's parsed GGUF
+    /// `context_length` metadata (a per-repo property, identical across quants).
+    /// `None` when the API omits it or the value fails [`sanitize_context_length`].
+    pub context_length: Option<u32>,
 }
 
 /// One entry in the Hugging Face `/api/models` search response. Only the fields
@@ -1631,6 +1623,10 @@ struct HfSearchEntry {
     /// defaults to `false`.
     #[serde(default, deserialize_with = "deserialize_gated")]
     gated: bool,
+    /// HF-parsed GGUF metadata, present because the search requests
+    /// `expand[]=gguf`. Only the context window is read; sanitized before use.
+    #[serde(default)]
+    gguf: Option<HfGgufMeta>,
 }
 
 /// Normalizes Hugging Face's polymorphic `gated` field (a bool `false` or a
@@ -1659,6 +1655,7 @@ pub fn parse_search_results(body: &[u8]) -> Result<Vec<HfModelSummary>, String>
             id: e.id,
             downloads: e.downloads,
             gated: e.gated,
+            context_length: sanitize_context_length(e.gguf.and_then(|g| g.context_length)),
         })
         .collect())
 }
@@ -1798,6 +1795,10 @@ async fn fetch_hf_search_inner(
         ("sort", "downloads"),
         ("direction", "-1"),
         ("limit", &limit),
+        // `expand[]=gguf` asks the search to include each repo's parsed GGUF
+        // metadata (the model's context window) inline, so the browser can show
+        // it on every row without a second request per repo.
+        ("expand[]", "gguf"),
     ];
     // An empty query browses the most-downloaded GGUF repos; only attach the
     // search term when the user actually typed one.
@@ -4514,7 +4515,6 @@ mod tests {
     fn hf_fixture() -> serde_json::Value {
         serde_json::json!({
             "sha": "c".repeat(40),
-            "gguf": {"context_length": 131072},
             "siblings": [
                 {"rfilename": "README.md", "size": 10},
                 {"rfilename": "model-Q4_K_M.gguf",
@@ -4537,47 +4537,19 @@ mod tests {
                 HfGgufFile {
                     file: "model-Q4_K_M.gguf".to_string(),
                     size_bytes: 1000,
-                    context_length: Some(131072),
                 },
                 HfGgufFile {
                     file: "extra.gguf".to_string(),
                     size_bytes: 7,
-                    context_length: Some(131072),
                 },
                 HfGgufFile {
                     file: "bare.gguf".to_string(),
                     size_bytes: 0,
-                    context_length: Some(131072),
                 },
             ]
         );
     }
 
-    #[test]
-    fn parse_gguf_listing_omits_context_when_absent_or_implausible() {
-        // No `gguf` block at all -> no context window.
-        let body = serde_json::json!({
-            "sha": "c".repeat(40),
-            "siblings": [{"rfilename": "model-Q4_K_M.gguf", "size": 1}],
-        })
-        .to_string();
-        assert_eq!(
-            parse_gguf_listing(body.as_bytes()).unwrap()[0].context_length,
-            None
-        );
-        // An implausibly large declared context is dropped, not shown.
-        let body = serde_json::json!({
-            "sha": "c".repeat(40),
-            "gguf": {"context_length": 9_000_000_000u64},
-            "siblings": [{"rfilename": "model-Q4_K_M.gguf", "size": 1}],
-        })
-        .to_string();
-        assert_eq!(
-            parse_gguf_listing(body.as_bytes()).unwrap()[0].context_length,
-            None
-        );
-    }
-
     #[test]
     fn sanitize_context_length_trusts_only_sane_values() {
         assert_eq!(sanitize_context_length(None), None);
@@ -4604,13 +4576,9 @@ mod tests {
         let v = serde_json::to_value(HfGgufFile {
             file: "x.gguf".to_string(),
             size_bytes: 5,
-            context_length: Some(8192),
         })
         .unwrap();
-        assert_eq!(
-            v,
-            serde_json::json!({"file": "x.gguf", "size_bytes": 5, "context_length": 8192})
-        );
+        assert_eq!(v, serde_json::json!({"file": "x.gguf", "size_bytes": 5}));
     }
 
     // ── Model library: resolve_listing (pure) ───────────────────────────────
@@ -4882,46 +4850,55 @@ mod tests {
     /// absent, null) plus an empty-id row that must be dropped.
     fn search_fixture() -> serde_json::Value {
         serde_json::json!([
-            {"id": "org/alpha-GGUF", "downloads": 1000, "gated": false},
+            {"id": "org/alpha-GGUF", "downloads": 1000, "gated": false,
+             "gguf": {"context_length": 131072}},
             {"id": "org/beta-GGUF", "downloads": 500, "gated": "manual"},
             {"id": "org/gamma-GGUF"},
-            {"id": "org/delta-GGUF", "downloads": 1, "gated": true},
+            {"id": "org/delta-GGUF", "downloads": 1, "gated": true,
+             "gguf": {"context_length": 9000000000u64}},
             {"id": "org/epsilon-GGUF", "downloads": 2, "gated": null},
             {"id": "", "downloads": 9}
         ])
     }
 
     #[test]
-    fn parse_search_results_maps_rows_and_normalizes_gated() {
+    fn parse_search_results_maps_rows_normalizes_gated_and_context() {
         let body = search_fixture().to_string();
         let rows = parse_search_results(body.as_bytes()).unwrap();
         assert_eq!(
             rows,
             vec![
+                // alpha carries a valid context window from its expanded gguf.
                 HfModelSummary {
                     id: "org/alpha-GGUF".to_string(),
                     downloads: 1000,
                     gated: false,
+                    context_length: Some(131072),
                 },
                 HfModelSummary {
                     id: "org/beta-GGUF".to_string(),
                     downloads: 500,
                     gated: true,
+                    context_length: None,
                 },
                 HfModelSummary {
                     id: "org/gamma-GGUF".to_string(),
                     downloads: 0,
                     gated: false,
+                    context_length: None,
                 },
+                // delta's declared context is implausibly large, so it is dropped.
                 HfModelSummary {
                     id: "org/delta-GGUF".to_string(),
                     downloads: 1,
                     gated: true,
+                    context_length: None,
                 },
                 HfModelSummary {
                     id: "org/epsilon-GGUF".to_string(),
                     downloads: 2,
                     gated: false,
+                    context_length: None,
                 },
             ]
         );
@@ -4939,11 +4916,14 @@ mod tests {
             id: "o/r".to_string(),
             downloads: 7,
             gated: true,
+            context_length: Some(131072),
         })
         .unwrap();
         assert_eq!(
             v,
-            serde_json::json!({"id": "o/r", "downloads": 7, "gated": true})
+            serde_json::json!({
+                "id": "o/r", "downloads": 7, "gated": true, "context_length": 131072,
+            })
         );
     }
 
@@ -4968,12 +4948,10 @@ mod tests {
             HfGgufFile {
                 file: "a.gguf".to_string(),
                 size_bytes: 1 << 30,
-                context_length: None,
             },
             HfGgufFile {
                 file: "b.gguf".to_string(),
                 size_bytes: 0,
-                context_length: None,
             },
         ];
         let rows = annotate_gguf_rows(files.clone(), 64 << 30);
@@ -5015,18 +4993,12 @@ mod tests {
             file: HfGgufFile {
                 file: "w.gguf".to_string(),
                 size_bytes: 42,
-                context_length: Some(131072),
             },
             fit: None,
         };
         assert_eq!(
             serde_json::to_value(file_row).unwrap(),
-            serde_json::json!({
-                "file": "w.gguf",
-                "size_bytes": 42,
-                "context_length": 131072,
-                "fit": serde_json::Value::Null,
-            })
+            serde_json::json!({"file": "w.gguf", "size_bytes": 42, "fit": serde_json::Value::Null})
         );
     }
 
diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index 4d241b28..6c07491b 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -339,14 +339,6 @@
   font-size: 11px;
   color: var(--t3);
 }
-/* Repo-level context window, shown once above the quant rows in a neutral
- * metadata tone. */
-.ctxLine {
-  font-size: 11px;
-  color: var(--t3);
-  padding: 2px 0 4px;
-  font-variant-numeric: tabular-nums;
-}
 
 .error {
   font-size: 11px;
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index ee1f082d..e139c33b 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -68,19 +68,20 @@ function mockCommands(responses: Record<string, unknown>) {
 }
 
 const RESULTS: HfModelSummary[] = [
-  { id: 'google/gemma-4-12b-it-GGUF', downloads: 1_200_000, gated: false },
+  {
+    id: 'google/gemma-4-12b-it-GGUF',
+    downloads: 1_200_000,
+    gated: false,
+    context_length: 262_144,
+  },
+  // No context window: covers the "skip the segment" path.
   { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
   { id: 'meta-llama/Llama-3-8B-GGUF', downloads: 9_000, gated: true },
 ];
 
 const GGUFS: HfGgufFile[] = [
-  {
-    file: 'gemma-q4.gguf',
-    size_bytes: 5_000_000_000,
-    fit: 'tight',
-    context_length: 131_072,
-  },
-  { file: 'gemma-q8.gguf', size_bytes: 9_000_000_000, context_length: 131_072 },
+  { file: 'gemma-q4.gguf', size_bytes: 5_000_000_000, fit: 'tight' },
+  { file: 'gemma-q8.gguf', size_bytes: 9_000_000_000 },
 ];
 
 const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
@@ -137,8 +138,9 @@ describe('BrowseAllPane', () => {
     await renderPane();
     expect(screen.getByText('google/gemma-4-12b-it-GGUF')).toBeInTheDocument();
     expect(
-      screen.getByText('google · 1,200,000 downloads'),
+      screen.getByText('google · 1,200,000 downloads · 256K'),
     ).toBeInTheDocument();
+    // The second result has no context window, so the segment is omitted.
     expect(screen.getByText('unsloth · 410,000 downloads')).toBeInTheDocument();
   });
 
@@ -314,29 +316,14 @@ describe('BrowseAllPane', () => {
     expect(screen.getByText('9.0 GB')).toBeInTheDocument();
   });
 
-  it('shows the repo context window once above the quant list', async () => {
+  it('shows the per-repo context window on the collapsed row, after downloads', async () => {
     await renderPane();
-    const row = screen
-      .getByText('google/gemma-4-12b-it-GGUF')
-      .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
-    await flush();
-    // One context line for the whole repo, not one per quant row.
-    expect(within(row).getByText('128K context window')).toBeInTheDocument();
-  });
-
-  it('omits the context line when the repo reports no context window', async () => {
-    await renderPane(() => {}, {
-      list_hf_repo_ggufs: [
-        { file: 'x.gguf', size_bytes: 1, context_length: null },
-      ],
-    });
-    const row = screen
-      .getByText('google/gemma-4-12b-it-GGUF')
-      .closest('[data-row]') as HTMLElement;
-    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
-    await flush();
-    expect(within(row).queryByText(/context window/)).not.toBeInTheDocument();
+    // No need to expand: the search call already carried the context window.
+    expect(
+      screen.getByText('google · 1,200,000 downloads · 256K'),
+    ).toBeInTheDocument();
+    // A repo with no context window keeps the plain org · downloads line.
+    expect(screen.getByText('unsloth · 410,000 downloads')).toBeInTheDocument();
   });
 
   it('collapses an expanded row when the download button is clicked again', async () => {
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 740ce659..0d38047f 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -211,9 +211,10 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
   }, [state.phase, onSaved, reset]);
 
   const showProgress = state.phase !== 'idle';
-  // The context window is a repo-level property (identical across quants), so
-  // it is shown once above the quant list. Empty when unknown, which skips it.
-  const contextLabel = formatContextWindow(files?.[0]?.context_length ?? 0);
+  // The context window is a per-repo property (the search carries it via
+  // expand[]=gguf), so it shows on the collapsed row without expanding. Empty
+  // when unknown, which skips it.
+  const contextLabel = formatContextWindow(model.context_length ?? 0);
 
   return (
     <div className={styles.rowWrap} data-row>
@@ -235,6 +236,7 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
           </div>
           <div className={styles.org}>
             {org} · {model.downloads.toLocaleString()} downloads
+            {contextLabel ? ` · ${contextLabel}` : ''}
           </div>
         </div>
         <button
@@ -257,12 +259,6 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
           {files !== null && files.length === 0 && listError === null ? (
             <p className={styles.note}>No GGUF files in this repo.</p>
           ) : null}
-          {!showProgress &&
-          files !== null &&
-          files.length > 0 &&
-          contextLabel ? (
-            <div className={styles.ctxLine}>{contextLabel} context window</div>
-          ) : null}
           {!showProgress && files !== null && files.length > 0
             ? files.map((f) => (
                 <div className={styles.quantRow} key={f.file}>
diff --git a/src/types/hf.ts b/src/types/hf.ts
index 33a9a895..af82cd9b 100644
--- a/src/types/hf.ts
+++ b/src/types/hf.ts
@@ -25,4 +25,8 @@ export interface HfModelSummary {
   downloads: number;
   /** True when the repo is gated; anonymous downloads fail. */
   gated: boolean;
+  /** Model's trained context window in tokens, from the repo's parsed GGUF
+   * metadata (a per-repo property shared by every quant); `null`/absent when
+   * unknown or untrusted. */
+  context_length?: number | null;
 }
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 6e1a8131..72c5269d 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -119,9 +119,6 @@ export interface HfGgufFile {
   file: string;
   size_bytes: number;
   fit?: RamFit | null;
-  /** Model's trained context window in tokens, from the repo's parsed GGUF
-   * `context_length` metadata; `null`/absent when unknown or untrusted. */
-  context_length?: number | null;
 }
 
 /** Engine lifecycle snapshot published on the `engine:status` event. */

From ec7f8fc37bba7ab255a1c501f1fc2c1d6811237f Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 15:10:37 -0500
Subject: [PATCH 59/89] feat: show the context window on Library rows, healed
 from the registry

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 33 +++++++++++++++----
 src-tauri/src/models/registry.rs              | 21 ++++++++++++
 src/settings/tabs/models/LibraryPane.test.tsx |  8 +++--
 src/settings/tabs/models/LibraryPane.tsx      |  4 +++
 src/types/starter.ts                          |  3 ++
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index a70e1bb9..9b9241e1 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1014,11 +1014,10 @@ pub(crate) fn builtin_capabilities_from_manifest(
             // the registry (highest confidence). A pasted repo has no registry
             // entry and keeps its row's classified flags: the install-time GGUF
             // classifier populates them, and the runtime backstop corrects them.
-            let (vision, thinking, reasoning_always) = registry::STARTERS
-                .iter()
-                .find(|s| s.repo == row.repo && s.file_name == row.file_name)
-                .map(|s| (s.vision, s.thinking, s.reasoning_always))
-                .unwrap_or((row.vision, row.thinking, row.reasoning_always));
+            let (vision, thinking, reasoning_always) =
+                registry::by_repo_file(&row.repo, &row.file_name)
+                    .map(|s| (s.vision, s.thinking, s.reasoning_always))
+                    .unwrap_or((row.vision, row.thinking, row.reasoning_always));
             (
                 row.id.clone(),
                 Capabilities {
@@ -1686,6 +1685,10 @@ pub struct InstalledModelView {
     #[serde(flatten)]
     pub model: manifest::InstalledModel,
     pub fit: Option<registry::RamFit>,
+    /// Trained context window in tokens, healed from the curated registry by
+    /// repo + file. `None` for a pasted model with no registry entry (its
+    /// context is not recorded in the manifest).
+    pub context_length: Option<u32>,
 }
 
 /// Estimated resident memory (GiB) for a GGUF weights blob of `size_bytes`:
@@ -1736,7 +1739,15 @@ pub fn build_installed_views(
             } else {
                 None
             };
-            InstalledModelView { model, fit }
+            // Curated models heal their context window from the registry; a
+            // pasted repo has no entry, so it shows none.
+            let context_length =
+                registry::by_repo_file(&model.repo, &model.file_name).map(|s| s.context_length);
+            InstalledModelView {
+                model,
+                fit,
+                context_length,
+            }
         })
         .collect()
 }
@@ -4982,9 +4993,19 @@ mod tests {
         };
         let views = build_installed_views(vec![model.clone()], 64 << 30);
         assert_eq!(views[0].fit, Some(registry::RamFit::Fits));
+        // A pasted repo has no registry entry, so its context window is unknown.
+        assert_eq!(views[0].context_length, None);
         // Unknown host RAM drops the verdict.
         let views = build_installed_views(vec![model], 0);
         assert_eq!(views[0].fit, None);
+
+        // A curated model heals its context window from the registry.
+        let curated = registry::to_installed_model(&registry::STARTERS[0]);
+        let views = build_installed_views(vec![curated], 64 << 30);
+        assert_eq!(
+            views[0].context_length,
+            Some(registry::STARTERS[0].context_length)
+        );
     }
 
     #[test]
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 948d77cb..47c7e516 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -338,6 +338,17 @@ pub fn by_id(id: &str) -> Option<&'static Starter> {
     STARTERS.iter().find(|s| s.id == id)
 }
 
+/// The registry entry matching this repo + weights file name, if any. An
+/// installed model heals its curated facts (capabilities, context window) from
+/// the registry through here, so a later flag or pin correction reaches models
+/// downloaded before it. A pasted (non-curated) repo has no entry and yields
+/// `None`.
+pub fn by_repo_file(repo: &str, file_name: &str) -> Option<&'static Starter> {
+    STARTERS
+        .iter()
+        .find(|s| s.repo == repo && s.file_name == file_name)
+}
+
 /// The three onboarding hero starters, resolved from [`ONBOARDING_HERO_IDS`] in
 /// tier order. Any id that is absent from the registry is skipped, so the
 /// result is the heroes that actually exist; a registry test asserts all three
@@ -455,6 +466,16 @@ mod tests {
         }
     }
 
+    #[test]
+    fn by_repo_file_matches_repo_and_weights_file() {
+        // Heals an installed model's curated facts from the registry: it matches
+        // on repo + weights file, and misses when either differs.
+        let s = &STARTERS[0];
+        assert_eq!(by_repo_file(s.repo, s.file_name).unwrap().id, s.id);
+        assert!(by_repo_file(s.repo, "other.gguf").is_none());
+        assert!(by_repo_file("other/repo", s.file_name).is_none());
+    }
+
     #[test]
     fn by_id_resolves_present_and_misses_unknown() {
         // by_id finds a present entry and returns None for an unknown slug,
diff --git a/src/settings/tabs/models/LibraryPane.test.tsx b/src/settings/tabs/models/LibraryPane.test.tsx
index d2c28457..2f778b91 100644
--- a/src/settings/tabs/models/LibraryPane.test.tsx
+++ b/src/settings/tabs/models/LibraryPane.test.tsx
@@ -111,6 +111,7 @@ const GEMMA: InstalledModel = {
   size_bytes: 2_489_757_856,
   quant: 'Q4_K_M',
   fit: 'fits',
+  context_length: 262_144,
 };
 
 // No `fit` here: exercises the "RAM unknown" branch (no fit pill).
@@ -198,8 +199,11 @@ describe('LibraryPane', () => {
     mockCommands(libraryResponses());
     await renderPane();
     expect(screen.getByText('gemma')).toBeInTheDocument();
-    expect(screen.getByText('org/gemma · Q4_K_M · 2.5 GB')).toBeInTheDocument();
-    // Empty quant drops out of the org line.
+    // Curated model: context window healed from the registry, after the size.
+    expect(
+      screen.getByText('org/gemma · Q4_K_M · 2.5 GB · 256K'),
+    ).toBeInTheDocument();
+    // Empty quant and (here) no context drop out of the org line.
     expect(screen.getByText('org/qwen · 9.0 GB')).toBeInTheDocument();
   });
 
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index f2fe5fe7..531d99ce 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -17,6 +17,7 @@ import { invoke } from '@tauri-apps/api/core';
 import { useModelCapabilities } from '../../../hooks/useModelCapabilities';
 import { ConfirmDialog } from '../../components';
 import { Tooltip } from '../../../components/Tooltip';
+import { formatContextWindow } from '../../../utils/contextWindow';
 import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
 import styles from './LibraryPane.module.css';
 import type { RawAppConfig } from '../../types';
@@ -194,6 +195,8 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
             const active = m.id === activeModel;
             const caps = capabilities[m.id];
             const repo = m.id.split(':')[0];
+            // Empty when the model carries no context window, which skips it.
+            const contextLabel = formatContextWindow(m.context_length ?? 0);
             return (
               <div
                 key={m.id}
@@ -224,6 +227,7 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                       {repo}
                       {m.quant !== '' ? ` · ${m.quant}` : ''} ·{' '}
                       {gb(m.size_bytes)} GB
+                      {contextLabel ? ` · ${contextLabel}` : ''}
                     </div>
                   </div>
                   <div className={styles.right}>
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 72c5269d..1f75ff6d 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -111,6 +111,9 @@ export interface InstalledModel {
   /** RAM-fit on this Mac, computed by the backend from the recorded size.
    * `null`/absent when host RAM or the size is unknown. */
   fit?: RamFit | null;
+  /** Trained context window in tokens, healed from the curated registry by the
+   * backend; `null`/absent for a pasted model with no registry entry. */
+  context_length?: number | null;
 }
 
 /** One `.gguf` row from `list_hf_repo_ggufs`, for the paste-a-repo browser.

From 2ee29e8b7e74477eb9ac55437fd5d7dfc031a2fe Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 02:26:16 -0500
Subject: [PATCH 60/89] feat: redesign the model download into an inline
 hairline with a unified vision bar

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/components/DownloadProgress.tsx           | 336 ++++++++++++------
 .../__tests__/DownloadProgress.test.tsx       |  51 ++-
 src/components/__tests__/downloadLine.test.ts |  73 ++++
 .../tabs/models/BrowseAllPane.test.tsx        |   4 +-
 .../tabs/models/StaffPicksPane.test.tsx       |   2 +-
 src/settings/tabs/models/StaffPicksPane.tsx   |  11 +
 6 files changed, 344 insertions(+), 133 deletions(-)
 create mode 100644 src/components/__tests__/downloadLine.test.ts

diff --git a/src/components/DownloadProgress.tsx b/src/components/DownloadProgress.tsx
index bf6ae948..c63dce6e 100644
--- a/src/components/DownloadProgress.tsx
+++ b/src/components/DownloadProgress.tsx
@@ -28,6 +28,12 @@ export interface DownloadProgressProps {
   state: DownloadUiState;
   progress: DownloadProgressInfo | null;
   etaSeconds: number | null;
+  /** Cumulative bytes across weights + companion: the unified numerator. */
+  combinedBytes?: number | null;
+  /** Full on-disk total (weights + companion): the unified denominator. */
+  grandTotalBytes?: number | null;
+  /** Rolling download rate in bytes per second; drives the unified ETA. */
+  speedBytesPerSec?: number | null;
   confirmInfo?: ConfirmInfo;
   onConfirm: () => void;
   onCancelConfirm: () => void;
@@ -56,6 +62,67 @@ function gb(bytes: number): string {
   return (bytes / 1e9).toFixed(1);
 }
 
+/** Inputs for the single download figures line. */
+export interface DownloadLineInput {
+  /** Per-file byte counts; the fallback when no grand total is known. */
+  progress: DownloadProgressInfo | null;
+  /** Rolling ETA seconds for the per-file fallback path. */
+  etaSeconds: number | null;
+  /** Cumulative bytes across weights + companion: the unified numerator. */
+  combinedBytes: number | null;
+  /** Full on-disk total (weights + companion): the unified denominator. */
+  grandTotalBytes: number | null;
+  /** Rolling rate; drives the unified ETA when present. */
+  speedBytesPerSec: number | null;
+}
+
+/** Percent plus a "x / y GB · ~eta" string, or null figures before any bytes. */
+export interface DownloadLine {
+  percent: number;
+  figures: string | null;
+}
+
+/**
+ * One continuous progress reading. Prefers the unified weights + companion
+ * figure, so a vision download is one bar that never resets between files;
+ * with no grand total it uses the current file's own byte counts. The unified
+ * ETA uses the rolling speed only when it is positive, else `etaSeconds`.
+ */
+export function downloadLine({
+  progress,
+  etaSeconds,
+  combinedBytes,
+  grandTotalBytes,
+  speedBytesPerSec,
+}: DownloadLineInput): DownloadLine {
+  let bytes: number;
+  let total: number;
+  let eta: number | null;
+  if (
+    grandTotalBytes !== null &&
+    grandTotalBytes > 0 &&
+    combinedBytes !== null
+  ) {
+    bytes = combinedBytes;
+    total = grandTotalBytes;
+    eta =
+      speedBytesPerSec !== null && speedBytesPerSec > 0
+        ? Math.max(0, Math.round((total - bytes) / speedBytesPerSec))
+        : etaSeconds;
+  } else if (progress !== null && progress.totalBytes > 0) {
+    bytes = progress.bytes;
+    total = progress.totalBytes;
+    eta = etaSeconds;
+  } else {
+    return { percent: 0, figures: null };
+  }
+  const percent = Math.min(100, Math.floor((bytes / total) * 100));
+  const figures =
+    `${gb(bytes)} / ${gb(total)} GB` +
+    (eta !== null ? ` · ~${formatEta(eta)}` : '');
+  return { percent, figures };
+}
+
 /** Failure headline per kind. Exact copy; consumed verbatim by tests. */
 function failureHeadline(kind: string, message: string): string {
   switch (kind) {
@@ -82,6 +149,9 @@ export function DownloadProgress({
   state,
   progress,
   etaSeconds,
+  combinedBytes = null,
+  grandTotalBytes = null,
+  speedBytesPerSec = null,
   confirmInfo,
   onConfirm,
   onCancelConfirm,
@@ -120,80 +190,62 @@ export function DownloadProgress({
         </Card>
       );
     case 'downloading':
-    case 'downloading_mmproj':
+    case 'downloading_mmproj': {
+      const { percent, figures } = downloadLine({
+        progress,
+        etaSeconds,
+        combinedBytes,
+        grandTotalBytes,
+        speedBytesPerSec,
+      });
       return (
-        <Card>
-          <Headline>
-            {state.phase === 'downloading_mmproj'
-              ? 'Downloading vision companion'
-              : 'Downloading model'}
-          </Headline>
-          <ProgressBar
-            percent={
-              progress && progress.totalBytes > 0
-                ? Math.floor((progress.bytes / progress.totalBytes) * 100)
-                : 0
-            }
-          />
-          {progress ? (
-            <Detail>
-              {gb(progress.bytes)} GB of {gb(progress.totalBytes)} GB
-            </Detail>
-          ) : null}
-          {etaSeconds !== null ? (
-            <Detail>About {formatEta(etaSeconds)} left</Detail>
-          ) : null}
-          <ButtonRow>
-            <FlowButton label="Cancel" onClick={onCancel} />
-          </ButtonRow>
-        </Card>
+        <Hairline edge={<Edge percent={percent} tone="accent" />}>
+          <span data-testid="download-figures" style={FIGURES_STYLE}>
+            <strong style={{ color: '#f0f0f2', fontWeight: 700 }}>
+              {percent}%
+            </strong>
+            {figures !== null ? ` · ${figures}` : ''}
+            {state.phase === 'downloading_mmproj' ? ' · finishing vision' : ''}
+          </span>
+          <span style={{ flex: 1 }} />
+          <CancelX onClick={onCancel} />
+        </Hairline>
       );
+    }
     case 'verifying':
       return (
-        <Card>
-          <Headline>Verifying download</Headline>
-          <ProgressBar indeterminate />
-        </Card>
+        <Hairline edge={<Edge indeterminate tone="accent" />}>
+          <StatusText>Verifying download</StatusText>
+        </Hairline>
       );
     case 'installing':
       return (
-        <Card>
-          <Headline>Installing</Headline>
-          <ProgressBar indeterminate />
-        </Card>
+        <Hairline edge={<Edge indeterminate tone="accent" />}>
+          <StatusText>Installing</StatusText>
+        </Hairline>
       );
     case 'warming_up':
       return (
-        <Card>
-          <Headline>Starting the engine</Headline>
-          <ProgressBar indeterminate />
-        </Card>
+        <Hairline edge={<Edge indeterminate tone="accent" />}>
+          <StatusText>Starting the engine</StatusText>
+        </Hairline>
       );
     case 'ready':
       return (
-        <Card>
-          <Headline>
-            <span
-              style={{
-                display: 'inline-flex',
-                alignItems: 'center',
-                gap: 6,
-                color: '#22c55e',
-              }}
-            >
-              <svg width="12" height="12" viewBox="0 0 16 16" fill="none">
-                <path
-                  d="M3 8.5l3.2 3.2L13 5"
-                  stroke="currentColor"
-                  strokeWidth="1.8"
-                  strokeLinecap="round"
-                  strokeLinejoin="round"
-                />
-              </svg>
-              Ready
-            </span>
-          </Headline>
-        </Card>
+        <Hairline edge={<Edge percent={100} tone="green" />}>
+          <StatusText ready>
+            <svg width="12" height="12" viewBox="0 0 16 16" fill="none">
+              <path
+                d="M3 8.5l3.2 3.2L13 5"
+                stroke="currentColor"
+                strokeWidth="1.8"
+                strokeLinecap="round"
+                strokeLinejoin="round"
+              />
+            </svg>
+            Ready
+          </StatusText>
+        </Hairline>
       );
     case 'failed':
       return (
@@ -274,51 +326,135 @@ function Detail({
   );
 }
 
-interface ProgressBarProps {
-  percent?: number;
-  indeterminate?: boolean;
+/** Subtitle figures line: muted, tabular so the digits do not jitter. */
+const FIGURES_STYLE: React.CSSProperties = {
+  fontSize: 11.5,
+  color: 'rgba(236,234,231,0.54)',
+  fontVariantNumeric: 'tabular-nums',
+  lineHeight: 1.4,
+};
+
+/**
+ * Inline shell for every active state: one quiet line with a 2px accent edge
+ * pinned to the bottom of the row (the hairline). No box of its own, so the
+ * download reads as part of the model row rather than a nested card.
+ */
+function Hairline({
+  children,
+  edge,
+}: {
+  children: React.ReactNode;
+  edge: React.ReactNode;
+}) {
+  return (
+    <div
+      data-download-progress
+      style={{
+        position: 'relative',
+        display: 'flex',
+        alignItems: 'center',
+        gap: 8,
+        padding: '8px 2px 12px',
+        minHeight: 30,
+      }}
+    >
+      {children}
+      {edge}
+    </div>
+  );
 }
 
-function ProgressBar({ percent = 0, indeterminate = false }: ProgressBarProps) {
+/**
+ * The 2px progress edge. Determinate fills to `percent`; indeterminate shows a
+ * fixed segment. `tone` is the warm accent while working and green at ready.
+ */
+function Edge({
+  percent = 0,
+  indeterminate = false,
+  tone,
+}: {
+  percent?: number;
+  indeterminate?: boolean;
+  tone: 'accent' | 'green';
+}) {
   return (
-    <div>
-      {!indeterminate ? (
-        <div
-          style={{
-            textAlign: 'right',
-            fontSize: 10.5,
-            color: 'rgba(255,255,255,0.45)',
-            marginBottom: 3,
-          }}
-        >
-          {percent}%
-        </div>
-      ) : null}
-      <div
-        data-progress-bar
-        data-indeterminate={indeterminate}
+    <span
+      data-progress-bar
+      data-indeterminate={indeterminate}
+      style={{
+        position: 'absolute',
+        left: 0,
+        right: 0,
+        bottom: 0,
+        height: 2,
+        borderRadius: 999,
+        background: 'rgba(255,255,255,0.08)',
+        overflow: 'hidden',
+      }}
+    >
+      <span
         style={{
-          position: 'relative',
-          height: 5,
+          position: 'absolute',
+          left: 0,
+          top: 0,
+          bottom: 0,
+          width: indeterminate ? '40%' : `${percent}%`,
           borderRadius: 999,
-          background: 'rgba(255,255,255,0.06)',
-          overflow: 'hidden',
+          background:
+            tone === 'green'
+              ? '#5fcf86'
+              : 'linear-gradient(90deg, #ffa06f, #d45a1e)',
         }}
-      >
-        <div
-          style={{
-            position: 'absolute',
-            top: 0,
-            left: 0,
-            bottom: 0,
-            width: indeterminate ? '40%' : `${percent}%`,
-            borderRadius: 999,
-            background: 'linear-gradient(135deg, #ff8d5c 0%, #d45a1e 100%)',
-            opacity: indeterminate ? 0.6 : 1,
-          }}
-        />
-      </div>
-    </div>
+      />
+    </span>
+  );
+}
+
+/** The single status line for the post-download steps (and the ready check). */
+function StatusText({
+  children,
+  ready = false,
+}: {
+  children: React.ReactNode;
+  ready?: boolean;
+}) {
+  return (
+    <p
+      style={{
+        margin: 0,
+        fontSize: 12.5,
+        fontWeight: 600,
+        color: ready ? '#5fcf86' : '#f0f0f2',
+        display: 'inline-flex',
+        alignItems: 'center',
+        gap: 6,
+        lineHeight: 1.4,
+      }}
+    >
+      {children}
+    </p>
+  );
+}
+
+/** The inline cancel control: a quiet × that warms on hover via the theme. */
+function CancelX({ onClick }: { onClick: () => void }) {
+  return (
+    <button
+      aria-label="Cancel"
+      onClick={onClick}
+      style={{
+        background: 'transparent',
+        border: 'none',
+        color: 'rgba(236,234,231,0.34)',
+        fontSize: 15,
+        lineHeight: 1,
+        cursor: 'pointer',
+        fontFamily: 'inherit',
+        padding: '2px 6px',
+      }}
+    >
+      ✕
+    </button>
   );
 }
 
diff --git a/src/components/__tests__/DownloadProgress.test.tsx b/src/components/__tests__/DownloadProgress.test.tsx
index 6e9768f1..426f907b 100644
--- a/src/components/__tests__/DownloadProgress.test.tsx
+++ b/src/components/__tests__/DownloadProgress.test.tsx
@@ -125,51 +125,42 @@ describe('DownloadProgress', () => {
       totalBytes: 8_200_000_000,
     };
 
-    it('shows percent, byte counts, ETA, and a working Cancel', () => {
+    it('shows the unified percent, byte figures, and a working Cancel', () => {
       const { onCancel } = renderProgress(
         { phase: 'downloading' },
-        { progress, etaSeconds: 300 },
+        { combinedBytes: 1.2e9, grandTotalBytes: 2.0e9, etaSeconds: 240 },
       );
-      expect(screen.getByText('Downloading model')).toBeInTheDocument();
-      expect(screen.getByText('30%')).toBeInTheDocument();
-      expect(screen.getByText('2.5 GB of 8.2 GB')).toBeInTheDocument();
-      expect(screen.getByText('About 5m left')).toBeInTheDocument();
+      expect(screen.getByTestId('download-figures')).toHaveTextContent(
+        '60% · 1.2 / 2.0 GB · ~4m',
+      );
+      // The boxy "Downloading model" headline is gone in the hairline design.
+      expect(screen.queryByText('Downloading model')).not.toBeInTheDocument();
 
       fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
       expect(onCancel).toHaveBeenCalledTimes(1);
     });
 
-    it('labels the mmproj phase as the vision companion', () => {
-      renderProgress(
-        { phase: 'downloading_mmproj' },
-        { progress, etaSeconds: null },
+    it('falls back to per-file figures for a single-file repo download', () => {
+      renderProgress({ phase: 'downloading' }, { progress, etaSeconds: 300 });
+      expect(screen.getByTestId('download-figures')).toHaveTextContent(
+        '30% · 2.5 / 8.2 GB · ~5m',
       );
-      expect(
-        screen.getByText('Downloading vision companion'),
-      ).toBeInTheDocument();
-      expect(screen.queryByText(/left$/)).not.toBeInTheDocument();
     });
 
-    it('falls back to 0% before the first Started event lands', () => {
-      renderProgress({ phase: 'downloading' });
-      expect(screen.getByText('0%')).toBeInTheDocument();
-      expect(screen.queryByText(/GB of/)).not.toBeInTheDocument();
-    });
-
-    it('guards the percent math against a zero total', () => {
+    it('keeps one continuous bar and notes the vision companion leg', () => {
       renderProgress(
-        { phase: 'downloading' },
-        { progress: { file: 'w.gguf', bytes: 10, totalBytes: 0 } },
+        { phase: 'downloading_mmproj' },
+        { combinedBytes: 1.8e9, grandTotalBytes: 2.0e9 },
+      );
+      expect(screen.getByTestId('download-figures')).toHaveTextContent(
+        '90% · 1.8 / 2.0 GB · finishing vision',
       );
-      expect(screen.getByText('0%')).toBeInTheDocument();
     });
 
-    it('formats sub-minute and multi-hour ETAs', () => {
-      renderProgress({ phase: 'downloading' }, { progress, etaSeconds: 45 });
-      expect(screen.getByText('About 45s left')).toBeInTheDocument();
-
-      renderProgress({ phase: 'downloading' }, { progress, etaSeconds: 7300 });
-      expect(screen.getByText('About 2h 1m left')).toBeInTheDocument();
+    it('shows 0% with no byte figures before the first bytes land', () => {
+      renderProgress({ phase: 'downloading' });
+      expect(screen.getByTestId('download-figures')).toHaveTextContent('0%');
+      expect(screen.queryByText(/GB/)).not.toBeInTheDocument();
     });
   });
 
diff --git a/src/components/__tests__/downloadLine.test.ts b/src/components/__tests__/downloadLine.test.ts
new file mode 100644
index 00000000..5b4e5187
--- /dev/null
+++ b/src/components/__tests__/downloadLine.test.ts
@@ -0,0 +1,73 @@
+import { describe, it, expect } from 'vitest';
+import { downloadLine } from '../DownloadProgress';
+
+const base = {
+  progress: null,
+  etaSeconds: null,
+  combinedBytes: null,
+  grandTotalBytes: null,
+  speedBytesPerSec: null,
+};
+
+describe('downloadLine', () => {
+  it('uses the unified combined/grand-total figure with ETA from etaSeconds', () => {
+    const line = downloadLine({
+      ...base,
+      combinedBytes: 1.2e9,
+      grandTotalBytes: 2.0e9,
+      etaSeconds: 240,
+    });
+    expect(line).toEqual({ percent: 60, figures: '1.2 / 2.0 GB · ~4m' });
+  });
+
+  it('derives the unified ETA from the rolling speed when present', () => {
+    const line = downloadLine({
+      ...base,
+      combinedBytes: 1.0e9,
+      grandTotalBytes: 2.0e9,
+      speedBytesPerSec: 1e8,
+      // etaSeconds is ignored on the unified path when a speed is available.
+      etaSeconds: 9999,
+    });
+    expect(line).toEqual({ percent: 50, figures: '1.0 / 2.0 GB · ~10s' });
+  });
+
+  it('falls back to per-file progress when no grand total is known', () => {
+    const line = downloadLine({
+      ...base,
+      progress: { file: 'w.gguf', bytes: 2.5e9, totalBytes: 8.2e9 },
+      etaSeconds: 300,
+    });
+    expect(line).toEqual({ percent: 30, figures: '2.5 / 8.2 GB · ~5m' });
+  });
+
+  it('clamps the unified percent to 100 and omits ETA when unmeasurable', () => {
+    const line = downloadLine({
+      ...base,
+      combinedBytes: 2.1e9,
+      grandTotalBytes: 2.0e9,
+    });
+    expect(line).toEqual({ percent: 100, figures: '2.1 / 2.0 GB' });
+  });
+
+  it('formats a multi-hour ETA on the per-file path', () => {
+    const line = downloadLine({
+      ...base,
+      progress: { file: 'w.gguf', bytes: 1e9, totalBytes: 10e9 },
+      etaSeconds: 7300,
+    });
+    expect(line).toEqual({ percent: 10, figures: '1.0 / 10.0 GB · ~2h 1m' });
+  });
+
+  it('returns 0% and no figures before any bytes are known', () => {
+    expect(downloadLine(base)).toEqual({ percent: 0, figures: null });
+  });
+
+  it('returns 0% and no figures when the per-file total is zero', () => {
+    const line = downloadLine({
+      ...base,
+      progress: { file: 'w.gguf', bytes: 10, totalBytes: 0 },
+    });
+    expect(line).toEqual({ percent: 0, figures: null });
+  });
+});
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index e139c33b..653af386 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -400,7 +400,7 @@ describe('BrowseAllPane', () => {
         },
       });
     });
-    expect(screen.getByText('Downloading model')).toBeInTheDocument();
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
     act(() => {
       lastChannel?.simulateMessage({ type: 'AllDone' });
     });
@@ -439,7 +439,7 @@ describe('BrowseAllPane', () => {
     await flush();
     fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
-    expect(screen.getByText('Downloading model')).toBeInTheDocument();
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
     await flush();
     expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index e54e5cbf..db959eb2 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -283,7 +283,7 @@ describe('StaffPicksPane', () => {
     const row = rowFor('Gemma 4 12B');
     fireEvent.click(within(row).getByRole('button', { name: 'Download' }));
     await flush();
-    expect(screen.getByText('Downloading model')).toBeInTheDocument();
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
     act(() => {
       lastChannel?.simulateMessage({ type: 'AllDone' });
     });
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 02caed81..d64b9e20 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -106,6 +106,8 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
     state,
     progress,
     etaSeconds,
+    combinedBytes,
+    speedBytesPerSec,
     startById,
     cancel,
     retry,
@@ -170,6 +172,8 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
               state={state}
               progress={progress}
               etaSeconds={etaSeconds}
+              combinedBytes={combinedBytes}
+              speedBytesPerSec={speedBytesPerSec}
               onDownload={startDownload}
               onResume={startDownload}
               onDiscard={discardPartial}
@@ -190,6 +194,8 @@ interface ModelRowProps {
   state: ReturnType<typeof useDownloadModel>['state'];
   progress: ReturnType<typeof useDownloadModel>['progress'];
   etaSeconds: number | null;
+  combinedBytes: number | null;
+  speedBytesPerSec: number | null;
   onDownload: (id: string) => void;
   onResume: (id: string) => void;
   onDiscard: (sha256: string) => void;
@@ -204,6 +210,8 @@ function ModelRow({
   state,
   progress,
   etaSeconds,
+  combinedBytes,
+  speedBytesPerSec,
   onDownload,
   onResume,
   onDiscard,
@@ -267,6 +275,9 @@ function ModelRow({
             state={state}
             progress={progress}
             etaSeconds={etaSeconds}
+            combinedBytes={combinedBytes}
+            grandTotalBytes={totalBytes(option)}
+            speedBytesPerSec={speedBytesPerSec}
             // The curated path has no pre-flight confirm card, so onConfirm /
             // onCancelConfirm never fire; they point at the same covered
             // handlers rather than dead no-op literals.

From 32d6a637f887e3fc3e2aec5c1a73a15b2ac2b985 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:09:04 -0500
Subject: [PATCH 61/89] fix: keep the Browse all quant list visible while one
 gguf downloads

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/BrowseAllPane.test.tsx        |  20 ++++
 src/settings/tabs/models/BrowseAllPane.tsx    | 106 +++++++++++-------
 2 files changed, 84 insertions(+), 42 deletions(-)

diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 653af386..d629e1bc 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -445,6 +445,26 @@ describe('BrowseAllPane', () => {
     expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
   });
 
+  it('keeps the other quant rows visible and downloadable while one downloads', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
+    await flush();
+    // Start the first quant's download.
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+    // The active row shows progress...
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
+    // ...and the OTHER quant file is still listed, not hidden.
+    expect(screen.getByText('gemma-q8.gguf')).toBeInTheDocument();
+    // Its Download button stays (disabled, since one download runs at a time).
+    const others = screen.getAllByRole('button', { name: 'Download' });
+    expect(others).toHaveLength(1);
+    expect(others[0]).toBeDisabled();
+  });
+
   it('retries after a failure and offers a path back to the quant list', async () => {
     await renderPane();
     const row = screen
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 0d38047f..6463be36 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -163,6 +163,9 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
   const [expanded, setExpanded] = useState(false);
   const [files, setFiles] = useState<HfGgufFile[] | null>(null);
   const [listError, setListError] = useState<string | null>(null);
+  // The file the user pressed download on, so its row (and only its row) shows
+  // the in-flight progress while the rest of the quant list stays put.
+  const [activeFile, setActiveFile] = useState<string | null>(null);
 
   const { state, progress, etaSeconds, startRepo, cancel, retry, reset } =
     useDownloadModel();
@@ -259,49 +262,68 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
           {files !== null && files.length === 0 && listError === null ? (
             <p className={styles.note}>No GGUF files in this repo.</p>
           ) : null}
-          {!showProgress && files !== null && files.length > 0
-            ? files.map((f) => (
-                <div className={styles.quantRow} key={f.file}>
-                  <span className={styles.quantName}>{f.file}</span>
-                  {f.fit ? (
-                    <Tooltip label={RAM_FIT_TOOLTIP[f.fit]} placement="top">
-                      <span className={`${styles.fit} ${FIT_CLASS[f.fit]}`}>
-                        {RAM_FIT_LABEL[f.fit]}
-                      </span>
-                    </Tooltip>
-                  ) : null}
-                  <span className={styles.quantSize}>
-                    {gb(f.size_bytes)} GB
-                  </span>
-                  <button
-                    type="button"
-                    className={styles.quantGet}
-                    aria-label="Download"
-                    onClick={() => void startRepo(model.id, f.file)}
-                  >
-                    {DOWNLOAD_ICON}
-                  </button>
-                </div>
-              ))
+          {files !== null && files.length > 0
+            ? files.map((f) => {
+                // Only the row whose file is downloading swaps its controls for
+                // the inline progress; every other row stays a normal,
+                // browsable quant (its download disabled until this one ends,
+                // since the engine runs one download at a time).
+                const downloading = showProgress && activeFile === f.file;
+                return (
+                  <div className={styles.quantRow} key={f.file}>
+                    <span className={styles.quantName}>{f.file}</span>
+                    {downloading ? (
+                      <DownloadProgress
+                        state={state}
+                        progress={progress}
+                        etaSeconds={etaSeconds}
+                        // The repo download flow has no pre-flight confirm step
+                        // (only the starter picker does), so the confirm card
+                        // never renders; these required props point at the same
+                        // covered handlers rather than dead no-op literals.
+                        onConfirm={reset}
+                        onCancelConfirm={reset}
+                        onCancel={() => void cancel()}
+                        onRetry={() => void retry()}
+                        // A terminal failure must leave a path back to the quant
+                        // list, not just Retry; reset returns to the file rows.
+                        onChooseAnother={reset}
+                      />
+                    ) : (
+                      <>
+                        {f.fit ? (
+                          <Tooltip
+                            label={RAM_FIT_TOOLTIP[f.fit]}
+                            placement="top"
+                          >
+                            <span
+                              className={`${styles.fit} ${FIT_CLASS[f.fit]}`}
+                            >
+                              {RAM_FIT_LABEL[f.fit]}
+                            </span>
+                          </Tooltip>
+                        ) : null}
+                        <span className={styles.quantSize}>
+                          {gb(f.size_bytes)} GB
+                        </span>
+                        <button
+                          type="button"
+                          className={styles.quantGet}
+                          aria-label="Download"
+                          disabled={showProgress}
+                          onClick={() => {
+                            setActiveFile(f.file);
+                            void startRepo(model.id, f.file);
+                          }}
+                        >
+                          {DOWNLOAD_ICON}
+                        </button>
+                      </>
+                    )}
+                  </div>
+                );
+              })
             : null}
-          {showProgress ? (
-            <DownloadProgress
-              state={state}
-              progress={progress}
-              etaSeconds={etaSeconds}
-              // The repo download flow has no pre-flight confirm step (only
-              // the starter picker does), so the confirm card never renders;
-              // these required props point at the same covered handlers as
-              // their respective cards rather than dead no-op literals.
-              onConfirm={reset}
-              onCancelConfirm={reset}
-              onCancel={() => void cancel()}
-              onRetry={() => void retry()}
-              // A terminal failure must leave a path back to the quant list,
-              // not just Retry; reset returns to the file rows.
-              onChooseAnother={reset}
-            />
-          ) : null}
         </div>
       ) : null}
     </div>

From c35c545cec74aa0af0532c835eeb903fa260d980 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:28:22 -0500
Subject: [PATCH 62/89] feat: render the failed download state as an inline
 hairline

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/components/DownloadProgress.tsx | 73 ++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/src/components/DownloadProgress.tsx b/src/components/DownloadProgress.tsx
index c63dce6e..835e1e23 100644
--- a/src/components/DownloadProgress.tsx
+++ b/src/components/DownloadProgress.tsx
@@ -249,19 +249,25 @@ export function DownloadProgress({
       );
     case 'failed':
       return (
-        <Card>
-          <Headline>{failureHeadline(state.kind, state.message)}</Headline>
-          {state.kind === 'http' ? <Detail>{state.message}</Detail> : null}
-          <ButtonRow>
-            <FlowButton label="Retry" primary onClick={onRetry} />
-            {onChooseAnother ? (
-              <FlowButton
-                label="Choose a different model"
-                onClick={onChooseAnother}
-              />
+        <Hairline edge={<Edge percent={100} tone="red" />}>
+          <span style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
+            <span style={{ fontSize: 12.5, fontWeight: 600, color: '#ff7a6e' }}>
+              {failureHeadline(state.kind, state.message)}
+            </span>
+            {state.kind === 'http' ? (
+              <span style={FIGURES_STYLE}>{state.message}</span>
             ) : null}
-          </ButtonRow>
-        </Card>
+          </span>
+          <span style={{ flex: 1 }} />
+          <GhostButton label="Retry" tone="accent" onClick={onRetry} />
+          {onChooseAnother ? (
+            <GhostButton
+              label="Choose a different model"
+              tone="muted"
+              onClick={onChooseAnother}
+            />
+          ) : null}
+        </Hairline>
       );
     default:
       // idle and resume_pending have no progress UI; the picker owns them.
@@ -375,8 +381,14 @@ function Edge({
 }: {
   percent?: number;
   indeterminate?: boolean;
-  tone: 'accent' | 'green';
+  tone: 'accent' | 'green' | 'red';
 }) {
+  const fill =
+    tone === 'green'
+      ? '#5fcf86'
+      : tone === 'red'
+        ? '#ef6b6b'
+        : 'linear-gradient(90deg, #ffa06f, #d45a1e)';
   return (
     <span
       data-progress-bar
@@ -400,16 +412,43 @@ function Edge({
           bottom: 0,
           width: indeterminate ? '40%' : `${percent}%`,
           borderRadius: 999,
-          background:
-            tone === 'green'
-              ? '#5fcf86'
-              : 'linear-gradient(90deg, #ffa06f, #d45a1e)',
+          background: fill,
         }}
       />
     </span>
   );
 }
 
+/** A borderless text button for the inline hairline actions (Retry, etc.). */
+function GhostButton({
+  label,
+  tone,
+  onClick,
+}: {
+  label: string;
+  tone: 'accent' | 'muted';
+  onClick: () => void;
+}) {
+  return (
+    <button
+      onClick={onClick}
+      style={{
+        background: 'transparent',
+        border: 'none',
+        fontFamily: 'inherit',
+        fontSize: 11.5,
+        fontWeight: 700,
+        cursor: 'pointer',
+        whiteSpace: 'nowrap',
+        padding: '2px 4px',
+        color: tone === 'accent' ? '#ff8d5c' : 'rgba(236,234,231,0.54)',
+      }}
+    >
+      {label}
+    </button>
+  );
+}
+
 /** The single status line for the post-download steps (and the ready check). */
 function StatusText({
   children,

From f53fde023956abe7d92a19055ef79ef7b54e0dcd Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:40:39 -0500
Subject: [PATCH 63/89] feat: show interrupted partials as a quiet Paused row
 in Staff picks

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/StaffPicksPane.module.css     |  12 +--
 .../tabs/models/StaffPicksPane.test.tsx       |  13 +++
 src/settings/tabs/models/StaffPicksPane.tsx   | 101 +++++++++---------
 3 files changed, 66 insertions(+), 60 deletions(-)

diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index dfc89553..ad2f203f 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -163,18 +163,12 @@
   background: var(--accent-soft);
 }
 
-.resumeWrap {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-}
 .resumeBtn {
   font-size: 11.5px;
-  font-weight: 540;
+  font-weight: 700;
   color: var(--accent);
-  border: 1px solid rgba(255, 141, 92, 0.4);
-  border-radius: var(--radius-control);
-  padding: 6px 12px;
+  border: none;
+  padding: 4px 4px;
   background: transparent;
   font-family: inherit;
   cursor: pointer;
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index db959eb2..de0fe886 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -386,6 +386,19 @@ describe('StaffPicksPane', () => {
     );
   });
 
+  it('shows the paused percent and hides the fit hint for a partial', async () => {
+    await renderPane(() => {}, {
+      get_starter_options: [
+        { ...GEMMA, partial_bytes: 2_000_000_000 },
+        QWEN,
+        GPT_OSS,
+      ],
+    });
+    const row = rowFor('Gemma 4 12B');
+    expect(within(row).getByText(/^Paused · \d+%$/)).toBeInTheDocument();
+    expect(within(row).queryByText('Comfortable')).not.toBeInTheDocument();
+  });
+
   it('discards an interrupted partial and refreshes', async () => {
     await renderPane(() => {}, {
       get_staff_picks: [
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index d64b9e20..6a142c30 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -223,6 +223,13 @@ function ModelRow({
   const showProgress = active && state.phase !== 'idle';
   // Empty when the model carries no context window, so the pill is skipped.
   const contextLabel = formatContextWindow(starter.context_length ?? 0);
+  // An interrupted partial (not installed, not actively downloading) reads as a
+  // calm "Paused · N%" rather than a size line, with quiet resume/discard.
+  const paused = !showProgress && !installed && partial_bytes !== null;
+  const pausedPct =
+    partial_bytes !== null
+      ? Math.min(100, Math.floor((partial_bytes / totalBytes(option)) * 100))
+      : 0;
 
   return (
     <div className={styles.row} data-model-row data-id={starter.id}>
@@ -247,25 +254,47 @@ function ModelRow({
             </span>
           </div>
           <div className={styles.sub}>
-            {gb(totalBytes(option))} GB
-            {contextLabel ? ` · ${contextLabel}` : ''} · {starter.origin}
+            {paused
+              ? `Paused · ${pausedPct}%`
+              : `${gb(totalBytes(option))} GB${
+                  contextLabel ? ` · ${contextLabel}` : ''
+                } · ${starter.origin}`}
           </div>
         </div>
         {!showProgress ? (
           <div className={styles.right}>
-            <Tooltip label={RAM_FIT_TOOLTIP[fit]} placement="top">
-              <span className={`${styles.fit} ${FIT_CLASS[fit]}`}>
-                {RAM_FIT_LABEL[fit]}
-              </span>
-            </Tooltip>
-            <RowAction
-              option={option}
-              installed={installed}
-              partialBytes={partial_bytes}
-              onDownload={onDownload}
-              onResume={onResume}
-              onDiscard={onDiscard}
-            />
+            {paused ? (
+              <>
+                <button
+                  type="button"
+                  className={styles.resumeBtn}
+                  onClick={() => onResume(starter.id)}
+                >
+                  Resume
+                </button>
+                <button
+                  type="button"
+                  className={styles.discardBtn}
+                  aria-label="Discard"
+                  onClick={() => onDiscard(starter.sha256)}
+                >
+                  Discard
+                </button>
+              </>
+            ) : (
+              <>
+                <Tooltip label={RAM_FIT_TOOLTIP[fit]} placement="top">
+                  <span className={`${styles.fit} ${FIT_CLASS[fit]}`}>
+                    {RAM_FIT_LABEL[fit]}
+                  </span>
+                </Tooltip>
+                <RowAction
+                  option={option}
+                  installed={installed}
+                  onDownload={onDownload}
+                />
+              </>
+            )}
           </div>
         ) : null}
       </div>
@@ -296,10 +325,7 @@ function ModelRow({
 interface RowActionProps {
   option: StaffPickOption;
   installed: boolean;
-  partialBytes: number | null;
   onDownload: (id: string) => void;
-  onResume: (id: string) => void;
-  onDiscard: (sha256: string) => void;
 }
 
 const DOWNLOAD_ICON = (
@@ -308,45 +334,18 @@ const DOWNLOAD_ICON = (
   </svg>
 );
 
-/** The per-row affordance. An already-installed model shows nothing (no
- * download button, no badge): it lives in Library, so on this Discover surface
- * the absence of a download is the signal. A resume/discard pair shows when an
- * interrupted partial exists; otherwise the icon download button. */
-function RowAction({
-  option,
-  installed,
-  partialBytes,
-  onDownload,
-  onResume,
-  onDiscard,
-}: RowActionProps) {
+/** The per-row download affordance. An already-installed model shows nothing
+ * (no download button, no badge): it lives in Library, so on this Discover
+ * surface the absence of a download is the signal. The interrupted-partial
+ * resume/discard pair is owned by the row itself; this renders the plain icon
+ * download button otherwise. */
+function RowAction({ option, installed, onDownload }: RowActionProps) {
   const { starter } = option;
 
   if (installed) {
     return null;
   }
 
-  if (partialBytes !== null) {
-    return (
-      <span className={styles.resumeWrap}>
-        <button
-          type="button"
-          className={styles.resumeBtn}
-          onClick={() => onResume(starter.id)}
-        >
-          Resume ({gb(partialBytes)} GB)
-        </button>
-        <button
-          type="button"
-          className={styles.discardBtn}
-          onClick={() => onDiscard(starter.sha256)}
-        >
-          Discard
-        </button>
-      </span>
-    );
-  }
-
   return (
     <button
       type="button"

From 40cc0b185841c5be185e4658ccabc86c29829fb0 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 11:52:57 -0500
Subject: [PATCH 64/89] fix: flip Staff picks to Paused on cancel and keep the
 fit hint

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/StaffPicksPane.test.tsx       | 37 +++++++++++++++++--
 src/settings/tabs/models/StaffPicksPane.tsx   | 32 +++++++++-------
 2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index de0fe886..b89ec8af 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -316,6 +316,37 @@ describe('StaffPicksPane', () => {
     expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
   });
 
+  it('shows the paused row immediately after cancel, without a tab switch', async () => {
+    let calls = 0;
+    await renderPane(() => {}, {
+      // The post-cancel refresh re-reads the options; the partial now exists.
+      get_staff_picks: () => {
+        calls += 1;
+        return calls <= 1
+          ? STARTERS
+          : [{ ...GEMMA, partial_bytes: 1_000_000_000 }, QWEN, GPT_OSS];
+      },
+    });
+    fireEvent.click(
+      within(rowFor('Gemma 4 12B')).getByRole('button', { name: 'Download' }),
+    );
+    await flush();
+    expect(
+      within(rowFor('Gemma 4 12B')).getByTestId('download-figures'),
+    ).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    await flush();
+    act(() => lastChannel?.simulateMessage({ type: 'Cancelled' }));
+    await flush();
+    await waitFor(() => {
+      const row = rowFor('Gemma 4 12B');
+      expect(within(row).getByText(/^Paused · \d+%$/)).toBeInTheDocument();
+      expect(
+        within(row).getByRole('button', { name: /Resume/ }),
+      ).toBeInTheDocument();
+    });
+  });
+
   it('retries after a failed download', async () => {
     await renderPane();
     const row = rowFor('Gemma 4 12B');
@@ -386,9 +417,9 @@ describe('StaffPicksPane', () => {
     );
   });
 
-  it('shows the paused percent and hides the fit hint for a partial', async () => {
+  it('shows the paused percent and keeps the fit hint for a partial', async () => {
     await renderPane(() => {}, {
-      get_starter_options: [
+      get_staff_picks: [
         { ...GEMMA, partial_bytes: 2_000_000_000 },
         QWEN,
         GPT_OSS,
@@ -396,7 +427,7 @@ describe('StaffPicksPane', () => {
     });
     const row = rowFor('Gemma 4 12B');
     expect(within(row).getByText(/^Paused · \d+%$/)).toBeInTheDocument();
-    expect(within(row).queryByText('Comfortable')).not.toBeInTheDocument();
+    expect(within(row).getByText('Comfortable')).toBeInTheDocument();
   });
 
   it('discards an interrupted partial and refreshes', async () => {
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 6a142c30..b0fce61a 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -144,6 +144,14 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
     await refresh();
   }
 
+  // Cancelling leaves the partial on disk; re-read the options so the row flips
+  // straight to its Paused/Resume state instead of snapping back to a fresh
+  // download until the next remount.
+  async function cancelDownload() {
+    await cancel();
+    await refresh();
+  }
+
   function returnToPicker() {
     reset();
     setActiveId(null);
@@ -177,7 +185,7 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
               onDownload={startDownload}
               onResume={startDownload}
               onDiscard={discardPartial}
-              onCancel={() => void cancel()}
+              onCancel={() => void cancelDownload()}
               onRetry={() => void retry()}
               onChooseAnother={returnToPicker}
             />
@@ -263,6 +271,11 @@ function ModelRow({
         </div>
         {!showProgress ? (
           <div className={styles.right}>
+            <Tooltip label={RAM_FIT_TOOLTIP[fit]} placement="top">
+              <span className={`${styles.fit} ${FIT_CLASS[fit]}`}>
+                {RAM_FIT_LABEL[fit]}
+              </span>
+            </Tooltip>
             {paused ? (
               <>
                 <button
@@ -282,18 +295,11 @@ function ModelRow({
                 </button>
               </>
             ) : (
-              <>
-                <Tooltip label={RAM_FIT_TOOLTIP[fit]} placement="top">
-                  <span className={`${styles.fit} ${FIT_CLASS[fit]}`}>
-                    {RAM_FIT_LABEL[fit]}
-                  </span>
-                </Tooltip>
-                <RowAction
-                  option={option}
-                  installed={installed}
-                  onDownload={onDownload}
-                />
-              </>
+              <RowAction
+                option={option}
+                installed={installed}
+                onDownload={onDownload}
+              />
             )}
           </div>
         ) : null}

From bceb0ed9a3a93d1a7e4dc6e8263052559b0b642c Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 12:11:39 -0500
Subject: [PATCH 65/89] feat: surface interrupted partials as
 Paused/Resume/Discard in Browse all

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 105 +++++++++++++++-
 .../tabs/models/BrowseAllPane.module.css      |  40 ++++++
 .../tabs/models/BrowseAllPane.test.tsx        | 117 +++++++++++++++++-
 src/settings/tabs/models/BrowseAllPane.tsx    | 102 ++++++++++++---
 src/types/starter.ts                          |   4 +
 5 files changed, 344 insertions(+), 24 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 9b9241e1..18431f8a 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1343,6 +1343,12 @@ pub struct HfGgufFile {
     pub file: String,
     /// File size in bytes; 0 when the API reports no size.
     pub size_bytes: u64,
+    /// LFS content digest: the blob key used to resume or discard the partial.
+    /// Empty when the repo file is not LFS-backed (rare for GGUF weights).
+    pub sha256: String,
+    /// Length of an interrupted partial for this file on disk, or `None` when
+    /// there is none. Drives the Browse-all Paused / Resume / Discard row.
+    pub partial_bytes: Option<u64>,
 }
 
 /// Subset of the HF `/api/models/<repo>?blobs=true` response Thuki consumes.
@@ -1479,9 +1485,16 @@ pub fn parse_gguf_listing(body: &[u8]) -> Result<Vec<HfGgufFile>, String> {
         .filter(|s| s.rfilename.ends_with(".gguf") && !s.rfilename.starts_with("mmproj"))
         .map(|s| {
             let size_bytes = s.lfs.as_ref().and_then(|l| l.size).or(s.size).unwrap_or(0);
+            let sha256 = s
+                .lfs
+                .as_ref()
+                .and_then(|l| l.sha256.clone())
+                .unwrap_or_default();
             HfGgufFile {
                 file: s.rfilename,
                 size_bytes,
+                sha256,
+                partial_bytes: None,
             }
         })
         .collect())
@@ -1722,6 +1735,24 @@ pub fn annotate_gguf_rows(files: Vec<HfGgufFile>, ram_bytes: u64) -> Vec<HfGgufF
         .collect()
 }
 
+/// Fills each row's `partial_bytes` from the blob store so Browse-all can offer
+/// Resume / Discard for any file with an interrupted partial on disk. A row
+/// whose `sha256` is empty (a non-LFS file) has no content-addressed partial
+/// and stays `None`.
+pub fn attach_partials(
+    rows: Vec<HfGgufFileRow>,
+    store: &storage::ModelStore,
+) -> Vec<HfGgufFileRow> {
+    rows.into_iter()
+        .map(|mut row| {
+            if !row.file.sha256.is_empty() {
+                row.file.partial_bytes = store.existing_partial_len(&row.file.sha256);
+            }
+            row
+        })
+        .collect()
+}
+
 /// Annotates installed models with their RAM-fit on the host, from the recorded
 /// weights size. A model gets `None` when host RAM or the size is 0.
 pub fn build_installed_views(
@@ -2312,9 +2343,13 @@ pub async fn download_repo_model(
 pub async fn list_hf_repo_ggufs(
     repo: String,
     client: tauri::State<'_, reqwest::Client>,
+    store: tauri::State<'_, storage::ModelStore>,
 ) -> Result<Vec<HfGgufFileRow>, String> {
     let files = fetch_repo_gguf_listing(&client, HF_BASE_URL, &repo).await?;
-    Ok(annotate_gguf_rows(files, system_ram_bytes()))
+    Ok(attach_partials(
+        annotate_gguf_rows(files, system_ram_bytes()),
+        &store,
+    ))
 }
 
 /// Searches Hugging Face for GGUF model repos matching `query`, most-downloaded
@@ -4548,14 +4583,20 @@ mod tests {
                 HfGgufFile {
                     file: "model-Q4_K_M.gguf".to_string(),
                     size_bytes: 1000,
+                    sha256: "a".repeat(64),
+                    partial_bytes: None,
                 },
                 HfGgufFile {
                     file: "extra.gguf".to_string(),
                     size_bytes: 7,
+                    sha256: String::new(),
+                    partial_bytes: None,
                 },
                 HfGgufFile {
                     file: "bare.gguf".to_string(),
                     size_bytes: 0,
+                    sha256: String::new(),
+                    partial_bytes: None,
                 },
             ]
         );
@@ -4576,6 +4617,42 @@ mod tests {
         );
     }
 
+    #[test]
+    fn attach_partials_reports_planted_and_skips_empty_sha() {
+        let (_dir, store) = make_store();
+        let sha = "a".repeat(64);
+        // Plant a 9-byte partial for the LFS-backed row.
+        let path = store.partial_path(&sha);
+        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
+        std::fs::write(&path, [0u8; 9]).unwrap();
+
+        let rows = vec![
+            HfGgufFileRow {
+                file: HfGgufFile {
+                    file: "weights.gguf".to_string(),
+                    size_bytes: 100,
+                    sha256: sha.clone(),
+                    partial_bytes: None,
+                },
+                fit: None,
+            },
+            HfGgufFileRow {
+                file: HfGgufFile {
+                    file: "no-lfs.gguf".to_string(),
+                    size_bytes: 50,
+                    sha256: String::new(),
+                    partial_bytes: None,
+                },
+                fit: None,
+            },
+        ];
+        let out = attach_partials(rows, &store);
+        // The LFS-backed row reflects the planted partial; the empty-sha row is
+        // skipped entirely.
+        assert_eq!(out[0].file.partial_bytes, Some(9));
+        assert_eq!(out[1].file.partial_bytes, None);
+    }
+
     #[test]
     fn parse_gguf_listing_rejects_invalid_json() {
         let err = parse_gguf_listing(b"not json").unwrap_err();
@@ -4587,9 +4664,19 @@ mod tests {
         let v = serde_json::to_value(HfGgufFile {
             file: "x.gguf".to_string(),
             size_bytes: 5,
+            sha256: "a".repeat(64),
+            partial_bytes: Some(3),
         })
         .unwrap();
-        assert_eq!(v, serde_json::json!({"file": "x.gguf", "size_bytes": 5}));
+        assert_eq!(
+            v,
+            serde_json::json!({
+                "file": "x.gguf",
+                "size_bytes": 5,
+                "sha256": "a".repeat(64),
+                "partial_bytes": 3,
+            })
+        );
     }
 
     // ── Model library: resolve_listing (pure) ───────────────────────────────
@@ -4959,10 +5046,14 @@ mod tests {
             HfGgufFile {
                 file: "a.gguf".to_string(),
                 size_bytes: 1 << 30,
+                sha256: String::new(),
+                partial_bytes: None,
             },
             HfGgufFile {
                 file: "b.gguf".to_string(),
                 size_bytes: 0,
+                sha256: String::new(),
+                partial_bytes: None,
             },
         ];
         let rows = annotate_gguf_rows(files.clone(), 64 << 30);
@@ -5014,12 +5105,20 @@ mod tests {
             file: HfGgufFile {
                 file: "w.gguf".to_string(),
                 size_bytes: 42,
+                sha256: String::new(),
+                partial_bytes: None,
             },
             fit: None,
         };
         assert_eq!(
             serde_json::to_value(file_row).unwrap(),
-            serde_json::json!({"file": "w.gguf", "size_bytes": 42, "fit": serde_json::Value::Null})
+            serde_json::json!({
+                "file": "w.gguf",
+                "size_bytes": 42,
+                "sha256": "",
+                "partial_bytes": serde_json::Value::Null,
+                "fit": serde_json::Value::Null,
+            })
         );
     }
 
diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index 6c07491b..16ff8ce5 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -333,6 +333,46 @@
   background: var(--accent-soft);
 }
 
+/* Paused partial: a calm label plus quiet ghost Resume / Discard, mirroring the
+ * Staff-picks paused row. */
+.quantPaused {
+  flex: none;
+  font-size: 11px;
+  color: var(--t2);
+  font-variant-numeric: tabular-nums;
+}
+.quantResume {
+  flex: none;
+  font-size: 11.5px;
+  font-weight: 700;
+  color: var(--accent);
+  border: none;
+  background: transparent;
+  font-family: inherit;
+  cursor: pointer;
+  padding: 4px 4px;
+}
+.quantDiscard {
+  flex: none;
+  font-size: 11px;
+  font-weight: 600;
+  color: var(--t3);
+  border: none;
+  background: transparent;
+  font-family: inherit;
+  cursor: pointer;
+  padding: 4px 4px;
+}
+.quantDiscard:hover {
+  color: var(--t2);
+}
+.quantGet:disabled,
+.quantResume:disabled,
+.quantDiscard:disabled {
+  opacity: 0.4;
+  cursor: default;
+}
+
 /* ── Inline notes (gated, loading, empty, error) ──────────────────────── */
 
 .note {
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index d629e1bc..88b5a652 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -80,8 +80,25 @@ const RESULTS: HfModelSummary[] = [
 ];
 
 const GGUFS: HfGgufFile[] = [
-  { file: 'gemma-q4.gguf', size_bytes: 5_000_000_000, fit: 'tight' },
-  { file: 'gemma-q8.gguf', size_bytes: 9_000_000_000 },
+  {
+    file: 'gemma-q4.gguf',
+    size_bytes: 5_000_000_000,
+    fit: 'tight',
+    sha256: 'a'.repeat(64),
+    partial_bytes: null,
+  },
+  {
+    file: 'gemma-q8.gguf',
+    size_bytes: 9_000_000_000,
+    sha256: 'b'.repeat(64),
+    partial_bytes: null,
+  },
+];
+
+/** GGUFS with an interrupted partial on the first quant. */
+const GGUFS_PARTIAL: HfGgufFile[] = [
+  { ...GGUFS[0], partial_bytes: 1_000_000_000 },
+  GGUFS[1],
 ];
 
 const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
@@ -445,6 +462,102 @@ describe('BrowseAllPane', () => {
     expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
   });
 
+  async function expandRepo(): Promise<HTMLElement> {
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
+    await flush();
+    return row;
+  }
+
+  it('shows Paused with Resume and Discard for an interrupted partial', async () => {
+    await renderPane(() => {}, { list_hf_repo_ggufs: GGUFS_PARTIAL });
+    await expandRepo();
+    expect(screen.getByText(/^Paused · \d+%$/)).toBeInTheDocument();
+    expect(screen.getByRole('button', { name: 'Resume' })).toBeInTheDocument();
+    expect(screen.getByRole('button', { name: 'Discard' })).toBeInTheDocument();
+  });
+
+  it('resumes an interrupted partial', async () => {
+    await renderPane(() => {}, { list_hf_repo_ggufs: GGUFS_PARTIAL });
+    await expandRepo();
+    fireEvent.click(screen.getByRole('button', { name: 'Resume' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_repo_model',
+      expect.objectContaining({
+        repo: 'google/gemma-4-12b-it-GGUF',
+        file: 'gemma-q4.gguf',
+      }),
+    );
+  });
+
+  it('discards an interrupted partial and refetches the listing', async () => {
+    let calls = 0;
+    await renderPane(() => {}, {
+      list_hf_repo_ggufs: () => {
+        calls += 1;
+        return calls <= 1 ? GGUFS_PARTIAL : GGUFS;
+      },
+    });
+    await expandRepo();
+    fireEvent.click(screen.getByRole('button', { name: 'Discard' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('discard_partial_download', {
+      sha256: 'a'.repeat(64),
+    });
+    // The refetch returns no partial: the row drops back to a Download control.
+    await waitFor(() =>
+      expect(
+        screen.queryByRole('button', { name: 'Resume' }),
+      ).not.toBeInTheDocument(),
+    );
+  });
+
+  it('flips a file to Paused immediately after cancel', async () => {
+    let calls = 0;
+    await renderPane(() => {}, {
+      list_hf_repo_ggufs: () => {
+        calls += 1;
+        return calls <= 1 ? GGUFS : GGUFS_PARTIAL;
+      },
+    });
+    await expandRepo();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    await flush();
+    act(() => lastChannel?.simulateMessage({ type: 'Cancelled' }));
+    await flush();
+    await waitFor(() => {
+      expect(screen.getByText(/^Paused · \d+%$/)).toBeInTheDocument();
+      expect(
+        screen.getByRole('button', { name: 'Resume' }),
+      ).toBeInTheDocument();
+    });
+  });
+
+  it('keeps the listing when the post-discard refetch fails', async () => {
+    let calls = 0;
+    await renderPane(() => {}, {
+      list_hf_repo_ggufs: () => {
+        calls += 1;
+        if (calls === 1) return GGUFS_PARTIAL;
+        throw new Error('list failed');
+      },
+    });
+    await expandRepo();
+    fireEvent.click(screen.getByRole('button', { name: 'Discard' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('discard_partial_download', {
+      sha256: 'a'.repeat(64),
+    });
+    // The refetch threw: the row stays exactly as it was, no crash.
+    expect(screen.getByText(/^Paused · \d+%$/)).toBeInTheDocument();
+  });
+
   it('keeps the other quant rows visible and downloadable while one downloads', async () => {
     await renderPane();
     const row = screen
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 6463be36..20f32a6d 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -213,6 +213,31 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
     })();
   }, [state.phase, onSaved, reset]);
 
+  // Silent re-read of the listing (no loading flash): the rows carry fresh
+  // `partial_bytes`, so a file flips to/from its Paused state in place.
+  const refetchFiles = useCallback(async () => {
+    try {
+      // The listing was already validated on first load; trust the typed array.
+      setFiles(
+        await invoke<HfGgufFile[]>('list_hf_repo_ggufs', { repo: model.id }),
+      );
+    } catch {
+      // Keep the current list; the partial indicator self-heals on next expand.
+    }
+  }, [model.id]);
+
+  // Cancelling leaves the partial on disk; re-read the listing so the file
+  // flips straight to its Paused / Resume / Discard controls.
+  async function cancelDownload() {
+    await cancel();
+    await refetchFiles();
+  }
+
+  async function discardFile(sha256: string) {
+    await invoke('discard_partial_download', { sha256 });
+    await refetchFiles();
+  }
+
   const showProgress = state.phase !== 'idle';
   // The context window is a per-repo property (the search carries it via
   // expand[]=gguf), so it shows on the collapsed row without expanding. Empty
@@ -265,10 +290,19 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
           {files !== null && files.length > 0
             ? files.map((f) => {
                 // Only the row whose file is downloading swaps its controls for
-                // the inline progress; every other row stays a normal,
-                // browsable quant (its download disabled until this one ends,
-                // since the engine runs one download at a time).
+                // the inline progress. A file with an interrupted partial reads
+                // as Paused with Resume / Discard; everything else is a normal,
+                // browsable quant. Resume and Discard are disabled while any
+                // download runs, since the engine handles one at a time.
                 const downloading = showProgress && activeFile === f.file;
+                const paused = !downloading && f.partial_bytes !== null;
+                const pausedPct =
+                  f.partial_bytes !== null
+                    ? Math.min(
+                        100,
+                        Math.floor((f.partial_bytes / f.size_bytes) * 100),
+                      )
+                    : 0;
                 return (
                   <div className={styles.quantRow} key={f.file}>
                     <span className={styles.quantName}>{f.file}</span>
@@ -283,7 +317,7 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                         // covered handlers rather than dead no-op literals.
                         onConfirm={reset}
                         onCancelConfirm={reset}
-                        onCancel={() => void cancel()}
+                        onCancel={() => void cancelDownload()}
                         onRetry={() => void retry()}
                         // A terminal failure must leave a path back to the quant
                         // list, not just Retry; reset returns to the file rows.
@@ -303,21 +337,51 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                             </span>
                           </Tooltip>
                         ) : null}
-                        <span className={styles.quantSize}>
-                          {gb(f.size_bytes)} GB
-                        </span>
-                        <button
-                          type="button"
-                          className={styles.quantGet}
-                          aria-label="Download"
-                          disabled={showProgress}
-                          onClick={() => {
-                            setActiveFile(f.file);
-                            void startRepo(model.id, f.file);
-                          }}
-                        >
-                          {DOWNLOAD_ICON}
-                        </button>
+                        {paused ? (
+                          <>
+                            <span className={styles.quantPaused}>
+                              Paused · {pausedPct}%
+                            </span>
+                            <button
+                              type="button"
+                              className={styles.quantResume}
+                              disabled={showProgress}
+                              onClick={() => {
+                                setActiveFile(f.file);
+                                void startRepo(model.id, f.file);
+                              }}
+                            >
+                              Resume
+                            </button>
+                            <button
+                              type="button"
+                              className={styles.quantDiscard}
+                              aria-label="Discard"
+                              disabled={showProgress}
+                              onClick={() => void discardFile(f.sha256)}
+                            >
+                              Discard
+                            </button>
+                          </>
+                        ) : (
+                          <>
+                            <span className={styles.quantSize}>
+                              {gb(f.size_bytes)} GB
+                            </span>
+                            <button
+                              type="button"
+                              className={styles.quantGet}
+                              aria-label="Download"
+                              disabled={showProgress}
+                              onClick={() => {
+                                setActiveFile(f.file);
+                                void startRepo(model.id, f.file);
+                              }}
+                            >
+                              {DOWNLOAD_ICON}
+                            </button>
+                          </>
+                        )}
                       </>
                     )}
                   </div>
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 1f75ff6d..271f46f9 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -122,6 +122,10 @@ export interface HfGgufFile {
   file: string;
   size_bytes: number;
   fit?: RamFit | null;
+  /** LFS content digest; the key used to discard this file's partial. */
+  sha256: string;
+  /** Bytes of an interrupted partial for this file on disk, or null when none. */
+  partial_bytes: number | null;
 }
 
 /** Engine lifecycle snapshot published on the `engine:status` event. */

From 9bac5f050524d5eb58e621b27cb33648463437ca Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 17:52:27 -0500
Subject: [PATCH 66/89] fix: keep Discover model downloads alive across tab
 switches and the Settings webview

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/__tests__/App.test.tsx                    |   4 +
 src/contexts/DownloadContext.tsx              |  65 ++++-
 .../__tests__/DownloadContext.test.tsx        |  53 ++++
 src/settings/SettingsWindow.test.tsx          |  19 ++
 src/settings/SettingsWindow.tsx               | 264 +++++++++---------
 .../tabs/models/BrowseAllPane.test.tsx        |  15 +-
 src/settings/tabs/models/BrowseAllPane.tsx    | 111 +++++---
 .../tabs/models/DiscoverPane.test.tsx         | 157 ++++++++++-
 .../tabs/models/StaffPicksPane.test.tsx       |  19 +-
 src/settings/tabs/models/StaffPicksPane.tsx   |  74 +++--
 src/settings/tabs/tabs.test.tsx               |   3 +
 11 files changed, 585 insertions(+), 199 deletions(-)

diff --git a/src/__tests__/App.test.tsx b/src/__tests__/App.test.tsx
index 58dc56e5..22519012 100644
--- a/src/__tests__/App.test.tsx
+++ b/src/__tests__/App.test.tsx
@@ -81,6 +81,10 @@ function makeDownloadCtx(
     pausedBytes: 0,
     pauseDownload: vi.fn(),
     resumeFromPause: vi.fn(),
+    activeDownload: null,
+    startStaffPick: vi.fn(),
+    startRepoDownload: vi.fn(),
+    clearActiveDownload: vi.fn(),
     ...overrides,
   };
 }
diff --git a/src/contexts/DownloadContext.tsx b/src/contexts/DownloadContext.tsx
index 74625ee5..0f9cdd03 100644
--- a/src/contexts/DownloadContext.tsx
+++ b/src/contexts/DownloadContext.tsx
@@ -32,6 +32,18 @@ import {
 import { useConfig } from './ConfigContext';
 import type { StarterOption, StarterTier } from '../types/starter';
 
+/**
+ * Identity of the Settings → Discover download in flight, kept here (above the
+ * panes) so the owning row re-shows live progress after a Staff-picks /
+ * Browse-all tab switch unmounts and remounts it. The single-slot backend
+ * download outlives a pane unmount, so the frontend's view of it must too:
+ * `'staff'` is a curated catalog entry keyed by its stable id; `'repo'` is a
+ * Browse-all repo + GGUF file.
+ */
+export type ActiveDownload =
+  | { kind: 'staff'; id: string }
+  | { kind: 'repo'; repo: string; file: string };
+
 export interface DownloadContextValue extends UseDownloadModel {
   /** Tier whose download is in flight; null when idle. */
   downloadingTier: StarterTier | null;
@@ -76,6 +88,18 @@ export interface DownloadContextValue extends UseDownloadModel {
   pauseDownload: () => void;
   /** Resume a paused download from where it stopped. */
   resumeFromPause: () => void;
+  /**
+   * Which Discover row owns the in-flight download, or null when none does.
+   * Survives a pane unmount so the row re-binds to the live progress on
+   * remount instead of re-reading the on-disk partial as a stale "Paused".
+   */
+  activeDownload: ActiveDownload | null;
+  /** Start (or resume) a Staff Picks catalog download and record its row id. */
+  startStaffPick: (id: string) => void;
+  /** Start (or resume) a Browse-all repo download and record its row identity. */
+  startRepoDownload: (repo: string, file: string) => void;
+  /** Forget the active Discover download row (terminal card dismissed/paused). */
+  clearActiveDownload: () => void;
 }
 
 const DownloadContext = createContext<DownloadContextValue | null>(null);
@@ -89,8 +113,19 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
   const [activeOption, setActiveOption] = useState<StarterOption | null>(null);
   const [pauseRequested, setPauseRequested] = useState(false);
   const [pausedBytes, setPausedBytes] = useState(0);
+  const [activeDownload, setActiveDownload] = useState<ActiveDownload | null>(
+    null,
+  );
 
-  const { start, resume, cancel, discard, combinedBytes } = download;
+  const {
+    start,
+    resume,
+    startById,
+    startRepo,
+    cancel,
+    discard,
+    combinedBytes,
+  } = download;
   const downloadPhase = download.state.phase;
 
   // A pause is only *committed* once the cancel has fully landed (machine back
@@ -182,6 +217,26 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
     resumeDownload(activeOption!.starter.tier, activeOption!, pausedBytes);
   }, [activeOption, pausedBytes, resumeDownload]);
 
+  // Discover (Settings) download identity. A resume runs the same start path
+  // (the backend resumes the kept partial via Range), so resume reuses these.
+  const startStaffPick = useCallback(
+    (id: string) => {
+      setActiveDownload({ kind: 'staff', id });
+      void startById(id);
+    },
+    [startById],
+  );
+
+  const startRepoDownload = useCallback(
+    (repo: string, file: string) => {
+      setActiveDownload({ kind: 'repo', repo, file });
+      void startRepo(repo, file);
+    },
+    [startRepo],
+  );
+
+  const clearActiveDownload = useCallback(() => setActiveDownload(null), []);
+
   const grandTotalBytes =
     activeOption === null
       ? null
@@ -201,6 +256,10 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
       pausedBytes,
       pauseDownload,
       resumeFromPause,
+      activeDownload,
+      startStaffPick,
+      startRepoDownload,
+      clearActiveDownload,
     }),
     [
       download,
@@ -215,6 +274,10 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
       pausedBytes,
       pauseDownload,
       resumeFromPause,
+      activeDownload,
+      startStaffPick,
+      startRepoDownload,
+      clearActiveDownload,
     ],
   );
 
diff --git a/src/contexts/__tests__/DownloadContext.test.tsx b/src/contexts/__tests__/DownloadContext.test.tsx
index 8f3fe83d..4c465216 100644
--- a/src/contexts/__tests__/DownloadContext.test.tsx
+++ b/src/contexts/__tests__/DownloadContext.test.tsx
@@ -113,6 +113,59 @@ describe('DownloadContext', () => {
     expect(result.current.resumeSeedBytes).toBeNull();
     expect(result.current.activeOption).toBeNull();
     expect(result.current.grandTotalBytes).toBeNull();
+    expect(result.current.activeDownload).toBeNull();
+  });
+
+  it('startStaffPick records the row id and runs the verified catalog path', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+
+    expect(result.current.activeDownload).toEqual({
+      kind: 'staff',
+      id: 'gemma-4-12b',
+    });
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_staff_pick', {
+      id: 'gemma-4-12b',
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('startRepoDownload records the repo + file and runs the repo path', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+
+    await act(async () => {
+      result.current.startRepoDownload('org/repo', 'weights-q4.gguf');
+    });
+
+    expect(result.current.activeDownload).toEqual({
+      kind: 'repo',
+      repo: 'org/repo',
+      file: 'weights-q4.gguf',
+    });
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_repo_model', {
+      repo: 'org/repo',
+      file: 'weights-q4.gguf',
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('clearActiveDownload forgets the active Discover download row', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+    expect(result.current.activeDownload).not.toBeNull();
+
+    act(() => {
+      result.current.clearActiveDownload();
+    });
+    expect(result.current.activeDownload).toBeNull();
   });
 
   it('beginDownload records the tier, option, grand total and starts the machine', async () => {
diff --git a/src/settings/SettingsWindow.test.tsx b/src/settings/SettingsWindow.test.tsx
index 4a8ef9c9..145e699a 100644
--- a/src/settings/SettingsWindow.test.tsx
+++ b/src/settings/SettingsWindow.test.tsx
@@ -159,6 +159,25 @@ describe('SettingsWindow', () => {
     );
   });
 
+  // Regression: the Settings window is its own webview root. The Discover panes
+  // read the app-root download context, so the Settings tree must provide a
+  // DownloadProvider or opening Discover throws and blanks the window.
+  it('opens Discover without crashing the Settings window', async () => {
+    invokeMock.mockImplementation(async (cmd: string) => {
+      if (cmd === 'get_staff_picks') return [];
+      return defaultInvoke(cmd);
+    });
+    render(<SettingsWindow />);
+    await waitFor(() => screen.getByRole('tab', { name: /Models/ }));
+    await act(async () => {
+      fireEvent.click(screen.getByRole('tab', { name: 'Discover' }));
+      await Promise.resolve();
+    });
+    expect(
+      await screen.findByRole('tab', { name: 'Staff picks' }),
+    ).toBeInTheDocument();
+  });
+
   it('switching tabs swaps the active tab body', async () => {
     render(<SettingsWindow />);
     await waitFor(() => screen.getByRole('tab', { name: /Display/ }));
diff --git a/src/settings/SettingsWindow.tsx b/src/settings/SettingsWindow.tsx
index 52f37f6b..342383e1 100644
--- a/src/settings/SettingsWindow.tsx
+++ b/src/settings/SettingsWindow.tsx
@@ -23,6 +23,7 @@ import {
 import { invoke } from '@tauri-apps/api/core';
 import { getCurrentWindow } from '@tauri-apps/api/window';
 
+import { DownloadProvider } from '../contexts/DownloadContext';
 import { useConfigSync } from './hooks/useConfigSync';
 import { useSettingsAutoResize } from './hooks/useSettingsAutoResize';
 import { ModelTab } from './tabs/ModelTab';
@@ -304,143 +305,152 @@ export function SettingsWindow() {
 
   if (!config) return null;
 
+  // The Settings window is its own webview root (see `main.tsx`), so it needs
+  // its own DownloadProvider: the Discover panes read the download machine from
+  // it, and hosting it here (above the section nav and the Models segmented
+  // control) keeps a Discover download alive across every in-window tab switch.
+  // It is independent of the main overlay's provider; the backend single-slot
+  // download is the real cross-window coordinator.
   return (
-    <div className={styles.window} onMouseDown={handleDragStart}>
-      <WindowControls onClose={handleHide} />
+    <DownloadProvider>
+      <div className={styles.window} onMouseDown={handleDragStart}>
+        <WindowControls onClose={handleHide} />
 
-      {marker && !markerDismissed ? (
-        <div className={styles.banner} role="alert">
-          <span className={styles.bannerIcon} aria-hidden>
-            ⚠
-          </span>
-          <span className={styles.bannerText}>
-            Your previous <code>config.toml</code> had a syntax error and was
-            saved as <code>{baseName(marker.path)}</code>. Defaults are now
-            active.
-          </span>
-          <span className={styles.bannerActions}>
-            <button
-              type="button"
-              className={`${styles.button} ${styles.buttonGhost}`}
-              onClick={() =>
-                void invoke('open_url', {
-                  url: `file://${encodeURI(marker.path).replace(/'/g, '%27')}`,
-                })
-              }
-            >
-              Reveal
-            </button>
-            <button
-              type="button"
-              className={`${styles.button} ${styles.buttonGhost}`}
-              onClick={() => setMarkerDismissed(true)}
-            >
-              Dismiss
-            </button>
-          </span>
-        </div>
-      ) : null}
+        {marker && !markerDismissed ? (
+          <div className={styles.banner} role="alert">
+            <span className={styles.bannerIcon} aria-hidden>
+              ⚠
+            </span>
+            <span className={styles.bannerText}>
+              Your previous <code>config.toml</code> had a syntax error and was
+              saved as <code>{baseName(marker.path)}</code>. Defaults are now
+              active.
+            </span>
+            <span className={styles.bannerActions}>
+              <button
+                type="button"
+                className={`${styles.button} ${styles.buttonGhost}`}
+                onClick={() =>
+                  void invoke('open_url', {
+                    url: `file://${encodeURI(marker.path).replace(/'/g, '%27')}`,
+                  })
+                }
+              >
+                Reveal
+              </button>
+              <button
+                type="button"
+                className={`${styles.button} ${styles.buttonGhost}`}
+                onClick={() => setMarkerDismissed(true)}
+              >
+                Dismiss
+              </button>
+            </span>
+          </div>
+        ) : null}
 
-      {updater.state.update && !settingsSnoozed ? (
-        <UpdateBanner
-          version={updater.state.update.version}
-          notesUrl={updater.state.update.notes_url}
-          onInstall={() => void updater.openWindow()}
-          onLater={() => void updater.snoozeSettings(24)}
-        />
-      ) : null}
+        {updater.state.update && !settingsSnoozed ? (
+          <UpdateBanner
+            version={updater.state.update.version}
+            notesUrl={updater.state.update.notes_url}
+            onInstall={() => void updater.openWindow()}
+            onLater={() => void updater.snoozeSettings(24)}
+          />
+        ) : null}
 
-      <div className={styles.stage}>
-        <div className={styles.side}>
-          <div className={styles.sideGroup}>Settings</div>
-          <div
-            role="tablist"
-            aria-label="Settings sections"
-            aria-orientation="vertical"
-            className={styles.sideTabs}
-          >
-            {TABS.map((tab) => {
-              const active = tab.id === activeTab;
-              return (
-                <button
-                  key={tab.id}
-                  type="button"
-                  role="tab"
-                  aria-selected={active}
-                  aria-controls={`panel-${tab.id}`}
-                  tabIndex={active ? 0 : -1}
-                  className={`${styles.sideItem} ${active ? styles.sideItemActive : ''}`}
-                  onClick={() => setActiveTab(tab.id)}
-                  onKeyDown={(e) => {
-                    const isNext =
-                      e.key === 'ArrowDown' || e.key === 'ArrowRight';
-                    const isPrev = e.key === 'ArrowUp' || e.key === 'ArrowLeft';
-                    if (isNext || isPrev) {
-                      e.preventDefault();
-                      const idx = TABS.findIndex((t) => t.id === activeTab);
-                      const next = isNext
-                        ? TABS[(idx + 1) % TABS.length]
-                        : TABS[(idx - 1 + TABS.length) % TABS.length];
-                      setActiveTab(next.id);
-                    }
-                  }}
-                >
-                  <span className={styles.sideItemIcon} aria-hidden>
-                    {tab.icon}
-                  </span>
-                  <span className={styles.sideItemLabel}>{tab.label}</span>
-                </button>
-              );
-            })}
+        <div className={styles.stage}>
+          <div className={styles.side}>
+            <div className={styles.sideGroup}>Settings</div>
+            <div
+              role="tablist"
+              aria-label="Settings sections"
+              aria-orientation="vertical"
+              className={styles.sideTabs}
+            >
+              {TABS.map((tab) => {
+                const active = tab.id === activeTab;
+                return (
+                  <button
+                    key={tab.id}
+                    type="button"
+                    role="tab"
+                    aria-selected={active}
+                    aria-controls={`panel-${tab.id}`}
+                    tabIndex={active ? 0 : -1}
+                    className={`${styles.sideItem} ${active ? styles.sideItemActive : ''}`}
+                    onClick={() => setActiveTab(tab.id)}
+                    onKeyDown={(e) => {
+                      const isNext =
+                        e.key === 'ArrowDown' || e.key === 'ArrowRight';
+                      const isPrev =
+                        e.key === 'ArrowUp' || e.key === 'ArrowLeft';
+                      if (isNext || isPrev) {
+                        e.preventDefault();
+                        const idx = TABS.findIndex((t) => t.id === activeTab);
+                        const next = isNext
+                          ? TABS[(idx + 1) % TABS.length]
+                          : TABS[(idx - 1 + TABS.length) % TABS.length];
+                        setActiveTab(next.id);
+                      }
+                    }}
+                  >
+                    <span className={styles.sideItemIcon} aria-hidden>
+                      {tab.icon}
+                    </span>
+                    <span className={styles.sideItemLabel}>{tab.label}</span>
+                  </button>
+                );
+              })}
+            </div>
+            <div className={styles.sideSpacer} />
           </div>
-          <div className={styles.sideSpacer} />
-        </div>
 
-        <div className={styles.main}>
-          <div
-            className={`${styles.body} ${bodyShouldScroll ? styles.bodyScrollable : ''}`}
-            id={`panel-${activeTab}`}
-            role="tabpanel"
-          >
-            <div ref={setContentEl}>
-              {activeTab === 'general' ? (
-                <ModelTab
-                  config={config}
-                  resyncToken={resyncToken}
-                  onSaved={handleSaved}
-                />
-              ) : null}
-              {activeTab === 'behavior' ? (
-                <BehaviorTab
-                  config={config}
-                  resyncToken={resyncToken}
-                  onSaved={handleSaved}
-                />
-              ) : null}
-              {activeTab === 'search' ? (
-                <SearchTab
-                  config={config}
-                  resyncToken={resyncToken}
-                  onSaved={handleSaved}
-                />
-              ) : null}
-              {activeTab === 'display' ? (
-                <DisplayTab
-                  config={config}
-                  resyncToken={resyncToken}
-                  onSaved={handleSaved}
-                />
-              ) : null}
-              {activeTab === 'about' ? (
-                <AboutTab onSaved={handleSaved} onReload={reload} />
-              ) : null}
+          <div className={styles.main}>
+            <div
+              className={`${styles.body} ${bodyShouldScroll ? styles.bodyScrollable : ''}`}
+              id={`panel-${activeTab}`}
+              role="tabpanel"
+            >
+              <div ref={setContentEl}>
+                {activeTab === 'general' ? (
+                  <ModelTab
+                    config={config}
+                    resyncToken={resyncToken}
+                    onSaved={handleSaved}
+                  />
+                ) : null}
+                {activeTab === 'behavior' ? (
+                  <BehaviorTab
+                    config={config}
+                    resyncToken={resyncToken}
+                    onSaved={handleSaved}
+                  />
+                ) : null}
+                {activeTab === 'search' ? (
+                  <SearchTab
+                    config={config}
+                    resyncToken={resyncToken}
+                    onSaved={handleSaved}
+                  />
+                ) : null}
+                {activeTab === 'display' ? (
+                  <DisplayTab
+                    config={config}
+                    resyncToken={resyncToken}
+                    onSaved={handleSaved}
+                  />
+                ) : null}
+                {activeTab === 'about' ? (
+                  <AboutTab onSaved={handleSaved} onReload={reload} />
+                ) : null}
+              </div>
             </div>
           </div>
         </div>
-      </div>
 
-      <SavedPill visible={savedVisible} />
-    </div>
+        <SavedPill visible={savedVisible} />
+      </div>
+    </DownloadProvider>
   );
 }
 
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 88b5a652..58ac0747 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -24,6 +24,7 @@ import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
 import { invoke } from '@tauri-apps/api/core';
 
 import { BrowseAllPane } from './BrowseAllPane';
+import { DownloadProvider } from '../../../contexts/DownloadContext';
 import {
   HF_SEARCH_DEBOUNCE_MS,
   HF_PAGE_SIZE,
@@ -139,7 +140,9 @@ async function renderPane(
   overrides: Record<string, unknown> = {},
 ) {
   mockCommands(discoverResponses(overrides));
-  const view = render(<BrowseAllPane onSaved={onSaved} />);
+  const view = render(<BrowseAllPane onSaved={onSaved} />, {
+    wrapper: DownloadProvider,
+  });
   await waitFor(() =>
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
       query: '',
@@ -190,7 +193,7 @@ describe('BrowseAllPane', () => {
   it('typing in the search drives a debounced fetch and re-renders results', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<BrowseAllPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -219,7 +222,7 @@ describe('BrowseAllPane', () => {
   it('clicking a family chip sets the query to that family', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<BrowseAllPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -243,7 +246,7 @@ describe('BrowseAllPane', () => {
   it('the All chip clears the query and is active by default', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<BrowseAllPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -618,7 +621,7 @@ describe('BrowseAllPane', () => {
       resolveSearch = res;
     });
     mockCommands(discoverResponses({ search_hf_models: pending }));
-    render(<BrowseAllPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
     await flush();
     expect(screen.getByText('Searching…')).toBeInTheDocument();
     await act(async () => {
@@ -645,7 +648,7 @@ describe('BrowseAllPane', () => {
         gated: false,
       }));
     mockCommands(discoverResponses({ search_hf_models: full(HF_PAGE_SIZE) }));
-    render(<BrowseAllPane onSaved={() => {}} />);
+    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 20f32a6d..b3281b04 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -14,11 +14,11 @@
  * batch. A finished install lifts a fresh config snapshot and collapses the row.
  */
 
-import { useCallback, useEffect, useState } from 'react';
+import { useCallback, useEffect, useRef, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 
 import { DownloadProgress } from '../../../components/DownloadProgress';
-import { useDownloadModel } from '../../../hooks/useDownloadModel';
+import { useDownloadCtx } from '../../../contexts/DownloadContext';
 import { useHfSearch } from './useHfSearch';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
@@ -160,15 +160,34 @@ interface BrowseAllRowProps {
  * row so two rows cannot share an in-flight download.
  */
 function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
-  const [expanded, setExpanded] = useState(false);
+  // The download machine lives at the app root (DownloadProvider), shared with
+  // every other row and pane, so a tab switch that unmounts Browse all never
+  // drops an in-flight download: the single-slot backend download outlives the
+  // pane. `activeDownload` names the repo + file that owns it.
+  const {
+    state,
+    progress,
+    etaSeconds,
+    startRepoDownload,
+    cancel,
+    retry,
+    reset,
+    activeDownload,
+    clearActiveDownload,
+  } = useDownloadCtx();
+
+  // The file this repo's row is currently downloading, or null when another row
+  // (or no download) owns the single in-flight slot. Drives which quant swaps to
+  // the live progress card and whether a row remounted by a tab switch re-expands.
+  const activeRepoFile =
+    activeDownload?.kind === 'repo' && activeDownload.repo === model.id
+      ? activeDownload.file
+      : null;
+  const ownsActiveDownload = activeRepoFile !== null;
+
+  const [expanded, setExpanded] = useState(ownsActiveDownload);
   const [files, setFiles] = useState<HfGgufFile[] | null>(null);
   const [listError, setListError] = useState<string | null>(null);
-  // The file the user pressed download on, so its row (and only its row) shows
-  // the in-flight progress while the rest of the quant list stays put.
-  const [activeFile, setActiveFile] = useState<string | null>(null);
-
-  const { state, progress, etaSeconds, startRepo, cancel, retry, reset } =
-    useDownloadModel();
 
   const org = orgOf(model.id);
 
@@ -185,6 +204,14 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
     }
   }, [model.id]);
 
+  // A remount that lands on the row owning the in-flight download (re-expanded
+  // above) loads its quant list once so the live progress shows again instead
+  // of staying behind a collapsed row. Fires only on mount.
+  const restoreActiveRow = useRef(ownsActiveDownload);
+  useEffect(() => {
+    if (restoreActiveRow.current) void loadFiles();
+  }, [loadFiles]);
+
   function toggle() {
     if (expanded) {
       setExpanded(false);
@@ -198,10 +225,11 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
     void invoke('open_url', { url: `${HF_BASE_URL}/${model.id}` });
   }
 
-  // A finished install: the backend already wrote the builtin provider's
-  // model field, so lift the fresh config snapshot and collapse the row.
+  // A finished install: the backend already wrote the builtin provider's model
+  // field, so lift the fresh config snapshot and collapse the row. The machine
+  // is shared across rows, so only the row that owns the download reacts.
   useEffect(() => {
-    if (state.phase !== 'ready') return;
+    if (state.phase !== 'ready' || !ownsActiveDownload) return;
     void (async () => {
       try {
         onSaved(await invoke<RawAppConfig>('get_config'));
@@ -209,9 +237,10 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
         // The focus-driven resync picks the change up on next activation.
       }
       reset();
+      clearActiveDownload();
       setExpanded(false);
     })();
-  }, [state.phase, onSaved, reset]);
+  }, [state.phase, ownsActiveDownload, onSaved, reset, clearActiveDownload]);
 
   // Silent re-read of the listing (no loading flash): the rows carry fresh
   // `partial_bytes`, so a file flips to/from its Paused state in place.
@@ -226,19 +255,29 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
     }
   }, [model.id]);
 
-  // Cancelling leaves the partial on disk; re-read the listing so the file
-  // flips straight to its Paused / Resume / Discard controls.
+  // Cancelling leaves the partial on disk; forget the active row and re-read the
+  // listing so the file flips straight to its Paused / Resume / Discard controls.
   async function cancelDownload() {
     await cancel();
+    clearActiveDownload();
     await refetchFiles();
   }
 
+  // A terminal card's exit (Choose a different model, and the unused confirm
+  // fallbacks): return to the quant list and forget the active row.
+  function returnToList() {
+    reset();
+    clearActiveDownload();
+  }
+
   async function discardFile(sha256: string) {
     await invoke('discard_partial_download', { sha256 });
     await refetchFiles();
   }
 
-  const showProgress = state.phase !== 'idle';
+  // True while ANY download runs (the engine handles one at a time): every other
+  // row's controls disable; the owning file swaps to the live progress card.
+  const anyInFlight = state.phase !== 'idle';
   // The context window is a per-repo property (the search carries it via
   // expand[]=gguf), so it shows on the collapsed row without expanding. Empty
   // when unknown, which skips it.
@@ -289,12 +328,12 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
           ) : null}
           {files !== null && files.length > 0
             ? files.map((f) => {
-                // Only the row whose file is downloading swaps its controls for
-                // the inline progress. A file with an interrupted partial reads
-                // as Paused with Resume / Discard; everything else is a normal,
-                // browsable quant. Resume and Discard are disabled while any
-                // download runs, since the engine handles one at a time.
-                const downloading = showProgress && activeFile === f.file;
+                // Only the file that owns the in-flight download swaps its
+                // controls for the inline progress. A file with an interrupted
+                // partial reads as Paused with Resume / Discard; everything else
+                // is a normal, browsable quant. Resume and Discard are disabled
+                // while any download runs, since the engine handles one at a time.
+                const downloading = anyInFlight && activeRepoFile === f.file;
                 const paused = !downloading && f.partial_bytes !== null;
                 const pausedPct =
                   f.partial_bytes !== null
@@ -315,13 +354,13 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                         // (only the starter picker does), so the confirm card
                         // never renders; these required props point at the same
                         // covered handlers rather than dead no-op literals.
-                        onConfirm={reset}
-                        onCancelConfirm={reset}
+                        onConfirm={returnToList}
+                        onCancelConfirm={returnToList}
                         onCancel={() => void cancelDownload()}
                         onRetry={() => void retry()}
                         // A terminal failure must leave a path back to the quant
-                        // list, not just Retry; reset returns to the file rows.
-                        onChooseAnother={reset}
+                        // list, not just Retry; this returns to the file rows.
+                        onChooseAnother={returnToList}
                       />
                     ) : (
                       <>
@@ -345,11 +384,10 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                             <button
                               type="button"
                               className={styles.quantResume}
-                              disabled={showProgress}
-                              onClick={() => {
-                                setActiveFile(f.file);
-                                void startRepo(model.id, f.file);
-                              }}
+                              disabled={anyInFlight}
+                              onClick={() =>
+                                startRepoDownload(model.id, f.file)
+                              }
                             >
                               Resume
                             </button>
@@ -357,7 +395,7 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                               type="button"
                               className={styles.quantDiscard}
                               aria-label="Discard"
-                              disabled={showProgress}
+                              disabled={anyInFlight}
                               onClick={() => void discardFile(f.sha256)}
                             >
                               Discard
@@ -372,11 +410,10 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
                               type="button"
                               className={styles.quantGet}
                               aria-label="Download"
-                              disabled={showProgress}
-                              onClick={() => {
-                                setActiveFile(f.file);
-                                void startRepo(model.id, f.file);
-                              }}
+                              disabled={anyInFlight}
+                              onClick={() =>
+                                startRepoDownload(model.id, f.file)
+                              }
                             >
                               {DOWNLOAD_ICON}
                             </button>
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index 2bfb3333..74b878e0 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -5,13 +5,20 @@
  * own suites; here we only test the tab control and which pane it shows.
  */
 
-import { fireEvent, render, screen, waitFor } from '@testing-library/react';
+import {
+  act,
+  fireEvent,
+  render,
+  screen,
+  waitFor,
+} from '@testing-library/react';
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { invoke } from '@tauri-apps/api/core';
 
 import { DiscoverPane } from './DiscoverPane';
 import { clearHfSearchCache } from './useHfSearch';
+import { DownloadProvider } from '../../../contexts/DownloadContext';
 import type { Starter, StarterOption } from '../../../types/starter';
 
 const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
@@ -55,7 +62,9 @@ beforeEach(() => {
 });
 
 function renderHost() {
-  return render(<DiscoverPane onSaved={() => {}} />);
+  return render(<DiscoverPane onSaved={() => {}} />, {
+    wrapper: DownloadProvider,
+  });
 }
 
 /** Staff picks is showing when its curated use-case sections are on screen. */
@@ -122,3 +131,147 @@ describe('DiscoverPane host', () => {
     );
   });
 });
+
+describe('DiscoverPane download persistence', () => {
+  type MockChannel = { simulateMessage: (msg: unknown) => void };
+  let channel: MockChannel | null = null;
+
+  async function flush() {
+    await act(async () => {
+      await Promise.resolve();
+      await Promise.resolve();
+    });
+  }
+
+  beforeEach(() => {
+    invokeMock.mockReset();
+    clearHfSearchCache();
+    channel = null;
+    invokeMock.mockImplementation(
+      async (cmd: string, args?: Record<string, unknown>) => {
+        if (args && 'onEvent' in args) {
+          channel = args.onEvent as unknown as MockChannel;
+        }
+        if (cmd === 'get_staff_picks') return [STARTER];
+        if (cmd === 'search_hf_models') {
+          return [
+            {
+              id: 'google/gemma-4-12b-it-GGUF',
+              downloads: 1_200_000,
+              gated: false,
+            },
+          ];
+        }
+        if (cmd === 'list_hf_repo_ggufs') {
+          return [
+            {
+              file: 'gemma-q4.gguf',
+              size_bytes: 5_000_000_000,
+              fit: 'tight',
+              sha256: 'a'.repeat(64),
+              partial_bytes: null,
+            },
+          ];
+        }
+        return undefined;
+      },
+    );
+  });
+
+  // The bug: starting a download in Staff picks, switching to Browse all, then
+  // back drops the live progress (the pane owned a component-local download
+  // machine that died on unmount while the single-slot backend download kept
+  // running). The shared app-root machine must keep the progress alive.
+  it('keeps a live Staff-picks download visible across a Browse-all round trip', async () => {
+    render(<DiscoverPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    await waitFor(() => expect(staffPicksVisible()).toBe(true));
+
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    await flush();
+    act(() =>
+      channel?.simulateMessage({
+        type: 'Started',
+        data: {
+          file: 'gemma.gguf',
+          total_bytes: 7_000_000_000,
+          resumed_from: 0,
+        },
+      }),
+    );
+    act(() =>
+      channel?.simulateMessage({
+        type: 'Progress',
+        data: {
+          file: 'gemma.gguf',
+          bytes: 2_520_000_000,
+          total_bytes: 7_000_000_000,
+        },
+      }),
+    );
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
+
+    fireEvent.click(screen.getByRole('tab', { name: 'Browse all' }));
+    await waitFor(() => expect(browseAllVisible()).toBe(true));
+    fireEvent.click(screen.getByRole('tab', { name: 'Staff picks' }));
+    await waitFor(() => expect(staffPicksVisible()).toBe(true));
+
+    // Live progress is still on screen: no Paused row, no "already in progress".
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
+    expect(screen.queryByText(/^Paused · /)).not.toBeInTheDocument();
+    expect(
+      screen.queryByText('a download is already in progress'),
+    ).not.toBeInTheDocument();
+  });
+
+  // The symmetric case for the advanced pathway: a Browse-all repo download must
+  // also survive a Staff-picks round trip, re-binding to the owning row (which
+  // re-expands) instead of resetting to a collapsed, idle row.
+  it('keeps a live Browse-all download visible across a Staff-picks round trip', async () => {
+    render(<DiscoverPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    await waitFor(() => expect(staffPicksVisible()).toBe(true));
+
+    fireEvent.click(screen.getByRole('tab', { name: 'Browse all' }));
+    await waitFor(() => expect(browseAllVisible()).toBe(true));
+    await waitFor(() =>
+      expect(
+        screen.getByText('google/gemma-4-12b-it-GGUF'),
+      ).toBeInTheDocument(),
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Show files' }));
+    await waitFor(() =>
+      expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument(),
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    await flush();
+    act(() =>
+      channel?.simulateMessage({
+        type: 'Started',
+        data: {
+          file: 'gemma-q4.gguf',
+          total_bytes: 5_000_000_000,
+          resumed_from: 0,
+        },
+      }),
+    );
+    act(() =>
+      channel?.simulateMessage({
+        type: 'Progress',
+        data: {
+          file: 'gemma-q4.gguf',
+          bytes: 1_500_000_000,
+          total_bytes: 5_000_000_000,
+        },
+      }),
+    );
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
+
+    fireEvent.click(screen.getByRole('tab', { name: 'Staff picks' }));
+    await waitFor(() => expect(staffPicksVisible()).toBe(true));
+    fireEvent.click(screen.getByRole('tab', { name: 'Browse all' }));
+    await waitFor(() => expect(browseAllVisible()).toBe(true));
+
+    await waitFor(() =>
+      expect(screen.getByTestId('download-figures')).toBeInTheDocument(),
+    );
+  });
+});
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index b89ec8af..3caaf7e7 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -24,6 +24,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
 import { invoke } from '@tauri-apps/api/core';
 
 import { StaffPicksPane } from './StaffPicksPane';
+import { DownloadProvider } from '../../../contexts/DownloadContext';
 import type { RawAppConfig } from '../../types';
 import type { Starter, StarterOption } from '../../../types/starter';
 
@@ -160,7 +161,9 @@ async function renderPane(
   overrides: Record<string, unknown> = {},
 ) {
   mockCommands(picksResponses(overrides));
-  const view = render(<StaffPicksPane onSaved={onSaved} />);
+  const view = render(<StaffPicksPane onSaved={onSaved} />, {
+    wrapper: DownloadProvider,
+  });
   await waitFor(() =>
     expect(invokeMock).toHaveBeenCalledWith('get_staff_picks'),
   );
@@ -277,6 +280,20 @@ describe('StaffPicksPane', () => {
     );
   });
 
+  it('disables other rows while one model is downloading', async () => {
+    await renderPane();
+    fireEvent.click(
+      within(rowFor('Gemma 4 12B')).getByRole('button', { name: 'Download' }),
+    );
+    await flush();
+    // The active row shows progress; the other rows' Download buttons disable so
+    // a second click cannot collide with the single backend download slot and
+    // surface "a download is already in progress".
+    expect(
+      within(rowFor('Qwen3.5 9B')).getByRole('button', { name: 'Download' }),
+    ).toBeDisabled();
+  });
+
   it('lifts a fresh config and refreshes when a download completes', async () => {
     const onSaved = vi.fn();
     await renderPane(onSaved);
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index b0fce61a..c83977a6 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -20,11 +20,11 @@
  * too); `activeId` tracks which row owns the progress card.
  */
 
-import { useEffect, useMemo, useState } from 'react';
+import { useEffect, useMemo } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 
 import { DownloadProgress } from '../../../components/DownloadProgress';
-import { useDownloadModel } from '../../../hooks/useDownloadModel';
+import { useDownloadCtx } from '../../../contexts/DownloadContext';
 import { useStaffPicks } from '../../../components/StarterPicker';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
@@ -100,20 +100,26 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   const { options, refresh } = useStaffPicks();
   const sections = useMemo(() => groupByCategory(options ?? []), [options]);
 
-  // One download at a time; activeId names the row that owns the progress card.
-  const [activeId, setActiveId] = useState<string | null>(null);
+  // The download machine lives at the app root (DownloadProvider) so a Staff
+  // picks / Browse all tab switch, which unmounts this pane, never drops an
+  // in-flight download: the single-slot backend download outlives the pane, so
+  // the frontend view of it must too. `activeDownload` names the row that owns
+  // the shared progress card; it survives the remount and re-binds the row to
+  // the live progress instead of re-reading the partial as a stale "Paused".
   const {
     state,
     progress,
     etaSeconds,
     combinedBytes,
     speedBytesPerSec,
-    startById,
+    startStaffPick,
     cancel,
     retry,
     reset,
     discard,
-  } = useDownloadModel();
+    activeDownload,
+    clearActiveDownload,
+  } = useDownloadCtx();
 
   // A finished install (phase 'ready') lifts the fresh config, clears the
   // active row, and refreshes the rows so the new model flips to Installed.
@@ -127,36 +133,36 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
         // The focus-driven resync picks the change up on next activation.
       }
       reset();
-      setActiveId(null);
+      clearActiveDownload();
       await refresh();
     })();
-  }, [state.phase, onSaved, reset, refresh]);
-
-  // Download and resume both run the same id-keyed verified path; the backend
-  // resumes from a kept partial via Range, so resume is just starting again.
-  function startDownload(id: string) {
-    setActiveId(id);
-    void startById(id);
-  }
+  }, [state.phase, onSaved, reset, clearActiveDownload, refresh]);
 
   async function discardPartial(sha256: string) {
     await discard(sha256);
     await refresh();
   }
 
-  // Cancelling leaves the partial on disk; re-read the options so the row flips
-  // straight to its Paused/Resume state instead of snapping back to a fresh
-  // download until the next remount.
+  // Cancelling leaves the partial on disk; forget the active row and re-read the
+  // options so the row flips straight to its Paused/Resume state instead of
+  // snapping back to a fresh download until the next remount.
   async function cancelDownload() {
     await cancel();
+    clearActiveDownload();
     await refresh();
   }
 
   function returnToPicker() {
     reset();
-    setActiveId(null);
+    clearActiveDownload();
   }
 
+  // The engine downloads one model at a time, so while any download is in flight
+  // every other row's Download / Resume / Discard is disabled: the owning row
+  // shows the progress card, the rest wait rather than colliding with the
+  // single backend slot and surfacing "a download is already in progress".
+  const anyInFlight = state.phase !== 'idle';
+
   if (options !== null && sections.length === 0) {
     return (
       <div className={styles.pane}>
@@ -176,14 +182,18 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
             <ModelRow
               key={o.starter.id}
               option={o}
-              active={activeId === o.starter.id}
+              active={
+                activeDownload?.kind === 'staff' &&
+                activeDownload.id === o.starter.id
+              }
+              anyInFlight={anyInFlight}
               state={state}
               progress={progress}
               etaSeconds={etaSeconds}
               combinedBytes={combinedBytes}
               speedBytesPerSec={speedBytesPerSec}
-              onDownload={startDownload}
-              onResume={startDownload}
+              onDownload={startStaffPick}
+              onResume={startStaffPick}
               onDiscard={discardPartial}
               onCancel={() => void cancelDownload()}
               onRetry={() => void retry()}
@@ -199,8 +209,10 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
 interface ModelRowProps {
   option: StaffPickOption;
   active: boolean;
-  state: ReturnType<typeof useDownloadModel>['state'];
-  progress: ReturnType<typeof useDownloadModel>['progress'];
+  /** True while any model is downloading, so this row's actions are disabled. */
+  anyInFlight: boolean;
+  state: ReturnType<typeof useDownloadCtx>['state'];
+  progress: ReturnType<typeof useDownloadCtx>['progress'];
   etaSeconds: number | null;
   combinedBytes: number | null;
   speedBytesPerSec: number | null;
@@ -215,6 +227,7 @@ interface ModelRowProps {
 function ModelRow({
   option,
   active,
+  anyInFlight,
   state,
   progress,
   etaSeconds,
@@ -281,6 +294,7 @@ function ModelRow({
                 <button
                   type="button"
                   className={styles.resumeBtn}
+                  disabled={anyInFlight}
                   onClick={() => onResume(starter.id)}
                 >
                   Resume
@@ -289,6 +303,7 @@ function ModelRow({
                   type="button"
                   className={styles.discardBtn}
                   aria-label="Discard"
+                  disabled={anyInFlight}
                   onClick={() => onDiscard(starter.sha256)}
                 >
                   Discard
@@ -298,6 +313,7 @@ function ModelRow({
               <RowAction
                 option={option}
                 installed={installed}
+                anyInFlight={anyInFlight}
                 onDownload={onDownload}
               />
             )}
@@ -331,6 +347,8 @@ function ModelRow({
 interface RowActionProps {
   option: StaffPickOption;
   installed: boolean;
+  /** True while any model is downloading, so the download button is disabled. */
+  anyInFlight: boolean;
   onDownload: (id: string) => void;
 }
 
@@ -345,7 +363,12 @@ const DOWNLOAD_ICON = (
  * surface the absence of a download is the signal. The interrupted-partial
  * resume/discard pair is owned by the row itself; this renders the plain icon
  * download button otherwise. */
-function RowAction({ option, installed, onDownload }: RowActionProps) {
+function RowAction({
+  option,
+  installed,
+  anyInFlight,
+  onDownload,
+}: RowActionProps) {
   const { starter } = option;
 
   if (installed) {
@@ -357,6 +380,7 @@ function RowAction({ option, installed, onDownload }: RowActionProps) {
       type="button"
       className={styles.getBtn}
       aria-label="Download"
+      disabled={anyInFlight}
       onClick={() => onDownload(starter.id)}
     >
       {DOWNLOAD_ICON}
diff --git a/src/settings/tabs/tabs.test.tsx b/src/settings/tabs/tabs.test.tsx
index 353df96e..bd8c28e2 100644
--- a/src/settings/tabs/tabs.test.tsx
+++ b/src/settings/tabs/tabs.test.tsx
@@ -22,6 +22,7 @@ import { invoke } from '@tauri-apps/api/core';
 import { clearEventHandlers } from '../../testUtils/mocks/tauri';
 
 import { ModelTab } from './ModelTab';
+import { DownloadProvider } from '../../contexts/DownloadContext';
 import { DisplayTab } from './DisplayTab';
 import { SearchTab } from './SearchTab';
 import { AboutTab } from './AboutTab';
@@ -131,6 +132,7 @@ afterEach(() => {
 async function renderModelTab() {
   const view = render(
     <ModelTab config={CONFIG} resyncToken={0} onSaved={() => {}} />,
+    { wrapper: DownloadProvider },
   );
   await act(async () => {
     await Promise.resolve();
@@ -182,6 +184,7 @@ describe('ModelTab (router)', () => {
     };
     render(
       <ModelTab config={builtinActive} resyncToken={0} onSaved={() => {}} />,
+      { wrapper: DownloadProvider },
     );
     await act(async () => {
       await Promise.resolve();

From f5c94c8cc108d8375b60d2314dcd967f7365e1bf Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 18:42:16 -0500
Subject: [PATCH 67/89] fix: flip the Library model menu above its trigger when
 space is tight

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/LibraryPane.module.css        |  8 +++++
 src/settings/tabs/models/LibraryPane.test.tsx | 19 +++++++++++
 src/settings/tabs/models/LibraryPane.tsx      | 33 ++++++++++++++++---
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index 56195c8b..154385a8 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -215,6 +215,14 @@
   animation: menuIn 130ms cubic-bezier(0.2, 0.8, 0.3, 1);
 }
 
+/* Flip above the trigger when a downward menu would be clipped by the
+ * auto-sized window's hidden overflow (the pane sets data-side='top' then). */
+.menu[data-side='top'] {
+  top: auto;
+  bottom: 36px;
+  transform-origin: bottom right;
+}
+
 .menuItem {
   display: flex;
   align-items: center;
diff --git a/src/settings/tabs/models/LibraryPane.test.tsx b/src/settings/tabs/models/LibraryPane.test.tsx
index 2f778b91..939203cf 100644
--- a/src/settings/tabs/models/LibraryPane.test.tsx
+++ b/src/settings/tabs/models/LibraryPane.test.tsx
@@ -478,6 +478,25 @@ describe('LibraryPane', () => {
     ).toBeInTheDocument();
   });
 
+  it('drops the popover below the trigger when there is room beneath it', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    openMenu('gemma');
+    expect(screen.getByRole('menu')).toHaveAttribute('data-side', 'bottom');
+  });
+
+  it('flips the popover above the trigger when the space below is tight', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    const manage = screen.getByRole('button', { name: 'Manage qwen' });
+    // Simulate the trigger sitting near the window's bottom edge, where a
+    // downward menu would be clipped by the Settings window's hidden overflow.
+    manage.getBoundingClientRect = () =>
+      ({ bottom: window.innerHeight - 8 }) as unknown as DOMRect;
+    fireEvent.click(manage);
+    expect(screen.getByRole('menu')).toHaveAttribute('data-side', 'top');
+  });
+
   it('toggles the popover closed when its own button is clicked again', async () => {
     mockCommands(libraryResponses());
     await renderPane();
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index 531d99ce..ff882e8c 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -25,6 +25,14 @@ import type { InstalledModel, RamFit } from '../../../types/starter';
 
 const HF_BASE_URL = 'https://huggingface.co';
 
+/**
+ * Approximate height (px) the popover needs below the ⋮ trigger. When the space
+ * beneath it is tighter than this, the menu flips above the button so it is
+ * never clipped: the Settings window auto-hugs its content and `.body` hides
+ * overflow, so a downward menu on the last row would spill past the window.
+ */
+const MENU_DROP_ESTIMATE_PX = 210;
+
 /** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
   fits: styles.fitOk,
@@ -73,6 +81,7 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
 
   const [installed, setInstalled] = useState<InstalledModel[]>([]);
   const [openMenu, setOpenMenu] = useState<string | null>(null);
+  const [menuDropUp, setMenuDropUp] = useState(false);
   const [confirmDelete, setConfirmDelete] = useState<string | null>(null);
   const [deleteError, setDeleteError] = useState<string | null>(null);
 
@@ -111,6 +120,20 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
     };
   }, [openMenu]);
 
+  // Open the popover for `id` (or close it if already open). On open, flip the
+  // menu above the trigger when there is not enough room below: rows near the
+  // window's bottom edge would otherwise be clipped by the hidden body overflow.
+  function toggleMenu(id: string, trigger: HTMLElement) {
+    if (openMenu === id) {
+      setOpenMenu(null);
+      return;
+    }
+    const spaceBelow =
+      window.innerHeight - trigger.getBoundingClientRect().bottom;
+    setMenuDropUp(spaceBelow < MENU_DROP_ESTIMATE_PX);
+    setOpenMenu(id);
+  }
+
   // The backend writes the builtin provider's model field; lift the fresh
   // snapshot so the active row moves without a tab remount.
   function selectModel(id: string) {
@@ -245,14 +268,16 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                         aria-label={`Manage ${m.display_name}`}
                         aria-haspopup="menu"
                         aria-expanded={openMenu === m.id}
-                        onClick={() =>
-                          setOpenMenu((cur) => (cur === m.id ? null : m.id))
-                        }
+                        onClick={(e) => toggleMenu(m.id, e.currentTarget)}
                       >
                         ⋮
                       </button>
                       {openMenu === m.id ? (
-                        <div className={styles.menu} role="menu">
+                        <div
+                          className={styles.menu}
+                          role="menu"
+                          data-side={menuDropUp ? 'top' : 'bottom'}
+                        >
                           {active ? null : (
                             <button
                               type="button"

From dcea71c97711c6834c90402a49a70b7180098eaa Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 18:42:16 -0500
Subject: [PATCH 68/89] fix: name the built-in engine's actual resident model
 in keep-warm status

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/warmup.rs                       | 83 ++++++++++++++-----
 .../tabs/models/ProvidersPane.test.tsx        | 26 ++++--
 src/settings/tabs/models/ProvidersPane.tsx    | 55 ++++++------
 3 files changed, 108 insertions(+), 56 deletions(-)

diff --git a/src-tauri/src/warmup.rs b/src-tauri/src/warmup.rs
index 857fad27..772d11f7 100644
--- a/src-tauri/src/warmup.rs
+++ b/src-tauri/src/warmup.rs
@@ -169,18 +169,27 @@ pub(crate) async fn evict_builtin(engine: &crate::engine::runner::EngineHandle)
     engine.unload().await;
 }
 
-/// Built-in arm of `get_loaded_model`: the provider's configured model id
-/// when the engine status watch reports a loaded model, `None` otherwise
-/// (including when no model has been picked yet).
+/// Built-in arm of `get_loaded_model`: the display name of the model the engine
+/// is *actually* serving, resolved from the live status's `model_path` against
+/// `installed` (each entry a `(display_name, weights blob path)` pair), or
+/// `None` when the engine is not loaded or the resident blob matches no row.
+///
+/// This reads true VRAM residency, never the frontend-selected model: switching
+/// the active model rewrites config immediately, but the sidecar keeps serving
+/// the previous model until a reload, so the configured id would misreport what
+/// occupies memory.
 pub(crate) fn builtin_loaded_model(
     status: &crate::engine::runner::EngineStatus,
-    model_id: &str,
+    installed: &[(String, std::path::PathBuf)],
 ) -> Option<String> {
-    if status.state == "loaded" && !model_id.is_empty() {
-        Some(model_id.to_string())
-    } else {
-        None
-    }
+    if status.state != "loaded" || status.model_path.is_empty() {
+        return None;
+    }
+    let resident = std::path::Path::new(&status.model_path);
+    installed
+        .iter()
+        .find(|(_, path)| path.as_path() == resident)
+        .map(|(name, _)| name.clone())
 }
 
 impl Default for WarmupState {
@@ -408,13 +417,28 @@ pub async fn get_loaded_model(
     config: tauri::State<'_, parking_lot::RwLock<crate::config::AppConfig>>,
     client: tauri::State<'_, reqwest::Client>,
     engine: tauri::State<'_, crate::engine::runner::EngineHandle>,
+    db: tauri::State<'_, crate::history::Database>,
+    store: tauri::State<'_, crate::models::storage::ModelStore>,
 ) -> Result<Option<String>, String> {
     let kind = config.read().inference.active_provider_kind().to_string();
     match kind.as_str() {
         PROVIDER_KIND_BUILTIN => {
-            let model_id = config.read().inference.active_provider_model().to_string();
             let status = engine.status().borrow().clone();
-            Ok(builtin_loaded_model(&status, &model_id))
+            // Resolve the engine's resident blob back to its installed name. A
+            // poisoned lock is recovered: an unrelated panic must not blind the
+            // residency line.
+            let installed = {
+                let conn = match db.0.lock() {
+                    Ok(conn) => conn,
+                    Err(poisoned) => poisoned.into_inner(),
+                };
+                crate::models::manifest::list(&conn)
+                    .unwrap_or_default()
+                    .into_iter()
+                    .map(|m| (m.display_name, store.blob_path(&m.sha256)))
+                    .collect::<Vec<_>>()
+            };
+            Ok(builtin_loaded_model(&status, &installed))
         }
         PROVIDER_KIND_OLLAMA => {
             let model = models.0.lock().ok().and_then(|g| g.clone());
@@ -1569,20 +1593,39 @@ mod tests {
     }
 
     #[test]
-    fn get_loaded_model_builtin_from_status() {
+    fn builtin_loaded_model_names_the_resident_blob_not_the_selection() {
+        use std::path::PathBuf;
+        let resident = PathBuf::from("/blobs/sha_mistral");
+        let installed = vec![
+            ("Gemma 4 12B".to_string(), PathBuf::from("/blobs/sha_gemma")),
+            ("Mistral Nemo 12B".to_string(), resident.clone()),
+        ];
+
+        // Loaded: the engine is serving the Mistral blob, so the resident model
+        // is named from the live `model_path`, independent of any selection.
+        let mut loaded = engine_status("loaded", Some(40123));
+        loaded.model_path = resident.display().to_string();
         assert_eq!(
-            builtin_loaded_model(&engine_status("loaded", Some(40123)), "org/repo:m.gguf"),
-            Some("org/repo:m.gguf".to_string())
+            builtin_loaded_model(&loaded, &installed),
+            Some("Mistral Nemo 12B".to_string())
         );
+
+        // Not loaded: nothing is resident even if a path lingers in the status.
+        let mut stopped = engine_status("stopped", None);
+        stopped.model_path = resident.display().to_string();
+        assert_eq!(builtin_loaded_model(&stopped, &installed), None);
+
+        // Loaded but the resident blob matches no installed row: report nothing
+        // rather than guessing a name.
+        let mut orphan = engine_status("loaded", Some(40123));
+        orphan.model_path = "/blobs/sha_unknown".to_string();
+        assert_eq!(builtin_loaded_model(&orphan, &installed), None);
+
+        // Loaded with an empty path (defensive): nothing to name.
         assert_eq!(
-            builtin_loaded_model(&engine_status("stopped", None), "org/repo:m.gguf"),
+            builtin_loaded_model(&engine_status("loaded", Some(40123)), &installed),
             None
         );
-        assert_eq!(
-            builtin_loaded_model(&engine_status("loaded", Some(40123)), ""),
-            None,
-            "no picked model means nothing to report even while loaded"
-        );
     }
 
     // ── evict_builtin against a scripted engine ──────────────────────────────
diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index d8fd1a3e..b9683127 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -640,14 +640,20 @@ describe('ProvidersPane generation', () => {
     expect(input).toHaveValue(0);
   });
 
-  it('names the resident built-in model in VRAM and enables Unload when loaded', () => {
+  it('names the model the engine is actually serving, not the selected one', () => {
+    // The selection is Qwen, but the engine is still serving Mistral: switching
+    // the active model does not reload the sidecar, so the label must follow
+    // what the backend reports as resident, never the frontend selection.
     mockInvoke({
       get_engine_status: engineStatus('loaded'),
+      get_loaded_model: 'Mistral Nemo 12B',
       list_installed_models: INSTALLED,
     });
     renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
     return waitFor(() => {
-      expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument();
+      expect(screen.getByText('Mistral Nemo 12B in VRAM')).toBeInTheDocument();
+      // The selected (but not-yet-resident) model is never shown as resident.
+      expect(screen.queryByText('Qwen3.5 9B in VRAM')).not.toBeInTheDocument();
       expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled();
     });
   });
@@ -827,16 +833,26 @@ describe('ProvidersPane robustness', () => {
     expect(screen.queryByText(/installed model/)).toBeNull();
   });
 
-  it('reflects the engine:status event stream for the built-in engine', async () => {
-    mockInvoke({ list_installed_models: INSTALLED });
+  it('refreshes the resident built-in model when an engine:status event arrives', async () => {
+    // Mount with nothing resident yet.
+    mockInvoke({ list_installed_models: INSTALLED, get_loaded_model: null });
     renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
     await act(async () => {
       await Promise.resolve();
     });
+    expect(screen.getByText('No model loaded')).toBeInTheDocument();
+    // The engine finishes loading: the status event drives a fresh backend read
+    // that names the now-resident model.
+    mockInvoke({
+      list_installed_models: INSTALLED,
+      get_loaded_model: 'Qwen3.5 9B',
+    });
     await act(async () => {
       emitTauriEvent('engine:status', engineStatus('loaded'));
     });
-    expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument();
+    await waitFor(() =>
+      expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument(),
+    );
   });
 
   it('falls back to the first Ollama model when the active one is not listed', async () => {
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index dce5b836..ccb92d0d 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -118,20 +118,27 @@ export function ProvidersPane({
       .catch(() => setInstalled([]));
   }, [builtinModelId]);
 
-  // Engine lifecycle + Ollama VRAM residency for the keep-warm status line.
+  // Engine lifecycle + the active provider's resident model, for the keep-warm
+  // status line.
   const [engineState, setEngineState] =
     useState<EngineStatus['state']>('stopped');
   const [loadedModel, setLoadedModel] = useState<string | null>(null);
   useEffect(() => {
+    // Re-reads which model the active provider actually has resident. The
+    // built-in engine names it from its loaded blob, so this must be re-run on
+    // every engine transition rather than derived from the frontend selection.
+    const refreshLoaded = () =>
+      void invoke<string | null>('get_loaded_model')
+        .then(setLoadedModel)
+        .catch(() => {});
     invoke<EngineStatus>('get_engine_status')
       .then((s) => setEngineState(s.state))
       .catch(() => {});
-    invoke<string | null>('get_loaded_model')
-      .then(setLoadedModel)
-      .catch(() => {});
-    const unlistenStatus = listen<EngineStatus>('engine:status', (e) =>
-      setEngineState(e.payload.state),
-    );
+    refreshLoaded();
+    const unlistenStatus = listen<EngineStatus>('engine:status', (e) => {
+      setEngineState(e.payload.state);
+      refreshLoaded();
+    });
     const unlistenLoaded = listen<string>('warmup:model-loaded', (e) =>
       setLoadedModel(e.payload),
     );
@@ -291,32 +298,18 @@ export function ProvidersPane({
 
   const fillPct = `${ctxPos / 10}%`;
 
-  // Keep-warm live status: the text shown beside the name.
-  // Friendly name of the selected built-in model, for the residency line
-  // (matching the built-in model dropdown below).
-  const builtinDisplayName =
-    installed.find((m) => m.id === builtinModelId)?.display_name ?? '';
-  // Built-in residency reads like Ollama's wording, driven by the live engine
-  // state (the warmup:* events only fire for Ollama): loaded → "<model> in
-  // VRAM", starting → "Loading…", otherwise → "No model loaded". "loaded"
-  // means the llama-server sidecar is up and serving.
-  let builtinResidency: string;
-  if (engineState === 'loaded') {
-    builtinResidency =
-      builtinDisplayName !== ''
-        ? `${builtinDisplayName} in VRAM`
-        : 'No model loaded';
-  } else if (engineState === 'starting') {
-    builtinResidency = 'Loading…';
+  // Keep-warm live status: the text shown beside the name. `loadedModel` is the
+  // display name of the model the active provider actually has resident (the
+  // built-in engine's loaded blob, or Ollama's /api/ps), never the frontend
+  // selection. With none resident, a mid-load built-in shows "Loading…".
+  let warmStatusText: string;
+  if (loadedModel) {
+    warmStatusText = `${loadedModel} in VRAM`;
+  } else if (activeKind === 'builtin' && engineState === 'starting') {
+    warmStatusText = 'Loading…';
   } else {
-    builtinResidency = 'No model loaded';
+    warmStatusText = 'No model loaded';
   }
-  const warmStatusText =
-    activeKind === 'builtin'
-      ? builtinResidency
-      : loadedModel !== null
-        ? `${loadedModel} in VRAM`
-        : 'No model loaded';
 
   // The active Ollama model value, constrained to the installed list.
   const ollamaModelValue =

From 7f47e2fb068180d4f6cb40c0fe86ddf18896b805 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 19:12:47 -0500
Subject: [PATCH 69/89] feat: download multiple models in parallel from
 Settings Discover

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 176 +++++---
 src-tauri/src/models/registry.rs              |  20 +
 src/__tests__/App.test.tsx                    |   4 -
 src/contexts/DownloadContext.tsx              |  65 +--
 src/contexts/DownloadsContext.tsx             | 245 +++++++++++
 .../__tests__/DownloadContext.test.tsx        |  60 +--
 .../__tests__/DownloadsContext.test.tsx       | 225 ++++++++++
 src/hooks/__tests__/useDownloadModel.test.tsx |  11 +-
 src/hooks/downloadKey.ts                      |  32 ++
 src/hooks/downloadReducer.ts                  | 288 +++++++++++++
 src/hooks/useDownloadModel.ts                 | 390 +++++-------------
 src/settings/SettingsWindow.tsx               |  18 +-
 .../tabs/models/BrowseAllPane.test.tsx        |  53 ++-
 src/settings/tabs/models/BrowseAllPane.tsx    | 339 ++++++++-------
 .../tabs/models/DiscoverPane.test.tsx         |   8 +-
 .../tabs/models/StaffPicksPane.test.tsx       |  28 +-
 src/settings/tabs/models/StaffPicksPane.tsx   | 230 ++++-------
 src/settings/tabs/tabs.test.tsx               |   6 +-
 .../__tests__/ModelCheckStep.test.tsx         |   8 +-
 19 files changed, 1379 insertions(+), 827 deletions(-)
 create mode 100644 src/contexts/DownloadsContext.tsx
 create mode 100644 src/contexts/__tests__/DownloadsContext.test.tsx
 create mode 100644 src/hooks/downloadKey.ts
 create mode 100644 src/hooks/downloadReducer.ts

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 18431f8a..4126b99b 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1131,48 +1131,65 @@ async fn reconcile_capabilities(
 /// Stable error returned when a repo id fails [`is_valid_repo_id`].
 const INVALID_REPO_ID_ERR: &str = "invalid Hugging Face repo id";
 
-/// Cancellation handle for the (at most one) in-flight model download.
-/// `Some` while a download is running; `None` otherwise. Claimed atomically
-/// via [`claim_download`] so a second download cannot start until the first
-/// completes, fails, or is cancelled.
+/// Cancellation handles for the in-flight model downloads, keyed by the
+/// caller-supplied download key (the frontend's stable per-row identity, e.g. a
+/// Staff Picks id or `repo\0file`). Empty when nothing is downloading. Distinct
+/// keys download concurrently; a duplicate key is rejected via [`claim_download`]
+/// so the same row cannot start twice.
+///
+/// Parallelism never corrupts the content-addressed blob store, and this map is
+/// not what protects it: [`download::run_download`] verifies each blob's sha256
+/// before renaming its partial into the store, and [`download::download_one`]
+/// skips a blob whose final file already exists. With per-key dedupe here and
+/// the distinct blob shas the registry asserts for catalog entries (in
+/// `registry::tests`), no two catalog downloads target the same blob, so no
+/// per-blob lock is needed there. NOTE(review): repo downloads not covered.
 #[derive(Default)]
-pub struct DownloadState(pub std::sync::Mutex<Option<tokio_util::sync::CancellationToken>>);
+pub struct DownloadState(
+    pub std::sync::Mutex<std::collections::HashMap<String, tokio_util::sync::CancellationToken>>,
+);
 
-/// Atomically claims the single download slot. Returns a fresh cancellation
-/// token on success; an error when another download already holds the slot
-/// (or the lock is poisoned).
+/// Atomically claims a download slot for `key`. Returns a fresh cancellation
+/// token on success; an error when `key` already has an in-flight download (or
+/// the lock is poisoned).
 pub fn claim_download(
     state: &DownloadState,
+    key: &str,
 ) -> Result<tokio_util::sync::CancellationToken, String> {
     let mut guard = state.0.lock().map_err(|e| e.to_string())?;
-    if guard.is_some() {
+    if guard.contains_key(key) {
         return Err("a download is already in progress".to_string());
     }
     let token = tokio_util::sync::CancellationToken::new();
-    *guard = Some(token.clone());
+    guard.insert(key.to_string(), token.clone());
     Ok(token)
 }
 
-/// Clears the download slot. Best-effort: a poisoned lock is ignored because
-/// release runs on the task teardown path where there is nothing left to do.
-pub fn release_download(state: &DownloadState) {
+/// Releases the slot held by `key`. Best-effort: a poisoned lock is ignored
+/// because release runs on the task teardown path where there is nothing left
+/// to do.
+pub fn release_download(state: &DownloadState, key: &str) {
     if let Ok(mut guard) = state.0.lock() {
-        *guard = None;
+        guard.remove(key);
     }
 }
 
-/// True while a model download holds the slot. Read before quitting so the app
-/// can warn that quitting discards the in-flight download.
+/// True while any model download holds a slot. Read before quitting so the app
+/// can warn that quitting discards the in-flight download(s).
 pub fn download_in_flight(state: &DownloadState) -> bool {
-    state.0.lock().map(|guard| guard.is_some()).unwrap_or(false)
+    state
+        .0
+        .lock()
+        .map(|guard| !guard.is_empty())
+        .unwrap_or(false)
 }
 
-/// Cancels the in-flight download's token, if one is claimed. Does NOT clear
-/// the slot: the download task notices the cancellation, emits `Cancelled`,
-/// and releases the slot itself.
-pub fn cancel_active_download(state: &DownloadState) {
+/// Cancels the download held by `key`, if one is in flight. Does NOT remove the
+/// slot: the download task notices the cancellation, emits `Cancelled`, and
+/// releases its own slot. A missing key is a harmless no-op.
+pub fn cancel_download(state: &DownloadState, key: &str) {
     if let Ok(guard) = state.0.lock() {
-        if let Some(token) = guard.as_ref() {
+        if let Some(token) = guard.get(key) {
             token.cancel();
         }
     }
@@ -2161,7 +2178,7 @@ pub fn delete_installed_model_inner(
     builtin_model: &str,
 ) -> Result<DeleteOutcome, String> {
     let guard = state.0.lock().map_err(|e| e.to_string())?;
-    if guard.is_some() {
+    if !guard.is_empty() {
         return Err("a download is already in progress".to_string());
     }
     let orphans = manifest::delete(conn, id).map_err(|e| e.to_string())?;
@@ -2185,7 +2202,7 @@ pub fn discard_partial_inner(
         return Err("invalid sha256".to_string());
     }
     let guard = state.0.lock().map_err(|e| e.to_string())?;
-    if guard.is_some() {
+    if !guard.is_empty() {
         return Err("a download is already in progress".to_string());
     }
     match std::fs::remove_file(store.partial_path(sha256)) {
@@ -2273,16 +2290,18 @@ pub fn get_models_dir_free_bytes(store: tauri::State<'_, storage::ModelStore>) -
 #[cfg_attr(not(coverage), tauri::command)]
 pub fn download_starter(
     tier: String,
+    key: String,
     on_event: tauri::ipc::Channel<download::DownloadEvent>,
     app: tauri::AppHandle,
     download_state: tauri::State<'_, DownloadState>,
 ) -> Result<(), String> {
     let starter = starter_for_tier(&tier)?;
-    let token = claim_download(&download_state)?;
+    let token = claim_download(&download_state, &key)?;
     spawn_model_download(
         app,
         registry::download_specs(starter),
         registry::to_installed_model(starter),
+        key,
         token,
         on_event,
     );
@@ -2297,16 +2316,18 @@ pub fn download_starter(
 #[cfg_attr(not(coverage), tauri::command)]
 pub fn download_staff_pick(
     id: String,
+    key: String,
     on_event: tauri::ipc::Channel<download::DownloadEvent>,
     app: tauri::AppHandle,
     download_state: tauri::State<'_, DownloadState>,
 ) -> Result<(), String> {
     let starter = starter_for_id(&id)?;
-    let token = claim_download(&download_state)?;
+    let token = claim_download(&download_state, &key)?;
     spawn_model_download(
         app,
         registry::download_specs(starter),
         registry::to_installed_model(starter),
+        key,
         token,
         on_event,
     );
@@ -2320,17 +2341,19 @@ pub fn download_staff_pick(
 pub async fn download_repo_model(
     repo: String,
     file: String,
+    key: String,
     on_event: tauri::ipc::Channel<download::DownloadEvent>,
     app: tauri::AppHandle,
     client: tauri::State<'_, reqwest::Client>,
     download_state: tauri::State<'_, DownloadState>,
 ) -> Result<(), String> {
     let resolved = resolve_repo_spec(&client, HF_BASE_URL, &repo, &file).await?;
-    let token = claim_download(&download_state)?;
+    let token = claim_download(&download_state, &key)?;
     spawn_model_download(
         app,
         repo_download_specs(HF_BASE_URL, &repo, &file, &resolved),
         repo_installed_model(&repo, &file, &resolved),
+        key,
         token,
         on_event,
     );
@@ -2381,12 +2404,13 @@ pub async fn list_openai_models(
     fetch_openai_models(&client, &base_url, api_key.as_deref()).await
 }
 
-/// Cancels the in-flight model download, if any. The download task emits
-/// `Cancelled` and keeps the partial for a later resume.
+/// Cancels the in-flight model download identified by `key`, if any. The
+/// download task emits `Cancelled` and keeps the partial for a later resume.
+/// Other concurrent downloads are unaffected.
 #[cfg_attr(coverage_nightly, coverage(off))]
 #[cfg_attr(not(coverage), tauri::command)]
-pub fn cancel_model_download(download_state: tauri::State<'_, DownloadState>) {
-    cancel_active_download(&download_state);
+pub fn cancel_model_download(key: String, download_state: tauri::State<'_, DownloadState>) {
+    cancel_download(&download_state, &key);
 }
 
 /// Removes the partial file for `sha256` (the user chose Discard over Resume).
@@ -2487,6 +2511,7 @@ fn spawn_model_download(
     app: tauri::AppHandle,
     specs: Vec<download::DownloadSpec>,
     model: manifest::InstalledModel,
+    key: String,
     token: tokio_util::sync::CancellationToken,
     on_event: tauri::ipc::Channel<download::DownloadEvent>,
 ) {
@@ -2507,7 +2532,7 @@ fn spawn_model_download(
             }
             let _ = on_event_finalize.send(finalize_outcome_event(finalized));
         }
-        release_download(&app.state::<DownloadState>());
+        release_download(&app.state::<DownloadState>(), &key);
     });
 }
 
@@ -2548,7 +2573,23 @@ fn finalize_install(
         eprintln!("thuki: [models] failed to remove superseded blobs: {e}");
     }
     let config = app.state::<parking_lot::RwLock<AppConfig>>();
-    persist_active_provider_model(app, &config, PROVIDER_ID_BUILTIN, &model.id)
+    // Auto-select only the first model: adopt this download as the built-in
+    // model when the provider has none yet; otherwise a completed download just
+    // installs and leaves the user's active choice alone. Parallel downloads
+    // finish in arbitrary order, so a last-one-wins overwrite would be
+    // unpredictable.
+    if adopt_as_builtin_model(&builtin_provider_model(&config.read())) {
+        persist_active_provider_model(app, &config, PROVIDER_ID_BUILTIN, &model.id)
+    } else {
+        Ok(())
+    }
+}
+
+/// Whether a freshly installed model should become the built-in provider's
+/// active model: only when the provider has no model selected yet (empty id).
+/// Keeps "auto-select the first model" predictable under parallel downloads.
+fn adopt_as_builtin_model(current_builtin_model: &str) -> bool {
+    current_builtin_model.is_empty()
 }
 
 // ─── Tests ──────────────────────────────────────────────────────────────────
@@ -4452,35 +4493,45 @@ mod tests {
     // ── Model library: download claim ────────────────────────────────────────
 
     #[test]
-    fn download_claim_rejects_second_concurrent() {
+    fn download_claim_allows_distinct_keys_and_rejects_a_duplicate() {
         let state = DownloadState::default();
-        let token = claim_download(&state).unwrap();
+        let token = claim_download(&state, "model-a").unwrap();
         assert!(!token.is_cancelled());
-        let err = claim_download(&state).unwrap_err();
+        // A different model downloads concurrently: its own slot is granted.
+        assert!(claim_download(&state, "model-b").is_ok());
+        // The same key cannot start twice while it is in flight.
+        let err = claim_download(&state, "model-a").unwrap_err();
         assert_eq!(err, "a download is already in progress");
-        // Release clears the claim so a new download can start.
-        release_download(&state);
-        assert!(claim_download(&state).is_ok());
+        // Releasing one key frees only that slot.
+        release_download(&state, "model-a");
+        assert!(claim_download(&state, "model-a").is_ok());
     }
 
     #[test]
-    fn download_in_flight_tracks_the_claim() {
+    fn download_in_flight_tracks_any_claim() {
         let state = DownloadState::default();
         assert!(!download_in_flight(&state));
-        let _token = claim_download(&state).unwrap();
+        let _a = claim_download(&state, "a").unwrap();
+        let _b = claim_download(&state, "b").unwrap();
+        assert!(download_in_flight(&state));
+        // One release leaves the other download in flight.
+        release_download(&state, "a");
         assert!(download_in_flight(&state));
-        release_download(&state);
+        release_download(&state, "b");
         assert!(!download_in_flight(&state));
     }
 
     #[test]
-    fn cancel_active_download_cancels_claimed_token_and_tolerates_idle() {
+    fn cancel_download_cancels_only_the_keyed_token_and_tolerates_idle() {
         let state = DownloadState::default();
-        // No claim yet: cancelling is a harmless no-op.
-        cancel_active_download(&state);
-        let token = claim_download(&state).unwrap();
-        cancel_active_download(&state);
-        assert!(token.is_cancelled());
+        // No such key: cancelling is a harmless no-op.
+        cancel_download(&state, "missing");
+        let a = claim_download(&state, "a").unwrap();
+        let b = claim_download(&state, "b").unwrap();
+        cancel_download(&state, "a");
+        assert!(a.is_cancelled());
+        // Cancelling one download leaves the others running.
+        assert!(!b.is_cancelled());
     }
 
     #[test]
@@ -4491,14 +4542,23 @@ mod tests {
             let _guard = state_ref.0.lock().unwrap();
             panic!("poison");
         });
-        assert!(claim_download(&state).is_err());
+        assert!(claim_download(&state, "k").is_err());
         let (_dir, store) = make_store();
         assert!(discard_partial_inner(&state, &store, &"a".repeat(64)).is_err());
         let conn = crate::database::open_in_memory().unwrap();
         assert!(delete_installed_model_inner(&state, &conn, &store, "x:y.gguf", "").is_err());
         // Best-effort operations must not panic on the poisoned lock.
-        cancel_active_download(&state);
-        release_download(&state);
+        cancel_download(&state, "k");
+        release_download(&state, "k");
+    }
+
+    #[test]
+    fn adopt_as_builtin_model_only_for_the_first_model() {
+        // No model selected yet: the first completed download is adopted.
+        assert!(adopt_as_builtin_model(""));
+        // A model is already active: a later parallel completion does not steal
+        // the active slot.
+        assert!(!adopt_as_builtin_model("google/gemma:gemma-q4.gguf"));
     }
 
     #[test]
@@ -5537,15 +5597,16 @@ mod tests {
         std::fs::write(store.blob_path(&m.sha256), b"w").unwrap();
 
         // A claimed download slot must refuse the delete and leave the row
-        // and blob untouched.
-        let _token = claim_download(&state).unwrap();
+        // and blob untouched, even though the in-flight download is a different
+        // model: a finishing download could insert or share refcounted blobs.
+        let _token = claim_download(&state, "other-model").unwrap();
         let err = delete_installed_model_inner(&state, &conn, &store, &m.id, "").unwrap_err();
         assert_eq!(err, "a download is already in progress");
         assert!(manifest::get(&conn, &m.id).unwrap().is_some());
         assert!(store.blob_path(&m.sha256).exists());
 
         // Releasing the slot lets the delete proceed.
-        release_download(&state);
+        release_download(&state, "other-model");
         assert!(delete_installed_model_inner(&state, &conn, &store, &m.id, "").is_ok());
     }
 
@@ -5578,11 +5639,12 @@ mod tests {
         assert!(discard_partial_inner(&state, &store, "short").is_err());
         assert!(discard_partial_inner(&state, &store, &"Z".repeat(64)).is_err());
 
-        // Rejected while a download is claimed.
-        let _token = claim_download(&state).unwrap();
+        // Rejected while any download is claimed (a finishing download may be
+        // writing this very partial or about to share its blob).
+        let _token = claim_download(&state, "some-model").unwrap();
         let err = discard_partial_inner(&state, &store, &sha).unwrap_err();
         assert!(err.contains("in progress"), "got: {err}");
-        release_download(&state);
+        release_download(&state, "some-model");
 
         // Removes an existing partial; a missing partial is fine (idempotent).
         std::fs::write(store.partial_path(&sha), b"bytes").unwrap();
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 47c7e516..bc5615a2 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -455,6 +455,26 @@ mod tests {
         by_id(ONBOARDING_HERO_IDS[idx]).unwrap()
     }
 
+    #[test]
+    fn blob_shas_are_unique_across_entries() {
+        // Guard for parallel downloads: the blob store is content-addressed, so
+        // two catalog entries sharing a sha256 would mean two concurrent
+        // writers to the same `tmp/<sha>.partial`. Weights and mmproj shas go
+        // into one set, so a collision between any two blobs fails the test.
+        //
+        // Should a future entry legitimately share a blob (for example a common
+        // mmproj companion), add per-sha download serialization before
+        // relaxing this check. See the `DownloadState` docs.
+        let mut claimed = std::collections::HashSet::new();
+        for starter in STARTERS {
+            let fresh = claimed.insert(starter.sha256);
+            assert!(fresh, "duplicate weights sha256: {}", starter.sha256);
+            if let Some(mmproj) = starter.mmproj_sha256 {
+                assert!(claimed.insert(mmproj), "duplicate blob sha256: {mmproj}");
+            }
+        }
+    }
+
     #[test]
     fn ids_are_present_and_unique() {
         // The Staff Picks catalog and the id-keyed download path key on `id`,
diff --git a/src/__tests__/App.test.tsx b/src/__tests__/App.test.tsx
index 22519012..58dc56e5 100644
--- a/src/__tests__/App.test.tsx
+++ b/src/__tests__/App.test.tsx
@@ -81,10 +81,6 @@ function makeDownloadCtx(
     pausedBytes: 0,
     pauseDownload: vi.fn(),
     resumeFromPause: vi.fn(),
-    activeDownload: null,
-    startStaffPick: vi.fn(),
-    startRepoDownload: vi.fn(),
-    clearActiveDownload: vi.fn(),
     ...overrides,
   };
 }
diff --git a/src/contexts/DownloadContext.tsx b/src/contexts/DownloadContext.tsx
index 0f9cdd03..74625ee5 100644
--- a/src/contexts/DownloadContext.tsx
+++ b/src/contexts/DownloadContext.tsx
@@ -32,18 +32,6 @@ import {
 import { useConfig } from './ConfigContext';
 import type { StarterOption, StarterTier } from '../types/starter';
 
-/**
- * Identity of the Settings → Discover download in flight, kept here (above the
- * panes) so the owning row re-shows live progress after a Staff-picks /
- * Browse-all tab switch unmounts and remounts it. The single-slot backend
- * download outlives a pane unmount, so the frontend's view of it must too:
- * `'staff'` is a curated catalog entry keyed by its stable id; `'repo'` is a
- * Browse-all repo + GGUF file.
- */
-export type ActiveDownload =
-  | { kind: 'staff'; id: string }
-  | { kind: 'repo'; repo: string; file: string };
-
 export interface DownloadContextValue extends UseDownloadModel {
   /** Tier whose download is in flight; null when idle. */
   downloadingTier: StarterTier | null;
@@ -88,18 +76,6 @@ export interface DownloadContextValue extends UseDownloadModel {
   pauseDownload: () => void;
   /** Resume a paused download from where it stopped. */
   resumeFromPause: () => void;
-  /**
-   * Which Discover row owns the in-flight download, or null when none does.
-   * Survives a pane unmount so the row re-binds to the live progress on
-   * remount instead of re-reading the on-disk partial as a stale "Paused".
-   */
-  activeDownload: ActiveDownload | null;
-  /** Start (or resume) a Staff Picks catalog download and record its row id. */
-  startStaffPick: (id: string) => void;
-  /** Start (or resume) a Browse-all repo download and record its row identity. */
-  startRepoDownload: (repo: string, file: string) => void;
-  /** Forget the active Discover download row (terminal card dismissed/paused). */
-  clearActiveDownload: () => void;
 }
 
 const DownloadContext = createContext<DownloadContextValue | null>(null);
@@ -113,19 +89,8 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
   const [activeOption, setActiveOption] = useState<StarterOption | null>(null);
   const [pauseRequested, setPauseRequested] = useState(false);
   const [pausedBytes, setPausedBytes] = useState(0);
-  const [activeDownload, setActiveDownload] = useState<ActiveDownload | null>(
-    null,
-  );
 
-  const {
-    start,
-    resume,
-    startById,
-    startRepo,
-    cancel,
-    discard,
-    combinedBytes,
-  } = download;
+  const { start, resume, cancel, discard, combinedBytes } = download;
   const downloadPhase = download.state.phase;
 
   // A pause is only *committed* once the cancel has fully landed (machine back
@@ -217,26 +182,6 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
     resumeDownload(activeOption!.starter.tier, activeOption!, pausedBytes);
   }, [activeOption, pausedBytes, resumeDownload]);
 
-  // Discover (Settings) download identity. A resume runs the same start path
-  // (the backend resumes the kept partial via Range), so resume reuses these.
-  const startStaffPick = useCallback(
-    (id: string) => {
-      setActiveDownload({ kind: 'staff', id });
-      void startById(id);
-    },
-    [startById],
-  );
-
-  const startRepoDownload = useCallback(
-    (repo: string, file: string) => {
-      setActiveDownload({ kind: 'repo', repo, file });
-      void startRepo(repo, file);
-    },
-    [startRepo],
-  );
-
-  const clearActiveDownload = useCallback(() => setActiveDownload(null), []);
-
   const grandTotalBytes =
     activeOption === null
       ? null
@@ -256,10 +201,6 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
       pausedBytes,
       pauseDownload,
       resumeFromPause,
-      activeDownload,
-      startStaffPick,
-      startRepoDownload,
-      clearActiveDownload,
     }),
     [
       download,
@@ -274,10 +215,6 @@ export function DownloadProvider({ children }: { children: ReactNode }) {
       pausedBytes,
       pauseDownload,
       resumeFromPause,
-      activeDownload,
-      startStaffPick,
-      startRepoDownload,
-      clearActiveDownload,
     ],
   );
 
diff --git a/src/contexts/DownloadsContext.tsx b/src/contexts/DownloadsContext.tsx
new file mode 100644
index 00000000..004dea48
--- /dev/null
+++ b/src/contexts/DownloadsContext.tsx
@@ -0,0 +1,245 @@
+/**
+ * Settings-window download registry: many model downloads at once.
+ *
+ * Unlike onboarding (one starter at a time, {@link useDownloadModel}), the
+ * Settings → Discover panes let a user fire off several downloads in parallel.
+ * This provider holds one live download per key (the backend allows concurrent
+ * downloads keyed the same way; see `DownloadState` in `models/mod.rs`) and,
+ * sitting at the Settings window root, keeps every one of them alive across the
+ * Library / Discover / Providers and Staff picks / Browse all tab switches that
+ * unmount the panes.
+ *
+ * Each entry advances through the shared {@link reduceDownloadEvent} reducer
+ * (engine handoff off: a Settings download finishes at `ready`). A row looks up
+ * its own download by {@link downloadKey}; absence means "not downloading".
+ */
+
+import {
+  createContext,
+  use,
+  useCallback,
+  useMemo,
+  useRef,
+  useState,
+  type ReactNode,
+} from 'react';
+import { Channel, invoke } from '@tauri-apps/api/core';
+import {
+  type DownloadAccumulator,
+  type DownloadProgressInfo,
+  type DownloadUiState,
+  reduceDownloadEvent,
+  startingAccumulator,
+} from '../hooks/downloadReducer';
+import { downloadKey, type DownloadIdentity } from '../hooks/downloadKey';
+import type { DownloadEvent } from '../types/starter';
+
+/** What the Settings panes start: a Staff Picks id or a Browse-all repo file. */
+type RegistryIdentity = Extract<
+  DownloadIdentity,
+  { kind: 'staff' } | { kind: 'repo' }
+>;
+
+/** The render-facing view of one live download. */
+export interface DownloadView {
+  state: DownloadUiState;
+  progress: DownloadProgressInfo | null;
+  etaSeconds: number | null;
+  combinedBytes: number | null;
+  speedBytesPerSec: number | null;
+}
+
+/** Internal record: the identity (for retry replay) plus its accumulator. */
+interface RegistryEntry {
+  identity: RegistryIdentity;
+  acc: DownloadAccumulator;
+}
+
+export interface DownloadsContextValue {
+  /** The live download for `key` ({@link downloadKey}), or undefined when none. */
+  get: (key: string) => DownloadView | undefined;
+  /**
+   * Whether any live download belongs to `repo`. Lets a Browse-all repo row
+   * re-expand itself after a tab switch remounts it collapsed, before its quant
+   * list (which would reveal the per-file downloads) has been fetched.
+   */
+  hasRepoDownload: (repo: string) => boolean;
+  /** Start (or resume) a Staff Picks catalog download by its stable id. */
+  startStaffPick: (id: string) => void;
+  /** Start (or resume) a Browse-all repo download by repo + GGUF file. */
+  startRepoDownload: (repo: string, file: string) => void;
+  /** Cancel the download for `key`; the partial is kept for a later resume. */
+  cancel: (key: string) => void;
+  /** Retry the failed download for `key` (replays its original command). */
+  retry: (key: string) => void;
+  /** Discard a kept partial by blob sha256. */
+  discard: (sha256: string) => Promise<void>;
+  /** Drop a terminal (ready / failed) entry so its row returns to normal. */
+  clear: (key: string) => void;
+}
+
+const DownloadsContext = createContext<DownloadsContextValue | null>(null);
+
+/**
+ * Resolves a registry identity to the backend command that downloads it,
+ * paired with that command's identity-specific arguments.
+ */
+function commandFor(
+  identity: RegistryIdentity,
+): [string, Record<string, unknown>] {
+  if (identity.kind === 'staff') {
+    return ['download_staff_pick', { id: identity.id }];
+  }
+  // Not a Staff Pick, so the identity is a Browse-all repo + GGUF file.
+  const { repo, file } = identity;
+  return ['download_repo_model', { repo, file }];
+}
+
+export function DownloadsProvider({ children }: { children: ReactNode }) {
+  const [entries, setEntries] = useState<Map<string, RegistryEntry>>(
+    () => new Map(),
+  );
+  // Latest entries for the imperative retry path (reads identity outside React
+  // state). Mirrored every render so it never lags the rendered map.
+  const entriesRef = useRef(entries);
+  entriesRef.current = entries;
+
+  const begin = useCallback((identity: RegistryIdentity) => {
+    const key = downloadKey(identity);
+    const [command, args] = commandFor(identity);
+    setEntries((prev) => {
+      const next = new Map(prev);
+      next.set(key, { identity, acc: startingAccumulator() });
+      return next;
+    });
+    const channel = new Channel<DownloadEvent>();
+    channel.onmessage = (event) =>
+      setEntries((prev) => {
+        const cur = prev.get(key);
+        // Entry cleared (Choose another) while a late event was in flight: drop.
+        if (!cur) return prev;
+        const acc = reduceDownloadEvent(cur.acc, event, false);
+        const next = new Map(prev);
+        // A Cancelled event resets to idle: prune so the row returns to its
+        // Paused/partial controls instead of lingering as a dead download.
+        if (acc.state.phase === 'idle') {
+          next.delete(key);
+        } else {
+          next.set(key, { ...cur, acc });
+        }
+        return next;
+      });
+    void invoke(command, { ...args, key, onEvent: channel }).catch((err) =>
+      // A rejected invoke means the command failed before streaming (e.g. the
+      // repo spec could not be resolved), so no channel event will arrive: mark
+      // the entry failed from the identity in scope.
+      setEntries((prev) => {
+        const next = new Map(prev);
+        next.set(key, {
+          identity,
+          acc: {
+            ...startingAccumulator(),
+            state: { phase: 'failed', kind: 'other', message: String(err) },
+          },
+        });
+        return next;
+      }),
+    );
+  }, []);
+
+  const startStaffPick = useCallback(
+    (id: string) => begin({ kind: 'staff', id }),
+    [begin],
+  );
+
+  const startRepoDownload = useCallback(
+    (repo: string, file: string) => begin({ kind: 'repo', repo, file }),
+    [begin],
+  );
+
+  const cancel = useCallback((key: string) => {
+    void invoke('cancel_model_download', { key });
+  }, []);
+
+  const retry = useCallback(
+    (key: string) => {
+      const entry = entriesRef.current.get(key);
+      if (entry) begin(entry.identity);
+    },
+    [begin],
+  );
+
+  const discard = useCallback(async (sha256: string) => {
+    await invoke('discard_partial_download', { sha256 });
+  }, []);
+
+  const clear = useCallback((key: string) => {
+    setEntries((prev) => {
+      if (!prev.has(key)) return prev;
+      const next = new Map(prev);
+      next.delete(key);
+      return next;
+    });
+  }, []);
+
+  const get = useCallback(
+    (key: string): DownloadView | undefined => {
+      const entry = entries.get(key);
+      if (!entry) return undefined;
+      const { state, progress, etaSeconds, combinedBytes, speedBytesPerSec } =
+        entry.acc;
+      return { state, progress, etaSeconds, combinedBytes, speedBytesPerSec };
+    },
+    [entries],
+  );
+
+  const hasRepoDownload = useCallback(
+    (repo: string): boolean => {
+      for (const entry of entries.values()) {
+        if (entry.identity.kind === 'repo' && entry.identity.repo === repo) {
+          return true;
+        }
+      }
+      return false;
+    },
+    [entries],
+  );
+
+  const value = useMemo<DownloadsContextValue>(
+    () => ({
+      get,
+      hasRepoDownload,
+      startStaffPick,
+      startRepoDownload,
+      cancel,
+      retry,
+      discard,
+      clear,
+    }),
+    [
+      get,
+      hasRepoDownload,
+      startStaffPick,
+      startRepoDownload,
+      cancel,
+      retry,
+      discard,
+      clear,
+    ],
+  );
+
+  return <DownloadsContext value={value}>{children}</DownloadsContext>;
+}
+
+/**
+ * Hook giving access to the Settings download registry. A missing
+ * `DownloadsProvider` ancestor is treated as a wiring bug and throws, because
+ * a live multi-download has no sensible static fallback value.
+ */
+export function useDownloads(): DownloadsContextValue {
+  const registry = use(DownloadsContext);
+  if (registry !== null) {
+    return registry;
+  }
+  throw new Error('useDownloads must be used within a DownloadsProvider');
+}
diff --git a/src/contexts/__tests__/DownloadContext.test.tsx b/src/contexts/__tests__/DownloadContext.test.tsx
index 4c465216..dade3ec9 100644
--- a/src/contexts/__tests__/DownloadContext.test.tsx
+++ b/src/contexts/__tests__/DownloadContext.test.tsx
@@ -113,59 +113,6 @@ describe('DownloadContext', () => {
     expect(result.current.resumeSeedBytes).toBeNull();
     expect(result.current.activeOption).toBeNull();
     expect(result.current.grandTotalBytes).toBeNull();
-    expect(result.current.activeDownload).toBeNull();
-  });
-
-  it('startStaffPick records the row id and runs the verified catalog path', async () => {
-    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
-
-    await act(async () => {
-      result.current.startStaffPick('gemma-4-12b');
-    });
-
-    expect(result.current.activeDownload).toEqual({
-      kind: 'staff',
-      id: 'gemma-4-12b',
-    });
-    expect(result.current.state).toEqual({ phase: 'downloading' });
-    expect(invoke).toHaveBeenCalledWith('download_staff_pick', {
-      id: 'gemma-4-12b',
-      onEvent: expect.anything(),
-    });
-  });
-
-  it('startRepoDownload records the repo + file and runs the repo path', async () => {
-    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
-
-    await act(async () => {
-      result.current.startRepoDownload('org/repo', 'weights-q4.gguf');
-    });
-
-    expect(result.current.activeDownload).toEqual({
-      kind: 'repo',
-      repo: 'org/repo',
-      file: 'weights-q4.gguf',
-    });
-    expect(result.current.state).toEqual({ phase: 'downloading' });
-    expect(invoke).toHaveBeenCalledWith('download_repo_model', {
-      repo: 'org/repo',
-      file: 'weights-q4.gguf',
-      onEvent: expect.anything(),
-    });
-  });
-
-  it('clearActiveDownload forgets the active Discover download row', async () => {
-    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
-
-    await act(async () => {
-      result.current.startStaffPick('gemma-4-12b');
-    });
-    expect(result.current.activeDownload).not.toBeNull();
-
-    act(() => {
-      result.current.clearActiveDownload();
-    });
-    expect(result.current.activeDownload).toBeNull();
   });
 
   it('beginDownload records the tier, option, grand total and starts the machine', async () => {
@@ -184,6 +131,7 @@ describe('DownloadContext', () => {
     expect(result.current.state).toEqual({ phase: 'downloading' });
     expect(invoke).toHaveBeenCalledWith('download_starter', {
       tier: 'balanced',
+      key: 'tier:balanced',
       onEvent: expect.anything(),
     });
   });
@@ -207,6 +155,7 @@ describe('DownloadContext', () => {
     expect(result.current.state).toEqual({ phase: 'downloading' });
     expect(invoke).toHaveBeenCalledWith('download_starter', {
       tier: 'fast',
+      key: 'tier:fast',
       onEvent: expect.anything(),
     });
   });
@@ -239,7 +188,9 @@ describe('DownloadContext', () => {
     // until the backend Cancelled lands (slot released) so a resume cannot
     // race; meanwhile `isPausing` is true for instant "Pausing…" feedback.
     expect(result.current.pausedBytes).toBe(60);
-    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download', {
+      key: 'tier:balanced',
+    });
     expect(result.current.isPaused).toBe(false);
     expect(result.current.isPausing).toBe(true);
 
@@ -328,6 +279,7 @@ describe('DownloadContext', () => {
       expect(result.current.state).toEqual({ phase: 'downloading' });
       expect(invoke).toHaveBeenCalledWith('download_starter', {
         tier: 'fast',
+        key: 'tier:fast',
         onEvent: expect.anything(),
       });
     });
diff --git a/src/contexts/__tests__/DownloadsContext.test.tsx b/src/contexts/__tests__/DownloadsContext.test.tsx
new file mode 100644
index 00000000..e30f3fe7
--- /dev/null
+++ b/src/contexts/__tests__/DownloadsContext.test.tsx
@@ -0,0 +1,225 @@
+import { renderHook, act } from '@testing-library/react';
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import type { ReactNode } from 'react';
+import { DownloadsProvider, useDownloads } from '../DownloadsContext';
+import {
+  invoke,
+  enableChannelCapture,
+  getLastChannel,
+  resetChannelCapture,
+  type Channel,
+} from '../../testUtils/mocks/tauri';
+import { downloadKey } from '../../hooks/downloadKey';
+import type { DownloadEvent } from '../../types/starter';
+
+/** The captured download channel, typed for simulateMessage calls. */
+function channel(): Channel<DownloadEvent> {
+  return getLastChannel() as Channel<DownloadEvent>;
+}
+
+function wrapper({ children }: { children: ReactNode }) {
+  return <DownloadsProvider>{children}</DownloadsProvider>;
+}
+
+const STAFF_KEY = downloadKey({ kind: 'staff', id: 'gemma-4-12b' });
+const REPO_KEY = downloadKey({
+  kind: 'repo',
+  repo: 'org/repo',
+  file: 'w.gguf',
+});
+
+describe('DownloadsContext', () => {
+  beforeEach(() => {
+    invoke.mockReset();
+    enableChannelCapture();
+  });
+
+  afterEach(() => {
+    resetChannelCapture();
+    vi.restoreAllMocks();
+  });
+
+  it('throws when useDownloads is called outside a provider', () => {
+    const spy = vi.spyOn(console, 'error').mockImplementation(() => {});
+    expect(() => renderHook(() => useDownloads())).toThrow(
+      'useDownloads must be used within a DownloadsProvider',
+    );
+    spy.mockRestore();
+  });
+
+  it('has no downloads when idle', () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    expect(result.current.get(STAFF_KEY)).toBeUndefined();
+    expect(result.current.hasRepoDownload('org/repo')).toBe(false);
+  });
+
+  it('starts a Staff Picks download keyed by its id', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+
+    expect(result.current.get(STAFF_KEY)?.state).toEqual({
+      phase: 'downloading',
+    });
+    expect(invoke).toHaveBeenCalledWith('download_staff_pick', {
+      id: 'gemma-4-12b',
+      key: STAFF_KEY,
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('advances a download through its channel events to ready', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'w.gguf', total_bytes: 100, resumed_from: 0 },
+      }),
+    );
+    act(() =>
+      channel().simulateMessage({
+        type: 'Progress',
+        data: { file: 'w.gguf', bytes: 60, total_bytes: 100 },
+      }),
+    );
+    expect(result.current.get(STAFF_KEY)?.combinedBytes).toBe(60);
+
+    act(() => channel().simulateMessage({ type: 'AllDone' }));
+    expect(result.current.get(STAFF_KEY)?.state).toEqual({ phase: 'ready' });
+  });
+
+  it('prunes an entry when its download is cancelled', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+    expect(result.current.get(STAFF_KEY)).toBeDefined();
+
+    act(() => channel().simulateMessage({ type: 'Cancelled' }));
+    expect(result.current.get(STAFF_KEY)).toBeUndefined();
+  });
+
+  it('marks a download failed when the start invoke rejects', async () => {
+    invoke.mockImplementation(async (cmd: string) => {
+      if (cmd === 'download_staff_pick')
+        throw 'a download is already in progress';
+    });
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+      await Promise.resolve();
+    });
+
+    expect(result.current.get(STAFF_KEY)?.state).toEqual({
+      phase: 'failed',
+      kind: 'other',
+      message: 'a download is already in progress',
+    });
+  });
+
+  it('cancel targets the keyed download', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.cancel(STAFF_KEY);
+    });
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download', {
+      key: STAFF_KEY,
+    });
+  });
+
+  it('retry replays the failed download, clear forgets it', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'http', message: 'HTTP 500' },
+      }),
+    );
+
+    await act(async () => {
+      result.current.retry(STAFF_KEY);
+    });
+    expect(
+      invoke.mock.calls.filter((c) => c[0] === 'download_staff_pick'),
+    ).toHaveLength(2);
+
+    // A retry with no entry for the key is a no-op (nothing to replay).
+    invoke.mockClear();
+    await act(async () => {
+      result.current.retry('staff:does-not-exist');
+    });
+    expect(invoke).not.toHaveBeenCalled();
+
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'http', message: 'again' },
+      }),
+    );
+    act(() => {
+      result.current.clear(STAFF_KEY);
+    });
+    expect(result.current.get(STAFF_KEY)).toBeUndefined();
+    // Clearing a key with no entry is a harmless no-op.
+    act(() => {
+      result.current.clear('staff:does-not-exist');
+    });
+  });
+
+  it('discard removes a kept partial by sha', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      await result.current.discard('a'.repeat(64));
+    });
+    expect(invoke).toHaveBeenCalledWith('discard_partial_download', {
+      sha256: 'a'.repeat(64),
+    });
+  });
+
+  it('tracks repo downloads for the re-expand check', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.startRepoDownload('org/repo', 'w.gguf');
+    });
+    expect(result.current.get(REPO_KEY)?.state).toEqual({
+      phase: 'downloading',
+    });
+    expect(result.current.hasRepoDownload('org/repo')).toBe(true);
+    expect(result.current.hasRepoDownload('other/repo')).toBe(false);
+    expect(invoke).toHaveBeenCalledWith('download_repo_model', {
+      repo: 'org/repo',
+      file: 'w.gguf',
+      key: REPO_KEY,
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('ignores a late channel event after its entry is cleared', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+    const late = channel();
+    act(() => {
+      result.current.clear(STAFF_KEY);
+    });
+    // The download task may still emit; with no entry the event is dropped.
+    act(() =>
+      late.simulateMessage({
+        type: 'Progress',
+        data: { file: 'w.gguf', bytes: 10, total_bytes: 100 },
+      }),
+    );
+    expect(result.current.get(STAFF_KEY)).toBeUndefined();
+  });
+});
diff --git a/src/hooks/__tests__/useDownloadModel.test.tsx b/src/hooks/__tests__/useDownloadModel.test.tsx
index 990d9827..80244d4d 100644
--- a/src/hooks/__tests__/useDownloadModel.test.tsx
+++ b/src/hooks/__tests__/useDownloadModel.test.tsx
@@ -63,6 +63,7 @@ describe('useDownloadModel', () => {
     expect(result.current.state).toEqual({ phase: 'downloading' });
     expect(invoke).toHaveBeenCalledWith('download_starter', {
       tier: 'balanced',
+      key: 'tier:balanced',
       onEvent: expect.anything(),
     });
 
@@ -290,7 +291,9 @@ describe('useDownloadModel', () => {
     expect(result.current.progress?.bytes).toBe(40);
 
     await act(() => result.current.cancel());
-    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download', {
+      key: 'tier:fast',
+    });
     // State waits for the backend's Cancelled event.
     expect(result.current.state).toEqual({ phase: 'downloading' });
 
@@ -325,6 +328,7 @@ describe('useDownloadModel', () => {
     expect(result.current.state).toEqual({ phase: 'downloading' });
     expect(invoke).toHaveBeenLastCalledWith('download_starter', {
       tier: 'smartest',
+      key: 'tier:smartest',
       onEvent: expect.anything(),
     });
   });
@@ -343,6 +347,7 @@ describe('useDownloadModel', () => {
     expect(invoke).toHaveBeenCalledWith('download_repo_model', {
       repo: 'owner/repo',
       file: 'w.gguf',
+      key: 'repo:owner/repo\nw.gguf',
       onEvent: expect.anything(),
     });
     act(() => channel().simulateMessage({ type: 'AllDone' }));
@@ -364,6 +369,7 @@ describe('useDownloadModel', () => {
     expect(invoke).toHaveBeenLastCalledWith('download_repo_model', {
       repo: 'owner/repo',
       file: 'w.gguf',
+      key: 'repo:owner/repo\nw.gguf',
       onEvent: expect.anything(),
     });
   });
@@ -385,6 +391,7 @@ describe('useDownloadModel', () => {
     expect(result.current.state).toEqual({ phase: 'downloading' });
     expect(invoke).toHaveBeenCalledWith('download_staff_pick', {
       id: 'gemma-4-12b',
+      key: 'staff:gemma-4-12b',
       onEvent: expect.anything(),
     });
     act(() => channel().simulateMessage({ type: 'AllDone' }));
@@ -405,6 +412,7 @@ describe('useDownloadModel', () => {
     expect(result.current.state).toEqual({ phase: 'downloading' });
     expect(invoke).toHaveBeenLastCalledWith('download_staff_pick', {
       id: 'gpt-oss-20b',
+      key: 'staff:gpt-oss-20b',
       onEvent: expect.anything(),
     });
   });
@@ -460,6 +468,7 @@ describe('useDownloadModel', () => {
     expect(result.current.state).toEqual({ phase: 'downloading' });
     expect(invoke).toHaveBeenCalledWith('download_starter', {
       tier: 'balanced',
+      key: 'tier:balanced',
       onEvent: expect.anything(),
     });
   });
diff --git a/src/hooks/downloadKey.ts b/src/hooks/downloadKey.ts
new file mode 100644
index 00000000..91e8ee65
--- /dev/null
+++ b/src/hooks/downloadKey.ts
@@ -0,0 +1,32 @@
+/**
+ * Stable per-download identity and its backend slot key.
+ *
+ * The backend keys its concurrent-download slots by an opaque string the
+ * frontend supplies (see `DownloadState` in `models/mod.rs`). Deriving that key
+ * in one place keeps the onboarding hook ({@link useDownloadModel}) and the
+ * Settings download registry ({@link useDownloads}) naming the same download the
+ * same way, so the backend's per-key dedupe behaves predictably across both.
+ */
+
+/** What a download produces, enough to name it and to replay/display it. */
+export type DownloadIdentity =
+  | { kind: 'tier'; tier: string }
+  | { kind: 'staff'; id: string }
+  | { kind: 'repo'; repo: string; file: string };
+
+/**
+ * Derives the backend slot key for a download. Each key starts with its kind,
+ * so a Staff Picks id can never collide with a repo path. A repo key joins
+ * `repo` and `file` with a newline (which cannot appear in a Hugging Face
+ * repo id or GGUF filename), so a `repo`/`file` pair maps to exactly one key.
+ */
+export function downloadKey(identity: DownloadIdentity): string {
+  if (identity.kind === 'tier') {
+    return `tier:${identity.tier}`;
+  }
+  if (identity.kind === 'staff') {
+    return `staff:${identity.id}`;
+  }
+  // Remaining variant: a Browse-all repo + GGUF file pair.
+  return `repo:${identity.repo}\n${identity.file}`;
+}
diff --git a/src/hooks/downloadReducer.ts b/src/hooks/downloadReducer.ts
new file mode 100644
index 00000000..bd7378ed
--- /dev/null
+++ b/src/hooks/downloadReducer.ts
@@ -0,0 +1,288 @@
+/**
+ * Pure state for a single model download, plus the reducer that advances it on
+ * each backend `DownloadEvent`.
+ *
+ * This is the one source of truth for "what a download channel's events mean".
+ * The single-download onboarding hook ({@link useDownloadModel}) and the
+ * multi-download Settings registry ({@link useDownloads}) both drive their state
+ * through {@link reduceDownloadEvent}, so the two never diverge. The reducer has
+ * no React, refs, or I/O (it reads only `Date.now()`, to stamp Progress samples);
+ * the old hook's ref counters live here, so a registry can hold one per download.
+ *
+ * The post-download engine handoff (`installing -> warming_up -> ready`, driven
+ * by the `engine:status` event when `awaitEngine` is set) is NOT modeled here:
+ * it is a separate event stream owned by the onboarding hook. This reducer only
+ * interprets the per-download `DownloadEvent` channel.
+ */
+
+import type {
+  DownloadEvent,
+  DownloadFailKind,
+  StarterTier,
+} from '../types/starter';
+
+/** Failure kinds the UI can show: the backend's plus the engine handoff's. */
+export type DownloadUiFailKind = DownloadFailKind | 'engine';
+
+/** The download UI state machine's discriminated union. */
+export type DownloadUiState =
+  | { phase: 'idle' }
+  | { phase: 'confirming'; tier: StarterTier }
+  | { phase: 'downloading' }
+  | { phase: 'downloading_mmproj' }
+  | { phase: 'verifying' }
+  | { phase: 'installing' }
+  | { phase: 'warming_up' }
+  | { phase: 'ready' }
+  | { phase: 'resume_pending' }
+  | { phase: 'failed'; kind: DownloadUiFailKind; message: string };
+
+/** Last reported byte counts for the file currently downloading. */
+export interface DownloadProgressInfo {
+  file: string;
+  bytes: number;
+  totalBytes: number;
+}
+
+/** One ETA sample: a Progress event's byte count and arrival time. */
+export interface EtaSample {
+  t: number;
+  bytes: number;
+}
+
+/** Rolling-rate window: only Progress samples this recent feed the ETA. */
+const ETA_WINDOW_MS = 10_000;
+
+/**
+ * Everything needed to render one download and to fold the next event in. The
+ * fields below `speedBytesPerSec` are internal accumulators (the old hook's
+ * refs); consumers read the render fields and pass the whole accumulator back
+ * into {@link reduceDownloadEvent}.
+ */
+export interface DownloadAccumulator {
+  state: DownloadUiState;
+  progress: DownloadProgressInfo | null;
+  etaSeconds: number | null;
+  /**
+   * Cumulative bytes downloaded across every file of the current run (weights +
+   * vision companion), or null when idle. One continuous figure: never resets
+   * between the two files.
+   */
+  combinedBytes: number | null;
+  /** Rolling download rate in bytes per second, or null until measurable. */
+  speedBytesPerSec: number | null;
+  /** Recent Progress samples inside the rolling ETA window. */
+  samples: EtaSample[];
+  /** How many `Started` events have arrived (1 = weights, 2 = mmproj). */
+  startedCount: number;
+  /** Bytes from files that have already fully completed this run. */
+  completedBytes: number;
+  /** Declared total of the file currently downloading. */
+  currentFileTotal: number;
+}
+
+/** A fresh accumulator parked at `idle` with empty counters. */
+export function initialAccumulator(): DownloadAccumulator {
+  return {
+    state: { phase: 'idle' },
+    progress: null,
+    etaSeconds: null,
+    combinedBytes: null,
+    speedBytesPerSec: null,
+    samples: [],
+    startedCount: 0,
+    completedBytes: 0,
+    currentFileTotal: 0,
+  };
+}
+
+/** An accumulator reset to the start of a fresh run (phase `downloading`). */
+export function startingAccumulator(): DownloadAccumulator {
+  return { ...initialAccumulator(), state: { phase: 'downloading' } };
+}
+
+/**
+ * True while a download is active but not yet terminal: bytes still moving
+ * (`downloading`/`downloading_mmproj`) or the post-download verify/install/warm
+ * steps running. False for idle, the pre-flight confirm/resume states, and the
+ * terminal `ready`/`failed`.
+ */
+export function isDownloadInFlight(phase: DownloadUiState['phase']): boolean {
+  return (
+    phase === 'downloading' ||
+    phase === 'downloading_mmproj' ||
+    phase === 'verifying' ||
+    phase === 'installing' ||
+    phase === 'warming_up'
+  );
+}
+
+/**
+ * A short, jargon-free reason for a failed download, by kind, so the UI tells
+ * the user what actually went wrong instead of a generic message.
+ */
+export function downloadFailureMessage(kind: DownloadUiFailKind): string {
+  switch (kind) {
+    case 'offline':
+      return 'You appear to be offline.';
+    case 'http':
+      return 'Hugging Face had an error. Try again.';
+    case 'checksum':
+      return 'The download did not verify. Retrying starts it fresh.';
+    case 'disk_full':
+      return 'Not enough disk space.';
+    case 'engine':
+      return "Thuki's engine could not start.";
+    case 'other':
+      return 'Model download failed.';
+  }
+}
+
+/**
+ * Bytes per second from the rolling sample window, or `null` while the rate is
+ * not yet measurable (fewer than two samples, zero elapsed time, or no forward
+ * progress between the window's edges).
+ */
+export function computeSpeedBytesPerSec(samples: EtaSample[]): number | null {
+  if (samples.length < 2) return null;
+  const first = samples[0];
+  const last = samples[samples.length - 1];
+  const elapsedSeconds = (last.t - first.t) / 1000;
+  const deltaBytes = last.bytes - first.bytes;
+  if (elapsedSeconds <= 0 || deltaBytes <= 0) return null;
+  return deltaBytes / elapsedSeconds;
+}
+
+/**
+ * Remaining seconds from the rolling sample window, or `null` while the rate is
+ * not yet measurable (fewer than two samples, zero elapsed time, or no forward
+ * progress between the window's edges).
+ */
+export function computeEtaSeconds(
+  samples: EtaSample[],
+  bytes: number,
+  totalBytes: number,
+): number | null {
+  const bytesPerSecond = computeSpeedBytesPerSec(samples);
+  if (bytesPerSecond === null) return null;
+  return Math.max(0, Math.round((totalBytes - bytes) / bytesPerSecond));
+}
+
+/** Appends a sample and drops any that have aged out of the rolling window. */
+function pushSample(
+  samples: EtaSample[],
+  sample: EtaSample,
+  now: number,
+): EtaSample[] {
+  const next = [...samples, sample];
+  let start = 0;
+  while (start < next.length && now - next[start].t > ETA_WINDOW_MS) {
+    start += 1;
+  }
+  return start > 0 ? next.slice(start) : next;
+}
+
+/**
+ * Folds one backend `DownloadEvent` into the accumulator, returning a new
+ * accumulator (the input is never mutated). `awaitEngine` decides the terminal
+ * step: when set, `AllDone` parks in `installing` for the `engine:status`
+ * handoff; otherwise it goes straight to `ready`.
+ */
+export function reduceDownloadEvent(
+  acc: DownloadAccumulator,
+  event: DownloadEvent,
+  awaitEngine: boolean,
+): DownloadAccumulator {
+  switch (event.type) {
+    case 'Started': {
+      const startedCount = acc.startedCount + 1;
+      return {
+        ...acc,
+        startedCount,
+        samples: [],
+        etaSeconds: null,
+        speedBytesPerSec: null,
+        currentFileTotal: event.data.total_bytes,
+        progress: {
+          file: event.data.file,
+          bytes: event.data.resumed_from,
+          totalBytes: event.data.total_bytes,
+        },
+        combinedBytes: acc.completedBytes + event.data.resumed_from,
+        // The second Started is always the mmproj companion: specs are ordered
+        // weights first, mmproj second.
+        state:
+          startedCount >= 2
+            ? { phase: 'downloading_mmproj' }
+            : { phase: 'downloading' },
+      };
+    }
+    case 'Progress': {
+      const now = Date.now();
+      const samples = pushSample(
+        acc.samples,
+        { t: now, bytes: event.data.bytes },
+        now,
+      );
+      // A resume re-hash labels itself `verifying` before the remaining bytes
+      // stream; the first streamed Progress returns the label to the active
+      // downloading phase so the transfer is not mislabeled. Any other phase is
+      // left untouched.
+      const state: DownloadUiState =
+        acc.state.phase === 'verifying'
+          ? acc.startedCount >= 2
+            ? { phase: 'downloading_mmproj' }
+            : { phase: 'downloading' }
+          : acc.state;
+      return {
+        ...acc,
+        samples,
+        state,
+        progress: {
+          file: event.data.file,
+          bytes: event.data.bytes,
+          totalBytes: event.data.total_bytes,
+        },
+        etaSeconds: computeEtaSeconds(
+          samples,
+          event.data.bytes,
+          event.data.total_bytes,
+        ),
+        speedBytesPerSec: computeSpeedBytesPerSec(samples),
+        combinedBytes: acc.completedBytes + event.data.bytes,
+      };
+    }
+    case 'Verifying':
+      return { ...acc, state: { phase: 'verifying' } };
+    case 'FileDone': {
+      // Fold this file's bytes into the completed total and snap the cumulative
+      // figure to the boundary so the bar never dips. The next Started (mmproj)
+      // or AllDone moves the state.
+      const completedBytes = acc.completedBytes + acc.currentFileTotal;
+      return {
+        ...acc,
+        completedBytes,
+        currentFileTotal: 0,
+        combinedBytes: completedBytes,
+      };
+    }
+    case 'AllDone':
+      return {
+        ...acc,
+        state: awaitEngine ? { phase: 'installing' } : { phase: 'ready' },
+      };
+    case 'Cancelled':
+      return initialAccumulator();
+    case 'Failed':
+      // Terminal from ANY state, including verifying (finalize failure: the
+      // manifest write failed, so AllDone never arrives).
+      return {
+        ...acc,
+        state: {
+          phase: 'failed',
+          kind: event.data.kind,
+          message: event.data.message,
+        },
+      };
+  }
+}
diff --git a/src/hooks/useDownloadModel.ts b/src/hooks/useDownloadModel.ts
index 89f45f15..64e4a856 100644
--- a/src/hooks/useDownloadModel.ts
+++ b/src/hooks/useDownloadModel.ts
@@ -1,134 +1,57 @@
 /**
- * Download-state machine for starter model downloads.
+ * Download-state machine for a single starter model download (onboarding).
  *
- * Drives the shared download UI (StarterPicker + DownloadProgress) through
- * one discriminated-union state, fed by the `download_starter` Tauri channel
- * and, optionally, the `engine:status` Tauri event.
+ * Drives the onboarding download UI (StarterPicker + DownloadProgress) through
+ * one discriminated-union state, fed by the `download_*` Tauri channel and,
+ * optionally, the `engine:status` Tauri event. Per-event state transitions live
+ * in the shared {@link reduceDownloadEvent} reducer so this single-download hook
+ * and the multi-download Settings registry ({@link useDownloads}) never diverge.
  *
- * Engine handoff: by default `AllDone` transitions straight to `ready`,
- * because after a Settings-context download nobody starts the engine until
- * the first chat, so waiting on `engine:status` would hang forever. A
- * consumer that does prime the engine right after the download (onboarding)
- * passes `awaitEngine: true`; then `AllDone` parks in `installing` and the
- * `engine:status` listener advances `installing -> warming_up -> ready`
- * (or `failed` with kind `engine`).
+ * Engine handoff: by default `AllDone` transitions straight to `ready`, because
+ * after a Settings-context download nobody starts the engine until the first
+ * chat, so waiting on `engine:status` would hang forever. A consumer that does
+ * prime the engine right after the download (onboarding) passes
+ * `awaitEngine: true`; then `AllDone` parks in `installing` and the
+ * `engine:status` listener advances `installing -> warming_up -> ready` (or
+ * `failed` with kind `engine`).
  *
  * The backend emits `AllDone` only after the install is recorded; a finalize
- * failure (the manifest write failed) emits `Failed` instead of `AllDone`.
- * `Failed` is terminal from any state. Terminal means no *event* moves the
- * machine out of it; the user can still leave through `reset`, an explicit
- * action that returns the terminal `failed`/`ready` cards to the picker.
+ * failure (the manifest write failed) emits `Failed` instead. `Failed` is
+ * terminal from any state. Terminal means no *event* moves the machine out of
+ * it; the user can still leave through `reset`.
  */
 
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { Channel, invoke } from '@tauri-apps/api/core';
 import { listen } from '@tauri-apps/api/event';
+import {
+  type DownloadAccumulator,
+  type DownloadProgressInfo,
+  type DownloadUiState,
+  initialAccumulator,
+  reduceDownloadEvent,
+  startingAccumulator,
+} from './downloadReducer';
+import { downloadKey } from './downloadKey';
 import type {
   DownloadEvent,
-  DownloadFailKind,
   EngineStatus,
   StarterTier,
 } from '../types/starter';
 
-/** Failure kinds the UI can show: the backend's plus the engine handoff's. */
-export type DownloadUiFailKind = DownloadFailKind | 'engine';
-
-/** The download UI state machine's discriminated union. */
-export type DownloadUiState =
-  | { phase: 'idle' }
-  | { phase: 'confirming'; tier: StarterTier }
-  | { phase: 'downloading' }
-  | { phase: 'downloading_mmproj' }
-  | { phase: 'verifying' }
-  | { phase: 'installing' }
-  | { phase: 'warming_up' }
-  | { phase: 'ready' }
-  | { phase: 'resume_pending' }
-  | { phase: 'failed'; kind: DownloadUiFailKind; message: string };
-
-/**
- * True while a download is active but not yet terminal: bytes still moving
- * (`downloading`/`downloading_mmproj`) or the post-download verify/install/warm
- * steps running. False for idle, the pre-flight confirm/resume states, and the
- * terminal `ready`/`failed`. Shared by the picker's "Continue setup" line, the
- * ambient strip, and the submit soft-block so all three agree on "in flight".
- */
-export function isDownloadInFlight(phase: DownloadUiState['phase']): boolean {
-  return (
-    phase === 'downloading' ||
-    phase === 'downloading_mmproj' ||
-    phase === 'verifying' ||
-    phase === 'installing' ||
-    phase === 'warming_up'
-  );
-}
-
-/**
- * A short, jargon-free reason for a failed download, by kind, so the ambient
- * strip tells the user what actually went wrong instead of a generic message.
- */
-export function downloadFailureMessage(kind: DownloadUiFailKind): string {
-  switch (kind) {
-    case 'offline':
-      return 'You appear to be offline.';
-    case 'http':
-      return 'Hugging Face had an error. Try again.';
-    case 'checksum':
-      return 'The download did not verify. Retrying starts it fresh.';
-    case 'disk_full':
-      return 'Not enough disk space.';
-    case 'engine':
-      return "Thuki's engine could not start.";
-    case 'other':
-      return 'Model download failed.';
-  }
-}
-
-/** Last reported byte counts for the file currently downloading. */
-export interface DownloadProgressInfo {
-  file: string;
-  bytes: number;
-  totalBytes: number;
-}
-
-/** One ETA sample: a Progress event's byte count and arrival time. */
-interface EtaSample {
-  t: number;
-  bytes: number;
-}
-
-/** Rolling-rate window: only Progress samples this recent feed the ETA. */
-const ETA_WINDOW_MS = 10_000;
-
-/**
- * Bytes per second from the rolling sample window, or `null` while the rate
- * is not yet measurable (fewer than two samples, zero elapsed time, or no
- * forward progress between the window's edges).
- */
-export function computeSpeedBytesPerSec(samples: EtaSample[]): number | null {
-  if (samples.length < 2) return null;
-  const first = samples[0];
-  const last = samples[samples.length - 1];
-  const elapsedSeconds = (last.t - first.t) / 1000;
-  const deltaBytes = last.bytes - first.bytes;
-  if (elapsedSeconds <= 0 || deltaBytes <= 0) return null;
-  return deltaBytes / elapsedSeconds;
-}
-
-/**
- * Remaining seconds from the rolling sample window, or `null` while the
- * rate is not yet measurable (fewer than two samples, zero elapsed time,
- * or no forward progress between the window's edges).
- */
-export function computeEtaSeconds(
-  samples: EtaSample[],
-  bytes: number,
-  totalBytes: number,
-): number | null {
-  const bytesPerSecond = computeSpeedBytesPerSec(samples);
-  if (bytesPerSecond === null) return null;
-  return Math.max(0, Math.round((totalBytes - bytes) / bytesPerSecond));
-}
+// Re-export the shared download vocabulary so existing consumers keep importing
+// it from this hook; the definitions now live in `downloadReducer`.
+export {
+  computeEtaSeconds,
+  computeSpeedBytesPerSec,
+  downloadFailureMessage,
+  isDownloadInFlight,
+} from './downloadReducer';
+export type {
+  DownloadProgressInfo,
+  DownloadUiFailKind,
+  DownloadUiState,
+} from './downloadReducer';
 
 export interface UseDownloadModel {
   state: DownloadUiState;
@@ -161,15 +84,15 @@ export interface UseDownloadModel {
    */
   startById: (id: string) => Promise<void>;
   /**
-   * Invokes `cancel_model_download`. The state flips back to idle when the
-   * backend's Cancelled event lands; the partial is KEPT, so the caller
-   * refreshes options to surface resume_pending.
+   * Invokes `cancel_model_download` for the run this hook last started. The
+   * state flips back to idle when the backend's Cancelled event lands; the
+   * partial is KEPT, so the caller refreshes options to surface resume_pending.
    */
   cancel: () => Promise<void>;
   /**
    * failed -> downloading. A checksum failure already deleted the partial
    * on the backend, so retrying is just starting the same download (starter
-   * tier or pasted repo, whichever ran last) again.
+   * tier, staff pick, or pasted repo, whichever ran last) again.
    */
   retry: () => Promise<void>;
   /** resume_pending -> downloading; the backend resumes via Range. */
@@ -201,131 +124,37 @@ export function useDownloadModel(
 ): UseDownloadModel {
   const awaitEngine = options?.awaitEngine === true;
 
-  const [state, setState] = useState<DownloadUiState>({ phase: 'idle' });
-  const [progress, setProgress] = useState<DownloadProgressInfo | null>(null);
-  const [etaSeconds, setEtaSeconds] = useState<number | null>(null);
-  const [combinedBytes, setCombinedBytes] = useState<number | null>(null);
-  const [speedBytesPerSec, setSpeedBytesPerSec] = useState<number | null>(null);
-
-  const samplesRef = useRef<EtaSample[]>([]);
-  const startedCountRef = useRef(0);
-  /** Bytes from files that have already fully completed this run. */
-  const completedBytesRef = useRef(0);
-  /** Declared total of the file currently downloading. */
-  const currentFileTotalRef = useRef(0);
-  /** Replays the most recent start (tier or repo) for `retry`. */
+  const [acc, setAcc] = useState<DownloadAccumulator>(initialAccumulator);
+  /** Download key of the run this hook last started, so `cancel` targets it. */
+  const currentKeyRef = useRef('');
+  /** Replays the most recent start (tier / repo / id) for `retry`. */
   const lastStartRef = useRef<(() => Promise<void>) | null>(null);
 
-  const handleEvent = useCallback(
-    (event: DownloadEvent) => {
-      switch (event.type) {
-        case 'Started': {
-          startedCountRef.current += 1;
-          samplesRef.current = [];
-          setEtaSeconds(null);
-          setSpeedBytesPerSec(null);
-          currentFileTotalRef.current = event.data.total_bytes;
-          setProgress({
-            file: event.data.file,
-            bytes: event.data.resumed_from,
-            totalBytes: event.data.total_bytes,
-          });
-          setCombinedBytes(completedBytesRef.current + event.data.resumed_from);
-          // The second Started is always the mmproj companion: specs are
-          // ordered weights first, mmproj second.
-          setState(
-            startedCountRef.current >= 2
-              ? { phase: 'downloading_mmproj' }
-              : { phase: 'downloading' },
-          );
-          break;
-        }
-        case 'Progress': {
-          const now = Date.now();
-          const samples = samplesRef.current;
-          samples.push({ t: now, bytes: event.data.bytes });
-          while (samples.length > 0 && now - samples[0].t > ETA_WINDOW_MS) {
-            samples.shift();
-          }
-          setProgress({
-            file: event.data.file,
-            bytes: event.data.bytes,
-            totalBytes: event.data.total_bytes,
-          });
-          setEtaSeconds(
-            computeEtaSeconds(
-              samples,
-              event.data.bytes,
-              event.data.total_bytes,
-            ),
-          );
-          setSpeedBytesPerSec(computeSpeedBytesPerSec(samples));
-          setCombinedBytes(completedBytesRef.current + event.data.bytes);
-          // A resume re-hash labels itself `verifying` before the remaining
-          // bytes stream; the first streamed Progress returns the label to the
-          // active downloading phase so the transfer is not mislabeled. Any
-          // other phase is left untouched (same reference → no re-render).
-          setState((prev) =>
-            prev.phase === 'verifying'
-              ? startedCountRef.current >= 2
-                ? { phase: 'downloading_mmproj' }
-                : { phase: 'downloading' }
-              : prev,
-          );
-          break;
-        }
-        case 'Verifying':
-          setState({ phase: 'verifying' });
-          break;
-        case 'FileDone':
-          // Fold this file's bytes into the completed total and snap the
-          // cumulative figure to the boundary so the bar never dips. The next
-          // Started (mmproj) or AllDone moves the state.
-          completedBytesRef.current += currentFileTotalRef.current;
-          currentFileTotalRef.current = 0;
-          setCombinedBytes(completedBytesRef.current);
-          break;
-        case 'AllDone':
-          setState(awaitEngine ? { phase: 'installing' } : { phase: 'ready' });
-          break;
-        case 'Cancelled':
-          setProgress(null);
-          setEtaSeconds(null);
-          setSpeedBytesPerSec(null);
-          setCombinedBytes(null);
-          completedBytesRef.current = 0;
-          currentFileTotalRef.current = 0;
-          setState({ phase: 'idle' });
-          break;
-        case 'Failed':
-          // Terminal from ANY state, including verifying (finalize failure:
-          // the manifest write failed, so AllDone never arrives).
-          setState({
-            phase: 'failed',
-            kind: event.data.kind,
-            message: event.data.message,
-          });
-          break;
-      }
-    },
-    [awaitEngine],
-  );
-
   useEffect(() => {
     if (!awaitEngine) return;
     const unlistenPromise = listen<EngineStatus>('engine:status', (event) => {
       const status = event.payload;
-      setState((prev) => {
-        if (prev.phase !== 'installing' && prev.phase !== 'warming_up') {
+      setAcc((prev) => {
+        if (
+          prev.state.phase !== 'installing' &&
+          prev.state.phase !== 'warming_up'
+        ) {
           return prev;
         }
-        if (status.state === 'starting') return { phase: 'warming_up' };
-        if (status.state === 'loaded') return { phase: 'ready' };
+        if (status.state === 'starting') {
+          return { ...prev, state: { phase: 'warming_up' } };
+        }
+        if (status.state === 'loaded') {
+          return { ...prev, state: { phase: 'ready' } };
+        }
         if (status.state === 'failed') {
           return {
-            phase: 'failed',
-            kind: 'engine',
-            message: status.error ?? 'the engine could not start',
+            ...prev,
+            state: {
+              phase: 'failed',
+              kind: 'engine',
+              message: status.error ?? 'the engine could not start',
+            },
           };
         }
         return prev;
@@ -337,40 +166,38 @@ export function useDownloadModel(
   }, [awaitEngine]);
 
   const beginConfirm = useCallback((tier: StarterTier) => {
-    setState({ phase: 'confirming', tier });
+    setAcc((prev) => ({ ...prev, state: { phase: 'confirming', tier } }));
   }, []);
 
   const cancelConfirm = useCallback(() => {
-    setState({ phase: 'idle' });
+    setAcc(initialAccumulator());
   }, []);
 
-  /** Shared start path: resets per-run trackers, wires the event channel,
-   * and invokes the given download command. */
+  /** Shared start path: resets the accumulator, wires the event channel, and
+   * invokes the given download command with its download key. */
   const run = useCallback(
-    async (command: string, args: Record<string, unknown>) => {
-      startedCountRef.current = 0;
-      samplesRef.current = [];
-      completedBytesRef.current = 0;
-      currentFileTotalRef.current = 0;
-      setProgress(null);
-      setEtaSeconds(null);
-      setSpeedBytesPerSec(null);
-      setCombinedBytes(null);
-      setState({ phase: 'downloading' });
+    async (command: string, args: Record<string, unknown>, key: string) => {
+      currentKeyRef.current = key;
+      setAcc(startingAccumulator());
       const channel = new Channel<DownloadEvent>();
-      channel.onmessage = handleEvent;
+      channel.onmessage = (event) =>
+        setAcc((prev) => reduceDownloadEvent(prev, event, awaitEngine));
       try {
-        await invoke(command, { ...args, onEvent: channel });
+        await invoke(command, { ...args, key, onEvent: channel });
       } catch (err) {
-        setState({ phase: 'failed', kind: 'other', message: String(err) });
+        setAcc((prev) => ({
+          ...prev,
+          state: { phase: 'failed', kind: 'other', message: String(err) },
+        }));
       }
     },
-    [handleEvent],
+    [awaitEngine],
   );
 
   const start = useCallback(
     async (tier: StarterTier) => {
-      const replay = () => run('download_starter', { tier });
+      const replay = () =>
+        run('download_starter', { tier }, downloadKey({ kind: 'tier', tier }));
       lastStartRef.current = replay;
       await replay();
     },
@@ -379,7 +206,12 @@ export function useDownloadModel(
 
   const startRepo = useCallback(
     async (repo: string, file: string) => {
-      const replay = () => run('download_repo_model', { repo, file });
+      const replay = () =>
+        run(
+          'download_repo_model',
+          { repo, file },
+          downloadKey({ kind: 'repo', repo, file }),
+        );
       lastStartRef.current = replay;
       await replay();
     },
@@ -388,7 +220,8 @@ export function useDownloadModel(
 
   const startById = useCallback(
     async (id: string) => {
-      const replay = () => run('download_staff_pick', { id });
+      const replay = () =>
+        run('download_staff_pick', { id }, downloadKey({ kind: 'staff', id }));
       lastStartRef.current = replay;
       await replay();
     },
@@ -396,7 +229,7 @@ export function useDownloadModel(
   );
 
   const cancel = useCallback(async () => {
-    await invoke('cancel_model_download');
+    await invoke('cancel_model_download', { key: currentKeyRef.current });
   }, []);
 
   const retry = useCallback(async () => {
@@ -409,38 +242,43 @@ export function useDownloadModel(
     try {
       await invoke('discard_partial_download', { sha256 });
     } catch (err) {
-      setState({ phase: 'failed', kind: 'other', message: String(err) });
+      setAcc((prev) => ({
+        ...prev,
+        state: { phase: 'failed', kind: 'other', message: String(err) },
+      }));
       return;
     }
-    setState({ phase: 'idle' });
+    setAcc((prev) => ({ ...prev, state: { phase: 'idle' } }));
   }, []);
 
   const enterResumePending = useCallback(() => {
-    setState({ phase: 'resume_pending' });
+    setAcc((prev) => ({ ...prev, state: { phase: 'resume_pending' } }));
   }, []);
 
   const reset = useCallback(() => {
-    setState((prev) =>
-      prev.phase === 'failed' || prev.phase === 'ready'
-        ? { phase: 'idle' }
-        : prev,
+    setAcc((prev) =>
+      prev.state.phase === 'failed' || prev.state.phase === 'ready'
+        ? initialAccumulator()
+        : {
+            // Non-terminal phase (callers only reset from the terminal cards):
+            // keep the phase, drop stale byte counts; the next start reseeds them.
+            ...prev,
+            progress: null,
+            etaSeconds: null,
+            speedBytesPerSec: null,
+            combinedBytes: null,
+            completedBytes: 0,
+            currentFileTotal: 0,
+          },
     );
-    // Stale byte counts from the run that just ended; the next start
-    // reseeds them. Callers only invoke reset from the terminal cards.
-    setProgress(null);
-    setEtaSeconds(null);
-    setSpeedBytesPerSec(null);
-    setCombinedBytes(null);
-    completedBytesRef.current = 0;
-    currentFileTotalRef.current = 0;
   }, []);
 
   return {
-    state,
-    progress,
-    etaSeconds,
-    combinedBytes,
-    speedBytesPerSec,
+    state: acc.state,
+    progress: acc.progress,
+    etaSeconds: acc.etaSeconds,
+    combinedBytes: acc.combinedBytes,
+    speedBytesPerSec: acc.speedBytesPerSec,
     beginConfirm,
     cancelConfirm,
     start,
diff --git a/src/settings/SettingsWindow.tsx b/src/settings/SettingsWindow.tsx
index 342383e1..afe0f58c 100644
--- a/src/settings/SettingsWindow.tsx
+++ b/src/settings/SettingsWindow.tsx
@@ -23,7 +23,7 @@ import {
 import { invoke } from '@tauri-apps/api/core';
 import { getCurrentWindow } from '@tauri-apps/api/window';
 
-import { DownloadProvider } from '../contexts/DownloadContext';
+import { DownloadsProvider } from '../contexts/DownloadsContext';
 import { useConfigSync } from './hooks/useConfigSync';
 import { useSettingsAutoResize } from './hooks/useSettingsAutoResize';
 import { ModelTab } from './tabs/ModelTab';
@@ -305,14 +305,14 @@ export function SettingsWindow() {
 
   if (!config) return null;
 
-  // The Settings window is its own webview root (see `main.tsx`), so it needs
-  // its own DownloadProvider: the Discover panes read the download machine from
-  // it, and hosting it here (above the section nav and the Models segmented
-  // control) keeps a Discover download alive across every in-window tab switch.
-  // It is independent of the main overlay's provider; the backend single-slot
-  // download is the real cross-window coordinator.
+  // The Settings window is its own webview root (see `main.tsx`), so it hosts
+  // its own download registry: the Discover panes read their downloads from it,
+  // and hosting it here (above the section nav and the Models segmented control)
+  // keeps every in-flight download alive across each in-window tab switch. It is
+  // independent of the main overlay's onboarding provider; the backend's keyed
+  // slots are the real cross-window coordinator.
   return (
-    <DownloadProvider>
+    <DownloadsProvider>
       <div className={styles.window} onMouseDown={handleDragStart}>
         <WindowControls onClose={handleHide} />
 
@@ -450,7 +450,7 @@ export function SettingsWindow() {
 
         <SavedPill visible={savedVisible} />
       </div>
-    </DownloadProvider>
+    </DownloadsProvider>
   );
 }
 
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 58ac0747..418454a4 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -24,7 +24,7 @@ import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest';
 import { invoke } from '@tauri-apps/api/core';
 
 import { BrowseAllPane } from './BrowseAllPane';
-import { DownloadProvider } from '../../../contexts/DownloadContext';
+import { DownloadsProvider } from '../../../contexts/DownloadsContext';
 import {
   HF_SEARCH_DEBOUNCE_MS,
   HF_PAGE_SIZE,
@@ -141,7 +141,7 @@ async function renderPane(
 ) {
   mockCommands(discoverResponses(overrides));
   const view = render(<BrowseAllPane onSaved={onSaved} />, {
-    wrapper: DownloadProvider,
+    wrapper: DownloadsProvider,
   });
   await waitFor(() =>
     expect(invokeMock).toHaveBeenCalledWith('search_hf_models', {
@@ -193,7 +193,9 @@ describe('BrowseAllPane', () => {
   it('typing in the search drives a debounced fetch and re-renders results', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    render(<BrowseAllPane onSaved={() => {}} />, {
+      wrapper: DownloadsProvider,
+    });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -222,7 +224,9 @@ describe('BrowseAllPane', () => {
   it('clicking a family chip sets the query to that family', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    render(<BrowseAllPane onSaved={() => {}} />, {
+      wrapper: DownloadsProvider,
+    });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -246,7 +250,9 @@ describe('BrowseAllPane', () => {
   it('the All chip clears the query and is active by default', async () => {
     vi.useFakeTimers();
     mockCommands(discoverResponses());
-    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    render(<BrowseAllPane onSaved={() => {}} />, {
+      wrapper: DownloadsProvider,
+    });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
@@ -390,7 +396,7 @@ describe('BrowseAllPane', () => {
     expect(screen.getByText(/repo unavailable/)).toBeInTheDocument();
   });
 
-  it('downloads a chosen quant, progresses, and on ready lifts config and collapses', async () => {
+  it('downloads a chosen quant, progresses, and on ready lifts config', async () => {
     const onSaved = vi.fn();
     await renderPane(onSaved);
     const row = screen
@@ -426,9 +432,14 @@ describe('BrowseAllPane', () => {
     });
     await flush();
     expect(onSaved).toHaveBeenCalledWith(CONFIG_AFTER_INSTALL);
+    // The progress card clears once the entry is dropped; the row stays expanded
+    // (parallel: a sibling quant could still be downloading) with its quants
+    // listed, not collapsed.
     await waitFor(() =>
-      expect(screen.queryByText('gemma-q4.gguf')).not.toBeInTheDocument(),
+      expect(screen.queryByTestId('download-figures')).not.toBeInTheDocument(),
     );
+    expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument();
+    expect(screen.getByText('gemma-q8.gguf')).toBeInTheDocument();
   });
 
   it('leaves the lift to a later resync when get_config fails post-download', async () => {
@@ -462,7 +473,9 @@ describe('BrowseAllPane', () => {
     expect(screen.getByTestId('download-figures')).toBeInTheDocument();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
     await flush();
-    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
+    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download', {
+      key: 'repo:google/gemma-4-12b-it-GGUF\ngemma-q4.gguf',
+    });
   });
 
   async function expandRepo(): Promise<HTMLElement> {
@@ -561,7 +574,7 @@ describe('BrowseAllPane', () => {
     expect(screen.getByText(/^Paused · \d+%$/)).toBeInTheDocument();
   });
 
-  it('keeps the other quant rows visible and downloadable while one downloads', async () => {
+  it('downloads a second quant in parallel with the first', async () => {
     await renderPane();
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
@@ -571,14 +584,20 @@ describe('BrowseAllPane', () => {
     // Start the first quant's download.
     fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
-    // The active row shows progress...
+    // The active quant shows progress...
     expect(screen.getByTestId('download-figures')).toBeInTheDocument();
-    // ...and the OTHER quant file is still listed, not hidden.
+    // ...and the OTHER quant stays listed and downloadable: clicking it starts a
+    // second concurrent download (each quant is keyed separately on the backend).
     expect(screen.getByText('gemma-q8.gguf')).toBeInTheDocument();
-    // Its Download button stays (disabled, since one download runs at a time).
     const others = screen.getAllByRole('button', { name: 'Download' });
     expect(others).toHaveLength(1);
-    expect(others[0]).toBeDisabled();
+    expect(others[0]).toBeEnabled();
+    fireEvent.click(others[0]);
+    await flush();
+    const repoStarts = invokeMock.mock.calls.filter(
+      (c: unknown[]) => c[0] === 'download_repo_model',
+    );
+    expect(repoStarts).toHaveLength(2);
   });
 
   it('retries after a failure and offers a path back to the quant list', async () => {
@@ -621,7 +640,9 @@ describe('BrowseAllPane', () => {
       resolveSearch = res;
     });
     mockCommands(discoverResponses({ search_hf_models: pending }));
-    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    render(<BrowseAllPane onSaved={() => {}} />, {
+      wrapper: DownloadsProvider,
+    });
     await flush();
     expect(screen.getByText('Searching…')).toBeInTheDocument();
     await act(async () => {
@@ -648,7 +669,9 @@ describe('BrowseAllPane', () => {
         gated: false,
       }));
     mockCommands(discoverResponses({ search_hf_models: full(HF_PAGE_SIZE) }));
-    render(<BrowseAllPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    render(<BrowseAllPane onSaved={() => {}} />, {
+      wrapper: DownloadsProvider,
+    });
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
       await Promise.resolve();
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index b3281b04..33517586 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -6,19 +6,23 @@
  * A search field (driven by {@link useHfSearch}) plus a row of family filter
  * chips feed one debounced backend query that returns chat/text-generation
  * GGUF repos. Each lean row shows the repo id, an org + downloads sub-line, a
- * link out to the repo on Hugging Face, and an icon-only download button. That
- * button expands a quant accordion listing the repo's `.gguf` files
- * (`list_hf_repo_ggufs`, each with an accurate per-quant RAM-fit, the only
- * place fit is shown) and downloads the chosen one through the shared
- * {@link useDownloadModel} kit. A "Load more" control pages past the first
- * batch. A finished install lifts a fresh config snapshot and collapses the row.
+ * link out to the repo on Hugging Face, and a disclosure chevron. Expanding a
+ * row lists the repo's `.gguf` files (`list_hf_repo_ggufs`, each with an
+ * accurate per-quant RAM-fit, the only place fit is shown); each quant downloads
+ * through the Settings {@link useDownloads} registry, so multiple quants (and
+ * multiple repos) can download in parallel. A "Load more" control pages past
+ * the first batch; a finished install lifts a fresh config snapshot.
  */
 
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 
 import { DownloadProgress } from '../../../components/DownloadProgress';
-import { useDownloadCtx } from '../../../contexts/DownloadContext';
+import {
+  useDownloads,
+  type DownloadsContextValue,
+} from '../../../contexts/DownloadsContext';
+import { downloadKey } from '../../../hooks/downloadKey';
 import { useHfSearch } from './useHfSearch';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
@@ -155,40 +159,21 @@ interface BrowseAllRowProps {
 }
 
 /**
- * One repo row plus its lazy quant accordion. The GGUF file list is fetched
- * the first time the row expands; the download state machine is local to the
- * row so two rows cannot share an in-flight download.
+ * One repo row plus its lazy quant accordion. The GGUF file list is fetched the
+ * first time the row expands; each quant downloads independently through the
+ * registry, so several quants of one repo can run at once.
  */
 function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
-  // The download machine lives at the app root (DownloadProvider), shared with
-  // every other row and pane, so a tab switch that unmounts Browse all never
-  // drops an in-flight download: the single-slot backend download outlives the
-  // pane. `activeDownload` names the repo + file that owns it.
-  const {
-    state,
-    progress,
-    etaSeconds,
-    startRepoDownload,
-    cancel,
-    retry,
-    reset,
-    activeDownload,
-    clearActiveDownload,
-  } = useDownloadCtx();
-
-  // The file this repo's row is currently downloading, or null when another row
-  // (or no download) owns the single in-flight slot. Drives which quant swaps to
-  // the live progress card and, on a remount after a tab switch, the re-expand.
-  const activeRepoFile =
-    activeDownload?.kind === 'repo' && activeDownload.repo === model.id
-      ? activeDownload.file
-      : null;
-  const ownsActiveDownload = activeRepoFile !== null;
-
-  const [expanded, setExpanded] = useState(ownsActiveDownload);
+  const downloads = useDownloads();
   const [files, setFiles] = useState<HfGgufFile[] | null>(null);
   const [listError, setListError] = useState<string | null>(null);
 
+  // Re-expand a repo that still has a live download after a tab switch remounts
+  // this row collapsed. The registry survives the unmount; this row's
+  // expand/files state does not, so it rebuilds from the registry on mount.
+  const hasLiveDownload = downloads.hasRepoDownload(model.id);
+  const [expanded, setExpanded] = useState(hasLiveDownload);
+
   const org = orgOf(model.id);
 
   const loadFiles = useCallback(async () => {
@@ -204,12 +189,11 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
     }
   }, [model.id]);
 
-  // A remount that lands on the row owning the in-flight download (re-expanded
-  // above) loads its quant list once so the live progress shows again instead
-  // of staying behind a collapsed row. Fires only on mount.
-  const restoreActiveRow = useRef(ownsActiveDownload);
+  // On a remount that auto-expanded the row (a download is still live), fetch
+  // the quant list once so the live progress shows again. Fires only on mount.
+  const restoreOnMountRef = useRef(hasLiveDownload);
   useEffect(() => {
-    if (restoreActiveRow.current) void loadFiles();
+    if (restoreOnMountRef.current) void loadFiles();
   }, [loadFiles]);
 
   function toggle() {
@@ -225,23 +209,6 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
     void invoke('open_url', { url: `${HF_BASE_URL}/${model.id}` });
   }
 
-  // A finished install: the backend already wrote the builtin provider's model
-  // field, so lift the fresh config snapshot and collapse the row. The machine
-  // is shared across rows, so only the row that owns the download reacts.
-  useEffect(() => {
-    if (state.phase !== 'ready' || !ownsActiveDownload) return;
-    void (async () => {
-      try {
-        onSaved(await invoke<RawAppConfig>('get_config'));
-      } catch {
-        // The focus-driven resync picks the change up on next activation.
-      }
-      reset();
-      clearActiveDownload();
-      setExpanded(false);
-    })();
-  }, [state.phase, ownsActiveDownload, onSaved, reset, clearActiveDownload]);
-
   // Silent re-read of the listing (no loading flash): the rows carry fresh
   // `partial_bytes`, so a file flips to/from its Paused state in place.
   const refetchFiles = useCallback(async () => {
@@ -255,29 +222,6 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
     }
   }, [model.id]);
 
-  // Cancelling leaves the partial on disk; forget the active row and re-read the
-  // listing so the file flips straight to its Paused / Resume / Discard controls.
-  async function cancelDownload() {
-    await cancel();
-    clearActiveDownload();
-    await refetchFiles();
-  }
-
-  // A terminal card's exit (Choose a different model, and the unused confirm
-  // fallbacks): return to the quant list and forget the active row.
-  function returnToList() {
-    reset();
-    clearActiveDownload();
-  }
-
-  async function discardFile(sha256: string) {
-    await invoke('discard_partial_download', { sha256 });
-    await refetchFiles();
-  }
-
-  // True while ANY download runs (the engine handles one at a time): every other
-  // row's controls disable; the owning file swaps to the live progress card.
-  const anyInFlight = state.phase !== 'idle';
   // The context window is a per-repo property (the search carries it via
   // expand[]=gguf), so it shows on the collapsed row without expanding. Empty
   // when unknown, which skips it.
@@ -327,106 +271,145 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
             <p className={styles.note}>No GGUF files in this repo.</p>
           ) : null}
           {files !== null && files.length > 0
-            ? files.map((f) => {
-                // Only the file that owns the in-flight download swaps its
-                // controls for the inline progress. A file with an interrupted
-                // partial reads as Paused with Resume / Discard; everything else
-                // is a normal, browsable quant. Resume and Discard are disabled
-                // while any download runs, since the engine handles one at a time.
-                const downloading = anyInFlight && activeRepoFile === f.file;
-                const paused = !downloading && f.partial_bytes !== null;
-                const pausedPct =
-                  f.partial_bytes !== null
-                    ? Math.min(
-                        100,
-                        Math.floor((f.partial_bytes / f.size_bytes) * 100),
-                      )
-                    : 0;
-                return (
-                  <div className={styles.quantRow} key={f.file}>
-                    <span className={styles.quantName}>{f.file}</span>
-                    {downloading ? (
-                      <DownloadProgress
-                        state={state}
-                        progress={progress}
-                        etaSeconds={etaSeconds}
-                        // The repo download flow has no pre-flight confirm step
-                        // (only the starter picker does), so the confirm card
-                        // never renders; these required props point at the same
-                        // covered handlers rather than dead no-op literals.
-                        onConfirm={returnToList}
-                        onCancelConfirm={returnToList}
-                        onCancel={() => void cancelDownload()}
-                        onRetry={() => void retry()}
-                        // A terminal failure must leave a path back to the quant
-                        // list, not just Retry; this returns to the file rows.
-                        onChooseAnother={returnToList}
-                      />
-                    ) : (
-                      <>
-                        {f.fit ? (
-                          <Tooltip
-                            label={RAM_FIT_TOOLTIP[f.fit]}
-                            placement="top"
-                          >
-                            <span
-                              className={`${styles.fit} ${FIT_CLASS[f.fit]}`}
-                            >
-                              {RAM_FIT_LABEL[f.fit]}
-                            </span>
-                          </Tooltip>
-                        ) : null}
-                        {paused ? (
-                          <>
-                            <span className={styles.quantPaused}>
-                              Paused · {pausedPct}%
-                            </span>
-                            <button
-                              type="button"
-                              className={styles.quantResume}
-                              disabled={anyInFlight}
-                              onClick={() =>
-                                startRepoDownload(model.id, f.file)
-                              }
-                            >
-                              Resume
-                            </button>
-                            <button
-                              type="button"
-                              className={styles.quantDiscard}
-                              aria-label="Discard"
-                              disabled={anyInFlight}
-                              onClick={() => void discardFile(f.sha256)}
-                            >
-                              Discard
-                            </button>
-                          </>
-                        ) : (
-                          <>
-                            <span className={styles.quantSize}>
-                              {gb(f.size_bytes)} GB
-                            </span>
-                            <button
-                              type="button"
-                              className={styles.quantGet}
-                              aria-label="Download"
-                              disabled={anyInFlight}
-                              onClick={() =>
-                                startRepoDownload(model.id, f.file)
-                              }
-                            >
-                              {DOWNLOAD_ICON}
-                            </button>
-                          </>
-                        )}
-                      </>
-                    )}
-                  </div>
-                );
-              })
+            ? files.map((f) => (
+                <QuantRow
+                  key={f.file}
+                  file={f}
+                  repo={model.id}
+                  downloads={downloads}
+                  onSaved={onSaved}
+                  refetch={refetchFiles}
+                />
+              ))
             : null}
         </div>
       ) : null}
     </div>
   );
 }
+
+interface QuantRowProps {
+  file: HfGgufFile;
+  repo: string;
+  downloads: DownloadsContextValue;
+  onSaved: (next: RawAppConfig) => void;
+  refetch: () => Promise<void>;
+}
+
+/**
+ * One quant file row. Owns its own download by key, so each quant in a repo can
+ * download in parallel with the others: a downloading quant shows the inline
+ * progress card while its siblings stay browsable and downloadable.
+ */
+function QuantRow({ file, repo, downloads, onSaved, refetch }: QuantRowProps) {
+  const key = downloadKey({ kind: 'repo', repo, file: file.file });
+  const entry = downloads.get(key);
+  const { clear } = downloads;
+  const downloading = entry !== undefined;
+  const phase = entry?.state.phase;
+
+  // A finished install: the backend recorded the model, so lift the fresh
+  // config, drop the entry, and re-read the listing (the quant flips to its
+  // installed state). Per quant, so parallel installs settle independently.
+  useEffect(() => {
+    if (phase !== 'ready') return;
+    void (async () => {
+      try {
+        onSaved(await invoke<RawAppConfig>('get_config'));
+      } catch {
+        // The focus-driven resync picks the change up on next activation.
+      }
+      clear(key);
+      await refetch();
+    })();
+  }, [phase, key, clear, onSaved, refetch]);
+
+  // Cancelling keeps the partial on disk; re-read the listing so the file shows
+  // Paused / Resume / Discard once the Cancelled event prunes its entry.
+  async function cancelDownload() {
+    downloads.cancel(key);
+    await refetch();
+  }
+
+  async function discardFile() {
+    await downloads.discard(file.sha256);
+    await refetch();
+  }
+
+  // Dismiss this quant's terminal card back to the file rows. Also wired to the
+  // confirm-card callbacks, which never fire here (the repo path has no
+  // pre-flight confirm step), so all three share one covered handler.
+  const dismiss = () => clear(key);
+
+  const paused = !downloading && file.partial_bytes !== null;
+  const pausedPct =
+    file.partial_bytes !== null
+      ? Math.min(100, Math.floor((file.partial_bytes / file.size_bytes) * 100))
+      : 0;
+
+  return (
+    <div className={styles.quantRow}>
+      <span className={styles.quantName}>{file.file}</span>
+      {downloading && entry ? (
+        <DownloadProgress
+          state={entry.state}
+          progress={entry.progress}
+          etaSeconds={entry.etaSeconds}
+          // The repo download flow has no pre-flight confirm step (only the
+          // starter picker does), so the confirm card never renders; these
+          // share the same covered dismiss handler rather than dead no-op
+          // literals.
+          onConfirm={dismiss}
+          onCancelConfirm={dismiss}
+          onCancel={() => void cancelDownload()}
+          onRetry={() => downloads.retry(key)}
+          // A terminal failure must leave a path back to the quant list, not
+          // just Retry; this returns to the file rows.
+          onChooseAnother={dismiss}
+        />
+      ) : (
+        <>
+          {file.fit ? (
+            <Tooltip label={RAM_FIT_TOOLTIP[file.fit]} placement="top">
+              <span className={`${styles.fit} ${FIT_CLASS[file.fit]}`}>
+                {RAM_FIT_LABEL[file.fit]}
+              </span>
+            </Tooltip>
+          ) : null}
+          {paused ? (
+            <>
+              <span className={styles.quantPaused}>Paused · {pausedPct}%</span>
+              <button
+                type="button"
+                className={styles.quantResume}
+                onClick={() => downloads.startRepoDownload(repo, file.file)}
+              >
+                Resume
+              </button>
+              <button
+                type="button"
+                className={styles.quantDiscard}
+                aria-label="Discard"
+                onClick={() => void discardFile()}
+              >
+                Discard
+              </button>
+            </>
+          ) : (
+            <>
+              <span className={styles.quantSize}>{gb(file.size_bytes)} GB</span>
+              <button
+                type="button"
+                className={styles.quantGet}
+                aria-label="Download"
+                onClick={() => downloads.startRepoDownload(repo, file.file)}
+              >
+                {DOWNLOAD_ICON}
+              </button>
+            </>
+          )}
+        </>
+      )}
+    </div>
+  );
+}
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index 74b878e0..3363ec93 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -18,7 +18,7 @@ import { invoke } from '@tauri-apps/api/core';
 
 import { DiscoverPane } from './DiscoverPane';
 import { clearHfSearchCache } from './useHfSearch';
-import { DownloadProvider } from '../../../contexts/DownloadContext';
+import { DownloadsProvider } from '../../../contexts/DownloadsContext';
 import type { Starter, StarterOption } from '../../../types/starter';
 
 const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
@@ -63,7 +63,7 @@ beforeEach(() => {
 
 function renderHost() {
   return render(<DiscoverPane onSaved={() => {}} />, {
-    wrapper: DownloadProvider,
+    wrapper: DownloadsProvider,
   });
 }
 
@@ -183,7 +183,7 @@ describe('DiscoverPane download persistence', () => {
   // machine that died on unmount while the single-slot backend download kept
   // running). The shared app-root machine must keep the progress alive.
   it('keeps a live Staff-picks download visible across a Browse-all round trip', async () => {
-    render(<DiscoverPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    render(<DiscoverPane onSaved={() => {}} />, { wrapper: DownloadsProvider });
     await waitFor(() => expect(staffPicksVisible()).toBe(true));
 
     fireEvent.click(screen.getByRole('button', { name: 'Download' }));
@@ -227,7 +227,7 @@ describe('DiscoverPane download persistence', () => {
   // also survive a Staff-picks round trip, re-binding to the owning row (which
   // re-expands) instead of resetting to a collapsed, idle row.
   it('keeps a live Browse-all download visible across a Staff-picks round trip', async () => {
-    render(<DiscoverPane onSaved={() => {}} />, { wrapper: DownloadProvider });
+    render(<DiscoverPane onSaved={() => {}} />, { wrapper: DownloadsProvider });
     await waitFor(() => expect(staffPicksVisible()).toBe(true));
 
     fireEvent.click(screen.getByRole('tab', { name: 'Browse all' }));
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index 3caaf7e7..eee98793 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -24,7 +24,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
 import { invoke } from '@tauri-apps/api/core';
 
 import { StaffPicksPane } from './StaffPicksPane';
-import { DownloadProvider } from '../../../contexts/DownloadContext';
+import { DownloadsProvider } from '../../../contexts/DownloadsContext';
 import type { RawAppConfig } from '../../types';
 import type { Starter, StarterOption } from '../../../types/starter';
 
@@ -162,7 +162,7 @@ async function renderPane(
 ) {
   mockCommands(picksResponses(overrides));
   const view = render(<StaffPicksPane onSaved={onSaved} />, {
-    wrapper: DownloadProvider,
+    wrapper: DownloadsProvider,
   });
   await waitFor(() =>
     expect(invokeMock).toHaveBeenCalledWith('get_staff_picks'),
@@ -280,18 +280,24 @@ describe('StaffPicksPane', () => {
     );
   });
 
-  it('disables other rows while one model is downloading', async () => {
+  it('downloads a second model in parallel with the first', async () => {
     await renderPane();
     fireEvent.click(
       within(rowFor('Gemma 4 12B')).getByRole('button', { name: 'Download' }),
     );
     await flush();
-    // The active row shows progress; the other rows' Download buttons disable so
-    // a second click cannot collide with the single backend download slot and
-    // surface "a download is already in progress".
-    expect(
-      within(rowFor('Qwen3.5 9B')).getByRole('button', { name: 'Download' }),
-    ).toBeDisabled();
+    // The first row shows progress; another row stays downloadable, and clicking
+    // it starts a second concurrent download (the backend keys each separately).
+    const qwenDownload = within(rowFor('Qwen3.5 9B')).getByRole('button', {
+      name: 'Download',
+    });
+    expect(qwenDownload).toBeEnabled();
+    fireEvent.click(qwenDownload);
+    await flush();
+    const starts = invokeMock.mock.calls.filter(
+      (c: unknown[]) => c[0] === 'download_staff_pick',
+    );
+    expect(starts).toHaveLength(2);
   });
 
   it('lifts a fresh config and refreshes when a download completes', async () => {
@@ -330,7 +336,9 @@ describe('StaffPicksPane', () => {
     await flush();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
     await flush();
-    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
+    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download', {
+      key: 'staff:gemma-4-12b',
+    });
   });
 
   it('shows the paused row immediately after cancel, without a tab switch', async () => {
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index c83977a6..bd426ac6 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -13,18 +13,21 @@
  * snapshot.
  *
  * Data comes from {@link useStaffPicks}, the id-keyed catalog (decoupled from
- * onboarding's three tier heroes so a category can hold any number of models);
- * the download state machine is the shared {@link useDownloadModel}, so the
- * in-flight / failed UI is the same {@link DownloadProgress} card the rest of
- * the app shows. At most one model downloads at a time (the backend enforces it
- * too); `activeId` tracks which row owns the progress card.
+ * onboarding's three tier heroes so a category can hold any number of models).
+ * Downloads run through the Settings {@link useDownloads} registry, so several
+ * models can download in parallel: each row binds to its own download by
+ * {@link downloadKey} and shows the shared {@link DownloadProgress} card.
  */
 
 import { useEffect, useMemo } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 
 import { DownloadProgress } from '../../../components/DownloadProgress';
-import { useDownloadCtx } from '../../../contexts/DownloadContext';
+import {
+  useDownloads,
+  type DownloadsContextValue,
+} from '../../../contexts/DownloadsContext';
+import { downloadKey } from '../../../hooks/downloadKey';
 import { useStaffPicks } from '../../../components/StarterPicker';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
@@ -99,69 +102,7 @@ interface StaffPicksPaneProps {
 export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
   const { options, refresh } = useStaffPicks();
   const sections = useMemo(() => groupByCategory(options ?? []), [options]);
-
-  // The download machine lives at the app root (DownloadProvider) so a Staff
-  // picks / Browse all tab switch, which unmounts this pane, never drops an
-  // in-flight download: the single-slot backend download outlives the pane, so
-  // the frontend view of it must too. `activeDownload` names the row that owns
-  // the shared progress card; it survives the remount and re-binds the row to
-  // the live progress instead of re-reading the partial as a stale "Paused".
-  const {
-    state,
-    progress,
-    etaSeconds,
-    combinedBytes,
-    speedBytesPerSec,
-    startStaffPick,
-    cancel,
-    retry,
-    reset,
-    discard,
-    activeDownload,
-    clearActiveDownload,
-  } = useDownloadCtx();
-
-  // A finished install (phase 'ready') lifts the fresh config, clears the
-  // active row, and refreshes the rows so the new model flips to Installed.
-  // An effect (not a render-time call) so it fires exactly once per transition.
-  useEffect(() => {
-    if (state.phase !== 'ready') return;
-    void (async () => {
-      try {
-        onSaved(await invoke<RawAppConfig>('get_config'));
-      } catch {
-        // The focus-driven resync picks the change up on next activation.
-      }
-      reset();
-      clearActiveDownload();
-      await refresh();
-    })();
-  }, [state.phase, onSaved, reset, clearActiveDownload, refresh]);
-
-  async function discardPartial(sha256: string) {
-    await discard(sha256);
-    await refresh();
-  }
-
-  // Cancelling leaves the partial on disk; forget the active row and re-read the
-  // options so the row flips straight to its Paused/Resume state instead of
-  // snapping back to a fresh download until the next remount.
-  async function cancelDownload() {
-    await cancel();
-    clearActiveDownload();
-    await refresh();
-  }
-
-  function returnToPicker() {
-    reset();
-    clearActiveDownload();
-  }
-
-  // The engine downloads one model at a time, so while any download is in flight
-  // every other row's Download / Resume / Discard is disabled: the owning row
-  // shows the progress card, the rest wait rather than colliding with the
-  // single backend slot and surfacing "a download is already in progress".
-  const anyInFlight = state.phase !== 'idle';
+  const downloads = useDownloads();
 
   if (options !== null && sections.length === 0) {
     return (
@@ -182,22 +123,9 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
             <ModelRow
               key={o.starter.id}
               option={o}
-              active={
-                activeDownload?.kind === 'staff' &&
-                activeDownload.id === o.starter.id
-              }
-              anyInFlight={anyInFlight}
-              state={state}
-              progress={progress}
-              etaSeconds={etaSeconds}
-              combinedBytes={combinedBytes}
-              speedBytesPerSec={speedBytesPerSec}
-              onDownload={startStaffPick}
-              onResume={startStaffPick}
-              onDiscard={discardPartial}
-              onCancel={() => void cancelDownload()}
-              onRetry={() => void retry()}
-              onChooseAnother={returnToPicker}
+              downloads={downloads}
+              onSaved={onSaved}
+              refresh={refresh}
             />
           ))}
         </div>
@@ -208,40 +136,57 @@ export function StaffPicksPane({ onSaved }: StaffPicksPaneProps) {
 
 interface ModelRowProps {
   option: StaffPickOption;
-  active: boolean;
-  /** True while any model is downloading, so this row's actions are disabled. */
-  anyInFlight: boolean;
-  state: ReturnType<typeof useDownloadCtx>['state'];
-  progress: ReturnType<typeof useDownloadCtx>['progress'];
-  etaSeconds: number | null;
-  combinedBytes: number | null;
-  speedBytesPerSec: number | null;
-  onDownload: (id: string) => void;
-  onResume: (id: string) => void;
-  onDiscard: (sha256: string) => void;
-  onCancel: () => void;
-  onRetry: () => void;
-  onChooseAnother: () => void;
+  /** The Settings download registry; each row owns its own download by key. */
+  downloads: DownloadsContextValue;
+  /** Lift a fresh config snapshot after this row's install completes. */
+  onSaved: (next: RawAppConfig) => void;
+  /** Re-read the curated rows so installed / paused state reflects on disk. */
+  refresh: () => Promise<void>;
 }
 
-function ModelRow({
-  option,
-  active,
-  anyInFlight,
-  state,
-  progress,
-  etaSeconds,
-  combinedBytes,
-  speedBytesPerSec,
-  onDownload,
-  onResume,
-  onDiscard,
-  onCancel,
-  onRetry,
-  onChooseAnother,
-}: ModelRowProps) {
+function ModelRow({ option, downloads, onSaved, refresh }: ModelRowProps) {
   const { starter, fit, installed, partial_bytes } = option;
-  const showProgress = active && state.phase !== 'idle';
+  const key = downloadKey({ kind: 'staff', id: starter.id });
+  const entry = downloads.get(key);
+  const { clear } = downloads;
+  // An entry exists only while this row's download is live (downloading,
+  // verifying, ready-pending, or failed); a Cancelled download is pruned.
+  const showProgress = entry !== undefined;
+  const phase = entry?.state.phase;
+
+  // A finished install (phase 'ready') lifts the fresh config, drops the entry,
+  // and refreshes the rows so the new model flips to Installed. Per row, so
+  // parallel installs each settle independently.
+  useEffect(() => {
+    if (phase !== 'ready') return;
+    void (async () => {
+      try {
+        onSaved(await invoke<RawAppConfig>('get_config'));
+      } catch {
+        // The focus-driven resync picks the change up on next activation.
+      }
+      clear(key);
+      await refresh();
+    })();
+  }, [phase, key, clear, onSaved, refresh]);
+
+  async function discardPartial() {
+    await downloads.discard(starter.sha256);
+    await refresh();
+  }
+
+  // Dismiss this row's terminal card back to its normal controls. Also wired to
+  // the confirm-card callbacks, which never fire here (the curated path has no
+  // pre-flight confirm step), so all three share one covered handler.
+  const dismiss = () => clear(key);
+
+  // Cancelling keeps the partial on disk; re-read the options so the row flips
+  // to its Paused/Resume state once the Cancelled event prunes the entry.
+  async function cancelDownload() {
+    downloads.cancel(key);
+    await refresh();
+  }
+
   // Empty when the model carries no context window, so the pill is skipped.
   const contextLabel = formatContextWindow(starter.context_length ?? 0);
   // An interrupted partial (not installed, not actively downloading) reads as a
@@ -294,8 +239,7 @@ function ModelRow({
                 <button
                   type="button"
                   className={styles.resumeBtn}
-                  disabled={anyInFlight}
-                  onClick={() => onResume(starter.id)}
+                  onClick={() => downloads.startStaffPick(starter.id)}
                 >
                   Resume
                 </button>
@@ -303,40 +247,37 @@ function ModelRow({
                   type="button"
                   className={styles.discardBtn}
                   aria-label="Discard"
-                  disabled={anyInFlight}
-                  onClick={() => onDiscard(starter.sha256)}
+                  onClick={() => void discardPartial()}
                 >
                   Discard
                 </button>
               </>
             ) : (
               <RowAction
-                option={option}
                 installed={installed}
-                anyInFlight={anyInFlight}
-                onDownload={onDownload}
+                onDownload={() => downloads.startStaffPick(starter.id)}
               />
             )}
           </div>
         ) : null}
       </div>
-      {showProgress ? (
+      {showProgress && entry ? (
         <div className={styles.progress}>
           <DownloadProgress
-            state={state}
-            progress={progress}
-            etaSeconds={etaSeconds}
-            combinedBytes={combinedBytes}
+            state={entry.state}
+            progress={entry.progress}
+            etaSeconds={entry.etaSeconds}
+            combinedBytes={entry.combinedBytes}
             grandTotalBytes={totalBytes(option)}
-            speedBytesPerSec={speedBytesPerSec}
+            speedBytesPerSec={entry.speedBytesPerSec}
             // The curated path has no pre-flight confirm card, so onConfirm /
-            // onCancelConfirm never fire; they point at the same covered
-            // handlers rather than dead no-op literals.
-            onConfirm={onChooseAnother}
-            onCancelConfirm={onChooseAnother}
-            onCancel={onCancel}
-            onRetry={onRetry}
-            onChooseAnother={onChooseAnother}
+            // onCancelConfirm never fire; they share the same covered dismiss
+            // handler rather than dead no-op literals.
+            onConfirm={dismiss}
+            onCancelConfirm={dismiss}
+            onCancel={() => void cancelDownload()}
+            onRetry={() => downloads.retry(key)}
+            onChooseAnother={dismiss}
           />
         </div>
       ) : null}
@@ -345,11 +286,8 @@ function ModelRow({
 }
 
 interface RowActionProps {
-  option: StaffPickOption;
   installed: boolean;
-  /** True while any model is downloading, so the download button is disabled. */
-  anyInFlight: boolean;
-  onDownload: (id: string) => void;
+  onDownload: () => void;
 }
 
 const DOWNLOAD_ICON = (
@@ -363,14 +301,7 @@ const DOWNLOAD_ICON = (
  * surface the absence of a download is the signal. The interrupted-partial
  * resume/discard pair is owned by the row itself; this renders the plain icon
  * download button otherwise. */
-function RowAction({
-  option,
-  installed,
-  anyInFlight,
-  onDownload,
-}: RowActionProps) {
-  const { starter } = option;
-
+function RowAction({ installed, onDownload }: RowActionProps) {
   if (installed) {
     return null;
   }
@@ -380,8 +311,7 @@ function RowAction({
       type="button"
       className={styles.getBtn}
       aria-label="Download"
-      disabled={anyInFlight}
-      onClick={() => onDownload(starter.id)}
+      onClick={onDownload}
     >
       {DOWNLOAD_ICON}
     </button>
diff --git a/src/settings/tabs/tabs.test.tsx b/src/settings/tabs/tabs.test.tsx
index bd8c28e2..abe144cc 100644
--- a/src/settings/tabs/tabs.test.tsx
+++ b/src/settings/tabs/tabs.test.tsx
@@ -22,7 +22,7 @@ import { invoke } from '@tauri-apps/api/core';
 import { clearEventHandlers } from '../../testUtils/mocks/tauri';
 
 import { ModelTab } from './ModelTab';
-import { DownloadProvider } from '../../contexts/DownloadContext';
+import { DownloadsProvider } from '../../contexts/DownloadsContext';
 import { DisplayTab } from './DisplayTab';
 import { SearchTab } from './SearchTab';
 import { AboutTab } from './AboutTab';
@@ -132,7 +132,7 @@ afterEach(() => {
 async function renderModelTab() {
   const view = render(
     <ModelTab config={CONFIG} resyncToken={0} onSaved={() => {}} />,
-    { wrapper: DownloadProvider },
+    { wrapper: DownloadsProvider },
   );
   await act(async () => {
     await Promise.resolve();
@@ -184,7 +184,7 @@ describe('ModelTab (router)', () => {
     };
     render(
       <ModelTab config={builtinActive} resyncToken={0} onSaved={() => {}} />,
-      { wrapper: DownloadProvider },
+      { wrapper: DownloadsProvider },
     );
     await act(async () => {
       await Promise.resolve();
diff --git a/src/view/onboarding/__tests__/ModelCheckStep.test.tsx b/src/view/onboarding/__tests__/ModelCheckStep.test.tsx
index 57a39480..2be31f7a 100644
--- a/src/view/onboarding/__tests__/ModelCheckStep.test.tsx
+++ b/src/view/onboarding/__tests__/ModelCheckStep.test.tsx
@@ -949,7 +949,9 @@ describe('ModelCheckStep (builtin flow)', () => {
     await act(async () => {
       fireEvent.click(screen.getByRole('button', { name: 'Pause download' }));
     });
-    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download', {
+      key: 'tier:balanced',
+    });
 
     await act(async () => {
       channel.simulateMessage({ type: 'Cancelled' });
@@ -1052,7 +1054,9 @@ describe('ModelCheckStep (builtin flow)', () => {
       fireEvent.click(screen.getByText('Use it instead'));
     });
 
-    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download', {
+      key: 'tier:balanced',
+    });
     expect(invoke).toHaveBeenCalledWith('set_active_provider', {
       providerId: 'ollama',
     });

From 8163e594626e1c69444aee2fc0630a6098df8ba2 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 19:16:58 -0500
Subject: [PATCH 70/89] feat(settings): gate Library and Discover for
 non-built-in providers

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/SettingsWindow.test.tsx          |   7 +
 src/settings/tabs/ModelTab.test.tsx           | 240 ++++++++++++++++++
 src/settings/tabs/ModelTab.tsx                |  44 +++-
 .../tabs/models/BuiltinOnlyGate.module.css    | 102 ++++++++
 .../tabs/models/BuiltinOnlyGate.test.tsx      |  68 +++++
 src/settings/tabs/models/BuiltinOnlyGate.tsx  |  57 +++++
 6 files changed, 512 insertions(+), 6 deletions(-)
 create mode 100644 src/settings/tabs/ModelTab.test.tsx
 create mode 100644 src/settings/tabs/models/BuiltinOnlyGate.module.css
 create mode 100644 src/settings/tabs/models/BuiltinOnlyGate.test.tsx
 create mode 100644 src/settings/tabs/models/BuiltinOnlyGate.tsx

diff --git a/src/settings/SettingsWindow.test.tsx b/src/settings/SettingsWindow.test.tsx
index 145e699a..f2facc8e 100644
--- a/src/settings/SettingsWindow.test.tsx
+++ b/src/settings/SettingsWindow.test.tsx
@@ -163,7 +163,14 @@ describe('SettingsWindow', () => {
   // read the app-root download context, so the Settings tree must provide a
   // DownloadProvider or opening Discover throws and blanks the window.
   it('opens Discover without crashing the Settings window', async () => {
+    // Built-in active so Discover renders ungated; this test guards the
+    // DownloadProvider wiring, not the non-built-in gate (covered in ModelTab).
+    const builtinActive: RawAppConfig = {
+      ...SAMPLE,
+      inference: { ...SAMPLE.inference, active_provider: 'builtin' },
+    };
     invokeMock.mockImplementation(async (cmd: string) => {
+      if (cmd === 'get_config') return builtinActive;
       if (cmd === 'get_staff_picks') return [];
       return defaultInvoke(cmd);
     });
diff --git a/src/settings/tabs/ModelTab.test.tsx b/src/settings/tabs/ModelTab.test.tsx
new file mode 100644
index 00000000..db7839c8
--- /dev/null
+++ b/src/settings/tabs/ModelTab.test.tsx
@@ -0,0 +1,240 @@
+/**
+ * Tests for the Models router's provider gating: Library and Discover manage
+ * the built-in engine's models, so while a non-built-in provider is active
+ * they are shown behind a switch-to-built-in gate. Providers is never gated.
+ * The router's plain view switching is covered in tabs.test.
+ */
+
+import {
+  act,
+  fireEvent,
+  render,
+  screen,
+  waitFor,
+} from '@testing-library/react';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+import { clearEventHandlers } from '../../testUtils/mocks/tauri';
+
+import { ModelTab } from './ModelTab';
+import { DownloadsProvider } from '../../contexts/DownloadsContext';
+import type { RawAppConfig } from '../types';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+function buildConfig(
+  activeProvider: string,
+  providers: RawAppConfig['inference']['providers'],
+): RawAppConfig {
+  return {
+    inference: {
+      active_provider: activeProvider,
+      keep_warm_inactivity_minutes: 0,
+      num_ctx: 16384,
+      providers,
+    },
+    prompt: { system: 'hello' },
+    window: {
+      overlay_width: 600,
+      max_chat_height: 400,
+      max_images: 4,
+      text_base_px: 16,
+      text_line_height: 1.5,
+      text_letter_spacing_px: 0,
+      text_font_weight: 400,
+    },
+    quote: {
+      max_display_lines: 3,
+      max_display_chars: 200,
+      max_context_length: 4000,
+    },
+    behavior: { auto_replace: false, auto_close: false },
+    search: {
+      searxng_url: '',
+      reader_url: '',
+      max_iterations: 3,
+      top_k_urls: 5,
+      searxng_max_results: 10,
+      search_timeout_s: 30,
+      reader_per_url_timeout_s: 10,
+      reader_batch_timeout_s: 20,
+      judge_timeout_s: 15,
+      router_timeout_s: 15,
+    },
+    debug: { trace_enabled: false },
+  };
+}
+
+const BUILTIN = {
+  id: 'builtin',
+  kind: 'builtin',
+  label: 'Built-in (Thuki)',
+  base_url: '',
+  model: '',
+  vision: false,
+};
+const OLLAMA = {
+  id: 'ollama',
+  kind: 'ollama',
+  label: 'Ollama',
+  base_url: 'http://127.0.0.1:11434',
+  model: '',
+  vision: false,
+};
+
+beforeEach(() => {
+  invokeMock.mockReset();
+  invokeMock.mockImplementation((cmd: string) => {
+    if (cmd === 'get_loaded_model') return Promise.resolve(null);
+    if (cmd === 'get_engine_status') {
+      return Promise.resolve({ state: 'stopped' });
+    }
+    if (cmd === 'get_model_picker_state') {
+      return Promise.resolve({ active: null, all: [], ollamaReachable: false });
+    }
+    if (cmd === 'list_installed_models') return Promise.resolve([]);
+    return Promise.resolve(buildConfig('ollama', [BUILTIN, OLLAMA]));
+  });
+});
+
+afterEach(() => {
+  clearEventHandlers();
+});
+
+async function renderTab(config: RawAppConfig, onSaved = () => {}) {
+  const view = render(
+    <ModelTab config={config} resyncToken={0} onSaved={onSaved} />,
+    { wrapper: DownloadsProvider },
+  );
+  await act(async () => {
+    await Promise.resolve();
+  });
+  return view;
+}
+
+async function open(name: string) {
+  await act(async () => {
+    fireEvent.click(screen.getByRole('tab', { name }));
+    await Promise.resolve();
+  });
+}
+
+describe('ModelTab provider gating', () => {
+  it('gates the Library view while a non-built-in provider is active', async () => {
+    await renderTab(buildConfig('ollama', [BUILTIN, OLLAMA]));
+    await open('Library');
+    expect(
+      screen.getByRole('button', { name: 'Switch to built-in' }),
+    ).toBeInTheDocument();
+    expect(screen.getByText(/You're using Ollama now/)).toBeInTheDocument();
+  });
+
+  it('gates the Discover view while a non-built-in provider is active', async () => {
+    await renderTab(buildConfig('ollama', [BUILTIN, OLLAMA]));
+    await open('Discover');
+    expect(
+      screen.getByRole('button', { name: 'Switch to built-in' }),
+    ).toBeInTheDocument();
+  });
+
+  it('does not gate Library when the built-in engine is active', async () => {
+    await renderTab(buildConfig('builtin', [BUILTIN, OLLAMA]));
+    await open('Library');
+    expect(
+      screen.queryByRole('button', { name: 'Switch to built-in' }),
+    ).toBeNull();
+  });
+
+  it('never gates the Providers view', async () => {
+    await renderTab(buildConfig('ollama', [BUILTIN, OLLAMA]));
+    expect(
+      screen.queryByRole('button', { name: 'Switch to built-in' }),
+    ).toBeNull();
+    expect(screen.getByText('Active provider')).toBeInTheDocument();
+  });
+
+  it('switches to the built-in provider when the gate button is clicked', async () => {
+    const next = buildConfig('builtin', [BUILTIN, OLLAMA]);
+    const onSaved = vi.fn();
+    invokeMock.mockImplementation((cmd: string) => {
+      if (cmd === 'set_active_provider') return Promise.resolve(next);
+      if (cmd === 'get_loaded_model') return Promise.resolve(null);
+      if (cmd === 'get_engine_status')
+        return Promise.resolve({ state: 'stopped' });
+      if (cmd === 'get_model_picker_state') {
+        return Promise.resolve({
+          active: null,
+          all: [],
+          ollamaReachable: false,
+        });
+      }
+      if (cmd === 'list_installed_models') return Promise.resolve([]);
+      return Promise.resolve(buildConfig('ollama', [BUILTIN, OLLAMA]));
+    });
+    await renderTab(buildConfig('ollama', [BUILTIN, OLLAMA]), onSaved);
+    await open('Library');
+    await act(async () => {
+      fireEvent.click(
+        screen.getByRole('button', { name: 'Switch to built-in' }),
+      );
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
+      providerId: 'builtin',
+    });
+    await waitFor(() => expect(onSaved).toHaveBeenCalledWith(next));
+  });
+
+  it('does nothing when there is no built-in provider to switch to', async () => {
+    await renderTab(buildConfig('ollama', [OLLAMA]));
+    await open('Library');
+    await act(async () => {
+      fireEvent.click(
+        screen.getByRole('button', { name: 'Switch to built-in' }),
+      );
+      await Promise.resolve();
+    });
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'set_active_provider',
+      expect.anything(),
+    );
+  });
+
+  it('swallows a failed switch without throwing', async () => {
+    const onSaved = vi.fn();
+    invokeMock.mockImplementation((cmd: string) => {
+      if (cmd === 'set_active_provider')
+        return Promise.reject(new Error('nope'));
+      if (cmd === 'get_loaded_model') return Promise.resolve(null);
+      if (cmd === 'get_engine_status')
+        return Promise.resolve({ state: 'stopped' });
+      if (cmd === 'get_model_picker_state') {
+        return Promise.resolve({
+          active: null,
+          all: [],
+          ollamaReachable: false,
+        });
+      }
+      if (cmd === 'list_installed_models') return Promise.resolve([]);
+      return Promise.resolve(buildConfig('ollama', [BUILTIN, OLLAMA]));
+    });
+    await renderTab(buildConfig('ollama', [BUILTIN, OLLAMA]), onSaved);
+    await open('Library');
+    await act(async () => {
+      fireEvent.click(
+        screen.getByRole('button', { name: 'Switch to built-in' }),
+      );
+      await Promise.resolve();
+    });
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+
+  it('falls back to a generic label when the active provider is unresolved', async () => {
+    await renderTab(buildConfig('mystery', [BUILTIN, OLLAMA]));
+    await open('Library');
+    expect(
+      screen.getByText(/You're using another provider now/),
+    ).toBeInTheDocument();
+  });
+});
diff --git a/src/settings/tabs/ModelTab.tsx b/src/settings/tabs/ModelTab.tsx
index aa1fa9fe..f6f06448 100644
--- a/src/settings/tabs/ModelTab.tsx
+++ b/src/settings/tabs/ModelTab.tsx
@@ -8,11 +8,13 @@
  */
 
 import { useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
 
 import { ModelsSegmented, type ModelsSubview } from './models/ModelsSegmented';
 import { ProvidersPane } from './models/ProvidersPane';
 import { LibraryPane } from './models/LibraryPane';
 import { DiscoverPane } from './models/DiscoverPane';
+import { BuiltinOnlyGate } from './models/BuiltinOnlyGate';
 import styles from '../../styles/settings.module.css';
 import type { RawAppConfig } from '../types';
 
@@ -28,6 +30,22 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
   const [view, setView] = useState<ModelsSubview>('providers');
   const goToDiscover = () => setView('discover');
 
+  // Library and Discover manage the built-in engine's models, so they are
+  // gated behind a switch prompt while a non-built-in provider is active.
+  const { providers, active_provider } = config.inference;
+  const activeProvider = providers.find((p) => p.id === active_provider);
+  const gated = activeProvider?.kind !== 'builtin';
+  const activeLabel = activeProvider?.label ?? 'another provider';
+  const builtinId = providers.find((p) => p.kind === 'builtin')?.id;
+
+  // Activate the built-in engine from the gate. No-op when none is configured; a failed switch is swallowed and onSaved is not called.
+  function switchToBuiltin() {
+    if (builtinId === undefined) return;
+    void invoke<RawAppConfig>('set_active_provider', { providerId: builtinId })
+      .then(onSaved)
+      .catch(() => {});
+  }
+
   return (
     <>
       <div className={styles.barrow}>
@@ -35,14 +53,28 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
       </div>
 
       {view === 'library' ? (
-        <LibraryPane
-          config={config}
-          onSaved={onSaved}
-          onAddModel={goToDiscover}
-        />
+        <BuiltinOnlyGate
+          gated={gated}
+          activeLabel={activeLabel}
+          onSwitch={switchToBuiltin}
+        >
+          <LibraryPane
+            config={config}
+            onSaved={onSaved}
+            onAddModel={goToDiscover}
+          />
+        </BuiltinOnlyGate>
       ) : null}
 
-      {view === 'discover' ? <DiscoverPane onSaved={onSaved} /> : null}
+      {view === 'discover' ? (
+        <BuiltinOnlyGate
+          gated={gated}
+          activeLabel={activeLabel}
+          onSwitch={switchToBuiltin}
+        >
+          <DiscoverPane onSaved={onSaved} />
+        </BuiltinOnlyGate>
+      ) : null}
 
       {view === 'providers' ? (
         <ProvidersPane
diff --git a/src/settings/tabs/models/BuiltinOnlyGate.module.css b/src/settings/tabs/models/BuiltinOnlyGate.module.css
new file mode 100644
index 00000000..00cb2392
--- /dev/null
+++ b/src/settings/tabs/models/BuiltinOnlyGate.module.css
@@ -0,0 +1,102 @@
+/*
+ * Built-in-only gate. Tokens (--accent, --accent-soft, --hair-soft, --t1, --t2)
+ * cascade from the Settings window, so they are referenced directly.
+ */
+
+.wrap {
+  position: relative;
+  min-height: 240px;
+}
+
+/* The real pane, dimmed and blurred behind glass. This rule blocks pointer and selection; hiding from AT is done by inert / aria-hidden in the markup. */
+.faint {
+  opacity: 0.32;
+  filter: blur(1.6px);
+  pointer-events: none;
+  user-select: none;
+}
+
+.overlay {
+  position: absolute;
+  inset: 0;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  padding: 16px;
+}
+
+.card {
+  max-width: 340px;
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  text-align: center;
+  gap: 11px;
+  padding: 20px 22px;
+  background: rgba(22, 18, 15, 0.97);
+  border: 1px solid var(--hair-soft);
+  border-top-color: rgba(255, 141, 92, 0.2);
+  border-radius: 12px;
+  box-shadow:
+    0 14px 34px -10px rgba(0, 0, 0, 0.7),
+    0 0 0 1px rgba(255, 141, 92, 0.06),
+    inset 0 1px 0 rgba(255, 255, 255, 0.05);
+}
+
+.title {
+  margin: 0;
+  font-size: 13.5px;
+  font-weight: 580;
+  color: var(--t1);
+}
+
+.body {
+  margin: 0;
+  font-size: 12px;
+  line-height: 1.5;
+  color: var(--t2);
+}
+
+/* Affirmative primary action, mirroring the Providers Switch vocabulary: a calm
+ * accent tint that names the action without shouting. */
+.switch {
+  display: inline-flex;
+  align-items: center;
+  gap: 7px;
+  margin-top: 2px;
+  padding: 7px 14px;
+  border: 1px solid rgba(255, 141, 92, 0.38);
+  border-top-color: rgba(255, 141, 92, 0.45);
+  border-radius: 8px;
+  background: var(--accent-soft);
+  color: var(--accent);
+  font-family: inherit;
+  font-size: 12.5px;
+  font-weight: 600;
+  cursor: pointer;
+  box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.05);
+  transition:
+    background 220ms cubic-bezier(0.4, 0, 0.2, 1),
+    border-color 220ms cubic-bezier(0.4, 0, 0.2, 1),
+    box-shadow 260ms cubic-bezier(0.4, 0, 0.2, 1);
+}
+.switch:hover {
+  background: rgba(255, 141, 92, 0.22);
+  border-color: rgba(255, 141, 92, 0.6);
+  box-shadow:
+    inset 0 1px 0 rgba(255, 255, 255, 0.08),
+    0 0 0 4px rgba(255, 141, 92, 0.1),
+    0 2px 10px -4px rgba(255, 141, 92, 0.35);
+}
+.switch:focus-visible {
+  outline: none;
+  box-shadow:
+    inset 0 1px 0 rgba(255, 255, 255, 0.05),
+    0 0 0 3px rgba(255, 141, 92, 0.22);
+}
+
+@media (prefers-reduced-motion: reduce) {
+  .switch {
+    transition: none;
+  }
+}
diff --git a/src/settings/tabs/models/BuiltinOnlyGate.test.tsx b/src/settings/tabs/models/BuiltinOnlyGate.test.tsx
new file mode 100644
index 00000000..bdb99624
--- /dev/null
+++ b/src/settings/tabs/models/BuiltinOnlyGate.test.tsx
@@ -0,0 +1,68 @@
+/**
+ * Unit tests for the built-in-only gate: the overlay shown over Library and
+ * Discover while a non-built-in provider is active. The gate keeps the real
+ * pane mounted behind glass (so the user sees what is waiting) and offers a
+ * one-click switch back to the built-in engine.
+ */
+
+import { render, screen, fireEvent } from '@testing-library/react';
+import { describe, expect, it, vi } from 'vitest';
+
+import { BuiltinOnlyGate } from './BuiltinOnlyGate';
+
+describe('BuiltinOnlyGate', () => {
+  it('renders children unchanged when not gated', () => {
+    render(
+      <BuiltinOnlyGate gated={false} activeLabel="Ollama" onSwitch={() => {}}>
+        <p>pane content</p>
+      </BuiltinOnlyGate>,
+    );
+    expect(screen.getByText('pane content')).toBeInTheDocument();
+    expect(
+      screen.queryByRole('button', { name: 'Switch to built-in' }),
+    ).toBeNull();
+  });
+
+  it('overlays a switch prompt naming the active provider when gated', () => {
+    render(
+      <BuiltinOnlyGate gated activeLabel="Ollama" onSwitch={() => {}}>
+        <p>pane content</p>
+      </BuiltinOnlyGate>,
+    );
+    expect(
+      screen.getByRole('button', { name: 'Switch to built-in' }),
+    ).toBeInTheDocument();
+    expect(screen.getByText(/You're using Ollama now/)).toBeInTheDocument();
+  });
+
+  it('keeps the gated children mounted but hidden from assistive tech', () => {
+    render(
+      <BuiltinOnlyGate gated activeLabel="Ollama" onSwitch={() => {}}>
+        <p>pane content</p>
+      </BuiltinOnlyGate>,
+    );
+    const child = screen.getByText('pane content');
+    expect(child).toBeInTheDocument();
+    expect(child.closest('[aria-hidden="true"]')).not.toBeNull();
+  });
+
+  it('marks the gated children inert so keyboard focus cannot reach them', () => {
+    render(
+      <BuiltinOnlyGate gated activeLabel="Ollama" onSwitch={() => {}}>
+        <button>hidden action</button>
+      </BuiltinOnlyGate>,
+    );
+    expect(screen.getByText('hidden action').closest('[inert]')).not.toBeNull();
+  });
+
+  it('calls onSwitch when the switch button is clicked', () => {
+    const onSwitch = vi.fn();
+    render(
+      <BuiltinOnlyGate gated activeLabel="Ollama" onSwitch={onSwitch}>
+        <p>pane content</p>
+      </BuiltinOnlyGate>,
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Switch to built-in' }));
+    expect(onSwitch).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/src/settings/tabs/models/BuiltinOnlyGate.tsx b/src/settings/tabs/models/BuiltinOnlyGate.tsx
new file mode 100644
index 00000000..aefd1faf
--- /dev/null
+++ b/src/settings/tabs/models/BuiltinOnlyGate.tsx
@@ -0,0 +1,57 @@
+/**
+ * Built-in-only gate for the Library and Discover panes.
+ *
+ * Library and Discover manage models for Thuki's bundled engine. While another
+ * provider (Ollama, OpenAI) is active they do not apply, but hiding them would
+ * bury the built-in engine and leave on-device models undiscoverable. Instead
+ * the real pane stays mounted behind a dimmed, inert layer (so the user sees
+ * what is waiting) beneath a centered card that activates the built-in engine
+ * in one click. When ungated the children render untouched.
+ */
+
+import type { ReactNode } from 'react';
+
+import styles from './BuiltinOnlyGate.module.css';
+
+interface BuiltinOnlyGateProps {
+  /** True when a non-built-in provider is active, so the surface is gated. */
+  gated: boolean;
+  /** The active provider's label, named in the explanation copy. */
+  activeLabel: string;
+  /** Activate the built-in engine. */
+  onSwitch: () => void;
+  /** The real pane: rendered directly when ungated, behind glass when gated. */
+  children: ReactNode;
+}
+
+export function BuiltinOnlyGate({
+  gated,
+  activeLabel,
+  onSwitch,
+  children,
+}: BuiltinOnlyGateProps) {
+  if (!gated) return <>{children}</>;
+
+  return (
+    <div className={styles.wrap}>
+      {/* `inert` removes the dimmed pane from tab order, hit-testing, and the
+          accessibility tree so gated controls cannot be reached by keyboard or
+          pointer; aria-hidden is kept as a belt-and-suspenders fallback for older AT. */}
+      <div className={styles.faint} aria-hidden="true" inert>
+        {children}
+      </div>
+      <div className={styles.overlay}>
+        <div className={styles.card} role="status">
+          <p className={styles.title}>Your built-in models live here</p>
+          <p className={styles.body}>
+            Switch to the built-in engine to use and manage them. You're using{' '}
+            {activeLabel} now.
+          </p>
+          <button type="button" className={styles.switch} onClick={onSwitch}>
+            Switch to built-in
+          </button>
+        </div>
+      </div>
+    </div>
+  );
+}

From af052c278f4fc6a1684586e4cc0c80f9ce3450ad Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 21:36:17 -0500
Subject: [PATCH 71/89] fix(models): hide the download control for installed
 quants in Browse-all

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 63 +++++++++++++++++--
 .../tabs/models/BrowseAllPane.test.tsx        | 46 ++++++++++++++
 src/settings/tabs/models/BrowseAllPane.tsx    |  5 +-
 .../tabs/models/DiscoverPane.test.tsx         |  1 +
 src/types/starter.ts                          |  3 +
 5 files changed, 112 insertions(+), 6 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 4126b99b..b5cde082 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1706,6 +1706,10 @@ pub struct HfGgufFileRow {
     #[serde(flatten)]
     pub file: HfGgufFile,
     pub fit: Option<registry::RamFit>,
+    /// Whether this exact repo file is already recorded in the installed
+    /// manifest. Lets Browse-all show an "Installed" marker instead of a
+    /// download button once a quant finishes downloading.
+    pub installed: bool,
 }
 
 /// An installed model annotated with its RAM-fit on the host, computed from the
@@ -1747,7 +1751,11 @@ pub fn annotate_gguf_rows(files: Vec<HfGgufFile>, ram_bytes: u64) -> Vec<HfGgufF
             } else {
                 None
             };
-            HfGgufFileRow { file, fit }
+            HfGgufFileRow {
+                file,
+                fit,
+                installed: false,
+            }
         })
         .collect()
 }
@@ -1770,6 +1778,24 @@ pub fn attach_partials(
         .collect()
 }
 
+/// Marks each row whose `<repo>:<file>` is already recorded in the installed
+/// manifest, so Browse-all hides the download control for a quant that is
+/// already installed. A manifest read error degrades to "not installed"
+/// rather than failing the listing, mirroring [`annotate_starter`].
+pub fn attach_installed(
+    rows: Vec<HfGgufFileRow>,
+    repo: &str,
+    conn: &rusqlite::Connection,
+) -> Vec<HfGgufFileRow> {
+    rows.into_iter()
+        .map(|mut row| {
+            let id = format!("{repo}:{}", row.file.file);
+            row.installed = matches!(manifest::get(conn, &id), Ok(Some(_)));
+            row
+        })
+        .collect()
+}
+
 /// Annotates installed models with their RAM-fit on the host, from the recorded
 /// weights size. A model gets `None` when host RAM or the size is 0.
 pub fn build_installed_views(
@@ -2367,12 +2393,12 @@ pub async fn list_hf_repo_ggufs(
     repo: String,
     client: tauri::State<'_, reqwest::Client>,
     store: tauri::State<'_, storage::ModelStore>,
+    db: tauri::State<'_, crate::history::Database>,
 ) -> Result<Vec<HfGgufFileRow>, String> {
     let files = fetch_repo_gguf_listing(&client, HF_BASE_URL, &repo).await?;
-    Ok(attach_partials(
-        annotate_gguf_rows(files, system_ram_bytes()),
-        &store,
-    ))
+    let rows = attach_partials(annotate_gguf_rows(files, system_ram_bytes()), &store);
+    let conn = db.0.lock().map_err(|e| e.to_string())?;
+    Ok(attach_installed(rows, &repo, &conn))
 }
 
 /// Searches Hugging Face for GGUF model repos matching `query`, most-downloaded
@@ -4695,6 +4721,7 @@ mod tests {
                     partial_bytes: None,
                 },
                 fit: None,
+                installed: false,
             },
             HfGgufFileRow {
                 file: HfGgufFile {
@@ -4704,6 +4731,7 @@ mod tests {
                     partial_bytes: None,
                 },
                 fit: None,
+                installed: false,
             },
         ];
         let out = attach_partials(rows, &store);
@@ -4713,6 +4741,29 @@ mod tests {
         assert_eq!(out[1].file.partial_bytes, None);
     }
 
+    #[test]
+    fn attach_installed_marks_only_manifest_rows() {
+        let conn = crate::database::open_in_memory().unwrap();
+        // Record one of the two files in the manifest under "<repo>:<file>".
+        manifest::insert(&conn, &manifest_row("org/repo:in.gguf", false, false)).unwrap();
+
+        let row = |name: &str| HfGgufFileRow {
+            file: HfGgufFile {
+                file: name.to_string(),
+                size_bytes: 100,
+                sha256: String::new(),
+                partial_bytes: None,
+            },
+            fit: None,
+            installed: false,
+        };
+        let out = attach_installed(vec![row("in.gguf"), row("out.gguf")], "org/repo", &conn);
+
+        // Only the file recorded in the manifest is marked installed.
+        assert!(out[0].installed);
+        assert!(!out[1].installed);
+    }
+
     #[test]
     fn parse_gguf_listing_rejects_invalid_json() {
         let err = parse_gguf_listing(b"not json").unwrap_err();
@@ -5169,6 +5220,7 @@ mod tests {
                 partial_bytes: None,
             },
             fit: None,
+            installed: false,
         };
         assert_eq!(
             serde_json::to_value(file_row).unwrap(),
@@ -5178,6 +5230,7 @@ mod tests {
                 "sha256": "",
                 "partial_bytes": serde_json::Value::Null,
                 "fit": serde_json::Value::Null,
+                "installed": false,
             })
         );
     }
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 418454a4..242c1f61 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -87,12 +87,14 @@ const GGUFS: HfGgufFile[] = [
     fit: 'tight',
     sha256: 'a'.repeat(64),
     partial_bytes: null,
+    installed: false,
   },
   {
     file: 'gemma-q8.gguf',
     size_bytes: 9_000_000_000,
     sha256: 'b'.repeat(64),
     partial_bytes: null,
+    installed: false,
   },
 ];
 
@@ -102,6 +104,12 @@ const GGUFS_PARTIAL: HfGgufFile[] = [
   GGUFS[1],
 ];
 
+/** GGUFS with the first quant already recorded in the installed manifest. */
+const GGUFS_Q4_INSTALLED: HfGgufFile[] = [
+  { ...GGUFS[0], installed: true },
+  GGUFS[1],
+];
+
 const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
 
 /**
@@ -487,6 +495,44 @@ describe('BrowseAllPane', () => {
     return row;
   }
 
+  it('shows no download control for an already-installed quant', async () => {
+    await renderPane(() => {}, { list_hf_repo_ggufs: GGUFS_Q4_INSTALLED });
+    await expandRepo();
+    // Both quants are listed, but only the still-uninstalled sibling offers a
+    // Download button: the installed quant shows nothing (no button, no badge),
+    // matching the Staff picks treatment.
+    expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument();
+    expect(screen.getByText('gemma-q8.gguf')).toBeInTheDocument();
+    expect(screen.getAllByRole('button', { name: 'Download' })).toHaveLength(1);
+  });
+
+  it('drops the download control for a quant once its download finishes', async () => {
+    let calls = 0;
+    const onSaved = vi.fn();
+    await renderPane(onSaved, {
+      // The refetch after a finished install reports the quant installed.
+      list_hf_repo_ggufs: () => {
+        calls += 1;
+        return calls <= 1 ? GGUFS : GGUFS_Q4_INSTALLED;
+      },
+    });
+    await expandRepo();
+    // Two quants, two download buttons before any install.
+    expect(screen.getAllByRole('button', { name: 'Download' })).toHaveLength(2);
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+    expect(screen.getByTestId('download-figures')).toBeInTheDocument();
+    act(() => lastChannel?.simulateMessage({ type: 'AllDone' }));
+    await flush();
+    // The progress card clears and the finished quant settles with no download
+    // control (not back to a download button, the reported bug); only the
+    // uninstalled sibling keeps its button.
+    await waitFor(() =>
+      expect(screen.queryByTestId('download-figures')).not.toBeInTheDocument(),
+    );
+    expect(screen.getAllByRole('button', { name: 'Download' })).toHaveLength(1);
+  });
+
   it('shows Paused with Resume and Discard for an interrupted partial', async () => {
     await renderPane(() => {}, { list_hf_repo_ggufs: GGUFS_PARTIAL });
     await expandRepo();
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 33517586..00765919 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -376,7 +376,10 @@ function QuantRow({ file, repo, downloads, onSaved, refetch }: QuantRowProps) {
               </span>
             </Tooltip>
           ) : null}
-          {paused ? (
+          {/* An already-installed quant shows nothing here: no download button,
+              no badge. It lives in Library, so on this Discover surface the
+              absence of a download is the signal, matching Staff picks. */}
+          {file.installed ? null : paused ? (
             <>
               <span className={styles.quantPaused}>Paused · {pausedPct}%</span>
               <button
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index 3363ec93..b633a199 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -170,6 +170,7 @@ describe('DiscoverPane download persistence', () => {
               fit: 'tight',
               sha256: 'a'.repeat(64),
               partial_bytes: null,
+              installed: false,
             },
           ];
         }
diff --git a/src/types/starter.ts b/src/types/starter.ts
index 271f46f9..f1d4147a 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -126,6 +126,9 @@ export interface HfGgufFile {
   sha256: string;
   /** Bytes of an interrupted partial for this file on disk, or null when none. */
   partial_bytes: number | null;
+  /** Whether this exact repo file is already recorded in the installed
+   * manifest, so Browse-all hides the download control (no button, no badge). */
+  installed: boolean;
 }
 
 /** Engine lifecycle snapshot published on the `engine:status` event. */

From 6dab3c731aee80e69f7401e96307a8246a7b4e7d Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 21:27:10 -0500
Subject: [PATCH 72/89] feat: unify Models info, link names to Hugging Face,
 fix menu clipping

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   |  34 ++++-
 .../tabs/models/LibraryPane.module.css        |  44 ++++--
 src/settings/tabs/models/LibraryPane.test.tsx |  65 ++++++---
 src/settings/tabs/models/LibraryPane.tsx      | 129 +++++++++++-------
 .../tabs/models/StaffPicksPane.module.css     |  22 ++-
 .../tabs/models/StaffPicksPane.test.tsx       |  38 ++++--
 src/settings/tabs/models/StaffPicksPane.tsx   |  22 ++-
 src/types/starter.ts                          |   7 +
 8 files changed, 257 insertions(+), 104 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index b5cde082..173d2cb2 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1723,6 +1723,13 @@ pub struct InstalledModelView {
     /// repo + file. `None` for a pasted model with no registry entry (its
     /// context is not recorded in the manifest).
     pub context_length: Option<u32>,
+    /// Vision projector size in bytes, healed from the registry so the listed
+    /// total (weights + mmproj) matches Discover's. `0` for a text model or a
+    /// pasted repo with no registry entry (the manifest records only weights).
+    pub mmproj_bytes: u64,
+    /// Model maker (e.g. "Google"), healed from the registry. `None` for a
+    /// pasted repo with no entry, where the UI falls back to the repo id.
+    pub origin: Option<String>,
 }
 
 /// Estimated resident memory (GiB) for a GGUF weights blob of `size_bytes`:
@@ -1813,14 +1820,20 @@ pub fn build_installed_views(
             } else {
                 None
             };
-            // Curated models heal their context window from the registry; a
-            // pasted repo has no entry, so it shows none.
-            let context_length =
-                registry::by_repo_file(&model.repo, &model.file_name).map(|s| s.context_length);
+            // Curated models heal their context window, vision-projector size,
+            // and maker from the registry so the Library row reads the same
+            // facts Discover does; a pasted repo has no entry, so those stay
+            // absent (the UI falls back to the repo id for the maker).
+            let starter = registry::by_repo_file(&model.repo, &model.file_name);
+            let context_length = starter.map(|s| s.context_length);
+            let mmproj_bytes = starter.map_or(0, |s| s.mmproj_bytes);
+            let origin = starter.map(|s| s.origin.to_string());
             InstalledModelView {
                 model,
                 fit,
                 context_length,
+                mmproj_bytes,
+                origin,
             }
         })
         .collect()
@@ -5195,19 +5208,28 @@ mod tests {
         };
         let views = build_installed_views(vec![model.clone()], 64 << 30);
         assert_eq!(views[0].fit, Some(registry::RamFit::Fits));
-        // A pasted repo has no registry entry, so its context window is unknown.
+        // A pasted repo has no registry entry, so its context window, vision
+        // projector size, and maker are all unknown.
         assert_eq!(views[0].context_length, None);
+        assert_eq!(views[0].mmproj_bytes, 0);
+        assert_eq!(views[0].origin, None);
         // Unknown host RAM drops the verdict.
         let views = build_installed_views(vec![model], 0);
         assert_eq!(views[0].fit, None);
 
-        // A curated model heals its context window from the registry.
+        // A curated model heals its context window, projector size, and maker
+        // from the registry.
         let curated = registry::to_installed_model(&registry::STARTERS[0]);
         let views = build_installed_views(vec![curated], 64 << 30);
         assert_eq!(
             views[0].context_length,
             Some(registry::STARTERS[0].context_length)
         );
+        assert_eq!(views[0].mmproj_bytes, registry::STARTERS[0].mmproj_bytes);
+        assert_eq!(
+            views[0].origin,
+            Some(registry::STARTERS[0].origin.to_string())
+        );
     }
 
     #[test]
diff --git a/src/settings/tabs/models/LibraryPane.module.css b/src/settings/tabs/models/LibraryPane.module.css
index 154385a8..142ebee3 100644
--- a/src/settings/tabs/models/LibraryPane.module.css
+++ b/src/settings/tabs/models/LibraryPane.module.css
@@ -105,6 +105,32 @@
   gap: 9px;
 }
 
+/* Model name doubles as the link to the repo on Hugging Face: a plain button
+ * that reads as text until hover, matching Browse-all's row link. */
+.nameLink {
+  border: none;
+  background: transparent;
+  padding: 0;
+  font-family: inherit;
+  font-weight: 580;
+  font-size: 13.5px;
+  color: var(--t1);
+  text-align: left;
+  cursor: pointer;
+  transition: color 140ms ease;
+}
+.nameLink:hover {
+  color: var(--accent);
+  text-decoration: underline;
+  text-underline-offset: 2px;
+}
+.nameLink:focus-visible {
+  outline: none;
+  color: var(--accent);
+  text-decoration: underline;
+  text-underline-offset: 2px;
+}
+
 .org {
   font-size: 11px;
   color: var(--t3);
@@ -196,10 +222,11 @@
   position: relative;
 }
 
+/* Fixed to the viewport (top/right set inline from the trigger's rect) so the
+ * popover escapes the Settings window's hidden overflow instead of being
+ * clipped by `.body` / `.window`. */
 .menu {
-  position: absolute;
-  right: 0;
-  top: 36px;
+  position: fixed;
   z-index: 20;
   min-width: 208px;
   padding: 6px;
@@ -215,11 +242,9 @@
   animation: menuIn 130ms cubic-bezier(0.2, 0.8, 0.3, 1);
 }
 
-/* Flip above the trigger when a downward menu would be clipped by the
- * auto-sized window's hidden overflow (set by the pane for tight bottom rows). */
+/* When the menu flips above a bottom-edge trigger, grow the open animation from
+ * the bottom edge so it reads as rising out of the button. */
 .menu[data-side='top'] {
-  top: auto;
-  bottom: 36px;
   transform-origin: bottom right;
 }
 
@@ -251,11 +276,6 @@
   opacity: 0.8;
   flex: none;
 }
-.menuExt {
-  margin-left: auto;
-  font-size: 12px;
-  opacity: 0.4;
-}
 .menuItem:hover {
   background: var(--elev-2);
 }
diff --git a/src/settings/tabs/models/LibraryPane.test.tsx b/src/settings/tabs/models/LibraryPane.test.tsx
index 939203cf..2b4f24e2 100644
--- a/src/settings/tabs/models/LibraryPane.test.tsx
+++ b/src/settings/tabs/models/LibraryPane.test.tsx
@@ -2,10 +2,11 @@
  * Unit tests for the Models surface's Library pane.
  *
  * Covers the installed-model list (active + non-active rows, capability text
- * tags, RAM-fit hint), the popover menu (Set as active / View on Hugging Face
- * / Delete), the delete confirm/cancel/success/error flow, menu dismissal
- * (outside click + Escape), the empty state, the footer, and the defensive
- * guards around the manifest and disk probes.
+ * tags, RAM-fit hint), the model name's Hugging Face link, the popover menu
+ * (Set as active / Reveal in Finder / Delete), the delete confirm/cancel/
+ * success/error flow, menu dismissal (outside click + Escape), the empty
+ * state, the footer, and the defensive guards around the manifest and disk
+ * probes.
  *
  * `invoke` comes from the global Tauri mock; capabilities are fetched
  * through the same `get_model_capabilities` command the hook reads.
@@ -107,14 +108,18 @@ function makeConfig(builtinModel: string): RawAppConfig {
 
 const GEMMA: InstalledModel = {
   id: 'org/gemma:gemma.gguf',
-  display_name: 'gemma',
   size_bytes: 2_489_757_856,
+  // A vision projector healed from the registry: folded into the shown total.
+  mmproj_bytes: 500_000_000,
+  display_name: 'gemma',
   quant: 'Q4_K_M',
   fit: 'fits',
   context_length: 262_144,
+  origin: 'Google',
 };
 
-// No `fit` here: exercises the "RAM unknown" branch (no fit pill).
+// No `fit`, `mmproj_bytes`, `origin`, or `context_length` here: a pasted repo
+// that exercises the "RAM unknown" / weights-only / maker-fallback branches.
 const QWEN: InstalledModel = {
   id: 'org/qwen:qwen.gguf',
   display_name: 'qwen',
@@ -195,16 +200,18 @@ function openMenu(name: string) {
 }
 
 describe('LibraryPane', () => {
-  it('lists each installed model with its org line, size, and quant', async () => {
+  it('lists each installed model with the size · context · maker · quant line', async () => {
     mockCommands(libraryResponses());
     await renderPane();
     expect(screen.getByText('gemma')).toBeInTheDocument();
-    // Curated model: context window healed from the registry, after the size.
+    // Curated model: size is the weights + mmproj total (2.5 + 0.5 = 3.0 GB),
+    // then the registry-healed context and maker, then the quant.
     expect(
-      screen.getByText('org/gemma · Q4_K_M · 2.5 GB · 256K'),
+      screen.getByText('3.0 GB · 256K · Google · Q4_K_M'),
     ).toBeInTheDocument();
-    // Empty quant and (here) no context drop out of the org line.
-    expect(screen.getByText('org/qwen · 9.0 GB')).toBeInTheDocument();
+    // Pasted model: no mmproj/context/quant, and the maker falls back to the
+    // repo id, so only the size and repo remain.
+    expect(screen.getByText('9.0 GB · org/qwen')).toBeInTheDocument();
   });
 
   it('shows the RAM-fit hint only when the backend provides one', async () => {
@@ -317,13 +324,10 @@ describe('LibraryPane', () => {
     expect(screen.getByText('qwen')).toBeInTheDocument();
   });
 
-  it('View on Hugging Face opens the repo page in the system browser', async () => {
+  it('opens the repo on Hugging Face from the model name link', async () => {
     mockCommands(libraryResponses());
     await renderPane();
-    openMenu('gemma');
-    fireEvent.click(
-      screen.getByRole('menuitem', { name: 'View on Hugging Face' }),
-    );
+    fireEvent.click(screen.getByRole('button', { name: 'gemma' }));
     expect(invokeMock).toHaveBeenCalledWith('open_url', {
       url: 'https://huggingface.co/org/gemma',
     });
@@ -485,18 +489,35 @@ describe('LibraryPane', () => {
     expect(screen.getByRole('menu')).toHaveAttribute('data-side', 'bottom');
   });
 
-  it('flips the popover above the trigger when the space below is tight', async () => {
+  it('flips the popover above the trigger when there is more room above', async () => {
     mockCommands(libraryResponses());
     await renderPane();
     const manage = screen.getByRole('button', { name: 'Manage qwen' });
-    // Simulate the trigger sitting near the window's bottom edge, where a
-    // downward menu would be clipped by the Settings window's hidden overflow.
+    // Simulate the trigger sitting near the window's bottom edge: a menu
+    // dropped below it would run past the viewport's bottom margin, so it
+    // flips above the trigger instead of spilling out of view.
     manage.getBoundingClientRect = () =>
-      ({ bottom: window.innerHeight - 8 }) as unknown as DOMRect;
+      ({
+        top: window.innerHeight - 40,
+        bottom: window.innerHeight - 8,
+      }) as unknown as DOMRect;
     fireEvent.click(manage);
     expect(screen.getByRole('menu')).toHaveAttribute('data-side', 'top');
   });
 
+  it('keeps a top row dropping down when the space above is tighter than below', async () => {
+    mockCommands(libraryResponses());
+    await renderPane();
+    const manage = screen.getByRole('button', { name: 'Manage gemma' });
+    // A row high in the window with room beneath its trigger: a menu dropped
+    // below still clears the viewport's bottom margin, so it must drop down (the
+    // old fixed-height estimate wrongly flipped such rows up into the window chrome).
+    manage.getBoundingClientRect = () =>
+      ({ top: 150, bottom: window.innerHeight - 200 }) as unknown as DOMRect;
+    fireEvent.click(manage);
+    expect(screen.getByRole('menu')).toHaveAttribute('data-side', 'bottom');
+  });
+
   it('toggles the popover closed when its own button is clicked again', async () => {
     mockCommands(libraryResponses());
     await renderPane();
@@ -543,10 +564,10 @@ describe('LibraryPane', () => {
     await renderPane();
     openMenu('gemma');
     fireEvent.mouseDown(
-      screen.getByRole('menuitem', { name: 'View on Hugging Face' }),
+      screen.getByRole('menuitem', { name: 'Reveal in Finder' }),
     );
     expect(
-      screen.getByRole('menuitem', { name: 'View on Hugging Face' }),
+      screen.getByRole('menuitem', { name: 'Reveal in Finder' }),
     ).toBeInTheDocument();
   });
 
diff --git a/src/settings/tabs/models/LibraryPane.tsx b/src/settings/tabs/models/LibraryPane.tsx
index ff882e8c..d499f5f9 100644
--- a/src/settings/tabs/models/LibraryPane.tsx
+++ b/src/settings/tabs/models/LibraryPane.tsx
@@ -1,17 +1,25 @@
 /**
  * Library pane of the Models surface: the user's installed local models.
  *
- * Each downloaded model shows as a quiet row: its name with capability pills
- * (Text always, plus Vision / Thinking when applicable), the Hugging Face repo
- * / quantisation / size, and a RAM-fit hint (hover for a one-line explanation).
- * The active model is marked by the accent edge alone, not a textual pill.
- * A ⋮ button opens a floating popover (Set as active / View on Hugging Face /
- * Reveal in Finder / Delete) instead of expanding the card; Delete routes
- * through a confirm dialog. When nothing is installed the pane invites the
- * user over to Discover.
+ * Each downloaded model shows as a quiet row: its name (a link that opens the
+ * repo on Hugging Face) with capability pills (Text always, plus Vision /
+ * Thinking when applicable), a `size · context · maker · quant` sub-line (the
+ * same grammar Discover uses, with size as the full weights + mmproj total and
+ * maker falling back to the repo id for a pasted model), and a RAM-fit hint
+ * (hover for a one-line explanation). The active model is marked by the accent
+ * edge alone, not a textual pill. A ⋮ button opens a floating popover (Set as
+ * active / Reveal in Finder / Delete) instead of expanding the card; Delete
+ * routes through a confirm dialog. When nothing is installed the pane invites
+ * the user over to Discover.
  */
 
-import { useCallback, useEffect, useState } from 'react';
+import {
+  useCallback,
+  useEffect,
+  useLayoutEffect,
+  useRef,
+  useState,
+} from 'react';
 import { invoke } from '@tauri-apps/api/core';
 
 import { useModelCapabilities } from '../../../hooks/useModelCapabilities';
@@ -25,14 +33,6 @@ import type { InstalledModel, RamFit } from '../../../types/starter';
 
 const HF_BASE_URL = 'https://huggingface.co';
 
-/**
- * Approximate height (px) the popover needs below the ⋮ trigger. When the space
- * beneath it is tighter than this, the menu flips above the button so it is
- * never clipped: the Settings window auto-hugs its content and `.body` hides
- * overflow, so a downward menu on the last row would spill past the window.
- */
-const MENU_DROP_ESTIMATE_PX = 210;
-
 /** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
   fits: styles.fitOk,
@@ -46,11 +46,6 @@ const SET_ACTIVE_ICON = (
     <path d="M5 13l4 4L19 7" />
   </svg>
 );
-const HF_ICON = (
-  <svg viewBox="0 0 24 24" aria-hidden="true">
-    <path d="M14 3h7v7M21 3l-9 9M19 14v5a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V7a2 2 0 0 1 2-2h5" />
-  </svg>
-);
 const FINDER_ICON = (
   <svg viewBox="0 0 24 24" aria-hidden="true">
     <path d="M3 7h6l2 2h10v9a2 2 0 0 1-2 2H3z" />
@@ -81,7 +76,18 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
 
   const [installed, setInstalled] = useState<InstalledModel[]>([]);
   const [openMenu, setOpenMenu] = useState<string | null>(null);
-  const [menuDropUp, setMenuDropUp] = useState(false);
+  // Fixed-viewport placement of the open popover, measured from its trigger so
+  // it escapes the Settings window's hidden overflow (an absolutely-positioned
+  // menu was clipped by `.body`/`.window`). `null` until the layout effect has
+  // measured the menu, so the first paint stays hidden rather than flashing at
+  // the wrong spot.
+  const [menuPos, setMenuPos] = useState<{
+    top: number;
+    right: number;
+    dropUp: boolean;
+  } | null>(null);
+  const menuRef = useRef<HTMLDivElement>(null);
+  const triggerRectRef = useRef<DOMRect | null>(null);
   const [confirmDelete, setConfirmDelete] = useState<string | null>(null);
   const [deleteError, setDeleteError] = useState<string | null>(null);
 
@@ -120,20 +126,45 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
     };
   }, [openMenu]);
 
-  // Open the popover for `id` (or close it if already open). On open, flip the
-  // menu above the trigger when there is not enough room below: rows near the
-  // window's bottom edge would otherwise be clipped by the hidden body overflow.
+  // Open the popover for `id` (or close it if already open). Snapshot the
+  // trigger's viewport rect; the layout effect below positions the menu from it.
   function toggleMenu(id: string, trigger: HTMLElement) {
     if (openMenu === id) {
       setOpenMenu(null);
       return;
     }
-    const spaceBelow =
-      window.innerHeight - trigger.getBoundingClientRect().bottom;
-    setMenuDropUp(spaceBelow < MENU_DROP_ESTIMATE_PX);
+    triggerRectRef.current = trigger.getBoundingClientRect();
+    setMenuPos(null);
     setOpenMenu(id);
   }
 
+  // Position the popover once it has mounted. It is `position: fixed`, so it
+  // escapes the Settings window's hidden overflow and is bounded only by the
+  // viewport. Drop below the trigger by default; flip above when the menu would
+  // overflow the bottom edge, then clamp to the top so it can never be clipped.
+  useLayoutEffect(() => {
+    if (openMenu === null) return;
+    /* v8 ignore start -- the trigger rect and menu node always exist once open */
+    const rect = triggerRectRef.current;
+    const menu = menuRef.current;
+    if (!rect || !menu) return;
+    /* v8 ignore stop */
+    const gap = 6;
+    const height = menu.offsetHeight;
+    let top = rect.bottom + gap;
+    let dropUp = false;
+    if (top + height > window.innerHeight - 8) {
+      top = rect.top - gap - height;
+      dropUp = true;
+    }
+    // eslint-disable-next-line @eslint-react/set-state-in-effect -- intended: the popover must be positioned from its measured size before the browser paints
+    setMenuPos({
+      top: Math.max(8, top),
+      right: window.innerWidth - rect.right,
+      dropUp,
+    });
+  }, [openMenu]);
+
   // The backend writes the builtin provider's model field; lift the fresh
   // snapshot so the active row moves without a tab remount.
   function selectModel(id: string) {
@@ -218,6 +249,11 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
             const active = m.id === activeModel;
             const caps = capabilities[m.id];
             const repo = m.id.split(':')[0];
+            // Maker from the registry, or the repo id for a pasted model.
+            const maker = m.origin || repo;
+            // Full on-disk total (weights + vision projector) so the same model
+            // never shows a different size here than in Discover.
+            const totalBytes = m.size_bytes + (m.mmproj_bytes ?? 0);
             // Empty when the model carries no context window, which skips it.
             const contextLabel = formatContextWindow(m.context_length ?? 0);
             return (
@@ -229,7 +265,13 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                 <div className={styles.row}>
                   <div className={styles.mid}>
                     <div className={styles.name}>
-                      {m.display_name}
+                      <button
+                        type="button"
+                        className={styles.nameLink}
+                        onClick={() => openHuggingFace(m.id)}
+                      >
+                        {m.display_name}
+                      </button>
                       <span className={`${styles.pill} ${styles.pillText}`}>
                         Text
                       </span>
@@ -247,10 +289,9 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                       ) : null}
                     </div>
                     <div className={styles.org}>
-                      {repo}
-                      {m.quant !== '' ? ` · ${m.quant}` : ''} ·{' '}
-                      {gb(m.size_bytes)} GB
-                      {contextLabel ? ` · ${contextLabel}` : ''}
+                      {gb(totalBytes)} GB
+                      {contextLabel ? ` · ${contextLabel}` : ''} · {maker}
+                      {m.quant !== '' ? ` · ${m.quant}` : ''}
                     </div>
                   </div>
                   <div className={styles.right}>
@@ -274,9 +315,15 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                       </button>
                       {openMenu === m.id ? (
                         <div
+                          ref={menuRef}
                           className={styles.menu}
                           role="menu"
-                          data-side={menuDropUp ? 'top' : 'bottom'}
+                          data-side={menuPos?.dropUp ? 'top' : 'bottom'}
+                          style={{
+                            top: menuPos?.top ?? 0,
+                            right: menuPos?.right ?? 0,
+                            visibility: menuPos ? 'visible' : 'hidden',
+                          }}
                         >
                           {active ? null : (
                             <button
@@ -289,18 +336,6 @@ export function LibraryPane({ config, onSaved, onAddModel }: LibraryPaneProps) {
                               <span>Set as active</span>
                             </button>
                           )}
-                          <button
-                            type="button"
-                            role="menuitem"
-                            className={styles.menuItem}
-                            onClick={() => openHuggingFace(m.id)}
-                          >
-                            {HF_ICON}
-                            <span>View on Hugging Face</span>
-                            <span className={styles.menuExt} aria-hidden="true">
-                              ↗
-                            </span>
-                          </button>
                           <button
                             type="button"
                             role="menuitem"
diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index ad2f203f..aa802c6b 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -67,10 +67,30 @@
   flex-wrap: wrap;
 }
 
-.name {
+/* Model name doubles as the link to the repo on Hugging Face: a plain button
+ * that reads as text until hover, matching Browse-all's row link. */
+.nameLink {
+  border: none;
+  background: transparent;
+  padding: 0;
+  font-family: inherit;
   font-weight: 560;
   font-size: 12.5px;
   color: var(--t1);
+  text-align: left;
+  cursor: pointer;
+  transition: color 140ms ease;
+}
+.nameLink:hover {
+  color: var(--accent);
+  text-decoration: underline;
+  text-underline-offset: 2px;
+}
+.nameLink:focus-visible {
+  outline: none;
+  color: var(--accent);
+  text-decoration: underline;
+  text-underline-offset: 2px;
 }
 
 /* Calm capability pills: a neutral chip, the colour carried in the text only.
diff --git a/src/settings/tabs/models/StaffPicksPane.test.tsx b/src/settings/tabs/models/StaffPicksPane.test.tsx
index eee98793..77636be2 100644
--- a/src/settings/tabs/models/StaffPicksPane.test.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.test.tsx
@@ -4,11 +4,11 @@
  * Models are grouped into use-case sections (Everyday chat / Compact & fast /
  * Deep reasoning), known sections first in a fixed order, then any extra
  * category alphabetically; within a section models are alphabetical. Each
- * compact row shows the model name, capability pills (Text always, plus Vision
- * / Thinking), a `size · maker` sub-line, a RAM-fit hint, and a single icon
- * download that runs the VERIFIED starter path (`download_staff_pick`, pinned
- * revision + sha256). The download channel is captured the same way
- * BrowseAllPane.test.tsx does it.
+ * compact row shows the model name (a Hugging Face link), capability pills
+ * (Text always, plus Vision / Thinking), a `size · context · maker · quant`
+ * sub-line, a RAM-fit hint, and a single icon download that runs the VERIFIED
+ * starter path (`download_staff_pick`, pinned revision + sha256). The download
+ * channel is captured the same way BrowseAllPane.test.tsx does it.
  */
 
 import {
@@ -203,32 +203,48 @@ describe('StaffPicksPane', () => {
     expect(screen.queryByText(/Recommended/)).not.toBeInTheDocument();
   });
 
-  it('shows the name, pills, the size · context · maker sub-line, and fit', async () => {
+  it('shows the name, pills, the size · context · maker · quant sub-line, and fit', async () => {
     await renderPane();
     const row = rowFor('Gemma 4 12B');
     expect(within(row).getByText('Text')).toBeInTheDocument();
     expect(within(row).getByText('Vision')).toBeInTheDocument();
     expect(within(row).queryByText('Thinking')).not.toBeInTheDocument();
-    // Context window sits in the metadata sub-line, between size and maker.
-    expect(within(row).getByText('7.2 GB · 128K · Google')).toBeInTheDocument();
+    // Sub-line grammar matches Library: size, context, maker, then quant.
+    expect(
+      within(row).getByText('7.2 GB · 128K · Google · Q4_0'),
+    ).toBeInTheDocument();
     expect(within(row).getByText('Comfortable')).toBeInTheDocument();
   });
 
   it('places the context window between size and maker for each model', async () => {
     await renderPane();
     expect(
-      within(rowFor('Qwen3.5 9B')).getByText('7.2 GB · 256K · Alibaba'),
+      within(rowFor('Qwen3.5 9B')).getByText(
+        '7.2 GB · 256K · Alibaba · Q4_K_M',
+      ),
     ).toBeInTheDocument();
   });
 
-  it('falls back to size · maker when a model has no context window', async () => {
+  it('falls back to size · maker · quant when a model has no context window', async () => {
     await renderPane(() => {}, {
       get_staff_picks: [
         option({ context_length: undefined, display_name: 'Mystery 7B' }),
       ],
     });
     const row = rowFor('Mystery 7B');
-    expect(within(row).getByText('7.2 GB · Google')).toBeInTheDocument();
+    expect(within(row).getByText('7.2 GB · Google · Q4_0')).toBeInTheDocument();
+  });
+
+  it('opens the repo on Hugging Face from the model name link', async () => {
+    await renderPane();
+    fireEvent.click(
+      within(rowFor('Gemma 4 12B')).getByRole('button', {
+        name: 'Gemma 4 12B',
+      }),
+    );
+    expect(invokeMock).toHaveBeenCalledWith('open_url', {
+      url: 'https://huggingface.co/google/gemma-4-12B-it-qat-q4_0-gguf',
+    });
   });
 
   it('shows a Thinking pill on a thinking model and omits Vision on a text-only one', async () => {
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index bd426ac6..1ccc65d0 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -5,8 +5,9 @@
  * ("Everyday chat", "Compact & fast", "Deep reasoning", ...) so a non-expert
  * can pick by intent. Known sections show first in a fixed order, then any
  * extra category alphabetically; within a section models are alphabetical. Each
- * compact row shows the model name, capability pills (Text always, plus Vision
- * / Thinking), a `size · maker` sub-line, a RAM-fit hint, and a single icon
+ * compact row shows the model name (a link that opens the repo on Hugging
+ * Face), capability pills (Text always, plus Vision / Thinking), a `size ·
+ * context · maker · quant` sub-line, a RAM-fit hint, and a single icon
  * download that runs the VERIFIED catalog path (`download_staff_pick`, keyed by
  * the entry's stable id, pinned revision + sha256), unlike the Browse-all
  * pane's arbitrary repo downloads. A finished install lifts a fresh config
@@ -36,6 +37,8 @@ import styles from './StaffPicksPane.module.css';
 import type { RawAppConfig } from '../../types';
 import type { RamFit, StaffPickOption } from '../../../types/starter';
 
+const HF_BASE_URL = 'https://huggingface.co';
+
 /** RAM-fit hint colour class on this pane's stylesheet (labels are shared). */
 const FIT_CLASS: Record<RamFit, string> = {
   fits: styles.fitOk,
@@ -197,14 +200,23 @@ function ModelRow({ option, downloads, onSaved, refresh }: ModelRowProps) {
       ? Math.min(100, Math.floor((partial_bytes / totalBytes(option)) * 100))
       : 0;
 
+  function openHuggingFace() {
+    void invoke('open_url', { url: `${HF_BASE_URL}/${starter.repo}` });
+  }
+
   return (
     <div className={styles.row} data-model-row data-id={starter.id}>
       <div className={styles.rowMain}>
         <div className={styles.mid}>
           <div className={styles.top}>
-            <span className={styles.name} data-testid="staff-model-name">
+            <button
+              type="button"
+              className={styles.nameLink}
+              data-testid="staff-model-name"
+              onClick={openHuggingFace}
+            >
               {starter.display_name}
-            </span>
+            </button>
             <span className={styles.pills}>
               <span className={`${styles.pill} ${styles.pillText}`}>Text</span>
               {starter.vision ? (
@@ -224,7 +236,7 @@ function ModelRow({ option, downloads, onSaved, refresh }: ModelRowProps) {
               ? `Paused · ${pausedPct}%`
               : `${gb(totalBytes(option))} GB${
                   contextLabel ? ` · ${contextLabel}` : ''
-                } · ${starter.origin}`}
+                } · ${starter.origin} · ${starter.quant}`}
           </div>
         </div>
         {!showProgress ? (
diff --git a/src/types/starter.ts b/src/types/starter.ts
index f1d4147a..7b45e58c 100644
--- a/src/types/starter.ts
+++ b/src/types/starter.ts
@@ -114,6 +114,13 @@ export interface InstalledModel {
   /** Trained context window in tokens, healed from the curated registry by the
    * backend; `null`/absent for a pasted model with no registry entry. */
   context_length?: number | null;
+  /** Vision projector size in bytes, healed from the registry; added to
+   * `size_bytes` for the displayed total so it matches Discover. `0`/absent for
+   * a text model or a pasted repo with no registry entry. */
+  mmproj_bytes?: number;
+  /** Model maker (e.g. "Google"), healed from the registry; `null`/absent for a
+   * pasted repo, where the row falls back to the repo id. */
+  origin?: string | null;
 }
 
 /** One `.gguf` row from `list_hf_repo_ggufs`, for the paste-a-repo browser.

From 150ea8156f703f7503acc2ee8b57c82ebd8bb460 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 21:54:36 -0500
Subject: [PATCH 73/89] fix: truncate long resident-model name in keep-warm
 status

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 .../tabs/models/ProvidersPane.test.tsx        | 17 +++++++----
 src/settings/tabs/models/ProvidersPane.tsx    | 30 ++++++++++++++-----
 src/styles/settings.module.css                | 21 +++++++++++++
 3 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index b9683127..0636eb72 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -4,6 +4,7 @@ import {
   fireEvent,
   act,
   waitFor,
+  within,
 } from '@testing-library/react';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
@@ -651,9 +652,11 @@ describe('ProvidersPane generation', () => {
     });
     renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
     return waitFor(() => {
-      expect(screen.getByText('Mistral Nemo 12B in VRAM')).toBeInTheDocument();
+      const status = screen.getByTestId('keep-warm-status');
+      expect(status).toHaveTextContent('Mistral Nemo 12B');
+      expect(within(status).getByText('in VRAM')).toBeInTheDocument();
       // The selected (but not-yet-resident) model is never shown as resident.
-      expect(screen.queryByText('Qwen3.5 9B in VRAM')).not.toBeInTheDocument();
+      expect(within(status).queryByText('Qwen3.5 9B')).not.toBeInTheDocument();
       expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled();
     });
   });
@@ -711,7 +714,9 @@ describe('ProvidersPane generation', () => {
     mockInvoke({ get_loaded_model: 'llama3.1:8b' });
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
     await waitFor(() =>
-      expect(screen.getByText('llama3.1:8b in VRAM')).toBeInTheDocument(),
+      expect(screen.getByTestId('keep-warm-status')).toHaveTextContent(
+        'llama3.1:8b',
+      ),
     );
   });
 
@@ -729,7 +734,7 @@ describe('ProvidersPane generation', () => {
     await act(async () => {
       emitTauriEvent('warmup:model-loaded', 'phi4');
     });
-    expect(screen.getByText('phi4 in VRAM')).toBeInTheDocument();
+    expect(screen.getByTestId('keep-warm-status')).toHaveTextContent('phi4');
     await act(async () => {
       emitTauriEvent('warmup:model-evicted', null);
     });
@@ -851,7 +856,9 @@ describe('ProvidersPane robustness', () => {
       emitTauriEvent('engine:status', engineStatus('loaded'));
     });
     await waitFor(() =>
-      expect(screen.getByText('Qwen3.5 9B in VRAM')).toBeInTheDocument(),
+      expect(screen.getByTestId('keep-warm-status')).toHaveTextContent(
+        'Qwen3.5 9B',
+      ),
     );
   });
 
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index ccb92d0d..709dc51c 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -298,14 +298,14 @@ export function ProvidersPane({
 
   const fillPct = `${ctxPos / 10}%`;
 
-  // Keep-warm live status: the text shown beside the name. `loadedModel` is the
-  // display name of the model the active provider actually has resident (the
-  // built-in engine's loaded blob, or Ollama's /api/ps), never the frontend
-  // selection. While the built-in engine is mid-load it reports "Loading…".
+  // Keep-warm live status. `loadedModel` is the display name of the model the
+  // active provider actually has resident (the built-in engine's loaded blob,
+  // or Ollama's /api/ps), never the frontend selection; when set it renders as
+  // a truncating name + "in VRAM" suffix in the JSX below so a long name can
+  // never break the row. This fallback text covers the two non-resident states
+  // (mid-load for the built-in engine, otherwise nothing loaded).
   let warmStatusText: string;
-  if (loadedModel) {
-    warmStatusText = `${loadedModel} in VRAM`;
-  } else if (activeKind === 'builtin' && engineState === 'starting') {
+  if (activeKind === 'builtin' && engineState === 'starting') {
     warmStatusText = 'Loading…';
   } else {
     warmStatusText = 'No model loaded';
@@ -607,7 +607,21 @@ export function ProvidersPane({
                 </button>
               </Tooltip>
             </div>
-            <span className={styles.genWarmStatus}>{warmStatusText}</span>
+            <span
+              className={styles.genWarmStatus}
+              data-testid="keep-warm-status"
+            >
+              {loadedModel ? (
+                <>
+                  <span className={styles.genWarmModel} title={loadedModel}>
+                    {loadedModel}
+                  </span>
+                  <span className={styles.genWarmSuffix}>in VRAM</span>
+                </>
+              ) : (
+                warmStatusText
+              )}
+            </span>
           </div>
           <div className={styles.genWarmControls}>
             <span className={styles.genWarmPrefix}>Release after</span>
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index dd405d6b..d9a47a05 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -539,12 +539,33 @@
   display: flex;
   align-items: center;
   gap: 6px;
+  min-width: 0;
+}
+/* The label stays whole: only the resident-model name (right) ever truncates. */
+.genWarmHead .genName {
+  flex: none;
+  white-space: nowrap;
 }
 .genWarmStatus {
   margin-left: auto;
+  min-width: 0;
+  display: flex;
+  align-items: baseline;
+  gap: 4px;
   font-size: 11.5px;
   color: var(--t2);
 }
+/* A long resident-model name truncates with an ellipsis (full name on hover via
+ * the title attribute) instead of wrapping and breaking the row. */
+.genWarmModel {
+  min-width: 0;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+.genWarmSuffix {
+  flex: none;
+}
 .genWarmControls {
   display: flex;
   align-items: center;

From 43f64f90646d431064c957cd51e2c7570dff0f4d Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 23:01:07 -0500
Subject: [PATCH 74/89] feat: capability pills in Browse-all, shared capability
 derivation, and active-model preserved on install

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   | 395 +++++++++++++-----
 .../tabs/models/BrowseAllPane.test.tsx        |  97 ++++-
 src/settings/tabs/models/BrowseAllPane.tsx    |   2 +
 .../tabs/models/CapabilityPills.module.css    |  26 ++
 .../tabs/models/CapabilityPills.test.tsx      |  38 ++
 src/settings/tabs/models/CapabilityPills.tsx  |  32 ++
 .../tabs/models/DiscoverPane.test.tsx         |  21 +-
 .../tabs/models/StaffPicksPane.module.css     |  25 --
 src/settings/tabs/models/StaffPicksPane.tsx   |  18 +-
 src/settings/tabs/models/useHfSearch.test.ts  | 157 +++++--
 src/settings/tabs/models/useHfSearch.ts       |  76 ++--
 src/types/hf.ts                               |  18 +
 12 files changed, 669 insertions(+), 236 deletions(-)
 create mode 100644 src/settings/tabs/models/CapabilityPills.module.css
 create mode 100644 src/settings/tabs/models/CapabilityPills.test.tsx
 create mode 100644 src/settings/tabs/models/CapabilityPills.tsx

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 173d2cb2..38c6a08a 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1379,13 +1379,23 @@ struct HfRepoInfo {
     siblings: Vec<HfSibling>,
 }
 
-/// The slice of HF's parsed `gguf` metadata block Thuki reads: the model's
-/// trained context window. Present on a search row when the query requests
-/// `expand[]=gguf`. Untrusted external input, sanitized before use.
+/// The slice of HF's parsed `gguf` metadata block Thuki reads. Present on a
+/// search row when the query requests `expand[]=gguf`. Untrusted external input:
+/// the context window is sanitized before use, and the chat template is only fed
+/// to the never-panicking [`reasoning::classify_reasoning`] classifier.
 #[derive(Deserialize)]
 struct HfGgufMeta {
     #[serde(default)]
     context_length: Option<u64>,
+    /// The model's embedded chat template, the highest-signal reasoning class
+    /// source. Already carried by `expand[]=gguf` (the same block that holds the
+    /// context window), so reading it costs no extra request.
+    #[serde(default)]
+    chat_template: Option<String>,
+    /// `general.architecture`, a secondary reasoning signal (e.g. gpt-oss is
+    /// always-on even when its template omits the channel marker).
+    #[serde(default)]
+    architecture: Option<String>,
 }
 
 /// Trust check for an externally-reported context window. Accepts a positive
@@ -1446,6 +1456,13 @@ pub struct MmprojCompanion {
     pub size_bytes: u64,
 }
 
+/// True when `name` is an `mmproj*.gguf` vision projection companion. The
+/// presence of one is Thuki's ground-truth vision signal: llama.cpp cannot do
+/// image input without it, regardless of how the base model is tagged.
+fn is_mmproj(name: &str) -> bool {
+    name.starts_with("mmproj") && name.ends_with(".gguf")
+}
+
 /// Pure parse of an HF repo listing into the spec for one target `file`.
 /// Capability rule for pasted repos: vision = an `mmproj*.gguf` sibling with
 /// complete LFS metadata exists. The reasoning class is recorded in two stages:
@@ -1474,7 +1491,7 @@ pub fn resolve_listing(body: &[u8], file: &str) -> Result<RepoResolved, String>
     let mmproj = info
         .siblings
         .iter()
-        .filter(|s| s.rfilename.starts_with("mmproj") && s.rfilename.ends_with(".gguf"))
+        .filter(|s| is_mmproj(&s.rfilename))
         .find_map(|s| {
             lfs_digest(s).map(|(sha256, size_bytes)| MmprojCompanion {
                 file: s.rfilename.clone(),
@@ -1499,7 +1516,7 @@ pub fn parse_gguf_listing(body: &[u8]) -> Result<Vec<HfGgufFile>, String> {
     Ok(info
         .siblings
         .into_iter()
-        .filter(|s| s.rfilename.ends_with(".gguf") && !s.rfilename.starts_with("mmproj"))
+        .filter(|s| s.rfilename.ends_with(".gguf") && !is_mmproj(&s.rfilename))
         .map(|s| {
             let size_bytes = s.lfs.as_ref().and_then(|l| l.size).or(s.size).unwrap_or(0);
             let sha256 = s
@@ -1620,7 +1637,7 @@ pub async fn fetch_repo_gguf_listing(
 // ─── Hugging Face model search ───────────────────────────────────────────────
 
 /// One repo row from a Hugging Face model search, trimmed to the fields the
-/// in-app browser needs to identify, rank, and gate a model.
+/// in-app browser needs to identify, rank, gate, and label a model.
 #[derive(Debug, Clone, PartialEq, Serialize)]
 pub struct HfModelSummary {
     /// Repo id, e.g. `unsloth/Qwen3.5-9B-GGUF`; the install target.
@@ -1636,11 +1653,35 @@ pub struct HfModelSummary {
     /// `context_length` metadata (a per-repo property, identical across quants).
     /// `None` when the API omits it or the value fails [`sanitize_context_length`].
     pub context_length: Option<u32>,
+    /// True when the repo ships an `mmproj*.gguf` vision companion (see
+    /// [`is_mmproj`]). A capability of the model, shared by every quant, so the
+    /// pill belongs on the repo row, not the per-quant list.
+    pub vision: bool,
+    /// True when the model emits reasoning tokens, from its chat template via
+    /// [`reasoning::classify_reasoning`], or the repo name via [`detect_thinking`]
+    /// when the template is absent. Also a per-repo capability.
+    pub thinking: bool,
+}
+
+/// A page of search rows plus whether the Hub holds more. The flag is derived
+/// from the raw entry count, not the kept-row count, so the per-row pipeline
+/// allowlist (which drops non-chat repos) cannot prematurely end pagination.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct HfSearchPage {
+    pub rows: Vec<HfModelSummary>,
+    pub has_more: bool,
 }
 
+/// HF `pipeline_tag`s Thuki surfaces in Browse-all: plain text chat and
+/// multimodal (image+text) chat. Every other tag (embeddings, translation,
+/// text-to-video, ...) is not a usable chat model and is dropped. This is an
+/// allowlist applied per row, replacing a single server-side `pipeline_tag`
+/// filter that could not express "text OR image-text" and so hid vision repos.
+const SEARCHABLE_PIPELINE_TAGS: &[&str] = &["text-generation", "image-text-to-text"];
+
 /// One entry in the Hugging Face `/api/models` search response. Only the fields
-/// surfaced by [`HfModelSummary`] are decoded; everything else is ignored so
-/// upstream additions cannot break decoding.
+/// surfaced by [`HfModelSummary`] (and the `pipeline_tag` allowlist gate) are
+/// decoded; everything else is ignored so upstream additions cannot break it.
 #[derive(Deserialize)]
 struct HfSearchEntry {
     #[serde(default)]
@@ -1652,10 +1693,57 @@ struct HfSearchEntry {
     /// defaults to `false`.
     #[serde(default, deserialize_with = "deserialize_gated")]
     gated: bool,
+    /// HF pipeline tag, present because the search requests `expand[]=pipeline_tag`.
+    /// Gated against [`SEARCHABLE_PIPELINE_TAGS`]; an absent tag drops the row.
+    #[serde(default)]
+    pipeline_tag: Option<String>,
     /// HF-parsed GGUF metadata, present because the search requests
-    /// `expand[]=gguf`. Only the context window is read; sanitized before use.
+    /// `expand[]=gguf`: the context window and the chat template / architecture.
     #[serde(default)]
     gguf: Option<HfGgufMeta>,
+    /// Repo file listing, present because the search requests `expand[]=siblings`.
+    /// Scanned for an `mmproj*.gguf` companion to derive the vision flag.
+    #[serde(default)]
+    siblings: Vec<HfSibling>,
+}
+
+/// Projects one raw search entry onto a summary row, or `None` when the row is
+/// not a usable chat model (empty id, or a `pipeline_tag` missing or outside
+/// [`SEARCHABLE_PIPELINE_TAGS`]).
+fn search_entry_to_summary(entry: HfSearchEntry) -> Option<HfModelSummary> {
+    let HfSearchEntry {
+        id,
+        downloads,
+        gated,
+        pipeline_tag,
+        gguf,
+        siblings,
+    } = entry;
+    if id.is_empty() {
+        return None;
+    }
+    if !pipeline_tag
+        .as_deref()
+        .is_some_and(|tag| SEARCHABLE_PIPELINE_TAGS.contains(&tag))
+    {
+        return None;
+    }
+    let vision = siblings.iter().any(|s| is_mmproj(&s.rfilename));
+    // Reasoning runs through the one shared derivation so a search row and the
+    // install it leads to can never disagree. A search row has no chosen file,
+    // so the name fallback (used only when no template ships) sees the repo only.
+    let chat_template = gguf.as_ref().and_then(|g| g.chat_template.as_deref());
+    let architecture = gguf.as_ref().and_then(|g| g.architecture.as_deref());
+    let (thinking, _) = reasoning_flags_from_metadata(chat_template, architecture, &id, "");
+    let context_length = sanitize_context_length(gguf.and_then(|g| g.context_length));
+    Some(HfModelSummary {
+        id,
+        downloads,
+        gated,
+        context_length,
+        vision,
+        thinking,
+    })
 }
 
 /// Normalizes Hugging Face's polymorphic `gated` field (a bool `false` or a
@@ -1672,21 +1760,19 @@ where
     })
 }
 
-/// Pure parse of an `/api/models` search body into summary rows. Rows with an
-/// empty `id` are dropped rather than surfaced as un-installable blanks.
-pub fn parse_search_results(body: &[u8]) -> Result<Vec<HfModelSummary>, String> {
+/// Pure parse of an `/api/models` search body into a page of summary rows.
+/// Non-chat and empty-id rows are dropped per [`search_entry_to_summary`];
+/// `has_more` is set from the raw entry count against `limit` so dropped rows
+/// never cut pagination short.
+pub fn parse_search_results(body: &[u8], limit: usize) -> Result<HfSearchPage, String> {
     let entries: Vec<HfSearchEntry> = serde_json::from_slice(body)
         .map_err(|e| format!("failed to decode Hugging Face search response: {e}"))?;
-    Ok(entries
+    let has_more = entries.len() >= limit;
+    let rows = entries
         .into_iter()
-        .filter(|e| !e.id.is_empty())
-        .map(|e| HfModelSummary {
-            id: e.id,
-            downloads: e.downloads,
-            gated: e.gated,
-            context_length: sanitize_context_length(e.gguf.and_then(|g| g.context_length)),
-        })
-        .collect())
+        .filter_map(search_entry_to_summary)
+        .collect();
+    Ok(HfSearchPage { rows, has_more })
 }
 
 // ─── RAM-fit estimation + annotated view rows ────────────────────────────────
@@ -1847,7 +1933,7 @@ pub async fn fetch_hf_search(
     base_url: &str,
     query: &str,
     limit: usize,
-) -> Result<Vec<HfModelSummary>, String> {
+) -> Result<HfSearchPage, String> {
     let query = query.trim();
     if query.len() > MAX_HF_SEARCH_QUERY_LEN {
         return Err(format!(
@@ -1863,7 +1949,7 @@ pub async fn fetch_hf_search(
         limit,
     )
     .await?;
-    parse_search_results(&body)
+    parse_search_results(&body, limit)
 }
 
 /// Innermost search fetcher with timeout, body cap, and result limit
@@ -1883,20 +1969,25 @@ async fn fetch_hf_search_inner(
     let endpoint = format!("{}/api/models", base_url.trim_end_matches('/'));
     let limit = limit.to_string();
     // `filter=gguf` matches repos *tagged* gguf (the dedicated quant repos that
-    // actually ship `.gguf` files), and `pipeline_tag=text-generation` keeps
-    // them to chat/instruct models. `library=gguf` is deliberately NOT used: it
+    // actually ship `.gguf` files). `library=gguf` is deliberately NOT used: it
     // also matches base repos that merely link to GGUF quants elsewhere, so the
-    // rows would have no downloadable `.gguf` files of their own.
+    // rows would have no downloadable `.gguf` files of their own. The chat-model
+    // gate is NOT a server `pipeline_tag` filter: that param takes a single value
+    // and so cannot express "text-generation OR image-text-to-text", which hid
+    // every multimodal repo. Instead each row's `pipeline_tag` is expanded and
+    // checked against `SEARCHABLE_PIPELINE_TAGS` in `search_entry_to_summary`.
     let mut params: Vec<(&str, &str)> = vec![
         ("filter", "gguf"),
-        ("pipeline_tag", "text-generation"),
         ("sort", "downloads"),
         ("direction", "-1"),
         ("limit", &limit),
-        // `expand[]=gguf` asks the search to include each repo's parsed GGUF
-        // metadata (the model's context window) inline, so the browser can show
-        // it on every row without a second request per repo.
+        // One expand set carries everything a row needs in a single request, so
+        // there is no per-repo follow-up call: `gguf` (context window + chat
+        // template + architecture), `siblings` (the file list, scanned for an
+        // mmproj vision companion), and `pipeline_tag` (the chat-model allowlist).
         ("expand[]", "gguf"),
+        ("expand[]", "siblings"),
+        ("expand[]", "pipeline_tag"),
     ];
     // An empty query browses the most-downloaded GGUF repos; only attach the
     // search term when the user actually typed one.
@@ -2125,34 +2216,39 @@ pub(crate) fn curated_reasoning_flags(repo: &str, file_name: &str) -> Option<(bo
         .map(|s| (s.thinking, s.reasoning_always))
 }
 
-/// Derives `(thinking, reasoning_always)` for a pasted model from its chat
-/// template. A readable template is classified by
-/// [`reasoning::classify_reasoning`]; an absent template falls back to
-/// `fallback` (the placeholder flags), leaving the runtime backstop to correct
-/// an always-reasoning model from real output.
-pub(crate) fn pasted_reasoning_flags(
-    fallback: (bool, bool),
-    template: Option<&str>,
+/// Derives `(thinking, reasoning_always)` for a non-curated model from its GGUF
+/// metadata: a readable chat template is classified by
+/// [`reasoning::classify_reasoning`]; an absent or empty template falls back to
+/// the repo/file name via [`detect_thinking`], leaving `reasoning_always` off
+/// for the runtime backstop to correct an always-reasoning model from real
+/// output. The single reasoning-derivation point: the Browse-all search rows and
+/// the install/heal path both route through it, so identical metadata always
+/// yields identical flags. The names feed only the fallback; for a repo-level
+/// search row with no chosen file, pass an empty `file`.
+pub(crate) fn reasoning_flags_from_metadata(
+    chat_template: Option<&str>,
     architecture: Option<&str>,
+    repo: &str,
+    file: &str,
 ) -> (bool, bool) {
-    match template {
-        Some(t) => reasoning::classify_reasoning(t, architecture).flags(),
-        None => fallback,
+    match chat_template {
+        Some(t) if !t.is_empty() => reasoning::classify_reasoning(t, architecture).flags(),
+        _ => (detect_thinking(repo, file), false),
     }
 }
 
 /// Resolves the final reasoning flags for a model: curated registry flags when
-/// it is a starter, otherwise the class read from the on-disk GGUF blob's chat
-/// template. Coverage-off: the registry lookup and template classification are
-/// tested through [`curated_reasoning_flags`] / [`pasted_reasoning_flags`]; this
-/// wrapper only adds the filesystem read of the blob.
+/// it is a starter, otherwise the class derived from the on-disk GGUF blob's
+/// chat template (with the name fallback baked into
+/// [`reasoning_flags_from_metadata`]). Coverage-off: the registry lookup and the
+/// derivation are tested through [`curated_reasoning_flags`] /
+/// [`reasoning_flags_from_metadata`]; this wrapper only adds the blob read.
 #[cfg_attr(coverage_nightly, coverage(off))]
 fn resolve_reasoning_flags(
     store: &storage::ModelStore,
     repo: &str,
     file_name: &str,
     sha256: &str,
-    fallback: (bool, bool),
 ) -> (bool, bool) {
     if let Some(curated) = curated_reasoning_flags(repo, file_name) {
         return curated;
@@ -2160,7 +2256,7 @@ fn resolve_reasoning_flags(
     let meta = gguf::read_gguf_metadata_from_file(&store.blob_path(sha256));
     let template = meta.as_ref().and_then(|m| m.chat_template.as_deref());
     let architecture = meta.as_ref().and_then(|m| m.architecture.as_deref());
-    pasted_reasoning_flags(fallback, template, architecture)
+    reasoning_flags_from_metadata(template, architecture, repo, file_name)
 }
 
 /// Re-classifies installed built-in rows whose `reasoning_always` is `NULL`
@@ -2179,13 +2275,8 @@ pub fn heal_unclassified_reasoning(conn: &rusqlite::Connection, store: &storage:
         }
     };
     for row in pending {
-        let (thinking, reasoning_always) = resolve_reasoning_flags(
-            store,
-            &row.repo,
-            &row.file_name,
-            &row.sha256,
-            (row.thinking, row.reasoning_always),
-        );
+        let (thinking, reasoning_always) =
+            resolve_reasoning_flags(store, &row.repo, &row.file_name, &row.sha256);
         if let Err(e) = manifest::update_classification(conn, &row.id, thinking, reasoning_always) {
             eprintln!(
                 "thuki: [models] reasoning heal: failed to persist {}: {e}",
@@ -2423,7 +2514,7 @@ pub async fn search_hf_models(
     query: String,
     limit: usize,
     client: tauri::State<'_, reqwest::Client>,
-) -> Result<Vec<HfModelSummary>, String> {
+) -> Result<HfSearchPage, String> {
     fetch_hf_search(&client, HF_BASE_URL, &query, clamp_search_limit(limit)).await
 }
 
@@ -2577,8 +2668,10 @@ fn spawn_model_download(
 
 /// Records a completed download: manifest insert, removal of blobs the
 /// replaced row no longer references (a re-download whose upstream content
-/// changed must not strand the old multi-GB blob), then the builtin
-/// provider's `model` field (the active provider is never changed here).
+/// changed must not strand the old multi-GB blob), then adopts the model as the
+/// builtin provider's selection ONLY when none is chosen yet (the active
+/// provider is never changed here). A later install must not displace the
+/// model the user already selected.
 #[cfg_attr(coverage_nightly, coverage(off))]
 fn finalize_install(
     app: &tauri::AppHandle,
@@ -2589,13 +2682,8 @@ fn finalize_install(
     // picker badge and `/think` gate are correct the instant the install lands.
     // Curated starters keep their registry flags; a template that cannot be read
-    // keeps the placeholder flags for the runtime backstop to correct.
+    // defers to the repo/file name, with the runtime backstop correcting the rest.
-    let (thinking, reasoning_always) = resolve_reasoning_flags(
-        store.inner(),
-        &model.repo,
-        &model.file_name,
-        &model.sha256,
-        (model.thinking, model.reasoning_always),
-    );
+    let (thinking, reasoning_always) =
+        resolve_reasoning_flags(store.inner(), &model.repo, &model.file_name, &model.sha256);
     let model = manifest::InstalledModel {
         thinking,
         reasoning_always,
@@ -5068,67 +5156,117 @@ mod tests {
 
     // ── Model library: Hugging Face search ───────────────────────────────────
 
-    /// Search fixture exercising every `gated` shape (bool, strategy string,
-    /// absent, null) plus an empty-id row that must be dropped.
+    /// Search fixture exercising the capability derivation and the pipeline
+    /// allowlist: each `gated` shape (bool, strategy string, absent, null),
+    /// vision from an mmproj sibling, thinking from the chat template (template
+    /// class wins over a reasoning-y name) with a name fallback when no template
+    /// is present, a non-chat pipeline that is dropped, an untagged repo that is
+    /// dropped, and an empty-id row that is dropped.
     fn search_fixture() -> serde_json::Value {
         serde_json::json!([
+            // alpha: chat model that ships an mmproj companion and an optional
+            // (`enable_thinking`) template -> vision + thinking, with context.
             {"id": "org/alpha-GGUF", "downloads": 1000, "gated": false,
-             "gguf": {"context_length": 131072}},
-            {"id": "org/beta-GGUF", "downloads": 500, "gated": "manual"},
-            {"id": "org/gamma-GGUF"},
-            {"id": "org/delta-GGUF", "downloads": 1, "gated": true,
-             "gguf": {"context_length": 9000000000u64}},
-            {"id": "org/epsilon-GGUF", "downloads": 2, "gated": null},
-            {"id": "", "downloads": 9}
+             "pipeline_tag": "text-generation",
+             "gguf": {"context_length": 131072,
+                      "chat_template": "{%- if enable_thinking %}<think>{% endif %}",
+                      "architecture": "qwen3"},
+             "siblings": [{"rfilename": "alpha-Q4_K_M.gguf"},
+                          {"rfilename": "mmproj-f16.gguf"}]},
+            // beta: a multimodal pipeline tag is allowlisted; no mmproj sibling
+            // means no vision, and an always-on `<think>` template means thinking.
+            {"id": "org/beta-GGUF", "downloads": 500, "gated": "manual",
+             "pipeline_tag": "image-text-to-text",
+             "gguf": {"chat_template": "<|im_start|>assistant\\n<think>\\n"},
+             "siblings": [{"rfilename": "beta.gguf"}]},
+            // gamma: no expanded gguf at all, so thinking falls back to the name
+            // (`QwQ` is a known reasoning family); no mmproj means no vision.
+            {"id": "org/QwQ-32B-GGUF", "downloads": 7,
+             "pipeline_tag": "text-generation"},
+            // delta: a non-chat pipeline (embeddings) is dropped by the allowlist
+            // even though it is the most downloaded.
+            {"id": "org/embed-GGUF", "downloads": 99999,
+             "pipeline_tag": "feature-extraction"},
+            // epsilon: a plain instruct template classifies as non-thinking and
+            // overrides the reasoning-y repo name; its implausibly large context
+            // is discarded (the row itself is kept); no mmproj means no vision.
+            {"id": "org/Reasoner-GGUF", "downloads": 2, "gated": null,
+             "pipeline_tag": "text-generation",
+             "gguf": {"context_length": 9000000000u64,
+                      "chat_template": "<|user|>{{x}}<|assistant|>",
+                      "architecture": "llama"},
+             "siblings": [{"rfilename": "r.gguf"}]},
+            // zeta: no pipeline tag at all is dropped (the allowlist requires an
+            // explicit chat-capable tag).
+            {"id": "org/untagged-GGUF", "downloads": 3},
+            // empty id is dropped.
+            {"id": "", "downloads": 9, "pipeline_tag": "text-generation"}
         ])
     }
 
     #[test]
-    fn parse_search_results_maps_rows_normalizes_gated_and_context() {
+    fn parse_search_results_maps_capabilities_and_drops_non_chat_rows() {
         let body = search_fixture().to_string();
-        let rows = parse_search_results(body.as_bytes()).unwrap();
+        // A generous limit keeps `has_more` false so this case stays about rows.
+        let page = parse_search_results(body.as_bytes(), 100).unwrap();
+        assert!(!page.has_more);
         assert_eq!(
-            rows,
+            page.rows,
             vec![
-                // alpha carries a valid context window from its expanded gguf.
                 HfModelSummary {
                     id: "org/alpha-GGUF".to_string(),
                     downloads: 1000,
                     gated: false,
                     context_length: Some(131072),
+                    vision: true,
+                    thinking: true,
                 },
                 HfModelSummary {
                     id: "org/beta-GGUF".to_string(),
                     downloads: 500,
                     gated: true,
                     context_length: None,
+                    vision: false,
+                    thinking: true,
                 },
+                // gamma: thinking healed from the `QwQ` name when no template ships.
                 HfModelSummary {
-                    id: "org/gamma-GGUF".to_string(),
-                    downloads: 0,
+                    id: "org/QwQ-32B-GGUF".to_string(),
+                    downloads: 7,
                     gated: false,
                     context_length: None,
+                    vision: false,
+                    thinking: true,
                 },
-                // delta's declared context is implausibly large, so it is dropped.
+                // epsilon: the plain template wins over the reasoning-y name.
                 HfModelSummary {
-                    id: "org/delta-GGUF".to_string(),
-                    downloads: 1,
-                    gated: true,
-                    context_length: None,
-                },
-                HfModelSummary {
-                    id: "org/epsilon-GGUF".to_string(),
+                    id: "org/Reasoner-GGUF".to_string(),
                     downloads: 2,
                     gated: false,
                     context_length: None,
+                    vision: false,
+                    thinking: false,
                 },
             ]
         );
     }
 
+    #[test]
+    fn parse_search_results_flags_has_more_when_the_page_is_full() {
+        let body = serde_json::json!([
+            {"id": "org/a-GGUF", "downloads": 2, "pipeline_tag": "text-generation"},
+            {"id": "org/b-GGUF", "downloads": 1, "pipeline_tag": "text-generation"}
+        ])
+        .to_string();
+        // Two raw entries: a page of two is full (more may exist on the Hub)...
+        assert!(parse_search_results(body.as_bytes(), 2).unwrap().has_more);
+        // ...but a page asking for three was not filled, so the Hub is exhausted.
+        assert!(!parse_search_results(body.as_bytes(), 3).unwrap().has_more);
+    }
+
     #[test]
     fn parse_search_results_rejects_invalid_json() {
-        let err = parse_search_results(b"not json").unwrap_err();
+        let err = parse_search_results(b"not json", 30).unwrap_err();
         assert!(err.contains("failed to decode"), "got: {err}");
     }
 
@@ -5139,16 +5277,29 @@ mod tests {
             downloads: 7,
             gated: true,
             context_length: Some(131072),
+            vision: true,
+            thinking: false,
         })
         .unwrap();
         assert_eq!(
             v,
             serde_json::json!({
                 "id": "o/r", "downloads": 7, "gated": true, "context_length": 131072,
+                "vision": true, "thinking": false,
             })
         );
     }
 
+    #[test]
+    fn hf_search_page_serializes_snake_case() {
+        let v = serde_json::to_value(HfSearchPage {
+            rows: vec![],
+            has_more: true,
+        })
+        .unwrap();
+        assert_eq!(v, serde_json::json!({ "rows": [], "has_more": true }));
+    }
+
     // ── RAM-fit estimation + annotated views ─────────────────────────────────
 
     #[test]
@@ -5258,16 +5409,26 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn fetch_hf_search_returns_rows_and_sends_filtered_query() {
+    async fn fetch_hf_search_returns_rows_and_sends_widened_query() {
         let mut server = mockito::Server::new_async().await;
+        // The query no longer pins `pipeline_tag=text-generation` (that excluded
+        // multimodal `image-text-to-text` repos); chat-vs-non-chat is now an
+        // allowlist applied to each row's expanded `pipeline_tag`. The expand set
+        // carries the gguf block (context + chat template), the file list (mmproj
+        // -> vision), and the pipeline tag (the allowlist) in one request.
         let mock = server
             .mock("GET", "/api/models")
             .match_query(mockito::Matcher::AllOf(vec![
                 mockito::Matcher::UrlEncoded("filter".into(), "gguf".into()),
-                mockito::Matcher::UrlEncoded("pipeline_tag".into(), "text-generation".into()),
                 mockito::Matcher::UrlEncoded("search".into(), "qwen".into()),
                 mockito::Matcher::UrlEncoded("sort".into(), "downloads".into()),
                 mockito::Matcher::UrlEncoded("limit".into(), "60".into()),
+                // The widened query expands the gguf block, the file list, and
+                // the pipeline tag. These matchers only prove presence: a stray
+                // `pipeline_tag=text-generation` would not fail this test.
+                mockito::Matcher::Regex("expand%5B%5D=gguf".into()),
+                mockito::Matcher::Regex("expand%5B%5D=siblings".into()),
+                mockito::Matcher::Regex("expand%5B%5D=pipeline_tag".into()),
             ]))
             .with_status(200)
             .with_header("content-type", "application/json")
@@ -5275,12 +5436,16 @@ mod tests {
             .create_async()
             .await;
         let client = reqwest::Client::new();
-        let rows = fetch_hf_search(&client, &server.url(), "qwen", 60)
+        let page = fetch_hf_search(&client, &server.url(), "qwen", 60)
             .await
             .unwrap();
         mock.assert_async().await;
-        assert_eq!(rows.len(), 5);
-        assert_eq!(rows[0].id, "org/alpha-GGUF");
+        // Four chat rows survive the allowlist; the multimodal beta row proves
+        // the widened query surfaces a repo the old filter would have dropped.
+        assert_eq!(page.rows.len(), 4);
+        assert_eq!(page.rows[0].id, "org/alpha-GGUF");
+        assert!(page.rows.iter().any(|r| r.id == "org/beta-GGUF"));
+        assert!(!page.has_more);
     }
 
     #[tokio::test]
@@ -5295,7 +5460,7 @@ mod tests {
             .await;
         let client = reqwest::Client::new();
         // Whitespace-only query trims to empty and the search param is dropped.
-        let rows = fetch_hf_search(
+        let page = fetch_hf_search(
             &client,
             &server.url(),
             "   ",
@@ -5303,7 +5468,8 @@ mod tests {
         )
         .await
         .unwrap();
-        assert!(rows.is_empty());
+        assert!(page.rows.is_empty());
+        assert!(!page.has_more);
     }
 
     #[tokio::test]
@@ -5595,39 +5761,56 @@ mod tests {
     }
 
     #[test]
-    fn pasted_reasoning_flags_classify_from_template() {
+    fn reasoning_flags_from_metadata_classify_from_template() {
         // Optional family: thinking on, no badge.
         assert_eq!(
-            pasted_reasoning_flags(
-                (false, false),
+            reasoning_flags_from_metadata(
                 Some("{% if enable_thinking %}"),
-                Some("qwen3")
+                Some("qwen3"),
+                "any/repo",
+                "x.gguf"
             ),
             (true, false)
         );
         // Always family: thinking on, badge.
         assert_eq!(
-            pasted_reasoning_flags((false, false), Some("<think>"), None),
+            reasoning_flags_from_metadata(Some("<think>"), None, "any/repo", "x.gguf"),
             (true, true)
         );
         // Non-reasoning: both off.
         assert_eq!(
-            pasted_reasoning_flags((false, false), Some("plain instruct"), None),
+            reasoning_flags_from_metadata(Some("plain instruct"), None, "any/repo", "x.gguf"),
+            (false, false)
+        );
+        // A readable template wins over a reasoning-y name.
+        assert_eq!(
+            reasoning_flags_from_metadata(Some("plain instruct"), None, "org/QwQ-32B", "x.gguf"),
             (false, false)
         );
     }
 
     #[test]
-    fn pasted_reasoning_flags_fall_back_when_template_absent() {
-        // No readable template: keep the placeholder flags for the backstop.
+    fn reasoning_flags_from_metadata_falls_back_to_name_without_template() {
+        // No template: the name decides thinking; `reasoning_always` stays off
+        // for the runtime backstop. Marker in the repo, then in the file name.
         assert_eq!(
-            pasted_reasoning_flags((true, true), None, None),
-            (true, true)
+            reasoning_flags_from_metadata(None, None, "org/QwQ-32B", "x.gguf"),
+            (true, false)
         );
         assert_eq!(
-            pasted_reasoning_flags((false, false), None, Some("qwen3")),
+            reasoning_flags_from_metadata(None, None, "org/plain", "model-reasoning.gguf"),
+            (true, false)
+        );
+        // No template and no marker: both off.
+        assert_eq!(
+            reasoning_flags_from_metadata(None, None, "org/plain", "model.gguf"),
             (false, false)
         );
+        // An empty template is treated as no template and falls back to the name.
+        assert_eq!(
+            reasoning_flags_from_metadata(Some(""), None, "org/QwQ-32B", "x.gguf"),
+            (true, false)
+        );
     }
 
     // ── Model library: delete ────────────────────────────────────────────────
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 242c1f61..045fd868 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -30,7 +30,7 @@ import {
   HF_PAGE_SIZE,
   clearHfSearchCache,
 } from './useHfSearch';
-import type { HfModelSummary } from '../../../types/hf';
+import type { HfModelSummary, HfSearchPage } from '../../../types/hf';
 import type { HfGgufFile } from '../../../types/starter';
 import type { RawAppConfig } from '../../types';
 
@@ -69,17 +69,39 @@ function mockCommands(responses: Record<string, unknown>) {
 }
 
 const RESULTS: HfModelSummary[] = [
+  // A vision model (mmproj companion), used for the Vision pill assertion.
   {
     id: 'google/gemma-4-12b-it-GGUF',
     downloads: 1_200_000,
     gated: false,
     context_length: 262_144,
+    vision: true,
+    thinking: false,
+  },
+  // No context window and no capabilities: covers the "skip the segment" path
+  // and the text-only pill case.
+  {
+    id: 'unsloth/gemma-4-27b-it-GGUF',
+    downloads: 410_000,
+    gated: false,
+    vision: false,
+    thinking: false,
+  },
+  // A thinking model, used for the Thinking pill assertion.
+  {
+    id: 'meta-llama/Llama-3-8B-GGUF',
+    downloads: 9_000,
+    gated: true,
+    vision: false,
+    thinking: true,
   },
-  // No context window: covers the "skip the segment" path.
-  { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
-  { id: 'meta-llama/Llama-3-8B-GGUF', downloads: 9_000, gated: true },
 ];
 
+/** Wraps rows in the page envelope the backend returns. */
+function pageOf(rows: HfModelSummary[], hasMore = false): HfSearchPage {
+  return { rows, has_more: hasMore };
+}
+
 const GGUFS: HfGgufFile[] = [
   {
     file: 'gemma-q4.gguf',
@@ -118,7 +140,7 @@ const CONFIG_AFTER_INSTALL = { marker: 'fresh' } as unknown as RawAppConfig;
  */
 function discoverResponses(overrides: Record<string, unknown> = {}) {
   return {
-    search_hf_models: RESULTS,
+    search_hf_models: pageOf(RESULTS),
     list_hf_repo_ggufs: GGUFS,
     get_config: CONFIG_AFTER_INSTALL,
     ...overrides,
@@ -177,6 +199,29 @@ describe('BrowseAllPane', () => {
     expect(screen.getByText(/chat models/)).toHaveTextContent('3 chat models');
   });
 
+  it('renders capability pills per row from the repo capabilities', async () => {
+    await renderPane();
+    const visionRow = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    expect(within(visionRow).getByText('Text')).toBeInTheDocument();
+    expect(within(visionRow).getByText('Vision')).toBeInTheDocument();
+    expect(within(visionRow).queryByText('Thinking')).not.toBeInTheDocument();
+
+    const thinkingRow = screen
+      .getByText('meta-llama/Llama-3-8B-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    expect(within(thinkingRow).getByText('Thinking')).toBeInTheDocument();
+    expect(within(thinkingRow).queryByText('Vision')).not.toBeInTheDocument();
+
+    const plainRow = screen
+      .getByText('unsloth/gemma-4-27b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    expect(within(plainRow).getByText('Text')).toBeInTheDocument();
+    expect(within(plainRow).queryByText('Vision')).not.toBeInTheDocument();
+    expect(within(plainRow).queryByText('Thinking')).not.toBeInTheDocument();
+  });
+
   it('does not show a RAM-fit hint on the collapsed model row', async () => {
     await renderPane();
     // The row-level fit was an unreliable repo-id estimate and is gone; fit
@@ -188,9 +233,15 @@ describe('BrowseAllPane', () => {
 
   it('parses the org line from the full id when it has no org segment', async () => {
     await renderPane(() => {}, {
-      search_hf_models: [
-        { id: 'standalone-repo', downloads: 12, gated: false },
-      ],
+      search_hf_models: pageOf([
+        {
+          id: 'standalone-repo',
+          downloads: 12,
+          gated: false,
+          vision: false,
+          thinking: false,
+        },
+      ]),
     });
     expect(screen.getByText('standalone-repo')).toBeInTheDocument();
     expect(
@@ -211,9 +262,15 @@ describe('BrowseAllPane', () => {
     invokeMock.mockClear();
     mockCommands(
       discoverResponses({
-        search_hf_models: [
-          { id: 'qwen/Qwen3-GGUF', downloads: 50, gated: false },
-        ],
+        search_hf_models: pageOf([
+          {
+            id: 'qwen/Qwen3-GGUF',
+            downloads: 50,
+            gated: false,
+            vision: false,
+            thinking: false,
+          },
+        ]),
       }),
     );
     fireEvent.change(screen.getByRole('searchbox'), {
@@ -681,8 +738,8 @@ describe('BrowseAllPane', () => {
   });
 
   it('shows a loading hint while the search is in flight', async () => {
-    let resolveSearch!: (value: HfModelSummary[]) => void;
-    const pending = new Promise<HfModelSummary[]>((res) => {
+    let resolveSearch!: (value: HfSearchPage) => void;
+    const pending = new Promise<HfSearchPage>((res) => {
       resolveSearch = res;
     });
     mockCommands(discoverResponses({ search_hf_models: pending }));
@@ -692,7 +749,7 @@ describe('BrowseAllPane', () => {
     await flush();
     expect(screen.getByText('Searching…')).toBeInTheDocument();
     await act(async () => {
-      resolveSearch(RESULTS);
+      resolveSearch(pageOf(RESULTS));
       await Promise.resolve();
     });
     await waitFor(() =>
@@ -701,7 +758,7 @@ describe('BrowseAllPane', () => {
   });
 
   it('shows a no-results message when the search returns nothing', async () => {
-    await renderPane(() => {}, { search_hf_models: [] });
+    await renderPane(() => {}, { search_hf_models: pageOf([]) });
     expect(screen.getByText('No models found.')).toBeInTheDocument();
     expect(screen.getByText(/chat models/)).toHaveTextContent('0 chat models');
   });
@@ -713,8 +770,12 @@ describe('BrowseAllPane', () => {
         id: `org/repo-${i}-GGUF`,
         downloads: n - i,
         gated: false,
+        vision: false,
+        thinking: false,
       }));
-    mockCommands(discoverResponses({ search_hf_models: full(HF_PAGE_SIZE) }));
+    mockCommands(
+      discoverResponses({ search_hf_models: pageOf(full(HF_PAGE_SIZE), true) }),
+    );
     render(<BrowseAllPane onSaved={() => {}} />, {
       wrapper: DownloadsProvider,
     });
@@ -725,7 +786,9 @@ describe('BrowseAllPane', () => {
     const loadMore = screen.getByRole('button', { name: 'Load more' });
     invokeMock.mockClear();
     mockCommands(
-      discoverResponses({ search_hf_models: full(HF_PAGE_SIZE + 5) }),
+      discoverResponses({
+        search_hf_models: pageOf(full(HF_PAGE_SIZE + 5), false),
+      }),
     );
     fireEvent.click(loadMore);
     await act(async () => {
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 00765919..7d307020 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -23,6 +23,7 @@ import {
   type DownloadsContextValue,
 } from '../../../contexts/DownloadsContext';
 import { downloadKey } from '../../../hooks/downloadKey';
+import { CapabilityPills } from './CapabilityPills';
 import { useHfSearch } from './useHfSearch';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
@@ -241,6 +242,7 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
             >
               {model.id}
             </button>
+            <CapabilityPills vision={model.vision} thinking={model.thinking} />
             {model.gated ? (
               <span className={styles.gatedBadge}>Gated</span>
             ) : null}
diff --git a/src/settings/tabs/models/CapabilityPills.module.css b/src/settings/tabs/models/CapabilityPills.module.css
new file mode 100644
index 00000000..9b685a79
--- /dev/null
+++ b/src/settings/tabs/models/CapabilityPills.module.css
@@ -0,0 +1,26 @@
+/* Shared capability-pill styling for the Discover panes (Staff picks +
+ * Browse all). Colours come from the global `--cap-*` tokens in
+ * settings.module.css so every surface labels capability identically. */
+
+.pills {
+  display: inline-flex;
+  gap: 5px;
+}
+.pill {
+  display: inline-flex;
+  align-items: center;
+  font-size: 9.5px;
+  font-weight: 540;
+  padding: 2px 7px;
+  border-radius: var(--radius-pill);
+  background: rgba(255, 255, 255, 0.05);
+}
+.pillText {
+  color: var(--cap-text);
+}
+.pillVision {
+  color: var(--cap-vision);
+}
+.pillThinking {
+  color: var(--cap-think);
+}
diff --git a/src/settings/tabs/models/CapabilityPills.test.tsx b/src/settings/tabs/models/CapabilityPills.test.tsx
new file mode 100644
index 00000000..756f2dc5
--- /dev/null
+++ b/src/settings/tabs/models/CapabilityPills.test.tsx
@@ -0,0 +1,38 @@
+/**
+ * Unit tests for {@link CapabilityPills}, the shared Text / Vision / Thinking
+ * capability badge row used by both the Staff-picks and Browse-all Discover
+ * panes. Text is unconditional; Vision and Thinking are flag-gated.
+ */
+
+import { render, screen } from '@testing-library/react';
+import { describe, expect, it } from 'vitest';
+
+import { CapabilityPills } from './CapabilityPills';
+
+describe('CapabilityPills', () => {
+  it('always renders the Text pill', () => {
+    render(<CapabilityPills vision={false} thinking={false} />);
+    expect(screen.getByText('Text')).toBeInTheDocument();
+    expect(screen.queryByText('Vision')).not.toBeInTheDocument();
+    expect(screen.queryByText('Thinking')).not.toBeInTheDocument();
+  });
+
+  it('renders the Vision pill only when vision is set', () => {
+    render(<CapabilityPills vision thinking={false} />);
+    expect(screen.getByText('Vision')).toBeInTheDocument();
+    expect(screen.queryByText('Thinking')).not.toBeInTheDocument();
+  });
+
+  it('renders the Thinking pill only when thinking is set', () => {
+    render(<CapabilityPills vision={false} thinking />);
+    expect(screen.getByText('Thinking')).toBeInTheDocument();
+    expect(screen.queryByText('Vision')).not.toBeInTheDocument();
+  });
+
+  it('renders Vision and Thinking together when both are set', () => {
+    render(<CapabilityPills vision thinking />);
+    expect(screen.getByText('Text')).toBeInTheDocument();
+    expect(screen.getByText('Vision')).toBeInTheDocument();
+    expect(screen.getByText('Thinking')).toBeInTheDocument();
+  });
+});
diff --git a/src/settings/tabs/models/CapabilityPills.tsx b/src/settings/tabs/models/CapabilityPills.tsx
new file mode 100644
index 00000000..1d8f9686
--- /dev/null
+++ b/src/settings/tabs/models/CapabilityPills.tsx
@@ -0,0 +1,32 @@
+/**
+ * The capability badge row shown next to a model name in Discover: a constant
+ * `Text` pill plus optional `Vision` and `Thinking` pills. Shared by both the
+ * Staff-picks and Browse-all panes so the two surfaces label capability
+ * identically. Capability is a per-model property (every quant of a repo shares
+ * it), so the pills live on the model/repo row, never the per-quant list.
+ */
+
+import styles from './CapabilityPills.module.css';
+
+interface CapabilityPillsProps {
+  /** The model accepts image input (an mmproj vision companion is present). */
+  vision: boolean;
+  /** The model emits reasoning tokens. */
+  thinking: boolean;
+}
+
+export function CapabilityPills({ vision, thinking }: CapabilityPillsProps) {
+  return (
+    <span className={styles.pills}>
+      <span className={`${styles.pill} ${styles.pillText}`}>Text</span>
+      {vision ? (
+        <span className={`${styles.pill} ${styles.pillVision}`}>Vision</span>
+      ) : null}
+      {thinking ? (
+        <span className={`${styles.pill} ${styles.pillThinking}`}>
+          Thinking
+        </span>
+      ) : null}
+    </span>
+  );
+}
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index b633a199..abf1fe36 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -56,7 +56,7 @@ beforeEach(() => {
   clearHfSearchCache();
   invokeMock.mockImplementation(async (cmd: string) => {
     if (cmd === 'get_staff_picks') return [STARTER];
-    if (cmd === 'search_hf_models') return [];
+    if (cmd === 'search_hf_models') return { rows: [], has_more: false };
     return undefined;
   });
 });
@@ -154,13 +154,18 @@ describe('DiscoverPane download persistence', () => {
         }
         if (cmd === 'get_staff_picks') return [STARTER];
         if (cmd === 'search_hf_models') {
-          return [
-            {
-              id: 'google/gemma-4-12b-it-GGUF',
-              downloads: 1_200_000,
-              gated: false,
-            },
-          ];
+          return {
+            rows: [
+              {
+                id: 'google/gemma-4-12b-it-GGUF',
+                downloads: 1_200_000,
+                gated: false,
+                vision: false,
+                thinking: false,
+              },
+            ],
+            has_more: false,
+          };
         }
         if (cmd === 'list_hf_repo_ggufs') {
           return [
diff --git a/src/settings/tabs/models/StaffPicksPane.module.css b/src/settings/tabs/models/StaffPicksPane.module.css
index aa802c6b..4ccb6f4d 100644
--- a/src/settings/tabs/models/StaffPicksPane.module.css
+++ b/src/settings/tabs/models/StaffPicksPane.module.css
@@ -93,31 +93,6 @@
   text-underline-offset: 2px;
 }
 
-/* Calm capability pills: a neutral chip, the colour carried in the text only.
- * Each capability has its own warm hue (stone / rose / gold). */
-.pills {
-  display: inline-flex;
-  gap: 5px;
-}
-.pill {
-  display: inline-flex;
-  align-items: center;
-  font-size: 9.5px;
-  font-weight: 540;
-  padding: 2px 7px;
-  border-radius: var(--radius-pill);
-  background: rgba(255, 255, 255, 0.05);
-}
-.pillText {
-  color: var(--cap-text);
-}
-.pillVision {
-  color: var(--cap-vision);
-}
-.pillThinking {
-  color: var(--cap-think);
-}
-
 .sub {
   font-size: 10.5px;
   color: var(--t3);
diff --git a/src/settings/tabs/models/StaffPicksPane.tsx b/src/settings/tabs/models/StaffPicksPane.tsx
index 1ccc65d0..a96867ee 100644
--- a/src/settings/tabs/models/StaffPicksPane.tsx
+++ b/src/settings/tabs/models/StaffPicksPane.tsx
@@ -29,6 +29,7 @@ import {
   type DownloadsContextValue,
 } from '../../../contexts/DownloadsContext';
 import { downloadKey } from '../../../hooks/downloadKey';
+import { CapabilityPills } from './CapabilityPills';
 import { useStaffPicks } from '../../../components/StarterPicker';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
@@ -217,19 +218,10 @@ function ModelRow({ option, downloads, onSaved, refresh }: ModelRowProps) {
             >
               {starter.display_name}
             </button>
-            <span className={styles.pills}>
-              <span className={`${styles.pill} ${styles.pillText}`}>Text</span>
-              {starter.vision ? (
-                <span className={`${styles.pill} ${styles.pillVision}`}>
-                  Vision
-                </span>
-              ) : null}
-              {starter.thinking ? (
-                <span className={`${styles.pill} ${styles.pillThinking}`}>
-                  Thinking
-                </span>
-              ) : null}
-            </span>
+            <CapabilityPills
+              vision={starter.vision}
+              thinking={starter.thinking}
+            />
           </div>
           <div className={styles.sub}>
             {paused
diff --git a/src/settings/tabs/models/useHfSearch.test.ts b/src/settings/tabs/models/useHfSearch.test.ts
index c9de39fb..e56331e9 100644
--- a/src/settings/tabs/models/useHfSearch.test.ts
+++ b/src/settings/tabs/models/useHfSearch.test.ts
@@ -3,9 +3,12 @@
  *
  * The hook debounces the query, serializes overlapping fetches with a
  * monotonic token, drops post-unmount resolutions, and guards the IPC
- * payload at runtime. The tests drive the debounce with fake timers and
- * control resolution order with externally-settled promises so the
- * stale-token path is exercised deterministically.
+ * payload at runtime. The backend returns a {@link HfSearchPage}
+ * (`{ rows, has_more }`); `canLoadMore` follows `has_more`, not the row count,
+ * so the backend's chat-model allowlist cannot end pagination early. The tests
+ * drive the debounce with fake timers and control resolution order with
+ * externally-settled promises so the stale-token path is exercised
+ * deterministically.
  */
 
 import { act, renderHook, waitFor } from '@testing-library/react';
@@ -19,17 +22,40 @@ import {
   HF_PAGE_SIZE,
   clearHfSearchCache,
 } from './useHfSearch';
-import type { HfModelSummary } from '../../../types/hf';
+import type { HfModelSummary, HfSearchPage } from '../../../types/hf';
 
 const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
 
+/** Wraps rows in the page envelope the backend returns. */
+function page(rows: HfModelSummary[], hasMore = false): HfSearchPage {
+  return { rows, has_more: hasMore };
+}
+
 const POPULAR: HfModelSummary[] = [
-  { id: 'google/gemma-popular-GGUF', downloads: 1_000_000, gated: false },
+  {
+    id: 'google/gemma-popular-GGUF',
+    downloads: 1_000_000,
+    gated: false,
+    vision: false,
+    thinking: false,
+  },
 ];
 
 const GEMMA: HfModelSummary[] = [
-  { id: 'google/gemma-4-12b-it-GGUF', downloads: 1_200_000, gated: false },
-  { id: 'unsloth/gemma-4-27b-it-GGUF', downloads: 410_000, gated: false },
+  {
+    id: 'google/gemma-4-12b-it-GGUF',
+    downloads: 1_200_000,
+    gated: false,
+    vision: true,
+    thinking: false,
+  },
+  {
+    id: 'unsloth/gemma-4-27b-it-GGUF',
+    downloads: 410_000,
+    gated: false,
+    vision: false,
+    thinking: false,
+  },
 ];
 
 beforeEach(() => {
@@ -54,7 +80,7 @@ function deferred<T>() {
 
 describe('useHfSearch', () => {
   it('fetches the popular browse list on mount with an empty query', async () => {
-    invokeMock.mockResolvedValue(POPULAR);
+    invokeMock.mockResolvedValue(page(POPULAR));
     const { result } = renderHook(() => useHfSearch());
 
     await waitFor(() => expect(result.current.loading).toBe(false));
@@ -68,14 +94,14 @@ describe('useHfSearch', () => {
 
   it('sets the query immediately but debounces the fetch', async () => {
     vi.useFakeTimers();
-    invokeMock.mockResolvedValue(POPULAR);
+    invokeMock.mockResolvedValue(page(POPULAR));
     const { result } = renderHook(() => useHfSearch());
     // Drain the mount fetch.
     await act(async () => {
       await Promise.resolve();
     });
     invokeMock.mockClear();
-    invokeMock.mockResolvedValue(GEMMA);
+    invokeMock.mockResolvedValue(page(GEMMA));
 
     act(() => result.current.setQuery('gemma'));
     // Query is visible immediately; no fetch has fired yet.
@@ -95,13 +121,13 @@ describe('useHfSearch', () => {
 
   it('coalesces rapid input into a single fetch', async () => {
     vi.useFakeTimers();
-    invokeMock.mockResolvedValue(POPULAR);
+    invokeMock.mockResolvedValue(page(POPULAR));
     const { result } = renderHook(() => useHfSearch());
     await act(async () => {
       await Promise.resolve();
     });
     invokeMock.mockClear();
-    invokeMock.mockResolvedValue(GEMMA);
+    invokeMock.mockResolvedValue(page(GEMMA));
 
     act(() => {
       result.current.setQuery('g');
@@ -125,10 +151,10 @@ describe('useHfSearch', () => {
 
   it('drops a stale response that resolves after a newer one', async () => {
     vi.useFakeTimers();
-    const first = deferred<HfModelSummary[]>();
-    const second = deferred<HfModelSummary[]>();
+    const first = deferred<HfSearchPage>();
+    const second = deferred<HfSearchPage>();
     // Mount fetch resolves immediately so the two we care about are #2 and #3.
-    invokeMock.mockResolvedValueOnce(POPULAR);
+    invokeMock.mockResolvedValueOnce(page(POPULAR));
     invokeMock.mockReturnValueOnce(first.promise);
     invokeMock.mockReturnValueOnce(second.promise);
     const { result } = renderHook(() => useHfSearch());
@@ -150,12 +176,12 @@ describe('useHfSearch', () => {
 
     // Resolve the NEWER request first, then the older one.
     await act(async () => {
-      second.resolve(GEMMA);
+      second.resolve(page(GEMMA));
       await Promise.resolve();
     });
     expect(result.current.results).toEqual(GEMMA);
     await act(async () => {
-      first.resolve(POPULAR);
+      first.resolve(page(POPULAR));
       await Promise.resolve();
     });
     // The stale (older) response must not overwrite the newer result.
@@ -163,48 +189,87 @@ describe('useHfSearch', () => {
   });
 
   it('drops a resolution that lands after unmount', async () => {
-    const pending = deferred<HfModelSummary[]>();
+    const pending = deferred<HfSearchPage>();
     invokeMock.mockReturnValue(pending.promise);
     const { result, unmount } = renderHook(() => useHfSearch());
     expect(result.current.loading).toBe(true);
     unmount();
     // Resolving after unmount must not throw or update state.
     await act(async () => {
-      pending.resolve(POPULAR);
+      pending.resolve(page(POPULAR));
       await Promise.resolve();
     });
     // No assertion on state (unmounted); the test passes if nothing throws.
   });
 
-  it('treats a malformed payload as an empty result', async () => {
-    invokeMock.mockResolvedValue({ not: 'an array' });
+  it('treats a non-object payload as an empty result', async () => {
+    invokeMock.mockResolvedValue('nope');
     const { result } = renderHook(() => useHfSearch());
     await waitFor(() => expect(result.current.loading).toBe(false));
     expect(result.current.results).toEqual([]);
+    expect(result.current.canLoadMore).toBe(false);
   });
 
-  it('treats an array with a malformed item as an empty result', async () => {
-    invokeMock.mockResolvedValue([
-      { id: 'ok/repo', downloads: 1, gated: false },
-      { id: 5 },
-    ]);
+  it('treats a payload with a non-boolean has_more as an empty result', async () => {
+    invokeMock.mockResolvedValue({ rows: POPULAR, has_more: 'yes' });
     const { result } = renderHook(() => useHfSearch());
     await waitFor(() => expect(result.current.loading).toBe(false));
     expect(result.current.results).toEqual([]);
   });
 
-  it('treats an array containing a null item as an empty result', async () => {
-    invokeMock.mockResolvedValue([null]);
+  it('treats a payload whose rows are not an array as an empty result', async () => {
+    invokeMock.mockResolvedValue({ rows: 'nope', has_more: false });
     const { result } = renderHook(() => useHfSearch());
     await waitFor(() => expect(result.current.loading).toBe(false));
     expect(result.current.results).toEqual([]);
   });
 
+  it('rejects rows with any malformed field', async () => {
+    // Each row trips a different guard branch: bad id, downloads, gated,
+    // vision, thinking, a non-object row, and a null row.
+    const malformed = [
+      { id: 5, downloads: 1, gated: false, vision: false, thinking: false },
+      {
+        id: 'a/b',
+        downloads: 'x',
+        gated: false,
+        vision: false,
+        thinking: false,
+      },
+      { id: 'a/b', downloads: 1, gated: 'x', vision: false, thinking: false },
+      { id: 'a/b', downloads: 1, gated: false, vision: 'x', thinking: false },
+      { id: 'a/b', downloads: 1, gated: false, vision: false, thinking: 'x' },
+      'not-an-object',
+      null,
+    ];
+    for (const bad of malformed) {
+      clearHfSearchCache();
+      invokeMock.mockReset();
+      invokeMock.mockResolvedValue({
+        rows: [
+          {
+            id: 'ok/repo',
+            downloads: 1,
+            gated: false,
+            vision: false,
+            thinking: false,
+          },
+          bad,
+        ],
+        has_more: false,
+      });
+      const { result, unmount } = renderHook(() => useHfSearch());
+      await waitFor(() => expect(result.current.loading).toBe(false));
+      expect(result.current.results).toEqual([]);
+      unmount();
+    }
+  });
+
   it('drops a stale rejection that lands after a newer success', async () => {
     vi.useFakeTimers();
-    const first = deferred<HfModelSummary[]>();
-    const second = deferred<HfModelSummary[]>();
-    invokeMock.mockResolvedValueOnce(POPULAR);
+    const first = deferred<HfSearchPage>();
+    const second = deferred<HfSearchPage>();
+    invokeMock.mockResolvedValueOnce(page(POPULAR));
     invokeMock.mockReturnValueOnce(first.promise);
     invokeMock.mockReturnValueOnce(second.promise);
     const { result } = renderHook(() => useHfSearch());
@@ -224,7 +289,7 @@ describe('useHfSearch', () => {
 
     // Newer request succeeds first; the older one then rejects.
     await act(async () => {
-      second.resolve(GEMMA);
+      second.resolve(page(GEMMA));
       await Promise.resolve();
     });
     expect(result.current.results).toEqual(GEMMA);
@@ -245,13 +310,13 @@ describe('useHfSearch', () => {
 
   it('passes a non-empty query verbatim', async () => {
     vi.useFakeTimers();
-    invokeMock.mockResolvedValue(POPULAR);
+    invokeMock.mockResolvedValue(page(POPULAR));
     const { result } = renderHook(() => useHfSearch());
     await act(async () => {
       await Promise.resolve();
     });
     invokeMock.mockClear();
-    invokeMock.mockResolvedValue(GEMMA);
+    invokeMock.mockResolvedValue(page(GEMMA));
 
     act(() => result.current.setQuery('llama'));
     await act(async () => {
@@ -265,7 +330,7 @@ describe('useHfSearch', () => {
   });
 
   it('serves a repeated query from cache without re-fetching', async () => {
-    invokeMock.mockResolvedValue(POPULAR);
+    invokeMock.mockResolvedValue(page(POPULAR));
     const first = renderHook(() => useHfSearch());
     await waitFor(() => expect(first.result.current.loading).toBe(false));
     expect(invokeMock).toHaveBeenCalledTimes(1);
@@ -283,8 +348,18 @@ describe('useHfSearch', () => {
     expect(invokeMock).not.toHaveBeenCalled();
   });
 
-  it('does not offer Load more when the page is not full', async () => {
-    invokeMock.mockResolvedValue(POPULAR); // one row, far below a full page
+  it('follows has_more for Load more, regardless of the row count', async () => {
+    // A short page that still reports more is offered Load more: the count is a
+    // poor signal once the backend drops non-chat rows, so has_more is the truth.
+    invokeMock.mockResolvedValue(page(POPULAR, true));
+    const { result } = renderHook(() => useHfSearch());
+    await waitFor(() => expect(result.current.loading).toBe(false));
+    expect(result.current.results).toEqual(POPULAR);
+    expect(result.current.canLoadMore).toBe(true);
+  });
+
+  it('does not offer Load more when the Hub reports no more', async () => {
+    invokeMock.mockResolvedValue(page(POPULAR, false));
     const { result } = renderHook(() => useHfSearch());
     await waitFor(() => expect(result.current.loading).toBe(false));
     expect(result.current.canLoadMore).toBe(false);
@@ -297,8 +372,10 @@ describe('useHfSearch', () => {
         id: `org/repo-${i}-GGUF`,
         downloads: n - i,
         gated: false,
+        vision: false,
+        thinking: false,
       }));
-    invokeMock.mockResolvedValueOnce(full(HF_PAGE_SIZE)); // mount fills page 1
+    invokeMock.mockResolvedValueOnce(page(full(HF_PAGE_SIZE), true));
     const { result } = renderHook(() => useHfSearch());
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
@@ -308,8 +385,8 @@ describe('useHfSearch', () => {
     expect(result.current.canLoadMore).toBe(true);
 
     invokeMock.mockClear();
-    // Page 2 returns fewer than the requested 60: the Hub is out of rows.
-    invokeMock.mockResolvedValueOnce(full(HF_PAGE_SIZE + 15));
+    // Page 2 reports the Hub is exhausted: Load more disappears.
+    invokeMock.mockResolvedValueOnce(page(full(HF_PAGE_SIZE + 15), false));
     act(() => result.current.loadMore());
     await act(async () => {
       vi.advanceTimersByTime(HF_SEARCH_DEBOUNCE_MS);
diff --git a/src/settings/tabs/models/useHfSearch.ts b/src/settings/tabs/models/useHfSearch.ts
index bf100224..3cf8774e 100644
--- a/src/settings/tabs/models/useHfSearch.ts
+++ b/src/settings/tabs/models/useHfSearch.ts
@@ -14,7 +14,7 @@
 
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
-import type { HfModelSummary } from '../../../types/hf';
+import type { HfModelSummary, HfSearchPage } from '../../../types/hf';
 
 /** Debounce window before a query change triggers a backend fetch. */
 export const HF_SEARCH_DEBOUNCE_MS = 300;
@@ -24,14 +24,14 @@ export const HF_SEARCH_DEBOUNCE_MS = 300;
 export const HF_PAGE_SIZE = 30;
 
 /**
- * Session-scoped cache of search results, keyed by `query::limit`. Switching to
+ * Session-scoped cache of search pages, keyed by `query::limit`. Switching to
  * another tab unmounts the Discover pane, so without this every return trip
  * would re-hit the Hub and flash "Searching…"; serving an already-seen query
  * from cache makes the tab feel instant and avoids the redundant call. Lives
  * for the app session (cleared on reload), since Hub rankings barely move on
  * that timescale.
  */
-const searchCache = new Map<string, HfModelSummary[]>();
+const searchCache = new Map<string, HfSearchPage>();
 
 function cacheKey(query: string, limit: number): string {
   return `${query}::${limit}`;
@@ -47,27 +47,40 @@ export function clearHfSearchCache(): void {
 }
 
 /**
- * Runtime guard for the IPC boundary. The Rust backend is trusted, but this
+ * Runtime guard for a single search row. The Rust backend is trusted, but this
  * keeps the hook robust against shape drift (schema changes, legacy builds,
- * mocks) without pulling in a schema library. A malformed payload is treated
- * as a transport failure and collapses to an empty result.
+ * mocks) without pulling in a schema library.
  */
-function isHfModelSummaryArray(value: unknown): value is HfModelSummary[] {
+function isHfModelSummary(item: unknown): item is HfModelSummary {
+  if (typeof item !== 'object' || item === null) return false;
+  const candidate = item as {
+    id?: unknown;
+    downloads?: unknown;
+    gated?: unknown;
+    vision?: unknown;
+    thinking?: unknown;
+  };
+  return (
+    typeof candidate.id === 'string' &&
+    typeof candidate.downloads === 'number' &&
+    typeof candidate.gated === 'boolean' &&
+    typeof candidate.vision === 'boolean' &&
+    typeof candidate.thinking === 'boolean'
+  );
+}
+
+/**
+ * Runtime guard for the IPC boundary: a `{ rows, has_more }` page whose rows are
+ * all well-formed. A malformed payload is treated as a transport failure and
+ * collapses to an empty page.
+ */
+function isHfSearchPage(value: unknown): value is HfSearchPage {
+  if (typeof value !== 'object' || value === null) return false;
+  const candidate = value as { rows?: unknown; has_more?: unknown };
   return (
-    Array.isArray(value) &&
-    value.every((item) => {
-      if (typeof item !== 'object' || item === null) return false;
-      const candidate = item as {
-        id?: unknown;
-        downloads?: unknown;
-        gated?: unknown;
-      };
-      return (
-        typeof candidate.id === 'string' &&
-        typeof candidate.downloads === 'number' &&
-        typeof candidate.gated === 'boolean'
-      );
-    })
+    typeof candidate.has_more === 'boolean' &&
+    Array.isArray(candidate.rows) &&
+    candidate.rows.every(isHfModelSummary)
   );
 }
 
@@ -103,7 +116,10 @@ export function useHfSearch(): UseHfSearchResult {
   // Seed straight from the cache so a remount (tab switch) paints the last
   // results with no loading flash; a cold first run still starts in `loading`.
   const [results, setResults] = useState<HfModelSummary[]>(
-    () => searchCache.get(cacheKey('', HF_PAGE_SIZE)) ?? [],
+    () => searchCache.get(cacheKey('', HF_PAGE_SIZE))?.rows ?? [],
+  );
+  const [hasMore, setHasMore] = useState(
+    () => searchCache.get(cacheKey('', HF_PAGE_SIZE))?.has_more ?? false,
   );
   const [loading, setLoading] = useState(
     () => !searchCache.has(cacheKey('', HF_PAGE_SIZE)),
@@ -144,7 +160,8 @@ export function useHfSearch(): UseHfSearchResult {
       // setState in an effect.
       const cached = searchCache.get(key);
       if (cached) {
-        setResults(cached);
+        setResults(cached.rows);
+        setHasMore(cached.has_more);
         setLoading(false);
         return;
       }
@@ -155,15 +172,18 @@ export function useHfSearch(): UseHfSearchResult {
           limit: lim,
         });
         if (!isLatest(token)) return;
-        if (isHfModelSummaryArray(payload)) {
+        if (isHfSearchPage(payload)) {
           searchCache.set(key, payload);
-          setResults(payload);
+          setResults(payload.rows);
+          setHasMore(payload.has_more);
         } else {
           setResults([]);
+          setHasMore(false);
         }
       } catch {
         if (!isLatest(token)) return;
         setResults([]);
+        setHasMore(false);
       } finally {
         if (isLatest(token)) setLoading(false);
       }
@@ -188,8 +208,10 @@ export function useHfSearch(): UseHfSearchResult {
     return () => window.clearTimeout(timer);
   }, [queryText, limit, runSearch]);
 
-  // The last response filled the page, so the Hub may hold more rows.
-  const canLoadMore = !loading && results.length >= limit;
+  // The Hub reported a full page, so it may hold more rows. Driven by the page's
+  // `has_more` flag, not `results.length`: the backend drops non-chat rows, so a
+  // short page can still have more behind it.
+  const canLoadMore = !loading && hasMore;
 
   return {
     query: queryText,
diff --git a/src/types/hf.ts b/src/types/hf.ts
index af82cd9b..2c51fe34 100644
--- a/src/types/hf.ts
+++ b/src/types/hf.ts
@@ -17,6 +17,8 @@
  * - `downloads` is Hugging Face's all-time download count for the repo.
  * - `gated` is true when the repo requires accepting terms or auth; an
  *   anonymous download fails, so the Discover row disables download for it.
+ * - `vision` / `thinking` are per-model capabilities (every quant shares them),
+ *   rendered as pills on the repo row.
  */
 export interface HfModelSummary {
   /** Canonical `owner/repo` slug. */
@@ -29,4 +31,20 @@ export interface HfModelSummary {
    * metadata (a per-repo property shared by every quant); `null`/absent when
    * unknown or untrusted. */
   context_length?: number | null;
+  /** True when the repo ships an mmproj vision companion (accepts image input). */
+  vision: boolean;
+  /** True when the model emits reasoning tokens. */
+  thinking: boolean;
+}
+
+/**
+ * One page of search results from `search_hf_models`. `has_more` is derived from
+ * the raw Hub entry count, not `rows.length`, so the backend's chat-model
+ * allowlist (which drops non-chat repos) never ends pagination early.
+ */
+export interface HfSearchPage {
+  /** The chat-capable repo rows for this page. */
+  rows: HfModelSummary[];
+  /** True when the Hub returned a full page, so a next page may exist. */
+  has_more: boolean;
 }

From a79e359578970b443a151d96605c4e8f46fc86e9 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sat, 20 Jun 2026 22:55:33 -0500
Subject: [PATCH 75/89] refactor(discover): remove Browse-all result count
 label

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/tabs/models/BrowseAllPane.module.css | 12 +-----------
 src/settings/tabs/models/BrowseAllPane.test.tsx   |  6 ------
 src/settings/tabs/models/BrowseAllPane.tsx        |  3 ---
 3 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index 16ff8ce5..8e9b12c5 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -80,7 +80,7 @@
   background: var(--elev-2);
 }
 
-/* ── Sub-bar: result count + sort label ───────────────────────────────── */
+/* ── Sub-bar: sort label ──────────────────────────────────────────────── */
 
 .subbar {
   display: flex;
@@ -90,16 +90,6 @@
   border-bottom: 1px solid var(--hair-soft);
 }
 
-.count {
-  font-size: 11.5px;
-  color: var(--t3);
-}
-
-.count b {
-  color: var(--t2);
-  font-weight: 590;
-}
-
 .sort {
   margin-left: auto;
   font-size: 11.5px;
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 045fd868..d283ec7f 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -194,11 +194,6 @@ describe('BrowseAllPane', () => {
     expect(screen.getByText('unsloth · 410,000 downloads')).toBeInTheDocument();
   });
 
-  it('shows the result count in the sub-bar', async () => {
-    await renderPane();
-    expect(screen.getByText(/chat models/)).toHaveTextContent('3 chat models');
-  });
-
   it('renders capability pills per row from the repo capabilities', async () => {
     await renderPane();
     const visionRow = screen
@@ -760,7 +755,6 @@ describe('BrowseAllPane', () => {
   it('shows a no-results message when the search returns nothing', async () => {
     await renderPane(() => {}, { search_hf_models: pageOf([]) });
     expect(screen.getByText('No models found.')).toBeInTheDocument();
-    expect(screen.getByText(/chat models/)).toHaveTextContent('0 chat models');
   });
 
   it('offers Load more on a full page and pages to the next batch', async () => {
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 7d307020..ed76cfc2 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -130,9 +130,6 @@ export function BrowseAllPane({ onSaved }: BrowseAllPaneProps) {
       </div>
 
       <div className={styles.subbar}>
-        <span className={styles.count}>
-          <b>{results.length}</b> chat models
-        </span>
         <span className={styles.sort}>Most downloaded</span>
       </div>
 

From 837cab3ce892bee3347121f339c8489639239456 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 00:10:01 -0500
Subject: [PATCH 76/89] feat(discover): show per-family download status pills
 on Browse-all rows

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/contexts/DownloadsContext.tsx             | 51 ++++++++++
 .../__tests__/DownloadsContext.test.tsx       | 92 +++++++++++++++++++
 .../tabs/models/BrowseAllPane.module.css      | 30 ++++++
 .../tabs/models/BrowseAllPane.test.tsx        | 54 +++++++++++
 src/settings/tabs/models/BrowseAllPane.tsx    | 22 +++++
 5 files changed, 249 insertions(+)

diff --git a/src/contexts/DownloadsContext.tsx b/src/contexts/DownloadsContext.tsx
index 004dea48..92d54ae3 100644
--- a/src/contexts/DownloadsContext.tsx
+++ b/src/contexts/DownloadsContext.tsx
@@ -49,6 +49,20 @@ export interface DownloadView {
   speedBytesPerSec: number | null;
 }
 
+/**
+ * Per-repo roll-up of a family's live downloads, by state, for the collapsed
+ * Browse-all row pills. Counts only the in-memory registry's active states:
+ * `downloading` (weights or its mmproj companion), `verifying`, and `failed`.
+ * Terminal-success (`ready`) is omitted (it clears immediately), and paused
+ * partials are not registry state at all (they live in the per-file listing
+ * read on expand), so neither is summarisable here.
+ */
+export interface RepoDownloadSummary {
+  downloading: number;
+  verifying: number;
+  failed: number;
+}
+
 /** Internal record: the identity (for retry replay) plus its accumulator. */
 interface RegistryEntry {
   identity: RegistryIdentity;
@@ -64,6 +78,12 @@ export interface DownloadsContextValue {
    * list (which would reveal the per-file downloads) has been fetched.
    */
   hasRepoDownload: (repo: string) => boolean;
+  /**
+   * Live download counts for `repo`, by state, for the collapsed-row pills.
+   * Counts only repo-kind downloads belonging to `repo`; see
+   * {@link RepoDownloadSummary}.
+   */
+  repoDownloadSummary: (repo: string) => RepoDownloadSummary;
   /** Start (or resume) a Staff Picks catalog download by its stable id. */
   startStaffPick: (id: string) => void;
   /** Start (or resume) a Browse-all repo download by repo + GGUF file. */
@@ -205,10 +225,40 @@ export function DownloadsProvider({ children }: { children: ReactNode }) {
     [entries],
   );
 
+  const repoDownloadSummary = useCallback(
+    (repo: string): RepoDownloadSummary => {
+      const summary: RepoDownloadSummary = {
+        downloading: 0,
+        verifying: 0,
+        failed: 0,
+      };
+      for (const entry of entries.values()) {
+        if (entry.identity.kind !== 'repo' || entry.identity.repo !== repo) {
+          continue;
+        }
+        switch (entry.acc.state.phase) {
+          case 'downloading':
+          case 'downloading_mmproj':
+            summary.downloading += 1;
+            break;
+          case 'verifying':
+            summary.verifying += 1;
+            break;
+          case 'failed':
+            summary.failed += 1;
+            break;
+        }
+      }
+      return summary;
+    },
+    [entries],
+  );
+
   const value = useMemo<DownloadsContextValue>(
     () => ({
       get,
       hasRepoDownload,
+      repoDownloadSummary,
       startStaffPick,
       startRepoDownload,
       cancel,
@@ -219,6 +269,7 @@ export function DownloadsProvider({ children }: { children: ReactNode }) {
     [
       get,
       hasRepoDownload,
+      repoDownloadSummary,
       startStaffPick,
       startRepoDownload,
       cancel,
diff --git a/src/contexts/__tests__/DownloadsContext.test.tsx b/src/contexts/__tests__/DownloadsContext.test.tsx
index e30f3fe7..eef450d9 100644
--- a/src/contexts/__tests__/DownloadsContext.test.tsx
+++ b/src/contexts/__tests__/DownloadsContext.test.tsx
@@ -204,6 +204,98 @@ describe('DownloadsContext', () => {
     });
   });
 
+  it('reports zero counts for a repo with no live downloads', () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    expect(result.current.repoDownloadSummary('org/repo')).toEqual({
+      downloading: 0,
+      verifying: 0,
+      failed: 0,
+    });
+  });
+
+  it("counts a repo's live downloads by state, mmproj as downloading, excluding ready and other repos", async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+
+    // a.gguf: plain downloading (default phase on start).
+    await act(async () => {
+      result.current.startRepoDownload('org/repo', 'a.gguf');
+    });
+
+    // b.gguf: a second Started flips it to downloading_mmproj (still downloading).
+    await act(async () => {
+      result.current.startRepoDownload('org/repo', 'b.gguf');
+    });
+    const chB = channel();
+    act(() =>
+      chB.simulateMessage({
+        type: 'Started',
+        data: { file: 'b.gguf', total_bytes: 100, resumed_from: 0 },
+      }),
+    );
+    act(() =>
+      chB.simulateMessage({
+        type: 'Started',
+        data: { file: 'b.mmproj', total_bytes: 50, resumed_from: 0 },
+      }),
+    );
+
+    // c.gguf: verifying.
+    await act(async () => {
+      result.current.startRepoDownload('org/repo', 'c.gguf');
+    });
+    act(() =>
+      channel().simulateMessage({
+        type: 'Verifying',
+        data: { file: 'c.gguf' },
+      }),
+    );
+
+    // d.gguf: failed.
+    await act(async () => {
+      result.current.startRepoDownload('org/repo', 'd.gguf');
+    });
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'http', message: 'HTTP 500' },
+      }),
+    );
+
+    // e.gguf: ready is terminal-success and must not appear as a live pill.
+    await act(async () => {
+      result.current.startRepoDownload('org/repo', 'e.gguf');
+    });
+    act(() => channel().simulateMessage({ type: 'AllDone' }));
+
+    // A different repo's download must not leak into org/repo's counts.
+    await act(async () => {
+      result.current.startRepoDownload('other/repo', 'z.gguf');
+    });
+
+    expect(result.current.repoDownloadSummary('org/repo')).toEqual({
+      downloading: 2,
+      verifying: 1,
+      failed: 1,
+    });
+    expect(result.current.repoDownloadSummary('other/repo')).toEqual({
+      downloading: 1,
+      verifying: 0,
+      failed: 0,
+    });
+  });
+
+  it('excludes Staff Picks downloads from a repo summary', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+    expect(result.current.repoDownloadSummary('org/repo')).toEqual({
+      downloading: 0,
+      verifying: 0,
+      failed: 0,
+    });
+  });
+
   it('ignores a late channel event after its entry is cleared', async () => {
     const { result } = renderHook(() => useDownloads(), { wrapper });
     await act(async () => {
diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index 8e9b12c5..82702f3e 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -178,6 +178,36 @@
   background: rgba(230, 181, 107, 0.1);
 }
 
+/* Per-family live download status on the (possibly collapsed) repo row: counts
+ * only, one pill per active state, colour tracking the phase. Colours come from
+ * the shared phase tokens; the soft backgrounds are mixed from those same tokens
+ * so the hue is defined once. */
+.statusPills {
+  flex: none;
+  display: flex;
+  align-items: center;
+  gap: 7px;
+}
+.statusPill {
+  font-size: 11px;
+  font-weight: 560;
+  padding: 3px 9px;
+  border-radius: var(--radius-pill);
+  white-space: nowrap;
+}
+.pillDownloading {
+  color: var(--accent);
+  background: var(--accent-soft);
+}
+.pillVerifying {
+  color: var(--cap-think);
+  background: color-mix(in srgb, var(--cap-think) 14%, transparent);
+}
+.pillFailed {
+  color: var(--danger);
+  background: color-mix(in srgb, var(--danger) 14%, transparent);
+}
+
 /* Per-quant RAM-fit hint: a coloured label (no dot). */
 .fit {
   flex: none;
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index d283ec7f..04e061db 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -538,6 +538,60 @@ describe('BrowseAllPane', () => {
     });
   });
 
+  it('shows a download-status pill on the repo row that survives collapse', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    // No download yet: the collapsed row carries no status pill.
+    expect(within(row).queryByText('1 downloading')).not.toBeInTheDocument();
+
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
+    await flush();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Started',
+        data: {
+          file: 'gemma-q4.gguf',
+          total_bytes: 5_000_000_000,
+          resumed_from: 0,
+        },
+      });
+    });
+    expect(within(row).getByText('1 downloading')).toBeInTheDocument();
+
+    // Collapsing the row hides the quant list but keeps the at-a-glance status.
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
+    await flush();
+    expect(within(row).queryByText('gemma-q4.gguf')).not.toBeInTheDocument();
+    expect(within(row).getByText('1 downloading')).toBeInTheDocument();
+  });
+
+  it('reflects verifying then failed states in the repo row pill', async () => {
+    await renderPane();
+    const row = screen
+      .getByText('google/gemma-4-12b-it-GGUF')
+      .closest('[data-row]') as HTMLElement;
+    fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
+    await flush();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    await flush();
+
+    act(() => lastChannel?.simulateMessage({ type: 'Verifying' }));
+    expect(within(row).getByText('1 verifying')).toBeInTheDocument();
+    expect(within(row).queryByText('1 downloading')).not.toBeInTheDocument();
+
+    act(() =>
+      lastChannel?.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'http', message: 'HTTP 500' },
+      }),
+    );
+    expect(within(row).getByText('1 failed')).toBeInTheDocument();
+  });
+
   async function expandRepo(): Promise<HTMLElement> {
     const row = screen
       .getByText('google/gemma-4-12b-it-GGUF')
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index ed76cfc2..384bd699 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -225,6 +225,19 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
   // when unknown, which skips it.
   const contextLabel = formatContextWindow(model.context_length ?? 0);
 
+  // Live download counts for this family, surfaced as pills on the row so a
+  // collapsed repo still tells you what it has in flight. Reads the registry,
+  // so it survives the accordion collapse that hides the per-quant rows. One
+  // pill per active state with a non-zero count, in active-first order.
+  const dl = downloads.repoDownloadSummary(model.id);
+  const statusPills = (
+    [
+      [dl.downloading, 'downloading', styles.pillDownloading],
+      [dl.verifying, 'verifying', styles.pillVerifying],
+      [dl.failed, 'failed', styles.pillFailed],
+    ] as const
+  ).filter(([count]) => count > 0);
+
   return (
     <div className={styles.rowWrap} data-row>
       <div className={styles.row}>
@@ -249,6 +262,15 @@ function BrowseAllRow({ model, onSaved }: BrowseAllRowProps) {
             {contextLabel ? ` · ${contextLabel}` : ''}
           </div>
         </div>
+        {statusPills.length > 0 ? (
+          <div className={styles.statusPills}>
+            {statusPills.map(([count, label, cls]) => (
+              <span key={label} className={`${styles.statusPill} ${cls}`}>
+                {count} {label}
+              </span>
+            ))}
+          </div>
+        ) : null}
         <button
           type="button"
           className={`${styles.disclose} ${expanded ? styles.discloseOpen : ''}`}

From dd1e571ef119aa26f06b64618ea518e67f0f3cb4 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 00:13:41 -0500
Subject: [PATCH 77/89] fix: broadcast active-model changes so the Settings
 panel and overlay picker stay in sync

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs                   |  9 +-
 .../__tests__/useModelSelection.test.tsx      | 97 ++++++++++++++++++-
 src/hooks/useModelSelection.ts                | 35 +++++++
 src/settings/hooks/useConfigSync.test.ts      | 88 +++++++++++++++++
 src/settings/hooks/useConfigSync.ts           | 69 ++++++++++---
 5 files changed, 282 insertions(+), 16 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 38c6a08a..d60fc4f1 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -27,7 +27,7 @@ use std::sync::Mutex;
 
 use futures_util::StreamExt;
 use serde::{Deserialize, Serialize};
-use tauri::Manager;
+use tauri::{Emitter, Manager};
 
 use crate::config::defaults::{
     DEFAULT_OLLAMA_SHOW_REQUEST_TIMEOUT_SECS, DEFAULT_OLLAMA_TAGS_REQUEST_TIMEOUT_SECS,
@@ -398,6 +398,13 @@ fn persist_active_provider_model(
         let mut guard = active.0.lock().map_err(|e| e.to_string())?;
         *guard = mirror;
     }
+    // Broadcast the same config-change event every settings_commands writer
+    // emits, so the other webview (the overlay's picker, or the Settings panel)
+    // resyncs live. Without this, set_active_model would be the only model-write
+    // path that leaves other windows stale; emitting here also covers
+    // finalize_install's auto-select and the delete-clear path. Listeners only
+    // re-read state and never call reload_config_from_disk, so this cannot loop.
+    let _ = app.emit(crate::settings_commands::CONFIG_UPDATED_EVENT, ());
     Ok(())
 }
 
diff --git a/src/hooks/__tests__/useModelSelection.test.tsx b/src/hooks/__tests__/useModelSelection.test.tsx
index 99ec2a16..28767447 100644
--- a/src/hooks/__tests__/useModelSelection.test.tsx
+++ b/src/hooks/__tests__/useModelSelection.test.tsx
@@ -1,11 +1,18 @@
 import { renderHook, act } from '@testing-library/react';
-import { describe, it, expect, beforeEach } from 'vitest';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
 import { useModelSelection } from '../useModelSelection';
-import { invoke } from '../../testUtils/mocks/tauri';
+import {
+  invoke,
+  listen,
+  emitTauriEvent,
+  clearEventHandlers,
+} from '../../testUtils/mocks/tauri';
 
 describe('useModelSelection', () => {
   beforeEach(() => {
     invoke.mockReset();
+    listen.mockClear();
+    clearEventHandlers();
   });
 
   it('loads active and installed models from the backend', async () => {
@@ -384,4 +391,90 @@ describe('useModelSelection', () => {
       rejectLate(new Error('late'));
     });
   });
+
+  it('refreshes the picker when thuki://config-updated fires', async () => {
+    // A model change made from the other window (the Settings panel) writes
+    // config and broadcasts config-updated; the picker must re-pull so its
+    // active model and list match the new backend truth without a remount.
+    invoke
+      .mockResolvedValueOnce({
+        active: 'gemma4:e2b',
+        all: ['gemma4:e2b', 'qwen2.5:7b'],
+        ollamaReachable: true,
+      })
+      .mockResolvedValueOnce({
+        active: 'qwen2.5:7b',
+        all: ['gemma4:e2b', 'qwen2.5:7b'],
+        ollamaReachable: true,
+      });
+
+    const { result } = renderHook(() => useModelSelection());
+    await act(async () => {});
+    expect(result.current.activeModel).toBe('gemma4:e2b');
+
+    await act(async () => {
+      emitTauriEvent('thuki://config-updated', null);
+    });
+
+    expect(result.current.activeModel).toBe('qwen2.5:7b');
+  });
+
+  it('stops refreshing on config-updated after unmount', async () => {
+    invoke.mockResolvedValue({
+      active: 'gemma4:e2b',
+      all: ['gemma4:e2b'],
+      ollamaReachable: true,
+    });
+
+    const { unmount } = renderHook(() => useModelSelection());
+    await act(async () => {});
+    const callsBeforeUnmount = invoke.mock.calls.length;
+
+    unmount();
+    await act(async () => {
+      emitTauriEvent('thuki://config-updated', null);
+    });
+
+    expect(invoke.mock.calls.length).toBe(callsBeforeUnmount);
+  });
+
+  it('survives a config-updated listen rejection without crashing', async () => {
+    listen.mockRejectedValueOnce(new Error('event bridge missing'));
+    invoke.mockResolvedValueOnce({
+      active: 'gemma4:e2b',
+      all: ['gemma4:e2b'],
+      ollamaReachable: true,
+    });
+
+    const { result } = renderHook(() => useModelSelection());
+    await act(async () => {});
+
+    expect(result.current.activeModel).toBe('gemma4:e2b');
+  });
+
+  it('drops a late-arriving config-updated subscription after unmount', async () => {
+    let resolveListen!: (fn: () => void) => void;
+    const unlistenSpy = vi.fn();
+    listen.mockImplementationOnce(
+      () =>
+        new Promise<() => void>((resolve) => {
+          resolveListen = resolve;
+        }),
+    );
+    invoke.mockResolvedValueOnce({
+      active: 'gemma4:e2b',
+      all: ['gemma4:e2b'],
+      ollamaReachable: true,
+    });
+
+    const { unmount } = renderHook(() => useModelSelection());
+    await act(async () => {});
+    unmount();
+
+    await act(async () => {
+      resolveListen(unlistenSpy);
+    });
+
+    expect(unlistenSpy).toHaveBeenCalledTimes(1);
+  });
 });
diff --git a/src/hooks/useModelSelection.ts b/src/hooks/useModelSelection.ts
index fd145aca..8373c616 100644
--- a/src/hooks/useModelSelection.ts
+++ b/src/hooks/useModelSelection.ts
@@ -1,7 +1,16 @@
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
+import { listen, type UnlistenFn } from '@tauri-apps/api/event';
 import type { ModelPickerState } from '../types/model';
 
+/**
+ * Backend broadcast fired after any in-app config write replaces the in-memory
+ * `AppConfig` (including a model change made from the other webview, e.g. the
+ * Settings panel). Mirrors the Rust-side `CONFIG_UPDATED_EVENT`. Kept as a
+ * string literal to avoid a Rust-codegen dependency in the frontend.
+ */
+const CONFIG_UPDATED_EVENT = 'thuki://config-updated';
+
 /**
  * Runtime guard for the IPC boundary. The Rust backend is trusted, but this
  * keeps the hook robust against shape drift (schema changes, legacy builds,
@@ -140,6 +149,32 @@ export function useModelSelection(): UseModelSelectionResult {
     void refreshModels();
   }, [refreshModels]);
 
+  // Re-pull when any window writes config (a model change in the Settings
+  // panel broadcasts this). Without it the active-model chip and list would
+  // only resync on the next picker-open or summon, so a change made elsewhere
+  // would look stale until then. A per-effect `cancelled` flag (not the shared
+  // `mountedRef`) gates a late subscription, so every cleanup that runs before
+  // `listen` resolves, unmount or dependency re-run, tears the handler down.
+  useEffect(() => {
+    let cancelled = false;
+    let unlisten: UnlistenFn | null = null;
+    void listen(CONFIG_UPDATED_EVENT, () => {
+      void refreshModels();
+    })
+      .then((stop) => {
+        if (cancelled) stop();
+        else unlisten = stop;
+      })
+      .catch(() => {
+        // Event bridge unavailable (test env / Tauri not ready). The mount
+        // fetch and explicit refreshes still work; only the live push is lost.
+      });
+    return () => {
+      cancelled = true;
+      unlisten?.();
+    };
+  }, [refreshModels]);
+
   const setActiveModel = useCallback(
     async (model: string): Promise<void> => {
       latestTokenRef.current += 1;
diff --git a/src/settings/hooks/useConfigSync.test.ts b/src/settings/hooks/useConfigSync.test.ts
index d27bb256..a0a21e4a 100644
--- a/src/settings/hooks/useConfigSync.test.ts
+++ b/src/settings/hooks/useConfigSync.test.ts
@@ -7,6 +7,11 @@ import {
   __emitFocus,
   __resetFocusListeners,
 } from '../../testUtils/mocks/tauri-window';
+import {
+  listen,
+  emitTauriEvent,
+  clearEventHandlers,
+} from '../../testUtils/mocks/tauri';
 import { useConfigSync } from './useConfigSync';
 import type { RawAppConfig } from '../types';
 
@@ -85,11 +90,14 @@ const CONFIG_B: RawAppConfig = {
 
 beforeEach(() => {
   invokeMock.mockReset();
+  listen.mockClear();
   __resetFocusListeners();
+  clearEventHandlers();
 });
 
 afterEach(() => {
   __resetFocusListeners();
+  clearEventHandlers();
 });
 
 describe('useConfigSync', () => {
@@ -205,4 +213,84 @@ describe('useConfigSync', () => {
     // Listener was removed; no further reload invokes.
     expect(invokeMock).not.toHaveBeenCalled();
   });
+
+  it('refreshes from get_config (not reload) when thuki://config-updated fires', async () => {
+    // An in-app write from either window broadcasts config-updated. The
+    // Settings window must pick the change up live via the read-only
+    // get_config: calling reload_config_from_disk here would re-emit the
+    // same event and loop, and would run residency side-effects again.
+    invokeMock.mockResolvedValueOnce(CONFIG_A).mockResolvedValueOnce(CONFIG_B);
+
+    const { result } = renderHook(() => useConfigSync());
+    await waitFor(() => expect(result.current.config).toEqual(CONFIG_A));
+
+    await act(async () => {
+      emitTauriEvent('thuki://config-updated', null);
+      await Promise.resolve();
+    });
+
+    await waitFor(() => expect(result.current.config).toEqual(CONFIG_B));
+    expect(invokeMock).toHaveBeenCalledWith('get_config');
+    expect(invokeMock).not.toHaveBeenCalledWith('reload_config_from_disk');
+  });
+
+  it('keeps the last good config when the config-updated refresh rejects', async () => {
+    invokeMock
+      .mockResolvedValueOnce(CONFIG_A)
+      .mockRejectedValueOnce(new Error('boom'));
+
+    const { result } = renderHook(() => useConfigSync());
+    await waitFor(() => expect(result.current.config).toEqual(CONFIG_A));
+
+    await act(async () => {
+      emitTauriEvent('thuki://config-updated', null);
+      await Promise.resolve();
+    });
+
+    expect(result.current.config).toEqual(CONFIG_A);
+  });
+
+  it('survives a config-updated listen rejection without crashing hydrate', async () => {
+    listen.mockRejectedValueOnce(new Error('event bridge missing'));
+    invokeMock.mockResolvedValue(CONFIG_A);
+
+    const { result } = renderHook(() => useConfigSync());
+    await waitFor(() => expect(result.current.config).toEqual(CONFIG_A));
+  });
+
+  it('drops a late-arriving config-updated subscription after unmount', async () => {
+    let resolveListen!: (fn: () => void) => void;
+    const unlistenSpy = vi.fn();
+    listen.mockImplementationOnce(
+      () =>
+        new Promise<() => void>((resolve) => {
+          resolveListen = resolve;
+        }),
+    );
+    invokeMock.mockResolvedValue(CONFIG_A);
+
+    const { result, unmount } = renderHook(() => useConfigSync());
+    await waitFor(() => expect(result.current.config).toEqual(CONFIG_A));
+    unmount();
+
+    await act(async () => {
+      resolveListen(unlistenSpy);
+    });
+
+    expect(unlistenSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('stops refreshing on config-updated after unmount', async () => {
+    invokeMock.mockResolvedValue(CONFIG_A);
+    const { result, unmount } = renderHook(() => useConfigSync());
+    await waitFor(() => expect(result.current.config).toEqual(CONFIG_A));
+
+    unmount();
+    invokeMock.mockClear();
+    await act(async () => {
+      emitTauriEvent('thuki://config-updated', null);
+    });
+
+    expect(invokeMock).not.toHaveBeenCalled();
+  });
 });
diff --git a/src/settings/hooks/useConfigSync.ts b/src/settings/hooks/useConfigSync.ts
index d243ff36..dcb516e8 100644
--- a/src/settings/hooks/useConfigSync.ts
+++ b/src/settings/hooks/useConfigSync.ts
@@ -1,11 +1,17 @@
 /**
- * Loads the resolved `RawAppConfig` on mount and re-syncs whenever the
- * Settings window gains focus (file may have changed externally).
+ * Loads the resolved `RawAppConfig` on mount and keeps it in sync via two
+ * channels: the `thuki://config-updated` broadcast (fired after ANY in-app
+ * config write, including a model change made from the overlay window) and
+ * the Settings window's `tauri://focus` event (covers external hand-edits to
+ * the file).
  *
- * Replaces the file-watcher subsystem the eng review collapsed: the
- * `tauri://focus` event covers 99% of "I hand-edited the file" cases
- * because users naturally bounce focus to see results, and the explicit
- * "↻ Refresh from disk" button in About covers the 1%.
+ * The broadcast refresh reads the in-memory snapshot with `get_config`, never
+ * `reload_config_from_disk`: that command re-emits `config-updated` (which
+ * would loop) and re-runs residency side-effects the originating write already
+ * performed. Focus still uses `reload` because a hand-edit only lands on disk.
+ *
+ * The explicit "↻ Refresh from disk" button in About covers the rare case
+ * where neither fires.
  *
  * Returns the current config plus a reload function the About-tab button
  * binds to. `null` while the initial fetch is in flight; render gating
@@ -14,10 +20,17 @@
 
 import { useCallback, useEffect, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
+import { listen, type UnlistenFn } from '@tauri-apps/api/event';
 import { getCurrentWindow } from '@tauri-apps/api/window';
 
 import type { RawAppConfig } from '../types';
 
+/**
+ * Backend broadcast fired after the in-memory `AppConfig` is replaced. Mirrors
+ * the Rust-side `CONFIG_UPDATED_EVENT` and the literal used in `ConfigContext`.
+ */
+const CONFIG_UPDATED_EVENT = 'thuki://config-updated';
+
 export interface ConfigSyncHandle {
   config: RawAppConfig | null;
   /** Replaces local state with what Rust currently considers canonical. */
@@ -40,27 +53,57 @@ export function useConfigSync(): ConfigSyncHandle {
     }
   }, []);
 
-  // Initial mount + focus listener.
+  // Initial mount + focus listener + config-updated subscription.
   useEffect(() => {
     let mounted = true;
-    void invoke<RawAppConfig>('get_config').then((next) => {
-      if (mounted) setConfig(next);
-    });
+
+    // Read-only refresh from the in-memory snapshot. Used for the initial
+    // hydrate and for every config-updated broadcast. Deliberately NOT
+    // `reload()` (see the hook doc comment): that path loops and re-runs
+    // residency side-effects.
+    const refreshFromMemory = () => {
+      void invoke<RawAppConfig>('get_config')
+        .then((next) => {
+          if (mounted) setConfig(next);
+        })
+        .catch(() => {
+          // Non-fatal: keep the last good snapshot.
+        });
+    };
+
+    refreshFromMemory();
 
     const window = getCurrentWindow();
-    let unlisten: (() => void) | null = null;
+    let unlistenFocus: (() => void) | null = null;
     void window
       .onFocusChanged(({ payload: focused }) => {
         if (focused) void reload();
       })
       .then((stop) => {
-        unlisten = stop;
+        unlistenFocus = stop;
         if (!mounted) stop();
       });
 
+    let unlistenConfig: UnlistenFn | null = null;
+    void listen(CONFIG_UPDATED_EVENT, () => {
+      refreshFromMemory();
+    })
+      .then((stop) => {
+        if (!mounted) {
+          stop();
+          return;
+        }
+        unlistenConfig = stop;
+      })
+      .catch(() => {
+        // Event bridge unavailable (test env / Tauri not ready); focus and
+        // the explicit Refresh button still resync.
+      });
+
     return () => {
       mounted = false;
-      unlisten?.();
+      unlistenFocus?.();
+      unlistenConfig?.();
     };
   }, [reload]);
 

From 839fd0425d500a1ef3398f618de4a42ab6d450fe Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 01:50:08 -0500
Subject: [PATCH 78/89] fix: dedup built-in warm-up primes and surface a
 warming status

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/lib.rs                          |   5 +
 src-tauri/src/warmup.rs                       | 209 +++++++++++++++++-
 .../tabs/models/ProvidersPane.test.tsx        |  64 ++++++
 src/settings/tabs/models/ProvidersPane.tsx    |  41 +++-
 4 files changed, 302 insertions(+), 17 deletions(-)

diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 727ae623..6fd5e8eb 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -598,6 +598,7 @@ fn show_overlay(app_handle: &tauri::AppHandle, ctx: crate::context::ActivationCo
                         .clone();
                     let client = app_handle.state::<reqwest::Client>().inner().clone();
                     tauri::async_runtime::spawn(warmup::warm_builtin(
+                        app_handle.clone(),
                         engine,
                         target,
                         model_id,
@@ -2050,6 +2051,8 @@ pub fn run() {
                     let _ = warmup_handle.emit("warmup:model-loaded", model);
                 },
             )));
+            // Port-keyed dedup + cue state for the built-in engine warm-up.
+            app.manage(warmup::BuiltinWarmState::default());
 
             // ── Configuration (TOML file at app_config_dir) ─────────
             // Loaded once at startup. Missing file -> seed defaults.
@@ -2452,6 +2455,8 @@ pub fn run() {
             warmup::get_loaded_model,
             #[cfg(not(coverage))]
             warmup::get_engine_status,
+            #[cfg(not(coverage))]
+            warmup::get_builtin_warm_state,
             updater::commands::get_updater_state,
             #[cfg(not(coverage))]
             updater::commands::check_for_update,
diff --git a/src-tauri/src/warmup.rs b/src-tauri/src/warmup.rs
index 772d11f7..1dc89c0c 100644
--- a/src-tauri/src/warmup.rs
+++ b/src-tauri/src/warmup.rs
@@ -129,37 +129,116 @@ pub(crate) fn builtin_prime_body(model: &str, system_prompt: &str) -> serde_json
 /// priming is app-summon activity, not user chat; if it touched, idle-unload
 /// would never fire for a user who keeps summoning the overlay without
 /// chatting.
+/// Returns `true` when the prime got an HTTP 200 (the model is now warm and
+/// the system-prompt prefix is cached); any transport or non-200 outcome
+/// returns `false` so the caller leaves the load un-primed and a later warm
+/// can retry.
 pub(crate) async fn prime_builtin(
     port: u16,
     model: String,
     system_prompt: String,
     client: reqwest::Client,
-) {
+) -> bool {
     let body = builtin_prime_body(&model, &system_prompt);
-    let _ = client
+    client
         .post(format!("http://127.0.0.1:{port}/v1/chat/completions"))
         .json(&body)
         .send()
-        .await;
+        .await
+        .map(|r| r.status().as_u16())
+        .unwrap_or(0)
+        == 200
+}
+
+/// Port-keyed dedup + cue state for the built-in engine, owned by the app
+/// layer so the engine runner stays a pure process actor. `warm_builtin`
+/// consults it after `ensure_loaded` resolves the serving port, so at most one
+/// prime runs per engine load and the overlay shows the "warming" cue for
+/// exactly that window. Keyed on port, not target: a model or context switch
+/// forces a new process and a new port, so a port mismatch correctly allows a
+/// fresh prime after any restart.
+#[derive(Default)]
+pub struct BuiltinWarmState {
+    inner: std::sync::Mutex<BuiltinWarm>,
+}
+
+#[derive(Default)]
+struct BuiltinWarm {
+    /// Port of a prime currently in flight, if any. Armed by `try_begin`,
+    /// cleared by `finish` regardless of outcome so a failed prime can retry.
+    in_flight: Option<u16>,
+    /// Port whose prime completed successfully. A new process gets a new port,
+    /// so a port mismatch allows a fresh prime after a restart.
+    primed_port: Option<u16>,
+}
+
+impl BuiltinWarmState {
+    /// Atomically decides whether to prime the engine on `port`. Returns true
+    /// (and arms the in-flight slot) only when no prime is already running for
+    /// this port and this port has not already been primed. The two warm
+    /// callers (summon + first keystroke) both reach this after `ensure_loaded`
+    /// resolves the same reused port, so the loser dedups to a no-op.
+    pub fn try_begin(&self, port: u16) -> bool {
+        let mut g = self.inner.lock().unwrap();
+        if g.in_flight == Some(port) || g.primed_port == Some(port) {
+            return false;
+        }
+        g.in_flight = Some(port);
+        true
+    }
+
+    /// Clears the in-flight slot if `port` owns it and, on success, records
+    /// `port` as primed so later warm requests for the same load dedup. A
+    /// `finish` for a port that no longer owns the slot (engine restarted
+    /// mid-prime) leaves the slot armed; its success is still recorded.
+    pub fn finish(&self, port: u16, success: bool) {
+        let mut g = self.inner.lock().unwrap();
+        if g.in_flight == Some(port) {
+            g.in_flight = None;
+        }
+        if success {
+            g.primed_port = Some(port);
+        }
+    }
+
+    /// Whether a prime is currently in flight. Seeds the Settings keep-warm
+    /// status when the panel mounts during a cold prime (it otherwise learns
+    /// the state only from the `warmup:builtin-warming`/`-warmed` events).
+    pub fn is_warming(&self) -> bool {
+        self.inner.lock().unwrap().in_flight.is_some()
+    }
 }
 
 /// Built-in arm of `warm_up_model`: starts (or reuses) the engine so the
 /// selected model is resident by the time the user submits, then primes the
-/// KV cache for the system-prompt prefix. Best-effort: a superseded or failed
-/// load, or a failed prime, is swallowed exactly like the Ollama warmup.
-/// Coverage-off: `ensure_loaded` is covered by the runner tests, `prime_builtin`
-/// by its own; this only sequences them.
+/// KV cache for the system-prompt prefix. Dedup via [`BuiltinWarmState`]
+/// collapses the summon + keystroke warms (and any double-summon) to a single
+/// prime per load, so the user's first message never queues behind redundant
+/// cold primes. Emits `warmup:builtin-warming` while the prime runs and
+/// `warmup:builtin-warmed` when it ends, so the Settings keep-warm status can
+/// read "warming…" until the model is actually ready (not just `/health` OK).
+/// Best-effort throughout: a superseded load, a dedup skip, or a failed prime
+/// is swallowed. Coverage-off: the dedup logic lives in `BuiltinWarmState`
+/// and the prime in `prime_builtin`, both tested; this only sequences them.
 #[cfg_attr(coverage_nightly, coverage(off))]
 pub(crate) async fn warm_builtin(
+    app: tauri::AppHandle,
     engine: crate::engine::runner::EngineHandle,
     target: crate::engine::state::Target,
-    model: String,
+    model_id: String,
     system_prompt: String,
     client: reqwest::Client,
 ) {
-    if let Ok(port) = engine.ensure_loaded(target).await {
-        prime_builtin(port, model, system_prompt, client).await;
+    let Ok(port) = engine.ensure_loaded(target).await else {
+        return;
+    };
+    if !app.state::<BuiltinWarmState>().try_begin(port) {
+        return;
     }
+    let _ = app.emit("warmup:builtin-warming", ());
+    let ok = prime_builtin(port, model_id, system_prompt, client).await;
+    app.state::<BuiltinWarmState>().finish(port, ok);
+    let _ = app.emit("warmup:builtin-warmed", ());
 }
 
 /// Built-in arm of `evict_model`: stops the engine sidecar and resolves once
@@ -277,7 +356,9 @@ impl WarmupState {
 
 #[tauri::command]
 #[cfg_attr(coverage_nightly, coverage(off))]
+#[allow(clippy::too_many_arguments)]
 pub fn warm_up_model(
+    app: tauri::AppHandle,
     warmup: tauri::State<WarmupState>,
     models: tauri::State<crate::models::ActiveModelState>,
     config: tauri::State<parking_lot::RwLock<crate::config::AppConfig>>,
@@ -344,6 +425,7 @@ pub fn warm_up_model(
             // so just skip rather than surfacing anything.
             if let Ok(target) = target {
                 tauri::async_runtime::spawn(warm_builtin(
+                    app,
                     engine.inner().clone(),
                     target,
                     model_id,
@@ -405,6 +487,17 @@ pub fn get_engine_status(
     engine.current_status()
 }
 
+/// True while the built-in engine is priming (loaded but the system-prompt
+/// prefill has not finished). The Settings keep-warm panel calls this on mount
+/// to seed its "warming…" status, since the `warmup:builtin-warming` event it
+/// otherwise relies on may have fired before the panel attached its listener.
+/// Thin wrapper over [`BuiltinWarmState::is_warming`], which its own tests cover.
+#[tauri::command]
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn get_builtin_warm_state(warm: tauri::State<'_, BuiltinWarmState>) -> bool {
+    warm.is_warming()
+}
+
 /// Returns the active model's name if it is currently loaded, `None` if no
 /// model is selected or nothing is running. Branches by the active provider's
 /// kind: Ollama queries `/api/ps`, the built-in engine reads its own status
@@ -1581,7 +1674,7 @@ mod tests {
             .unwrap()
             .parse()
             .expect("mockito url ends in a port");
-        prime_builtin(
+        let ok = prime_builtin(
             port,
             "org/repo:m.gguf".to_string(),
             SYS.to_string(),
@@ -1589,9 +1682,103 @@ mod tests {
         )
         .await;
 
+        assert!(
+            ok,
+            "a 200 prime reports success so the load is marked primed"
+        );
         mock.assert_async().await;
     }
 
+    #[tokio::test]
+    async fn builtin_prime_swallows_connection_error() {
+        // Port 1 refuses; prime is best-effort and must not panic, exercising
+        // the transport-error path of the status capture.
+        let ok = prime_builtin(
+            1,
+            "org/repo:m.gguf".to_string(),
+            SYS.to_string(),
+            reqwest::Client::new(),
+        )
+        .await;
+
+        assert!(
+            !ok,
+            "a transport failure reports not-primed so a later warm retries"
+        );
+    }
+
+    // ── BuiltinWarmState (port-keyed dedup) ──────────────────────────────────
+
+    #[test]
+    fn warm_state_first_call_begins_then_dedups_in_flight() {
+        let s = BuiltinWarmState::default();
+        assert!(s.try_begin(40000), "first call for a port arms the prime");
+        assert!(
+            !s.try_begin(40000),
+            "a second call while the prime is in flight dedups to a no-op"
+        );
+    }
+
+    #[test]
+    fn warm_state_failed_prime_allows_retry() {
+        let s = BuiltinWarmState::default();
+        assert!(s.try_begin(40000));
+        s.finish(40000, false);
+        assert!(
+            s.try_begin(40000),
+            "a failed prime leaves the port un-primed so a later warm retries"
+        );
+    }
+
+    #[test]
+    fn warm_state_successful_prime_dedups_same_port() {
+        let s = BuiltinWarmState::default();
+        assert!(s.try_begin(40000));
+        s.finish(40000, true);
+        assert!(
+            !s.try_begin(40000),
+            "a primed port dedups later warms for the same load"
+        );
+    }
+
+    #[test]
+    fn warm_state_new_port_primes_again_after_success() {
+        let s = BuiltinWarmState::default();
+        assert!(s.try_begin(40000));
+        s.finish(40000, true);
+        assert!(
+            s.try_begin(40001),
+            "a new process/port (restart or model switch) primes fresh"
+        );
+    }
+
+    #[test]
+    fn warm_state_finish_for_unowned_port_leaves_slot_armed() {
+        let s = BuiltinWarmState::default();
+        assert!(s.try_begin(40000));
+        // The engine restarted mid-prime: a finish for a different port must not
+        // clear the slot the live prime still owns, but still records its success.
+        s.finish(40001, true);
+        assert!(
+            !s.try_begin(40000),
+            "the in-flight slot for 40000 is untouched by finish(40001)"
+        );
+        assert!(
+            !s.try_begin(40001),
+            "finish(40001, true) still recorded 40001 as primed"
+        );
+    }
+
+    #[test]
+    fn warm_state_is_warming_tracks_in_flight() {
+        let s = BuiltinWarmState::default();
+        assert!(!s.is_warming(), "nothing is in flight at rest");
+        assert!(s.try_begin(40000));
+        assert!(s.is_warming(), "a begun prime reports warming");
+        s.finish(40000, true);
+        assert!(!s.is_warming(), "a finished prime is no longer warming");
+    }
+
     #[test]
     fn builtin_loaded_model_names_the_resident_blob_not_the_selection() {
         use std::path::PathBuf;
diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index 0636eb72..927dbc3c 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -120,6 +120,8 @@ function mockInvoke(over: Record<string, unknown> = {}) {
         return engineStatus('stopped');
       case 'get_loaded_model':
         return null;
+      case 'get_builtin_warm_state':
+        return false;
       case 'get_model_picker_state':
         return { active: null, all: [], ollamaReachable: true };
       default:
@@ -673,6 +675,67 @@ describe('ProvidersPane generation', () => {
     });
   });
 
+  it('shows warming… while the built-in engine primes a resident model', () => {
+    mockInvoke({
+      get_engine_status: engineStatus('loaded'),
+      get_loaded_model: 'Mistral Nemo 12B',
+      get_builtin_warm_state: true,
+      list_installed_models: INSTALLED,
+    });
+    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
+    return waitFor(() => {
+      const status = screen.getByTestId('keep-warm-status');
+      expect(status).toHaveTextContent('Mistral Nemo 12B');
+      expect(within(status).getByText('warming…')).toBeInTheDocument();
+      expect(within(status).queryByText('in VRAM')).not.toBeInTheDocument();
+    });
+  });
+
+  it('shows Warming up… while priming before the resident name resolves', () => {
+    mockInvoke({
+      get_engine_status: engineStatus('loaded'),
+      get_loaded_model: null,
+      get_builtin_warm_state: true,
+    });
+    renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
+    return waitFor(() =>
+      expect(screen.getByText('Warming up…')).toBeInTheDocument(),
+    );
+  });
+
+  it('flips warming… to in VRAM across the warming and warmed events', async () => {
+    mockInvoke({
+      get_engine_status: engineStatus('loaded'),
+      get_loaded_model: 'Qwen3.5 9B',
+      list_installed_models: INSTALLED,
+    });
+    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
+    const status = await screen.findByTestId('keep-warm-status');
+    await waitFor(() =>
+      expect(within(status).getByText('in VRAM')).toBeInTheDocument(),
+    );
+    act(() => emitTauriEvent('warmup:builtin-warming', null));
+    expect(within(status).getByText('warming…')).toBeInTheDocument();
+    act(() => emitTauriEvent('warmup:builtin-warmed', null));
+    expect(within(status).getByText('in VRAM')).toBeInTheDocument();
+  });
+
+  it('clears the warming status when the model is evicted', async () => {
+    mockInvoke({
+      get_engine_status: engineStatus('loaded'),
+      get_loaded_model: 'Qwen3.5 9B',
+      get_builtin_warm_state: true,
+      list_installed_models: INSTALLED,
+    });
+    renderPane(makeConfig('builtin', [BUILTIN_LOADED, OLLAMA]));
+    const status = await screen.findByTestId('keep-warm-status');
+    await waitFor(() =>
+      expect(within(status).getByText('warming…')).toBeInTheDocument(),
+    );
+    act(() => emitTauriEvent('warmup:model-evicted', null));
+    expect(screen.getByText('No model loaded')).toBeInTheDocument();
+  });
+
   it('falls back to no-model-loaded when the engine is loaded but the model is unknown', () => {
     mockInvoke({ get_engine_status: engineStatus('loaded') });
     renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
@@ -776,6 +839,7 @@ describe('ProvidersPane robustness', () => {
       list_installed_models: new Error('a'),
       get_engine_status: new Error('b'),
       get_loaded_model: new Error('c'),
+      get_builtin_warm_state: new Error('d'),
     });
     renderPane(makeConfig('builtin', [BUILTIN, OLLAMA]));
     await waitFor(() =>
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index 709dc51c..7c09c7d4 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -123,6 +123,11 @@ export function ProvidersPane({
   const [engineState, setEngineState] =
     useState<EngineStatus['state']>('stopped');
   const [loadedModel, setLoadedModel] = useState<string | null>(null);
+  // True while the built-in engine is priming: the model is resident
+  // (`/health` OK) but the system-prompt prefill has not finished, so it still
+  // answers as slowly as a cold start. The status reads "warming…" until the
+  // prime completes, then flips to "in VRAM".
+  const [warming, setWarming] = useState(false);
   useEffect(() => {
     // Re-reads which model the active provider actually has resident. The
     // built-in engine names it from its loaded blob, so this must be re-run on
@@ -135,6 +140,11 @@ export function ProvidersPane({
       .then((s) => setEngineState(s.state))
       .catch(() => {});
     refreshLoaded();
+    // Seed the warming flag in case the panel mounts mid-prime, before the
+    // warming event below has a chance to fire.
+    void invoke<boolean>('get_builtin_warm_state')
+      .then(setWarming)
+      .catch(() => {});
     const unlistenStatus = listen<EngineStatus>('engine:status', (e) => {
       setEngineState(e.payload.state);
       refreshLoaded();
@@ -142,13 +152,22 @@ export function ProvidersPane({
     const unlistenLoaded = listen<string>('warmup:model-loaded', (e) =>
       setLoadedModel(e.payload),
     );
-    const unlistenEvicted = listen<null>('warmup:model-evicted', () =>
-      setLoadedModel(null),
+    const unlistenEvicted = listen<null>('warmup:model-evicted', () => {
+      setLoadedModel(null);
+      setWarming(false);
+    });
+    const unlistenWarming = listen('warmup:builtin-warming', () =>
+      setWarming(true),
+    );
+    const unlistenWarmed = listen('warmup:builtin-warmed', () =>
+      setWarming(false),
     );
     return () => {
       void unlistenStatus.then((fn) => fn());
       void unlistenLoaded.then((fn) => fn());
       void unlistenEvicted.then((fn) => fn());
+      void unlistenWarming.then((fn) => fn());
+      void unlistenWarmed.then((fn) => fn());
     };
   }, []);
 
@@ -302,10 +321,18 @@ export function ProvidersPane({
   // active provider actually has resident (the built-in engine's loaded blob,
   // or Ollama's /api/ps), never the frontend selection; when set it renders as
   // a truncating name + "in VRAM" suffix in the JSX below so a long name can
-  // never break the row. This fallback text covers the two non-resident states
-  // (mid-load for the built-in engine, otherwise nothing loaded).
+  // never break the row. This fallback text covers the states with no resident
+  // name to show (priming or mid-load for the built-in engine, otherwise none).
+  //
+  // The built-in engine reports `loaded` (`/health` OK) before the system
+  // prompt is prefilled, so `builtinWarming` distinguishes "resident but still
+  // priming" (slow first message) from "ready". Scoped to the built-in engine
+  // because only it emits the warming events.
+  const builtinWarming = activeKind === 'builtin' && warming;
   let warmStatusText: string;
-  if (activeKind === 'builtin' && engineState === 'starting') {
+  if (builtinWarming) {
+    warmStatusText = 'Warming up…';
+  } else if (activeKind === 'builtin' && engineState === 'starting') {
     warmStatusText = 'Loading…';
   } else {
     warmStatusText = 'No model loaded';
@@ -616,7 +643,9 @@ export function ProvidersPane({
                   <span className={styles.genWarmModel} title={loadedModel}>
                     {loadedModel}
                   </span>
-                  <span className={styles.genWarmSuffix}>in VRAM</span>
+                  <span className={styles.genWarmSuffix}>
+                    {builtinWarming ? 'warming…' : 'in VRAM'}
+                  </span>
                 </>
               ) : (
                 warmStatusText

From 0a4af7961e2adb72567d5e420a7327a6760f6782 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 12:21:44 -0500
Subject: [PATCH 79/89] fix: allow discarding a paused download while another
 download runs

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs | 118 +++++++++++++++++++++++++-----------
 1 file changed, 82 insertions(+), 36 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index d60fc4f1..d42fd39a 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1151,24 +1151,37 @@ const INVALID_REPO_ID_ERR: &str = "invalid Hugging Face repo id";
 /// the distinct blob shas the registry guarantees (asserted in
 /// `registry::tests`), no two concurrent downloads target the same blob, so no
 /// per-blob lock is needed.
+/// One in-flight download: its cancellation handle plus the blob shas it is
+/// writing. The shas let a discard scope its in-flight check to the exact
+/// partial(s) at risk instead of refusing while any download runs.
+pub struct DownloadSlot {
+    token: tokio_util::sync::CancellationToken,
+    shas: Vec<String>,
+}
+
 #[derive(Default)]
-pub struct DownloadState(
-    pub std::sync::Mutex<std::collections::HashMap<String, tokio_util::sync::CancellationToken>>,
-);
+pub struct DownloadState(pub std::sync::Mutex<std::collections::HashMap<String, DownloadSlot>>);
 
-/// Atomically claims a download slot for `key`. Returns a fresh cancellation
-/// token on success; an error when `key` already has an in-flight download (or
-/// the lock is poisoned).
+/// Atomically claims a download slot for `key`, recording the blob `shas` it
+/// will write. Returns a fresh cancellation token on success; an error when
+/// `key` already has an in-flight download (or the lock is poisoned).
 pub fn claim_download(
     state: &DownloadState,
     key: &str,
+    shas: Vec<String>,
 ) -> Result<tokio_util::sync::CancellationToken, String> {
     let mut guard = state.0.lock().map_err(|e| e.to_string())?;
     if guard.contains_key(key) {
         return Err("a download is already in progress".to_string());
     }
     let token = tokio_util::sync::CancellationToken::new();
-    guard.insert(key.to_string(), token.clone());
+    guard.insert(
+        key.to_string(),
+        DownloadSlot {
+            token: token.clone(),
+            shas,
+        },
+    );
     Ok(token)
 }
 
@@ -1196,8 +1209,8 @@ pub fn download_in_flight(state: &DownloadState) -> bool {
 /// releases its own slot. A missing key is a harmless no-op.
 pub fn cancel_download(state: &DownloadState, key: &str) {
     if let Ok(guard) = state.0.lock() {
-        if let Some(token) = guard.get(key) {
-            token.cancel();
+        if let Some(slot) = guard.get(key) {
+            slot.token.cancel();
         }
     }
 }
@@ -2327,9 +2340,10 @@ pub fn delete_installed_model_inner(
 
 /// Removes the partial file for `sha256` so the next download starts fresh.
 /// Refuses malformed digests (the digest doubles as a file name) and refuses
-/// while a download is running (it may be writing that very partial). Holds
-/// the download-state lock across the removal so a concurrent claim cannot
-/// race the delete.
+/// only while a download is actively writing this very blob (deleting its
+/// partial would fail that download's verification with NotFound). Unrelated
+/// parallel downloads do not block the discard. Holds the download-state lock
+/// across the removal so a concurrent claim cannot race the delete.
 pub fn discard_partial_inner(
     state: &DownloadState,
     store: &storage::ModelStore,
@@ -2339,8 +2353,11 @@ pub fn discard_partial_inner(
         return Err("invalid sha256".to_string());
     }
     let guard = state.0.lock().map_err(|e| e.to_string())?;
-    if !guard.is_empty() {
-        return Err("a download is already in progress".to_string());
+    if guard
+        .values()
+        .any(|slot| slot.shas.iter().any(|s| s == sha256))
+    {
+        return Err("a download for this file is already in progress".to_string());
     }
     match std::fs::remove_file(store.partial_path(sha256)) {
         Ok(()) => Ok(()),
@@ -2433,10 +2450,11 @@ pub fn download_starter(
     download_state: tauri::State<'_, DownloadState>,
 ) -> Result<(), String> {
     let starter = starter_for_tier(&tier)?;
-    let token = claim_download(&download_state, &key)?;
+    let specs = registry::download_specs(starter);
+    let token = claim_download(&download_state, &key, spec_shas(&specs))?;
     spawn_model_download(
         app,
-        registry::download_specs(starter),
+        specs,
         registry::to_installed_model(starter),
         key,
         token,
@@ -2459,10 +2477,11 @@ pub fn download_staff_pick(
     download_state: tauri::State<'_, DownloadState>,
 ) -> Result<(), String> {
     let starter = starter_for_id(&id)?;
-    let token = claim_download(&download_state, &key)?;
+    let specs = registry::download_specs(starter);
+    let token = claim_download(&download_state, &key, spec_shas(&specs))?;
     spawn_model_download(
         app,
-        registry::download_specs(starter),
+        specs,
         registry::to_installed_model(starter),
         key,
         token,
@@ -2485,10 +2504,11 @@ pub async fn download_repo_model(
     download_state: tauri::State<'_, DownloadState>,
 ) -> Result<(), String> {
     let resolved = resolve_repo_spec(&client, HF_BASE_URL, &repo, &file).await?;
-    let token = claim_download(&download_state, &key)?;
+    let specs = repo_download_specs(HF_BASE_URL, &repo, &file, &resolved);
+    let token = claim_download(&download_state, &key, spec_shas(&specs))?;
     spawn_model_download(
         app,
-        repo_download_specs(HF_BASE_URL, &repo, &file, &resolved),
+        specs,
         repo_installed_model(&repo, &file, &resolved),
         key,
         token,
@@ -2637,6 +2657,12 @@ pub(crate) fn finalize_outcome_event(result: Result<(), String>) -> download::Do
     }
 }
 
+/// The blob shas a spec list writes, recorded on the download slot so a discard
+/// can scope its in-flight check to the exact partial(s) this download owns.
+fn spec_shas(specs: &[download::DownloadSpec]) -> Vec<String> {
+    specs.iter().map(|s| s.sha256.clone()).collect()
+}
+
 /// Runs the claimed download on the async runtime: streams events to the
 /// channel, records the manifest row + builtin provider model on success
 /// (then emits AllDone, or Failed when recording fails), and releases the
@@ -4629,24 +4655,34 @@ mod tests {
     #[test]
     fn download_claim_allows_distinct_keys_and_rejects_a_duplicate() {
         let state = DownloadState::default();
-        let token = claim_download(&state, "model-a").unwrap();
+        let token = claim_download(&state, "model-a", vec![]).unwrap();
         assert!(!token.is_cancelled());
         // A different model downloads concurrently: its own slot is granted.
-        assert!(claim_download(&state, "model-b").is_ok());
+        assert!(claim_download(&state, "model-b", vec![]).is_ok());
         // The same key cannot start twice while it is in flight.
-        let err = claim_download(&state, "model-a").unwrap_err();
+        let err = claim_download(&state, "model-a", vec![]).unwrap_err();
         assert_eq!(err, "a download is already in progress");
         // Releasing one key frees only that slot.
         release_download(&state, "model-a");
-        assert!(claim_download(&state, "model-a").is_ok());
+        assert!(claim_download(&state, "model-a", vec![]).is_ok());
+    }
+
+    #[test]
+    fn spec_shas_collects_every_blob_digest() {
+        let specs = registry::download_specs(registry::onboarding_heroes()[1]);
+        let shas = spec_shas(&specs);
+        assert_eq!(shas.len(), specs.len());
+        for (sha, spec) in shas.iter().zip(&specs) {
+            assert_eq!(sha, &spec.sha256);
+        }
     }
 
     #[test]
     fn download_in_flight_tracks_any_claim() {
         let state = DownloadState::default();
         assert!(!download_in_flight(&state));
-        let _a = claim_download(&state, "a").unwrap();
-        let _b = claim_download(&state, "b").unwrap();
+        let _a = claim_download(&state, "a", vec![]).unwrap();
+        let _b = claim_download(&state, "b", vec![]).unwrap();
         assert!(download_in_flight(&state));
         // One release leaves the other download in flight.
         release_download(&state, "a");
@@ -4660,8 +4696,8 @@ mod tests {
         let state = DownloadState::default();
         // No such key: cancelling is a harmless no-op.
         cancel_download(&state, "missing");
-        let a = claim_download(&state, "a").unwrap();
-        let b = claim_download(&state, "b").unwrap();
+        let a = claim_download(&state, "a", vec![]).unwrap();
+        let b = claim_download(&state, "b", vec![]).unwrap();
         cancel_download(&state, "a");
         assert!(a.is_cancelled());
         // Cancelling one download leaves the others running.
@@ -4676,7 +4712,7 @@ mod tests {
             let _guard = state_ref.0.lock().unwrap();
             panic!("poison");
         });
-        assert!(claim_download(&state, "k").is_err());
+        assert!(claim_download(&state, "k", vec![]).is_err());
         let (_dir, store) = make_store();
         assert!(discard_partial_inner(&state, &store, &"a".repeat(64)).is_err());
         let conn = crate::database::open_in_memory().unwrap();
@@ -5864,7 +5900,7 @@ mod tests {
         // A claimed download slot must refuse the delete and leave the row
         // and blob untouched, even though the in-flight download is a different
         // model: a finishing download could insert or share refcounted blobs.
-        let _token = claim_download(&state, "other-model").unwrap();
+        let _token = claim_download(&state, "other-model", vec![]).unwrap();
         let err = delete_installed_model_inner(&state, &conn, &store, &m.id, "").unwrap_err();
         assert_eq!(err, "a download is already in progress");
         assert!(manifest::get(&conn, &m.id).unwrap().is_some());
@@ -5895,7 +5931,7 @@ mod tests {
     // ── Model library: discard partial ───────────────────────────────────────
 
     #[test]
-    fn discard_partial_validates_hex_and_running_state() {
+    fn discard_partial_validates_hex_and_scopes_to_the_target_sha() {
         let (_dir, store) = make_store();
         let state = DownloadState::default();
         let sha = "a".repeat(64);
@@ -5904,15 +5940,25 @@ mod tests {
         assert!(discard_partial_inner(&state, &store, "short").is_err());
         assert!(discard_partial_inner(&state, &store, &"Z".repeat(64)).is_err());
 
-        // Rejected while any download is claimed (a finishing download may be
-        // writing this very partial or about to share its blob).
-        let _token = claim_download(&state, "some-model").unwrap();
+        // A download in flight for a DIFFERENT blob does not block discarding
+        // this paused partial: parallel downloads each own only their own shas,
+        // so an unrelated active download never touches this file.
+        std::fs::write(store.partial_path(&sha), b"bytes").unwrap();
+        let _other = claim_download(&state, "other-model", vec!["c".repeat(64)]).unwrap();
+        discard_partial_inner(&state, &store, &sha).unwrap();
+        assert!(!store.partial_path(&sha).exists());
+        release_download(&state, "other-model");
+
+        // A download in flight that IS writing this blob blocks the discard (it
+        // would unlink the partial out from under the active writer, failing its
+        // verification with NotFound).
+        std::fs::write(store.partial_path(&sha), b"bytes").unwrap();
+        let _this = claim_download(&state, "this-model", vec![sha.clone()]).unwrap();
         let err = discard_partial_inner(&state, &store, &sha).unwrap_err();
         assert!(err.contains("in progress"), "got: {err}");
-        release_download(&state, "some-model");
+        release_download(&state, "this-model");
 
         // Removes an existing partial; a missing partial is fine (idempotent).
-        std::fs::write(store.partial_path(&sha), b"bytes").unwrap();
         discard_partial_inner(&state, &store, &sha).unwrap();
         assert!(!store.partial_path(&sha).exists());
         discard_partial_inner(&state, &store, &sha).unwrap();

From c30d69ab107c61bdc40b5d33f7e2c6a1a7346071 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 14:38:18 -0500
Subject: [PATCH 80/89] feat(settings): themed model picker popover for
 Providers

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/components/ModelPickerPanel.tsx           |   5 +-
 .../__tests__/ModelPickerPanel.test.tsx       |   5 +
 .../tabs/models/ModelSelect.module.css        | 216 ++++++++
 src/settings/tabs/models/ModelSelect.test.tsx | 476 ++++++++++++++++++
 src/settings/tabs/models/ModelSelect.tsx      | 422 ++++++++++++++++
 .../tabs/models/ProvidersPane.test.tsx        |  62 ++-
 src/settings/tabs/models/ProvidersPane.tsx    |  88 ++--
 src/styles/settings.module.css                |   1 -
 8 files changed, 1216 insertions(+), 59 deletions(-)
 create mode 100644 src/settings/tabs/models/ModelSelect.module.css
 create mode 100644 src/settings/tabs/models/ModelSelect.test.tsx
 create mode 100644 src/settings/tabs/models/ModelSelect.tsx

diff --git a/src/components/ModelPickerPanel.tsx b/src/components/ModelPickerPanel.tsx
index c7fbd520..80b2c227 100644
--- a/src/components/ModelPickerPanel.tsx
+++ b/src/components/ModelPickerPanel.tsx
@@ -322,7 +322,10 @@ export function ModelPickerPanel({
                 }`}
               >
                 <span className="flex-1 min-w-0 flex flex-col gap-0.5">
-                  <span className="overflow-hidden text-ellipsis whitespace-nowrap leading-tight">
+                  <span
+                    className="overflow-hidden text-ellipsis whitespace-nowrap leading-tight"
+                    title={labelFor(model)}
+                  >
                     {labelFor(model)}
                   </span>
                   {capLabel && (
diff --git a/src/components/__tests__/ModelPickerPanel.test.tsx b/src/components/__tests__/ModelPickerPanel.test.tsx
index b7c6d54e..0106c615 100644
--- a/src/components/__tests__/ModelPickerPanel.test.tsx
+++ b/src/components/__tests__/ModelPickerPanel.test.tsx
@@ -56,6 +56,11 @@ describe('ModelPickerPanel', () => {
       screen.getByRole('option', { name: 'Qwen3.5 9B' }),
     ).toBeInTheDocument();
     expect(screen.queryByText(BUILTIN_ID)).not.toBeInTheDocument();
+    // The truncated name carries the full label as a native hover tooltip.
+    expect(screen.getByText('Qwen3.5 9B')).toHaveAttribute(
+      'title',
+      'Qwen3.5 9B',
+    );
   });
 
   it('falls back to the id when no display name is given', () => {
diff --git a/src/settings/tabs/models/ModelSelect.module.css b/src/settings/tabs/models/ModelSelect.module.css
new file mode 100644
index 00000000..335563a7
--- /dev/null
+++ b/src/settings/tabs/models/ModelSelect.module.css
@@ -0,0 +1,216 @@
+/*
+ * Styles for ModelSelect, the Providers-pane model picker popover.
+ *
+ * All colours come from the window-global tokens declared on `.window`
+ * (settings.module.css), so the capability pills and RAM-fit labels match the
+ * Library pane exactly without sharing a component. The closed trigger mirrors
+ * the `.dropdown` box it replaces; the open popover mirrors the warm-dark,
+ * hairline-bordered surface used across the Settings window.
+ */
+
+.root {
+  flex: 1;
+  min-width: 0;
+}
+
+/* Closed trigger: the same box as the native-select `.dropdown` it replaces. */
+.trigger {
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+  width: 100%;
+  height: 32px;
+  background: rgba(36, 30, 26, 0.6);
+  border: 1px solid rgba(255, 255, 255, 0.06);
+  border-top-color: rgba(255, 141, 92, 0.1);
+  border-radius: 8px;
+  padding: 0 10px 0 12px;
+  color: var(--t1);
+  font-family: inherit;
+  font-size: 13px;
+  cursor: pointer;
+  transition:
+    background 150ms ease,
+    border-color 150ms ease;
+}
+.trigger:hover {
+  background: rgba(36, 30, 26, 0.8);
+  border-color: rgba(255, 255, 255, 0.1);
+}
+.trigger[data-open] {
+  border-color: rgba(255, 141, 92, 0.42);
+  box-shadow: 0 0 0 3px rgba(255, 141, 92, 0.1);
+}
+.triggerLabel {
+  flex: 1;
+  min-width: 0;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  text-align: left;
+}
+.chev {
+  flex: none;
+  width: 10px;
+  height: 10px;
+  color: var(--t3);
+}
+
+/* Open popover: fixed to the viewport so it escapes the Settings window's
+ * `overflow: hidden`, positioned by ModelSelect from the trigger rectangle. */
+.popover {
+  position: fixed;
+  z-index: 40;
+  max-width: calc(100vw - 16px);
+  background: #1b1613;
+  border: 1px solid var(--hair);
+  border-top-color: rgba(255, 141, 92, 0.22);
+  border-radius: 12px;
+  box-shadow:
+    0 18px 44px -12px rgba(0, 0, 0, 0.65),
+    0 2px 6px rgba(0, 0, 0, 0.4);
+  overflow: hidden;
+}
+
+.filter {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  padding: 9px 12px;
+  border-bottom: 1px solid var(--hair-soft);
+}
+.searchIcon {
+  flex: none;
+  width: 14px;
+  height: 14px;
+  color: var(--t3);
+}
+.filter input {
+  flex: 1;
+  min-width: 0;
+  background: transparent;
+  border: 0;
+  outline: 0;
+  color: var(--t1);
+  font-family: inherit;
+  font-size: 12.5px;
+}
+.filter input::placeholder {
+  color: var(--t3);
+}
+
+.scroll {
+  max-height: 280px;
+  overflow-y: auto;
+  padding: 5px;
+}
+
+.option {
+  display: flex;
+  align-items: flex-start;
+  gap: 10px;
+  width: 100%;
+  padding: 8px 10px;
+  border: 0;
+  border-radius: 8px;
+  background: transparent;
+  color: var(--t1);
+  font-family: inherit;
+  text-align: left;
+  cursor: pointer;
+}
+.optionHighlighted {
+  background: rgba(255, 255, 255, 0.05);
+}
+.optionSelected {
+  background: var(--accent-soft);
+}
+.optionBody {
+  display: flex;
+  flex-direction: column;
+  flex: 1;
+  min-width: 0;
+}
+.optionName {
+  display: flex;
+  align-items: center;
+  gap: 7px;
+  min-width: 0;
+  font-size: 13px;
+  line-height: 1.3;
+}
+.optionNameText {
+  /* Shrink-and-truncate, but never grow: a short name keeps the capability
+   * pills hugging it instead of pushing them to the far edge of the row. */
+  flex: 0 1 auto;
+  min-width: 0;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+.optionSub {
+  margin-top: 3px;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  font-size: 10.5px;
+  color: var(--t3);
+}
+
+/* Capability pills: warm stone / rose / gold trio, colour in the text only,
+ * identical to the Library pane via the shared `--cap-*` tokens. */
+.pill {
+  flex: none;
+  display: inline-flex;
+  align-items: center;
+  font-size: 10px;
+  font-weight: 540;
+  padding: 1px 7px;
+  border-radius: var(--radius-pill);
+  background: rgba(255, 255, 255, 0.05);
+}
+.pillText {
+  color: var(--cap-text);
+}
+.pillVision {
+  color: var(--cap-vision);
+}
+.pillThinking {
+  color: var(--cap-think);
+}
+
+/* RAM-fit badge: trailing coloured label reusing the shared fit palette. */
+.fit {
+  flex: none;
+  align-self: center;
+  font-size: 11px;
+  font-weight: 560;
+}
+.fitOk {
+  color: var(--ok);
+}
+.fitTight {
+  color: var(--tight);
+}
+.fitHeavy {
+  color: var(--heavy);
+}
+
+.check {
+  flex: none;
+  width: 15px;
+  height: 15px;
+  margin-top: 2px;
+  color: var(--accent);
+}
+.checkHidden {
+  visibility: hidden;
+}
+
+.empty {
+  margin: 0;
+  padding: 20px 12px;
+  text-align: center;
+  font-size: 12px;
+  color: var(--t3);
+}
diff --git a/src/settings/tabs/models/ModelSelect.test.tsx b/src/settings/tabs/models/ModelSelect.test.tsx
new file mode 100644
index 00000000..fad4709c
--- /dev/null
+++ b/src/settings/tabs/models/ModelSelect.test.tsx
@@ -0,0 +1,476 @@
+import { render, screen, fireEvent, within } from '@testing-library/react';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+import {
+  ModelSelect,
+  computePlacement,
+  type ModelSelectItem,
+} from './ModelSelect';
+
+const RICH: ModelSelectItem[] = [
+  {
+    id: 'a',
+    label: 'Alpha',
+    sub: '6.6 GB · 128K · Org · Q4_K_M',
+    vision: true,
+    thinking: false,
+    fit: 'fits',
+  },
+  {
+    id: 'b',
+    label: 'Beta',
+    sub: '7.3 GB · 256K · Org · Q4_K_M',
+    vision: false,
+    thinking: true,
+    fit: 'tight',
+  },
+  {
+    id: 'c',
+    label: 'Gamma',
+    sub: '2.0 GB · 32K · Org · F16',
+    vision: false,
+    thinking: false,
+    fit: 'too_big',
+  },
+];
+
+const SLUGS: ModelSelectItem[] = [
+  { id: 'llama3.2:3b', label: 'llama3.2:3b' },
+  { id: 'qwen2.5:14b', label: 'qwen2.5:14b' },
+];
+
+function open(ariaLabel = 'Built-in model') {
+  fireEvent.click(screen.getByRole('button', { name: ariaLabel }));
+}
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe('computePlacement', () => {
+  it('drops below the trigger when there is room', () => {
+    const p = computePlacement(
+      { top: 60, bottom: 100, left: 24, width: 260 },
+      1000,
+      1440,
+    );
+    expect(p).toEqual({ top: 106, left: 24, width: 260 });
+  });
+
+  it('flips above the trigger when the space below cannot hold it', () => {
+    const p = computePlacement(
+      { top: 700, bottom: 740, left: 10, width: 200 },
+      768,
+      1440,
+      320,
+      6,
+    );
+    expect(p.top).toBe(374);
+    expect(p.left).toBe(10);
+  });
+
+  it('stays below when space is cramped both ways but the trigger sits high', () => {
+    const p = computePlacement(
+      { top: 5, bottom: 45, left: 0, width: 100 },
+      60,
+      1440,
+      320,
+      6,
+    );
+    expect(p.top).toBe(51);
+  });
+
+  it('clamps the left edge so the popover stays within the viewport', () => {
+    // Trigger near the right edge: left pulls back to keep it on-screen.
+    const right = computePlacement(
+      { top: 60, bottom: 100, left: 1300, width: 300 },
+      1000,
+      1440,
+    );
+    expect(right.left).toBe(1132);
+    // Trigger past the left edge: left clamps to the minimum margin.
+    const left = computePlacement(
+      { top: 60, bottom: 100, left: 2, width: 300 },
+      1000,
+      1440,
+    );
+    expect(left.left).toBe(8);
+  });
+});
+
+describe('ModelSelect', () => {
+  it('shows the selected label and a placeholder when nothing matches', () => {
+    const { rerender } = render(
+      <ModelSelect
+        value="b"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+        placeholder="Choose a model"
+      />,
+    );
+    expect(
+      screen.getByRole('button', { name: 'Built-in model' }),
+    ).toHaveTextContent('Beta');
+
+    rerender(
+      <ModelSelect
+        value="missing"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+        placeholder="Choose a model"
+      />,
+    );
+    expect(
+      screen.getByRole('button', { name: 'Built-in model' }),
+    ).toHaveTextContent('Choose a model');
+  });
+
+  it('toggles the popover open and closed from the trigger', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    expect(screen.getByRole('listbox')).toBeInTheDocument();
+    open();
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+  });
+
+  it('does not open when the trigger cannot be measured', () => {
+    vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue(
+      undefined as unknown as DOMRect,
+    );
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+  });
+
+  it('renders capability pills, sub-line, and a RAM-fit badge per item', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    const alpha = screen.getByRole('option', { name: /Alpha/ });
+    expect(within(alpha).getByText('Text')).toBeInTheDocument(); // shown whenever vision or thinking is defined
+    expect(within(alpha).getByText('Vision')).toBeInTheDocument();
+    expect(within(alpha).queryByText('Thinking')).not.toBeInTheDocument();
+    // The label and sub-line spans repeat their text in a native `title` tooltip.
+    expect(within(alpha).getByText('Alpha')).toHaveAttribute('title', 'Alpha');
+    const sub = within(alpha).getByText('6.6 GB · 128K · Org · Q4_K_M');
+    expect(sub).toBeInTheDocument();
+    expect(sub).toHaveAttribute('title', '6.6 GB · 128K · Org · Q4_K_M');
+    expect(within(alpha).getByText('Comfortable')).toHaveAttribute(
+      'title',
+      'Fits comfortably.',
+    ); // badge text and tooltip come from RAM_FIT_LABEL / RAM_FIT_TOOLTIP
+
+    const beta = screen.getByRole('option', { name: /Beta/ });
+    expect(within(beta).getByText('Thinking')).toBeInTheDocument();
+    expect(within(beta).getByText('Tight')).toBeInTheDocument();
+
+    const gamma = screen.getByRole('option', { name: /Gamma/ });
+    expect(within(gamma).getByText('Text')).toBeInTheDocument();
+    expect(within(gamma).queryByText('Vision')).not.toBeInTheDocument();
+    expect(within(gamma).queryByText('Thinking')).not.toBeInTheDocument();
+    expect(within(gamma).getByText('Heavy')).toBeInTheDocument();
+  });
+
+  it('marks the active row selected', () => {
+    render(
+      <ModelSelect
+        value="b"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open(); // aria-selected follows `item.id === value`, not the keyboard highlight
+    expect(screen.getByRole('option', { name: /Beta/ })).toHaveAttribute(
+      'aria-selected',
+      'true',
+    );
+    expect(screen.getByRole('option', { name: /Alpha/ })).toHaveAttribute(
+      'aria-selected',
+      'false',
+    );
+  });
+
+  it('renders Ollama slugs without pills, sub-line, or fit', () => {
+    render(
+      <ModelSelect
+        value="llama3.2:3b"
+        items={SLUGS}
+        onChange={() => {}}
+        ariaLabel="Active Ollama model"
+      />,
+    );
+    open('Active Ollama model'); // helper presumably takes the trigger's accessible name; defined outside this chunk
+    const row = screen.getByRole('option', { name: 'llama3.2:3b' });
+    expect(within(row).queryByText('Text')).not.toBeInTheDocument(); // pills need vision or thinking to be defined
+    expect(within(row).queryByText('Comfortable')).not.toBeInTheDocument(); // badge needs a truthy `fit`
+  });
+
+  it('filters the list and shows an empty message when nothing matches', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    const input = screen.getByRole('combobox'); // the filter <input> carries role="combobox"
+    fireEvent.change(input, { target: { value: 'al' } }); // trimmed, case-insensitive substring match on `label`
+    expect(screen.getByRole('option', { name: /Alpha/ })).toBeInTheDocument();
+    expect(
+      screen.queryByRole('option', { name: /Beta/ }),
+    ).not.toBeInTheDocument();
+
+    fireEvent.change(input, { target: { value: 'zzz' } });
+    expect(screen.getByText('No models found.')).toBeInTheDocument();
+  });
+
+  it('commits a clicked option and closes', () => {
+    const onChange = vi.fn();
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={onChange}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    fireEvent.click(screen.getByRole('option', { name: /Beta/ }));
+    expect(onChange).toHaveBeenCalledWith('b'); // commit() reports the item's id, then closes
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+  });
+
+  it('navigates with the keyboard and commits with Enter', () => {
+    const onChange = vi.fn();
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={onChange}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open(); // opening pre-highlights the row whose id equals `value`
+    const input = screen.getByRole('combobox');
+    fireEvent.keyDown(input, { key: 'ArrowDown' }); // highlight moves to (i + 1) % filtered.length
+    fireEvent.keyDown(input, { key: 'Enter' }); // commits the highlighted row
+    expect(onChange).toHaveBeenCalledWith('b');
+  });
+
+  it('wraps to the last item with ArrowUp and reaches ends with Home/End', () => {
+    const onChange = vi.fn();
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={onChange}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    const input = screen.getByRole('combobox');
+    fireEvent.keyDown(input, { key: 'ArrowUp' }); // (i - 1 + length) % length wraps past the first row
+    expect(input).toHaveAttribute(
+      'aria-activedescendant',
+      'thuki-model-select-listbox-option-2',
+    ); // option ids are `${LISTBOX_ID}-option-${index}` within the filtered list
+    fireEvent.keyDown(input, { key: 'Home' });
+    expect(input).toHaveAttribute(
+      'aria-activedescendant',
+      'thuki-model-select-listbox-option-0',
+    );
+    fireEvent.keyDown(input, { key: 'End' });
+    expect(input).toHaveAttribute(
+      'aria-activedescendant',
+      'thuki-model-select-listbox-option-2',
+    );
+  });
+
+  it('highlights the row under the cursor', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    fireEvent.mouseEnter(screen.getByRole('option', { name: /Gamma/ })); // onMouseEnter sets the highlight to that row's index
+    expect(screen.getByRole('combobox')).toHaveAttribute(
+      'aria-activedescendant',
+      'thuki-model-select-listbox-option-2',
+    );
+  });
+
+  it('ignores arrows and Enter while the filter matches nothing', () => {
+    const onChange = vi.fn();
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={onChange}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    const input = screen.getByRole('combobox');
+    fireEvent.change(input, { target: { value: 'zzz' } });
+    fireEvent.keyDown(input, { key: 'ArrowDown' }); // every navigation branch is guarded by filtered.length > 0
+    fireEvent.keyDown(input, { key: 'ArrowUp' });
+    fireEvent.keyDown(input, { key: 'Home' });
+    fireEvent.keyDown(input, { key: 'End' });
+    fireEvent.keyDown(input, { key: 'Enter' }); // commit() returns early when the index is out of range
+    expect(input).not.toHaveAttribute('aria-activedescendant'); // activeId is undefined for an empty list
+    expect(onChange).not.toHaveBeenCalled();
+  });
+
+  it('ignores keys it does not handle', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    fireEvent.keyDown(screen.getByRole('combobox'), { key: 'a' }); // no onKeyDown branch matches a plain character
+    expect(screen.getByRole('listbox')).toBeInTheDocument();
+  });
+
+  it('closes on Tab so it is not left open behind the next control', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    fireEvent.keyDown(screen.getByRole('combobox'), { key: 'Tab' }); // the Tab branch calls close() without preventDefault
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+  });
+
+  it('closes when the page scrolls or the window resizes', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    fireEvent.scroll(window); // the component registers a capture-phase scroll listener on window while open
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+    open();
+    fireEvent.resize(window); // a resize listener on window is registered alongside it
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+  });
+
+  it('returns focus to the trigger on Escape and after a selection', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    const trigger = screen.getByRole('button', { name: 'Built-in model' });
+    open();
+    fireEvent.keyDown(screen.getByRole('combobox'), { key: 'Escape' }); // the Escape branch closes, then focuses the trigger
+    expect(trigger).toHaveFocus();
+    open();
+    fireEvent.click(screen.getByRole('option', { name: /Beta/ })); // commit() also focuses the trigger after closing
+    expect(trigger).toHaveFocus();
+  });
+
+  it('pre-highlights the active row so Enter commits it, else the first row', () => {
+    const onChange = vi.fn();
+    const { rerender } = render(
+      <ModelSelect
+        value="b"
+        items={RICH}
+        onChange={onChange}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open(); // openPopover() highlights the index of the item whose id equals `value`
+    fireEvent.keyDown(screen.getByRole('combobox'), { key: 'Enter' }); // committing also closes the popover
+    expect(onChange).toHaveBeenLastCalledWith('b');
+
+    rerender(
+      <ModelSelect
+        value="missing"
+        items={RICH}
+        onChange={onChange}
+        ariaLabel="Built-in model"
+        placeholder="Choose a model"
+      />,
+    );
+    open(); // no item matches `value`, so findIndex is -1 and the highlight falls back to 0
+    fireEvent.keyDown(screen.getByRole('combobox'), { key: 'Enter' });
+    expect(onChange).toHaveBeenLastCalledWith('a');
+  });
+
+  it('closes on Escape', () => {
+    render(
+      <ModelSelect
+        value="a"
+        items={RICH}
+        onChange={() => {}}
+        ariaLabel="Built-in model"
+      />,
+    );
+    open();
+    fireEvent.keyDown(screen.getByRole('combobox'), { key: 'Escape' }); // handled on the filter input's onKeyDown
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+  });
+
+  it('closes on an outside press but not on a press inside the popover', () => {
+    render(
+      <div>
+        <span data-testid="outside">outside</span>
+        <ModelSelect
+          value="a"
+          items={RICH}
+          onChange={() => {}}
+          ariaLabel="Built-in model"
+        />
+      </div>,
+    );
+    open();
+    fireEvent.mouseDown(screen.getByRole('combobox')); // target sits inside popoverRef, so the document listener ignores it
+    expect(screen.getByRole('listbox')).toBeInTheDocument();
+    fireEvent.mouseDown(screen.getByTestId('outside')); // outside both the trigger and the popover: dismisses
+    expect(screen.queryByRole('listbox')).not.toBeInTheDocument();
+  });
+});
diff --git a/src/settings/tabs/models/ModelSelect.tsx b/src/settings/tabs/models/ModelSelect.tsx
new file mode 100644
index 00000000..6d8d999c
--- /dev/null
+++ b/src/settings/tabs/models/ModelSelect.tsx
@@ -0,0 +1,422 @@
+/**
+ * Thuki-styled model picker popover for the Providers pane.
+ *
+ * Replaces the native `<select>` whose open list macOS renders with its own
+ * chrome (system font, blue highlight, no filter). The closed trigger reuses
+ * the existing `.dropdown` box; clicking it opens a popover that matches the
+ * Settings surface and the overlay's `ModelPickerPanel`: a filter input, a
+ * scroll-capped list, capability pills, a RAM-fit badge, accent selection, and
+ * full keyboard navigation.
+ *
+ * The component is purely presentational and data-driven. Each {@link
+ * ModelSelectItem} carries the row's text plus optional rich fields; the
+ * built-in engine fills them (capabilities, size, context, fit) while Ollama
+ * leaves them undefined, so an Ollama row degrades cleanly to a slug-only line.
+ * Colours come from the window-global `--cap-*` / fit tokens, so the pills read
+ * identically to the Library pane without sharing a component.
+ */
+
+import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
+
+import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
+import type { RamFit } from '../../../types/starter';
+import styles from './ModelSelect.module.css';
+
+/** Maps each RAM-fit verdict to the CSS-module class that colours its badge. */
+const FIT_CLASS: Record<RamFit, string> = {
+  fits: styles.fitOk,
+  tight: styles.fitTight,
+  too_big: styles.fitHeavy,
+};
+
+/** Vertical gap, in pixels, left between the trigger and the popover. */
+const POPOVER_GAP = 6;
+/** Stand-in popover height used to pick the drop direction and drop-up offset. */
+const POPOVER_ESTIMATED_HEIGHT = 320;
+/** Minimum horizontal gap kept between the popover and the viewport's sides. */
+const POPOVER_EDGE_MARGIN = 8;
+
+/** One selectable model. Rich fields are optional so an Ollama row, which has
+ * no metadata, renders as a slug-only line. */
+export interface ModelSelectItem {
+  /** Stable id, passed back to {@link ModelSelectProps.onChange} on select. */
+  id: string;
+  /** Row text and closed-trigger label; the only field the filter matches. */
+  label: string;
+  /** Metadata sub-line ("8.2 GB · 128K · Google · Q4_K_M"); omitted for Ollama. */
+  sub?: string;
+  /** Vision pill when true. Pills hide only if this and `thinking` are both undefined. */
+  vision?: boolean;
+  /** Thinking pill when true; defining it (even as false) still shows the Text pill. */
+  thinking?: boolean;
+  /** RAM-fit verdict; a trailing coloured badge renders unless null or undefined. */
+  fit?: RamFit | null;
+}
+
+export interface ModelSelectProps {
+  /** Id of the selected item; its row gets aria-selected and the check mark. */
+  value: string;
+  /** Items to choose from, in display order. The host is expected to render its
+   * own empty state when there are none, so the list is non-empty here. */
+  items: ModelSelectItem[];
+  /** Called with the chosen item's id on click or Enter, even if it is unchanged. */
+  onChange: (id: string) => void;
+  /** Accessible name for the trigger and listbox; the filter input is "Filter <name>". */
+  ariaLabel: string;
+  /** Trigger text when {@link value} matches no item (e.g. "Choose a model"); defaults to ''. */
+  placeholder?: string;
+}
+
+/** Pixel `top` / `left` / `width` applied as inline style to the open popover. */
+export interface PopoverPlacement {
+  top: number;
+  left: number;
+  width: number;
+}
+
+/**
+ * Pure placement math: maps the trigger rect to fixed-viewport coordinates.
+ * The popover keeps the trigger's width and opens below it; it flips above
+ * only when the room below is under `estimatedHeight + gap` and the room above
+ * is larger. `left` is clamped so an edge margin remains on both sides.
+ */
+export function computePlacement(
+  rect: { top: number; bottom: number; left: number; width: number },
+  viewportHeight: number,
+  viewportWidth: number,
+  estimatedHeight: number = POPOVER_ESTIMATED_HEIGHT,
+  gap: number = POPOVER_GAP,
+): PopoverPlacement {
+  const { top: above, bottom, left: wanted, width } = rect;
+  const below = viewportHeight - bottom;
+  const flip = above > below && below < estimatedHeight + gap;
+  const top = flip ? above - estimatedHeight - gap : bottom + gap;
+  const rightmost = viewportWidth - width - POPOVER_EDGE_MARGIN;
+  const left = Math.max(POPOVER_EDGE_MARGIN, Math.min(wanted, rightmost));
+  return { top, left, width };
+}
+
+const CHEVRON = (
+  <svg
+    className={styles.chev}
+    viewBox="0 0 10 10"
+    fill="none"
+    aria-hidden="true"
+  >
+    <path
+      d="M2.5 4l2.5 2.5L7.5 4"
+      stroke="currentColor"
+      strokeWidth="1.4"
+      strokeLinecap="round"
+      strokeLinejoin="round"
+    />
+  </svg>
+); // decorative chevron rendered inside the trigger button, after its label
+
+const SEARCH_ICON = (
+  <svg
+    className={styles.searchIcon}
+    viewBox="0 0 16 16"
+    fill="none"
+    stroke="currentColor"
+    strokeWidth="1.5"
+    aria-hidden="true"
+  >
+    <circle cx="7" cy="7" r="4.5" />
+    <path d="M11 11l3 3" strokeLinecap="round" />
+  </svg>
+); // decorative magnifier rendered before the filter input
+
+const LISTBOX_ID = 'thuki-model-select-listbox'; // listbox DOM id; option ids append `-option-<index>`
+
+/**
+ * Controlled model picker: a trigger button that toggles a filterable popover.
+ * Owns its open state, filter text, keyboard focus, and outside-click
+ * dismissal; selection itself is lifted to the host via `onChange`.
+ */
+export function ModelSelect({
+  value,
+  items,
+  onChange,
+  ariaLabel,
+  placeholder = '',
+}: ModelSelectProps) {
+  const [open, setOpen] = useState(false);
+  const [filter, setFilter] = useState('');
+  const [highlightedIndex, setHighlightedIndex] = useState(0);
+  const [placement, setPlacement] = useState<PopoverPlacement | null>(null);
+
+  const triggerRef = useRef<HTMLButtonElement>(null);
+  const popoverRef = useRef<HTMLDivElement>(null);
+  const listboxRef = useRef<HTMLDivElement>(null);
+
+  const selected = items.find((i) => i.id === value);
+  const triggerLabel = selected ? selected.label : placeholder;
+
+  const filtered = useMemo(() => {
+    const needle = filter.trim().toLowerCase();
+    if (needle === '') return items;
+    return items.filter((i) => i.label.toLowerCase().includes(needle));
+  }, [filter, items]);
+
+  // Derive the safe highlight index inline so aria-activedescendant stays
+  // consistent on the same render that `filtered` shrinks under the cursor.
+  const safeIndex =
+    filtered.length === 0 ? 0 : Math.min(highlightedIndex, filtered.length - 1);
+  const activeId =
+    filtered.length > 0 ? `${LISTBOX_ID}-option-${safeIndex}` : undefined;
+
+  const close = useCallback(() => {
+    setOpen(false);
+    setFilter('');
+    setHighlightedIndex(0);
+    setPlacement(null);
+  }, []);
+
+  // Measure the trigger and place the popover at open time, in the click
+  // handler rather than an effect, so the popover never paints before it is
+  // placed. The measured rect is zero-sized under happy-dom, which
+  // `computePlacement` tolerates; a missing rect (only when unmounted) keeps it
+  // closed.
+  const openPopover = useCallback(() => {
+    const rect = triggerRef.current?.getBoundingClientRect();
+    if (rect) {
+      setPlacement(
+        computePlacement(rect, window.innerHeight, window.innerWidth),
+      );
+      // Pre-highlight the active row so Enter right after opening commits the
+      // current model rather than the first one, matching a native <select>.
+      const selectedIndex = items.findIndex((i) => i.id === value);
+      setHighlightedIndex(selectedIndex >= 0 ? selectedIndex : 0);
+      setOpen(true);
+    }
+  }, [items, value]);
+
+  // Dismiss on a press or scroll outside the trigger/popover, and on resize.
+  useEffect(() => {
+    if (!open) return;
+    const dismissIfOutside = (e: Event) => {
+      const target = e.target;
+      const inside =
+        target instanceof Node &&
+        (triggerRef.current?.contains(target) ||
+          popoverRef.current?.contains(target));
+      if (!inside) close();
+    };
+    // The popover is fixed-positioned from a one-time trigger measurement, so
+    // scrolling the Settings body or resizing the window would detach it from
+    // the trigger; dismiss it then. The capture-phase scroll listener also
+    // sees the popover's own list scrolling (wheel, or scrollIntoView while
+    // arrowing), which must be ignored or the list could never be scrolled.
+    document.addEventListener('mousedown', dismissIfOutside);
+    window.addEventListener('scroll', dismissIfOutside, true);
+    window.addEventListener('resize', close);
+    return () => {
+      document.removeEventListener('mousedown', dismissIfOutside);
+      window.removeEventListener('scroll', dismissIfOutside, true);
+      window.removeEventListener('resize', close);
+    };
+  }, [open, close]);
+
+  // Keep the highlighted row visible when arrow keys move it off-screen.
+  useEffect(() => {
+    if (!activeId) return;
+    const el = listboxRef.current?.querySelector<HTMLElement>(`#${activeId}`);
+    /* v8 ignore next -- scrollIntoView is a host API absent in happy-dom */
+    el?.scrollIntoView?.({ block: 'nearest' });
+  }, [activeId]);
+
+  const commit = (index: number) => {
+    if (index < 0 || index >= filtered.length) return;
+    onChange(filtered[index].id);
+    close();
+    // Return focus to the trigger so keyboard flow continues from it, the way
+    // a native <select> does after a selection.
+    triggerRef.current?.focus();
+  };
+
+  return (
+    <div className={styles.root}>
+      <button
+        ref={triggerRef}
+        type="button"
+        className={styles.trigger}
+        aria-haspopup="listbox"
+        aria-expanded={open}
+        aria-label={ariaLabel}
+        data-open={open || undefined}
+        onClick={() => (open ? close() : openPopover())}
+      >
+        <span className={styles.triggerLabel}>{triggerLabel}</span>
+        {CHEVRON}
+      </button>
+
+      {open && placement ? (
+        <div
+          ref={popoverRef}
+          className={styles.popover}
+          style={{
+            top: placement.top,
+            left: placement.left,
+            width: placement.width,
+          }}
+        >
+          <div className={styles.filter}>
+            {SEARCH_ICON}
+            <input
+              type="text"
+              role="combobox"
+              aria-controls={LISTBOX_ID}
+              aria-expanded="true"
+              aria-activedescendant={activeId}
+              aria-autocomplete="list"
+              aria-label={`Filter ${ariaLabel}`}
+              value={filter}
+              autoFocus
+              spellCheck={false}
+              autoComplete="off"
+              placeholder="Filter models…"
+              onChange={(e) => {
+                setFilter(e.target.value);
+                setHighlightedIndex(0);
+              }}
+              onKeyDown={(e) => {
+                if (e.key === 'ArrowDown') {
+                  e.preventDefault();
+                  if (filtered.length > 0) {
+                    setHighlightedIndex((i) => (i + 1) % filtered.length);
+                  }
+                } else if (e.key === 'ArrowUp') {
+                  e.preventDefault();
+                  if (filtered.length > 0) {
+                    setHighlightedIndex(
+                      (i) => (i - 1 + filtered.length) % filtered.length,
+                    );
+                  }
+                } else if (e.key === 'Home') {
+                  e.preventDefault();
+                  if (filtered.length > 0) setHighlightedIndex(0);
+                } else if (e.key === 'End') {
+                  e.preventDefault();
+                  if (filtered.length > 0)
+                    setHighlightedIndex(filtered.length - 1);
+                } else if (e.key === 'Enter') {
+                  e.preventDefault();
+                  commit(safeIndex);
+                } else if (e.key === 'Escape') {
+                  e.preventDefault();
+                  close();
+                  triggerRef.current?.focus();
+                } else if (e.key === 'Tab') {
+                  // Let focus move on to the next control, but close the
+                  // popover so it cannot be left open and detached behind it.
+                  close();
+                }
+              }}
+            />
+          </div>
+
+          <div
+            ref={listboxRef}
+            id={LISTBOX_ID}
+            role="listbox"
+            aria-label={ariaLabel}
+            className={styles.scroll}
+          >
+            {filtered.length === 0 ? (
+              <p className={styles.empty}>No models found.</p>
+            ) : (
+              filtered.map((item, index) => {
+                const isActive = item.id === value;
+                const isHighlighted = index === safeIndex;
+                const showPills =
+                  item.vision !== undefined || item.thinking !== undefined;
+                return (
+                  <button
+                    key={item.id}
+                    id={`${LISTBOX_ID}-option-${index}`}
+                    type="button"
+                    role="option"
+                    aria-selected={isActive}
+                    tabIndex={-1}
+                    className={`${styles.option} ${
+                      isHighlighted ? styles.optionHighlighted : ''
+                    } ${isActive ? styles.optionSelected : ''}`}
+                    onMouseEnter={() => setHighlightedIndex(index)}
+                    onClick={() => commit(index)}
+                  >
+                    <span className={styles.optionBody}>
+                      <span className={styles.optionName}>
+                        <span
+                          className={styles.optionNameText}
+                          title={item.label}
+                        >
+                          {item.label}
+                        </span>
+                        {showPills ? (
+                          <>
+                            <span
+                              className={`${styles.pill} ${styles.pillText}`}
+                            >
+                              Text
+                            </span>
+                            {item.vision ? (
+                              <span
+                                className={`${styles.pill} ${styles.pillVision}`}
+                              >
+                                Vision
+                              </span>
+                            ) : null}
+                            {item.thinking ? (
+                              <span
+                                className={`${styles.pill} ${styles.pillThinking}`}
+                              >
+                                Thinking
+                              </span>
+                            ) : null}
+                          </>
+                        ) : null}
+                      </span>
+                      {item.sub ? (
+                        <span className={styles.optionSub} title={item.sub}>
+                          {item.sub}
+                        </span>
+                      ) : null}
+                    </span>
+                    {item.fit ? (
+                      // Native title (not the Tooltip component): the row is a
+                      // <button>, so a Tooltip's wrapper <div> would be invalid
+                      // phrasing content nested inside it.
+                      <span
+                        className={`${styles.fit} ${FIT_CLASS[item.fit]}`}
+                        title={RAM_FIT_TOOLTIP[item.fit]}
+                      >
+                        {RAM_FIT_LABEL[item.fit]}
+                      </span>
+                    ) : null}
+                    <svg
+                      className={`${styles.check} ${
+                        isActive ? '' : styles.checkHidden
+                      }`}
+                      viewBox="0 0 16 16"
+                      fill="none"
+                      aria-hidden="true"
+                    >
+                      <path
+                        d="M3 8l3.5 3.5L13 5"
+                        stroke="currentColor"
+                        strokeWidth="2.2"
+                        strokeLinecap="round"
+                        strokeLinejoin="round"
+                      />
+                    </svg>
+                  </button>
+                );
+              })
+            )}
+          </div>
+        </div>
+      ) : null}
+    </div>
+  );
+}
diff --git a/src/settings/tabs/models/ProvidersPane.test.tsx b/src/settings/tabs/models/ProvidersPane.test.tsx
index 927dbc3c..19ca8895 100644
--- a/src/settings/tabs/models/ProvidersPane.test.tsx
+++ b/src/settings/tabs/models/ProvidersPane.test.tsx
@@ -124,6 +124,8 @@ function mockInvoke(over: Record<string, unknown> = {}) {
         return false;
       case 'get_model_picker_state':
         return { active: null, all: [], ollamaReachable: true };
+      case 'get_model_capabilities':
+        return {};
       default:
         return makeConfig('ollama', [BUILTIN, OLLAMA]);
     }
@@ -174,11 +176,12 @@ describe('ProvidersPane active hero', () => {
     mockInvoke({ list_installed_models: INSTALLED });
     const onSaved = vi.fn();
     renderPane(makeConfig('builtin', [builtin, OLLAMA]), { onSaved });
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Built-in model',
     });
-    expect(select).toHaveValue(INSTALLED[0].id);
-    fireEvent.change(select, { target: { value: INSTALLED[0].id } });
+    expect(trigger).toHaveTextContent('Qwen3.5 9B');
+    fireEvent.click(trigger);
+    fireEvent.click(await screen.findByRole('option', { name: /Qwen3\.5 9B/ }));
     await waitFor(() =>
       expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
         providerId: 'builtin',
@@ -223,10 +226,10 @@ describe('ProvidersPane active hero', () => {
     );
     // The provider-change refetch replaces the stale built-in id with the
     // live Ollama model rather than leaving the built-in id in the dropdown.
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Active Ollama model',
     });
-    await waitFor(() => expect(select).toHaveValue('gemma4:e4b'));
+    await waitFor(() => expect(trigger).toHaveTextContent('gemma4:e4b'));
   });
 
   it('appends the quant only to disambiguate duplicate display names', async () => {
@@ -238,24 +241,25 @@ describe('ProvidersPane active hero', () => {
     renderPane(
       makeConfig('builtin', [{ ...BUILTIN, model: 'org/x:q4.gguf' }, OLLAMA]),
     );
-    await screen.findByRole('combobox', { name: 'Built-in model' });
+    fireEvent.click(
+      await screen.findByRole('button', { name: 'Built-in model' }),
+    );
     // Shared display name -> each option disambiguates with its quant.
     expect(
-      screen.getByRole('option', { name: 'Qwen3.5 9B · Q4_K_M' }),
+      screen.getByRole('option', { name: /Qwen3\.5 9B · Q4_K_M/ }),
     ).toBeInTheDocument();
     expect(
-      screen.getByRole('option', { name: 'Qwen3.5 9B · Q8_0' }),
+      screen.getByRole('option', { name: /Qwen3\.5 9B · Q8_0/ }),
     ).toBeInTheDocument();
   });
 
   it('shows a Choose-a-model option when the built-in model is not installed', async () => {
     mockInvoke({ list_installed_models: INSTALLED });
     renderPane(makeConfig('builtin', [{ ...BUILTIN, model: 'gone' }, OLLAMA]));
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Built-in model',
     });
-    expect(select).toHaveValue('');
-    expect(screen.getByText('Choose a model')).toBeInTheDocument();
+    expect(trigger).toHaveTextContent('Choose a model');
   });
 
   it('offers a Discover link when no built-in models are installed', async () => {
@@ -275,10 +279,10 @@ describe('ProvidersPane active hero', () => {
       update_provider_field: new Error('nope'),
     });
     renderPane(makeConfig('builtin', [builtin, OLLAMA]));
-    const select = await screen.findByRole('combobox', {
-      name: 'Built-in model',
-    });
-    fireEvent.change(select, { target: { value: INSTALLED[0].id } });
+    fireEvent.click(
+      await screen.findByRole('button', { name: 'Built-in model' }),
+    );
+    fireEvent.click(screen.getByRole('option', { name: /Qwen3\.5 9B/ }));
     // No throw.
     await Promise.resolve();
   });
@@ -295,11 +299,12 @@ describe('ProvidersPane active hero', () => {
     expect(screen.getByRole('textbox', { name: 'Ollama URL' })).toHaveValue(
       'http://127.0.0.1:11434',
     );
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Active Ollama model',
     });
-    expect(select).toHaveValue('llama3.1:8b');
-    fireEvent.change(select, { target: { value: 'llama3.1:8b' } });
+    expect(trigger).toHaveTextContent('llama3.1:8b');
+    fireEvent.click(trigger);
+    fireEvent.click(screen.getByRole('option', { name: 'llama3.1:8b' }));
     await waitFor(() =>
       expect(invokeMock).toHaveBeenCalledWith('set_active_model', {
         model: 'llama3.1:8b',
@@ -322,10 +327,11 @@ describe('ProvidersPane active hero', () => {
     });
     const onSaved = vi.fn();
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]), { onSaved });
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Active Ollama model',
     });
-    fireEvent.change(select, { target: { value: 'llama3.2:3b' } });
+    fireEvent.click(trigger);
+    fireEvent.click(screen.getByRole('option', { name: 'llama3.2:3b' }));
     await waitFor(() =>
       expect(invokeMock).toHaveBeenCalledWith('set_active_model', {
         model: 'llama3.2:3b',
@@ -347,10 +353,11 @@ describe('ProvidersPane active hero', () => {
       set_active_model: new Error('nope'),
     });
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]), { onSaved });
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Active Ollama model',
     });
-    fireEvent.change(select, { target: { value: 'llama3.2:3b' } });
+    fireEvent.click(trigger);
+    fireEvent.click(screen.getByRole('option', { name: 'llama3.2:3b' }));
     await waitFor(() =>
       expect(invokeMock).toHaveBeenCalledWith('set_active_model', {
         model: 'llama3.2:3b',
@@ -898,7 +905,7 @@ describe('ProvidersPane robustness', () => {
     renderPane(
       makeConfig('builtin', [{ ...BUILTIN, model: INSTALLED[0].id }, OLLAMA]),
     );
-    await screen.findByRole('combobox', { name: 'Built-in model' });
+    await screen.findByRole('button', { name: 'Built-in model' });
     expect(screen.queryByText(/installed model/)).toBeNull();
   });
 
@@ -935,10 +942,10 @@ describe('ProvidersPane robustness', () => {
       },
     });
     renderPane(makeConfig('ollama', [BUILTIN, OLLAMA]));
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Active Ollama model',
     });
-    expect(select).toHaveValue('m1');
+    expect(trigger).toHaveTextContent('m1');
   });
 
   it('uses generic subtitles when provider URLs are empty', () => {
@@ -995,10 +1002,11 @@ describe('ProvidersPane robustness', () => {
     renderPane(
       makeConfig('builtin', [{ ...BUILTIN, model: noQuant.id }, OLLAMA]),
     );
-    const select = await screen.findByRole('combobox', {
+    const trigger = await screen.findByRole('button', {
       name: 'Built-in model',
     });
-    expect(select).toHaveValue(noQuant.id);
+    // With an empty quant the trigger shows the bare display name (no "· …").
+    expect(trigger.textContent).toBe('Qwen3.5 9B');
   });
 
   it('handles a config with no Ollama provider', () => {
diff --git a/src/settings/tabs/models/ProvidersPane.tsx b/src/settings/tabs/models/ProvidersPane.tsx
index 7c09c7d4..1706a5d6 100644
--- a/src/settings/tabs/models/ProvidersPane.tsx
+++ b/src/settings/tabs/models/ProvidersPane.tsx
@@ -24,9 +24,12 @@ import { SaveField } from '../../components/SaveField';
 import { OpenAiProviderCard, AddOpenAiProvider } from '../ProviderCards';
 import { useDebouncedSave } from '../../hooks/useDebouncedSave';
 import { useModelSelection } from '../../../hooks/useModelSelection';
+import { useModelCapabilities } from '../../../hooks/useModelCapabilities';
 import { isNonLocalUrl } from '../../../utils/isNonLocalUrl';
+import { formatContextWindow } from '../../../utils/contextWindow';
 import { configHelp } from '../../configHelpers';
 import { Tooltip } from '../../../components/Tooltip';
+import { ModelSelect, type ModelSelectItem } from './ModelSelect';
 import styles from '../../../styles/settings.module.css';
 import type { RawAppConfig, RawProvider } from '../../types';
 import type { EngineStatus, InstalledModel } from '../../../types/starter';
@@ -83,6 +86,11 @@ function posToCtx(pos: number): number {
 const CTX_TUNING_URL =
   'https://github.com/quiet-node/thuki/blob/main/docs/tuning-context-window.md#the-5-minute-benchmark-recipe';
 
+/** Bytes rendered as decimal gigabytes with one decimal (e.g. "8.2"). */
+function gb(bytes: number): string {
+  return (bytes / 1e9).toFixed(1);
+}
+
 /** One-line description shown under a provider's name. */
 function providerSubtitle(p: RawProvider): string {
   if (p.kind === 'builtin') return "Thuki's bundled llama.cpp engine";
@@ -225,6 +233,9 @@ export function ProvidersPane({
   const { activeModel, availableModels, setActiveModel, refreshModels } =
     useModelSelection();
 
+  // Per-model capabilities (vision/thinking) drive the built-in picker's pills.
+  const { capabilities } = useModelCapabilities();
+
   // The picker hook fetches once on mount; re-fetch whenever the active
   // provider changes so the hero's Model dropdown reflects the newly-active
   // provider's inventory instead of the previous provider's cached list.
@@ -355,6 +366,42 @@ export function ProvidersPane({
       .filter((name, i, all) => all.indexOf(name) !== i),
   );
 
+  // Built-in picker rows: name (quant-disambiguated only when a display name
+  // repeats), capability pills, a `size · context · maker · quant` sub-line, and
+  // a RAM-fit badge. Mirrors the Library pane's grammar so the two surfaces read
+  // the same.
+  const builtinItems: ModelSelectItem[] = installed.map((m) => {
+    const caps = capabilities[m.id];
+    const maker = m.origin || m.id.split(':')[0];
+    const totalBytes = m.size_bytes + (m.mmproj_bytes ?? 0);
+    const sub = [
+      `${gb(totalBytes)} GB`,
+      formatContextWindow(m.context_length ?? 0),
+      maker,
+      m.quant,
+    ]
+      .filter((part) => part !== '')
+      .join(' · ');
+    const quantSuffix =
+      duplicateDisplayNames.has(m.display_name) && m.quant !== ''
+        ? ` · ${m.quant}`
+        : '';
+    return {
+      id: m.id,
+      label: `${m.display_name}${quantSuffix}`,
+      sub,
+      vision: !!caps?.vision,
+      thinking: !!caps?.thinking,
+      fit: m.fit ?? null,
+    };
+  });
+
+  // Ollama exposes no capability metadata, so its rows fall back to the slug.
+  const ollamaItems: ModelSelectItem[] = availableModels.map((m) => ({
+    id: m,
+    label: m,
+  }));
+
   // Providers other than the active one, in a stable order.
   const otherProviders = providers.filter((p) => p.id !== activeId);
 
@@ -379,26 +426,13 @@ export function ProvidersPane({
           <div className={styles.heroModel}>
             <span className={styles.heroModelLabel}>Model</span>
             {installed.length > 0 ? (
-              <select
-                className={styles.dropdown}
-                aria-label="Built-in model"
+              <ModelSelect
                 value={builtinModelValue}
-                onChange={(e) => commitBuiltinModel(e.target.value)}
-              >
-                {builtinModelValue === '' ? (
-                  <option value="" disabled>
-                    Choose a model
-                  </option>
-                ) : null}
-                {installed.map((m) => (
-                  <option key={m.id} value={m.id}>
-                    {m.display_name}
-                    {duplicateDisplayNames.has(m.display_name) && m.quant !== ''
-                      ? ` · ${m.quant}`
-                      : ''}
-                  </option>
-                ))}
-              </select>
+                items={builtinItems}
+                onChange={commitBuiltinModel}
+                ariaLabel="Built-in model"
+                placeholder="Choose a model"
+              />
             ) : (
               <button
                 type="button"
@@ -448,18 +482,12 @@ export function ProvidersPane({
             <div className={styles.heroModel}>
               <span className={styles.heroModelLabel}>Model</span>
               {availableModels.length > 0 ? (
-                <select
-                  className={styles.dropdown}
-                  aria-label="Active Ollama model"
+                <ModelSelect
                   value={ollamaModelValue}
-                  onChange={(e) => commitOllamaModel(e.target.value)}
-                >
-                  {availableModels.map((m) => (
-                    <option key={m} value={m}>
-                      {m}
-                    </option>
-                  ))}
-                </select>
+                  items={ollamaItems}
+                  onChange={commitOllamaModel}
+                  ariaLabel="Active Ollama model"
+                />
               ) : (
                 <span className={styles.providerHint}>No models installed</span>
               )}
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index d9a47a05..9e1bc5d0 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -370,7 +370,6 @@
   padding-top: 13px;
   border-top: 1px solid var(--hair-soft);
 }
-.heroModel .dropdown,
 .heroModel .input {
   flex: 1;
   min-width: 0;

From 3bb13b0cad19587c9e77c14206c5bfff61db7593 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 15:03:49 -0500
Subject: [PATCH 81/89] fix(engine): surface llama-server load failures and
 flag unsupported models

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 docs/configurations.md                      |   3 +
 src-tauri/src/commands.rs                   | 150 ++++++++++++++++-
 src-tauri/src/config/defaults.rs            |  19 +++
 src-tauri/src/engine/process.rs             | 178 +++++++++++++++++++-
 src-tauri/src/engine/runner.rs              |  77 ++++++++-
 src-tauri/src/warmup.rs                     |  10 ++
 src/components/ErrorCard.tsx                |   3 +
 src/components/__tests__/ErrorCard.test.tsx |  34 ++++
 src/hooks/useModel.ts                       |   1 +
 9 files changed, 457 insertions(+), 18 deletions(-)

diff --git a/docs/configurations.md b/docs/configurations.md
index 611345d4..f47dca05 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -185,6 +185,9 @@ The table below also lists the baked-in safety limits that govern Thuki's commun
 | `DEFAULT_BUILTIN_IDLE_MINUTES`              | `5 min`  | No       | The fixed translation of the `keep_warm_inactivity_minutes = 0` sentinel for the built-in engine, not a separate preference. The built-in engine has no external daemon to defer to, so `0` ("use the provider's natural short default") resolves to this value. Users who want a different timeout set `keep_warm_inactivity_minutes` directly (`N` minutes, or `-1` for forever). | —      | The idle window the built-in engine applies when `keep_warm_inactivity_minutes` is `0`. After this many minutes of inactivity the sidecar is stopped to free RAM. |
 | `ENGINE_HEALTH_PROBE_TIMEOUT_SECS`          | `5 s`    | No       | Internal lifecycle contract between the runner and the engine process. A wedged-but-connected server must not park the poll loop forever; loopback probes are normally instant so 5 s is generous. The poll interval and deadline are the user-facing knobs. | —      | How long a single `/health` GET is allowed to take inside the startup poll loop. If the engine has accepted the TCP connection but stopped responding, this timeout causes the probe to return an error (treated as Wait and retried after `ENGINE_HEALTH_POLL_INTERVAL_MS`). |
 | `ENGINE_COMMAND_QUEUE_CAPACITY`             | `64`     | No       | Bounds memory under command bursts; 64 slots is ample for all UI-driven traffic (Ensure, Touch, SetIdleMinutes, Shutdown) under any realistic usage pattern. | —      | Capacity of the bounded `mpsc` channel that carries commands from `EngineHandle` to the runner actor task. Back-pressure from a full queue is not observable in normal use. |
+| `ENGINE_STDERR_TAIL_LINES`                  | `20`     | No       | Defense-in-depth bound on captured subprocess output: 20 lines cover the load-error block `llama-server` prints on exit without retaining its whole log. | —      | Number of trailing `llama-server` stderr lines the runner keeps so a crash can report the engine's own reason (e.g. `unknown model architecture`) instead of a generic message. |
+| `ENGINE_STDERR_TAIL_LINE_MAX_BYTES`         | `500`    | No       | Defense-in-depth bound on attacker-influenced data: a single pathological newline-less stderr line (e.g. an enormous architecture string echoed from crafted GGUF metadata) is capped during the read, so neither peak read buffering nor the retained tail can grow without limit. | —      | Maximum bytes buffered and retained per captured engine stderr line. |
+| `ENGINE_CRASH_FALLBACK_MESSAGE`             | `"engine process exited unexpectedly"` | No | Internal diagnostic fallback surfaced only when the real reason is unavailable; not meaningful to expose. | —      | Reason reported when the built-in engine process exits without leaving any stderr to capture (e.g. an external `SIGKILL`). |
 | `DOWNLOAD_PROGRESS_MIN_INTERVAL_MS`         | `500 ms` | No       | Pure IPC hygiene: a fast local connection can deliver thousands of chunks per second and the UI only needs a few updates per second, so throttling below the UI refresh rate is invisible to the user. | —      | Minimum interval between `Progress` events emitted while a model file downloads. An update is also emitted whenever at least 1% of the file has arrived since the last one, whichever comes first, and a final 100% update always precedes verification. |
 | `BLOB_HASH_BUFFER_BYTES`                     | `4 MiB`  | No       | Internal I/O buffer with no user-visible effect beyond verify speed. A few-MB buffer turns hashing a multi-GB blob into a few hundred reads instead of hundreds of thousands. | —      | Read-buffer size for streaming a downloaded blob through SHA-256 during verification. The common path hashes bytes as they download, so this applies only to a full-length partial left from a prior run or a resumed download's on-disk prefix. |
 | `MAX_HF_API_BODY_BYTES`                     | `4 MiB`  | No       | Defense-in-depth bound on attacker-controlled data from a remote service, mirroring `MAX_OLLAMA_TAGS_BODY_BYTES`. | —      | The largest Hugging Face API response body (repo file listings) Thuki will accept while resolving a model to download. Larger responses are rejected mid-stream and the request returns an error. |
diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
index eda7ca80..b27689c7 100644
--- a/src-tauri/src/commands.rs
+++ b/src-tauri/src/commands.rs
@@ -96,6 +96,11 @@ pub enum EngineErrorKind {
     /// The bundled engine's sidecar process failed to launch or crashed before
     /// passing its health check.
     EngineStartFailed,
+    /// The selected model's architecture is not supported by the bundled
+    /// engine build, so `llama-server` refused to load it. A setup nudge (try
+    /// another model), not a crash: the frontend renders it with the amber
+    /// warning accent rather than the red failure accent.
+    ModelUnsupported,
     /// The requested model has not been pulled yet (HTTP 404).
     ModelNotFound,
     /// No active model has been selected. The user must pick a model from
@@ -259,6 +264,64 @@ async fn fetch_builtin_vision(client: &reqwest::Client, base_url: &str) -> bool
     }
 }
 
+/// Condenses a multi-line engine failure detail into the single most
+/// informative line for the error subtitle (which renders as one paragraph).
+/// The captured stderr tail can be many timestamped lines, so this prefers the
+/// FIRST line mentioning an error or failure: llama.cpp prints the specific
+/// root cause first ("error loading model: <reason>") then generic trailers
+/// ("failed to load", "exiting due to model loading error"), so the first
+/// match is the actionable one. Falls back to the last non-empty line; a
+/// single-line detail (e.g. a health-check message) is returned unchanged.
+/// Classification upstream still sees the full detail.
+fn concise_detail(detail: &str) -> String {
+    let lines: Vec<&str> = detail
+        .lines()
+        .map(str::trim)
+        .filter(|line| !line.is_empty())
+        .collect();
+    match lines.as_slice() {
+        [] => detail.trim().to_string(),
+        [single] => (*single).to_string(),
+        many => many
+            .iter()
+            .find(|line| {
+                let lower = line.to_ascii_lowercase();
+                lower.contains("error") || lower.contains("failed")
+            })
+            .copied()
+            .unwrap_or(many[many.len() - 1])
+            .to_string(),
+    }
+}
+
+/// Maps a built-in engine start failure (the engine's own captured stderr, or
+/// a health-check message) onto a user-facing [`EngineError`]. A llama.cpp
+/// "unknown model architecture" failure means the bundled engine cannot run
+/// this model, so it becomes a `ModelUnsupported` nudge to pick another model;
+/// every other failure surfaces the concise reason under the generic
+/// engine-start title so OOM, context-size, and projector mismatches stay
+/// actionable.
+///
+/// Pure so the classification and exact copy are unit-tested without a Tauri
+/// runtime. Shared by `stream_builtin_chat` and `resolve_llm_transport`.
+pub fn engine_start_error(detail: &str) -> EngineError {
+    let lower = detail.to_ascii_lowercase();
+    if lower.contains("unknown model architecture") || lower.contains("unknown architecture") {
+        EngineError {
+            kind: EngineErrorKind::ModelUnsupported,
+            message: "Unsupported model\nThuki's engine doesn't support this arch yet. Try another model. Engine improves over time and may support it down the road.".to_string(),
+        }
+    } else {
+        EngineError {
+            kind: EngineErrorKind::EngineStartFailed,
+            message: format!(
+                "Thuki's engine could not start.\n{}",
+                concise_detail(detail)
+            ),
+        }
+    }
+}
+
 /// Runs the built-in-engine stage of a chat turn: mark activity, ensure the
 /// engine serves `target`, then stream via the `/v1` client at the engine's
 /// port. An engine activity guard is held for the whole turn (ensure,
@@ -341,10 +404,7 @@ pub(crate) async fn stream_builtin_chat(
             String::new()
         }
         Some(Err(crate::engine::runner::EnsureError::StartFailed(detail))) => {
-            on_chunk(StreamChunk::Error(EngineError {
-                kind: EngineErrorKind::EngineStartFailed,
-                message: format!("Thuki's engine could not start.\n{detail}"),
-            }));
+            on_chunk(StreamChunk::Error(engine_start_error(&detail)));
             String::new()
         }
     }
@@ -588,10 +648,7 @@ pub(crate) async fn resolve_llm_transport(
                     Err(TransportError::Superseded)
                 }
                 Some(Err(crate::engine::runner::EnsureError::StartFailed(detail))) => {
-                    Err(TransportError::Engine(EngineError {
-                        kind: EngineErrorKind::EngineStartFailed,
-                        message: format!("Thuki's engine could not start.\n{detail}"),
-                    }))
+                    Err(TransportError::Engine(engine_start_error(&detail)))
                 }
             }
         }
@@ -1420,6 +1477,73 @@ mod tests {
         )
     }
 
+    #[test]
+    fn engine_start_error_unknown_architecture_is_model_unsupported() {
+        let err = engine_start_error(
+            "error loading model: unknown model architecture: 'deepseek4_mtp_support'",
+        );
+        assert_eq!(err.kind, EngineErrorKind::ModelUnsupported);
+        assert_eq!(
+            err.message,
+            "Unsupported model\nThuki's engine doesn't support this arch yet. Try another model. Engine improves over time and may support it down the road."
+        );
+    }
+
+    #[test]
+    fn engine_start_error_matches_short_unknown_architecture_phrasing() {
+        assert_eq!(
+            engine_start_error("llama_model_load: unknown architecture").kind,
+            EngineErrorKind::ModelUnsupported
+        );
+    }
+
+    #[test]
+    fn engine_start_error_other_failures_surface_raw_reason() {
+        let err = engine_start_error("engine health check returned HTTP 500");
+        assert_eq!(err.kind, EngineErrorKind::EngineStartFailed);
+        assert_eq!(
+            err.message,
+            "Thuki's engine could not start.\nengine health check returned HTTP 500"
+        );
+    }
+
+    #[test]
+    fn engine_start_error_condenses_a_multiline_non_arch_tail() {
+        let detail = "0.06 I log_info: loading\n0.06 E common_init: error loading model: out of memory\n0.06 I srv exiting";
+        let err = engine_start_error(detail);
+        assert_eq!(err.kind, EngineErrorKind::EngineStartFailed);
+        assert_eq!(
+            err.message,
+            "Thuki's engine could not start.\n0.06 E common_init: error loading model: out of memory"
+        );
+    }
+
+    #[test]
+    fn concise_detail_returns_a_single_line_unchanged() {
+        assert_eq!(
+            concise_detail("engine did not become healthy before the deadline"),
+            "engine did not become healthy before the deadline"
+        );
+    }
+
+    #[test]
+    fn concise_detail_falls_back_to_the_last_line_without_an_error_marker() {
+        assert_eq!(concise_detail("first\nsecond\nthird"), "third");
+    }
+
+    #[test]
+    fn concise_detail_prefers_the_first_error_line_over_a_generic_trailer() {
+        // llama.cpp prints the root cause first, then generic "exiting due to
+        // ... error" trailers: the first match must win.
+        let tail = "I loading model\nE error loading model: out of memory\nE failed to load model\nE exiting due to model loading error";
+        assert_eq!(concise_detail(tail), "E error loading model: out of memory");
+    }
+
+    #[test]
+    fn concise_detail_empty_detail_is_empty() {
+        assert_eq!(concise_detail("  \n  "), "");
+    }
+
     #[tokio::test]
     async fn streams_tokens_from_valid_response() {
         let mut server = mockito::Server::new_async().await;
@@ -3379,6 +3503,16 @@ mod tests {
         async fn kill(&mut self) {
             let _ = self.exit_tx.send(true);
         }
+        fn stderr_tail(&self) -> String {
+            String::new()
+        }
+    }
+
+    #[test]
+    fn scripted_child_has_no_stderr_tail() {
+        let (exit_tx, exit_rx) = tokio::sync::watch::channel(false);
+        let child = ScriptedChild { exit_tx, exit_rx };
+        assert_eq!(crate::engine::process::EngineChild::stderr_tail(&child), "");
     }
 
     #[async_trait::async_trait]
diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs
index db09d3ec..d3f3bc84 100644
--- a/src-tauri/src/config/defaults.rs
+++ b/src-tauri/src/config/defaults.rs
@@ -129,6 +129,25 @@ pub const ENGINE_IDLE_CHECK_INTERVAL_SECS: u64 = 30;
 /// normal use.
 pub const ENGINE_COMMAND_QUEUE_CAPACITY: usize = 64;
 
+/// Number of trailing `llama-server` stderr lines the runner retains so a
+/// crash can report the engine's own reason (e.g. "unknown model
+/// architecture") instead of a generic message. Not user-tunable:
+/// defense-in-depth bound on subprocess output; 20 lines covers the final
+/// load-error block llama.cpp prints without retaining its whole log.
+pub const ENGINE_STDERR_TAIL_LINES: usize = 20;
+
+/// Maximum bytes buffered (and retained) per captured engine stderr line. Not
+/// user-tunable: defense-in-depth bound so one pathological newline-less line
+/// (e.g. an enormous architecture string echoed from crafted GGUF metadata)
+/// cannot force an unbounded read allocation; bytes past the cap are dropped.
+pub const ENGINE_STDERR_TAIL_LINE_MAX_BYTES: usize = 500;
+
+/// Reason reported when the built-in engine process exits without leaving any
+/// stderr we could capture (e.g. an external SIGKILL). Not user-tunable:
+/// internal diagnostic fallback surfaced only when the real reason is
+/// unavailable.
+pub const ENGINE_CRASH_FALLBACK_MESSAGE: &str = "engine process exited unexpectedly";
+
 /// Minimum interval between Progress events emitted during a model download.
 /// Bounds IPC channel traffic: a fast local connection can deliver thousands
 /// of chunks per second and the UI only needs a few updates per second. Not
diff --git a/src-tauri/src/engine/process.rs b/src-tauri/src/engine/process.rs
index 5c83b904..c5c1432f 100644
--- a/src-tauri/src/engine/process.rs
+++ b/src-tauri/src/engine/process.rs
@@ -8,13 +8,17 @@
 //! poll loop, command-line construction) lives in pure functions tested
 //! directly.
 
+use std::collections::VecDeque;
 use std::future::Future;
 use std::path::PathBuf;
+use std::sync::{Arc, Mutex};
 use std::time::Duration;
 
 use async_trait::async_trait;
 
-use crate::config::defaults::ENGINE_HEALTH_PROBE_TIMEOUT_SECS;
+use crate::config::defaults::{
+    ENGINE_HEALTH_PROBE_TIMEOUT_SECS, ENGINE_STDERR_TAIL_LINES, ENGINE_STDERR_TAIL_LINE_MAX_BYTES,
+};
 
 /// Everything needed to launch one engine process.
 #[derive(Debug, Clone, PartialEq)]
@@ -36,6 +40,12 @@ pub trait EngineChild: Send {
     async fn wait_exit(&mut self);
     /// Kills the process and waits for the exit to land.
     async fn kill(&mut self);
+    /// The captured tail of the process's stderr, ready to read once
+    /// `wait_exit` has resolved (which drains the stream to EOF). The runner
+    /// surfaces this as the crash reason so an engine load failure reports the
+    /// engine's own message instead of a generic string. Empty when the
+    /// process left no stderr.
+    fn stderr_tail(&self) -> String;
 }
 
 /// Spawn-and-probe seam between the runner actor and the operating system.
@@ -143,9 +153,82 @@ fn llama_server_args(args: &SpawnArgs) -> Vec<std::ffi::OsString> {
     argv
 }
 
-/// A spawned `llama-server` process.
+/// Pure: turns one captured line's raw bytes into a stored tail line. Lossy
+/// UTF-8 so invalid bytes from a corrupt stream never panic, with trailing
+/// whitespace (e.g. a `\r` from CRLF) trimmed.
+fn finalize_stderr_line(bytes: &[u8]) -> String {
+    String::from_utf8_lossy(bytes).trim_end().to_string()
+}
+
+/// Pure: feeds one chunk of stderr bytes through the bounded line accumulator.
+/// `current` carries the in-progress line across chunk boundaries; on each
+/// `\n` the completed line is finalized and pushed into the tail ring. Bytes
+/// past `ENGINE_STDERR_TAIL_LINE_MAX_BYTES` on a single line are dropped, so
+/// peak per-line memory stays bounded regardless of how rarely the stream
+/// emits a newline (a hard cap on read buffering, not just retained memory).
+fn ingest_stderr_chunk(chunk: &[u8], current: &mut Vec<u8>, tail: &mut VecDeque<String>) {
+    for &byte in chunk {
+        if byte == b'\n' {
+            push_stderr_line(
+                tail,
+                finalize_stderr_line(current),
+                ENGINE_STDERR_TAIL_LINES,
+            );
+            current.clear();
+        } else if current.len() < ENGINE_STDERR_TAIL_LINE_MAX_BYTES {
+            current.push(byte);
+        }
+    }
+}
+
+/// Pure: appends a captured line to the bounded tail ring, dropping the oldest
+/// line once `max_lines` is exceeded so only the trailing window is kept.
+fn push_stderr_line(buf: &mut VecDeque<String>, line: String, max_lines: usize) {
+    buf.push_back(line);
+    while buf.len() > max_lines {
+        buf.pop_front();
+    }
+}
+
+/// Pure: newline-joins the retained tail lines in order, borrowing each line.
+fn join_stderr_tail(buf: &VecDeque<String>) -> String {
+    buf.iter().map(String::as_str).collect::<Vec<_>>().join("\n")
+}
+
+/// Drains a child's stderr pipe into the bounded tail ring until EOF. Reads in
+/// fixed-size chunks straight from the pipe (no extra buffering layer) and
+/// delegates all splitting and bounding to [`ingest_stderr_chunk`], so a stream
+/// that never emits a newline cannot force an unbounded allocation.
+/// Coverage-off: thin I/O over the tested ingester; a trailing newline-less
+/// line (e.g. a process killed mid-line) is flushed after EOF or a read error.
+#[cfg_attr(coverage_nightly, coverage(off))]
+async fn pump_stderr(pipe: tokio::process::ChildStderr, tail: Arc<Mutex<VecDeque<String>>>) {
+    use tokio::io::AsyncReadExt;
+    let mut reader = pipe;
+    let mut chunk = [0u8; 4096];
+    let mut current: Vec<u8> = Vec::with_capacity(ENGINE_STDERR_TAIL_LINE_MAX_BYTES);
+    loop {
+        match reader.read(&mut chunk).await {
+            Ok(0) | Err(_) => break,
+            Ok(n) => ingest_stderr_chunk(&chunk[..n], &mut current, &mut tail.lock().unwrap()),
+        }
+    }
+    if !current.is_empty() {
+        push_stderr_line(
+            &mut tail.lock().unwrap(),
+            finalize_stderr_line(&current),
+            ENGINE_STDERR_TAIL_LINES,
+        );
+    }
+}
+
+/// A spawned `llama-server` process. `stderr_tail` is the shared bounded ring
+/// the reader task fills; the reader handle is joined on exit so the tail is
+/// complete before the runner reads it.
 struct TokioChild {
     inner: tokio::process::Child,
+    stderr_tail: Arc<Mutex<VecDeque<String>>>,
+    reader: Option<tokio::task::JoinHandle<()>>,
 }
 
 #[async_trait]
@@ -153,12 +236,26 @@ impl EngineChild for TokioChild {
     #[cfg_attr(coverage_nightly, coverage(off))]
     async fn wait_exit(&mut self) {
         let _ = self.inner.wait().await;
+        // Join the reader so the tail is drained to EOF before it is read; by
+        // reference, so a call dropped mid-await keeps the handle for a retry.
+        if let Some(reader) = self.reader.as_mut() {
+            let _ = reader.await;
+            self.reader = None;
+        }
     }
 
     #[cfg_attr(coverage_nightly, coverage(off))]
     async fn kill(&mut self) {
         let _ = self.inner.start_kill();
         let _ = self.inner.wait().await;
+        if let Some(reader) = self.reader.take() {
+            let _ = reader.await;
+        }
+    }
+
+    #[cfg_attr(coverage_nightly, coverage(off))]
+    fn stderr_tail(&self) -> String {
+        join_stderr_tail(&self.stderr_tail.lock().unwrap())
     }
 }
 
@@ -166,14 +263,29 @@ impl EngineChild for TokioChild {
 impl EngineProcess for TokioEngineProcess {
     #[cfg_attr(coverage_nightly, coverage(off))]
     async fn spawn(&self, args: &SpawnArgs) -> Result<Box<dyn EngineChild>, String> {
-        let child = tokio::process::Command::new(&self.binary)
+        let mut child = tokio::process::Command::new(&self.binary)
             .args(llama_server_args(args))
             .stdout(std::process::Stdio::null())
-            .stderr(std::process::Stdio::null())
+            // Capture stderr so a load failure (e.g. "unknown model
+            // architecture") reaches the user instead of being discarded.
+            .stderr(std::process::Stdio::piped())
             .kill_on_drop(true)
             .spawn()
             .map_err(|e| e.to_string())?;
-        Ok(Box::new(TokioChild { inner: child }))
+
+        let stderr_tail = Arc::new(Mutex::new(VecDeque::new()));
+        // Drain stderr into the bounded tail ring. The task ends when the pipe
+        // closes at process exit; `wait_exit`/`kill` join it.
+        let reader = child
+            .stderr
+            .take()
+            .map(|pipe| tokio::spawn(pump_stderr(pipe, Arc::clone(&stderr_tail))));
+
+        Ok(Box::new(TokioChild {
+            inner: child,
+            stderr_tail,
+            reader,
+        }))
     }
 
     #[cfg_attr(coverage_nightly, coverage(off))]
@@ -229,6 +341,62 @@ mod tests {
         );
     }
 
+    #[test]
+    fn finalize_stderr_line_is_lossy_and_trims_trailing() {
+        assert_eq!(finalize_stderr_line(b"hello"), "hello");
+        // Trailing CR (CRLF) and spaces are trimmed.
+        assert_eq!(finalize_stderr_line(b"hello\r"), "hello");
+        // Invalid UTF-8 never panics; it becomes the replacement char.
+        assert_eq!(finalize_stderr_line(&[b'h', b'i', 0xFF]), "hi\u{FFFD}");
+    }
+
+    #[test]
+    fn ingest_stderr_chunk_splits_on_newlines_and_carries_across_chunks() {
+        let mut tail = VecDeque::new();
+        let mut current = Vec::new();
+        // No newline yet: nothing pushed, line held in `current`.
+        ingest_stderr_chunk(b"ab", &mut current, &mut tail);
+        assert!(tail.is_empty());
+        // Completes "abc", then starts "d".
+        ingest_stderr_chunk(b"c\nd", &mut current, &mut tail);
+        assert_eq!(tail.iter().cloned().collect::<Vec<_>>(), vec!["abc"]);
+        ingest_stderr_chunk(b"\n", &mut current, &mut tail);
+        assert_eq!(tail.iter().cloned().collect::<Vec<_>>(), vec!["abc", "d"]);
+    }
+
+    #[test]
+    fn ingest_stderr_chunk_caps_an_overlong_newlineless_line() {
+        let mut tail = VecDeque::new();
+        let mut current = Vec::new();
+        // A flood longer than the per-line cap, with no newline, must not grow
+        // `current` past the cap: peak read buffering is bounded.
+        let flood = vec![b'x'; ENGINE_STDERR_TAIL_LINE_MAX_BYTES + 100];
+        ingest_stderr_chunk(&flood, &mut current, &mut tail);
+        assert_eq!(current.len(), ENGINE_STDERR_TAIL_LINE_MAX_BYTES);
+        assert!(tail.is_empty());
+        ingest_stderr_chunk(b"\n", &mut current, &mut tail);
+        assert_eq!(tail.len(), 1);
+        assert_eq!(tail[0].len(), ENGINE_STDERR_TAIL_LINE_MAX_BYTES);
+    }
+
+    #[test]
+    fn push_stderr_line_keeps_only_the_trailing_window() {
+        let mut buf = VecDeque::new();
+        push_stderr_line(&mut buf, "a".to_string(), 2);
+        push_stderr_line(&mut buf, "b".to_string(), 2);
+        push_stderr_line(&mut buf, "c".to_string(), 2);
+        assert_eq!(buf.iter().cloned().collect::<Vec<_>>(), vec!["b", "c"]);
+    }
+
+    #[test]
+    fn join_stderr_tail_newline_joins_in_order() {
+        let mut buf = VecDeque::new();
+        assert_eq!(join_stderr_tail(&buf), "");
+        push_stderr_line(&mut buf, "first".to_string(), 8);
+        push_stderr_line(&mut buf, "second".to_string(), 8);
+        assert_eq!(join_stderr_tail(&buf), "first\nsecond");
+    }
+
     #[test]
     fn llama_server_args_with_mmproj() {
         assert_eq!(
diff --git a/src-tauri/src/engine/runner.rs b/src-tauri/src/engine/runner.rs
index e8ea05f4..c97d0e92 100644
--- a/src-tauri/src/engine/runner.rs
+++ b/src-tauri/src/engine/runner.rs
@@ -22,7 +22,8 @@ use tokio::sync::{mpsc, oneshot, watch};
 use super::process::{poll_until_healthy, EngineChild, EngineProcess, SpawnArgs};
 use super::state::{step, Effect, EngineState, Event, Target};
 use crate::config::defaults::{
-    ENGINE_COMMAND_QUEUE_CAPACITY, ENGINE_HEALTH_DEADLINE_SECS, ENGINE_HEALTH_POLL_INTERVAL_MS,
+    ENGINE_COMMAND_QUEUE_CAPACITY, ENGINE_CRASH_FALLBACK_MESSAGE, ENGINE_HEALTH_DEADLINE_SECS,
+    ENGINE_HEALTH_POLL_INTERVAL_MS,
 };
 
 /// Snapshot of the engine lifecycle published through the status watch.
@@ -340,6 +341,18 @@ impl Core {
     }
 }
 
+/// Pure: picks the reason string carried by a `ChildCrashed` event. Returns
+/// the engine's captured stderr tail, trimmed, when it is not blank; otherwise
+/// the generic fallback (an external SIGKILL leaves no stderr to report).
+fn crash_reason(stderr_tail: &str) -> String {
+    let trimmed = stderr_tail.trim();
+    if trimmed.is_empty() {
+        ENGINE_CRASH_FALLBACK_MESSAGE.to_string()
+    } else {
+        trimmed.to_string()
+    }
+}
+
 /// What woke the actor loop.
 enum Wake {
     Cmd(Option<Command>),
@@ -436,12 +449,19 @@ async fn run_actor(
                 core.dispatch(Event::SpawnFailed(error)).await;
             }
             Wake::ChildExit => {
+                // Read the captured stderr tail before dropping the child so
+                // the crash reports the engine's own message (e.g. "unknown
+                // model architecture") instead of a generic string.
+                let reason = crash_reason(
+                    &core
+                        .child
+                        .as_ref()
+                        .map(|child| child.stderr_tail())
+                        .unwrap_or_default(),
+                );
                 core.child = None;
                 core.health = None;
-                core.dispatch(Event::ChildCrashed(
-                    "engine process exited unexpectedly".to_string(),
-                ))
-                .await;
+                core.dispatch(Event::ChildCrashed(reason)).await;
             }
             Wake::Tick => {
                 if in_flight.load(Ordering::SeqCst) > 0 {
@@ -485,6 +505,9 @@ mod tests {
         probes_served: usize,
         log: Vec<String>,
         current_exit: Option<Arc<watch::Sender<bool>>>,
+        /// Stderr tail handed to the next spawned child, mirroring the real
+        /// process's captured stderr.
+        next_stderr_tail: String,
     }
 
     /// Scriptable [`EngineProcess`]: records every spawn, hands out
@@ -524,6 +547,11 @@ mod tests {
                 .push_back(message.to_string());
         }
 
+        /// Sets the stderr tail for the next spawned child only; spawning takes it.
+        fn push_stderr_tail(&self, tail: &str) {
+            self.inner.lock().unwrap().next_stderr_tail = tail.to_string();
+        }
+
         /// Makes the live child exit without a kill being issued.
         fn crash_current(&self) {
             let exit = {
@@ -544,6 +572,7 @@ mod tests {
         inner: Arc<Mutex<FakeInner>>,
         exit_tx: Arc<watch::Sender<bool>>,
         exit_rx: watch::Receiver<bool>,
+        stderr_tail: String,
     }
 
     #[async_trait::async_trait]
@@ -552,6 +581,10 @@ mod tests {
             let _ = self.exit_rx.wait_for(|exited| *exited).await;
         }
 
+        fn stderr_tail(&self) -> String {
+            self.stderr_tail.clone()
+        }
+
         async fn kill(&mut self) {
             {
                 let mut inner = self.inner.lock().unwrap();
@@ -580,10 +613,12 @@ mod tests {
             let (exit_tx, exit_rx) = watch::channel(false);
             let exit_tx = Arc::new(exit_tx);
             inner.current_exit = Some(Arc::clone(&exit_tx));
+            let stderr_tail = std::mem::take(&mut inner.next_stderr_tail);
             Ok(Box::new(FakeChild {
                 inner: Arc::clone(&self.inner),
                 exit_tx,
                 exit_rx,
+                stderr_tail,
             }))
         }
 
@@ -1016,6 +1051,38 @@ mod tests {
         assert_eq!(process.snapshot(|i| i.kills), 0);
     }
 
+    #[test]
+    fn crash_reason_prefers_stderr_tail_over_fallback() {
+        assert_eq!(crash_reason(""), ENGINE_CRASH_FALLBACK_MESSAGE);
+        assert_eq!(crash_reason("   \n  "), ENGINE_CRASH_FALLBACK_MESSAGE); // whitespace-only
+        assert_eq!(
+            crash_reason("error loading model: unknown model architecture: 'x'\n"),
+            "error loading model: unknown model architecture: 'x'" // newline trimmed
+        );
+    }
+
+    /// A crash surfaces the engine's captured stderr as the failure reason: the
+    /// tail set on the fake process must appear in the status `error` field
+    /// once the state reaches "failed", instead of the generic exit message.
+    #[tokio::test(start_paused = true)]
+    async fn crash_surfaces_captured_stderr_reason() {
+        let process = FakeProcess::new();
+        let handle = spawn_handle(&process, 0);
+
+        process.push_stderr_tail(
+            "error loading model: unknown model architecture: 'deepseek4_mtp_support'",
+        );
+        load(&handle, &process, "a").await;
+        process.crash_current(); // the child exits without a kill being issued
+
+        let mut rx = handle.status();
+        wait_for_state(&mut rx, "failed").await;
+        assert_eq!(
+            rx.borrow().error.as_deref(),
+            Some("error loading model: unknown model architecture: 'deepseek4_mtp_support'")
+        );
+    }
+
     // ── Runner: idle unload ────────────────────────────────────────────
 
     #[tokio::test(start_paused = true)]
diff --git a/src-tauri/src/warmup.rs b/src-tauri/src/warmup.rs
index 1dc89c0c..9857b174 100644
--- a/src-tauri/src/warmup.rs
+++ b/src-tauri/src/warmup.rs
@@ -1835,6 +1835,16 @@ mod tests {
         async fn kill(&mut self) {
             let _ = self.exit_tx.send(true);
         }
+        fn stderr_tail(&self) -> String {
+            String::new()
+        }
+    }
+
+    #[test]
+    fn instant_child_has_no_stderr_tail() {
+        let (exit_tx, exit_rx) = tokio::sync::watch::channel(false);
+        let child = InstantChild { exit_tx, exit_rx };
+        assert_eq!(crate::engine::process::EngineChild::stderr_tail(&child), "");
     }
 
     #[async_trait::async_trait]
diff --git a/src/components/ErrorCard.tsx b/src/components/ErrorCard.tsx
index 9deba6ad..e99ae7ad 100644
--- a/src/components/ErrorCard.tsx
+++ b/src/components/ErrorCard.tsx
@@ -9,6 +9,9 @@ const barColors: Record<EngineErrorKind, string> = {
   EngineUnreachable: '#ef4444',
   // Same red as EngineUnreachable: a sidecar crash is equally severe.
   EngineStartFailed: '#ef4444',
+  // Amber, not red: an unsupported model architecture is a "pick another
+  // model" nudge, not an engine crash, so it shares the warning hue.
+  ModelUnsupported: '#f59e0b',
   ModelNotFound: '#f59e0b',
   // Same accent as ModelNotFound: this is a configuration/setup nudge,
   // not a daemon failure, so the warning hue (amber) is the right read.
diff --git a/src/components/__tests__/ErrorCard.test.tsx b/src/components/__tests__/ErrorCard.test.tsx
index 18a54df2..8652ec09 100644
--- a/src/components/__tests__/ErrorCard.test.tsx
+++ b/src/components/__tests__/ErrorCard.test.tsx
@@ -69,6 +69,40 @@ describe('ErrorCard', () => {
     expect(bar?.getAttribute('data-kind')).toBe('ModelNotFound');
   });
 
+  it('applies amber accent bar for ModelUnsupported', () => {
+    const { container } = render(
+      <ErrorCard
+        kind="ModelUnsupported"
+        message={
+          "Unsupported model\nThuki's engine doesn't support this arch yet."
+        }
+      />,
+    );
+    const bar = container.querySelector('[data-error-bar]');
+    expect(bar?.getAttribute('data-kind')).toBe('ModelUnsupported');
+    // JSDOM reports #f59e0b (the amber shared with ModelNotFound) in rgb() form.
+    expect((bar as HTMLElement | null)?.style.background).toBe(
+      'rgb(245, 158, 11)',
+    );
+  });
+
+  it('renders the ModelUnsupported copy (title and subtitle)', () => {
+    render(
+      <ErrorCard
+        kind="ModelUnsupported"
+        message={
+          "Unsupported model\nThuki's engine doesn't support this arch yet. Try another model. Engine improves over time and may support it down the road."
+        }
+      />,
+    );
+    expect(screen.getByText('Unsupported model')).toBeInTheDocument();
+    expect(
+      screen.getByText(
+        "Thuki's engine doesn't support this arch yet. Try another model. Engine improves over time and may support it down the road.",
+      ),
+    ).toBeInTheDocument();
+  });
+
   it('applies neutral accent bar for Other', () => {
     const { container } = render(
       <ErrorCard kind="Other" message={'Something went wrong\nHTTP 500'} />,
diff --git a/src/hooks/useModel.ts b/src/hooks/useModel.ts
index 266e9b86..06ccf2ef 100644
--- a/src/hooks/useModel.ts
+++ b/src/hooks/useModel.ts
@@ -13,6 +13,7 @@ import type {
 export type EngineErrorKind =
   | 'EngineUnreachable'
   | 'EngineStartFailed'
+  | 'ModelUnsupported'
   | 'ModelNotFound'
   | 'NoModelSelected'
   | 'Other';

From 6724386c7dbe57a3134d1c418ba6a02f592dc962 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 15:03:54 -0500
Subject: [PATCH 82/89] feat(settings): caution notice and per-download confirm
 in Browse all

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/components/DownloadProgress.tsx           | 14 ++--
 .../tabs/models/BrowseAllPane.module.css      | 29 +++++--
 .../tabs/models/BrowseAllPane.test.tsx        | 78 ++++++++++++++++---
 src/settings/tabs/models/BrowseAllPane.tsx    | 40 +++++++++-
 .../tabs/models/DiscoverPane.test.tsx         |  2 +
 .../tabs/models/DownloadRiskConfirm.test.tsx  | 30 +++++++
 .../tabs/models/DownloadRiskConfirm.tsx       | 44 +++++++++++
 7 files changed, 210 insertions(+), 27 deletions(-)
 create mode 100644 src/settings/tabs/models/DownloadRiskConfirm.test.tsx
 create mode 100644 src/settings/tabs/models/DownloadRiskConfirm.tsx

diff --git a/src/components/DownloadProgress.tsx b/src/components/DownloadProgress.tsx
index 835e1e23..509b5b14 100644
--- a/src/components/DownloadProgress.tsx
+++ b/src/components/DownloadProgress.tsx
@@ -275,7 +275,7 @@ export function DownloadProgress({
   }
 }
 
-function Card({ children }: { children: React.ReactNode }) {
+export function Card({ children }: { children: React.ReactNode }) {
   return (
     <div
       data-download-progress
@@ -294,7 +294,7 @@ function Card({ children }: { children: React.ReactNode }) {
   );
 }
 
-function Headline({ children }: { children: React.ReactNode }) {
+export function Headline({ children }: { children: React.ReactNode }) {
   return (
     <p
       style={{
@@ -311,7 +311,7 @@ function Headline({ children }: { children: React.ReactNode }) {
   );
 }
 
-function Detail({
+export function Detail({
   children,
   warn = false,
 }: {
@@ -503,7 +503,11 @@ interface FlowButtonProps {
   primary?: boolean;
 }
 
-function FlowButton({ label, onClick, primary = false }: FlowButtonProps) {
+export function FlowButton({
+  label,
+  onClick,
+  primary = false,
+}: FlowButtonProps) {
   return (
     <button
       onClick={onClick}
@@ -526,7 +530,7 @@ function FlowButton({ label, onClick, primary = false }: FlowButtonProps) {
   );
 }
 
-function ButtonRow({ children }: { children: React.ReactNode }) {
+export function ButtonRow({ children }: { children: React.ReactNode }) {
   return (
     <div style={{ display: 'flex', gap: 8, marginTop: 4 }}>{children}</div>
   );
diff --git a/src/settings/tabs/models/BrowseAllPane.module.css b/src/settings/tabs/models/BrowseAllPane.module.css
index 82702f3e..9758c88c 100644
--- a/src/settings/tabs/models/BrowseAllPane.module.css
+++ b/src/settings/tabs/models/BrowseAllPane.module.css
@@ -80,20 +80,33 @@
   background: var(--elev-2);
 }
 
-/* ── Sub-bar: sort label ──────────────────────────────────────────────── */
+/* ── Notice: live-fetch / do-your-own-research caution ────────────────── */
 
-.subbar {
+/* Browse-all is an unfiltered live Hugging Face fetch, so a calm caution line
+ * sets the expectation that results are unreviewed and downloads are at the
+ * user's own risk. No box: just the statement between two amber icons, so it
+ * reads as a quiet heads-up rather than an alarm. */
+.notice {
   display: flex;
   align-items: center;
+  justify-content: center;
+  gap: 9px;
   margin-top: 11px;
-  padding-bottom: 11px;
-  border-bottom: 1px solid var(--hair-soft);
+  font-size: 11px;
+  color: var(--t2);
+  line-height: 1.45;
+  text-align: center;
 }
 
-.sort {
-  margin-left: auto;
-  font-size: 11.5px;
-  color: var(--t2);
+.notice svg {
+  width: 14px;
+  height: 14px;
+  stroke: var(--tight);
+  stroke-width: 1.7;
+  fill: none;
+  stroke-linecap: round;
+  stroke-linejoin: round;
+  flex: none;
 }
 
 /* ── Result list ──────────────────────────────────────────────────────── */
diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 04e061db..0641de01 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -154,6 +154,16 @@ async function flush() {
   });
 }
 
+/**
+ * Clicks a quant's Download button, then accepts the Browse-all
+ * at-your-own-risk confirmation ("Download anyway"), mirroring a real download
+ * start: every Browse-all download is gated behind this per-download confirm.
+ */
+function confirmDownload(button: HTMLElement) {
+  fireEvent.click(button);
+  fireEvent.click(screen.getByRole('button', { name: 'Download anyway' }));
+}
+
 beforeEach(() => {
   invokeMock.mockReset();
   lastChannel = null;
@@ -467,7 +477,7 @@ describe('BrowseAllPane', () => {
     const downloadButtons = screen.getAllByRole('button', {
       name: 'Download',
     });
-    fireEvent.click(downloadButtons[1]);
+    confirmDownload(downloadButtons[1]);
     await flush();
     expect(invokeMock).toHaveBeenCalledWith(
       'download_repo_model',
@@ -512,7 +522,7 @@ describe('BrowseAllPane', () => {
       .closest('[data-row]') as HTMLElement;
     fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
     act(() => {
       lastChannel?.simulateMessage({ type: 'AllDone' });
@@ -528,7 +538,7 @@ describe('BrowseAllPane', () => {
       .closest('[data-row]') as HTMLElement;
     fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
     expect(screen.getByTestId('download-figures')).toBeInTheDocument();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
@@ -548,7 +558,7 @@ describe('BrowseAllPane', () => {
 
     fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
     act(() => {
       lastChannel?.simulateMessage({
@@ -576,7 +586,7 @@ describe('BrowseAllPane', () => {
       .closest('[data-row]') as HTMLElement;
     fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
 
     act(() => lastChannel?.simulateMessage({ type: 'Verifying' }));
@@ -625,7 +635,7 @@ describe('BrowseAllPane', () => {
     await expandRepo();
     // Two quants, two download buttons before any install.
     expect(screen.getAllByRole('button', { name: 'Download' })).toHaveLength(2);
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
     expect(screen.getByTestId('download-figures')).toBeInTheDocument();
     act(() => lastChannel?.simulateMessage({ type: 'AllDone' }));
@@ -692,7 +702,7 @@ describe('BrowseAllPane', () => {
       },
     });
     await expandRepo();
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
     expect(screen.getByTestId('download-figures')).toBeInTheDocument();
     fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
@@ -734,7 +744,7 @@ describe('BrowseAllPane', () => {
     fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
     // Start the first quant's download.
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
     // The active quant shows progress...
     expect(screen.getByTestId('download-figures')).toBeInTheDocument();
@@ -744,7 +754,7 @@ describe('BrowseAllPane', () => {
     const others = screen.getAllByRole('button', { name: 'Download' });
     expect(others).toHaveLength(1);
     expect(others[0]).toBeEnabled();
-    fireEvent.click(others[0]);
+    confirmDownload(others[0]);
     await flush();
     const repoStarts = invokeMock.mock.calls.filter(
       (c: unknown[]) => c[0] === 'download_repo_model',
@@ -759,7 +769,7 @@ describe('BrowseAllPane', () => {
       .closest('[data-row]') as HTMLElement;
     fireEvent.click(within(row).getByRole('button', { name: 'Show files' }));
     await flush();
-    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    confirmDownload(screen.getAllByRole('button', { name: 'Download' })[0]);
     await flush();
     act(() => {
       lastChannel?.simulateMessage({
@@ -855,4 +865,52 @@ describe('BrowseAllPane', () => {
       screen.queryByRole('button', { name: 'Load more' }),
     ).not.toBeInTheDocument();
   });
+
+  it('shows the live-fetch caution notice', async () => {
+    await renderPane();
+    expect(
+      screen.getByText(
+        'Live from Hugging Face. Quality and safety vary. Research any model before you download it.',
+      ),
+    ).toBeInTheDocument();
+  });
+
+  it('confirms before a fresh download and only starts on accept', async () => {
+    await renderPane();
+    await expandRepo();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    // The confirm replaces the download control; nothing is downloaded yet.
+    expect(screen.getByText('Before you download')).toBeInTheDocument();
+    expect(
+      invokeMock.mock.calls.filter(
+        (c: unknown[]) => c[0] === 'download_repo_model',
+      ),
+    ).toHaveLength(0);
+
+    fireEvent.click(screen.getByRole('button', { name: 'Download anyway' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_repo_model',
+      expect.objectContaining({
+        repo: 'google/gemma-4-12b-it-GGUF',
+        file: 'gemma-q4.gguf',
+      }),
+    );
+  });
+
+  it('cancelling the confirm restores the download control without downloading', async () => {
+    await renderPane();
+    await expandRepo();
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[0]);
+    expect(screen.getByText('Before you download')).toBeInTheDocument();
+
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    expect(screen.queryByText('Before you download')).not.toBeInTheDocument();
+    expect(screen.getAllByRole('button', { name: 'Download' })).toHaveLength(2);
+    expect(
+      invokeMock.mock.calls.filter(
+        (c: unknown[]) => c[0] === 'download_repo_model',
+      ),
+    ).toHaveLength(0);
+  });
 });
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index 384bd699..fd6a47ff 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -24,6 +24,7 @@ import {
 } from '../../../contexts/DownloadsContext';
 import { downloadKey } from '../../../hooks/downloadKey';
 import { CapabilityPills } from './CapabilityPills';
+import { DownloadRiskConfirm } from './DownloadRiskConfirm';
 import { useHfSearch } from './useHfSearch';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
@@ -81,6 +82,14 @@ const CHEVRON_ICON = (
     <path d="M6 9l6 6 6-6" />
   </svg>
 );
+// Amber caution triangle bookending the live-fetch notice.
+const CAUTION_ICON = (
+  <svg viewBox="0 0 24 24" aria-hidden="true">
+    <path d="M10.3 3.9 1.8 18a2 2 0 0 0 1.7 3h16.9a2 2 0 0 0 1.7-3L13.7 3.9a2 2 0 0 0-3.4 0Z" />
+    <path d="M12 9v4" />
+    <path d="M12 17h.01" />
+  </svg>
+);
 interface BrowseAllPaneProps {
   /** Lift a fresh config snapshot after a successful install. */
   onSaved: (next: RawAppConfig) => void;
@@ -129,9 +138,12 @@ export function BrowseAllPane({ onSaved }: BrowseAllPaneProps) {
         })}
       </div>
 
-      <div className={styles.subbar}>
-        <span className={styles.sort}>Most downloaded</span>
-      </div>
+      <p className={styles.notice}>
+        {CAUTION_ICON}
+        Live from Hugging Face. Quality and safety vary. Research any model
+        before you download it.
+        {CAUTION_ICON}
+      </p>
 
       <div className={styles.list}>
         {loading ? <p className={styles.state}>Searching…</p> : null}
@@ -328,6 +340,10 @@ function QuantRow({ file, repo, downloads, onSaved, refetch }: QuantRowProps) {
   const { clear } = downloads;
   const downloading = entry !== undefined;
   const phase = entry?.state.phase;
+  // Browse-all is a live Hugging Face fetch, so a fresh download click first
+  // asks the user to accept an unreviewed third-party model. Resuming an
+  // already-accepted partial download skips this.
+  const [confirming, setConfirming] = useState(false);
 
   // A finished install: the backend recorded the model, so lift the fresh config
   // and re-read the listing (the quant flips to its installed state) and drop
@@ -368,6 +384,22 @@ function QuantRow({ file, repo, downloads, onSaved, refetch }: QuantRowProps) {
       ? Math.min(100, Math.floor((file.partial_bytes / file.size_bytes) * 100))
       : 0;
 
+  if (confirming) {
+    return (
+      <div className={styles.quantRow}>
+        <div style={{ flex: 1, minWidth: 0 }}>
+          <DownloadRiskConfirm
+            onConfirm={() => {
+              setConfirming(false);
+              downloads.startRepoDownload(repo, file.file);
+            }}
+            onCancel={() => setConfirming(false)}
+          />
+        </div>
+      </div>
+    );
+  }
+
   return (
     <div className={styles.quantRow}>
       <span className={styles.quantName}>{file.file}</span>
@@ -426,7 +458,7 @@ function QuantRow({ file, repo, downloads, onSaved, refetch }: QuantRowProps) {
                 type="button"
                 className={styles.quantGet}
                 aria-label="Download"
-                onClick={() => downloads.startRepoDownload(repo, file.file)}
+                onClick={() => setConfirming(true)}
               >
                 {DOWNLOAD_ICON}
               </button>
diff --git a/src/settings/tabs/models/DiscoverPane.test.tsx b/src/settings/tabs/models/DiscoverPane.test.tsx
index abf1fe36..afeccadc 100644
--- a/src/settings/tabs/models/DiscoverPane.test.tsx
+++ b/src/settings/tabs/models/DiscoverPane.test.tsx
@@ -248,6 +248,8 @@ describe('DiscoverPane download persistence', () => {
       expect(screen.getByText('gemma-q4.gguf')).toBeInTheDocument(),
     );
     fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    // Browse-all gates every download behind a per-download confirm.
+    fireEvent.click(screen.getByRole('button', { name: 'Download anyway' }));
     await flush();
     act(() =>
       channel?.simulateMessage({
diff --git a/src/settings/tabs/models/DownloadRiskConfirm.test.tsx b/src/settings/tabs/models/DownloadRiskConfirm.test.tsx
new file mode 100644
index 00000000..699cbda1
--- /dev/null
+++ b/src/settings/tabs/models/DownloadRiskConfirm.test.tsx
@@ -0,0 +1,30 @@
+import { fireEvent, render, screen } from '@testing-library/react';
+import { describe, expect, it, vi } from 'vitest';
+
+import { DownloadRiskConfirm } from './DownloadRiskConfirm';
+
+describe('DownloadRiskConfirm', () => {
+  it('renders the title and the at-your-own-risk body', () => {
+    render(<DownloadRiskConfirm onConfirm={() => {}} onCancel={() => {}} />);
+    expect(screen.getByText('Before you download')).toBeInTheDocument();
+    expect(
+      screen.getByText(
+        'Models on Hugging Face can be low-quality, uncensored, or unsafe. Do your own research; download and run at your own risk.',
+      ),
+    ).toBeInTheDocument();
+  });
+
+  it('calls onConfirm when Download anyway is clicked', () => {
+    const onConfirm = vi.fn();
+    render(<DownloadRiskConfirm onConfirm={onConfirm} onCancel={() => {}} />);
+    fireEvent.click(screen.getByRole('button', { name: 'Download anyway' }));
+    expect(onConfirm).toHaveBeenCalledOnce();
+  });
+
+  it('calls onCancel when Cancel is clicked', () => {
+    const onCancel = vi.fn();
+    render(<DownloadRiskConfirm onConfirm={() => {}} onCancel={onCancel} />);
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    expect(onCancel).toHaveBeenCalledOnce();
+  });
+});
diff --git a/src/settings/tabs/models/DownloadRiskConfirm.tsx b/src/settings/tabs/models/DownloadRiskConfirm.tsx
new file mode 100644
index 00000000..b54b74ce
--- /dev/null
+++ b/src/settings/tabs/models/DownloadRiskConfirm.tsx
@@ -0,0 +1,44 @@
+/**
+ * Inline pre-download confirmation for Browse-all (Hugging Face) models.
+ *
+ * Browse-all results are an unfiltered live fetch from Hugging Face, so the
+ * download click is the point where the user accepts an unreviewed
+ * third-party model. This card replaces the quant row's download control
+ * until the user confirms or backs out. The copy and the at-your-own-risk
+ * framing live here so the wording is owned in one place and unit-tested; the
+ * card chrome reuses the download flow's primitives so the two read as one
+ * family.
+ */
+import {
+  ButtonRow,
+  Card,
+  Detail,
+  FlowButton,
+  Headline,
+} from '../../../components/DownloadProgress';
+
+export interface DownloadRiskConfirmProps {
+  /** Proceed with the download. */
+  onConfirm: () => void;
+  /** Back out and restore the download control. */
+  onCancel: () => void;
+}
+
+export function DownloadRiskConfirm({
+  onConfirm,
+  onCancel,
+}: DownloadRiskConfirmProps) {
+  return (
+    <Card>
+      <Headline>Before you download</Headline>
+      <Detail>
+        Models on Hugging Face can be low-quality, uncensored, or unsafe. Do
+        your own research; download and run at your own risk.
+      </Detail>
+      <ButtonRow>
+        <FlowButton label="Cancel" onClick={onCancel} />
+        <FlowButton label="Download anyway" primary onClick={onConfirm} />
+      </ButtonRow>
+    </Card>
+  );
+}

From 9030528f485387f8ecacc8d4cf77e8aad3a99926 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 15:10:52 -0500
Subject: [PATCH 83/89] chore(onboarding): note more models live in Settings

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/view/onboarding/ModelCheckStep.tsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/view/onboarding/ModelCheckStep.tsx b/src/view/onboarding/ModelCheckStep.tsx
index 3e014073..4af92b2f 100644
--- a/src/view/onboarding/ModelCheckStep.tsx
+++ b/src/view/onboarding/ModelCheckStep.tsx
@@ -410,8 +410,8 @@ const BuiltinShell = forwardRef<HTMLDivElement, { children: React.ReactNode }>(
               maxWidth: 560,
             }}
           >
-            Pick a starter brain for Thuki. Downloads once, then runs fully
-            offline.
+            Pick a starter brain for Thuki. More models are available later in
+            Settings.
           </p>
 
           {children}

From 2f180d4e94a8095b2b535a31fd9bfe8dc0f60383 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 15:59:22 -0500
Subject: [PATCH 84/89] docs(openai): correct reasoning note; off-mode thinking
 is shown, not dropped

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/openai.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src-tauri/src/openai.rs b/src-tauri/src/openai.rs
index 09420ec1..5d3582d9 100644
--- a/src-tauri/src/openai.rs
+++ b/src-tauri/src/openai.rs
@@ -231,10 +231,11 @@ fn oversize_sse_line_error() -> EngineError {
 /// ByteDance Seed-OSS). `false`/`0` answers directly; `true`/`-1` reasons.
 ///
 /// Families with no template switch (DeepSeek-R1 + distills, QwQ, gpt-oss
-/// Harmony, MiniMax, EXAONE, Phi-4-reasoning, ...) reason regardless; their
-/// output is dropped at the stream layer when thinking is off (see
-/// [`stream_openai_chat`]), so nothing is ever shown even though the compute
-/// cannot be stopped on this engine.
+/// Harmony, MiniMax, EXAONE, Phi-4-reasoning, ...) reason regardless of this
+/// switch: the compute cannot be stopped on this engine. Their reasoning is
+/// not suppressed; [`stream_openai_chat`] surfaces any `reasoning_content` in
+/// the thinking block (always shown, never hidden), so the chain of thought is
+/// presented cleanly rather than running invisibly.
 ///
 /// Only the bundled engine ([`V1Flavor::Builtin`]) receives the kwargs; the
 /// fields are llama.cpp-specific and an arbitrary OpenAI-compatible server may

From 750cf8bb6da07c82fdfce1c36a197814cea282d9 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 15:59:27 -0500
Subject: [PATCH 85/89] fix(discover): stop HF search Load more refetching the
 capped page forever

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/models/mod.rs | 39 +++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index d42fd39a..aecd40fe 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -1783,11 +1783,14 @@ where
 /// Pure parse of an `/api/models` search body into a page of summary rows.
 /// Non-chat and empty-id rows are dropped per [`search_entry_to_summary`];
 /// `has_more` is set from the raw entry count against `limit` so dropped rows
-/// never cut pagination short.
+/// never cut pagination short, and is forced `false` once `limit` reaches the
+/// [`HF_SEARCH_LIMIT_MAX`] ceiling: requests are clamped to that ceiling, so a
+/// full page there would refetch the same capped rows forever. "Load more"
+/// stops at the ceiling instead.
 pub fn parse_search_results(body: &[u8], limit: usize) -> Result<HfSearchPage, String> {
     let entries: Vec<HfSearchEntry> = serde_json::from_slice(body)
         .map_err(|e| format!("failed to decode Hugging Face search response: {e}"))?;
-    let has_more = entries.len() >= limit;
+    let has_more = entries.len() >= limit && limit < HF_SEARCH_LIMIT_MAX;
     let rows = entries
         .into_iter()
         .filter_map(search_entry_to_summary)
@@ -5307,6 +5310,38 @@ mod tests {
         assert!(!parse_search_results(body.as_bytes(), 3).unwrap().has_more);
     }
 
+    #[test]
+    fn parse_search_results_stops_paginating_at_the_ceiling() {
+        let page_of = |n: usize| {
+            let entries: Vec<_> = (0..n)
+                .map(|i| {
+                    serde_json::json!({
+                        "id": format!("org/m{i}-GGUF"),
+                        "downloads": 1,
+                        "pipeline_tag": "text-generation"
+                    })
+                })
+                .collect();
+            serde_json::Value::Array(entries).to_string()
+        };
+        // A full page exactly at the clamp ceiling reports no more: requests are
+        // clamped to HF_SEARCH_LIMIT_MAX, so paging past it would refetch the
+        // same capped rows forever and never let "Load more" settle.
+        let full = page_of(HF_SEARCH_LIMIT_MAX);
+        assert!(
+            !parse_search_results(full.as_bytes(), HF_SEARCH_LIMIT_MAX)
+                .unwrap()
+                .has_more
+        );
+        // One step below the ceiling, a full page still invites another fetch.
+        let below = page_of(HF_SEARCH_LIMIT_MAX - 1);
+        assert!(
+            parse_search_results(below.as_bytes(), HF_SEARCH_LIMIT_MAX - 1)
+                .unwrap()
+                .has_more
+        );
+    }
+
     #[test]
     fn parse_search_results_rejects_invalid_json() {
         let err = parse_search_results(b"not json", 30).unwrap_err();

From f9b1d88b370135b71ae603d453f9124cc780d2aa Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 16:29:29 -0500
Subject: [PATCH 86/89] fix(engine): reset built-in warm-up dedup when the
 engine unloads

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/lib.rs    |  8 ++++++++
 src-tauri/src/warmup.rs | 30 ++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 6fd5e8eb..5994d11d 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -2310,6 +2310,14 @@ pub fn run() {
                 tauri::async_runtime::spawn(async move {
                     while status_rx.changed().await.is_ok() {
                         let status = status_rx.borrow_and_update().clone();
+                        // A load left memory (idle-unload, model switch, crash):
+                        // drop the built-in warm-up dedup so the next load primes
+                        // fresh even when the OS reuses the same port. The dedup
+                        // is keyed on port, so a stale primed record would
+                        // otherwise skip the cold reload's prime.
+                        if status.state != "loaded" {
+                            status_handle.state::<warmup::BuiltinWarmState>().reset();
+                        }
                         let _ = status_handle.emit("engine:status", status);
                     }
                 });
diff --git a/src-tauri/src/warmup.rs b/src-tauri/src/warmup.rs
index 9857b174..a668913e 100644
--- a/src-tauri/src/warmup.rs
+++ b/src-tauri/src/warmup.rs
@@ -207,6 +207,18 @@ impl BuiltinWarmState {
     pub fn is_warming(&self) -> bool {
         self.inner.lock().unwrap().in_flight.is_some()
     }
+
+    /// Drops all dedup state so the next warm primes fresh. Called when the
+    /// engine leaves the `loaded` state (idle-unload, model switch, crash): the
+    /// primed port belongs to a process that no longer exists, and the OS can
+    /// hand the next load that exact port again. Without this clear, the cold
+    /// reload would match the dead port's primed record, dedup to a no-op, and
+    /// leave the user's first message to eat the full cold prefill.
+    pub fn reset(&self) {
+        let mut g = self.inner.lock().unwrap();
+        g.in_flight = None;
+        g.primed_port = None;
+    }
 }
 
 /// Built-in arm of `warm_up_model`: starts (or reuses) the engine so the
@@ -1769,6 +1781,24 @@ mod tests {
         );
     }
 
+    #[test]
+    fn warm_state_reset_clears_dedup_after_teardown() {
+        let s = BuiltinWarmState::default();
+        assert!(s.try_begin(40000));
+        s.finish(40000, true);
+        assert!(s.try_begin(40001), "a second load primes on its own port");
+        assert!(s.is_warming(), "the 40001 prime is in flight");
+        // Engine torn down: the next load can reuse either port. reset() drops
+        // both the primed record and the in-flight slot so a reused port primes
+        // fresh instead of deduping against the dead process.
+        s.reset();
+        assert!(!s.is_warming(), "reset clears the in-flight slot");
+        assert!(
+            s.try_begin(40000),
+            "reset clears the primed record so a reused port primes fresh"
+        );
+    }
+
     #[test]
     fn warm_state_is_warming_tracks_in_flight() {
         let s = BuiltinWarmState::default();

From cda5025f21a988078a9edb7f6230105d5f465018 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 16:29:29 -0500
Subject: [PATCH 87/89] fix(engine): prefer the actionable stderr line in
 engine-start errors

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src-tauri/src/commands.rs | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
index b27689c7..8c1e796e 100644
--- a/src-tauri/src/commands.rs
+++ b/src-tauri/src/commands.rs
@@ -267,12 +267,15 @@ async fn fetch_builtin_vision(client: &reqwest::Client, base_url: &str) -> bool
 /// Condenses a multi-line engine failure detail into the single most
 /// informative line for the error subtitle (which renders as one paragraph).
 /// The captured stderr tail can be many timestamped lines, so this prefers the
-/// FIRST line mentioning an error or failure: llama.cpp prints the specific
+/// FIRST line that reads like an actual error message ("error:", "error
+/// loading", "failed to", "failed:") over one that merely contains the word
+/// (a startup banner such as "log level: error"). llama.cpp prints the specific
 /// root cause first ("error loading model: <reason>") then generic trailers
 /// ("failed to load", "exiting due to model loading error"), so the first
-/// match is the actionable one. Falls back to the last non-empty line; a
-/// single-line detail (e.g. a health-check message) is returned unchanged.
-/// Classification upstream still sees the full detail.
+/// actionable match is the one to show. It falls back to the first line with
+/// a bare "error" or "failed", then to the last non-empty line; a single-line
+/// detail (e.g. a health-check message) is returned unchanged. Classification
+/// upstream still sees the full detail.
 fn concise_detail(detail: &str) -> String {
     let lines: Vec<&str> = detail
         .lines()
@@ -286,7 +289,16 @@ fn concise_detail(detail: &str) -> String {
             .iter()
             .find(|line| {
                 let lower = line.to_ascii_lowercase();
-                lower.contains("error") || lower.contains("failed")
+                lower.contains("error:")
+                    || lower.contains("error loading")
+                    || lower.contains("failed to")
+                    || lower.contains("failed:")
+            })
+            .or_else(|| {
+                many.iter().find(|line| {
+                    let lower = line.to_ascii_lowercase();
+                    lower.contains("error") || lower.contains("failed")
+                })
             })
             .copied()
             .unwrap_or(many[many.len() - 1])
@@ -1539,6 +1551,22 @@ mod tests {
         assert_eq!(concise_detail(tail), "E error loading model: out of memory");
     }
 
+    #[test]
+    fn concise_detail_skips_a_benign_error_word_for_the_real_cause() {
+        // A startup banner mentions "error" as a log level; the actionable line
+        // is the real loading failure further down. The banner must not win.
+        let tail = "I log level set to error\nI loading model\nE error loading model: bad magic";
+        assert_eq!(concise_detail(tail), "E error loading model: bad magic");
+    }
+
+    #[test]
+    fn concise_detail_falls_back_to_any_failure_mention() {
+        // No "error:"/"error loading"/"failed to"/"failed:" line, but a bare
+        // "error" mention still beats the last line, so it is preferred.
+        let tail = "I starting up\nW cuda error detected\nI shutting down";
+        assert_eq!(concise_detail(tail), "W cuda error detected");
+    }
+
     #[test]
     fn concise_detail_empty_detail_is_empty() {
         assert_eq!(concise_detail("  \n  "), "");

From 9187d9a2f0f41d2ce3e4624e5c9cfe559e555c5a Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 16:29:29 -0500
Subject: [PATCH 88/89] fix(discover): ignore re-entrant download starts for an
 in-flight model

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/contexts/DownloadsContext.tsx              | 10 ++++++++++
 .../__tests__/DownloadsContext.test.tsx        | 18 ++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/src/contexts/DownloadsContext.tsx b/src/contexts/DownloadsContext.tsx
index 92d54ae3..43c4cca2 100644
--- a/src/contexts/DownloadsContext.tsx
+++ b/src/contexts/DownloadsContext.tsx
@@ -28,6 +28,7 @@ import {
   type DownloadAccumulator,
   type DownloadProgressInfo,
   type DownloadUiState,
+  isDownloadInFlight,
   reduceDownloadEvent,
   startingAccumulator,
 } from '../hooks/downloadReducer';
@@ -126,6 +127,15 @@ export function DownloadsProvider({ children }: { children: ReactNode }) {
 
   const begin = useCallback((identity: RegistryIdentity) => {
     const key = downloadKey(identity);
+    // A fast double-click, or a click landing before the row re-renders to hide
+    // its button, would fire a second backend download that claim_download
+    // rejects, flashing a spurious failure over the live one. Ignore re-entry
+    // while this key is already downloading; a retry of a terminal
+    // (failed/ready) entry is not in flight, so it still proceeds.
+    const existing = entriesRef.current.get(key);
+    if (existing && isDownloadInFlight(existing.acc.state.phase)) {
+      return;
+    }
     const [command, args] = commandFor(identity);
     setEntries((prev) => {
       const next = new Map(prev);
diff --git a/src/contexts/__tests__/DownloadsContext.test.tsx b/src/contexts/__tests__/DownloadsContext.test.tsx
index eef450d9..919c6b36 100644
--- a/src/contexts/__tests__/DownloadsContext.test.tsx
+++ b/src/contexts/__tests__/DownloadsContext.test.tsx
@@ -124,6 +124,24 @@ describe('DownloadsContext', () => {
     });
   });
 
+  it('ignores a re-entrant start while the same key is already downloading', async () => {
+    const { result } = renderHook(() => useDownloads(), { wrapper });
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+    // A second click before the row hides its button must not fire a second
+    // backend download; claim_download would reject it and flash a failure.
+    await act(async () => {
+      result.current.startStaffPick('gemma-4-12b');
+    });
+    expect(
+      invoke.mock.calls.filter((c) => c[0] === 'download_staff_pick'),
+    ).toHaveLength(1);
+    expect(result.current.get(STAFF_KEY)?.state).toEqual({
+      phase: 'downloading',
+    });
+  });
+
   it('cancel targets the keyed download', async () => {
     const { result } = renderHook(() => useDownloads(), { wrapper });
     await act(async () => {

From 14d81aef3fa0cd515056bf23883161461bc1baf7 Mon Sep 17 00:00:00 2001
From: Logan Nguyen <lg.131.dev@gmail.com>
Date: Sun, 21 Jun 2026 16:29:29 -0500
Subject: [PATCH 89/89] fix(discover): cap the Hugging Face search input at the
 backend query length

Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
---
 src/settings/tabs/models/BrowseAllPane.test.tsx | 9 +++++++++
 src/settings/tabs/models/BrowseAllPane.tsx      | 3 ++-
 src/settings/tabs/models/useHfSearch.ts         | 6 ++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/settings/tabs/models/BrowseAllPane.test.tsx b/src/settings/tabs/models/BrowseAllPane.test.tsx
index 0641de01..26af7468 100644
--- a/src/settings/tabs/models/BrowseAllPane.test.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.test.tsx
@@ -28,6 +28,7 @@ import { DownloadsProvider } from '../../../contexts/DownloadsContext';
 import {
   HF_SEARCH_DEBOUNCE_MS,
   HF_PAGE_SIZE,
+  HF_SEARCH_QUERY_MAX_LEN,
   clearHfSearchCache,
 } from './useHfSearch';
 import type { HfModelSummary, HfSearchPage } from '../../../types/hf';
@@ -204,6 +205,14 @@ describe('BrowseAllPane', () => {
     expect(screen.getByText('unsloth · 410,000 downloads')).toBeInTheDocument();
   });
 
+  it('caps the search input at the backend query length', async () => {
+    await renderPane();
+    expect(screen.getByRole('searchbox')).toHaveAttribute(
+      'maxLength',
+      String(HF_SEARCH_QUERY_MAX_LEN),
+    );
+  });
+
   it('renders capability pills per row from the repo capabilities', async () => {
     await renderPane();
     const visionRow = screen
diff --git a/src/settings/tabs/models/BrowseAllPane.tsx b/src/settings/tabs/models/BrowseAllPane.tsx
index fd6a47ff..d3fd91f5 100644
--- a/src/settings/tabs/models/BrowseAllPane.tsx
+++ b/src/settings/tabs/models/BrowseAllPane.tsx
@@ -25,7 +25,7 @@ import {
 import { downloadKey } from '../../../hooks/downloadKey';
 import { CapabilityPills } from './CapabilityPills';
 import { DownloadRiskConfirm } from './DownloadRiskConfirm';
-import { useHfSearch } from './useHfSearch';
+import { HF_SEARCH_QUERY_MAX_LEN, useHfSearch } from './useHfSearch';
 import { Tooltip } from '../../../components/Tooltip';
 import { formatContextWindow } from '../../../utils/contextWindow';
 import { RAM_FIT_LABEL, RAM_FIT_TOOLTIP } from '../../../utils/ramFit';
@@ -115,6 +115,7 @@ export function BrowseAllPane({ onSaved }: BrowseAllPaneProps) {
           autoComplete="off"
           autoCorrect="off"
           autoCapitalize="off"
+          maxLength={HF_SEARCH_QUERY_MAX_LEN}
           value={query}
           onChange={(e) => setQuery(e.target.value)}
         />
diff --git a/src/settings/tabs/models/useHfSearch.ts b/src/settings/tabs/models/useHfSearch.ts
index 3cf8774e..8bd484f0 100644
--- a/src/settings/tabs/models/useHfSearch.ts
+++ b/src/settings/tabs/models/useHfSearch.ts
@@ -23,6 +23,12 @@ export const HF_SEARCH_DEBOUNCE_MS = 300;
  * page step (`HF_SEARCH_LIMIT`); the backend clamps the total to its own max. */
 export const HF_PAGE_SIZE = 30;
 
+/** Max search length the input accepts. Mirrors the backend's
+ * `MAX_HF_SEARCH_QUERY_LEN` byte cap; capping the field keeps an over-long
+ * paste from reaching the backend, which rejects it and silently empties the
+ * results. A character cap, exact for the ASCII model names searched here. */
+export const HF_SEARCH_QUERY_MAX_LEN = 200;
+
 /**
  * Session-scoped cache of search pages, keyed by `query::limit`. Switching to
  * another tab unmounts the Discover pane, so without this every return trip