diff --git a/photomap/backend/config.py b/photomap/backend/config.py index 93b817b9..07812856 100644 --- a/photomap/backend/config.py +++ b/photomap/backend/config.py @@ -15,7 +15,7 @@ from platformdirs import user_config_dir from pydantic import BaseModel, Field, field_validator, model_validator -from .encoders import DEFAULT_ENCODER_SPEC, LEGACY_ENCODER_SPEC +from .encoders import LEGACY_ENCODER_SPEC, default_encoder_spec from .util import atomic_write_text logger = logging.getLogger(__name__) @@ -33,10 +33,12 @@ class Album(BaseModel): umap_eps: float = Field(default=0.2, description="UMAP epsilon parameter") description: str = Field(default="", description="Album description") encoder_spec: str = Field( - default=DEFAULT_ENCODER_SPEC, + # Resolved per-host: OpenCLIP ViT-L-14 on CUDA/macOS, lighter OpenAI CLIP + # ViT-B/32 on CPU-only Linux/Windows. See encoders.default_encoder_spec. + default_factory=default_encoder_spec, description=( "Image/text encoder spec. Format: ':'. " - "Examples: 'openai-clip:ViT-B/32' (default, legacy), " + "Examples: 'openai-clip:ViT-B/32' (legacy, CPU default), " "'open-clip:ViT-L-14/dfn2b', 'siglip:google/siglip2-large-patch16-256'. " "Changing this requires re-indexing the album." ), @@ -124,8 +126,9 @@ def from_dict(cls, key: str, data: dict[str, Any]) -> "Album": description=data.get("description", ""), # Legacy YAML albums predate the encoder_spec field; their indexes # were built with the original CLIP, so fall back to that to stay - # cache-compatible. New albums get DEFAULT_ENCODER_SPEC via the - # Album field default when the frontend creates them. + # cache-compatible. New albums get the host-resolved default + # (encoders.default_encoder_spec) via the Album field default when + # the frontend creates them. encoder_spec=data.get("encoder_spec", LEGACY_ENCODER_SPEC), min_search_score=data.get("min_search_score"), max_search_results=data.get("max_search_results", 100), diff --git a/photomap/backend/encoders.py b/photomap/backend/encoders.py index cd2e0eec..c6ed4b2b 100644 --- a/photomap/backend/encoders.py +++ b/photomap/backend/encoders.py @@ -14,6 +14,7 @@ import logging import math +import sys import threading import time from abc import ABC, abstractmethod @@ -39,6 +40,29 @@ # a compatibility marker, not a tunable. LEGACY_ENCODER_SPEC = "openai-clip:ViT-B/32" +# Default encoder for *new* albums on Linux/Windows hosts without CUDA. The +# OpenCLIP ViT-L-14 DEFAULT_ENCODER_SPEC is impractically slow to index/search +# on CPU there, so new albums fall back to the much lighter OpenAI CLIP +# ViT-B/32 (weaker recall, far faster). This happens to be the same spec string +# as LEGACY_ENCODER_SPEC, but it's a distinct constant on purpose: this one is +# a tunable CPU default, not the frozen legacy-cache compatibility marker. +CPU_FALLBACK_ENCODER_SPEC = "openai-clip:ViT-B/32" + + +def default_encoder_spec() -> str: + """Resolve the default encoder spec for *new* albums based on the host. + + Hosts with CUDA, and macOS (left on the high-quality default since the + lighter CPU path is untested there), get ``DEFAULT_ENCODER_SPEC``. Linux and + Windows hosts without CUDA fall back to ``CPU_FALLBACK_ENCODER_SPEC`` because + OpenCLIP ViT-L-14 is far too slow to run on CPU on those platforms. + """ + if torch.cuda.is_available(): + return DEFAULT_ENCODER_SPEC + if sys.platform == "darwin": + return DEFAULT_ENCODER_SPEC + return CPU_FALLBACK_ENCODER_SPEC + # When True, SigLIP's encode_text wraps each query in every entry of # SIGLIP_PROMPT_TEMPLATES, encodes them all, L2-normalizes each per-template # embedding, mean-pools across templates, and re-normalizes. Intended to make diff --git a/photomap/backend/routers/album.py b/photomap/backend/routers/album.py index db1cc0a6..7b540b8c 100644 --- a/photomap/backend/routers/album.py +++ b/photomap/backend/routers/album.py @@ -9,6 +9,7 @@ from ..config import Album, create_album, get_config_manager from ..embeddings import Embeddings +from ..encoders import default_encoder_spec class UmapEpsSetRequest(BaseModel): @@ -201,6 +202,17 @@ async def get_available_albums() -> list[dict[str, Any]]: return [] +@album_router.get("/default_encoder/", tags=["Albums"]) +async def get_default_encoder() -> dict[str, str]: + """Return the encoder spec new albums should default to on this host. + + The default is platform-aware — CPU-only Linux/Windows hosts get a lighter + encoder than CUDA/macOS hosts — so the frontend asks the server for it + rather than hardcoding a single default in the dropdown. + """ + return {"encoder_spec": default_encoder_spec()} + + @album_router.get("/album/{album_key}/", tags=["Albums"]) async def get_album(album: AlbumDep) -> Album: """Get details of a specific album.""" diff --git a/photomap/frontend/static/javascript/album-manager.js b/photomap/frontend/static/javascript/album-manager.js index e7e3fcb6..75a73973 100644 --- a/photomap/frontend/static/javascript/album-manager.js +++ b/photomap/frontend/static/javascript/album-manager.js @@ -8,8 +8,9 @@ import { fetchJson, hideSpinner, showSpinner } from "./utils.js"; // Encoder backends offered in the album manager dropdown. Values must match // the spec format consumed by photomap.backend.encoders.build_encoder. -// The first entry is the default selection for new albums and must stay in -// sync with backend ``DEFAULT_ENCODER_SPEC`` (encoders.py). +// The pre-selected default for new albums is fetched from the server +// (getServerDefaultEncoderSpec) since it's platform-aware; ENCODER_OPTIONS[0] +// is only the client-side fallback when that request fails. export const ENCODER_OPTIONS = [ { value: "open-clip:ViT-L-14/dfn2b_s39b", @@ -26,6 +27,21 @@ export const ENCODER_OPTIONS = [ ]; const DEFAULT_ENCODER_SPEC = ENCODER_OPTIONS[0].value; +// The default encoder for *new* albums is resolved by the server: it's +// platform-aware (CPU-only Linux/Windows hosts get a lighter encoder than +// CUDA/macOS hosts), so we can't hardcode ENCODER_OPTIONS[0] here. Fetch it +// once, cache the promise, and fall back to the recommended option if the +// request fails. +let serverDefaultEncoderPromise = null; +function getServerDefaultEncoderSpec() { + if (!serverDefaultEncoderPromise) { + serverDefaultEncoderPromise = fetchJson("default_encoder/") + .then((data) => data?.encoder_spec || DEFAULT_ENCODER_SPEC) + .catch(() => DEFAULT_ENCODER_SPEC); + } + return serverDefaultEncoderPromise; +} + function populateEncoderSelect(selectEl, currentValue) { if (!selectEl) { return; @@ -273,8 +289,8 @@ export class AlbumManager { this.elements.newAlbumPathsContainer.innerHTML = ""; } - // Reset encoder dropdown to the default - populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC); + // Reset encoder dropdown to the host-resolved default + getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec)); } // Form management @@ -286,8 +302,8 @@ export class AlbumManager { // Initialize path fields for the add album form this.initializeNewAlbumPathFields(); - // Initialize encoder dropdown - populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC); + // Initialize encoder dropdown to the host-resolved default + getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec)); // Focus on the first input field this.elements.newAlbumKey.focus(); diff --git a/tests/backend/test_albums.py b/tests/backend/test_albums.py index 8d3c203d..a7b8f340 100644 --- a/tests/backend/test_albums.py +++ b/tests/backend/test_albums.py @@ -169,6 +169,31 @@ def test_album_routes(client): assert len(albums) == 0 +def test_default_encoder_endpoint(client, monkeypatch): + """/default_encoder/ reports the host-resolved default the frontend + pre-selects in the new-album dropdown.""" + from photomap.backend.encoders import ( + CPU_FALLBACK_ENCODER_SPEC, + DEFAULT_ENCODER_SPEC, + ) + + monkeypatch.setattr( + "photomap.backend.routers.album.default_encoder_spec", + lambda: CPU_FALLBACK_ENCODER_SPEC, + ) + assert client.get("/default_encoder/").json() == { + "encoder_spec": CPU_FALLBACK_ENCODER_SPEC + } + + monkeypatch.setattr( + "photomap.backend.routers.album.default_encoder_spec", + lambda: DEFAULT_ENCODER_SPEC, + ) + assert client.get("/default_encoder/").json() == { + "encoder_spec": DEFAULT_ENCODER_SPEC + } + + def test_encoder_spec_round_trips_through_available_albums(client, tmp_path): """Regression: /available_albums/ used to strip encoder_spec, which caused the album-manager edit form to always show the default encoder diff --git a/tests/backend/test_encoders.py b/tests/backend/test_encoders.py index 99a6e425..b5c2df5a 100644 --- a/tests/backend/test_encoders.py +++ b/tests/backend/test_encoders.py @@ -14,6 +14,7 @@ from photomap.backend import encoders as encoders_module from photomap.backend.encoders import ( + CPU_FALLBACK_ENCODER_SPEC, DEFAULT_ENCODER_SPEC, LEGACY_ENCODER_SPEC, EmbeddingCacheMismatch, @@ -23,6 +24,7 @@ SiglipEncoder, build_encoder, clear_encoder_cache, + default_encoder_spec, get_cached_encoder, ) @@ -32,6 +34,33 @@ def test_default_spec_for_new_albums(): assert DEFAULT_ENCODER_SPEC == "open-clip:ViT-L-14/dfn2b_s39b" +def test_cpu_fallback_spec_is_light_clip(): + """CPU-only Linux/Windows hosts default new albums to the light CLIP.""" + assert CPU_FALLBACK_ENCODER_SPEC == "openai-clip:ViT-B/32" + + +def test_default_encoder_spec_cuda_uses_high_quality(monkeypatch): + """Any host with CUDA gets the high-quality default regardless of OS.""" + monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: True) + monkeypatch.setattr(encoders_module.sys, "platform", "win32") + assert default_encoder_spec() == DEFAULT_ENCODER_SPEC + + +def test_default_encoder_spec_macos_uses_high_quality(monkeypatch): + """macOS stays on the high-quality default even without CUDA (untested path).""" + monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: False) + monkeypatch.setattr(encoders_module.sys, "platform", "darwin") + assert default_encoder_spec() == DEFAULT_ENCODER_SPEC + + +@pytest.mark.parametrize("platform", ["linux", "win32"]) +def test_default_encoder_spec_cpu_linux_windows_falls_back(monkeypatch, platform): + """CPU-only Linux/Windows hosts fall back to the lighter encoder.""" + monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: False) + monkeypatch.setattr(encoders_module.sys, "platform", platform) + assert default_encoder_spec() == CPU_FALLBACK_ENCODER_SPEC + + def test_legacy_spec_unchanged(): """LEGACY_ENCODER_SPEC is a compatibility marker for caches that predate the encoder swap layer. Don't change this — it's pinned to the original