Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions photomap/backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from platformdirs import user_config_dir
from pydantic import BaseModel, Field, field_validator, model_validator

from .encoders import DEFAULT_ENCODER_SPEC, LEGACY_ENCODER_SPEC
from .encoders import LEGACY_ENCODER_SPEC, default_encoder_spec
from .util import atomic_write_text

logger = logging.getLogger(__name__)
Expand All @@ -33,10 +33,12 @@ class Album(BaseModel):
umap_eps: float = Field(default=0.2, description="UMAP epsilon parameter")
description: str = Field(default="", description="Album description")
encoder_spec: str = Field(
default=DEFAULT_ENCODER_SPEC,
# Resolved per-host: OpenCLIP ViT-L-14 on CUDA/macOS, lighter OpenAI CLIP
# ViT-B/32 on CPU-only Linux/Windows. See encoders.default_encoder_spec.
default_factory=default_encoder_spec,
description=(
"Image/text encoder spec. Format: '<backend>:<model>'. "
"Examples: 'openai-clip:ViT-B/32' (default, legacy), "
"Examples: 'openai-clip:ViT-B/32' (legacy, CPU default), "
"'open-clip:ViT-L-14/dfn2b', 'siglip:google/siglip2-large-patch16-256'. "
"Changing this requires re-indexing the album."
),
Expand Down Expand Up @@ -124,8 +126,9 @@ def from_dict(cls, key: str, data: dict[str, Any]) -> "Album":
description=data.get("description", ""),
# Legacy YAML albums predate the encoder_spec field; their indexes
# were built with the original CLIP, so fall back to that to stay
# cache-compatible. New albums get DEFAULT_ENCODER_SPEC via the
# Album field default when the frontend creates them.
# cache-compatible. New albums get the host-resolved default
# (encoders.default_encoder_spec) via the Album field default when
# the frontend creates them.
encoder_spec=data.get("encoder_spec", LEGACY_ENCODER_SPEC),
min_search_score=data.get("min_search_score"),
max_search_results=data.get("max_search_results", 100),
Expand Down
24 changes: 24 additions & 0 deletions photomap/backend/encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import logging
import math
import sys
import threading
import time
from abc import ABC, abstractmethod
Expand All @@ -39,6 +40,29 @@
# a compatibility marker, not a tunable.
LEGACY_ENCODER_SPEC = "openai-clip:ViT-B/32"

# Default encoder for *new* albums on Linux/Windows hosts without CUDA. The
# OpenCLIP ViT-L-14 DEFAULT_ENCODER_SPEC is impractically slow to index/search
# on CPU there, so new albums fall back to the much lighter OpenAI CLIP
# ViT-B/32 (weaker recall, far faster). This happens to be the same spec string
# as LEGACY_ENCODER_SPEC, but it's a distinct constant on purpose: this one is
# a tunable CPU default, not the frozen legacy-cache compatibility marker.
CPU_FALLBACK_ENCODER_SPEC = "openai-clip:ViT-B/32"


def default_encoder_spec() -> str:
    """Pick the encoder spec that *new* albums default to on this host.

    The decision looks only at CUDA availability and the operating system:

    * CUDA available (any OS): ``DEFAULT_ENCODER_SPEC``.
    * macOS without CUDA: ``DEFAULT_ENCODER_SPEC`` as well. The lighter CPU
      path is untested there, so macOS stays on the high-quality default.
    * Everything else (in practice CPU-only Linux/Windows):
      ``CPU_FALLBACK_ENCODER_SPEC``, because OpenCLIP ViT-L-14 is far too
      slow to run on CPU on those platforms.

    Returns:
        The selected encoder spec string.
    """
    # CUDA is probed first so a GPU host is recognised whatever its platform.
    keeps_high_quality = torch.cuda.is_available() or sys.platform == "darwin"
    if not keeps_high_quality:
        return CPU_FALLBACK_ENCODER_SPEC
    return DEFAULT_ENCODER_SPEC

# When True, SigLIP's encode_text wraps each query in every entry of
# SIGLIP_PROMPT_TEMPLATES, encodes them all, L2-normalizes each per-template
# embedding, mean-pools across templates, and re-normalizes. Intended to make
Expand Down
12 changes: 12 additions & 0 deletions photomap/backend/routers/album.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from ..config import Album, create_album, get_config_manager
from ..embeddings import Embeddings
from ..encoders import default_encoder_spec


class UmapEpsSetRequest(BaseModel):
Expand Down Expand Up @@ -201,6 +202,17 @@ async def get_available_albums() -> list[dict[str, Any]]:
return []


@album_router.get("/default_encoder/", tags=["Albums"])
async def get_default_encoder() -> dict[str, str]:
    """Report the encoder spec that new albums should start with on this host.

    The default is resolved server-side because it is platform-aware: CPU-only
    Linux/Windows hosts get a lighter encoder than CUDA/macOS hosts. The
    frontend queries this endpoint instead of hardcoding one default in its
    dropdown.

    Returns:
        A single-key mapping of the form ``{"encoder_spec": <spec>}``.
    """
    host_default = default_encoder_spec()
    return {"encoder_spec": host_default}


@album_router.get("/album/{album_key}/", tags=["Albums"])
async def get_album(album: AlbumDep) -> Album:
"""Get details of a specific album."""
Expand Down
28 changes: 22 additions & 6 deletions photomap/frontend/static/javascript/album-manager.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ import { fetchJson, hideSpinner, showSpinner } from "./utils.js";

// Encoder backends offered in the album manager dropdown. Values must match
// the spec format consumed by photomap.backend.encoders.build_encoder.
// The first entry is the default selection for new albums and must stay in
// sync with backend ``DEFAULT_ENCODER_SPEC`` (encoders.py).
// The pre-selected default for new albums is fetched from the server
// (getServerDefaultEncoderSpec) since it's platform-aware; ENCODER_OPTIONS[0]
// is only the client-side fallback when that request fails.
export const ENCODER_OPTIONS = [
{
value: "open-clip:ViT-L-14/dfn2b_s39b",
Expand All @@ -26,6 +27,21 @@ export const ENCODER_OPTIONS = [
];
const DEFAULT_ENCODER_SPEC = ENCODER_OPTIONS[0].value;

// The default encoder for *new* albums is resolved by the server: it's
// platform-aware (CPU-only Linux/Windows hosts get a lighter encoder than
// CUDA/macOS hosts), so we can't hardcode ENCODER_OPTIONS[0] here.
//
// A successful response is fetched once and the promise is cached for the
// lifetime of the page. If the request fails, the caller still receives the
// client-side fallback (DEFAULT_ENCODER_SPEC), but the cache is cleared so the
// next call retries instead of pinning the fallback after a single transient
// error.
let serverDefaultEncoderPromise = null;

/**
 * Resolve the encoder spec to pre-select for a new album.
 *
 * @returns {Promise<string>} Always resolves (never rejects): the server's
 *   `encoder_spec` when available, otherwise DEFAULT_ENCODER_SPEC.
 */
function getServerDefaultEncoderSpec() {
  if (!serverDefaultEncoderPromise) {
    serverDefaultEncoderPromise = fetchJson("default_encoder/")
      // A missing or empty encoder_spec in the payload falls back as well.
      .then((data) => data?.encoder_spec || DEFAULT_ENCODER_SPEC)
      .catch(() => {
        // Do not memoize a failure: drop the cached promise so a later
        // form open can ask the server again.
        serverDefaultEncoderPromise = null;
        return DEFAULT_ENCODER_SPEC;
      });
  }
  return serverDefaultEncoderPromise;
}

function populateEncoderSelect(selectEl, currentValue) {
if (!selectEl) {
return;
Expand Down Expand Up @@ -273,8 +289,8 @@ export class AlbumManager {
this.elements.newAlbumPathsContainer.innerHTML = "";
}

// Reset encoder dropdown to the default
populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC);
// Reset encoder dropdown to the host-resolved default
getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec));
}

// Form management
Expand All @@ -286,8 +302,8 @@ export class AlbumManager {
// Initialize path fields for the add album form
this.initializeNewAlbumPathFields();

// Initialize encoder dropdown
populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC);
// Initialize encoder dropdown to the host-resolved default
getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec));

// Focus on the first input field
this.elements.newAlbumKey.focus();
Expand Down
25 changes: 25 additions & 0 deletions tests/backend/test_albums.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,31 @@ def test_album_routes(client):
assert len(albums) == 0


def test_default_encoder_endpoint(client, monkeypatch):
    """GET /default_encoder/ echoes whatever the host resolver returns.

    This is the value the frontend pre-selects in the new-album dropdown, so
    the endpoint is checked against both possible resolver outcomes.
    """
    from photomap.backend.encoders import (
        CPU_FALLBACK_ENCODER_SPEC,
        DEFAULT_ENCODER_SPEC,
    )

    # Patch the name as bound in the router module, since that is the
    # reference the route handler calls.
    for stubbed_spec in (CPU_FALLBACK_ENCODER_SPEC, DEFAULT_ENCODER_SPEC):
        monkeypatch.setattr(
            "photomap.backend.routers.album.default_encoder_spec",
            # Bind the current value as a default to avoid late binding.
            lambda spec=stubbed_spec: spec,
        )
        payload = client.get("/default_encoder/").json()
        assert payload == {"encoder_spec": stubbed_spec}


def test_encoder_spec_round_trips_through_available_albums(client, tmp_path):
"""Regression: /available_albums/ used to strip encoder_spec, which
caused the album-manager edit form to always show the default encoder
Expand Down
29 changes: 29 additions & 0 deletions tests/backend/test_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from photomap.backend import encoders as encoders_module
from photomap.backend.encoders import (
CPU_FALLBACK_ENCODER_SPEC,
DEFAULT_ENCODER_SPEC,
LEGACY_ENCODER_SPEC,
EmbeddingCacheMismatch,
Expand All @@ -23,6 +24,7 @@
SiglipEncoder,
build_encoder,
clear_encoder_cache,
default_encoder_spec,
get_cached_encoder,
)

Expand All @@ -32,6 +34,33 @@ def test_default_spec_for_new_albums():
assert DEFAULT_ENCODER_SPEC == "open-clip:ViT-L-14/dfn2b_s39b"


def test_cpu_fallback_spec_is_light_clip():
    """Pin the CPU fallback for new albums to the light OpenAI CLIP spec."""
    expected_spec = "openai-clip:ViT-B/32"
    assert CPU_FALLBACK_ENCODER_SPEC == expected_spec


def test_default_encoder_spec_cuda_uses_high_quality(monkeypatch):
    """With CUDA present the high-quality default is chosen on any OS."""
    # A platform that would otherwise fall back shows that CUDA decides.
    monkeypatch.setattr(encoders_module.sys, "platform", "win32")
    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: True)

    resolved = default_encoder_spec()

    assert resolved == DEFAULT_ENCODER_SPEC


def test_default_encoder_spec_macos_uses_high_quality(monkeypatch):
    """Without CUDA, macOS still resolves to the high-quality default."""
    monkeypatch.setattr(encoders_module.sys, "platform", "darwin")
    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: False)

    resolved = default_encoder_spec()

    assert resolved == DEFAULT_ENCODER_SPEC


@pytest.mark.parametrize("platform", ["linux", "win32"])
def test_default_encoder_spec_cpu_linux_windows_falls_back(monkeypatch, platform):
    """Linux and Windows hosts lacking CUDA resolve to the lighter encoder."""
    monkeypatch.setattr(encoders_module.sys, "platform", platform)
    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: False)

    resolved = default_encoder_spec()

    assert resolved == CPU_FALLBACK_ENCODER_SPEC


def test_legacy_spec_unchanged():
"""LEGACY_ENCODER_SPEC is a compatibility marker for caches that predate
the encoder swap layer. Don't change this — it's pinned to the original
Expand Down
Loading