Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/rust-foundation-heavy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
run: cargo fmt --manifest-path crates/voscript_core/Cargo.toml -- --check

- name: Run Rust clippy
run: cargo clippy --manifest-path crates/voscript_core/Cargo.toml --all-targets -- -D warnings
run: cargo clippy --manifest-path crates/voscript_core/Cargo.toml --features python-bindings --all-targets -- -D warnings

- name: Run Rust tests
run: cargo test --manifest-path crates/voscript_core/Cargo.toml
Expand Down Expand Up @@ -109,7 +109,7 @@ jobs:
docker run --rm \
-e RUST_KERNEL_MODE=required \
voscript-rust-foundation:${{ github.sha }} \
python -c "from providers.kernel_bridge import core_smoke; result = core_smoke({'source': 'ci'}); assert result['ok'] is True; assert result['echoed']['source'] == 'ci'"
python -c "from providers.kernel_bridge import core_smoke, voiceprint_score; result = core_smoke({'source': 'ci'}); assert result['ok'] is True; assert result['echoed']['source'] == 'ci'; decision = voiceprint_score({'query_embedding': [1.0, 0.0], 'candidates': [{'speaker_id': 'spk_ci', 'name': 'CI', 'embedding': [1.0, 0.0], 'sample_count': 1, 'sample_spread': None}], 'threshold': 0.75, 'asnorm_threshold': 0.5, 'cohort': None}); assert decision['matched_id'] == 'spk_ci'; assert decision['reason'] == 'matched'"

- name: Run health check smoke
run: |
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions app/providers/kernel_bridge/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
require_rust_core,
rust_kernel_mode,
rust_provider_paths_enabled,
voiceprint_score,
)

__all__ = [
Expand All @@ -18,4 +19,5 @@
"require_rust_core",
"rust_kernel_mode",
"rust_provider_paths_enabled",
"voiceprint_score",
]
124 changes: 124 additions & 0 deletions app/providers/kernel_bridge/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from collections.abc import Mapping
from importlib import import_module
from math import isfinite
from types import ModuleType
from typing import Any, Callable

Expand Down Expand Up @@ -68,6 +69,115 @@ def _validate_smoke_response(response: Any) -> dict[str, Any]:
return result


def _validate_voiceprint_score_response(response: Any) -> dict[str, Any]:
    """Validate the decision mapping returned by the Rust ``voiceprint_score``.

    Args:
        response: Raw object handed back by the native kernel.

    Returns:
        A shallow ``dict`` copy of ``response`` in which ``similarity`` has
        been coerced to a finite ``float`` and every entry of ``candidates``
        has been replaced by its validated copy.

    Raises:
        RustKernelBridgeError: If the response is not a mapping, lacks a
            required key, or any field has an unexpected type or value.
    """
    if not isinstance(response, Mapping):
        raise RustKernelBridgeError(
            "Rust voiceprint_score returned a non-mapping response"
        )

    result = dict(response)
    required_keys = {
        "matched_id",
        "matched_name",
        "similarity",
        "reason",
        "asnorm_active",
        "asnorm_reason",
        "candidates",
    }
    missing = sorted(required_keys.difference(result))
    if missing:
        raise RustKernelBridgeError(
            f"Rust voiceprint_score response missing keys: {', '.join(missing)}"
        )

    # The match fields are handed straight to callers typed as ``str | None``,
    # so anything else must be rejected here rather than leak downstream.
    for key in ("matched_id", "matched_name"):
        value = result[key]
        if value is not None and not isinstance(value, str):
            raise RustKernelBridgeError(
                f"Rust voiceprint_score {key} must be a string or None"
            )

    if not isinstance(result["reason"], str) or not result["reason"]:
        raise RustKernelBridgeError("Rust voiceprint_score reason must be non-empty")
    if not isinstance(result["asnorm_active"], bool):
        raise RustKernelBridgeError("Rust voiceprint_score asnorm_active must be bool")
    if not isinstance(result["asnorm_reason"], str) or not result["asnorm_reason"]:
        raise RustKernelBridgeError(
            "Rust voiceprint_score asnorm_reason must be non-empty"
        )
    if not isinstance(result["candidates"], list):
        raise RustKernelBridgeError("Rust voiceprint_score candidates must be a list")
    result["candidates"] = [
        _validate_voiceprint_score_candidate_response(candidate)
        for candidate in result["candidates"]
    ]

    try:
        result["similarity"] = float(result["similarity"])
    # OverflowError: float() of an int too large for a double; without it the
    # raw exception would bypass the bridge's single error type.
    except (TypeError, ValueError, OverflowError) as exc:
        raise RustKernelBridgeError(
            "Rust voiceprint_score similarity must be numeric"
        ) from exc
    if not isfinite(result["similarity"]):
        raise RustKernelBridgeError("Rust voiceprint_score similarity must be finite")
    return result


def _validate_voiceprint_score_candidate_response(candidate: Any) -> dict[str, Any]:
    """Validate one candidate entry of a Rust ``voiceprint_score`` response.

    Args:
        candidate: Raw candidate object taken from the response's
            ``candidates`` list.

    Returns:
        A shallow ``dict`` copy of ``candidate`` with ``raw_similarity``,
        ``similarity`` and ``effective_threshold`` coerced to finite floats,
        ``sample_count`` coerced to a non-negative ``int`` and
        ``sample_spread`` coerced to a finite float unless it is ``None``.

    Raises:
        RustKernelBridgeError: If the candidate is not a mapping, lacks a
            required key, or any field has an unexpected type or value.
    """
    if not isinstance(candidate, Mapping):
        raise RustKernelBridgeError(
            "Rust voiceprint_score candidate returned a non-mapping response"
        )

    result = dict(candidate)
    required_keys = {
        "speaker_id",
        "name",
        "raw_similarity",
        "similarity",
        "effective_threshold",
        "score_method",
        "sample_count",
        "sample_spread",
    }
    missing = sorted(required_keys.difference(result))
    if missing:
        raise RustKernelBridgeError(
            "Rust voiceprint_score candidate response missing keys: "
            + ", ".join(missing)
        )

    for key in ("speaker_id", "name", "score_method"):
        if not isinstance(result[key], str) or not result[key]:
            raise RustKernelBridgeError(
                f"Rust voiceprint_score candidate {key} must be non-empty"
            )

    for key in ("raw_similarity", "similarity", "effective_threshold"):
        try:
            result[key] = float(result[key])
        # OverflowError covers ints too large for a double, which float()
        # reports separately from TypeError/ValueError.
        except (TypeError, ValueError, OverflowError) as exc:
            raise RustKernelBridgeError(
                f"Rust voiceprint_score candidate {key} must be numeric"
            ) from exc
        if not isfinite(result[key]):
            raise RustKernelBridgeError(
                f"Rust voiceprint_score candidate {key} must be finite"
            )

    raw_sample_count = result["sample_count"]
    try:
        sample_count = int(raw_sample_count)
    # int(float("inf")) raises OverflowError rather than ValueError.
    except (TypeError, ValueError, OverflowError) as exc:
        raise RustKernelBridgeError(
            "Rust voiceprint_score candidate sample_count must be integer-like"
        ) from exc
    # int() truncates floats silently; a fractional count is not integer-like.
    if isinstance(raw_sample_count, float) and sample_count != raw_sample_count:
        raise RustKernelBridgeError(
            "Rust voiceprint_score candidate sample_count must be integer-like"
        )
    if sample_count < 0:
        raise RustKernelBridgeError(
            "Rust voiceprint_score candidate sample_count must be non-negative"
        )
    result["sample_count"] = sample_count

    if result["sample_spread"] is not None:
        try:
            result["sample_spread"] = float(result["sample_spread"])
        except (TypeError, ValueError, OverflowError) as exc:
            raise RustKernelBridgeError(
                "Rust voiceprint_score candidate sample_spread must be numeric"
            ) from exc
        if not isfinite(result["sample_spread"]):
            raise RustKernelBridgeError(
                "Rust voiceprint_score candidate sample_spread must be finite"
            )
    return result


def core_smoke(
payload: Any,
importer: Callable[[str], ModuleType] = import_module,
Expand All @@ -80,3 +190,17 @@ def core_smoke(
except Exception as exc:
raise RustKernelBridgeError("Rust core_smoke call failed") from exc
return _validate_smoke_response(response)


def voiceprint_score(
    payload: dict[str, Any],
    importer: Callable[[str], ModuleType] = import_module,
) -> dict[str, Any]:
    """Score a voiceprint through the native kernel, failing closed on errors.

    Args:
        payload: Request forwarded unchanged to the kernel's
            ``voiceprint_score`` entry point.
        importer: Callable used to import the native module; passed through
            to ``require_rust_core``.

    Returns:
        The kernel's response after ``_validate_voiceprint_score_response``
        has checked and normalised it.

    Raises:
        RustKernelBridgeError: If the native call raises, or if the response
            fails validation.
    """
    kernel = require_rust_core(importer=importer)
    try:
        raw_decision = kernel.voiceprint_score(payload)
    except Exception as error:
        raise RustKernelBridgeError("Rust voiceprint_score call failed") from error
    else:
        # Validation runs outside the handler so its own errors are not
        # re-labelled as a failed call.
        return _validate_voiceprint_score_response(raw_decision)
78 changes: 77 additions & 1 deletion app/voiceprints/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@
import numpy as np

from config import EMBEDDING_DIM
from providers.kernel_bridge import (
RustKernelBridgeError,
rust_provider_paths_enabled,
voiceprint_score as rust_voiceprint_score,
)
from .cohort import VoiceprintCohortManager
from .repository import VoiceprintRepository
from .scoring import (
ASNORM_MIN_COHORT_SIZE,
ASNormScorer,
asnorm_margin_passes,
effective_asnorm_threshold,
Expand Down Expand Up @@ -123,9 +129,19 @@ def identify(
if float(np.linalg.norm(query)) < 1e-6:
return None, None, 0.0

candidates = self._repository.fetch_identify_candidates(query, limit=2)
candidates = self._repository.fetch_identify_candidates(
query, limit=self._identify_candidate_limit()
)
if not candidates:
return None, None, 0.0

if rust_provider_paths_enabled():
return self._identify_with_rust_kernel(
query=query,
candidates=candidates,
threshold=threshold,
)

candidate = candidates[0]

best_sim = candidate.similarity
Expand Down Expand Up @@ -214,6 +230,66 @@ def _effective_asnorm_threshold(
) -> float:
return effective_asnorm_threshold(base, sample_count, sample_spread)

def _identify_candidate_limit(self) -> int:
    """Return how many candidates ``identify`` should request.

    Yields ``2`` when no AS-norm scorer is configured or its cohort is
    smaller than ``ASNORM_MIN_COHORT_SIZE``; otherwise the repository's
    identify-candidate count, floored at ``1``.
    """
    scorer = self._asnorm
    if scorer is None:
        return 2
    if scorer.cohort_size < ASNORM_MIN_COHORT_SIZE:
        return 2
    available = self._repository.count_identify_candidates()
    return max(1, available)

def _identify_with_rust_kernel(
    self,
    *,
    query: np.ndarray,
    candidates,
    threshold: float,
) -> tuple[str | None, str | None, float]:
    """Delegate the identify decision to the Rust ``voiceprint_score`` kernel.

    Args:
        query: Query embedding; flattened to a float32 list for the payload.
        candidates: Candidate records, each converted with
            ``_candidate_to_rust_payload``.
        threshold: Base similarity threshold forwarded to the kernel.

    Returns:
        ``(matched_id, matched_name, similarity)`` read from the kernel's
        decision; the first two are ``None`` when the decision omits them.

    Raises:
        RustKernelBridgeError: If the decision's ``similarity`` is missing
            or cannot be converted to ``float``.
    """
    # Each piece is built in the same order the payload lists it, so a
    # conversion failure surfaces for the earliest offending field.
    query_embedding = query.astype(np.float32).flatten().tolist()
    candidate_payloads = [
        self._candidate_to_rust_payload(entry) for entry in candidates
    ]
    base_threshold = float(threshold)
    asnorm_threshold = float(self._asnorm_threshold)
    cohort = self._asnorm_cohort_payload()
    # Falls back to 200 when the scorer is absent or has no ``_top_n``.
    top_n = int(getattr(self._asnorm, "_top_n", 200))

    request = {
        "query_embedding": query_embedding,
        "candidates": candidate_payloads,
        "threshold": base_threshold,
        "asnorm_threshold": asnorm_threshold,
        "cohort": cohort,
        "asnorm_top_n": top_n,
    }
    decision = rust_voiceprint_score(request)

    try:
        score = float(decision["similarity"])
    except (KeyError, TypeError, ValueError) as error:
        raise RustKernelBridgeError(
            "Rust voiceprint_score response has invalid similarity"
        ) from error

    matched_id = decision.get("matched_id")
    matched_name = decision.get("matched_name")
    return matched_id, matched_name, score

@staticmethod
def _candidate_to_rust_payload(candidate) -> dict:
    """Convert one identify candidate into the kernel's candidate payload.

    Args:
        candidate: Object exposing ``speaker_id``, ``name``, ``sample_count``,
            ``sample_spread`` and ``enroll_emb``.

    Returns:
        A dict with those identity fields plus ``embedding``, the enrollment
        embedding flattened to a list of float32-rounded values.

    Raises:
        RustKernelBridgeError: If the candidate has no enrollment embedding.
    """
    if candidate.enroll_emb is None:
        raise RustKernelBridgeError(
            "Rust voiceprint scoring requires enrollment embeddings"
        )

    payload = {}
    payload["speaker_id"] = candidate.speaker_id
    payload["name"] = candidate.name
    payload["sample_count"] = int(candidate.sample_count)
    payload["sample_spread"] = candidate.sample_spread
    as_float32 = candidate.enroll_emb.astype(np.float32)
    payload["embedding"] = as_float32.flatten().tolist()
    return payload

def _asnorm_cohort_payload(self) -> list[list[float]] | None:
cohort = (
None if self._asnorm is None else getattr(self._asnorm, "_cohort", None)
)
if cohort is None:
if (
self._asnorm is not None
and self._asnorm.cohort_size >= ASNORM_MIN_COHORT_SIZE
):
raise RustKernelBridgeError(
"Rust voiceprint scoring requires an exportable AS-norm cohort"
)
return None
return np.asarray(cohort, dtype=np.float32).tolist()

def list_speakers(self) -> list[dict]:
    """Return whatever the underlying repository's ``list_speakers`` yields."""
    speakers = self._repository.list_speakers()
    return speakers

Expand Down
Loading
Loading