From 74bcc2682051a5b2b138a59533ed3e004cf8c4f5 Mon Sep 17 00:00:00 2001 From: Maple Gao Date: Tue, 9 Jun 2026 22:29:54 +0800 Subject: [PATCH 1/3] feat: add Rust voiceprint scoring kernel --- .github/workflows/rust-foundation-heavy.yml | 4 +- Cargo.lock | 2 +- app/providers/kernel_bridge/__init__.py | 2 + app/providers/kernel_bridge/runtime.py | 124 ++++++ app/voiceprints/db.py | 78 +++- app/voiceprints/scoring.py | 213 +++++++++++ crates/voscript_core/Cargo.toml | 2 +- crates/voscript_core/src/lib.rs | 113 +++++- crates/voscript_core/src/voiceprint.rs | 361 ++++++++++++++++++ .../voscript_core/tests/voiceprint_scoring.rs | 118 ++++++ doc/changelog.en.md | 6 + doc/changelog.zh.md | 4 + doc/configuration.en.md | 2 +- doc/configuration.zh.md | 2 +- tests/test_voiceprint_db.py | 93 ++++- tests/unit/test_kernel_bridge.py | 4 +- tests/unit/test_voiceprint_scoring_kernel.py | 192 ++++++++++ 17 files changed, 1300 insertions(+), 20 deletions(-) create mode 100644 crates/voscript_core/src/voiceprint.rs create mode 100644 crates/voscript_core/tests/voiceprint_scoring.rs create mode 100644 tests/unit/test_voiceprint_scoring_kernel.py diff --git a/.github/workflows/rust-foundation-heavy.yml b/.github/workflows/rust-foundation-heavy.yml index 1c7c8bd..da6505a 100644 --- a/.github/workflows/rust-foundation-heavy.yml +++ b/.github/workflows/rust-foundation-heavy.yml @@ -48,7 +48,7 @@ jobs: run: cargo fmt --manifest-path crates/voscript_core/Cargo.toml -- --check - name: Run Rust clippy - run: cargo clippy --manifest-path crates/voscript_core/Cargo.toml --all-targets -- -D warnings + run: cargo clippy --manifest-path crates/voscript_core/Cargo.toml --features python-bindings --all-targets -- -D warnings - name: Run Rust tests run: cargo test --manifest-path crates/voscript_core/Cargo.toml @@ -109,7 +109,7 @@ jobs: docker run --rm \ -e RUST_KERNEL_MODE=required \ voscript-rust-foundation:${{ github.sha }} \ - python -c "from providers.kernel_bridge import core_smoke; result = core_smoke({'source': 'ci'}); assert result['ok'] is True; assert result['echoed']['source'] == 'ci'" + python -c "from providers.kernel_bridge import core_smoke, voiceprint_score; result = core_smoke({'source': 'ci'}); assert result['ok'] is True; assert result['echoed']['source'] == 'ci'; decision = voiceprint_score({'query_embedding': [1.0, 0.0], 'candidates': [{'speaker_id': 'spk_ci', 'name': 'CI', 'embedding': [1.0, 0.0], 'sample_count': 1, 'sample_spread': None}], 'threshold': 0.75, 'asnorm_threshold': 0.5, 'cohort': None}); assert decision['matched_id'] == 'spk_ci'; assert decision['reason'] == 'matched'" - name: Run health check smoke run: | diff --git a/Cargo.lock b/Cargo.lock index 49089e2..426af82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -127,7 +127,7 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "voscript_core" -version = "0.8.0" +version = "0.8.1" dependencies = [ "pyo3", ] diff --git a/app/providers/kernel_bridge/__init__.py b/app/providers/kernel_bridge/__init__.py index 299c00e..6b6ec68 100644 --- a/app/providers/kernel_bridge/__init__.py +++ b/app/providers/kernel_bridge/__init__.py @@ -8,6 +8,7 @@ require_rust_core, rust_kernel_mode, rust_provider_paths_enabled, + voiceprint_score, ) __all__ = [ @@ -18,4 +19,5 @@ "require_rust_core", "rust_kernel_mode", "rust_provider_paths_enabled", + "voiceprint_score", ] diff --git a/app/providers/kernel_bridge/runtime.py b/app/providers/kernel_bridge/runtime.py index a7acfe4..3052088 100644 --- a/app/providers/kernel_bridge/runtime.py +++ b/app/providers/kernel_bridge/runtime.py @@ -4,6 +4,7 @@ from collections.abc import Mapping from importlib import import_module +from math import isfinite from types import ModuleType from typing import Any, Callable @@ -68,6 +69,115 @@ def _validate_smoke_response(response: Any) -> dict[str, Any]: return result +def _validate_voiceprint_score_response(response: Any) -> dict[str, Any]: + if not isinstance(response, Mapping): + raise RustKernelBridgeError( + "Rust voiceprint_score returned a non-mapping response" + ) + + result = dict(response) + required_keys = { + "matched_id", + "matched_name", + "similarity", + "reason", + "asnorm_active", + "asnorm_reason", + "candidates", + } + missing = sorted(required_keys.difference(result)) + if missing: + raise RustKernelBridgeError( + f"Rust voiceprint_score response missing keys: {', '.join(missing)}" + ) + if not isinstance(result["reason"], str) or not result["reason"]: + raise RustKernelBridgeError("Rust voiceprint_score reason must be non-empty") + if not isinstance(result["asnorm_active"], bool): + raise RustKernelBridgeError("Rust voiceprint_score asnorm_active must be bool") + if not isinstance(result["asnorm_reason"], str) or not result["asnorm_reason"]: + raise RustKernelBridgeError( + "Rust voiceprint_score asnorm_reason must be non-empty" + ) + if not isinstance(result["candidates"], list): + raise RustKernelBridgeError("Rust voiceprint_score candidates must be a list") + result["candidates"] = [ + _validate_voiceprint_score_candidate_response(candidate) + for candidate in result["candidates"] + ] + try: + result["similarity"] = float(result["similarity"]) + except (TypeError, ValueError) as exc: + raise RustKernelBridgeError( + "Rust voiceprint_score similarity must be numeric" + ) from exc + if not isfinite(result["similarity"]): + raise RustKernelBridgeError("Rust voiceprint_score similarity must be finite") + return result + + +def _validate_voiceprint_score_candidate_response(candidate: Any) -> dict[str, Any]: + if not isinstance(candidate, Mapping): + raise RustKernelBridgeError( + "Rust voiceprint_score candidate returned a non-mapping response" + ) + + result = dict(candidate) + required_keys = { + "speaker_id", + "name", + "raw_similarity", + "similarity", + "effective_threshold", + "score_method", + "sample_count", + "sample_spread", + } + missing = sorted(required_keys.difference(result)) + if missing: + raise RustKernelBridgeError( + "Rust voiceprint_score candidate response missing keys: " + + ", ".join(missing) + ) + for key in ("speaker_id", "name", "score_method"): + if not isinstance(result[key], str) or not result[key]: + raise RustKernelBridgeError( + f"Rust voiceprint_score candidate {key} must be non-empty" + ) + for key in ("raw_similarity", "similarity", "effective_threshold"): + try: + result[key] = float(result[key]) + except (TypeError, ValueError) as exc: + raise RustKernelBridgeError( + f"Rust voiceprint_score candidate {key} must be numeric" + ) from exc + if not isfinite(result[key]): + raise RustKernelBridgeError( + f"Rust voiceprint_score candidate {key} must be finite" + ) + try: + result["sample_count"] = int(result["sample_count"]) + except (TypeError, ValueError) as exc: + raise RustKernelBridgeError( + "Rust voiceprint_score candidate sample_count must be integer-like" + ) from exc + if result["sample_count"] < 0: + raise RustKernelBridgeError( + "Rust voiceprint_score candidate sample_count must be non-negative" + ) + if result["sample_spread"] is not None: + try: + result["sample_spread"] = float(result["sample_spread"]) + except (TypeError, ValueError) as exc: + raise RustKernelBridgeError( + "Rust voiceprint_score candidate sample_spread must be numeric" + ) from exc + if not isfinite(result["sample_spread"]): + raise RustKernelBridgeError( + "Rust voiceprint_score candidate sample_spread must be finite" + ) + return result + + def core_smoke( payload: Any, importer: Callable[[str], ModuleType] = import_module, @@ -80,3 +190,17 @@ def core_smoke( except Exception as exc: raise RustKernelBridgeError("Rust core_smoke call failed") from exc return _validate_smoke_response(response) + + +def voiceprint_score( + payload: dict[str, Any], + importer: Callable[[str], ModuleType] = import_module, +) -> dict[str, Any]: + """Call the native voiceprint scoring kernel and fail closed on errors.""" + + rust_core = require_rust_core(importer=importer) + try: + response = rust_core.voiceprint_score(payload) + except Exception as exc: + raise RustKernelBridgeError("Rust voiceprint_score call failed") from exc + return _validate_voiceprint_score_response(response) diff --git a/app/voiceprints/db.py b/app/voiceprints/db.py index 0109324..724a05b 100755 --- a/app/voiceprints/db.py +++ b/app/voiceprints/db.py @@ -9,9 +9,15 @@ import numpy as np from config import EMBEDDING_DIM +from providers.kernel_bridge import ( + RustKernelBridgeError, + rust_provider_paths_enabled, + voiceprint_score as rust_voiceprint_score, +) from .cohort import VoiceprintCohortManager from .repository import VoiceprintRepository from .scoring import ( + ASNORM_MIN_COHORT_SIZE, ASNormScorer, asnorm_margin_passes, effective_asnorm_threshold, @@ -123,9 +129,19 @@ def identify( if float(np.linalg.norm(query)) < 1e-6: return None, None, 0.0 - candidates = self._repository.fetch_identify_candidates(query, limit=2) + candidates = self._repository.fetch_identify_candidates( + query, limit=self._identify_candidate_limit() + ) if not candidates: return None, None, 0.0 + + if rust_provider_paths_enabled(): + return self._identify_with_rust_kernel( + query=query, + candidates=candidates, + threshold=threshold, + ) + candidate = candidates[0] best_sim = candidate.similarity @@ -214,6 +230,66 @@ def _effective_asnorm_threshold( ) -> float: return effective_asnorm_threshold(base, sample_count, sample_spread) + def _identify_candidate_limit(self) -> int: + if self._asnorm is None or self._asnorm.cohort_size < ASNORM_MIN_COHORT_SIZE: + return 2 + return max(1, self._repository.count_identify_candidates()) + + def _identify_with_rust_kernel( + self, + *, + query: np.ndarray, + candidates, + threshold: float, + ) -> tuple[str | None, str | None, float]: + payload = { + "query_embedding": query.astype(np.float32).flatten().tolist(), + "candidates": [ + self._candidate_to_rust_payload(candidate) for candidate in candidates + ], + "threshold": float(threshold), + "asnorm_threshold": float(self._asnorm_threshold), + "cohort": self._asnorm_cohort_payload(), + "asnorm_top_n": int(getattr(self._asnorm, "_top_n", 200)), + } + decision = rust_voiceprint_score(payload) + try: + similarity = float(decision["similarity"]) + except (KeyError, TypeError, ValueError) as exc: + raise RustKernelBridgeError( + "Rust voiceprint_score response has invalid similarity" + ) from exc + return decision.get("matched_id"), decision.get("matched_name"), similarity + + @staticmethod + def _candidate_to_rust_payload(candidate) -> dict: + if candidate.enroll_emb is None: + raise RustKernelBridgeError( + "Rust voiceprint scoring requires enrollment embeddings" + ) + return { + "speaker_id": candidate.speaker_id, + "name": candidate.name, + "sample_count": int(candidate.sample_count), + "sample_spread": candidate.sample_spread, + "embedding": candidate.enroll_emb.astype(np.float32).flatten().tolist(), + } + + def _asnorm_cohort_payload(self) -> list[list[float]] | None: + cohort = ( + None if self._asnorm is None else getattr(self._asnorm, "_cohort", None) + ) + if cohort is None: + if ( + self._asnorm is not None + and self._asnorm.cohort_size >= ASNORM_MIN_COHORT_SIZE + ): + raise RustKernelBridgeError( + "Rust voiceprint scoring requires an exportable AS-norm cohort" + ) + return None + return np.asarray(cohort, dtype=np.float32).tolist() + def list_speakers(self) -> list[dict]: return self._repository.list_speakers() diff --git a/app/voiceprints/scoring.py b/app/voiceprints/scoring.py index 66d293d..b179c9a 100644 --- a/app/voiceprints/scoring.py +++ b/app/voiceprints/scoring.py @@ -71,6 +71,38 @@ class ScoreResult: asnorm_active: bool +@dataclass(frozen=True) +class VoiceprintScoreCandidate: + speaker_id: str + name: str + embedding: np.ndarray + sample_count: int + sample_spread: float | None + + +@dataclass(frozen=True) +class VoiceprintScoredCandidate: + speaker_id: str + name: str + raw_similarity: float + similarity: float + effective_threshold: float + score_method: str + sample_count: int + sample_spread: float | None + + +@dataclass(frozen=True) +class VoiceprintScoreDecision: + matched_id: str | None + matched_name: str | None + similarity: float + reason: str + asnorm_active: bool + asnorm_reason: str + candidates: tuple[VoiceprintScoredCandidate, ...] + + def resolve_score( *, raw_similarity: float, @@ -142,3 +174,184 @@ def asnorm_margin_passes( if second_score is None: return True return (best_score - second_score) >= min_margin + + +def score_voiceprint_candidates( + *, + query_embedding: np.ndarray, + candidates: list[VoiceprintScoreCandidate], + threshold: float = 0.75, + asnorm_threshold: float = 0.5, + cohort: np.ndarray | None = None, + asnorm_top_n: int = 200, + asnorm_min_margin: float = _ASNORM_MIN_TOP2_MARGIN, +) -> VoiceprintScoreDecision: + """Score voiceprint candidates with the Python oracle contract. + + This is the golden oracle for the Rust voiceprint kernel. It owns the + behavior contract; Rust must match it when selected. + """ + + query = _validated_embedding("query_embedding", query_embedding) + if not np.isfinite([threshold, asnorm_threshold, asnorm_min_margin]).all(): + raise ValueError("voiceprint thresholds must be finite") + if not candidates: + return _voiceprint_no_match( + reason="no_candidates", + asnorm_reason="not_requested", + asnorm_active=False, + candidates=(), + similarity=0.0, + ) + + query_norm = float(np.linalg.norm(query)) + if query_norm < 1e-12: + return _voiceprint_no_match( + reason="invalid_query", + asnorm_reason="not_requested", + asnorm_active=False, + candidates=(), + similarity=0.0, + ) + + asnorm_active = False + asnorm_reason = "not_requested" + scorer: ASNormScorer | None = None + if cohort is not None: + cohort_array = _validated_cohort(cohort, dim=len(query)) + if len(cohort_array) < ASNORM_MIN_COHORT_SIZE: + asnorm_reason = "cohort_too_small" + else: + scorer = ASNormScorer(cohort_array, top_n=asnorm_top_n) + asnorm_active = True + asnorm_reason = "active" + + scored_candidates: list[VoiceprintScoredCandidate] = [] + query_normed = query / query_norm + for candidate in candidates: + enroll = _validated_embedding("candidate embedding", candidate.embedding) + if len(enroll) != len(query): + raise ValueError("voiceprint embeddings must share dimension") + if candidate.sample_spread is not None and not np.isfinite( + candidate.sample_spread + ): + raise ValueError("voiceprint sample_spread values must be finite") + + enroll_norm = float(np.linalg.norm(enroll)) + raw_similarity = ( + 0.0 if enroll_norm < 1e-12 else float((enroll / enroll_norm) @ query_normed) + ) + if scorer is not None: + similarity = scorer.score(enroll, query) + effective = effective_asnorm_threshold( + base=asnorm_threshold, + sample_count=candidate.sample_count, + sample_spread=candidate.sample_spread, + ) + score_method = "asnorm" + else: + similarity = raw_similarity + effective = effective_threshold( + base=threshold, + sample_count=candidate.sample_count, + sample_spread=candidate.sample_spread, + ) + score_method = "raw_cosine" + + scored_candidates.append( + VoiceprintScoredCandidate( + speaker_id=candidate.speaker_id, + name=candidate.name, + raw_similarity=raw_similarity, + similarity=similarity, + effective_threshold=effective, + score_method=score_method, + sample_count=candidate.sample_count, + sample_spread=candidate.sample_spread, + ) + ) + + scored_candidates.sort(key=lambda candidate: candidate.similarity, reverse=True) + scored = tuple(scored_candidates) + if not scored: + return _voiceprint_no_match( + reason="no_candidates", + asnorm_reason=asnorm_reason, + asnorm_active=asnorm_active, + candidates=scored, + similarity=0.0, + ) + + best = scored[0] + if asnorm_active: + second = scored[1].similarity if len(scored) > 1 else None + if not asnorm_margin_passes( + best_score=best.similarity, + second_score=second, + min_margin=asnorm_min_margin, + ): + return _voiceprint_no_match( + reason="ambiguous_margin", + asnorm_reason=asnorm_reason, + asnorm_active=True, + candidates=scored, + similarity=best.similarity, + ) + + if best.similarity >= best.effective_threshold: + return VoiceprintScoreDecision( + matched_id=best.speaker_id, + matched_name=best.name, + similarity=best.similarity, + reason="matched", + asnorm_active=asnorm_active, + asnorm_reason=asnorm_reason, + candidates=scored, + ) + + return _voiceprint_no_match( + reason="below_threshold", + asnorm_reason=asnorm_reason, + asnorm_active=asnorm_active, + candidates=scored, + similarity=best.similarity, + ) + + +def _voiceprint_no_match( + *, + reason: str, + asnorm_reason: str, + asnorm_active: bool, + candidates: tuple[VoiceprintScoredCandidate, ...], + similarity: float, +) -> VoiceprintScoreDecision: + return VoiceprintScoreDecision( + matched_id=None, + matched_name=None, + similarity=similarity, + reason=reason, + asnorm_active=asnorm_active, + asnorm_reason=asnorm_reason, + candidates=candidates, + ) + + +def _validated_embedding(name: str, embedding: np.ndarray) -> np.ndarray: + array = np.asarray(embedding, dtype=np.float32).flatten() + if len(array) == 0: + raise ValueError(f"{name} must not be empty") + if not np.isfinite(array).all(): + raise ValueError(f"{name} values must be finite") + return array + + +def _validated_cohort(cohort: np.ndarray, dim: int) -> np.ndarray: + array = np.asarray(cohort, dtype=np.float32) + if array.ndim != 2: + raise ValueError("voiceprint cohort must be a 2-D array") + if array.shape[1] != dim: + raise ValueError("voiceprint cohort embeddings must share dimension") + if not np.isfinite(array).all(): + raise ValueError("voiceprint cohort values must be finite") + return array diff --git a/crates/voscript_core/Cargo.toml b/crates/voscript_core/Cargo.toml index 7a279f0..044cc9d 100644 --- a/crates/voscript_core/Cargo.toml +++ b/crates/voscript_core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "voscript_core" -version = "0.8.0" +version = "0.8.1" edition = "2021" license = "Apache-2.0" publish = false diff --git a/crates/voscript_core/src/lib.rs b/crates/voscript_core/src/lib.rs index 1c3d650..4eb5151 100644 --- a/crates/voscript_core/src/lib.rs +++ b/crates/voscript_core/src/lib.rs @@ -1,7 +1,11 @@ #[cfg(feature = "python-bindings")] +use pyo3::exceptions::{PyKeyError, PyValueError}; +#[cfg(feature = "python-bindings")] use pyo3::prelude::*; #[cfg(feature = "python-bindings")] -use pyo3::types::{PyDict, PyModule}; +use pyo3::types::{PyDict, PyList, PyModule}; + +pub mod voiceprint; pub const CORE_SMOKE_CAPABILITY: &str = "core_smoke"; pub const RUST_EXTENSION_CAPABILITY: &str = "rust_extension"; @@ -25,11 +29,116 @@ fn core_smoke(py: Python<'_>, payload: Py) -> PyResult> { Ok(response.unbind()) } +#[cfg(feature = "python-bindings")] +fn required_item<'py>(dict: &Bound<'py, PyDict>, key: &str) -> PyResult> { + dict.get_item(key)? + .ok_or_else(|| PyKeyError::new_err(format!("missing required key: {key}"))) +} + +#[cfg(feature = "python-bindings")] +fn optional_f64(dict: &Bound<'_, PyDict>, key: &str, default: f64) -> PyResult { + match dict.get_item(key)? { + Some(value) if !value.is_none() => value.extract::(), + _ => Ok(default), + } +} + +#[cfg(feature = "python-bindings")] +fn optional_usize(dict: &Bound<'_, PyDict>, key: &str, default: usize) -> PyResult { + match dict.get_item(key)? { + Some(value) if !value.is_none() => value.extract::(), + _ => Ok(default), + } +} + +#[cfg(feature = "python-bindings")] +fn parse_voiceprint_candidate( + item: Bound<'_, PyAny>, +) -> PyResult { + let dict = item.cast_into::()?; + let speaker_id = required_item(&dict, "speaker_id")?.extract::()?; + let name = required_item(&dict, "name")?.extract::()?; + let embedding = required_item(&dict, "embedding")?.extract::>()?; + let sample_count = required_item(&dict, "sample_count")?.extract::()?; + let sample_spread = match dict.get_item("sample_spread")? { + Some(value) if !value.is_none() => Some(value.extract::()?), + _ => None, + }; + Ok(voiceprint::VoiceprintScoreCandidate { + speaker_id, + name, + embedding, + sample_count, + sample_spread, + }) +} + +#[cfg(feature = "python-bindings")] +fn parse_voiceprint_request( + payload: &Bound<'_, PyDict>, +) -> PyResult { + let query_embedding = required_item(payload, "query_embedding")?.extract::>()?; + let candidates_any = required_item(payload, "candidates")?; + let candidates_list = candidates_any.cast_into::()?; + let mut candidates = Vec::with_capacity(candidates_list.len()); + for item in candidates_list.iter() { + candidates.push(parse_voiceprint_candidate(item)?); + } + + let cohort = match payload.get_item("cohort")? { + Some(value) if !value.is_none() => Some(value.extract::>>()?), + _ => None, + }; + + Ok(voiceprint::VoiceprintScoreRequest { + query_embedding, + candidates, + threshold: optional_f64(payload, "threshold", 0.75)?, + asnorm_threshold: optional_f64(payload, "asnorm_threshold", 0.5)?, + cohort, + asnorm_top_n: optional_usize(payload, "asnorm_top_n", 200)?, + asnorm_min_margin: optional_f64(payload, "asnorm_min_margin", 0.05)?, + }) +} + +#[cfg(feature = "python-bindings")] +#[pyfunction] +fn voiceprint_score(py: Python<'_>, payload: &Bound<'_, PyDict>) -> PyResult> { + let request = parse_voiceprint_request(payload)?; + let decision = + voiceprint::score_voiceprint_candidates(request).map_err(PyValueError::new_err)?; + + let response = PyDict::new(py); + response.set_item("matched_id", decision.matched_id)?; + response.set_item("matched_name", decision.matched_name)?; + response.set_item("similarity", decision.similarity)?; + response.set_item("reason", decision.reason)?; + response.set_item("asnorm_active", decision.asnorm_active)?; + response.set_item("asnorm_reason", decision.asnorm_reason)?; + + let candidates = PyList::empty(py); + for candidate in decision.candidates { + let item = PyDict::new(py); + item.set_item("speaker_id", candidate.speaker_id)?; + item.set_item("name", candidate.name)?; + item.set_item("raw_similarity", candidate.raw_similarity)?; + item.set_item("similarity", candidate.similarity)?; + item.set_item("effective_threshold", candidate.effective_threshold)?; + item.set_item("score_method", candidate.score_method)?; + item.set_item("sample_count", candidate.sample_count)?; + item.set_item("sample_spread", candidate.sample_spread)?; + candidates.append(item)?; + } + response.set_item("candidates", candidates)?; + Ok(response.unbind()) +} + #[cfg(feature = "python-bindings")] #[pymodule] fn voscript_core(module: &Bound<'_, PyModule>) -> PyResult<()> { module.add("__version__", PACKAGE_VERSION)?; module.add_function(wrap_pyfunction!(core_smoke, module)?)?; + module.add_function(wrap_pyfunction!(voiceprint_score, module)?)?; Ok(()) } @@ -37,7 +146,7 @@ fn voscript_core(module: &Bound<'_, PyModule>) -> PyResult<()> { mod tests { #[test] fn package_version_is_set() { - assert_eq!(super::PACKAGE_VERSION, "0.8.0"); + assert_eq!(super::PACKAGE_VERSION, "0.8.1"); } #[test] diff --git a/crates/voscript_core/src/voiceprint.rs b/crates/voscript_core/src/voiceprint.rs new file mode 100644 index 0000000..e3a6f48 --- /dev/null +++ b/crates/voscript_core/src/voiceprint.rs @@ -0,0 +1,361 @@ +const SINGLE_SAMPLE_RELAXATION: f64 = 0.05; +const SPREAD_RELAXATION_K: f64 = 3.0; +const SPREAD_RELAXATION_CAP: f64 = 0.10; +const ABSOLUTE_FLOOR: f64 = 0.60; +const ASNORM_MIN_COHORT_SIZE: usize = 10; +const ASNORM_SINGLE_SAMPLE_PENALTY: f64 = 0.10; +const ASNORM_LEGACY_SPREAD_UNKNOWN_PENALTY: f64 = 0.05; +const ASNORM_LOW_SAMPLE_PENALTY: f64 = 0.025; +const ASNORM_SPREAD_PENALTY_K: f64 = 0.50; +const ASNORM_SPREAD_PENALTY_CAP: f64 = 0.10; +const ASNORM_STABLE_RELAXATION: f64 = 0.02; + +#[derive(Debug, Clone)] +pub struct VoiceprintScoreCandidate { + pub speaker_id: String, + pub name: String, + pub embedding: Vec, + pub sample_count: usize, + pub sample_spread: Option, +} + +#[derive(Debug, Clone)] +pub struct VoiceprintScoreRequest { + pub query_embedding: Vec, + pub candidates: Vec, + pub threshold: f64, + pub asnorm_threshold: f64, + pub cohort: Option>>, + pub asnorm_top_n: usize, + pub asnorm_min_margin: f64, +} + +#[derive(Debug, Clone)] +pub struct VoiceprintScoredCandidate { + pub speaker_id: String, + pub name: String, + pub raw_similarity: f64, + pub similarity: f64, + pub effective_threshold: f64, + pub score_method: String, + pub sample_count: usize, + pub sample_spread: Option, +} + +#[derive(Debug, Clone)] +pub struct VoiceprintScoreDecision { + pub matched_id: Option, + pub matched_name: Option, + pub similarity: f64, + pub reason: String, + pub asnorm_active: bool, + pub asnorm_reason: String, + pub candidates: Vec, +} + +pub fn effective_threshold(base: f64, sample_count: usize, sample_spread: Option) -> f64 { + let dynamic = if sample_count <= 1 || sample_spread.is_none() { + if sample_count <= 1 { + base - SINGLE_SAMPLE_RELAXATION + } else { + base + } + } else { + let spread = sample_spread.unwrap_or(0.0).max(0.0); + base - (SPREAD_RELAXATION_K * spread).min(SPREAD_RELAXATION_CAP) + }; + ABSOLUTE_FLOOR.max(base.min(dynamic)) +} + +pub fn effective_asnorm_threshold( + base: f64, + sample_count: usize, + sample_spread: Option, +) -> f64 { + if sample_count <= 1 { + return base + ASNORM_SINGLE_SAMPLE_PENALTY; + } + let Some(spread) = sample_spread else { + return base + ASNORM_LEGACY_SPREAD_UNKNOWN_PENALTY; + }; + + let low_sample_penalty = + (3usize.saturating_sub(sample_count)) as f64 * ASNORM_LOW_SAMPLE_PENALTY; + let spread_penalty = (spread.max(0.0) * ASNORM_SPREAD_PENALTY_K).min(ASNORM_SPREAD_PENALTY_CAP); + let mut threshold = base + low_sample_penalty + spread_penalty; + if sample_count >= 3 && spread <= 0.03 { + threshold -= ASNORM_STABLE_RELAXATION; + } + threshold.max(0.0) +} + +pub fn score_voiceprint_candidates( + request: VoiceprintScoreRequest, +) -> Result { + validate_embedding("query_embedding", &request.query_embedding)?; + if !request.threshold.is_finite() + || !request.asnorm_threshold.is_finite() + || !request.asnorm_min_margin.is_finite() + { + return Err("voiceprint thresholds must be finite".to_string()); + } + if request.candidates.is_empty() { + return Ok(no_match( + "no_candidates", + "not_requested", + false, + Vec::new(), + 0.0, + )); + } + + let query = match normalize(&request.query_embedding)? { + Some(value) => value, + None => { + return Ok(no_match( + "invalid_query", + "not_requested", + false, + Vec::new(), + 0.0, + )) + } + }; + + for candidate in &request.candidates { + validate_embedding("candidate embedding", &candidate.embedding)?; + if candidate.embedding.len() != request.query_embedding.len() { + return Err("voiceprint embeddings must share dimension".to_string()); + } + if !candidate.sample_spread.unwrap_or(0.0).is_finite() { + return Err("voiceprint sample_spread values must be finite".to_string()); + } + } + + let (asnorm_active, asnorm_reason, normalized_cohort) = match request.cohort { + None => (false, "not_requested".to_string(), None), + Some(cohort) if cohort.len() < ASNORM_MIN_COHORT_SIZE => { + validate_cohort(&cohort, request.query_embedding.len())?; + (false, "cohort_too_small".to_string(), None) + } + Some(cohort) => { + let normalized = normalize_cohort(&cohort, request.query_embedding.len())?; + (true, "active".to_string(), Some(normalized)) + } + }; + + let mut scored = Vec::with_capacity(request.candidates.len()); + for candidate in request.candidates { + let raw_similarity = cosine_from_normalized(&candidate.embedding, &query)?; + let (similarity, effective, score_method) = if let Some(cohort) = &normalized_cohort { + let normalized_score = asnorm_score( + &candidate.embedding, + &request.query_embedding, + raw_similarity, + cohort, + request.asnorm_top_n, + )?; + ( + normalized_score, + effective_asnorm_threshold( + request.asnorm_threshold, + candidate.sample_count, + candidate.sample_spread, + ), + "asnorm", + ) + } else { + ( + raw_similarity, + effective_threshold( + request.threshold, + candidate.sample_count, + candidate.sample_spread, + ), + "raw_cosine", + ) + }; + scored.push(VoiceprintScoredCandidate { + speaker_id: candidate.speaker_id, + name: candidate.name, + raw_similarity, + similarity, + effective_threshold: effective, + score_method: score_method.to_string(), + sample_count: candidate.sample_count, + sample_spread: candidate.sample_spread, + }); + } + + scored.sort_by(|a, b| { + b.similarity + .partial_cmp(&a.similarity) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + if scored.is_empty() { + return Ok(no_match( + "no_candidates", + &asnorm_reason, + asnorm_active, + scored, + 0.0, + )); + } + + let best_similarity = scored[0].similarity; + let best_effective_threshold = scored[0].effective_threshold; + + if asnorm_active { + let second_score = scored.get(1).map(|candidate| candidate.similarity); + if let Some(second) = second_score { + if best_similarity - second < request.asnorm_min_margin { + return Ok(no_match( + "ambiguous_margin", + &asnorm_reason, + asnorm_active, + scored, + best_similarity, + )); + } + } + } + + if best_similarity >= best_effective_threshold { + let matched_id = scored[0].speaker_id.clone(); + let matched_name = scored[0].name.clone(); + Ok(VoiceprintScoreDecision { + matched_id: Some(matched_id), + matched_name: Some(matched_name), + similarity: best_similarity, + reason: "matched".to_string(), + asnorm_active, + asnorm_reason, + candidates: scored, + }) + } else { + Ok(no_match( + "below_threshold", + &asnorm_reason, + asnorm_active, + scored, + best_similarity, + )) + } +} + +fn no_match( + reason: &str, + asnorm_reason: &str, + asnorm_active: bool, + candidates: Vec, + similarity: f64, +) -> VoiceprintScoreDecision { + VoiceprintScoreDecision { + matched_id: None, + matched_name: None, + similarity, + reason: reason.to_string(), + asnorm_active, + asnorm_reason: asnorm_reason.to_string(), + candidates, + } +} + +fn validate_embedding(name: &str, embedding: &[f64]) -> Result<(), String> { + if embedding.is_empty() { + return Err(format!("{name} must not be empty")); + } + if embedding.iter().any(|value| !value.is_finite()) { + return Err(format!("{name} values must be finite")); + } + Ok(()) +} + +fn validate_cohort(cohort: &[Vec], dim: usize) -> Result<(), String> { + for embedding in cohort { + validate_embedding("cohort embedding", embedding)?; + if embedding.len() != dim { + return Err("voiceprint cohort embeddings must share dimension".to_string()); + } + } + Ok(()) +} + +fn normalize_cohort(cohort: &[Vec], dim: usize) -> Result>, String> { + validate_cohort(cohort, dim)?; + let mut normalized = Vec::with_capacity(cohort.len()); + for embedding in cohort { + let Some(vector) = normalize(embedding)? else { + return Err("voiceprint cohort embeddings must not be zero vectors".to_string()); + }; + normalized.push(vector); + } + Ok(normalized) +} + +fn normalize(embedding: &[f64]) -> Result>, String> { + validate_embedding("embedding", embedding)?; + let norm = embedding + .iter() + .map(|value| value * value) + .sum::() + .sqrt(); + if norm < 1e-12 { + return Ok(None); + } + Ok(Some(embedding.iter().map(|value| value / norm).collect())) +} + +fn cosine_from_normalized(embedding: &[f64], normalized_query: &[f64]) -> Result { + let Some(normalized_embedding) = normalize(embedding)? else { + return Ok(0.0); + }; + Ok(dot(&normalized_embedding, normalized_query)) +} + +fn asnorm_score( + enroll_emb: &[f64], + test_emb: &[f64], + raw_similarity: f64, + normalized_cohort: &[Vec], + top_n: usize, +) -> Result { + let (mean_e, std_e) = cohort_stats(enroll_emb, normalized_cohort, top_n)?; + let (mean_t, std_t) = cohort_stats(test_emb, normalized_cohort, top_n)?; + Ok(0.5 * ((raw_similarity - mean_e) / std_e + (raw_similarity - mean_t) / std_t)) +} + +fn cohort_stats( + embedding: &[f64], + normalized_cohort: &[Vec], + top_n: usize, +) -> Result<(f64, f64), String> { + let Some(normalized_embedding) = normalize(embedding)? else { + return Err("voiceprint AS-norm embedding must not be zero vector".to_string()); + }; + let mut scores = normalized_cohort + .iter() + .map(|cohort_embedding| dot(cohort_embedding, &normalized_embedding)) + .collect::>(); + scores.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)); + let count = top_n.max(1).min(scores.len()); + let top = &scores[..count]; + let mean = top.iter().sum::() / count as f64; + let variance = top + .iter() + .map(|score| { + let delta = score - mean; + delta * delta + }) + .sum::() + / count as f64; + Ok((mean, variance.sqrt() + 1e-8)) +} + +fn dot(a: &[f64], b: &[f64]) -> f64 { + a.iter() + .zip(b.iter()) + .map(|(left, right)| left * right) + .sum() +} diff --git a/crates/voscript_core/tests/voiceprint_scoring.rs b/crates/voscript_core/tests/voiceprint_scoring.rs new file mode 100644 index 0000000..47e939e --- /dev/null +++ b/crates/voscript_core/tests/voiceprint_scoring.rs @@ -0,0 +1,118 @@ +use voscript_core::voiceprint::{ + score_voiceprint_candidates, VoiceprintScoreCandidate, VoiceprintScoreRequest, +}; + +fn vec_at(angle: f64) -> Vec { + vec![angle.cos(), angle.sin()] +} + +fn candidate( + speaker_id: &str, + name: &str, + angle: f64, + sample_count: usize, + sample_spread: Option, +) -> VoiceprintScoreCandidate { + VoiceprintScoreCandidate { + speaker_id: speaker_id.to_string(), + name: name.to_string(), + embedding: vec_at(angle), + sample_count, + sample_spread, + } +} + +fn cohort(angles: &[f64]) -> Vec> { + angles.iter().map(|angle| vec_at(*angle)).collect() +} + +#[test] +fn raw_scoring_matches_top_candidate_with_adaptive_threshold() { + let result = score_voiceprint_candidates(VoiceprintScoreRequest { + query_embedding: vec_at(0.0), + candidates: vec![ + candidate("spk_alice", "Alice", 0.72_f64.acos(), 1, None), + candidate("spk_bob", "Bob", 0.69_f64.acos(), 3, Some(0.0)), + ], + threshold: 0.75, + asnorm_threshold: 0.5, + cohort: None, + asnorm_top_n: 200, + asnorm_min_margin: 0.05, + }) + .expect("voiceprint score should succeed"); + + assert_eq!(result.matched_id.as_deref(), Some("spk_alice")); + assert_eq!(result.matched_name.as_deref(), Some("Alice")); + assert_eq!(result.reason, "matched"); + assert!(!result.asnorm_active); + assert_eq!(result.asnorm_reason, "not_requested"); + assert!((result.similarity - 0.72).abs() < 1e-9); + assert_eq!(result.candidates[0].speaker_id, "spk_alice"); + assert_eq!(result.candidates[0].score_method, "raw_cosine"); + assert!((result.candidates[0].effective_threshold - 0.70).abs() < 1e-9); +} + +#[test] +fn small_asnorm_cohort_falls_back_to_raw_scoring() { + let result = score_voiceprint_candidates(VoiceprintScoreRequest { + query_embedding: vec_at(0.0), + candidates: vec![candidate("spk_alice", "Alice", 0.0, 1, None)], + threshold: 0.75, + asnorm_threshold: 0.5, + cohort: Some(cohort(&[0.0, 0.1, -0.1, 0.2, -0.2])), + asnorm_top_n: 200, + asnorm_min_margin: 0.05, + }) + .expect("voiceprint score should succeed"); + + assert_eq!(result.matched_id.as_deref(), Some("spk_alice")); + assert!(!result.asnorm_active); + assert_eq!(result.asnorm_reason, "cohort_too_small"); + assert_eq!(result.candidates[0].score_method, "raw_cosine"); + assert!((result.similarity - 1.0).abs() < 1e-8); +} + +#[test] +fn asnorm_margin_rejects_ambiguous_top_two() { + let result = score_voiceprint_candidates(VoiceprintScoreRequest { + query_embedding: vec_at(0.0), + candidates: vec![ + candidate("spk_first", "First", 0.0, 3, Some(0.0)), + candidate("spk_second", "Second", 0.005, 3, Some(0.0)), + ], + threshold: 0.75, + asnorm_threshold: 0.5, + cohort: Some(cohort(&[ + 1.0, 1.1, 1.2, 1.3, 1.4, -1.0, -1.1, -1.2, -1.3, -1.4, + ])), + asnorm_top_n: 200, + asnorm_min_margin: 0.05, + }) + .expect("voiceprint score should succeed"); + + assert_eq!(result.matched_id, None); + assert_eq!(result.matched_name, None); + assert_eq!(result.reason, "ambiguous_margin"); + assert!(result.asnorm_active); + assert_eq!(result.asnorm_reason, "active"); + assert!((result.similarity - 4.89135345).abs() < 1e-6); + assert_eq!(result.candidates[0].score_method, "asnorm"); + assert!((result.candidates[1].similarity - 4.88978820).abs() < 1e-6); +} + +#[test] +fn non_finite_embeddings_are_rejected() { + let error = score_voiceprint_candidates(VoiceprintScoreRequest { + query_embedding: vec![1.0, f64::NAN], + candidates: vec![], + threshold: 0.75, + asnorm_threshold: 0.5, + cohort: None, + asnorm_top_n: 200, + asnorm_min_margin: 0.05, + }) + .expect_err("non-finite embeddings must fail closed"); + + assert!(error.contains("finite")); +} diff --git a/doc/changelog.en.md b/doc/changelog.en.md index 7879785..5fde29e 100644 --- a/doc/changelog.en.md +++ b/doc/changelog.en.md @@ -15,6 +15,9 @@ - Added the optional Rust kernel bridge foundation and `RUST_KERNEL_MODE`. The default `off` keeps current Python implementations; `required` makes selected Rust-backed paths import and execute successfully or fail closed. +- Added an optional Rust-backed voiceprint scoring kernel for explicit + `RUST_KERNEL_MODE=required` runs. The default remains Python scoring, and the + public speaker/voiceprint result contract is unchanged. ### Security @@ -28,6 +31,9 @@ extension, and run Docker packaging smoke with that wheel. Later PR updates do not rerun the heavy gate automatically; trigger it manually before merge when needed. +- Extended Rust kernel tests with voiceprint scoring golden cases for raw + cosine, AS-norm activation, small-cohort raw fallback, ambiguous top-2 + margins, and non-finite embedding rejection. ## 0.7.6 — Health, alignment, and embedding runtime fixes (2026-05-07) diff --git a/doc/changelog.zh.md b/doc/changelog.zh.md index c2e556c..7a0986e 100644 --- a/doc/changelog.zh.md +++ b/doc/changelog.zh.md @@ -13,6 +13,8 @@ - 新增可选 Rust kernel bridge 基础能力与 `RUST_KERNEL_MODE`。默认 `off` 保持当前 Python 实现;显式设为 `required` 时,被选择的 Rust-backed 路径必须可导入并执行, 否则 fail closed。 +- 新增显式 `RUST_KERNEL_MODE=required` 下可选的 Rust-backed 声纹计分 kernel。 + 默认仍使用 Python 计分,公开 speaker / voiceprint 结果契约不变。 ### 安全 @@ -24,6 +26,8 @@ - 新增 Rust foundation heavy gate:PR 首轮、main push 和手动触发会构建内部 `voscript_core` wheel、验证扩展 smoke,并用该 wheel 做 Docker packaging smoke。 后续 PR 更新不自动重复重型 gate,需在合并前按需手动触发。 +- 扩展 Rust kernel 测试,覆盖 raw cosine、AS-norm 启用、小 cohort 回退 raw、 + top-2 margin 模糊拒绝以及非有限 embedding 拒绝等声纹计分 golden case。 ## 0.7.6 — 健康检查、alignment 与 embedding 运行时修复 (2026-05-07) diff --git a/doc/configuration.en.md b/doc/configuration.en.md index 91159a9..c39cc0f 100644 --- a/doc/configuration.en.md +++ b/doc/configuration.en.md @@ -38,7 +38,7 @@ parameters yet. | `FFMPEG_TIMEOUT_SEC` | `1800` | ffmpeg conversion timeout in seconds; timeout returns `504`. | | `JOBS_MAX_CACHE` | `200` | In-memory job LRU limit. Evicted completed jobs remain queryable from disk `status.json` / `result.json`. | | `MODEL_IDLE_TIMEOUT_SEC` | `180` | GPU model idle-unload timeout, defaulting to 180 seconds (3 minutes). Set `0` to disable idle unload and keep models resident. When enabled, loaded models are released only after the serialized GPU runtime has been idle for this many seconds; on the next reload, ASR, diarization, and embedding each choose the visible CUDA device with the most free memory during their own lazy load. | -| `RUST_KERNEL_MODE` | `off` | Optional Rust-backed provider/kernel mode. `off` keeps Python implementations; `required` makes selected Rust-backed paths import and run successfully or fail closed. CI / Docker packaging still validates the Rust extension directly when the runtime default is off. | +| `RUST_KERNEL_MODE` | `off` | Optional Rust-backed provider/kernel mode. `off` keeps Python implementations; `required` makes selected Rust-backed paths import and run successfully or fail closed. The current selected path is voiceprint scoring; CI / Docker packaging still validates the Rust extension directly when the runtime default is off. | `MODELS_DIR` and `LANGUAGE` are defined in the config module, but v0.7.6's main HTTP transcription path does not use them as stable public tuning knobs: diff --git a/doc/configuration.zh.md b/doc/configuration.zh.md index ddb8103..e0bf0e0 100644 --- a/doc/configuration.zh.md +++ b/doc/configuration.zh.md @@ -36,7 +36,7 @@ | `FFMPEG_TIMEOUT_SEC` | `1800` | ffmpeg 转码超时秒数,超时返回 `504`。 | | `JOBS_MAX_CACHE` | `200` | 内存 job LRU 上限;被淘汰的完成任务仍可从磁盘 `status.json` / `result.json` 查询。 | | `MODEL_IDLE_TIMEOUT_SEC` | `180` | GPU 模型空闲卸载超时,默认 180 秒(3 分钟)。设为 `0` 可关闭空闲卸载并保持模型常驻。开启后,只有串行 GPU 运行时空闲达到该秒数才释放已加载模型;下一次 reload 时 ASR、diarization 和 embedding 会在各自 lazy load 时分别选择当前可见 CUDA 中空闲显存最多的设备。 | -| `RUST_KERNEL_MODE` | `off` | 可选 Rust-backed provider/kernel 路径开关。`off` 保持 Python 实现;`required` 要求被选择的 Rust-backed 路径可导入并执行,缺失或调用失败时 fail closed。默认关闭时,CI / Docker packaging 仍会直接验证 Rust 扩展。 | +| `RUST_KERNEL_MODE` | `off` | 可选 Rust-backed provider/kernel 路径开关。`off` 保持 Python 实现;`required` 要求被选择的 Rust-backed 路径可导入并执行,缺失或调用失败时 fail closed。当前被选择的路径是声纹计分;默认关闭时,CI / Docker packaging 仍会直接验证 Rust 扩展。 | `MODELS_DIR` 和 `LANGUAGE` 在配置模块里有定义,但 v0.7.6 的主 HTTP 转写路径 没有把它们作为稳定公开调参入口使用:Whisper 本地 checkpoint 查找仍使用 diff --git a/tests/test_voiceprint_db.py b/tests/test_voiceprint_db.py index d2c5668..e50cb1c 100644 --- a/tests/test_voiceprint_db.py +++ b/tests/test_voiceprint_db.py @@ -21,6 +21,8 @@ import numpy as np import pytest +from providers.kernel_bridge import RustKernelBridgeError + # --------------------------------------------------------------------------- # Helpers @@ -121,9 +123,9 @@ def test_adaptive_threshold_single_sample(tmp_path): effective = mod.VoiceprintDB._effective_threshold( base=0.75, sample_count=1, sample_spread=None ) - assert ( - 0.695 <= effective <= 0.705 - ), f"single-sample relaxation should yield ~0.70, got {effective}" + assert 0.695 <= effective <= 0.705, ( + f"single-sample relaxation should yield ~0.70, got {effective}" + ) # And the live identify() honours it: a slightly perturbed embedding # at similarity ≈ 0.72 must be accepted for a one-sample speaker, which @@ -179,9 +181,9 @@ def test_asnorm_active_only_when_cohort_ge_10(tmp_path): got_id, got_name, sim = db.identify(enroll) assert got_id is not None assert got_name == "erin" - assert ( - sim >= 0.99 - ), f"cohort<10 should leave raw cosine untouched (got sim={sim:.3f})" + assert sim >= 0.99, ( + f"cohort<10 should leave raw cosine untouched (got sim={sim:.3f})" + ) # Sanity: construct a low-similarity query that would be accepted by # the AS-norm operating threshold (0.5) but rejected by the adaptive @@ -195,9 +197,9 @@ def test_asnorm_active_only_when_cohort_ge_10(tmp_path): low /= np.linalg.norm(low) + 1e-9 got_id, _, sim = db.identify(low) - assert ( - got_id is None - ), f"cohort<10 must still reject sub-threshold raw cosine (sim={sim:.3f})" + assert got_id is None, ( + f"cohort<10 must still reject sub-threshold raw cosine (sim={sim:.3f})" + ) def test_asnorm_single_sample_uses_sample_count_aware_threshold(tmp_path): @@ -314,6 +316,79 @@ def test_asnorm_margin_uses_normalized_second_best(tmp_path): assert len({best_id, raw_second_id, normalized_second_id}) == 3 +def test_identify_uses_rust_voiceprint_kernel_when_required(tmp_path, monkeypatch): + """Explicit Rust mode routes identify scoring through the kernel bridge.""" + db, mod = _fresh_db(tmp_path / "vp") + enroll = _unit_vec(39) + sid = db.add_speaker("rust_selected", enroll) + calls = {} + + def _fake_score(payload): + calls["payload"] = payload + return { + "matched_id": sid, + "matched_name": "rust_selected", + "similarity": 0.9876, + "reason": "matched", + "asnorm_active": False, + "asnorm_reason": "not_requested", + "candidates": [], + } + + monkeypatch.setattr(mod, "rust_provider_paths_enabled", lambda: True, raising=False) + monkeypatch.setattr(mod, "rust_voiceprint_score", _fake_score, raising=False) + + got_id, got_name, sim = db.identify(enroll) + + assert got_id == sid + assert got_name == "rust_selected" + assert sim == pytest.approx(0.9876) + payload = calls["payload"] + assert payload["threshold"] == pytest.approx(0.75) + assert payload["asnorm_threshold"] == pytest.approx(0.5) + assert payload["cohort"] is None + assert payload["candidates"][0]["speaker_id"] == sid + assert payload["candidates"][0]["name"] == "rust_selected" + assert payload["candidates"][0]["sample_count"] == 1 + + +def test_identify_hard_fails_when_selected_rust_voiceprint_kernel_fails( + tmp_path, monkeypatch +): + """Selected Rust scoring failures must not silently use Python scoring.""" + db, mod = _fresh_db(tmp_path / "vp") + enroll = _unit_vec(40) + db.add_speaker("rust_failure", enroll) + + def _fake_score(payload): + raise RustKernelBridgeError("rust voiceprint score failed") + + monkeypatch.setattr(mod, "rust_provider_paths_enabled", lambda: True, raising=False) + monkeypatch.setattr(mod, "rust_voiceprint_score", _fake_score, raising=False) + + with pytest.raises(RustKernelBridgeError, match="rust voiceprint score failed"): + db.identify(enroll) + + +def test_identify_hard_fails_when_selected_rust_asnorm_cohort_is_not_exportable( + tmp_path, monkeypatch +): + """AS-norm Rust scoring needs an actual cohort array, not only scorer API.""" + db, mod = _fresh_db(tmp_path / "vp") + enroll = _unit_vec(41) + db.add_speaker("rust_asnorm", enroll) + db._asnorm = _FixedASNormScorer({_asnorm_key(enroll): 0.9}, cohort_size=10) + + def _unexpected_score(payload): + raise AssertionError("bridge should not be called without exportable cohort") + + monkeypatch.setattr(mod, "rust_provider_paths_enabled", lambda: True, raising=False) + monkeypatch.setattr(mod, "rust_voiceprint_score", _unexpected_score, raising=False) + + with pytest.raises(RustKernelBridgeError, match="exportable AS-norm cohort"): + db.identify(enroll) + + def test_update_speaker_static_sql(tmp_path): """update_speaker works both with and without the optional name kwarg.""" db, _mod = _fresh_db(tmp_path / "vp") diff --git a/tests/unit/test_kernel_bridge.py b/tests/unit/test_kernel_bridge.py index 459199d..2bcd203 100644 --- a/tests/unit/test_kernel_bridge.py +++ b/tests/unit/test_kernel_bridge.py @@ -22,7 +22,7 @@ def _core_smoke(payload): return { "ok": True, "echoed": payload, - "version": "0.8.0", + "version": "0.8.1", "capabilities": {"core_smoke": True, "rust_extension": True}, } @@ -36,7 +36,7 @@ def test_core_smoke_round_trips_safe_payload_through_imported_extension(): assert result["ok"] is True assert result["echoed"] == payload - assert result["version"] == "0.8.0" + assert result["version"] == "0.8.1" assert result["capabilities"]["core_smoke"] is True diff --git a/tests/unit/test_voiceprint_scoring_kernel.py b/tests/unit/test_voiceprint_scoring_kernel.py new file mode 100644 index 0000000..e24b26f --- /dev/null +++ b/tests/unit/test_voiceprint_scoring_kernel.py @@ -0,0 +1,192 @@ +"""Golden tests for voiceprint scoring oracle and Rust bridge contracts.""" + +from __future__ import annotations + +import math +from types import SimpleNamespace + +import numpy as np +import pytest + +from providers.kernel_bridge import RustKernelBridgeError, voiceprint_score +from voiceprints.scoring import ( + VoiceprintScoreCandidate, + score_voiceprint_candidates, +) + + +def _vec(angle: float) -> np.ndarray: + return np.array([math.cos(angle), math.sin(angle)], dtype=np.float32) + + +def _cohort(angles: list[float]) -> np.ndarray: + return np.stack([_vec(angle) for angle in angles], axis=0) + + +def test_python_oracle_matches_raw_top_candidate_with_adaptive_threshold(): + result = score_voiceprint_candidates( + query_embedding=_vec(0.0), + candidates=[ + VoiceprintScoreCandidate( + speaker_id="spk_alice", + name="Alice", + embedding=_vec(math.acos(0.72)), + sample_count=1, + sample_spread=None, + ), + VoiceprintScoreCandidate( + speaker_id="spk_bob", + name="Bob", + embedding=_vec(math.acos(0.69)), + sample_count=3, + sample_spread=0.0, + ), + ], + threshold=0.75, + ) + + assert result.matched_id == "spk_alice" + assert result.matched_name == "Alice" + assert result.reason == "matched" + assert result.asnorm_active is False + assert result.asnorm_reason == "not_requested" + assert result.similarity == pytest.approx(0.72, abs=1e-6) + assert [candidate.speaker_id for candidate in result.candidates] == [ + "spk_alice", + "spk_bob", + ] + assert result.candidates[0].effective_threshold == pytest.approx(0.70) + assert result.candidates[0].score_method == "raw_cosine" + + +def test_python_oracle_falls_back_to_raw_when_asnorm_cohort_is_too_small(): + result = score_voiceprint_candidates( + query_embedding=_vec(0.0), + candidates=[ + VoiceprintScoreCandidate( + speaker_id="spk_alice", + name="Alice", + embedding=_vec(0.0), + sample_count=1, + sample_spread=None, + ) + ], + threshold=0.75, + cohort=_cohort([0.0, 0.1, -0.1, 0.2, -0.2]), + ) + + assert result.matched_id == "spk_alice" + assert result.asnorm_active is False + assert result.asnorm_reason == "cohort_too_small" + assert result.candidates[0].score_method == "raw_cosine" + assert result.similarity == pytest.approx(1.0, abs=1e-6) + + +def test_python_oracle_rejects_ambiguous_asnorm_margin(): + result = score_voiceprint_candidates( + query_embedding=_vec(0.0), + candidates=[ + VoiceprintScoreCandidate( + speaker_id="spk_first", + name="First", + embedding=_vec(0.0), + sample_count=3, + sample_spread=0.0, + ), + VoiceprintScoreCandidate( + speaker_id="spk_second", + name="Second", + embedding=_vec(0.005), + sample_count=3, + sample_spread=0.0, + ), + ], + threshold=0.75, + asnorm_threshold=0.5, + cohort=_cohort([1.0, 1.1, 1.2, 1.3, 1.4, -1.0, -1.1, -1.2, -1.3, -1.4]), + ) + + assert result.matched_id is None + assert result.matched_name is None + assert result.reason == "ambiguous_margin" + assert result.asnorm_active is True + assert result.asnorm_reason == "active" + assert result.similarity == pytest.approx(4.89135345, abs=1e-6) + assert result.candidates[0].score_method == "asnorm" + assert result.candidates[1].similarity == pytest.approx(4.88978820, abs=1e-6) + + +def test_python_oracle_rejects_non_finite_embeddings(): + with pytest.raises(ValueError, match="finite"): + score_voiceprint_candidates( + query_embedding=np.array([1.0, np.nan], dtype=np.float32), + candidates=[], + ) + + +def test_kernel_bridge_validates_voiceprint_score_response(): + response = { + "matched_id": "spk_alice", + "matched_name": "Alice", + "similarity": 0.72, + "reason": "matched", + "asnorm_active": False, + "asnorm_reason": "not_requested", + "candidates": [], + } + + def _importer(module_name): + assert module_name == "voscript_core" + return SimpleNamespace(voiceprint_score=lambda payload: response) + + assert ( + voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) + == response + ) + + +def test_kernel_bridge_hard_fails_invalid_voiceprint_score_response(): + def _importer(module_name): + assert module_name == "voscript_core" + return SimpleNamespace(voiceprint_score=lambda payload: {"ok": True}) + + with pytest.raises(RustKernelBridgeError, match="missing keys"): + voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) + + +def test_kernel_bridge_hard_fails_invalid_voiceprint_candidate_response(): + response = { + "matched_id": "spk_alice", + "matched_name": "Alice", + "similarity": 0.72, + "reason": "matched", + "asnorm_active": False, + "asnorm_reason": "not_requested", + "candidates": [{"speaker_id": "spk_alice"}], + } + + def _importer(module_name): + assert module_name == "voscript_core" + return SimpleNamespace(voiceprint_score=lambda payload: response) + + with pytest.raises(RustKernelBridgeError, match="candidate.*missing keys"): + voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) + + +def test_kernel_bridge_hard_fails_non_finite_voiceprint_response(): + response = { + "matched_id": None, + "matched_name": None, + "similarity": float("nan"), + "reason": "below_threshold", + "asnorm_active": False, + "asnorm_reason": "not_requested", + "candidates": [], + } + + def _importer(module_name): + assert module_name == "voscript_core" + return SimpleNamespace(voiceprint_score=lambda payload: response) + + with pytest.raises(RustKernelBridgeError, match="similarity must be finite"): + voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) From 2bf9c024f739f5640c86d73eaa19e49b13cf3603 Mon Sep 17 00:00:00 2001 From: Maple Gao Date: Tue, 9 Jun 2026 22:37:21 +0800 Subject: [PATCH 2/3] test: harden voiceprint kernel CI imports --- tests/test_voiceprint_db.py | 2 +- tests/unit/test_voiceprint_scoring_kernel.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/test_voiceprint_db.py b/tests/test_voiceprint_db.py index e50cb1c..480efd1 100644 --- a/tests/test_voiceprint_db.py +++ b/tests/test_voiceprint_db.py @@ -385,7 +385,7 @@ def _unexpected_score(payload): monkeypatch.setattr(mod, "rust_provider_paths_enabled", lambda: True, raising=False) monkeypatch.setattr(mod, "rust_voiceprint_score", _unexpected_score, raising=False) - with pytest.raises(RustKernelBridgeError, match="exportable AS-norm cohort"): + with pytest.raises(mod.RustKernelBridgeError, match="exportable AS-norm cohort"): db.identify(enroll) diff --git a/tests/unit/test_voiceprint_scoring_kernel.py b/tests/unit/test_voiceprint_scoring_kernel.py index e24b26f..a26a70f 100644 --- a/tests/unit/test_voiceprint_scoring_kernel.py +++ b/tests/unit/test_voiceprint_scoring_kernel.py @@ -2,17 +2,29 @@ from __future__ import annotations +import importlib.util import math +import sys +from pathlib import Path from types import SimpleNamespace import numpy as np import pytest -from providers.kernel_bridge import RustKernelBridgeError, voiceprint_score -from voiceprints.scoring import ( - VoiceprintScoreCandidate, - score_voiceprint_candidates, +_APP_DIR = Path(__file__).resolve().parents[2] / "app" +sys.path.insert(0, str(_APP_DIR)) + +from providers.kernel_bridge import RustKernelBridgeError, voiceprint_score # noqa: E402 + +_SCORING_SPEC = importlib.util.spec_from_file_location( + "_voscript_voiceprint_scoring", _APP_DIR / "voiceprints" / "scoring.py" ) +assert _SCORING_SPEC is not None and _SCORING_SPEC.loader is not None +_SCORING = importlib.util.module_from_spec(_SCORING_SPEC) +sys.modules[_SCORING_SPEC.name] = _SCORING +_SCORING_SPEC.loader.exec_module(_SCORING) +VoiceprintScoreCandidate = _SCORING.VoiceprintScoreCandidate +score_voiceprint_candidates = _SCORING.score_voiceprint_candidates def _vec(angle: float) -> np.ndarray: From b6ca0b8008af9edb476becf826d59438128c6b6d Mon Sep 17 00:00:00 2001 From: Maple Gao Date: Tue, 9 Jun 2026 22:46:34 +0800 Subject: [PATCH 3/3] test: cover voiceprint bridge validation --- tests/unit/test_voiceprint_scoring_kernel.py | 143 +++++++++++++++---- 1 file changed, 112 insertions(+), 31 deletions(-) diff --git a/tests/unit/test_voiceprint_scoring_kernel.py b/tests/unit/test_voiceprint_scoring_kernel.py index a26a70f..9cd6eaa 100644 --- a/tests/unit/test_voiceprint_scoring_kernel.py +++ b/tests/unit/test_voiceprint_scoring_kernel.py @@ -35,6 +35,35 @@ def _cohort(angles: list[float]) -> np.ndarray: return np.stack([_vec(angle) for angle in angles], axis=0) +def _voiceprint_response(**overrides): + response = { + "matched_id": "spk_alice", + "matched_name": "Alice", + "similarity": 0.72, + "reason": "matched", + "asnorm_active": False, + "asnorm_reason": "not_requested", + "candidates": [], + } + response.update(overrides) + return response + + +def _candidate_response(**overrides): + candidate = { + "speaker_id": "spk_alice", + "name": "Alice", + "raw_similarity": 0.72, + "similarity": 0.72, + "effective_threshold": 0.7, + "score_method": "raw_cosine", + "sample_count": 1, + "sample_spread": None, + } + candidate.update(overrides) + return candidate + + def test_python_oracle_matches_raw_top_candidate_with_adaptive_threshold(): result = score_voiceprint_candidates( query_embedding=_vec(0.0), @@ -137,24 +166,19 @@ def test_python_oracle_rejects_non_finite_embeddings(): def test_kernel_bridge_validates_voiceprint_score_response(): - response = { - "matched_id": "spk_alice", - "matched_name": "Alice", - "similarity": 0.72, - "reason": "matched", - "asnorm_active": False, - "asnorm_reason": "not_requested", - "candidates": [], - } + response = _voiceprint_response( + candidates=[_candidate_response(sample_spread=0.01)] + ) def _importer(module_name): assert module_name == "voscript_core" return SimpleNamespace(voiceprint_score=lambda payload: response) - assert ( - voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) - == response - ) + result = voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) + + assert result["matched_id"] == "spk_alice" + assert result["candidates"][0]["sample_count"] == 1 + assert result["candidates"][0]["sample_spread"] == pytest.approx(0.01) def test_kernel_bridge_hard_fails_invalid_voiceprint_score_response(): @@ -166,16 +190,21 @@ def _importer(module_name): voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) +def test_kernel_bridge_hard_fails_voiceprint_score_call_failure(): + def _importer(module_name): + assert module_name == "voscript_core" + + def _voiceprint_score(payload): + raise RuntimeError("boom") + + return SimpleNamespace(voiceprint_score=_voiceprint_score) + + with pytest.raises(RustKernelBridgeError, match="voiceprint_score call failed"): + voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) + + def test_kernel_bridge_hard_fails_invalid_voiceprint_candidate_response(): - response = { - "matched_id": "spk_alice", - "matched_name": "Alice", - "similarity": 0.72, - "reason": "matched", - "asnorm_active": False, - "asnorm_reason": "not_requested", - "candidates": [{"speaker_id": "spk_alice"}], - } + response = _voiceprint_response(candidates=[{"speaker_id": "spk_alice"}]) def _importer(module_name): assert module_name == "voscript_core" @@ -186,15 +215,7 @@ def _importer(module_name): def test_kernel_bridge_hard_fails_non_finite_voiceprint_response(): - response = { - "matched_id": None, - "matched_name": None, - "similarity": float("nan"), - "reason": "below_threshold", - "asnorm_active": False, - "asnorm_reason": "not_requested", - "candidates": [], - } + response = _voiceprint_response(similarity=float("nan")) def _importer(module_name): assert module_name == "voscript_core" @@ -202,3 +223,63 @@ def _importer(module_name): with pytest.raises(RustKernelBridgeError, match="similarity must be finite"): voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) + + +@pytest.mark.parametrize( + ("response", "message"), + [ + ([], "non-mapping"), + (_voiceprint_response(reason=""), "reason must be non-empty"), + (_voiceprint_response(asnorm_active="false"), "asnorm_active must be bool"), + (_voiceprint_response(asnorm_reason=""), "asnorm_reason must be non-empty"), + (_voiceprint_response(candidates={}), "candidates must be a list"), + (_voiceprint_response(similarity="not-a-number"), "similarity must be numeric"), + ], +) +def test_kernel_bridge_hard_fails_invalid_voiceprint_score_responses(response, message): + def _importer(module_name): + assert module_name == "voscript_core" + return SimpleNamespace(voiceprint_score=lambda payload: response) + + with pytest.raises(RustKernelBridgeError, match=message): + voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer) + + +@pytest.mark.parametrize( + ("candidate", "message"), + [ + ([], "candidate returned a non-mapping"), + (_candidate_response(name=""), "candidate name must be non-empty"), + ( + _candidate_response(score_method=""), + "candidate score_method must be non-empty", + ), + (_candidate_response(raw_similarity="bad"), "raw_similarity must be numeric"), + ( + _candidate_response(similarity=float("inf")), + "candidate similarity must be finite", + ), + ( + _candidate_response(effective_threshold="bad"), + "effective_threshold must be numeric", + ), + (_candidate_response(sample_count="bad"), "sample_count must be integer-like"), + (_candidate_response(sample_count=-1), "sample_count must be non-negative"), + (_candidate_response(sample_spread="bad"), "sample_spread must be numeric"), + ( + _candidate_response(sample_spread=float("nan")), + "sample_spread must be finite", + ), + ], +) +def test_kernel_bridge_hard_fails_invalid_voiceprint_candidate_responses( + candidate, message +): + response = _voiceprint_response(candidates=[candidate]) + + def _importer(module_name): + assert module_name == "voscript_core" + return SimpleNamespace(voiceprint_score=lambda payload: response) + + with pytest.raises(RustKernelBridgeError, match=message): + voiceprint_score({"query_embedding": [1.0, 0.0]}, importer=_importer)