From 327d19fe70662cf58db4abf98221c264527dddc7 Mon Sep 17 00:00:00 2001 From: Maple Gao Date: Wed, 10 Jun 2026 01:09:48 +0800 Subject: [PATCH 1/3] feat: add rust kernel release gates --- .github/workflows/ci.yml | 10 + Cargo.lock | 2 +- app/providers/kernel_bridge/__init__.py | 14 + app/providers/kernel_bridge/release_gates.py | 183 +++++++++ crates/voscript_core/Cargo.toml | 2 +- crates/voscript_core/src/lib.rs | 2 +- doc/changelog.en.md | 6 + doc/changelog.zh.md | 5 + tests/unit/test_kernel_bridge.py | 4 +- tests/unit/test_kernel_release_gates.py | 85 ++++ tests/unit/test_public_release_scan.py | 77 ++++ voscript-api/scripts/public_release_scan.py | 404 +++++++++++++++++++ 12 files changed, 789 insertions(+), 5 deletions(-) create mode 100644 app/providers/kernel_bridge/release_gates.py create mode 100644 tests/unit/test_kernel_release_gates.py create mode 100644 tests/unit/test_public_release_scan.py create mode 100644 voscript-api/scripts/public_release_scan.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5308e4..70e3afa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,16 @@ on: branches: [main] jobs: + public-release-scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Run public release scan + run: python voscript-api/scripts/public_release_scan.py --root . + lint: runs-on: ubuntu-latest steps: diff --git a/Cargo.lock b/Cargo.lock index 8b86cf3..e034ead 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -127,7 +127,7 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "voscript_core" -version = "0.8.3" +version = "0.8.4" dependencies = [ "pyo3", ] diff --git a/app/providers/kernel_bridge/__init__.py b/app/providers/kernel_bridge/__init__.py index 515c2e6..d204370 100644 --- a/app/providers/kernel_bridge/__init__.py +++ b/app/providers/kernel_bridge/__init__.py @@ -13,17 +13,31 @@ status_payload_contract, voiceprint_score, ) +from .release_gates import ( + REQUIRED_CI_GATES, + REQUIRED_HARD_FAIL_MODES, + RUST_KERNEL_MODE_ROLLBACK, + RustKernelReleaseGate, + release_gate_matrix, + validate_release_gate_matrix, +) __all__ = [ + "REQUIRED_CI_GATES", + "REQUIRED_HARD_FAIL_MODES", "RUST_KERNEL_MODE_OFF", "RUST_KERNEL_MODE_REQUIRED", + "RUST_KERNEL_MODE_ROLLBACK", "RustKernelBridgeError", + "RustKernelReleaseGate", "artifact_manifest_contract", "core_smoke", "postprocess_segments", + "release_gate_matrix", "require_rust_core", "rust_kernel_mode", "rust_provider_paths_enabled", "status_payload_contract", + "validate_release_gate_matrix", "voiceprint_score", ] diff --git a/app/providers/kernel_bridge/release_gates.py b/app/providers/kernel_bridge/release_gates.py new file mode 100644 index 0000000..e207ec0 --- /dev/null +++ b/app/providers/kernel_bridge/release_gates.py @@ -0,0 +1,183 @@ +"""Internal release-gate matrix for selected Rust-backed provider paths.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Final + +RUST_KERNEL_MODE_ROLLBACK: Final = "RUST_KERNEL_MODE=off" + +HARD_FAIL_IMPORT_FAILURE: Final = "import_failure" +HARD_FAIL_CALL_FAILURE: Final = "call_failure" +HARD_FAIL_INVALID_PAYLOAD: Final = "invalid_payload" +HARD_FAIL_INVALID_RESPONSE: Final = "invalid_response" +HARD_FAIL_PARITY_MISMATCH: Final = "parity_mismatch" + +REQUIRED_HARD_FAIL_MODES: Final = frozenset( + { + HARD_FAIL_IMPORT_FAILURE, + HARD_FAIL_CALL_FAILURE, + HARD_FAIL_INVALID_PAYLOAD, + HARD_FAIL_INVALID_RESPONSE, + HARD_FAIL_PARITY_MISMATCH, + } +) + +REQUIRED_CI_GATES: Final = frozenset( + { + "python_unit_security_tests", + "kernel_bridge_smoke_tests", + "rust_fmt", + "rust_clippy", + "rust_tests", + "rust_wheel_smoke", + "docker_packaging_smoke", + "public_release_scan", + } +) + + +@dataclass(frozen=True, slots=True) +class RustKernelReleaseGate: + """Audit contract for one selected Rust-backed implementation path. + + The runtime switch stays intentionally small: Python owns orchestration and + public API shape; Rust owns only selected pure kernels/helpers. This matrix + makes the selected paths and their rollback/fail-closed evidence explicit. + """ + + name: str + bridge_function: str + python_owner: str + rust_owner: str + rollback: str + regression_matrix: tuple[str, ...] + hard_fail_modes: frozenset[str] + ci_gates: frozenset[str] + performance_baseline: str + public_api_change: bool = False + + +SELECTED_RUST_KERNEL_GATES: Final = ( + RustKernelReleaseGate( + name="voiceprint_scoring", + bridge_function="voiceprint_score", + python_owner="voiceprints.scoring.score_voiceprint_candidates", + rust_owner="voscript_core::voiceprint::score_voiceprint_candidates", + rollback=RUST_KERNEL_MODE_ROLLBACK, + regression_matrix=( + "raw_cosine_adaptive_threshold", + "asnorm_active_with_margin_guard", + "small_cohort_raw_fallback", + "non_finite_embedding_rejection", + "db_required_mode_payload_export", + "db_off_mode_python_scoring", + ), + hard_fail_modes=REQUIRED_HARD_FAIL_MODES, + ci_gates=REQUIRED_CI_GATES, + performance_baseline="internal_scoring_only_synthetic", + ), + RustKernelReleaseGate( + name="postprocess_segments", + bridge_function="postprocess_segments", + python_owner="postprocess.segments.build_result_segments", + rust_owner="voscript_core::postprocess::build_result_segments", + rollback=RUST_KERNEL_MODE_ROLLBACK, + regression_matrix=( + "word_normalization", + "adjacent_text_segment_merge", + "word_payload_merge_block", + "duplicate_display_name_disambiguation", + "stable_speaker_label_preservation", + ), + hard_fail_modes=REQUIRED_HARD_FAIL_MODES, + ci_gates=REQUIRED_CI_GATES, + performance_baseline="internal_postprocess_only_synthetic", + ), + RustKernelReleaseGate( + name="artifact_manifest_contract", + bridge_function="artifact_manifest_contract", + python_owner="pipeline.contracts.artifacts.build_artifact_manifest", + rust_owner="voscript_core::contracts::artifact_manifest_contract", + rollback=RUST_KERNEL_MODE_ROLLBACK, + regression_matrix=( + "public_safe_manifest_build", + "path_and_url_rejection", + "legacy_unknown_entry_tolerance", + "stable_optional_experimental_categories", + ), + hard_fail_modes=REQUIRED_HARD_FAIL_MODES, + ci_gates=REQUIRED_CI_GATES, + performance_baseline="internal_helper_only_synthetic", + ), + RustKernelReleaseGate( + name="status_payload_contract", + bridge_function="status_payload_contract", + python_owner="pipeline.contracts.status.build_status_payload", + rust_owner="voscript_core::contracts::status_payload_contract", + rollback=RUST_KERNEL_MODE_ROLLBACK, + regression_matrix=( + "known_status_normalization", + "unknown_legacy_status_to_failed", + "basename_only_filename", + "legacy_status_payload_compatibility", + ), + hard_fail_modes=REQUIRED_HARD_FAIL_MODES, + ci_gates=REQUIRED_CI_GATES, + performance_baseline="internal_helper_only_synthetic", + ), +) + + +def release_gate_matrix() -> tuple[RustKernelReleaseGate, ...]: + """Return the selected Rust-backed release gates as an immutable tuple.""" + + return SELECTED_RUST_KERNEL_GATES + + +def validate_release_gate_matrix( + gates: tuple[RustKernelReleaseGate, ...] = SELECTED_RUST_KERNEL_GATES, +) -> tuple[str, ...]: + """Return policy gaps that would block a 0.8.x Rust-backed release.""" + + gaps: list[str] = [] + names: set[str] = set() + bridge_functions: set[str] = set() + for gate in gates: + if gate.name in names: + gaps.append(f"{gate.name}: duplicate gate name") + names.add(gate.name) + if gate.bridge_function in bridge_functions: + gaps.append(f"{gate.name}: duplicate bridge function") + bridge_functions.add(gate.bridge_function) + if not gate.regression_matrix: + gaps.append(f"{gate.name}: missing regression matrix") + missing_hard_fail = REQUIRED_HARD_FAIL_MODES.difference(gate.hard_fail_modes) + if missing_hard_fail: + gaps.append( + f"{gate.name}: missing hard-fail modes {sorted(missing_hard_fail)}" + ) + missing_ci = REQUIRED_CI_GATES.difference(gate.ci_gates) + if missing_ci: + gaps.append(f"{gate.name}: missing CI gates {sorted(missing_ci)}") + if gate.rollback != RUST_KERNEL_MODE_ROLLBACK: + gaps.append(f"{gate.name}: rollback must be {RUST_KERNEL_MODE_ROLLBACK}") + if gate.public_api_change: + gaps.append(f"{gate.name}: public API change is not allowed in 0.8.4") + return tuple(gaps) + + +__all__ = [ + "HARD_FAIL_CALL_FAILURE", + "HARD_FAIL_IMPORT_FAILURE", + "HARD_FAIL_INVALID_PAYLOAD", + "HARD_FAIL_INVALID_RESPONSE", + "HARD_FAIL_PARITY_MISMATCH", + "REQUIRED_CI_GATES", + "REQUIRED_HARD_FAIL_MODES", + "RUST_KERNEL_MODE_ROLLBACK", + "RustKernelReleaseGate", + "SELECTED_RUST_KERNEL_GATES", + "release_gate_matrix", + "validate_release_gate_matrix", +] diff --git a/crates/voscript_core/Cargo.toml b/crates/voscript_core/Cargo.toml index dacdac6..50b0167 100644 --- a/crates/voscript_core/Cargo.toml +++ b/crates/voscript_core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "voscript_core" -version = "0.8.3" +version = "0.8.4" edition = "2021" license = "Apache-2.0" publish = false diff --git a/crates/voscript_core/src/lib.rs b/crates/voscript_core/src/lib.rs index 0bb0be0..7bfd382 100644 --- a/crates/voscript_core/src/lib.rs +++ b/crates/voscript_core/src/lib.rs @@ -447,7 +447,7 @@ fn voscript_core(module: &Bound<'_, PyModule>) -> PyResult<()> { mod tests { #[test] fn package_version_is_set() { - assert_eq!(super::PACKAGE_VERSION, "0.8.3"); + assert_eq!(super::PACKAGE_VERSION, "0.8.4"); } #[test] diff --git a/doc/changelog.en.md b/doc/changelog.en.md index 2561cef..e384a9b 100644 --- a/doc/changelog.en.md +++ b/doc/changelog.en.md @@ -52,6 +52,12 @@ - Extended contract tests for legacy / unknown artifact manifests, persisted status payloads, optional schema versions, and Rust artifact helper bridge validation. +- Added a 0.8.4 Rust-kernel release-gate matrix covering selected + voiceprint, post-process, artifact, and status helper paths with explicit + hard-fail, rollback, CI, and internal performance-baseline evidence. +- Added a repository-owned public release scan and CI gate so PR/release checks + fail before private paths, raw validation material, real IDs, or secret-like + values reach public release text. ## 0.7.6 — Health, alignment, and embedding runtime fixes (2026-05-07) diff --git a/doc/changelog.zh.md b/doc/changelog.zh.md index d4ecf7e..1b99559 100644 --- a/doc/changelog.zh.md +++ b/doc/changelog.zh.md @@ -42,6 +42,11 @@ 消歧和 word normalization。 - 扩展 contract 测试,覆盖旧版 / 未知 artifact manifest、持久化 status payload、可选 schema version,以及 Rust artifact helper bridge 校验。 +- 新增 0.8.4 Rust-kernel release-gate matrix,覆盖已选择的声纹计分、结果 + 后处理、artifact 与 status helper 路径,并明确 hard-fail、rollback、CI + 和内部性能基线证据。 +- 新增主仓自带的 public release scan 与 CI gate,避免 private path、原始验证材料、 + 真实 ID 或疑似密钥值进入公开 PR / release 文案。 ## 0.7.6 — 健康检查、alignment 与 embedding 运行时修复 (2026-05-07) diff --git a/tests/unit/test_kernel_bridge.py b/tests/unit/test_kernel_bridge.py index be7e0b2..8277dd1 100644 --- a/tests/unit/test_kernel_bridge.py +++ b/tests/unit/test_kernel_bridge.py @@ -25,7 +25,7 @@ def _core_smoke(payload): return { "ok": True, "echoed": payload, - "version": "0.8.3", + "version": "0.8.4", "capabilities": {"core_smoke": True, "rust_extension": True}, } @@ -39,7 +39,7 @@ def test_core_smoke_round_trips_safe_payload_through_imported_extension(): assert result["ok"] is True assert result["echoed"] == payload - assert result["version"] == "0.8.3" + assert result["version"] == "0.8.4" assert result["capabilities"]["core_smoke"] is True diff --git a/tests/unit/test_kernel_release_gates.py b/tests/unit/test_kernel_release_gates.py new file mode 100644 index 0000000..a5624c1 --- /dev/null +++ b/tests/unit/test_kernel_release_gates.py @@ -0,0 +1,85 @@ +"""Regression, rollback, and CI gate coverage for 0.8.x Rust kernels.""" + +from __future__ import annotations + +from pathlib import Path + +from providers.kernel_bridge.release_gates import ( + REQUIRED_CI_GATES, + REQUIRED_HARD_FAIL_MODES, + RUST_KERNEL_MODE_ROLLBACK, + release_gate_matrix, + validate_release_gate_matrix, +) + + +ROOT = Path(__file__).resolve().parents[2] + + +def test_release_gate_matrix_covers_selected_rust_backed_paths(): + gates = release_gate_matrix() + + assert {gate.name for gate in gates} == { + "voiceprint_scoring", + "postprocess_segments", + "artifact_manifest_contract", + "status_payload_contract", + } + assert {gate.bridge_function for gate in gates} == { + "voiceprint_score", + "postprocess_segments", + "artifact_manifest_contract", + "status_payload_contract", + } + + +def test_release_gate_matrix_has_no_policy_gaps(): + assert validate_release_gate_matrix() == () + + +def test_each_selected_gate_is_fail_closed_and_explicitly_rollbackable(): + for gate in release_gate_matrix(): + assert gate.rollback == RUST_KERNEL_MODE_ROLLBACK + assert REQUIRED_HARD_FAIL_MODES.issubset(gate.hard_fail_modes) + assert REQUIRED_CI_GATES.issubset(gate.ci_gates) + assert gate.regression_matrix + assert gate.performance_baseline.startswith("internal_") + assert gate.public_api_change is False + + +def test_ci_workflows_include_required_release_gate_commands(): + ci = (ROOT / ".github" / "workflows" / "ci.yml").read_text(encoding="utf-8") + heavy = (ROOT / ".github" / "workflows" / "rust-foundation-heavy.yml").read_text( + encoding="utf-8" + ) + release = (ROOT / ".github" / "workflows" / "release.yml").read_text( + encoding="utf-8" + ) + + assert "public_release_scan.py --root ." in ci + assert "pytest tests/unit/ tests/test_security.py" in ci + assert ( + "cargo fmt --manifest-path crates/voscript_core/Cargo.toml -- --check" in heavy + ) + assert "cargo clippy --manifest-path crates/voscript_core/Cargo.toml" in heavy + assert "cargo test --manifest-path crates/voscript_core/Cargo.toml" in heavy + assert ( + "maturin build --release --manifest-path crates/voscript_core/Cargo.toml" + in heavy + ) + assert "docker build ./app" in heavy + assert "RUST_KERNEL_MODE=required" in heavy + assert "workflow_dispatch:" in heavy + assert "types: [opened, reopened, ready_for_review]" in heavy + assert ( + "maturin build --release --manifest-path crates/voscript_core/Cargo.toml" + in release + ) + assert "VOSCRIPT_CORE_WHEEL" in release + + +def test_public_release_scan_entrypoint_is_repo_owned(): + scan = ROOT / "voscript-api" / "scripts" / "public_release_scan.py" + + assert scan.exists() + assert "Public release scan passed" in scan.read_text(encoding="utf-8") diff --git a/tests/unit/test_public_release_scan.py b/tests/unit/test_public_release_scan.py new file mode 100644 index 0000000..736e219 --- /dev/null +++ b/tests/unit/test_public_release_scan.py @@ -0,0 +1,77 @@ +"""Tests for the repository-owned public release scanner.""" + +from __future__ import annotations + +import subprocess +import sys +import tempfile +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[2] +SCANNER = ROOT / "voscript-api" / "scripts" / "public_release_scan.py" + + +def _run_git(root: Path, *args: str) -> None: + subprocess.run(["git", "-C", str(root), *args], check=True, stdout=subprocess.PIPE) + + +def _scan_fixture(content: str) -> subprocess.CompletedProcess[str]: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + _run_git(root, "init", "-q") + fixture = root / "fixture.md" + fixture.write_text(content, encoding="utf-8") + _run_git(root, "add", "fixture.md") + return subprocess.run( + [sys.executable, str(SCANNER), "--root", str(root)], + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + +def test_public_release_scan_allows_placeholders(): + result = _scan_fixture( + "\n".join( + [ + "Authorization: Bearer ", + "HF_TOKEN=${HF_TOKEN}", + "API_KEY=your-api-key", + "Use internal live validation for release notes.", + ] + ) + ) + + assert result.returncode == 0, result.stdout + result.stderr + assert "Public release scan passed" in result.stdout + + +def test_public_release_scan_blocks_secret_looking_assignments(): + synthetic_secret = "sk-live-" + "syntheticsecret123456789" + + result = _scan_fixture(f"API_KEY={synthetic_secret}") + + assert result.returncode == 1 + assert "secret-looking assignment" in result.stdout + + +def test_public_release_scan_blocks_private_paths_and_real_ids(): + local_path = "/" + "Users/example/private.log" + transcription_id = "tr_" + "20260426_124218_abcdef" + speaker_id = "spk_" + "1234abcd" + fixture = "\n".join( + [ + f"Read {local_path} before publishing.", + f"Result id {transcription_id} should not be public.", + f"Speaker id {speaker_id} should not be public.", + ] + ) + + result = _scan_fixture(fixture) + + assert result.returncode == 1 + assert "machine-local path" in result.stdout + assert "real transcription id" in result.stdout + assert "real speaker id" in result.stdout diff --git a/voscript-api/scripts/public_release_scan.py b/voscript-api/scripts/public_release_scan.py new file mode 100644 index 0000000..f227c39 --- /dev/null +++ b/voscript-api/scripts/public_release_scan.py @@ -0,0 +1,404 @@ +#!/usr/bin/env python3 +"""Scan tracked files for public-release privacy leaks.""" + +from __future__ import annotations + +import argparse +import os +import re +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, Sequence + +MEDIA_EXTENSIONS = { + ".aac", + ".flac", + ".m4a", + ".mkv", + ".mov", + ".mp3", + ".mp4", + ".ogg", + ".opus", + ".wav", + ".webm", +} +TEXT_EXTENSIONS = { + ".dockerignore", + ".env", + ".gitignore", + ".ini", + ".json", + ".md", + ".py", + ".rst", + ".sh", + ".toml", + ".txt", + ".yaml", + ".yml", +} +ALLOW_CONTENT = {"voscript-api/scripts/public_release_scan.py"} +PUBLIC_CONTAINER_PATHS = { + "/data/voiceprints", + "/data/transcriptions/asnorm_cohort.npy", +} + + +@dataclass(frozen=True) +class Finding: + category: str + path: str + line: int + excerpt: str + advice: str + + +@dataclass(frozen=True) +class Rule: + name: str + pattern: re.Pattern[str] + advice: str + + +def _rx(value: str, flags: int = 0) -> re.Pattern[str]: + return re.compile(value, flags) + + +SECRET_ASSIGNMENT_RE = _rx( + r"(?[A-Za-z_][A-Za-z0-9_-]*)['\"]?" + r"\s*(?:=|:)\s*(?P\"[^\"\n]*\"|'[^'\n]*'|`[^`\n]*`|[^\s,;]+)?", + re.I, +) +SECRET_VALUE_PREFIX_RE = _rx( + r"^(?:sk[-_](?:(?:live|test)[-_])?|hf_|ghp_|github_pat_|xox[baprs]-|AKIA|AIza|eyJ)" + r"[A-Za-z0-9_+/\-.=]{8,}$" +) +RAW_BEARER_TOKEN_RE = _rx( + r"(?[A-Za-z0-9_+/\-.=]{12,})(?![A-Za-z0-9_-])", + re.I, +) +RAW_SECRET_TOKEN_RE = _rx( + r"(?(?:sk[-_](?:(?:live|test)[-_])?|hf_|ghp_|github_pat_|xox[baprs]-|AKIA|AIza|eyJ)" + r"[A-Za-z0-9_+/\-.=]{8,})" + r"(?![A-Za-z0-9_+/\-.=])" +) +PLACEHOLDER_VALUE_RE = _rx( + r"^(?:<[^>\s]+>|\$\{[A-Za-z_][A-Za-z0-9_]*(?::-[^}]*)?\}|\$[A-Za-z_][A-Za-z0-9_]*|" + r"(?:your|example|sample|dummy|fake|test)[-_]?[A-Za-z0-9_-]*" + r"(?:api[-_]?key|key|token|password|secret)[A-Za-z0-9_-]*)$", + re.I, +) +EXPLICIT_SECRET_NAMES = { + "api_key", + "hf_token", + "password", + "secret", + "token", + "voscript_api_key", + "voscript_token", +} + +PRIVATE_DIRECT_SSH_ALIAS = "a" + "i" +PRIVATE_WAN_SSH_ALIAS = PRIVATE_DIRECT_SSH_ALIAS + "-" + "wan" +PRIVATE_REMOTE_ALIASES = (PRIVATE_DIRECT_SSH_ALIAS + "-" + "lan", PRIVATE_WAN_SSH_ALIAS) +PRIVATE_PROXY_HOST_PORT = "127" + ".0.0.1:" + "78" + "97" + +LINE_RULES = [ + Rule( + "private validation corpus name", + _rx(r"\b(?:private E2E (?:corpus|sample)|private (?:corpus|sample))\b", re.I), + "Use anonymized wording such as internal live validation.", + ), + Rule( + "machine-local path", + _rx(r"(?:/Users/|/data/)[^\s)\"'`]+"), + "Move machine-specific paths to local-only notes.", + ), + Rule( + "local-only directory reference", + _rx(r"\b(?:roadmap|tmp)[/\\]"), + "Replace local-only directory paths with generic ignored archive wording.", + ), + Rule( + "local ssh config path", + _rx(r"(?:~[/\\])?\.ssh[/\\]config"), + "Refer to local SSH config generically in public docs.", + ), + Rule( + "remote host alias", + _rx( + rf"(?:\bssh\s+{PRIVATE_DIRECT_SSH_ALIAS}\b|`{PRIVATE_DIRECT_SSH_ALIAS}`|\b(?:{'|'.join(PRIVATE_REMOTE_ALIASES)})\b)", + re.I, + ), + "Replace private host aliases with generic deployment wording.", + ), + Rule( + "private proxy/debug port", + _rx(re.escape(PRIVATE_PROXY_HOST_PORT)), + "Replace private proxy/debug ports with placeholders.", + ), + Rule( + "candidate debug port", + _rx(r"\b18(?:7[0-9]{2}|78[0-9])\b"), + "Do not publish temporary candidate/debug ports.", + ), + Rule( + "real transcription id", + _rx(r"\btr_[0-9]{8}_[0-9]{6}_[A-Za-z0-9_-]+\b"), + "Replace real transcription IDs with .", + ), + Rule( + "real speaker id", + _rx(r"\bspk_[0-9a-f]{6,}\b", re.I), + "Replace real speaker IDs with .", + ), +] + + +def run_git(root: Path, args: Sequence[str]) -> subprocess.CompletedProcess[bytes]: + return subprocess.run( + ["git", "-C", str(root), *args], + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + +def tracked_files(root: Path) -> list[Path]: + proc = run_git(root, ["ls-files", "-z"]) + if proc.returncode != 0: + raise RuntimeError(proc.stderr.decode("utf-8", "replace").strip()) + return [Path(p.decode("utf-8")) for p in proc.stdout.split(b"\0") if p] + + +def is_text_file(path: Path) -> bool: + if path.name in {".gitignore", ".dockerignore"}: + return True + return path.suffix.lower() in TEXT_EXTENSIONS or not path.suffix + + +def path_findings(paths: Iterable[Path]) -> list[Finding]: + findings: list[Finding] = [] + for rel in paths: + rel_str = rel.as_posix() + suffix = rel.suffix.lower() + if rel.parts and rel.parts[0] in {"roadmap", "tmp"}: + findings.append( + Finding( + "tracked local-only directory", + rel_str, + 1, + rel_str, + "Remove tracked planning/tmp artifacts or move them to ignored local storage.", + ) + ) + if rel.name == "CLAUDE.local.md" or ( + rel.name.startswith(".env") and rel.name != ".env.example" + ): + findings.append( + Finding( + "tracked local config file", + rel_str, + 1, + rel_str, + "Untrack local config files and keep them ignored.", + ) + ) + if suffix in MEDIA_EXTENSIONS: + findings.append( + Finding( + "tracked media corpus file", + rel_str, + 1, + rel_str, + "Do not publish raw audio/video validation material.", + ) + ) + if suffix in {".log", ".json", ".txt"} and "validation" in rel_str.lower(): + findings.append( + Finding( + "tracked validation artifact", + rel_str, + 1, + rel_str, + "Keep raw validation logs/results local-only.", + ) + ) + return findings + + +def is_secret_name(name: str) -> bool: + normalized = name.replace("-", "_").lower() + return ( + normalized in EXPLICIT_SECRET_NAMES + or "api_key" in normalized + or normalized.endswith("_token") + or normalized == "token" + or "password" in normalized + or "secret" in normalized + ) + + +def assigned_value(raw: str | None) -> str: + value = (raw or "").strip() + if not value: + return "" + if value[0] in {"'", '"', "`"}: + quote = value[0] + end = value.find(quote, 1) + if end == -1: + return value[1:].strip() + return value[1:end].strip() + return ( + value.split("#", 1)[0].strip().split(maxsplit=1)[0].rstrip("`.,;)]\"'").strip() + ) + + +def is_placeholder_secret_value(value: str) -> bool: + normalized = value.strip() + if normalized in {"", "..."}: + return True + if normalized.lower() in { + "changeme", + "change-me", + "change_me", + "placeholder", + "redacted", + "replace-me", + "replace_me", + "todo", + }: + return True + return bool(PLACEHOLDER_VALUE_RE.fullmatch(normalized)) + + +def looks_like_secret_value(value: str) -> bool: + if is_placeholder_secret_value(value): + return False + compact = re.sub(r"\s+", "", value.strip()) + if SECRET_VALUE_PREFIX_RE.match(compact): + return True + if len(compact) < 12: + return False + if len([char for char in compact if char.isalnum()]) < 12: + return False + return bool(re.fullmatch(r"[A-Za-z0-9_+/\-.=]+", compact)) + + +def is_backtick_wrapped_value(raw: str | None) -> bool: + return bool(raw and raw.strip().startswith("`")) + + +def _excerpt(line: str) -> str: + excerpt = line.strip() + return excerpt if len(excerpt) <= 180 else excerpt[:177] + "..." + + +def secret_assignment_finding(path: str, line_no: int, line: str) -> Finding | None: + for match in SECRET_ASSIGNMENT_RE.finditer(line): + if not is_secret_name(match.group("name")): + continue + raw_value = match.group("value") + value = assigned_value(raw_value) + if not value or is_placeholder_secret_value(value): + continue + if not looks_like_secret_value(value) and not is_backtick_wrapped_value( + raw_value + ): + continue + return Finding( + "secret-looking assignment", + path, + line_no, + _excerpt(line), + "Use placeholders such as ; rotate if this is a real secret.", + ) + return None + + +def raw_secret_token_finding(path: str, line_no: int, line: str) -> Finding | None: + for pattern in (RAW_BEARER_TOKEN_RE, RAW_SECRET_TOKEN_RE): + for match in pattern.finditer(line): + if looks_like_secret_value(assigned_value(match.group("value"))): + return Finding( + "secret-looking raw token", + path, + line_no, + _excerpt(line), + "Use placeholders such as ; rotate if this is a real secret.", + ) + return None + + +def line_findings(root: Path, paths: Iterable[Path]) -> list[Finding]: + findings: list[Finding] = [] + for rel in paths: + rel_str = rel.as_posix() + if rel.name in {".gitignore", ".npmignore", ".dockerignore"}: + continue + if rel_str in ALLOW_CONTENT or not is_text_file(rel): + continue + try: + lines = ( + (root / rel).read_text(encoding="utf-8", errors="replace").splitlines() + ) + except OSError: + continue + for line_no, line in enumerate(lines, start=1): + for maybe_finding in ( + secret_assignment_finding(rel_str, line_no, line), + raw_secret_token_finding(rel_str, line_no, line), + ): + if maybe_finding is not None: + findings.append(maybe_finding) + for rule in LINE_RULES: + if not rule.pattern.search(line): + continue + if rule.name == "machine-local path" and any( + path in line for path in PUBLIC_CONTAINER_PATHS + ): + continue + findings.append( + Finding(rule.name, rel_str, line_no, _excerpt(line), rule.advice) + ) + return findings + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Scan Git-tracked files for VoScript public-release privacy leaks." + ) + parser.add_argument("--root", default=".", help="Repository root to scan.") + args = parser.parse_args() + + root = Path(args.root).expanduser().resolve() + if not root.exists(): + print(f"root does not exist: {root}", file=sys.stderr) + return 2 + + try: + paths = tracked_files(root) + except RuntimeError as exc: + print(f"not a git worktree or git failed: {exc}", file=sys.stderr) + return 2 + + findings = path_findings(paths) + line_findings(root, paths) + if findings: + print("Public release scan failed:") + for item in findings: + print(f"- {item.path}:{item.line}: {item.category}") + print(f" {item.excerpt}") + print(f" {item.advice}") + return 1 + + print(f"Public release scan passed ({len(paths)} tracked files).") + return 0 + + +if __name__ == "__main__": + os.environ.setdefault("PYTHONUTF8", "1") + sys.exit(main()) From 5dcc34d42d60a8b4f9da14d0350135ffabd70722 Mon Sep 17 00:00:00 2001 From: Maple Gao Date: Wed, 10 Jun 2026 01:12:54 +0800 Subject: [PATCH 2/3] ci: cap claude review turns --- .github/workflows/claude-code-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index 233d182..d58cbc3 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -88,4 +88,4 @@ jobs: claude_args: | --model ${{ env.CLAUDE_MODEL }} - --max-turns 30 + --max-turns 8 From 3c8387ad66d845826d1e2ad19547bd392efaf43d Mon Sep 17 00:00:00 2001 From: Maple Gao Date: Wed, 10 Jun 2026 01:23:24 +0800 Subject: [PATCH 3/3] ci: remove claude interactive auth path --- .github/workflows/claude-code-review.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index d58cbc3..8a1ec4a 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -7,7 +7,6 @@ on: permissions: contents: read - id-token: write pull-requests: write issues: write @@ -80,10 +79,12 @@ jobs: - Synchronized English/Chinese documentation The PR branch is already checked out in the current working directory. - Post feedback only through the official Claude Code Action GitHub integration. + Use only the configured ANTHROPIC_API_KEY and ANTHROPIC_BASE_URL for Claude access. + Do not start browser login or any interactive authorization flow. + Post feedback only through this GitHub Actions run. Do not use the GitHub CLI and do not use a user-owned GitHub token. - If the official Claude GitHub App integration is unavailable, fail instead of posting as the repository owner. - If there are no actionable findings, post the standard no-findings confirmation through the action integration. + If non-interactive API-key review is unavailable, fail instead of falling back. + If there are no actionable findings, post the standard no-findings confirmation through the action. Avoid formatting-only comments. claude_args: |