diff --git a/.github/scripts/check_spec_coverage.py b/.github/scripts/check_spec_coverage.py
new file mode 100644
index 0000000..a60132c
--- /dev/null
+++ b/.github/scripts/check_spec_coverage.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""Map a spec-diff to affected docs.pinecone.io pages + impacted SDKs via the manifest.
+
+Reads spec-diff.json (from extract_spec_diff.py) and spec-manifest.json, joins on
+operation id / schema name, and writes spec-gaps.json. Changed surface absent from
+the manifest is logged to spec-gaps-unmapped.json so the manifest can be kept current.
+Sets the `has_gaps` GitHub Actions output.
+"""
+import argparse
+import json
+import os
+from pathlib import Path
+
+
+def _schema_entry(schs, sid):
+    """Return the manifest entry for a schema change id, or None.
+
+    Ids are "Schema" or "Schema.property". The full id is tried first, then
+    each shorter dotted prefix, so schema names that themselves contain dots
+    still resolve.
+    """
+    while sid:
+        entry = schs.get(sid)
+        if entry:
+            return entry
+        sid = sid.rpartition(".")[0]
+    return None
+
+
+def main():
+    """Join the spec diff with the manifest and write the gap reports.
+
+    Writes one gap record per (change, doc page) to --output, writes changes
+    with no manifest entry to spec-gaps-unmapped.json in the working
+    directory, appends `has_gaps` to $GITHUB_OUTPUT when that variable is set,
+    and prints a one-line summary.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--spec-diff", required=True)
+    ap.add_argument("--manifest", required=True)
+    ap.add_argument("--output", default="spec-gaps.json")
+    a = ap.parse_args()
+
+    diff = json.loads(Path(a.spec_diff).read_text(encoding="utf-8"))
+    man = json.loads(Path(a.manifest).read_text(encoding="utf-8"))
+    ops = man.get("operations", {})
+    schs = man.get("schemas", {})
+
+    gaps, unmapped = [], []
+    for c in diff:
+        if c["kind"] == "operation":
+            entry = ops.get(c["id"])
+        else:
+            entry = _schema_entry(schs, c["id"])
+        if not entry:
+            unmapped.append(c)
+            continue
+        for page in entry.get("docs", []):
+            gaps.append({
+                "symbol": c["id"], "change": c["change"], "breaking": c.get("breaking", False),
+                "doc_page": page, "sdks": entry.get("sdks", []), "detail": c.get("detail", ""),
+            })
+
+    Path(a.output).write_text(json.dumps(gaps, indent=2))
+    if unmapped:
+        Path("spec-gaps-unmapped.json").write_text(json.dumps(unmapped, indent=2))
+
+    has_gaps = bool(gaps)
+    gh = os.environ.get("GITHUB_OUTPUT")
+    if gh:
+        with open(gh, "a") as f:
+            f.write(f"has_gaps={'true' if has_gaps else 'false'}\n")
+    pages = len({g["doc_page"] for g in gaps})
+    print(f"{len(gaps)} doc-gaps across {pages} page(s); {len(unmapped)} unmapped")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/extract_spec_diff.py b/.github/scripts/extract_spec_diff.py
new file mode 100644
index 0000000..9e1dcde
--- /dev/null
+++ b/.github/scripts/extract_spec_diff.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+"""Diff two OpenAPI specs; classify each change as breaking or non-breaking.
+
+Emits a JSON list of changes. Operations are keyed by operationId (falling back
+to "METHOD /path"); schemas by name (properties as "Schema.prop"). Used by the
+spec-drift-detector workflow in both PR mode (git refs) and scheduled mode (files).
+
+Zero runtime dependencies beyond pyyaml.
+"""
+import argparse
+import json
+import subprocess
+from pathlib import Path
+
+import yaml
+
+METHODS = {"get", "post", "put", "patch", "delete", "head", "options"}
+
+
+def load(text: str) -> dict:
+    """Parse YAML text into a dict; empty or non-mapping documents yield {}."""
+    data = yaml.safe_load(text)
+    return data if isinstance(data, dict) else {}
+
+
+def git_show(ref: str, path: str) -> str:
+    """File contents at a git ref, or '' if `git show` fails (e.g. new/deleted file)."""
+    try:
+        return subprocess.run(
+            ["git", "show", f"{ref}:{path}"], capture_output=True, text=True, check=True
+        ).stdout
+    except subprocess.CalledProcessError:
+        return ""
+
+
+def operations(spec: dict) -> dict:
+    """{op_key: {method, path, op}} for every paths.PATH.METHOD operation object."""
+    out = {}
+    for path, methods in (spec.get("paths") or {}).items():
+        if not isinstance(methods, dict):
+            continue
+        for m, op in methods.items():
+            if m.lower() not in METHODS or not isinstance(op, dict):
+                continue
+            key = op.get("operationId") or f"{m.upper()} {path}"
+            out[key] = {"method": m.upper(), "path": path, "op": op}
+    return out
+
+
+def params(op: dict) -> dict:
+    """{(name, in): parameter} for the operation's parameters; entries without a name are skipped."""
+    out = {}
+    for p in op.get("parameters") or []:
+        if isinstance(p, dict) and "name" in p:
+            out[(p["name"], p.get("in", ""))] = p
+    return out
+
+
+def schemas(spec: dict) -> dict:
+    """The spec's components.schemas mapping, or {} when absent."""
+    return ((spec.get("components") or {}).get("schemas")) or {}
+
+
+def _type(d):
+    """The `type` (or, failing that, `$ref`) of a parameter/property schema."""
+    if not isinstance(d, dict):
+        return None
+    s = d.get("schema", d)
+    if not isinstance(s, dict):
+        return None
+    return s.get("type") or s.get("$ref")
+
+
+def _fmt(d):
+    """The `format` qualifier (e.g. int32, uuid) of a parameter/property schema."""
+    if not isinstance(d, dict):
+        return None
+    s = d.get("schema", d)
+    if not isinstance(s, dict):
+        return None
+    return s.get("format")
+
+
+def diff_operations(base: dict, head: dict) -> list[dict]:
+    """Changes for operations added, removed, or modified between base and head."""
+    changes = []
+    bo, ho = operations(base), operations(head)
+    # Sets iterate in hash order; sort so the emitted list is stable across runs.
+    for k in sorted(bo.keys() - ho.keys(), key=str):
+        changes.append({"kind": "operation", "id": k, "change": "removed", "breaking": True,
+                        "before": f"{bo[k]['method']} {bo[k]['path']}", "after": "",
+                        "detail": "operation removed"})
+    for k in sorted(ho.keys() - bo.keys(), key=str):
+        changes.append({"kind": "operation", "id": k, "change": "added", "breaking": False,
+                        "before": "", "after": f"{ho[k]['method']} {ho[k]['path']}",
+                        "detail": "operation added"})
+    for k in sorted(bo.keys() & ho.keys(), key=str):
+        changes += diff_one_op(k, bo[k]["op"], ho[k]["op"])
+    return changes
+
+
+def diff_one_op(key: str, b: dict, h: dict) -> list[dict]:
+    """Parameter and response-code changes for one operation present on both sides."""
+    changes = []
+    bp, hp = params(b), params(h)
+
+    # Parameter location changes (e.g. query -> path): a pure relocation otherwise
+    # shows up as a misleading remove + add. Detect by name and report once, breaking.
+    b_loc, h_loc = {}, {}
+    for nm, loc in bp:
+        b_loc.setdefault(nm, set()).add(loc)
+    for nm, loc in hp:
+        h_loc.setdefault(nm, set()).add(loc)
+    relocated = set()
+    for nm in sorted(b_loc.keys() & h_loc.keys(), key=str):
+        if b_loc[nm] != h_loc[nm] and len(b_loc[nm]) == 1 and len(h_loc[nm]) == 1:
+            relocated.add(nm)
+            bl, hl = next(iter(b_loc[nm])), next(iter(h_loc[nm]))
+            changes.append({"kind": "operation", "id": key, "change": "modified", "breaking": True,
+                            "before": f"{nm} in {bl}", "after": f"{nm} in {hl}",
+                            "detail": f"parameter '{nm}' moved from {bl} to {hl}"})
+
+    for name in sorted(bp.keys() - hp.keys(), key=str):
+        if name[0] in relocated:
+            continue
+        req = bool(bp[name].get("required"))
+        changes.append({"kind": "operation", "id": key, "change": "modified", "breaking": req,
+                        "before": f"param {name[0]}", "after": "",
+                        "detail": f"{'required ' if req else ''}parameter '{name[0]}' removed"})
+    for name in sorted(hp.keys() - bp.keys(), key=str):
+        if name[0] in relocated:
+            continue
+        req = bool(hp[name].get("required"))
+        changes.append({"kind": "operation", "id": key, "change": "modified", "breaking": req,
+                        "before": "", "after": f"param {name[0]}",
+                        "detail": f"{'required ' if req else ''}parameter '{name[0]}' added"})
+    for name in sorted(bp.keys() & hp.keys(), key=str):
+        pb, ph = bp[name], hp[name]
+        if not pb.get("required") and ph.get("required"):
+            changes.append({"kind": "operation", "id": key, "change": "modified", "breaking": True,
+                            "before": f"{name[0]} optional", "after": f"{name[0]} required",
+                            "detail": f"parameter '{name[0]}' now required"})
+        if _type(pb) != _type(ph):
+            changes.append({"kind": "operation", "id": key, "change": "modified", "breaking": True,
+                            "before": str(_type(pb)), "after": str(_type(ph)),
+                            "detail": f"parameter '{name[0]}' type changed"})
+        if _fmt(pb) != _fmt(ph):
+            changes.append({"kind": "operation", "id": key, "change": "modified", "breaking": True,
+                            "before": str(_fmt(pb)), "after": str(_fmt(ph)),
+                            "detail": f"parameter '{name[0]}' format changed"})
+
+    # Removed response codes: consumers may branch on them, so removal is breaking.
+    br = {str(c) for c in (b.get("responses") or {})}
+    hr = {str(c) for c in (h.get("responses") or {})}
+    for code in sorted(br - hr):
+        changes.append({"kind": "operation", "id": key, "change": "modified", "breaking": True,
+                        "before": f"response {code}", "after": "",
+                        "detail": f"response '{code}' removed"})
+    return changes
+
+
+def diff_schemas(base: dict, head: dict) -> list[dict]:
+    """Changes for component schemas added, removed, or modified between base and head."""
+    changes = []
+    bs, hs = schemas(base), schemas(head)
+    for name in sorted(bs.keys() - hs.keys(), key=str):
+        changes.append({"kind": "schema", "id": name, "change": "removed", "breaking": True,
+                        "before": name, "after": "", "detail": "schema removed"})
+    for name in sorted(hs.keys() - bs.keys(), key=str):
+        changes.append({"kind": "schema", "id": name, "change": "added", "breaking": False,
+                        "before": "", "after": name, "detail": "schema added"})
+    for name in sorted(bs.keys() & hs.keys(), key=str):
+        changes += diff_one_schema(name, bs[name], hs[name])
+    return changes
+
+
+def diff_one_schema(name: str, b: dict, h: dict) -> list[dict]:
+    """Property-level changes (removed/added/required/type/format/enum) for one schema."""
+    changes = []
+    bp = b.get("properties") or {}
+    hp = h.get("properties") or {}
+    breq, hreq = set(b.get("required") or []), set(h.get("required") or [])
+    for prop in sorted(bp.keys() - hp.keys(), key=str):
+        changes.append({"kind": "schema", "id": f"{name}.{prop}", "change": "removed", "breaking": True,
+                        "before": prop, "after": "", "detail": f"property '{prop}' removed"})
+    for prop in sorted(hp.keys() - bp.keys(), key=str):
+        newreq = prop in hreq
+        changes.append({"kind": "schema", "id": f"{name}.{prop}", "change": "added", "breaking": newreq,
+                        "before": "", "after": prop,
+                        "detail": f"{'required ' if newreq else ''}property '{prop}' added"})
+    for prop in sorted(bp.keys() & hp.keys(), key=str):
+        if prop not in breq and prop in hreq:
+            changes.append({"kind": "schema", "id": f"{name}.{prop}", "change": "modified", "breaking": True,
+                            "before": "optional", "after": "required",
+                            "detail": f"property '{prop}' now required"})
+        if _type(bp[prop]) != _type(hp[prop]):
+            changes.append({"kind": "schema", "id": f"{name}.{prop}", "change": "modified", "breaking": True,
+                            "before": str(_type(bp[prop])), "after": str(_type(hp[prop])),
+                            "detail": f"property '{prop}' type changed"})
+        if _fmt(bp[prop]) != _fmt(hp[prop]):
+            changes.append({"kind": "schema", "id": f"{name}.{prop}", "change": "modified", "breaking": True,
+                            "before": str(_fmt(bp[prop])), "after": str(_fmt(hp[prop])),
+                            "detail": f"property '{prop}' format changed"})
+        be = set(bp[prop].get("enum") or []) if isinstance(bp[prop], dict) else set()
+        he = set(hp[prop].get("enum") or []) if isinstance(hp[prop], dict) else set()
+        for removed in sorted(be - he, key=str):
+            changes.append({"kind": "schema", "id": f"{name}.{prop}", "change": "modified", "breaking": True,
+                            "before": str(removed), "after": "",
+                            "detail": f"enum value '{removed}' removed from '{prop}'"})
+    return changes
+
+
+def diff(base: dict, head: dict) -> list[dict]:
+    """All operation changes followed by all schema changes."""
+    return diff_operations(base, head) + diff_schemas(base, head)
+
+
+def main():
+    """CLI entry point: diff two specs (git refs or files) and write the change list as JSON."""
+    ap = argparse.ArgumentParser(description="Diff two OpenAPI specs.")
+    ap.add_argument("--base", help="Base spec file path")
+    ap.add_argument("--head", help="Head spec file path")
+    ap.add_argument("--base-ref", help="Base git ref (use with --path)")
+    ap.add_argument("--head-ref", help="Head git ref (use with --path)")
+    ap.add_argument("--path", help="Repo-relative spec path for git-ref mode")
+    ap.add_argument("--service", default="")
+    ap.add_argument("--version", default="")
+    ap.add_argument("--output", default="spec-diff.json")
+    a = ap.parse_args()
+
+    if a.base_ref is not None and a.path:
+        # Without this, git_show(None, ...) fails, returns '', and every operation reads as removed.
+        if not a.head_ref:
+            ap.error("--head-ref is required with --base-ref and --path")
+        base = load(git_show(a.base_ref, a.path))
+        head = load(git_show(a.head_ref, a.path))
+    else:
+        base = load(Path(a.base).read_text(encoding="utf-8")) if a.base and Path(a.base).exists() else {}
+        head = load(Path(a.head).read_text(encoding="utf-8")) if a.head and Path(a.head).exists() else {}
+
+    changes = diff(base, head)
+    for c in changes:
+        c["service"] = a.service
+        c["version"] = a.version
+    Path(a.output).write_text(json.dumps(changes, indent=2))
+    nb = sum(1 for c in changes if c["breaking"])
+    print(f"{len(changes)} changes ({nb} breaking) -> {a.output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/open_docs_stub_pr.py b/.github/scripts/open_docs_stub_pr.py
new file mode 100644
index 0000000..3057a75
--- /dev/null
+++ b/.github/scripts/open_docs_stub_pr.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+open_docs_stub_pr.py
+
+Opens a draft PR in pinecone-io/docs with TODO annotations for each
+doc page flagged by check_spec_coverage.py. Runs only on the scheduled
+drift-detection job, not on every PR.
+
+Requirements:
+  - gh CLI (pre-installed on GitHub Actions ubuntu-latest runners)
+  - DOCS_TOKEN env var: a PAT with `repo` scope on pinecone-io/docs
+
+Usage:
+    DOCS_TOKEN=... python .github/scripts/open_docs_stub_pr.py \
+        --gaps spec-gaps.json \
+        --docs-repo pinecone-io/docs
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from datetime import date
+from pathlib import Path
+
+
+def run(cmd: list[str], cwd: str | None = None, env: dict | None = None) -> None:
+    """Echo a command, then run it; raises CalledProcessError on a non-zero exit."""
+    print(f" $ {' '.join(str(c) for c in cmd)}", flush=True)
+    subprocess.run(cmd, cwd=cwd, env=env, check=True)
+
+
+def _todo_comment(gap: dict) -> str:
+    """Render one gap as an MDX comment block (`{/* ... */}`)."""
+    msg = f"TODO(sdk-drift): `{gap['symbol']}` {gap['change']}"
+    if gap.get("detail"):
+        msg += f": {gap['detail']}"
+    if gap.get("before") and gap.get("after"):
+        msg += f"\n was: {gap['before']}\n now: {gap['after']}"
+    # A literal "*/" in the text would close the comment early.
+    msg = msg.replace("*/", "* /")
+    return f"{{/* {msg} */}}"
+
+
+def _insert_after_frontmatter(content: str, stub_block: str) -> str:
+    """Place stub_block right after leading `---` frontmatter, else prepend it.
+
+    Only a page that starts with `---` is treated as having frontmatter, so a
+    later `---` (horizontal rule, table separator) never splits the page or
+    drops the text in front of it.
+    """
+    if content.startswith("---"):
+        parts = content.split("---", 2)
+        if len(parts) == 3:
+            return f"---{parts[1]}---\n\n{stub_block}\n{parts[2]}"
+    return f"{stub_block}\n\n{content}"
+
+
+def main():
+    """Annotate flagged pages in a fresh clone of the docs repo and open a draft PR.
+
+    Exits with status 1 when no token is available. Returns without opening a
+    PR when the gaps file is empty or none of the flagged pages exist in the clone.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--gaps", default="docs-gaps.json")
+    parser.add_argument("--docs-repo", default="pinecone-io/docs")
+    parser.add_argument("--token", default=os.environ.get("DOCS_TOKEN", ""))
+    args = parser.parse_args()
+
+    if not args.token:
+        print("Error: --token or DOCS_TOKEN required", file=sys.stderr)
+        sys.exit(1)
+
+    with open(args.gaps, encoding="utf-8") as f:
+        gaps = json.load(f)
+
+    if not gaps:
+        print("No gaps — nothing to open a PR for.")
+        return
+
+    today = date.today().isoformat()
+    branch = f"chore/sdk-drift-{today}"
+    pr_title = f"chore: update docs for SDK drift ({today})"
+
+    # Group gaps by page so we insert one comment block per page
+    pages: dict[str, list[dict]] = {}
+    for gap in gaps:
+        pages.setdefault(gap["doc_page"], []).append(gap)
+
+    gh_env = {**os.environ, "GH_TOKEN": args.token}
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        run(["gh", "repo", "clone", args.docs_repo, tmpdir, "--", "--depth=1"], env=gh_env)
+        run(["git", "checkout", "-b", branch], cwd=tmpdir)
+        run(["git", "config", "user.email", "github-actions[bot]@users.noreply.github.com"], cwd=tmpdir)
+        run(["git", "config", "user.name", "github-actions[bot]"], cwd=tmpdir)
+
+        modified = []
+        for page_path, page_gaps in pages.items():
+            # Page ids look like "guides/index-data/create-an-index"; try that path at the repo root first.
+            mdx_path = Path(tmpdir) / f"{page_path}.mdx"
+            if not mdx_path.exists():
+                # Fall back to the same path under a top-level "docs/" directory.
+                alt = Path(tmpdir) / "docs" / f"{page_path}.mdx"
+                if alt.exists():
+                    mdx_path = alt
+                else:
+                    print(f" Skipping {page_path} — .mdx not found in clone")
+                    continue
+
+            content = mdx_path.read_text(encoding="utf-8")
+            stub_block = "\n".join(_todo_comment(g) for g in page_gaps)
+            updated = _insert_after_frontmatter(content, stub_block)
+
+            mdx_path.write_text(updated, encoding="utf-8")
+            modified.append(str(mdx_path.relative_to(tmpdir)))
+            print(f" Annotated {page_path}")
+
+        if not modified:
+            print("No .mdx files found in clone for the flagged pages — no PR opened.")
+            return
+
+        run(["git", "add"] + modified, cwd=tmpdir)
+        run(
+            ["git", "commit", "-m", f"chore: add drift annotations ({today})"],
+            cwd=tmpdir,
+        )
+        run(["git", "push", "origin", branch], cwd=tmpdir, env=gh_env)
+
+    # Build PR body
+    body = ["## SDK drift detected", ""]
+    body.append(
+        "The weekly `spec-drift-detector` found OpenAPI spec changes that may need doc updates."
+    )
+    body.append("")
+    body.append("### Pages to review")
+    body.append("")
+    for page, page_gaps in pages.items():
+        body.append(f"**`{page}`**")
+        for g in page_gaps:
+            item = f"- [ ] `{g['symbol']}` ({g['change']})"
+            if g.get("detail"):
+                item += f" — {g['detail']}"
+            if g.get("before") and g.get("after"):
+                item += f": `{g['before']}` → `{g['after']}`"
+            body.append(item)
+        body.append("")
+    body.append(
+        "_Auto-generated stub. Mark items done as pages are updated. Close if not applicable._"
+    )
+
+    run(
+        [
+            "gh", "pr", "create",
+            "--repo", args.docs_repo,
+            "--head", branch,
+            "--title", pr_title,
+            "--body", "\n".join(body),
+            "--draft",
+        ],
+        env=gh_env,
+    )
+
+    print(f"Draft PR opened: {pr_title}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/tests/test_check_spec_coverage.py b/.github/scripts/tests/test_check_spec_coverage.py
new file mode 100644
index 0000000..c5d6063
--- /dev/null
+++ b/.github/scripts/tests/test_check_spec_coverage.py
@@ -0,0 +1,36 @@
+"""Unit test for check_spec_coverage. Run: pytest .github/scripts/tests -v"""
+import json
+import os
+import subprocess
+import sys
+
+
+def test_maps_to_docs_and_sdks_and_unmapped(tmp_path):
+    diff = [
+        {"kind": "operation", "id": "create_index", "change": "modified", "breaking": True, "detail": "x"},
+        {"kind": "schema", "id": "Index.host", "change": "added", "breaking": False, "detail": "y"},
+        {"kind": "schema", "id": "db.Index.host", "change": "added", "breaking": False, "detail": "w"},
+        {"kind": "operation", "id": "unknown_op", "change": "added", "breaking": False, "detail": "z"},
+    ]
+    man = {"operations": {"create_index": {"docs": ["guides/index-data/create-an-index"], "sdks": ["python", "ts"]}},
+           "schemas": {"Index": {"docs": ["guides/index-data/indexes"], "sdks": ["python"]},
+                       "db.Index": {"docs": ["guides/dotted"], "sdks": ["python"]}}}
+    dp = tmp_path / "diff.json"
+    mp = tmp_path / "man.json"
+    op = tmp_path / "gaps.json"
+    dp.write_text(json.dumps(diff))
+    mp.write_text(json.dumps(man))
+    script = os.path.join(os.path.dirname(__file__), "..", "check_spec_coverage.py")
+    r = subprocess.run([sys.executable, script, "--spec-diff", str(dp),
+                        "--manifest", str(mp), "--output", str(op)],
+                       capture_output=True, text=True, cwd=str(tmp_path))
+    assert r.returncode == 0, r.stderr
+    gaps = json.loads(op.read_text())
+    pages = {g["doc_page"] for g in gaps}
+    assert "guides/index-data/create-an-index" in pages
+    assert "guides/index-data/indexes" in pages  # schema property mapped via "Index"
+    assert "guides/dotted" in pages  # dotted schema name resolved via longest prefix
+    assert any(g["breaking"] for g in gaps)
+    assert all(g["symbol"] != "unknown_op" for g in gaps)  # unmapped, not in gaps
+    unmapped = json.loads((tmp_path / "spec-gaps-unmapped.json").read_text())
+    assert [c["id"] for c in unmapped] == ["unknown_op"]
diff --git a/.github/scripts/tests/test_extract_spec_diff.py b/.github/scripts/tests/test_extract_spec_diff.py
new file mode 100644
index 0000000..de40025
--- /dev/null
+++ b/.github/scripts/tests/test_extract_spec_diff.py
@@ -0,0 +1,107 @@
+"""Unit tests for extract_spec_diff. Run: pytest .github/scripts/tests -v
+(add .github/scripts to sys.path or run from there)."""
+import os
+import subprocess
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+from extract_spec_diff import diff_operations, diff_one_op, diff_schemas  # noqa: E402
+
+
+def test_operation_added_and_removed():
+    base = {"paths": {"/indexes": {"get": {"operationId": "list_indexes"},
+                                   "post": {"operationId": "create_index"}}}}
+    head = {"paths": {"/indexes": {"get": {"operationId": "list_indexes"}},
+                      "/indexes/{name}": {"delete": {"operationId": "delete_index"}}}}
+    by = {(c["id"], c["change"]): c for c in diff_operations(base, head)}
+    assert by[("create_index", "removed")]["breaking"] is True
+    assert by[("delete_index", "added")]["breaking"] is False
+
+
+def test_parameter_breaking_rules():
+    b = {"parameters": [{"name": "limit", "in": "query", "schema": {"type": "integer"}}]}
+    h = {"parameters": [
+        {"name": "limit", "in": "query", "schema": {"type": "string"}},
+        {"name": "namespace", "in": "query", "required": True, "schema": {"type": "string"}},
+    ]}
+    details = {c["detail"]: c["breaking"] for c in diff_one_op("op", b, h)}
+    assert details["parameter 'limit' type changed"] is True
+    assert details["required parameter 'namespace' added"] is True
+
+
+def test_optional_param_added_is_not_breaking():
+    b = {"parameters": []}
+    h = {"parameters": [{"name": "filter", "in": "query", "schema": {"type": "string"}}]}
+    c = diff_one_op("op", b, h)[0]
+    assert c["detail"] == "parameter 'filter' added"
+    assert c["breaking"] is False
+
+
+def test_param_format_change_is_breaking():
+    b = {"parameters": [{"name": "id", "in": "query", "schema": {"type": "integer", "format": "int32"}}]}
+    h = {"parameters": [{"name": "id", "in": "query", "schema": {"type": "integer", "format": "int64"}}]}
+    details = {c["detail"]: c["breaking"] for c in diff_one_op("op", b, h)}
+    assert details["parameter 'id' format changed"] is True
+    assert "parameter 'id' type changed" not in details  # type unchanged, only format
+
+
+def test_param_location_change_reported_once_as_breaking():
+    b = {"parameters": [{"name": "name", "in": "query", "required": True, "schema": {"type": "string"}}]}
+    h = {"parameters": [{"name": "name", "in": "path", "required": True, "schema": {"type": "string"}}]}
+    out = diff_one_op("op", b, h)
+    details = {c["detail"]: c["breaking"] for c in out}
+    assert details["parameter 'name' moved from query to path"] is True
+    # the relocation must NOT also surface as a remove + add
+    assert not any("removed" in d or "added" in d for d in details)
+
+
+def test_removed_response_code_is_breaking():
+    b = {"responses": {"200": {}, "404": {}}}
+    h = {"responses": {"200": {}}}
+    details = {c["detail"]: c["breaking"] for c in diff_one_op("op", b, h)}
+    assert details["response '404' removed"] is True
+
+
+def test_added_response_code_is_not_flagged():
+    b = {"responses": {"200": {}}}
+    h = {"responses": {"200": {}, "429": {}}}
+    assert all("response" not in c["detail"] for c in diff_one_op("op", b, h))
+
+
+def test_schema_property_format_change_is_breaking():
+    base = {"components": {"schemas": {"M": {"properties": {"ts": {"type": "string", "format": "date"}}}}}}
+    head = {"components": {"schemas": {"M": {"properties": {"ts": {"type": "string", "format": "date-time"}}}}}}
+    d = {c["detail"]: c["breaking"] for c in diff_schemas(base, head)}
+    assert d["property 'ts' format changed"] is True
+
+
+def test_schema_breaking_rules():
+    base = {"components": {"schemas": {"Index": {
+        "properties": {"name": {"type": "string"},
+                       "metric": {"type": "string", "enum": ["cosine", "dotproduct"]}},
+        "required": ["name"]}}}}
+    head = {"components": {"schemas": {"Index": {
+        "properties": {"metric": {"type": "integer", "enum": ["cosine"]},
+                       "host": {"type": "string"}},
+        "required": ["host"]}}}}
+    d = {c["detail"]: c["breaking"] for c in diff_schemas(base, head)}
+    assert d["property 'name' removed"] is True
+    assert d["required property 'host' added"] is True
+    assert d["property 'metric' type changed"] is True
+    assert d["enum value 'dotproduct' removed from 'metric'"] is True
+
+
+def test_cli_on_files(tmp_path):
+    import json
+    base = tmp_path / "base.yaml"
+    head = tmp_path / "head.yaml"
+    out = tmp_path / "diff.json"
+    base.write_text("paths:\n  /x:\n    get:\n      operationId: getx\n")
+    head.write_text("paths: {}\n")
+    script = os.path.join(os.path.dirname(__file__), "..", "extract_spec_diff.py")
+    r = subprocess.run([sys.executable, script, "--base", str(base), "--head", str(head),
+                        "--service", "db_data", "--version", "2025-10", "--output", str(out)],
+                       capture_output=True, text=True)
+    assert r.returncode == 0, r.stderr
+    changes = json.loads(out.read_text())
+    assert any(c["id"] == "getx" and c["change"] == "removed" and c["service"] == "db_data" for c in changes)
diff --git a/.github/spec-manifest.json b/.github/spec-manifest.json
new file mode 100644
index 0000000..615e4b1
--- /dev/null
+++ b/.github/spec-manifest.json
@@ -0,0 +1,39 @@
+{
+  "_comment": "Maps pinecone-api operationIds (and schema names) to the docs.pinecone.io guide pages and SDKs they surface. Edit when operations/schemas are added or pages renamed. Changed surface absent here is written to spec-gaps-unmapped.json on each run — tend that file to keep this current.",
+  "docs_base_url": "https://docs.pinecone.io",
+  "operations": {
+    "create_index": {"docs": ["guides/index-data/create-an-index"], "sdks": ["python", "ts", "go", "java"]},
+    "create_index_for_model": {"docs": ["guides/index-data/create-an-index", "guides/get-started/quickstart"], "sdks": ["python", "ts", "go", "java"]},
+    "list_indexes": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]},
+    "describe_index": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]},
+    "configure_index": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]},
+    "delete_index": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]},
+    "upsertVectors": {"docs": ["guides/index-data/upsert-data"], "sdks": ["python", "ts", "go", "java"]},
+    "upsertRecordsNamespace": {"docs": ["guides/index-data/upsert-data", "guides/get-started/quickstart"], "sdks": ["python", "ts"]},
+    "searchRecordsNamespace": {"docs": ["guides/search/search-overview", "guides/get-started/quickstart"], "sdks": ["python", "ts"]},
+    "queryVectors": {"docs": ["guides/search/search-overview"], "sdks": ["python", "ts", "go", "java"]},
+    "fetchVectors": {"docs": ["guides/manage-data/fetch-data"], "sdks": ["python", "ts", "go", "java"]},
+    "updateVector": {"docs": ["guides/manage-data/update-data"], "sdks": ["python", "ts", "go", "java"]},
+    "deleteVectors": {"docs": ["guides/manage-data/delete-data"], "sdks": ["python", "ts", "go", "java"]},
+    "listVectors": {"docs": ["guides/manage-data/list-record-ids"], "sdks": ["python", "ts", "go", "java"]},
+    "describeIndexStats": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]},
+    "startBulkImport": {"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]},
+    "describeBulkImport": {"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]},
+    "listBulkImports": {"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]},
+    "cancelBulkImport": {"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]},
+    "create_backup": {"docs": ["guides/manage-data/back-up-an-index"], "sdks": ["python", "ts", "go", "java"]},
+    "create_index_from_backup_operation": {"docs": ["guides/manage-data/restore-an-index"], "sdks": ["python", "ts", "go", "java"]},
+    "create_collection": {"docs": ["guides/manage-data/manage-collections"], "sdks": ["python", "ts", "go", "java"]},
+    "createNamespace": {"docs": ["guides/manage-data/manage-namespaces"], "sdks": ["python", "ts", "go", "java"]},
+    "deleteNamespace": {"docs": ["guides/manage-data/manage-namespaces"], "sdks": ["python", "ts", "go", "java"]},
+    "embed": {"docs": ["guides/inference/generate-embeddings"], "sdks": ["python", "ts", "go", "java"]},
+    "rerank": {"docs": ["guides/search/rerank-results"], "sdks": ["python", "ts", "go", "java"]},
+    "list_models": {"docs": ["guides/inference/understanding-inference"], "sdks": ["python", "ts"]},
+    "get_model": {"docs": ["guides/inference/understanding-inference"], "sdks": ["python", "ts"]}
+  },
+  "schemas": {
+    "IndexModel": {"docs": ["guides/index-data/create-an-index"], "sdks": ["python", "ts", "go", "java"]},
+    "CreateIndexForModelRequest": {"docs": ["guides/index-data/create-an-index"], "sdks": ["python", "ts"]},
+    "SearchRecordsRequest": {"docs": ["guides/search/search-overview"], "sdks": ["python", "ts"]}
+  }
+}
diff --git a/.github/workflows/spec-drift-detector.yaml b/.github/workflows/spec-drift-detector.yaml
new file mode 100644
index 0000000..5f7a47d
--- /dev/null
+++ b/.github/workflows/spec-drift-detector.yaml
@@ -0,0 +1,180 @@
+# Detects when an OpenAPI spec change requires docs/SDK updates.
+#
+# On PR: comments affected guide pages + impacted SDKs (breaking changes first).
+# Scheduled: diffs the two newest version snapshots; opens a draft PR in pinecone-io/docs.
+#
+# Deps: Python 3.12 + pyyaml. No other runtime dependencies.
+
+name: spec-drift-detector
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ["**/*.oas.yaml"]
+  schedule:
+    - cron: "0 9 * * 1"  # Mondays 09:00 UTC
+  workflow_dispatch:
+
+jobs:
+  detect:
+    runs-on: ubuntu-latest
+    permissions:
+      # Listing any permission sets every unlisted one to "none"; checkout needs contents: read.
+      contents: read
+      pull-requests: write
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install deps
+        run: pip install pyyaml
+
+      # ── PR mode: diff each changed spec file (base..head) ───────────────
+      - name: PR diff
+        if: github.event_name == 'pull_request'
+        run: |
+          BASE='${{ github.event.pull_request.base.sha }}'
+          HEAD='${{ github.event.pull_request.head.sha }}'
+          python - "$BASE" "$HEAD" <<'PY'
+          import json, os, subprocess, sys
+          sys.path.insert(0, ".github/scripts")
+          import extract_spec_diff as e
+          base_ref, head_ref = sys.argv[1], sys.argv[2]
+          # List changed specs NUL-separated from git itself; word-splitting a shell
+          # variable would break on paths that contain whitespace.
+          changed = subprocess.run(
+              ["git", "diff", "--name-only", "-z", base_ref, head_ref, "--", "**/*.oas.yaml"],
+              capture_output=True, text=True, check=True,
+          ).stdout.split("\0")
+          all_changes = []
+          for path in filter(None, changed):
+              b = e.load(e.git_show(base_ref, path))
+              h = e.load(e.git_show(head_ref, path))
+              svc = "_".join(os.path.basename(path).split("_")[:-1])  # db_data_2025-10.oas.yaml -> db_data
+              cs = e.diff(b, h)
+              for c in cs:
+                  c["service"] = svc
+                  c["version"] = path
+              all_changes += cs
+          with open("spec-diff.json", "w") as f:
+              json.dump(all_changes, f, indent=2)
+          print(f"{len(all_changes)} changes across changed specs")
+          PY
+
+      # ── Scheduled/manual: diff the two newest version dirs ──────────────
+      - name: Version-snapshot diff
+        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+        run: |
+          python - <<'PY'
+          import glob, os, re, json, sys
+          sys.path.insert(0, ".github/scripts")
+          import extract_spec_diff as e
+          dirs = sorted([d for d in os.listdir(".") if re.fullmatch(r"\d{4}-\d{2}", d)], reverse=True)
+          if len(dirs) < 2:
+              with open("spec-diff.json", "w") as f:
+                  json.dump([], f)
+              print("need two version dirs")
+              raise SystemExit(0)
+          head_dir, base_dir = dirs[0], dirs[1]
+
+          def specs(d):
+              """{service: spec path} for one version dir (db_data_2025-10.oas.yaml -> db_data)."""
+              return {"_".join(os.path.basename(p).split("_")[:-1]): p
+                      for p in sorted(glob.glob(f"{d}/*.oas.yaml"))}
+
+          def read(p):
+              """Parsed spec at p, or {} when that side has no spec for the service."""
+              if not p:
+                  return {}
+              with open(p, encoding="utf-8") as f:
+                  return e.load(f.read())
+
+          base_specs, head_specs = specs(base_dir), specs(head_dir)
+          all_changes = []
+          # Walk the union of services so a spec dropped from the newest dir is reported too.
+          for svc in sorted(base_specs.keys() | head_specs.keys()):
+              cs = e.diff(read(base_specs.get(svc)), read(head_specs.get(svc)))
+              for c in cs:
+                  c["service"] = svc
+                  c["version"] = head_dir
+              all_changes += cs
+          with open("spec-diff.json", "w") as f:
+              json.dump(all_changes, f, indent=2)
+          print(f"{base_dir} -> {head_dir}: {len(all_changes)} changes")
+          PY
+
+      - name: Check coverage
+        id: cov
+        run: |
+          python .github/scripts/check_spec_coverage.py \
+            --spec-diff spec-diff.json \
+            --manifest .github/spec-manifest.json \
+            --output spec-gaps.json
+
+      - name: Comment on PR
+        if: github.event_name == 'pull_request' && steps.cov.outputs.has_gaps == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs')
+            const gaps = JSON.parse(fs.readFileSync('spec-gaps.json', 'utf8'))
+            const breaking = gaps.filter(g => g.breaking)
+            const seen = new Set()
+            const byPage = {}
+            for (const g of gaps) {
+              const key = `${g.symbol}|${g.doc_page}`
+              if (seen.has(key)) continue; seen.add(key)
+              ;(byPage[g.doc_page] = byPage[g.doc_page] || []).push(g)
+            }
+            const sdks = [...new Set(gaps.flatMap(g => g.sdks))].sort()
+            const lines = ['## Spec drift detected', '', 'This PR changes the OpenAPI surface. Likely doc/SDK impact:', '']
+            if (breaking.length) {
+              lines.push('### ⚠️ Breaking changes')
+              const bseen = new Set()
+              for (const g of breaking) {
+                const key = `${g.symbol}|${g.detail}`
+                if (bseen.has(key)) continue; bseen.add(key)
+                lines.push(`- \`${g.symbol}\` — ${g.detail}`)
+              }
+              lines.push('')
+            }
+            lines.push('### Pages to update')
+            for (const [page, items] of Object.entries(byPage)) {
+              lines.push(`**\`${page}\`**`)
+              for (const g of items) lines.push(`- [ ] \`${g.symbol}\` (${g.change})${g.breaking ? ' ⚠️' : ''}: ${g.detail}`)
+              lines.push('')
+            }
+            if (sdks.length) lines.push(`**Impacted SDKs:** ${sdks.join(', ')}`)
+            lines.push('', '_Auto-detected. Dismiss when docs are updated, or close if not applicable._')
+            // Re-use one marked comment so each push updates it instead of adding another.
+            const marker = '<!-- spec-drift-detector -->'
+            const body = [marker, ...lines].join('\n')
+            const target = { owner: context.repo.owner, repo: context.repo.repo }
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              ...target, issue_number: context.issue.number, per_page: 100,
+            })
+            const previous = comments.find(c => c.body && c.body.includes(marker))
+            if (previous) {
+              await github.rest.issues.updateComment({ ...target, comment_id: previous.id, body })
+            } else {
+              await github.rest.issues.createComment({ ...target, issue_number: context.issue.number, body })
+            }
+
+      - name: Open docs stub PR
+        if: >
+          (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+          && steps.cov.outputs.has_gaps == 'true'
+        env:
+          DOCS_TOKEN: ${{ secrets.DOCS_GITHUB_TOKEN }}
+        run: |
+          # The script reads DOCS_TOKEN from the environment, keeping the secret off argv.
+          python .github/scripts/open_docs_stub_pr.py \
+            --gaps spec-gaps.json \
+            --docs-repo pinecone-io/docs