#!/usr/bin/env python3
"""Diff two OpenAPI specs; classify each change as breaking or non-breaking.

Emits a JSON list of changes. Operations are keyed by operationId (falling back
to "<METHOD> <path>"); schemas by name (properties as "Schema.prop"). Used by the
spec-drift-detector workflow in both PR mode (git refs) and scheduled mode (files).

Changes are emitted in a deterministic order (each group sorted by key) so that
two runs over the same pair of specs produce byte-identical JSON.

Zero runtime dependencies beyond pyyaml.
"""
import argparse
import json
import subprocess
from pathlib import Path

# Operation keys allowed on an OpenAPI 3.x Path Item Object. Any other key of a
# path item (parameters, summary, servers, $ref, ...) is not an operation.
METHODS = {"get", "post", "put", "patch", "delete", "head", "options", "trace"}


def _change(kind, ident, change, breaking, before, after, detail) -> dict:
    """Build one change record; key order is the order written to the JSON report."""
    return {"kind": kind, "id": ident, "change": change, "breaking": breaking,
            "before": before, "after": after, "detail": detail}


def _ordered(keys) -> list:
    """Sort keys by their string form.

    Set iteration order for strings varies between interpreter runs (hash
    randomization); sorting keeps the report stable. `key=str` also tolerates
    non-string YAML keys such as an integer response code.
    """
    return sorted(keys, key=str)


def _param_order(keys) -> list:
    """Sort (name, location) parameter keys by name, then by location."""
    return sorted(keys, key=lambda k: (str(k[0]), str(k[1])))


def load(text: str) -> dict:
    """Parse a YAML (or JSON) spec document into a dict.

    Empty or falsy documents yield {} so an absent spec diffs as "everything
    added/removed". A non-empty document that is not a mapping cannot be an
    OpenAPI spec and raises ValueError rather than failing later on `.get`.
    """
    # Imported here rather than at module top so the pure diff helpers (and
    # their unit tests) can be imported without PyYAML installed.
    import yaml

    doc = yaml.safe_load(text)
    if not doc:
        return {}
    if not isinstance(doc, dict):
        raise ValueError(
            f"spec document must be a mapping, got {type(doc).__name__}")
    return doc


def git_show(ref: str, path: str) -> str:
    """File contents at a git ref, or '' if absent on that side (new/deleted file).

    Any non-zero exit of `git show` (missing path, unknown ref) is reported as ''.
    """
    try:
        return subprocess.run(
            ["git", "show", f"{ref}:{path}"],
            capture_output=True, text=True, encoding="utf-8", check=True,
        ).stdout
    except subprocess.CalledProcessError:
        return ""


def operations(spec: dict) -> dict:
    """{op_key: {method, path, op}} for every paths.<path>.<method>.

    op_key is the operationId, or "<METHOD> <path>" when the operation has none.
    Path items and operations that are not mappings are skipped, as are
    path-item keys that are not HTTP methods. If two operations share a key,
    the one visited last wins.
    """
    paths = spec.get("paths")
    if not isinstance(paths, dict):
        return {}
    out = {}
    for path, item in paths.items():
        if not isinstance(item, dict):
            continue
        for method, op in item.items():
            # Non-string keys can come out of YAML (e.g. a bare integer key).
            if not isinstance(method, str) or method.lower() not in METHODS:
                continue
            if not isinstance(op, dict):
                continue
            verb = method.upper()
            key = op.get("operationId") or f"{verb} {path}"
            out[key] = {"method": verb, "path": path, "op": op}
    return out


def params(op: dict) -> dict:
    """{(name, in): parameter} for the inline parameters of one operation.

    Entries without a "name" (for example `$ref` parameters) are ignored.
    """
    out = {}
    for p in op.get("parameters") or []:
        if isinstance(p, dict) and "name" in p:
            out[(p["name"], p.get("in", ""))] = p
    return out


def schemas(spec: dict) -> dict:
    """The components.schemas mapping of a spec, or {} when missing or malformed."""
    components = spec.get("components")
    found = components.get("schemas") if isinstance(components, dict) else None
    return found if isinstance(found, dict) else {}


def _schema_of(d):
    """The schema mapping of a parameter (its "schema" entry) or property (itself)."""
    if not isinstance(d, dict):
        return None
    s = d.get("schema", d)
    return s if isinstance(s, dict) else None


def _type(d):
    """The `type` of a parameter/property schema, or its `$ref` when untyped."""
    s = _schema_of(d)
    if s is None:
        return None
    return s.get("type") or s.get("$ref")


def _fmt(d):
    """The `format` qualifier (e.g. int32, uuid) of a parameter/property schema."""
    s = _schema_of(d)
    return None if s is None else s.get("format")


def diff_operations(base: dict, head: dict) -> list[dict]:
    """Operation-level changes between two specs: removed, added, then modified."""
    changes = []
    bo, ho = operations(base), operations(head)
    for k in _ordered(bo.keys() - ho.keys()):
        changes.append(_change("operation", k, "removed", True,
                               f"{bo[k]['method']} {bo[k]['path']}", "",
                               "operation removed"))
    for k in _ordered(ho.keys() - bo.keys()):
        changes.append(_change("operation", k, "added", False,
                               "", f"{ho[k]['method']} {ho[k]['path']}",
                               "operation added"))
    for k in _ordered(bo.keys() & ho.keys()):
        changes += diff_one_op(k, bo[k]["op"], ho[k]["op"])
    return changes


def _relocated_params(bp: dict, hp: dict) -> dict:
    """{name: (old_in, new_in)} for parameters whose single location changed.

    A name counts as relocated only when it appears in exactly one location on
    each side and those locations differ.
    """
    b_loc, h_loc = {}, {}
    for nm, loc in bp:
        b_loc.setdefault(nm, set()).add(loc)
    for nm, loc in hp:
        h_loc.setdefault(nm, set()).add(loc)
    moved = {}
    for nm in _ordered(b_loc.keys() & h_loc.keys()):
        old, new = b_loc[nm], h_loc[nm]
        if old != new and len(old) == 1 and len(new) == 1:
            moved[nm] = (next(iter(old)), next(iter(new)))
    return moved


def diff_one_op(key: str, b: dict, h: dict) -> list[dict]:
    """Changes between two versions of one operation.

    Covers parameters (relocated, removed, added, newly required, type or
    format changed) and removed response codes. Every record has
    kind "operation", id `key` and change "modified".
    """
    def modified(breaking, before, after, detail):
        return _change("operation", key, "modified", breaking, before, after, detail)

    changes = []
    bp, hp = params(b), params(h)

    # Parameter location changes (e.g. query -> path): a pure relocation otherwise
    # shows up as a misleading remove + add. Detect by name and report once, breaking.
    relocated = _relocated_params(bp, hp)
    for nm, (bl, hl) in relocated.items():
        changes.append(modified(True, f"{nm} in {bl}", f"{nm} in {hl}",
                                f"parameter '{nm}' moved from {bl} to {hl}"))

    for name in _param_order(bp.keys() - hp.keys()):
        if name[0] in relocated:
            continue
        req = bool(bp[name].get("required"))
        changes.append(modified(req, f"param {name[0]}", "",
                                f"{'required ' if req else ''}parameter '{name[0]}' removed"))
    for name in _param_order(hp.keys() - bp.keys()):
        if name[0] in relocated:
            continue
        req = bool(hp[name].get("required"))
        changes.append(modified(req, "", f"param {name[0]}",
                                f"{'required ' if req else ''}parameter '{name[0]}' added"))
    for name in _param_order(bp.keys() & hp.keys()):
        pb, ph = bp[name], hp[name]
        if not pb.get("required") and ph.get("required"):
            changes.append(modified(True, f"{name[0]} optional", f"{name[0]} required",
                                    f"parameter '{name[0]}' now required"))
        if _type(pb) != _type(ph):
            changes.append(modified(True, str(_type(pb)), str(_type(ph)),
                                    f"parameter '{name[0]}' type changed"))
        if _fmt(pb) != _fmt(ph):
            changes.append(modified(True, str(_fmt(pb)), str(_fmt(ph)),
                                    f"parameter '{name[0]}' format changed"))

    # Removed response codes: consumers may branch on them, so removal is breaking.
    # Codes are compared as strings so that 404 and "404" are the same code.
    br = {str(c) for c in (b.get("responses") or {})}
    hr = {str(c) for c in (h.get("responses") or {})}
    for code in sorted(br - hr):
        changes.append(modified(True, f"response {code}", "",
                                f"response '{code}' removed"))
    return changes


def diff_schemas(base: dict, head: dict) -> list[dict]:
    """Schema-level changes between two specs: removed, added, then modified."""
    changes = []
    bs, hs = schemas(base), schemas(head)
    for name in _ordered(bs.keys() - hs.keys()):
        changes.append(_change("schema", name, "removed", True,
                               name, "", "schema removed"))
    for name in _ordered(hs.keys() - bs.keys()):
        changes.append(_change("schema", name, "added", False,
                               "", name, "schema added"))
    for name in _ordered(bs.keys() & hs.keys()):
        changes += diff_one_schema(name, bs[name], hs[name])
    return changes


def _properties(schema) -> dict:
    """The `properties` mapping of a schema, or {} when missing or malformed."""
    found = schema.get("properties") if isinstance(schema, dict) else None
    return found if isinstance(found, dict) else {}


def _listed(schema, field: str) -> list:
    """A list-valued schema field (`required`, `enum`) as a list, else []."""
    found = schema.get(field) if isinstance(schema, dict) else None
    return list(found) if isinstance(found, (list, tuple)) else []


def diff_one_schema(name: str, b: dict, h: dict) -> list[dict]:
    """Property-level changes between two versions of one named schema.

    Reports removed and added properties, properties that became required,
    type and format changes, and enum values dropped from a property. A side
    that is not a mapping (e.g. a boolean schema) is treated as having no
    properties.
    """
    bp, hp = _properties(b), _properties(h)
    breq, hreq = _listed(b, "required"), _listed(h, "required")

    changes = []
    for prop in _ordered(bp.keys() - hp.keys()):
        changes.append(_change("schema", f"{name}.{prop}", "removed", True,
                               prop, "", f"property '{prop}' removed"))
    for prop in _ordered(hp.keys() - bp.keys()):
        newreq = prop in hreq
        changes.append(_change("schema", f"{name}.{prop}", "added", newreq,
                               "", prop,
                               f"{'required ' if newreq else ''}property '{prop}' added"))
    for prop in _ordered(bp.keys() & hp.keys()):
        ident = f"{name}.{prop}"
        if prop not in breq and prop in hreq:
            changes.append(_change("schema", ident, "modified", True,
                                   "optional", "required",
                                   f"property '{prop}' now required"))
        if _type(bp[prop]) != _type(hp[prop]):
            changes.append(_change("schema", ident, "modified", True,
                                   str(_type(bp[prop])), str(_type(hp[prop])),
                                   f"property '{prop}' type changed"))
        if _fmt(bp[prop]) != _fmt(hp[prop]):
            changes.append(_change("schema", ident, "modified", True,
                                   str(_fmt(bp[prop])), str(_fmt(hp[prop])),
                                   f"property '{prop}' format changed"))
        # Enum values are compared with == on lists, not via sets: this keeps
        # the base declaration order and accepts unhashable values.
        he = _listed(hp[prop], "enum")
        dropped = []
        for value in _listed(bp[prop], "enum"):
            if value not in he and value not in dropped:
                dropped.append(value)
        for removed in dropped:
            changes.append(_change("schema", ident, "modified", True,
                                   str(removed), "",
                                   f"enum value '{removed}' removed from '{prop}'"))
    return changes


def diff(base: dict, head: dict) -> list[dict]:
    """All changes between two specs: operations first, then schemas."""
    return diff_operations(base, head) + diff_schemas(base, head)


def _spec_at_ref(ref, path: str) -> dict:
    """Spec at `ref:path`; {} when no ref was given or the file is absent there."""
    # A missing ref must not reach git: "None:<path>" is a bogus lookup and
    # ":<path>" (empty ref) would silently read the index instead.
    if not ref:
        return {}
    return load(git_show(ref, path))


def _spec_from_file(path) -> dict:
    """Spec read from a file path; {} when no path was given or it does not exist."""
    if not path:
        return {}
    p = Path(path)
    return load(p.read_text(encoding="utf-8")) if p.exists() else {}


def main(argv=None) -> None:
    """CLI entry point: diff two specs and write the change list as JSON.

    Git-ref mode is used when --path is given together with --base-ref and/or
    --head-ref; otherwise the --base/--head files are compared. A side that is
    not supplied or does not exist is treated as an empty spec. Each change is
    tagged with --service and --version before being written to --output.

    `argv` defaults to the process arguments; pass a list to call this in-process.
    """
    ap = argparse.ArgumentParser(description="Diff two OpenAPI specs.")
    ap.add_argument("--base", help="Base spec file path")
    ap.add_argument("--head", help="Head spec file path")
    ap.add_argument("--base-ref", help="Base git ref (use with --path)")
    ap.add_argument("--head-ref", help="Head git ref (use with --path)")
    ap.add_argument("--path", help="Repo-relative spec path for git-ref mode")
    ap.add_argument("--service", default="")
    ap.add_argument("--version", default="")
    ap.add_argument("--output", default="spec-diff.json")
    a = ap.parse_args(argv)

    if a.path and (a.base_ref or a.head_ref):
        base = _spec_at_ref(a.base_ref, a.path)
        head = _spec_at_ref(a.head_ref, a.path)
    else:
        base = _spec_from_file(a.base)
        head = _spec_from_file(a.head)

    changes = diff(base, head)
    for c in changes:
        c["service"] = a.service
        c["version"] = a.version
    Path(a.output).write_text(json.dumps(changes, indent=2), encoding="utf-8")
    nb = sum(1 for c in changes if c["breaking"])
    print(f"{len(changes)} changes ({nb} breaking) -> {a.output}")


if __name__ == "__main__":
    main()
c["detail"] == "parameter 'filter' added" + assert c["breaking"] is False + + +def test_param_format_change_is_breaking(): + b = {"parameters": [{"name": "id", "in": "query", "schema": {"type": "integer", "format": "int32"}}]} + h = {"parameters": [{"name": "id", "in": "query", "schema": {"type": "integer", "format": "int64"}}]} + details = {c["detail"]: c["breaking"] for c in diff_one_op("op", b, h)} + assert details["parameter 'id' format changed"] is True + assert "parameter 'id' type changed" not in details # type unchanged, only format + + +def test_param_location_change_reported_once_as_breaking(): + b = {"parameters": [{"name": "name", "in": "query", "required": True, "schema": {"type": "string"}}]} + h = {"parameters": [{"name": "name", "in": "path", "required": True, "schema": {"type": "string"}}]} + out = diff_one_op("op", b, h) + details = {c["detail"]: c["breaking"] for c in out} + assert details["parameter 'name' moved from query to path"] is True + # the relocation must NOT also surface as a remove + add + assert not any("removed" in d or "added" in d for d in details) + + +def test_removed_response_code_is_breaking(): + b = {"responses": {"200": {}, "404": {}}} + h = {"responses": {"200": {}}} + details = {c["detail"]: c["breaking"] for c in diff_one_op("op", b, h)} + assert details["response '404' removed"] is True + + +def test_added_response_code_is_not_flagged(): + b = {"responses": {"200": {}}} + h = {"responses": {"200": {}, "429": {}}} + assert all("response" not in c["detail"] for c in diff_one_op("op", b, h)) + + +def test_schema_property_format_change_is_breaking(): + base = {"components": {"schemas": {"M": {"properties": {"ts": {"type": "string", "format": "date"}}}}}} + head = {"components": {"schemas": {"M": {"properties": {"ts": {"type": "string", "format": "date-time"}}}}}} + d = {c["detail"]: c["breaking"] for c in diff_schemas(base, head)} + assert d["property 'ts' format changed"] is True + + +def test_schema_breaking_rules(): + 
base = {"components": {"schemas": {"Index": { + "properties": {"name": {"type": "string"}, + "metric": {"type": "string", "enum": ["cosine", "dotproduct"]}}, + "required": ["name"]}}}} + head = {"components": {"schemas": {"Index": { + "properties": {"metric": {"type": "integer", "enum": ["cosine"]}, + "host": {"type": "string"}}, + "required": ["host"]}}}} + d = {c["detail"]: c["breaking"] for c in diff_schemas(base, head)} + assert d["property 'name' removed"] is True + assert d["required property 'host' added"] is True + assert d["property 'metric' type changed"] is True + assert d["enum value 'dotproduct' removed from 'metric'"] is True + + +def test_cli_on_files(tmp_path): + import json + base = tmp_path / "base.yaml" + head = tmp_path / "head.yaml" + out = tmp_path / "diff.json" + base.write_text("paths:\n /x:\n get:\n operationId: getx\n") + head.write_text("paths: {}\n") + script = os.path.join(os.path.dirname(__file__), "..", "extract_spec_diff.py") + r = subprocess.run([sys.executable, script, "--base", str(base), "--head", str(head), + "--service", "db_data", "--version", "2025-10", "--output", str(out)], + capture_output=True, text=True) + assert r.returncode == 0, r.stderr + changes = json.loads(out.read_text()) + assert any(c["id"] == "getx" and c["change"] == "removed" and c["service"] == "db_data" for c in changes) diff --git a/.github/spec-manifest.json b/.github/spec-manifest.json new file mode 100644 index 0000000..615e4b1 --- /dev/null +++ b/.github/spec-manifest.json @@ -0,0 +1,39 @@ +{ + "_comment": "Maps pinecone-api operationIds (and schema names) to the docs.pinecone.io guide pages and SDKs they surface. Edit when operations/schemas are added or pages renamed. 
Changed surface absent here is written to spec-gaps-unmapped.json on each run — tend that file to keep this current.", + "docs_base_url": "https://docs.pinecone.io", + "operations": { + "create_index": {"docs": ["guides/index-data/create-an-index"], "sdks": ["python", "ts", "go", "java"]}, + "create_index_for_model": {"docs": ["guides/index-data/create-an-index", "guides/get-started/quickstart"], "sdks": ["python", "ts", "go", "java"]}, + "list_indexes": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]}, + "describe_index": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]}, + "configure_index": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]}, + "delete_index": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]}, + "upsertVectors": {"docs": ["guides/index-data/upsert-data"], "sdks": ["python", "ts", "go", "java"]}, + "upsertRecordsNamespace": {"docs": ["guides/index-data/upsert-data", "guides/get-started/quickstart"], "sdks": ["python", "ts"]}, + "searchRecordsNamespace": {"docs": ["guides/search/search-overview", "guides/get-started/quickstart"], "sdks": ["python", "ts"]}, + "queryVectors": {"docs": ["guides/search/search-overview"], "sdks": ["python", "ts", "go", "java"]}, + "fetchVectors": {"docs": ["guides/manage-data/fetch-data"], "sdks": ["python", "ts", "go", "java"]}, + "updateVector": {"docs": ["guides/manage-data/update-data"], "sdks": ["python", "ts", "go", "java"]}, + "deleteVectors": {"docs": ["guides/manage-data/delete-data"], "sdks": ["python", "ts", "go", "java"]}, + "listVectors": {"docs": ["guides/manage-data/list-record-ids"], "sdks": ["python", "ts", "go", "java"]}, + "describeIndexStats": {"docs": ["guides/manage-data/manage-indexes"], "sdks": ["python", "ts", "go", "java"]}, + "startBulkImport": {"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]}, + "describeBulkImport": 
{"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]}, + "listBulkImports": {"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]}, + "cancelBulkImport": {"docs": ["guides/index-data/import-data"], "sdks": ["python", "ts"]}, + "create_backup": {"docs": ["guides/manage-data/back-up-an-index"], "sdks": ["python", "ts", "go", "java"]}, + "create_index_from_backup_operation": {"docs": ["guides/manage-data/restore-an-index"], "sdks": ["python", "ts", "go", "java"]}, + "create_collection": {"docs": ["guides/manage-data/manage-collections"], "sdks": ["python", "ts", "go", "java"]}, + "createNamespace": {"docs": ["guides/manage-data/manage-namespaces"], "sdks": ["python", "ts", "go", "java"]}, + "deleteNamespace": {"docs": ["guides/manage-data/manage-namespaces"], "sdks": ["python", "ts", "go", "java"]}, + "embed": {"docs": ["guides/inference/generate-embeddings"], "sdks": ["python", "ts", "go", "java"]}, + "rerank": {"docs": ["guides/search/rerank-results"], "sdks": ["python", "ts", "go", "java"]}, + "list_models": {"docs": ["guides/inference/understanding-inference"], "sdks": ["python", "ts"]}, + "get_model": {"docs": ["guides/inference/understanding-inference"], "sdks": ["python", "ts"]} + }, + "schemas": { + "IndexModel": {"docs": ["guides/index-data/create-an-index"], "sdks": ["python", "ts", "go", "java"]}, + "CreateIndexForModelRequest": {"docs": ["guides/index-data/create-an-index"], "sdks": ["python", "ts"]}, + "SearchRecordsRequest": {"docs": ["guides/search/search-overview"], "sdks": ["python", "ts"]} + } +}