From 9a0d14408f7889f8a01c42c3aa4e493088653801 Mon Sep 17 00:00:00 2001 From: Karl Kauc Date: Mon, 18 May 2026 10:08:32 +0200 Subject: [PATCH] refactor(xsd): validators take <schema> <xml>, drop version resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The six XSD validators (CLI sh/ps1, Python, Java, .NET, PowerShell) now take an explicit schema + XML file instead of a FundsXML <version>. The schema argument is a local FundsXML.xsd path OR a remote URL (e.g. the official release). No version arg, no FUNDSXML_SCHEMA_DIR, no .schema-cache, no resolver module — whatever you point at is used as-is. Argument order is schema-first, matching the Schematron/XSLT/XQuery invocations. - Delete tools/fundsxml_schema.py and XSD_Validation/dotnet/SchemaResolver.cs; pyproject.toml is now a dependency-only manifest (py-modules = []). - URL schemas: Python / CLI sh / CLI ps1-with-xmllint / Java fetch the schema (and the relative xmldsig-core-schema.xsd sibling that 4.2.9+ imports) into a temp dir then validate locally with instance XXE-hardening intact; .NET and PowerShell Validate-FundsXml.ps1 resolve a URL natively via XmlUrlResolver. The GitHub release 302-redirects to an opaque blob URL, so the relative import cannot be resolved post-redirect — hence the temp fetch. - CI: drop the fundsxml_schema precache; plain-curl the official schemas into ci-schemas/ for the raw-xmllint steps; rewrite validator invocations to pass a schema; exercise the remote-URL path on every stack (Linux) and via the release URL on Windows. - Docs (CLAUDE.md, root + XSD_Validation + per-area READMEs, CONTRIBUTING, .github templates) rewritten for the new contract. Generators that embed the 4.2.9 release URL into xsi:noNamespaceSchemaLocation (Large_File_Processing, Data_Binding_JSON, Database_Integration export) are left as-is by design: that URL is a label on generated output, not schema resolution, and is exactly what the new validators can be pointed at. 
Verified locally (Python/CLI-sh/Java/.NET): positive URL + local path for 4.2.9 and 4.1.0, negative fixture rejected. PowerShell stacks covered by the CI Windows matrix. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/EXAMPLE_README_TEMPLATE.md | 9 +- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 4 +- .github/workflows/ci.yml | 146 +++++++++------ .gitignore | 5 - CONTRIBUTING.md | 26 +-- Data_Binding_JSON/README.md | 28 ++- Database_Integration/README.md | 5 +- .../python/export_fundsxml.py | 2 +- FundsXML_Files/4.0.0/positions/README.md | 6 +- FundsXML_Files/4.1.0/positions/README.md | 6 +- FundsXML_Files/4.2.9/documents/README.md | 6 +- FundsXML_Files/4.2.9/regulatory/README.md | 6 +- FundsXML_Files/4.2.9/signed/README.md | 11 +- FundsXML_Files/4.2.9/transactions/README.md | 6 +- FundsXML_Files/README.md | 38 ++-- Large_File_Processing/README.md | 4 +- README.md | 47 ++--- XSD_Validation/README.md | 110 ++++++----- XSD_Validation/cli/validate.ps1 | 144 +++++++------- XSD_Validation/cli/validate.sh | 80 ++++---- XSD_Validation/dotnet/SchemaResolver.cs | 85 --------- XSD_Validation/dotnet/XsdValidate.cs | 50 +++-- XSD_Validation/java/XsdValidate.java | 176 ++++++++---------- .../powershell/Validate-FundsXml.ps1 | 72 +++---- XSD_Validation/python/validate.py | 84 ++++++--- pyproject.toml | 29 ++- tools/fundsxml_schema.py | 105 ----------- 28 files changed, 562 insertions(+), 730 deletions(-) delete mode 100644 XSD_Validation/dotnet/SchemaResolver.cs delete mode 100644 tools/fundsxml_schema.py diff --git a/.github/EXAMPLE_README_TEMPLATE.md b/.github/EXAMPLE_README_TEMPLATE.md index a68db10..3a7b891 100644 --- a/.github/EXAMPLE_README_TEMPLATE.md +++ b/.github/EXAMPLE_README_TEMPLATE.md @@ -20,11 +20,10 @@ Why this exists and when an enterprise integrator would use it. 
## Prerequisites - Tooling/runtime versions -- The example resolves the XSD itself (env `FUNDSXML_SCHEMA_DIR` → - `.schema-cache/` → official-release download); or - `python -m fundsxml_schema <version>` to pre-cache for a bare xmllint -- Network/proxy note when the official schema URL must be reached - (`$FUNDSXML_SCHEMA_DIR` is the offline escape hatch) +- XSD validation takes `<schema> <xml>`: pass the official release URL + or a local `FundsXML.xsd` path (no version arg, no cache, no env var) +- Network note: only needed when a schema **URL** is passed; a local + `FundsXML.xsd` path validates fully offline ## Run diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 8f2d630..e5d0c0f 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -25,5 +25,5 @@ OS; and the relevant runtime version: Python / Java (`java -version`) / Node (`node -v`) / .NET (`dotnet --version`) / `xmllint --version`. **Validation already done** (helps a lot) -- [ ] `xmllint --noout --schema .schema-cache/<version>/FundsXML.xsd <file>` result: … +- [ ] `XSD_Validation/cli/validate.sh <schema> <xml>` result: … - [ ] round-trip checked with `Database_Integration/tools/xml_equiv.py` (if applicable): … diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e9ab638..32557bb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,8 @@ - [ ] English only; example is **self-contained** and **heavily commented** (file-header: purpose / run / deps / FundsXML assumptions + what & why). - [ ] FundsXML conventions: no XML namespace; XSD-validated against the - **official released schema** (each example resolves it itself; or - `python -m fundsxml_schema <version>`); 4.0.0 has no `ControlData/Version`; + **official released schema** (validators take `<schema> <xml>` — + release URL or local path); 4.0.0 has no `ControlData/Version`; secure XML parsing (DTD/external entities off). 
- [ ] New/changed samples are **XSD-valid**; negative fixtures still fail. - [ ] Round-trip examples: proven with `Database_Integration/tools/xml_equiv.py` diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb4fc3a..94f0b14 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,21 +39,32 @@ jobs: with: dotnet-version: "8.0" - # One idiomatic, OS-agnostic Python install (venv + editable pyproject); - # also exposes the `fundsxml_schema` resolver to every Python script. + # One idiomatic, OS-agnostic Python install (venv + editable pyproject). + # Dependency-only manifest — nothing is packaged/importable. - name: Python - venv & install (pyproject) run: | python -m venv .venv .venv/bin/pip install --quiet --upgrade pip .venv/bin/pip install --quiet -e . - .venv/bin/python -c "import lxml, saxonche, fundsxml_schema; print('python deps OK')" - - # Materialise the official schemas into .schema-cache/ for the - # xmllint-based steps below — via the in-language resolver module - # (cross-platform), replacing the deleted tools/fetch-schema.sh. - - name: Materialise official schemas (4.2.9, 4.1.0, 4.0.0) + .venv/bin/python -c "import lxml, saxonche; print('python deps OK')" + + # The examples no longer resolve any schema: each validator takes a + # schema path (or URL) + the XML file. This step is CI plumbing only — + # a plain curl of the official releases into ci-schemas/ so the many + # raw-xmllint steps below have a local schema path to point at. (The + # validators' own remote-URL handling is exercised separately.) 
+ - name: Fetch official schemas for CI (plain curl, no resolver) run: | - for v in 4.2.9 4.1.0 4.0.0; do .venv/bin/python -m fundsxml_schema "$v"; done + set -e + for v in 4.2.9 4.1.0 4.0.0; do + mkdir -p "ci-schemas/$v" + curl -sSL --fail -o "ci-schemas/$v/FundsXML.xsd" \ + "https://github.com/fundsxml/schema/releases/download/$v/FundsXML.xsd" + if grep -q 'xmldsig-core-schema\.xsd' "ci-schemas/$v/FundsXML.xsd"; then + curl -sSL --fail -o "ci-schemas/$v/xmldsig-core-schema.xsd" \ + "https://github.com/fundsxml/schema/releases/download/$v/xmldsig-core-schema.xsd" + fi + done - name: XSD - all positive samples must validate run: | @@ -66,57 +77,66 @@ jobs: for v in "${!S[@]}"; do for f in ${S[$v]}; do echo "XSD $v/$f" - xmllint --noout --nonet --schema ".schema-cache/$v/FundsXML.xsd" \ + xmllint --noout --nonet --schema "ci-schemas/$v/FundsXML.xsd" \ "FundsXML_Files/$v/$f.xml" done done - name: XSD - negative fixture must FAIL run: | - if xmllint --noout --nonet --schema .schema-cache/4.2.9/FundsXML.xsd \ + if xmllint --noout --nonet --schema ci-schemas/4.2.9/FundsXML.xsd \ tests/fixtures/invalid/xsd-invalid_Positions.xml 2>/dev/null; then echo "::error::xsd-invalid fixture unexpectedly validated"; exit 1 fi echo "xsd-invalid correctly rejected" - - name: Python - XSD validation (in-language schema resolve) + - name: Python - XSD validation (schema path + remote URL) run: | set -e V=".venv/bin/python XSD_Validation/python/validate.py" - # Cache is warm from the xmllint step: exercises the resolver's - # cache-hit path and the $FUNDSXML_SCHEMA_DIR env override. - $V 4.2.9 FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml - FUNDSXML_SCHEMA_DIR="$PWD/.schema-cache/4.1.0" \ - $V 4.1.0 FundsXML_Files/4.1.0/positions/Equity-Fund_Positions.xml - if $V 4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml; then + # Local schema path (two versions) ... 
+ $V ci-schemas/4.2.9/FundsXML.xsd FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + $V ci-schemas/4.1.0/FundsXML.xsd FundsXML_Files/4.1.0/positions/Equity-Fund_Positions.xml + # ... and a remote schema URL (the official release, fetched by the + # validator itself — exercises the URL path + the relative xmldsig + # sibling fetch). + $V https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ + FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + if $V ci-schemas/4.2.9/FundsXML.xsd tests/fixtures/invalid/xsd-invalid_Positions.xml; then echo "::error::xsd-invalid unexpectedly validated (Python)"; exit 1 fi - echo "Python validate.py: positive ok, negative correctly rejected" + echo "Python validate.py: path + URL ok, negative correctly rejected" - - name: .NET - XSD validation (in-language schema resolve) + - name: .NET - XSD validation (schema path + remote URL) run: | set -e D="dotnet run --project XSD_Validation/dotnet --" - # Cache is warm from the xmllint step: exercises the resolver's - # cache-hit path and the $FUNDSXML_SCHEMA_DIR env override. - $D 4.2.9 FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml - FUNDSXML_SCHEMA_DIR="$PWD/.schema-cache/4.1.0" \ - $D 4.1.0 FundsXML_Files/4.1.0/positions/Equity-Fund_Positions.xml - if $D 4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml; then + $D ci-schemas/4.2.9/FundsXML.xsd FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + $D ci-schemas/4.1.0/FundsXML.xsd FundsXML_Files/4.1.0/positions/Equity-Fund_Positions.xml + # Remote schema URL: .NET resolves it (and the relative xmldsig + # import) natively via the schema-set URL resolver. 
+ $D https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ + FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + if $D ci-schemas/4.2.9/FundsXML.xsd tests/fixtures/invalid/xsd-invalid_Positions.xml; then echo "::error::xsd-invalid unexpectedly validated (.NET)"; exit 1 fi - echo ".NET XsdValidate: positive ok, negative correctly rejected" + echo ".NET XsdValidate: path + URL ok, negative correctly rejected" - - name: CLI - validate.sh (standalone POSIX sh) + - name: CLI - validate.sh (schema path + remote URL) run: | set -e - XSD_Validation/cli/validate.sh 4.2.9 \ + XSD_Validation/cli/validate.sh ci-schemas/4.2.9/FundsXML.xsd \ + FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + # Remote schema URL: validate.sh fetches it (and the xmldsig + # sibling) into a temp dir, then validates offline with --nonet. + XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml - if XSD_Validation/cli/validate.sh 4.2.9 \ + if XSD_Validation/cli/validate.sh ci-schemas/4.2.9/FundsXML.xsd \ tests/fixtures/invalid/xsd-invalid_Positions.xml; then echo "::error::xsd-invalid unexpectedly validated (CLI)"; exit 1 fi - echo "CLI validate.sh: positive ok, negative correctly rejected" + echo "CLI validate.sh: path + URL ok, negative correctly rejected" # --------------------------------------------------------------------- # Java examples — built & run via the committed Maven Wrapper. The first @@ -128,19 +148,19 @@ jobs: - name: Java - build all modules (Maven Wrapper) run: ./mvnw -q -B compile - - name: Java - XSD validation (in-language schema resolve) + - name: Java - XSD validation (schema path + remote URL) run: | set -e M="./mvnw -q -B -pl XSD_Validation/java exec:java" - # Cache is warm from the xmllint step: exercises the resolver's - # cache-hit path and the $FUNDSXML_SCHEMA_DIR env override. 
- $M -Dexec.args="4.2.9 FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" - FUNDSXML_SCHEMA_DIR="$PWD/.schema-cache/4.1.0" \ - $M -Dexec.args="4.1.0 FundsXML_Files/4.1.0/positions/Equity-Fund_Positions.xml" - if $M -Dexec.args="4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml"; then + $M -Dexec.args="ci-schemas/4.2.9/FundsXML.xsd FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" + $M -Dexec.args="ci-schemas/4.1.0/FundsXML.xsd FundsXML_Files/4.1.0/positions/Equity-Fund_Positions.xml" + # Remote schema URL: the validator fetches it (and the xmldsig + # sibling) into a temp dir, then validates locally. + $M -Dexec.args="https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" + if $M -Dexec.args="ci-schemas/4.2.9/FundsXML.xsd tests/fixtures/invalid/xsd-invalid_Positions.xml"; then echo "::error::xsd-invalid unexpectedly validated (Java)"; exit 1 fi - echo "Java XsdValidate: positive ok, negative correctly rejected" + echo "Java XsdValidate: path + URL ok, negative correctly rejected" - name: Schematron - canonical sample passes, fixture fails run: | @@ -195,7 +215,7 @@ jobs: -Dexec.args="$SRC signed.xml XML_Signature/keys/test-signing.p12 changeit fundsxml" $M -Dexec.mainClass=VerifyFundsXml \ -Dexec.args="signed.xml XML_Signature/keys/test-signing-cert.pem" - xmllint --noout --nonet --schema .schema-cache/4.2.9/FundsXML.xsd signed.xml + xmllint --noout --nonet --schema ci-schemas/4.2.9/FundsXML.xsd signed.xml sed 's/8.339.33 tampered.xml if $M -Dexec.mainClass=VerifyFundsXml \ -Dexec.args="tampered.xml XML_Signature/keys/test-signing-cert.pem"; then @@ -211,7 +231,7 @@ jobs: MX=FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml DOC=FUNDSXML_MULTI_1 EQ="$PY Database_Integration/tools/xml_equiv.py" - XSD=".schema-cache/4.2.9/FundsXML.xsd" + XSD="ci-schemas/4.2.9/FundsXML.xsd" # The multi-fund fixture must itself be schema-valid. 
xmllint --noout --nonet --schema "$XSD" "$FX" @@ -260,7 +280,7 @@ jobs: PY=.venv/bin/python P=Large_File_Processing/python $PY $P/make_large_sample.py big.xml 30000 - xmllint --noout --nonet --schema .schema-cache/4.2.9/FundsXML.xsd big.xml + xmllint --noout --nonet --schema ci-schemas/4.2.9/FundsXML.xsd big.xml $PY $P/stream_aggregate.py big.xml | tee agg.txt grep -q '^positions : 30000$' agg.txt grep -q '^sum value (EUR): 30000000.00$' agg.txt @@ -271,7 +291,7 @@ jobs: grep -q '^positions : 30000$' aggj.txt $PY $P/split.py big.xml chunks/ 10000 test "$(ls chunks/chunk-*.xml | wc -l)" = "3" - xmllint --noout --nonet --schema .schema-cache/4.2.9/FundsXML.xsd chunks/chunk-0001.xml + xmllint --noout --nonet --schema ci-schemas/4.2.9/FundsXML.xsd chunks/chunk-0001.xml $PY $P/delta_diff.py big.xml big.xml # identical -> exit 0 - name: Data binding / JSON - round-trip + native Java binding @@ -280,11 +300,11 @@ jobs: PY=.venv/bin/python SRC=FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml $PY Data_Binding_JSON/python/fundsxml_json.py roundtrip "$SRC" rj.xml - xmllint --noout --nonet --schema .schema-cache/4.2.9/FundsXML.xsd rj.xml + xmllint --noout --nonet --schema ci-schemas/4.2.9/FundsXML.xsd rj.xml # Multi-fund JSON round-trip is lossless -> must be xml_equiv-equal. MF=FundsXML_Files/4.2.9/positions/Multi-Fund_Positions.xml $PY Data_Binding_JSON/python/fundsxml_json.py roundtrip "$MF" rjm.xml - xmllint --noout --nonet --schema .schema-cache/4.2.9/FundsXML.xsd rjm.xml + xmllint --noout --nonet --schema ci-schemas/4.2.9/FundsXML.xsd rjm.xml $PY Database_Integration/tools/xml_equiv.py "$MF" rjm.xml $PY - "$SRC" rj.xml <<'PY' import re, sys @@ -306,10 +326,10 @@ jobs: # ======================================================================= # Windows smoke — proves the examples are genuinely cross-platform and - # standalone (the whole point of this work): every stack resolves the XSD - # itself and runs with no bash, no prior tool step, on a clean Windows box. 
- # A focused subset (Python / Java / .NET XSD + one XSLT + a DB round-trip + - # the PowerShell CLI), not the full bash-heavy matrix. + # standalone (the whole point of this work): every stack runs with no bash, + # no prior tool step, on a clean Windows box, taking a schema URL + the XML + # file directly. A focused subset (Python / Java / .NET XSD + one XSLT + a + # DB round-trip + the PowerShell CLI), not the full bash-heavy matrix. # ======================================================================= windows-smoke: runs-on: windows-latest @@ -334,14 +354,15 @@ jobs: python -m venv .venv .venv\Scripts\python -m pip install --quiet --upgrade pip .venv\Scripts\pip install --quiet -e . - .venv\Scripts\python -c "import lxml, saxonche, fundsxml_schema; print('python deps OK')" + .venv\Scripts\python -c "import lxml, saxonche; print('python deps OK')" - - name: Python - XSD validation (downloads schema itself on Windows) + - name: Python - XSD validation (remote schema URL on Windows) run: | $src = "FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" - .venv\Scripts\python XSD_Validation/python/validate.py 4.2.9 $src + $xsd = "https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd" + .venv\Scripts\python XSD_Validation/python/validate.py $xsd $src if ($LASTEXITCODE -ne 0) { throw "positive failed" } - .venv\Scripts\python XSD_Validation/python/validate.py 4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml + .venv\Scripts\python XSD_Validation/python/validate.py $xsd tests/fixtures/invalid/xsd-invalid_Positions.xml if ($LASTEXITCODE -eq 0) { throw "negative unexpectedly validated (Python)" } Write-Host "Python OK on Windows" exit 0 # reset $LASTEXITCODE (the negative case left it at 1) @@ -349,30 +370,33 @@ jobs: - name: Java - XSD validate + one XSLT (Maven Wrapper, mvnw.cmd) run: | $src = "FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" - .\mvnw.cmd -q -B -pl XSD_Validation/java compile exec:java "-Dexec.args=4.2.9 $src" + 
$xsd = "https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd" + .\mvnw.cmd -q -B -pl XSD_Validation/java compile exec:java "-Dexec.args=$xsd $src" if ($LASTEXITCODE -ne 0) { throw "Java XSD positive failed" } - .\mvnw.cmd -q -B -pl XSD_Validation/java exec:java "-Dexec.args=4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml" + .\mvnw.cmd -q -B -pl XSD_Validation/java exec:java "-Dexec.args=$xsd tests/fixtures/invalid/xsd-invalid_Positions.xml" if ($LASTEXITCODE -eq 0) { throw "Java negative unexpectedly validated" } .\mvnw.cmd -q -B -pl XSLT_Transformations/invocation compile exec:java "-Dexec.args=XSLT_Transformations/CSV_Export/positions_csv.xslt $src out_pos.csv" if (-not (Test-Path out_pos.csv)) { throw "XSLT produced no output" } Write-Host "Java OK on Windows" exit 0 # reset $LASTEXITCODE (the negative case left it at 1) - - name: .NET - XSD validation (downloads schema itself on Windows) + - name: .NET - XSD validation (remote schema URL on Windows) run: | $src = "FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" - dotnet run --project XSD_Validation/dotnet -- 4.2.9 $src + $xsd = "https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd" + dotnet run --project XSD_Validation/dotnet -- $xsd $src if ($LASTEXITCODE -ne 0) { throw ".NET XSD positive failed" } - dotnet run --project XSD_Validation/dotnet -- 4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml + dotnet run --project XSD_Validation/dotnet -- $xsd tests/fixtures/invalid/xsd-invalid_Positions.xml if ($LASTEXITCODE -eq 0) { throw ".NET negative unexpectedly validated" } Write-Host ".NET OK on Windows" exit 0 # reset $LASTEXITCODE (the negative case left it at 1) - - name: CLI - validate.ps1 (PowerShell, .NET fallback when no xmllint) + - name: CLI - validate.ps1 (remote URL, .NET fallback when no xmllint) run: | - pwsh XSD_Validation/cli/validate.ps1 4.2.9 FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + $xsd = 
"https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd" + pwsh XSD_Validation/cli/validate.ps1 $xsd FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml if ($LASTEXITCODE -ne 0) { throw "validate.ps1 positive failed" } - pwsh XSD_Validation/cli/validate.ps1 4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml + pwsh XSD_Validation/cli/validate.ps1 $xsd tests/fixtures/invalid/xsd-invalid_Positions.xml if ($LASTEXITCODE -eq 0) { throw "validate.ps1 negative unexpectedly validated" } Write-Host "PowerShell CLI OK on Windows" exit 0 # reset $LASTEXITCODE (the negative case left it at 1) diff --git a/.gitignore b/.gitignore index ebb9aef..65fe298 100644 --- a/.gitignore +++ b/.gitignore @@ -4,11 +4,6 @@ CLAUDE.md .DS_Store ._* -# Locally cached XSD releases — not committed, the source of truth is the -# official GitHub release. Each example downloads + caches it here itself -# (honouring $FUNDSXML_SCHEMA_DIR); see XSD_Validation/java/XsdValidate.java. -.schema-cache/ - # Maven build output. Dependencies come from Maven Central via ./mvnw; the # committed Maven Wrapper bootstraps Maven itself — nothing is vendored. target/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 745f2c4..c72d308 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,12 +22,11 @@ Please read this before opening a pull request. the schema itself requires them). `xsi:noNamespaceSchemaLocation` must be in the XMLSchema-instance namespace or validators reject it. - **Validate against the official released schema**, never a hand-made - catalog. Every example resolves it itself (`$FUNDSXML_SCHEMA_DIR` → - `.schema-cache/` → official-release download, 302-aware, pulls the - `xmldsig-core-schema.xsd` sibling for 4.2.9+). For an xmllint check, - materialise the cache with `python -m fundsxml_schema ` - (after `pip install -e .`). Set sample `xsi:noNamespaceSchemaLocation` - to that release URL. + catalog. 
The validators take `<schema> <xml>` — pass the official + release URL (the URL stacks fetch it + the `xmldsig-core-schema.xsd` + sibling for 4.2.9+) or a local `FundsXML.xsd` path. No version arg, no + cache, no env var. Set sample `xsi:noNamespaceSchemaLocation` to that + release URL. - **4.0.0 `ControlData` has no `<Version>` element** (added in 4.1.0) — never add one to a 4.0.0 sample. - Positions ↔ Assets link by a shared `UniqueID`; `AssetMasterData` is @@ -60,18 +59,19 @@ Run what you changed and confirm it actually works — no "should pass" claims. Java examples build standalone via the committed Maven Wrapper (`./mvnw`, or `mvnw.cmd` on Windows). Python examples install once into a venv from -`pyproject.toml` and resolve the XSD themselves. No `fetch-tools.sh` and no -`fetch-schema.sh` — both are gone; every stack is standalone & cross-platform. +`pyproject.toml`. No `fetch-tools.sh`, no `fetch-schema.sh`, no +`fundsxml_schema` resolver — all gone; every stack is standalone & +cross-platform and takes the schema as an argument. ```bash +REL=https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd + # Python stack (cross-platform; Windows: .venv\Scripts\activate) python -m venv .venv && . .venv/bin/activate && pip install -e . 
-python XSD_Validation/python/validate.py 4.2.9 .xml # self-resolves the XSD +python XSD_Validation/python/validate.py "$REL" .xml -# xmllint check: materialise the cache cross-platform, then validate -python -m fundsxml_schema 4.2.9 -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd .xml -# (or just: XSD_Validation/cli/validate.sh 4.2.9 .xml — self-resolving) +# or the CLI (same args); a local FundsXML.xsd path works in place of $REL: +XSD_Validation/cli/validate.sh "$REL" .xml # Schematron via the Maven Wrapper (positive sample -> exit 0) ./mvnw -q -pl Schematron_DataQuality_Checks/Basic_Checks/invocation \ diff --git a/Data_Binding_JSON/README.md b/Data_Binding_JSON/README.md index 0834862..f92b500 100644 --- a/Data_Binding_JSON/README.md +++ b/Data_Binding_JSON/README.md @@ -19,7 +19,9 @@ python3 Data_Binding_JSON/python/fundsxml_json.py to-json \ FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml fund.json python3 Data_Binding_JSON/python/fundsxml_json.py roundtrip \ FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml regenerated.xml -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd regenerated.xml +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ + regenerated.xml # or a local FundsXML.xsd path ``` Verified: NAV, position count and percentage-sum preserved through @@ -35,15 +37,23 @@ so a full generated model is heavy and brittle to maintain. 
### Generated-binding references (when you do want codegen) -| Stack | Tool | Command (against the fetched schema) | -|-------|------|--------------------------------------| -| Java | JAXB `xjc` | `xjc -d src -p org.fundsxml.model .schema-cache/4.2.9/FundsXML.xsd` | -| Python | `xsdata` | `xsdata --package fundsxml.model .schema-cache/4.2.9/FundsXML.xsd` | -| .NET | `xsd.exe` / `XmlSerializer` | `xsd.exe /classes /namespace:FundsXml.Model .schema-cache\4.2.9\FundsXML.xsd` | +| Stack | Tool | Command (against a local FundsXML.xsd) | +|-------|------|----------------------------------------| +| Java | JAXB `xjc` | `xjc -d src -p org.fundsxml.model schema/FundsXML.xsd` | +| Python | `xsdata` | `xsdata --package fundsxml.model schema/FundsXML.xsd` | +| .NET | `xsd.exe` / `XmlSerializer` | `xsd.exe /classes /namespace:FundsXml.Model schema\FundsXML.xsd` | -All three consume the **official released schema**; materialise it with -`python -m fundsxml_schema 4.2.9` (after `pip install -e .` — cross-platform; -also pulls the imported `xmldsig-core-schema.xsd` for 4.2.9). Trade-off: generated models are type-safe but regenerate on every +All three consume the **official released schema** on disk. Codegen needs the +local file (and, for 4.2.9, its `xmldsig-core-schema.xsd` sibling beside it), +so fetch it once: + +```bash +mkdir -p schema && B=https://github.com/fundsxml/schema/releases/download/4.2.9 +curl -sSL -o schema/FundsXML.xsd "$B/FundsXML.xsd" +curl -sSL -o schema/xmldsig-core-schema.xsd "$B/xmldsig-core-schema.xsd" +``` + +Trade-off: generated models are type-safe but regenerate on every schema bump and produce thousands of classes; the native binding stays small and version-tolerant. Pick per use case. 
diff --git a/Database_Integration/README.md b/Database_Integration/README.md index 7624649..e3da356 100644 --- a/Database_Integration/README.md +++ b/Database_Integration/README.md @@ -59,7 +59,6 @@ Each example is run as **import, then export** (the round-trip = both, then compare). `DOC` is the document id the import prints. ```bash -python -m fundsxml_schema 4.2.9 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) FX=FundsXML_Files/4.2.9/positions/Multi-Fund_Positions.xml DOC=FUNDSXML_MULTI_1 @@ -84,7 +83,9 @@ dotnet run --project Database_Integration/csharp/export -- fx.db "$DOC" out.xml # prove it: exported file == input file, and schema-valid python3 Database_Integration/tools/xml_equiv.py "$FX" out.xml -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd out.xml +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ + out.xml # or a local FundsXML.xsd path ``` (`--enable-native-access=ALL-UNNAMED` only silences a JDK 24+ warning when diff --git a/Database_Integration/python/export_fundsxml.py b/Database_Integration/python/export_fundsxml.py index ae17685..eff2186 100644 --- a/Database_Integration/python/export_fundsxml.py +++ b/Database_Integration/python/export_fundsxml.py @@ -14,7 +14,7 @@ # # Prove the round-trip (import file vs exported file): # python3 ../tools/xml_equiv.py some.xml out.xml -# xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd out.xml +# XSD_Validation/cli/validate.sh out.xml # # DEPENDENCIES Python stdlib `sqlite3` + `lxml`. 
# diff --git a/FundsXML_Files/4.0.0/positions/README.md b/FundsXML_Files/4.0.0/positions/README.md index 7040a16..2b08f6b 100644 --- a/FundsXML_Files/4.0.0/positions/README.md +++ b/FundsXML_Files/4.0.0/positions/README.md @@ -27,7 +27,9 @@ Content-identical to the 4.1.0 example (3 equity positions), but adapted to the ## Validation ```bash -python -m fundsxml_schema 4.0.0 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) -xmllint --noout --schema .schema-cache/4.0.0/FundsXML.xsd \ +# Give the validator the schema (the official 4.0.0 release URL — or a local +# FundsXML.xsd path) + the XML file: +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.0.0/FundsXML.xsd \ FundsXML_Files/4.0.0/positions/Equity-Fund_Positions.xml ``` diff --git a/FundsXML_Files/4.1.0/positions/README.md b/FundsXML_Files/4.1.0/positions/README.md index fe5d9b6..78e4302 100644 --- a/FundsXML_Files/4.1.0/positions/README.md +++ b/FundsXML_Files/4.1.0/positions/README.md @@ -24,7 +24,9 @@ EUR 40m NAV. Deliberately small to make the version comparison easy. ## Validation ```bash -python -m fundsxml_schema 4.1.0 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) -xmllint --noout --schema .schema-cache/4.1.0/FundsXML.xsd \ +# Give the validator the schema (the official 4.1.0 release URL — or a local +# FundsXML.xsd path) + the XML file: +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.1.0/FundsXML.xsd \ FundsXML_Files/4.1.0/positions/Equity-Fund_Positions.xml ``` diff --git a/FundsXML_Files/4.2.9/documents/README.md b/FundsXML_Files/4.2.9/documents/README.md index 2d67303..c14a2b7 100644 --- a/FundsXML_Files/4.2.9/documents/README.md +++ b/FundsXML_Files/4.2.9/documents/README.md @@ -24,7 +24,9 @@ via `Document/Fund/Identifiers/LEI`. 
## Validation ```bash -python -m fundsxml_schema 4.2.9 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd \ +# Give the validator the schema (the official 4.2.9 release URL — or a local +# FundsXML.xsd path) + the XML file; the xmldsig sibling is handled for you: +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ FundsXML_Files/4.2.9/documents/Fund_Documents.xml ``` diff --git a/FundsXML_Files/4.2.9/regulatory/README.md b/FundsXML_Files/4.2.9/regulatory/README.md index 7cc799d..089b598 100644 --- a/FundsXML_Files/4.2.9/regulatory/README.md +++ b/FundsXML_Files/4.2.9/regulatory/README.md @@ -28,7 +28,9 @@ Mandatory blocks included: ## Validation ```bash -python -m fundsxml_schema 4.2.9 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd \ +# Give the validator the schema (the official 4.2.9 release URL — or a local +# FundsXML.xsd path) + the XML file; the xmldsig sibling is handled for you: +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ FundsXML_Files/4.2.9/regulatory/EFT_Regulatory.xml ``` diff --git a/FundsXML_Files/4.2.9/signed/README.md b/FundsXML_Files/4.2.9/signed/README.md index 829957f..e9e0017 100644 --- a/FundsXML_Files/4.2.9/signed/README.md +++ b/FundsXML_Files/4.2.9/signed/README.md @@ -13,8 +13,9 @@ `ds:Signature` (namespace `http://www.w3.org/2000/09/xmldsig#`) is the **last optional child** of ``. From release 4.2.9 on, `FundsXML.xsd` imports -`xmldsig-core-schema.xsd` for this — the schema resolvers fetch that sibling -automatically when it is imported (e.g. `python -m fundsxml_schema 4.2.9`). 
+`xmldsig-core-schema.xsd` for this — the validators fetch that sibling +alongside `FundsXML.xsd` when you pass the release URL (and it sits next to a +local `FundsXML.xsd` in any complete copy of the release). > ⚠️ **Placeholder:** `DigestValue` and `SignatureValue` are schema-valid base64 > strings but **not cryptographically verifiable**. Real signing and @@ -26,7 +27,9 @@ Algorithms used (enveloped signature): C14N 2001-03-15, RSA-SHA256, SHA-256. ## Validation ```bash -python -m fundsxml_schema 4.2.9 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd \ +# Give the validator the schema (the official 4.2.9 release URL — or a local +# FundsXML.xsd path) + the XML file; the xmldsig sibling is handled for you: +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ FundsXML_Files/4.2.9/signed/Signed_Fund_Skeleton.xml ``` diff --git a/FundsXML_Files/4.2.9/transactions/README.md b/FundsXML_Files/4.2.9/transactions/README.md index 62dbd31..4142c2f 100644 --- a/FundsXML_Files/4.2.9/transactions/README.md +++ b/FundsXML_Files/4.2.9/transactions/README.md @@ -26,8 +26,10 @@ records: ## Validation ```bash -python -m fundsxml_schema 4.2.9 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd \ +# Give the validator the schema (the official 4.2.9 release URL — or a local +# FundsXML.xsd path) + the XML file; the xmldsig sibling is handled for you: +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ FundsXML_Files/4.2.9/transactions/Fund_Transactions.xml ``` diff --git a/FundsXML_Files/README.md b/FundsXML_Files/README.md index a11e2d8..1de8565 100644 --- a/FundsXML_Files/README.md +++ b/FundsXML_Files/README.md @@ -114,36 +114,36 @@ FundsXML uses standardized codes for asset 
classification: FundsXML documents should be validated against the official XSD schema: -### Download Schema +### The schema -Validation always targets the **official release** of the schema: +The canonical schema is the **official release**: ``` https://github.com/fundsxml/schema/releases/download//FundsXML.xsd ``` -Two enterprise-relevant caveats are handled by every example's in-language -schema resolver (and by the `fundsxml_schema` module shown below): +You hand that URL (or a local `FundsXML.xsd` path) straight to a validator — +nothing is resolved by version. Two enterprise-relevant caveats the validators +handle for you: -1. That URL returns an HTTP 302 redirect; simple HTTP clients (libxml2 / - xmllint) do not follow it, so the schema must be materialised first. -2. From release 4.2.9 on, `FundsXML.xsd` imports `xmldsig-core-schema.xsd` via a - relative path — both files must sit in the same directory. +1. That URL returns an HTTP 302 redirect; the validators that rely on a simple + HTTP client (Python, the xmllint CLI, Java) fetch the schema into a temp + dir first, then validate locally. +2. From release 4.2.9 on, `FundsXML.xsd` imports `xmldsig-core-schema.xsd` via + a relative path — the URL stacks fetch that sibling alongside it; for a + local schema path it must sit in the same directory (it does in any + complete copy of a release). -The resolution order is the same everywhere: `$FUNDSXML_SCHEMA_DIR` (offline / -corporate-network escape hatch) → `.schema-cache/` → download from the -official release. No committed catalog. 
+### Validate (any stack — schema + xml) ```bash -# Fetches FundsXML.xsd (+ xmldsig-core-schema.xsd when needed) into .schema-cache// -python -m fundsxml_schema 4.2.9 # caches the XSD into .schema-cache/ (run `pip install -e .` once; cross-platform) -``` - -### Validate with xmllint (macOS/Linux) - -```bash -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd \ +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml +# offline: pass a local FundsXML.xsd path instead of the URL. Bare xmllint +# needs the schema on disk (it can't follow the GitHub 302 itself): +# curl -sSL -o /tmp/FundsXML.xsd "<release-url>" # + xmldsig sibling for 4.2.9+ +# xmllint --noout --schema /tmp/FundsXML.xsd <xml-file> ``` ### Validate with Saxon diff --git a/Large_File_Processing/README.md b/Large_File_Processing/README.md index 38af7a4..b00b4e6 100644 --- a/Large_File_Processing/README.md +++ b/Large_File_Processing/README.md @@ -33,7 +33,9 @@ MAVEN_OPTS=-Xmx64m ./mvnw -q -pl Large_File_Processing/java compile exec:java \ # 3. split into XSD-valid chunks of 10k positions python3 Large_File_Processing/python/split.py big.xml chunks/ 10000 -xmllint --noout --schema .schema-cache/4.2.9/FundsXML.xsd chunks/chunk-0001.xml +XSD_Validation/cli/validate.sh \ + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ + chunks/chunk-0001.xml # or a local FundsXML.xsd path # 4. day-over-day position delta (exit 1 if anything changed) python3 Large_File_Processing/python/delta_diff.py yesterday.xml today.xml diff --git a/README.md b/README.md index f56a60a..ac4a1ee 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,6 @@ Linux/macOS) with no bash prerequisite, and is exercised by the CI workflow. 
| Database import / export (multi-fund) | Separate import + export programs in Python · Java · JavaScript · C# (SQLite); Oracle/SQL Server/Postgres SQL (code refs) | [Database_Integration/](./Database_Integration/) | | Large-file / streaming | lxml iterparse + Java StAX, split, delta-diff | [Large_File_Processing/](./Large_File_Processing/) | | Data binding & JSON | FundsXML⇄JSON, native Java binding, codegen refs | [Data_Binding_JSON/](./Data_Binding_JSON/) | -| Schema resolver (env → cache → official download) | In every example + [tools/fundsxml_schema.py](./tools/fundsxml_schema.py) | every stack | ## Repository Structure @@ -49,11 +48,8 @@ fundsxml_examples/ │ # wrapper: builds all Java │ # examples standalone (deps from │ # Maven Central), no preinstall -├── pyproject.toml # Python deps + the -│ # fundsxml_schema resolver module -├── tools/fundsxml_schema.py # shared in-language XSD resolver -│ # (env / cache / official URL); -│ # every stack resolves it itself +├── pyproject.toml # Python deps (lxml + saxonche); +│ # dependency-only, nothing packaged │ ├── FundsXML_Files/ # Sample documents, per version & use-case │ ├── 4.2.9/{positions,transactions,documents,regulatory,signed}/ @@ -102,10 +98,12 @@ fundsxml_examples/ ## Quick Start Every example is **standalone and cross-platform** — no bash prerequisite, no -manual dependency or schema fetching. Each language uses its own idiomatic -build system; dependencies and the official XSD are resolved automatically on -first run. **Run all commands from the repo root.** On Windows use `mvnw.cmd` -instead of `./mvnw` and `.venv\Scripts\activate` instead of the `source` line. +manual dependency fetching. Each language uses its own idiomatic build system; +dependencies are resolved automatically on first run. The XSD validators take +a schema (local path or remote URL) plus the XML file — you supply the schema, +nothing is auto-resolved. 
**Run all commands from the repo root.** On Windows +use `mvnw.cmd` instead of `./mvnw` and `.venv\Scripts\activate` instead of the +`source` line. ### One-time setup (only the toolchains you intend to use) @@ -115,27 +113,29 @@ instead of `./mvnw` and `.venv\Scripts\activate` instead of the `source` line. # .NET — .NET SDK 8+ ; `dotnet run` restores NuGet packages itself. # Node — Node 20+ ; `npm install` in Database_Integration/javascript. -# Python — one venv from pyproject.toml (lxml + saxonche + the schema resolver) +# Python — one venv from pyproject.toml (lxml + saxonche; deps only) python -m venv .venv source .venv/bin/activate # Windows: .venv\Scripts\activate pip install -e . ``` -The XSD is resolved by each example itself, in this order: -`$FUNDSXML_SCHEMA_DIR` (a hand-placed copy — offline / locked-down-network -escape hatch) → `.schema-cache/` → download from the official GitHub release. -Nothing to run up front. +The XSD is whatever you hand the validator — a local `FundsXML.xsd` path or a +remote URL (e.g. the official release +`https://github.com/fundsxml/schema/releases/download//FundsXML.xsd`). +No version arg, no cache, no env var. Nothing to run up front. 
### Try one example per area ```bash SRC=FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml +XSD=https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd +# ($XSD can equally be a local FundsXML.xsd path — same result, fully offline) -# XSD validation — pick any stack (all self-resolve the schema): -python XSD_Validation/python/validate.py 4.2.9 $SRC -dotnet run --project XSD_Validation/dotnet -- 4.2.9 $SRC -./mvnw -q -pl XSD_Validation/java compile exec:java -Dexec.args="4.2.9 $SRC" -XSD_Validation/cli/validate.sh 4.2.9 $SRC # Windows: pwsh .../validate.ps1 +# XSD validation — pick any stack (schema + xml, schema-first): +python XSD_Validation/python/validate.py "$XSD" $SRC +dotnet run --project XSD_Validation/dotnet -- "$XSD" $SRC +./mvnw -q -pl XSD_Validation/java compile exec:java -Dexec.args="$XSD $SRC" +XSD_Validation/cli/validate.sh "$XSD" $SRC # Windows: pwsh .../validate.ps1 # Schematron business rules (SVRL; exit 0 = no ERROR-role failures) ./mvnw -q -pl Schematron_DataQuality_Checks/Basic_Checks/invocation compile exec:java \ @@ -231,9 +231,10 @@ libraries (Saxon, SchXslt, Apache Santuario, SQLite drivers, lxml, saxonche, | CLI | a POSIX shell + `xmllint` (Linux/macOS) **or** PowerShell 5.1+/7+ (Windows) | n/a | | Legacy XSLT 1.0 | `xsltproc` | n/a | -Network access to the official schema release (and Maven Central / PyPI / -NuGet on first run) is expected; for locked-down environments point -`FUNDSXML_SCHEMA_DIR` at a local copy of the XSD to skip the schema download. +Network access to Maven Central / PyPI / NuGet on first run is expected. For +the XSD validators, network is only needed if you pass a schema **URL**; pass +a local `FundsXML.xsd` path (keep its `xmldsig-core-schema.xsd` sibling beside +it for 4.2.9+) to validate fully offline. 
## Asset Types in Examples diff --git a/XSD_Validation/README.md b/XSD_Validation/README.md index befa7be..cde6a95 100644 --- a/XSD_Validation/README.md +++ b/XSD_Validation/README.md @@ -1,80 +1,90 @@ # XSD Validation -Validate a FundsXML document against the **official released schema** in five -enterprise stacks. Same input, same result everywhere. +Validate a FundsXML document against an XSD in five enterprise stacks. You +give each validator exactly two things — **a schema and an XML file** — and +get a `VALID`/`INVALID` report. Same input, same result everywhere. + +``` +validate <schema> <xml-file> +``` + +`<schema>` is a path to a `FundsXML.xsd` **or a remote URL**. No version +argument, no environment variable, no cache, no resolver — whatever you point +at is used as-is. Argument order is schema-first (matching the Schematron / +XSLT / XQuery invocations in this repo). ## The schema source -Validation always targets the official release: +The canonical schema is the official release: ``` https://github.com/fundsxml/schema/releases/download/<version>/FundsXML.xsd ``` -This is **not** a hand-maintained catalog — it is the canonical released schema. -Two realities every example must deal with: - -1. **HTTP 302 redirect.** The GitHub URL redirects to - `objects.githubusercontent.com`. Processors with a naive HTTP client - (libxml2/xmllint) do not follow it. On locked-down enterprise networks the - download also goes through an HTTP proxy. +Pass that URL directly and the validator uses it. Two realities each stack +handles: + +1. **HTTP 302 redirect.** The GitHub release URL redirects to an opaque + `objects.githubusercontent.com` blob URL. 
So a URL schema (and the + relative sibling below) is fetched into a temp dir first by the Python, + CLI `validate.sh`, CLI `validate.ps1`-with-xmllint and Java stacks, then + validated locally; .NET and PowerShell `Validate-FundsXml.ps1` resolve the + URL natively via an `XmlUrlResolver` (the original URL stays the import + base, so the redirect is transparent). No version-based resolution and no + on-disk cache anywhere. 2. **Relative import.** From release 4.2.9 on, `FundsXML.xsd` imports - `xmldsig-core-schema.xsd` via a *relative* path; both files must sit together. - -**Schema resolution (same convention in every stack):** -`$FUNDSXML_SCHEMA_DIR` (a hand-placed copy — offline / corporate-network -escape hatch) → `.schema-cache//` → download from the official -GitHub release (302-aware; also pulls the imported `xmldsig-core-schema.xsd`), -caching into `.schema-cache/`. The official release stays the source of truth -— no committed catalog. - -**Every** stack does this itself now — Java (`XsdValidate`), Python -(`validate.py`), .NET (`XsdValidate.cs` + `SchemaResolver.cs`), the CLI -`validate.sh`/`validate.ps1`, and PowerShell `Validate-FundsXml.ps1` — -standalone, cross-platform, no prior step. To pre-populate the cache for a -bare `xmllint` invocation: - -```bash -python -m fundsxml_schema 4.2.9 # after `pip install -e .`; cross-platform -``` + `xmldsig-core-schema.xsd` via a *relative* path. It must be reachable next + to `` — it is in the official release directory, and the URL + stacks fetch it alongside `FundsXML.xsd`. For a **local** schema path, + keep the sibling next to it (any complete copy of a release has it). ## Security -Every example disables external entity resolution / DTD loading -(`FEATURE_SECURE_PROCESSING`, `resolve_entities=False`, `XmlResolver=null`, -`-nonet`) — FundsXML never needs them and they are a classic XXE vector. 
+The *instance* document is parsed with external entity resolution / DTD +loading disabled (`FEATURE_SECURE_PROCESSING`, `resolve_entities=False`, +`XmlResolver=null`, `--nonet`) — FundsXML never needs them and they are a +classic XXE vector. Only the trusted, caller-supplied schema is fetched over +the network. ## Stacks -| Stack | Script | API | Runnable on this box | -|-------|--------|-----|----------------------| -| CLI (Linux/macOS) | [`cli/validate.sh`](cli/validate.sh) | `xmllint` (POSIX sh) | ✅ standalone (self-resolving) | -| CLI (Windows) | [`cli/validate.ps1`](cli/validate.ps1) | `xmllint` or .NET fallback | ✅ standalone (self-resolving) | -| Python | [`python/validate.py`](python/validate.py) | `lxml.etree.XMLSchema` | ✅ standalone (`pip install -e .`) | -| Java | [`java/XsdValidate.java`](java/XsdValidate.java) | `javax.xml.validation` | ✅ standalone (`./mvnw`) | -| .NET/C# | [`dotnet/XsdValidate.cs`](dotnet/XsdValidate.cs) | `XmlSchemaSet` | ✅ standalone (`dotnet run`) | -| PowerShell | [`powershell/Validate-FundsXml.ps1`](powershell/Validate-FundsXml.ps1) | `System.Xml.Schema` | ✅ standalone (self-resolving) | +| Stack | Script | API | URL schema handling | +|-------|--------|-----|---------------------| +| CLI (Linux/macOS) | [`cli/validate.sh`](cli/validate.sh) | `xmllint` (POSIX sh) | fetch to temp, validate `--nonet` | +| CLI (Windows) | [`cli/validate.ps1`](cli/validate.ps1) | `xmllint` or .NET fallback | fetch to temp (xmllint) / `XmlUrlResolver` (.NET) | +| Python | [`python/validate.py`](python/validate.py) | `lxml.etree.XMLSchema` | fetch to temp (libxml2 has no HTTP loader) | +| Java | [`java/XsdValidate.java`](java/XsdValidate.java) | `javax.xml.validation` | fetch to temp | +| .NET/C# | [`dotnet/XsdValidate.cs`](dotnet/XsdValidate.cs) | `XmlSchemaSet` | native `XmlUrlResolver` | +| PowerShell | [`powershell/Validate-FundsXml.ps1`](powershell/Validate-FundsXml.ps1) | `System.Xml.Schema` | native `XmlUrlResolver` | -Convention: each takes 
` `, exits `0` on valid, `1` on -invalid, prints errors to stderr. +Convention: each takes `<schema> <xml-file>`, exits `0` on valid, `1` on +invalid, `2` on usage/setup error; prints errors to stderr. ## Quick check (positive + negative) -Python (standalone — resolves the schema itself; `pip install -e .` once, see -the repo `pyproject.toml`): +A reusable schema reference (use a local path or the release URL — both work +identically): + +```bash +REL=https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd +``` + +Python (standalone — `pip install -e .` once for `lxml`, see the repo +`pyproject.toml`): ```bash python -m venv .venv && . .venv/bin/activate && pip install -e . # Windows: .venv\Scripts\activate -python XSD_Validation/python/validate.py 4.2.9 FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml # exit 0 -python XSD_Validation/python/validate.py 4.2.9 tests/fixtures/invalid/xsd-invalid_Positions.xml # exit 1 +python XSD_Validation/python/validate.py "$REL" FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml # exit 0 +python XSD_Validation/python/validate.py "$REL" tests/fixtures/invalid/xsd-invalid_Positions.xml # exit 1 ``` -Java (standalone): `./mvnw -q -pl XSD_Validation/java compile exec:java -Dexec.args="4.2.9 "` +Java (standalone): `./mvnw -q -pl XSD_Validation/java compile exec:java -Dexec.args="$REL <xml-file>"` (`mvnw.cmd` on Windows). -.NET (standalone): `dotnet run --project XSD_Validation/dotnet -- 4.2.9 ` +.NET (standalone): `dotnet run --project XSD_Validation/dotnet -- "$REL" <xml-file>` (exit 0 valid / 1 invalid). -CLI (standalone, self-resolving): `XSD_Validation/cli/validate.sh 4.2.9 ` -(Linux/macOS) or `pwsh XSD_Validation/cli/validate.ps1 4.2.9 ` -(Windows — uses `xmllint` if present, else the built-in .NET validator). +CLI: `XSD_Validation/cli/validate.sh "$REL" <xml-file>` (Linux/macOS) or +`pwsh XSD_Validation/cli/validate.ps1 "$REL" <xml-file>` (Windows — uses +`xmllint` if present, else the built-in .NET validator). 
Swap `"$REL"` for a +local `FundsXML.xsd` path to validate fully offline. diff --git a/XSD_Validation/cli/validate.ps1 b/XSD_Validation/cli/validate.ps1 index da9b4bc..2f8c5a0 100644 --- a/XSD_Validation/cli/validate.ps1 +++ b/XSD_Validation/cli/validate.ps1 @@ -1,98 +1,96 @@ <# .SYNOPSIS XSD validation from the command line on Windows — the counterpart of - validate.sh. Standalone: resolves the official schema itself (no prior step). + validate.sh. You give it exactly two things: a schema and an XML file. .DESCRIPTION - Same 3-step convention as every other stack: - 1. $env:FUNDSXML_SCHEMA_DIR — a dir with FundsXML.xsd (+ the xmldsig - sibling for 4.2.9+). Used as-is, NO network (offline / corporate - escape hatch). - 2. .schema-cache\\FundsXML.xsd — reused if present. - 3. download from the official GitHub release (Invoke-WebRequest follows - the 302), caching into .schema-cache\; the relative - xmldsig-core-schema.xsd sibling is fetched only when imported (4.2.9+). + is a path to an XSD file OR a remote URL, e.g. the official + release: + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd + No version, no env var, no cache — whatever you point at is used as-is. - Validation uses xmllint when it is on PATH, otherwise the built-in .NET - System.Xml.Schema (always available on Windows) — so no extra tool to - install. Works in Windows PowerShell 5.1 and PowerShell 7+. + Validation uses xmllint when it is on PATH (with --nonet for XXE / external- + entity hardening), otherwise the built-in .NET System.Xml.Schema (always + available on Windows) — so no extra tool to install. To keep the xmllint + hardening while still accepting a remote schema, a URL schema (and the + relative xmldsig-core-schema.xsd sibling FundsXML 4.2.9+ imports) is fetched + into a temp dir first, then validated offline. The .NET fallback takes a + path or URL directly (its schema-set URL resolver handles the import). 
+ A local schema path's xmldsig sibling, if imported, must sit next to it. -.PARAMETER Version FundsXML version, e.g. 4.2.9 + Works in Windows PowerShell 5.1 and PowerShell 7+. + +.PARAMETER Schema path to FundsXML.xsd, or a remote URL .PARAMETER XmlFile the instance document to validate .EXAMPLE - pwsh XSD_Validation/cli/validate.ps1 4.2.9 FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + pwsh XSD_Validation/cli/validate.ps1 ` + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd ` + FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml #> [CmdletBinding()] param( - [Parameter(Mandatory = $true, Position = 0)] [string] $Version, + [Parameter(Mandatory = $true, Position = 0)] [string] $Schema, [Parameter(Mandatory = $true, Position = 1)] [string] $XmlFile ) $ErrorActionPreference = 'Stop' -$repoRoot = (Resolve-Path (Join-Path $PSScriptRoot '..\..')).Path -$base = "https://github.com/fundsxml/schema/releases/download/$Version" - -function Get-File($url, $dest) { - Write-Host "schema: fetch $url" -ForegroundColor DarkGray - Invoke-WebRequest -Uri $url -OutFile $dest -MaximumRedirection 5 -UseBasicParsing -} +$origSchema = $Schema +$schemaPath = $Schema +$tmpDir = $null -if ($env:FUNDSXML_SCHEMA_DIR) { - $schema = Join-Path $env:FUNDSXML_SCHEMA_DIR 'FundsXML.xsd' - if (-not (Test-Path $schema)) { - Write-Error "FUNDSXML_SCHEMA_DIR set but $schema not found"; exit 2 +$xmllint = Get-Command xmllint -ErrorAction SilentlyContinue +if ($xmllint -and $Schema -match '^https?://') { + # Remote schema + xmllint: materialise it (and the xmldsig sibling it may + # import) into a temp dir so the instance can be validated with --nonet. 
+ $tmpDir = New-Item -ItemType Directory -Path (Join-Path ([System.IO.Path]::GetTempPath()) ([System.IO.Path]::GetRandomFileName())) + $schemaPath = Join-Path $tmpDir 'FundsXML.xsd' + Write-Host "schema: fetch $Schema" -ForegroundColor DarkGray + Invoke-WebRequest -Uri $Schema -OutFile $schemaPath -MaximumRedirection 5 -UseBasicParsing + if (Select-String -Path $schemaPath -Pattern 'xmldsig-core-schema\.xsd' -Quiet) { + $sib = ($Schema -replace '/[^/]+$', '/xmldsig-core-schema.xsd') + Write-Host "schema: fetch $sib" -ForegroundColor DarkGray + Invoke-WebRequest -Uri $sib -OutFile (Join-Path $tmpDir 'xmldsig-core-schema.xsd') -MaximumRedirection 5 -UseBasicParsing } - Write-Host "schema: using `$FUNDSXML_SCHEMA_DIR -> $schema" } -else { - $cacheDir = Join-Path $repoRoot ".schema-cache\$Version" - $schema = Join-Path $cacheDir 'FundsXML.xsd' - if (Test-Path $schema) { - Write-Host "schema: cached -> $schema" + +try { + if ($xmllint) { + & $xmllint.Source --noout --nonet --schema $schemaPath $XmlFile + if ($LASTEXITCODE -eq 0) { Write-Host "VALID: $XmlFile (schema $origSchema)"; exit 0 } + Write-Error "INVALID: $XmlFile (schema $origSchema)"; exit 1 } - else { - New-Item -ItemType Directory -Force -Path $cacheDir | Out-Null - Get-File "$base/FundsXML.xsd" $schema - if (Select-String -Path $schema -Pattern 'xmldsig-core-schema\.xsd' -Quiet) { - Get-File "$base/xmldsig-core-schema.xsd" (Join-Path $cacheDir 'xmldsig-core-schema.xsd') + + # No xmllint: validate with the built-in .NET schema validator. The schema + # set gets a URL resolver so a remote schema and the relative xmldsig import + # resolve; the instance document is read with no resolver (XXE-hardened). 
+ Add-Type -AssemblyName System.Xml + $set = New-Object System.Xml.Schema.XmlSchemaSet + $set.XmlResolver = New-Object System.Xml.XmlUrlResolver + [void]$set.Add($null, $schemaPath) + $rs = New-Object System.Xml.XmlReaderSettings + $rs.ValidationType = [System.Xml.ValidationType]::Schema + $rs.Schemas = $set + $rs.DtdProcessing = [System.Xml.DtdProcessing]::Prohibit + $rs.XmlResolver = $null + $script:bad = $false + $handler = [System.Xml.Schema.ValidationEventHandler] { + param($s, $e) + if ($e.Severity -eq [System.Xml.Schema.XmlSeverityType]::Error) { + $script:bad = $true + Write-Host (" " + $e.Message) } } -} - -$xmllint = Get-Command xmllint -ErrorAction SilentlyContinue -if ($xmllint) { - & $xmllint.Source --noout --nonet --schema $schema $XmlFile - if ($LASTEXITCODE -eq 0) { Write-Host "VALID: $XmlFile (FundsXML $Version)"; exit 0 } - Write-Error "INVALID: $XmlFile (FundsXML $Version)"; exit 1 -} - -# No xmllint: validate with the built-in .NET schema validator. The schema set -# gets a URL resolver only so the relative xmldsig import resolves; the -# instance document is read with no resolver (XXE-hardened). 
-Add-Type -AssemblyName System.Xml -$set = New-Object System.Xml.Schema.XmlSchemaSet -$set.XmlResolver = New-Object System.Xml.XmlUrlResolver -[void]$set.Add($null, $schema) -$rs = New-Object System.Xml.XmlReaderSettings -$rs.ValidationType = [System.Xml.ValidationType]::Schema -$rs.Schemas = $set -$rs.DtdProcessing = [System.Xml.DtdProcessing]::Prohibit -$rs.XmlResolver = $null -$script:bad = $false -$handler = [System.Xml.Schema.ValidationEventHandler] { - param($s, $e) - if ($e.Severity -eq [System.Xml.Schema.XmlSeverityType]::Error) { - $script:bad = $true - Write-Host (" " + $e.Message) + $rs.add_ValidationEventHandler($handler) + try { + $r = [System.Xml.XmlReader]::Create($XmlFile, $rs) + while ($r.Read()) { } + $r.Close() } + catch { $script:bad = $true; Write-Host (" " + $_.Exception.Message) } + if ($script:bad) { Write-Error "INVALID: $XmlFile (schema $origSchema)"; exit 1 } + Write-Host "VALID: $XmlFile (schema $origSchema)" + exit 0 } -$rs.add_ValidationEventHandler($handler) -try { - $r = [System.Xml.XmlReader]::Create($XmlFile, $rs) - while ($r.Read()) { } - $r.Close() +finally { + if ($tmpDir) { Remove-Item -Recurse -Force $tmpDir -ErrorAction SilentlyContinue } } -catch { $script:bad = $true; Write-Host (" " + $_.Exception.Message) } -if ($script:bad) { Write-Error "INVALID: $XmlFile (FundsXML $Version)"; exit 1 } -Write-Host "VALID: $XmlFile (FundsXML $Version)" -exit 0 diff --git a/XSD_Validation/cli/validate.sh b/XSD_Validation/cli/validate.sh index 36cda24..9737a71 100755 --- a/XSD_Validation/cli/validate.sh +++ b/XSD_Validation/cli/validate.sh @@ -1,69 +1,61 @@ #!/bin/sh # XSD validation from the command line with xmllint (Linux/macOS). # -# Usage: XSD_Validation/cli/validate.sh +# Usage: XSD_Validation/cli/validate.sh # Exit: 0 = valid, 1 = invalid, 2 = usage/setup error # -# Standalone: this script resolves the official schema itself — no prior tool -# step. Same 3-step convention as every other stack: -# 1. 
$FUNDSXML_SCHEMA_DIR — a dir with FundsXML.xsd (+ xmldsig sibling for -# 4.2.9+). Used as-is, NO network (offline / corporate-network escape). -# 2. .schema-cache//FundsXML.xsd — reused if present. -# 3. download from the official GitHub release (curl -L follows the 302), -# caching into .schema-cache/; the relative xmldsig-core-schema.xsd -# sibling is fetched only when FundsXML.xsd imports it (4.2.9+). +# You give it exactly two things: the schema and the instance document. +# is a path to an XSD file OR a remote URL, e.g. the official +# release: https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd +# No version, no env var, no cache — whatever you point at is used as-is. +# +# xmllint validates the instance with --nonet (XXE / external-entity +# hardening). To keep that hardening while still accepting a *remote* schema, +# a URL schema (and, when it imports it, the relative xmldsig-core-schema.xsd +# sibling that FundsXML 4.2.9+ needs) is fetched into a temp dir first, then +# validation runs offline against that local copy. A local schema path is +# passed straight through (its xmldsig sibling, if imported, must sit next +# to it — as it does in any complete copy of an official release). # # POSIX sh (no bash-isms) so it runs under dash/ash too. The Windows # counterpart is validate.ps1 in this directory. set -eu -VERSION="${1:-}" +SCHEMA="${1:-}" XML="${2:-}" -if [ -z "$VERSION" ] || [ -z "$XML" ]; then - echo "usage: validate.sh " >&2 +if [ -z "$SCHEMA" ] || [ -z "$XML" ]; then + echo "usage: validate.sh " >&2 exit 2 fi -REPO_ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/../.." && pwd) -BASE="https://github.com/fundsxml/schema/releases/download/${VERSION}" - -fetch() { # url dest - echo "schema: fetch $1" >&2 - curl -sSL --fail -m 60 "$1" -o "$2" -} +TMP= +trap 'rm -f "${ERR:-}"; [ -n "$TMP" ] && rm -rf "$TMP"' EXIT -if [ -n "${FUNDSXML_SCHEMA_DIR:-}" ]; then - SCHEMA="${FUNDSXML_SCHEMA_DIR}/FundsXML.xsd" - if [ ! 
-f "$SCHEMA" ]; then - echo "FUNDSXML_SCHEMA_DIR set but $SCHEMA not found" >&2 - exit 2 - fi - echo "schema: using \$FUNDSXML_SCHEMA_DIR -> $SCHEMA" >&2 -else - CACHE_DIR="${REPO_ROOT}/.schema-cache/${VERSION}" - SCHEMA="${CACHE_DIR}/FundsXML.xsd" - if [ -f "$SCHEMA" ]; then - echo "schema: cached -> $SCHEMA" >&2 - else - mkdir -p "$CACHE_DIR" - fetch "${BASE}/FundsXML.xsd" "$SCHEMA" - if grep -q 'xmldsig-core-schema\.xsd' "$SCHEMA"; then - fetch "${BASE}/xmldsig-core-schema.xsd" \ - "${CACHE_DIR}/xmldsig-core-schema.xsd" +case "$SCHEMA" in + http://*|https://*) + # Remote schema: materialise it (and the xmldsig sibling it may import) + # into a temp dir so the instance can still be validated with --nonet. + TMP=$(mktemp -d) + LOCAL="$TMP/FundsXML.xsd" + echo "schema: fetch $SCHEMA" >&2 + curl -sSL --fail -m 60 "$SCHEMA" -o "$LOCAL" + if grep -q 'xmldsig-core-schema\.xsd' "$LOCAL"; then + SIB_URL="${SCHEMA%/*}/xmldsig-core-schema.xsd" + echo "schema: fetch $SIB_URL" >&2 + curl -sSL --fail -m 60 "$SIB_URL" -o "$TMP/xmldsig-core-schema.xsd" fi - fi -fi + SCHEMA="$LOCAL" + ;; +esac -# --nonet: never hit the network during validation (XXE / entity hardening); -# the schema was already materialised above. +# --nonet: never hit the network during validation (XXE / entity hardening). ERR=$(mktemp) -trap 'rm -f "$ERR"' EXIT if xmllint --noout --nonet --schema "$SCHEMA" "$XML" 2>"$ERR"; then - echo "VALID: $XML (FundsXML $VERSION)" + echo "VALID: $XML (schema $1)" exit 0 else - echo "INVALID: $XML (FundsXML $VERSION)" >&2 + echo "INVALID: $XML (schema $1)" >&2 cat "$ERR" >&2 exit 1 fi diff --git a/XSD_Validation/dotnet/SchemaResolver.cs b/XSD_Validation/dotnet/SchemaResolver.cs deleted file mode 100644 index 2274055..0000000 --- a/XSD_Validation/dotnet/SchemaResolver.cs +++ /dev/null @@ -1,85 +0,0 @@ -// SchemaResolver — obtain the official FundsXML XSD for a version, standalone -// & cross-platform (no bash, no prior tool step; works on Windows). 
-// -// Same 3-step convention as the Java (XsdValidate.java) and Python -// (tools/fundsxml_schema.py) resolvers: -// -// 1. $FUNDSXML_SCHEMA_DIR — a directory holding FundsXML.xsd (+ the -// xmldsig-core-schema.xsd sibling for 4.2.9+). Used as-is, NO network. -// The escape hatch for locked-down corporate networks / offline use. -// 2. /.schema-cache//FundsXML.xsd — reused if present. -// 3. download from the official GitHub release (HttpClient follows the -// 302 by default), caching into .schema-cache//; the relative -// xmldsig-core-schema.xsd sibling is fetched only when imported (4.2.9+). -// -// The source of truth stays the official release URL — no committed catalog. -// Kept as a second file in this same project (one self-contained example), -// mirroring how the Java example inlines its resolver. - -using System; -using System.IO; -using System.Net.Http; - -internal static class SchemaResolver -{ - private const string ReleaseBase = - "https://github.com/fundsxml/schema/releases/download/"; - - /// Resolve FundsXML.xsd for . - internal static string Resolve(string version) - { - // 1. Offline / corporate-network escape hatch: a hand-placed copy. - string envDir = Environment.GetEnvironmentVariable("FUNDSXML_SCHEMA_DIR"); - if (!string.IsNullOrWhiteSpace(envDir)) - { - string envXsd = Path.Combine(envDir, "FundsXML.xsd"); - if (!File.Exists(envXsd)) - { - throw new FileNotFoundException( - $"FUNDSXML_SCHEMA_DIR set but {envXsd} not found"); - } - Console.Error.WriteLine( - $"schema: using $FUNDSXML_SCHEMA_DIR -> {envXsd}"); - return envXsd; - } - - // 2. Local cache (shared by every stack, gitignored). The examples are - // documented to run from the repo root. - string cacheDir = Path.Combine( - Directory.GetCurrentDirectory(), ".schema-cache", version); - string xsd = Path.Combine(cacheDir, "FundsXML.xsd"); - if (File.Exists(xsd)) - { - Console.Error.WriteLine($"schema: cached -> {xsd}"); - return xsd; - } - - // 3. 
Download from the official release (source of truth). - Directory.CreateDirectory(cacheDir); - Download($"{ReleaseBase}{version}/FundsXML.xsd", xsd); - // From 4.2.9 on, FundsXML.xsd imports xmldsig-core-schema.xsd via a - // relative path — it must sit next to FundsXML.xsd or the schema does - // not compile. Fetch the sibling only when it is actually referenced. - if (File.ReadAllText(xsd).Contains("xmldsig-core-schema.xsd")) - { - Download($"{ReleaseBase}{version}/xmldsig-core-schema.xsd", - Path.Combine(cacheDir, "xmldsig-core-schema.xsd")); - } - return xsd; - } - - // HttpClient follows the GitHub 302 (AllowAutoRedirect is on by default). - private static void Download(string url, string outPath) - { - Console.Error.WriteLine($"schema: fetch {url}"); - using var http = new HttpClient(); - byte[] body = http.GetByteArrayAsync(url).GetAwaiter().GetResult(); - string tmp = outPath + ".part"; - File.WriteAllBytes(tmp, body); - if (File.Exists(outPath)) - { - File.Delete(outPath); - } - File.Move(tmp, outPath); - } -} diff --git a/XSD_Validation/dotnet/XsdValidate.cs b/XSD_Validation/dotnet/XsdValidate.cs index 7261df0..05d4ada 100644 --- a/XSD_Validation/dotnet/XsdValidate.cs +++ b/XSD_Validation/dotnet/XsdValidate.cs @@ -2,22 +2,23 @@ // // Standalone & cross-platform (no bash, no prior tool step) with the .NET SDK, // run from the repo root: -// dotnet run --project XSD_Validation/dotnet -- +// dotnet run --project XSD_Validation/dotnet -- // Exit: 0 valid, 1 invalid, 2 usage/setup error. // -// The official released schema is obtained by this example itself via the -// sibling SchemaResolver (see SchemaResolver.cs): $FUNDSXML_SCHEMA_DIR -// (offline/corporate escape hatch) -> .schema-cache/ -> download from the -// official GitHub release (following the 302; fetching the relative -// xmldsig-core-schema.xsd sibling FundsXML 4.2.9+ imports). The official -// release stays the source of truth — no committed catalog. 
+// You give it exactly two things: the schema and the instance. is a +// path to an XSD file OR a remote URL, e.g. the official release: +// https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd +// No version, no env var, no cache, no resolver — whatever you point at is +// used as-is. For FundsXML 4.2.9+ the schema imports xmldsig-core-schema.xsd +// via a relative path, so that sibling must be reachable next to +// (it is, in the official release directory and in any complete local copy). // -// Security: XmlResolver = null on the reader closes XXE / external-entity -// vectors. An XmlUrlResolver is used ONLY to resolve the schema set's local -// relative xmldsig import, never for instance documents. +// Security: XmlResolver = null on the instance reader closes XXE / external- +// entity vectors. An XmlUrlResolver is used ONLY for the schema set, so a +// remote schema and the schema's relative xmldsig import resolve — never for +// instance documents. using System; -using System.IO; using System.Xml; using System.Xml.Schema; @@ -27,32 +28,29 @@ private static int Main(string[] args) { if (args.Length != 2) { - Console.Error.WriteLine("usage: XsdValidate "); + Console.Error.WriteLine("usage: XsdValidate "); return 2; } - string version = args[0]; + string schemaArg = args[0]; string xmlFile = args[1]; - string schemaPath; + var schemas = new XmlSchemaSet + { + // Resolves a remote schema URL and the schema's relative + // xmldsig-core-schema.xsd import (4.2.9+) from the same location. + XmlResolver = new XmlUrlResolver() + }; try { - schemaPath = SchemaResolver.Resolve(version); + schemas.Add(null, schemaArg); } catch (Exception ex) { - Console.Error.WriteLine($"schema resolution failed: {ex.Message}"); + Console.Error.WriteLine($"schema load failed: {ex.Message}"); return 2; } - var schemas = new XmlSchemaSet - { - // Needed only so the schema's relative xmldsig-core-schema.xsd - // import (4.2.9+) resolves from the same directory. 
- XmlResolver = new XmlUrlResolver() - }; - schemas.Add(null, schemaPath); - bool failed = false; var settings = new XmlReaderSettings { @@ -86,11 +84,11 @@ private static int Main(string[] args) if (failed) { Console.Error.WriteLine( - $"INVALID: {xmlFile} (FundsXML {version})"); + $"INVALID: {xmlFile} (schema {schemaArg})"); return 1; } - Console.WriteLine($"VALID: {xmlFile} (FundsXML {version})"); + Console.WriteLine($"VALID: {xmlFile} (schema {schemaArg})"); return 0; } } diff --git a/XSD_Validation/java/XsdValidate.java b/XSD_Validation/java/XsdValidate.java index 7efe7af..2cd26af 100644 --- a/XSD_Validation/java/XsdValidate.java +++ b/XSD_Validation/java/XsdValidate.java @@ -3,22 +3,27 @@ // Standalone & cross-platform — no prior tool, no bash, works on Windows. // Run from the repo root with the committed Maven Wrapper: // ./mvnw -q -pl XSD_Validation/java compile exec:java \ -// -Dexec.args="4.2.9 FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" +// -Dexec.args="https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ +// FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml" // Exit: 0 = valid, 1 = invalid, 2 = usage/setup error // -// The official released schema is obtained by this program itself, in this -// order (identical convention across all stacks): -// 1. $FUNDSXML_SCHEMA_DIR — a directory holding FundsXML.xsd (+ the -// xmldsig-core-schema.xsd sibling for 4.2.9+). Used as-is, NO network. -// The escape hatch for locked-down corporate networks / offline use. -// 2. .schema-cache//FundsXML.xsd — reused if already present. -// 3. download from the official GitHub release (following the 302), caching -// into .schema-cache//; the relative xmldsig-core-schema.xsd -// sibling is fetched only when FundsXML.xsd actually imports it (4.2.9+). -// The source of truth stays the official release URL — no committed catalog. +// You give it exactly two things: the schema and the instance. 
is a +// path to an XSD file OR a remote URL (e.g. the official release shown above). +// No version, no env var, no cache, no resolver — whatever you point at is +// used as-is. For FundsXML 4.2.9+ the schema imports xmldsig-core-schema.xsd +// via a relative path, so that sibling must be reachable next to +// (it is, in the official release directory and in any complete local copy). // -// Security: FEATURE_SECURE_PROCESSING on, external DTD/schema access denied, -// XXE vectors closed. FundsXML needs no external entities. +// A URL schema (and, when imported, the xmldsig sibling) is fetched into a +// temp dir first, then validated from there. The official release URL 302- +// redirects to an opaque blob URL; resolving the schema's *relative* xmldsig +// import against that post-redirect URL would fail, so the fetch is done +// here (it also keeps the 302 handled) and the relative import then resolves +// locally — identical behaviour for a path or a URL. +// +// Security: FEATURE_SECURE_PROCESSING on; the instance's external DTD access +// is denied (ACCESS_EXTERNAL_DTD = ""), closing XXE vectors. Only the trusted, +// user-supplied schema is fetched over the network. 
import java.io.File; import java.net.URI; @@ -27,8 +32,6 @@ import java.net.http.HttpResponse; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.StandardCopyOption; import javax.xml.XMLConstants; import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; @@ -41,99 +44,84 @@ public class XsdValidate { public static void main(String[] args) throws Exception { if (args.length != 2) { - System.err.println("usage: XsdValidate "); + System.err.println("usage: XsdValidate "); System.exit(2); } - String version = args[0]; + String schemaArg = args[0]; String xmlFile = args[1]; - File schema = resolveSchema(version).toFile(); + Path tmpDir = null; + File schemaFile; + try { + if (schemaArg.matches("^https?://.*")) { + tmpDir = Files.createTempDirectory("fxsd"); + Path local = tmpDir.resolve("FundsXML.xsd"); + download(schemaArg, local); + // FundsXML 4.2.9+ imports xmldsig-core-schema.xsd via a + // relative path; fetch that sibling from the same URL dir + // only when it is actually referenced. + if (Files.readString(local).contains("xmldsig-core-schema.xsd")) { + String sib = schemaArg.substring( + 0, schemaArg.lastIndexOf('/') + 1) + + "xmldsig-core-schema.xsd"; + download(sib, tmpDir.resolve("xmldsig-core-schema.xsd")); + } + schemaFile = local.toFile(); + } else { + schemaFile = new File(schemaArg); + } + + SchemaFactory factory = + SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + // Local file access only — the schema is already materialised; + // the instance's DTD access stays denied (XXE hardening). 
+ factory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "file"); + factory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); - SchemaFactory factory = - SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); - factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - // Allow only local file access so the relative xmldsig-core-schema.xsd - // import (4.2.9+) resolves; block http/external fetches. - factory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "file"); - factory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + Schema fundsXmlSchema = factory.newSchema(schemaFile); + Validator validator = fundsXmlSchema.newValidator(); + validator.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + validator.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "file"); - Schema fundsXmlSchema = factory.newSchema(schema); - Validator validator = fundsXmlSchema.newValidator(); - validator.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); - validator.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "file"); + final boolean[] failed = {false}; + validator.setErrorHandler(new ErrorHandler() { + public void warning(SAXParseException e) { } + public void error(SAXParseException e) { report(e); } + public void fatalError(SAXParseException e) { report(e); } + private void report(SAXParseException e) { + failed[0] = true; + System.err.println(" line " + e.getLineNumber() + ": " + + e.getMessage()); + } + }); - final boolean[] failed = {false}; - validator.setErrorHandler(new ErrorHandler() { - public void warning(SAXParseException e) { } - public void error(SAXParseException e) { report(e); } - public void fatalError(SAXParseException e) { report(e); } - private void report(SAXParseException e) { + try { + validator.validate(new StreamSource(new File(xmlFile))); + } catch (SAXParseException e) { failed[0] = true; System.err.println(" line " + e.getLineNumber() + ": " + e.getMessage()); } - }); - - try { - validator.validate(new StreamSource(new 
File(xmlFile))); - } catch (SAXParseException e) { - failed[0] = true; - System.err.println(" line " + e.getLineNumber() + ": " - + e.getMessage()); - } - - if (failed[0]) { - System.err.println("INVALID: " + xmlFile + " (FundsXML " + version + ")"); - System.exit(1); - } - System.out.println("VALID: " + xmlFile + " (FundsXML " + version + ")"); - } - static final String RELEASE_BASE = - "https://github.com/fundsxml/schema/releases/download/"; - - /** - * Resolve FundsXML.xsd for {@code version}: env-var dir, else local cache, - * else download from the official GitHub release (caching the result). - * The xmldsig-core-schema.xsd sibling is fetched only when imported. - */ - static Path resolveSchema(String version) throws Exception { - // 1. Offline / corporate-network escape hatch: a hand-placed copy. - String envDir = System.getenv("FUNDSXML_SCHEMA_DIR"); - if (envDir != null && !envDir.isBlank()) { - Path xsd = Paths.get(envDir, "FundsXML.xsd"); - if (!Files.isRegularFile(xsd)) { - System.err.println("FUNDSXML_SCHEMA_DIR set but " - + xsd + " not found"); - System.exit(2); + if (failed[0]) { + System.err.println("INVALID: " + xmlFile + " (schema " + + schemaArg + ")"); + System.exit(1); + } + System.out.println("VALID: " + xmlFile + " (schema " + + schemaArg + ")"); + } finally { + if (tmpDir != null) { + try (var paths = Files.walk(tmpDir)) { + paths.sorted(java.util.Comparator.reverseOrder()) + .forEach(p -> p.toFile().delete()); + } } - System.err.println("schema: using $FUNDSXML_SCHEMA_DIR -> " + xsd); - return xsd; - } - - // 2. Local cache (shared by every stack, gitignored). - Path cacheDir = Paths.get(System.getProperty("user.dir"), - ".schema-cache", version); - Path xsd = cacheDir.resolve("FundsXML.xsd"); - if (Files.isRegularFile(xsd)) { - System.err.println("schema: cached -> " + xsd); - return xsd; - } - - // 3. Download from the official release (source of truth). 
- Files.createDirectories(cacheDir); - download(RELEASE_BASE + version + "/FundsXML.xsd", xsd); - // From 4.2.9 on, FundsXML.xsd imports xmldsig-core-schema.xsd via a - // relative path — it must sit next to FundsXML.xsd or the schema does - // not compile. Fetch the sibling only when it is actually referenced. - if (Files.readString(xsd).contains("xmldsig-core-schema.xsd")) { - download(RELEASE_BASE + version + "/xmldsig-core-schema.xsd", - cacheDir.resolve("xmldsig-core-schema.xsd")); } - return xsd; } - /** GET {@code url} (following the GitHub 302) atomically into {@code out}. */ + /** GET {@code url} (following the GitHub 302) into {@code out}. */ static void download(String url, Path out) throws Exception { System.err.println("schema: fetch " + url); HttpClient client = HttpClient.newBuilder() @@ -146,8 +134,6 @@ static void download(String url, Path out) throws Exception { + "): " + url); System.exit(2); } - Path tmp = Files.createTempFile(out.getParent(), "xsd", ".part"); - Files.write(tmp, resp.body()); - Files.move(tmp, out, StandardCopyOption.REPLACE_EXISTING); + Files.write(out, resp.body()); } } diff --git a/XSD_Validation/powershell/Validate-FundsXml.ps1 b/XSD_Validation/powershell/Validate-FundsXml.ps1 index f2b7b08..8d52c13 100644 --- a/XSD_Validation/powershell/Validate-FundsXml.ps1 +++ b/XSD_Validation/powershell/Validate-FundsXml.ps1 @@ -3,76 +3,48 @@ XSD validation in PowerShell via System.Xml.Schema. .DESCRIPTION - Standalone & cross-platform — resolves the official released schema - itself (no prior tool step). Same 3-step convention as every stack: - $env:FUNDSXML_SCHEMA_DIR (offline/corporate escape hatch) -> - .schema-cache\ -> download from the official GitHub release - (Invoke-WebRequest follows the 302; pulls the xmldsig-core-schema.xsd - sibling that FundsXML 4.2.9+ imports). + Standalone & cross-platform — you give it exactly two things: a schema + and an XML file. is a path to an XSD file OR a remote URL, e.g. 
+ the official release: + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd + No version, no env var, no cache, no resolver — whatever you point at is + used as-is. Security: the instance document is read with XmlResolver = $null and - DtdProcessing = Prohibit to close XXE / external-entity vectors. A - URL resolver is used only for the schema set's local relative import. + DtdProcessing = Prohibit to close XXE / external-entity vectors. A URL + resolver is used only for the schema set, so a remote schema and the + schema's relative xmldsig-core-schema.xsd import (FundsXML 4.2.9+) + resolve. A local schema path's xmldsig sibling, if imported, must sit + next to it (it does in any complete copy of an official release). Works in Windows PowerShell 5.1 and PowerShell 7+. -.PARAMETER Version - FundsXML version, e.g. 4.2.9 +.PARAMETER Schema + Path to FundsXML.xsd, or a remote URL. .PARAMETER XmlFile Path to the FundsXML instance document. .EXAMPLE - pwsh XSD_Validation/powershell/Validate-FundsXml.ps1 4.2.9 ` - FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml + pwsh XSD_Validation/powershell/Validate-FundsXml.ps1 ` + https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd ` + FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml .OUTPUTS Exit code 0 = valid, 1 = invalid, 2 = usage/setup error. #> param( - [Parameter(Mandatory = $true)][string]$Version, + [Parameter(Mandatory = $true)][string]$Schema, [Parameter(Mandatory = $true)][string]$XmlFile ) $ErrorActionPreference = 'Stop' -# Nested Join-Path: the 3-arg form (-AdditionalChildPath) is PS 7+ only; -# Windows PowerShell 5.1 accepts just -Path/-ChildPath. 
-$repoRoot = (Resolve-Path (Join-Path (Join-Path $PSScriptRoot '..') '..')).Path -$base = "https://github.com/fundsxml/schema/releases/download/$Version" - -function Get-File($url, $dest) { - Write-Host "schema: fetch $url" -ForegroundColor DarkGray - Invoke-WebRequest -Uri $url -OutFile $dest -MaximumRedirection 5 -UseBasicParsing -} - -if ($env:FUNDSXML_SCHEMA_DIR) { - $schemaPath = Join-Path $env:FUNDSXML_SCHEMA_DIR 'FundsXML.xsd' - if (-not (Test-Path $schemaPath)) { - Write-Error "FUNDSXML_SCHEMA_DIR set but $schemaPath not found"; exit 2 - } - Write-Host "schema: using `$FUNDSXML_SCHEMA_DIR -> $schemaPath" -} -else { - $cacheDir = Join-Path $repoRoot ".schema-cache/$Version" - $schemaPath = Join-Path $cacheDir 'FundsXML.xsd' - if (Test-Path $schemaPath) { - Write-Host "schema: cached -> $schemaPath" - } - else { - New-Item -ItemType Directory -Force -Path $cacheDir | Out-Null - Get-File "$base/FundsXML.xsd" $schemaPath - if (Select-String -Path $schemaPath -Pattern 'xmldsig-core-schema\.xsd' -Quiet) { - Get-File "$base/xmldsig-core-schema.xsd" (Join-Path $cacheDir 'xmldsig-core-schema.xsd') - } - } -} - $schemas = New-Object System.Xml.Schema.XmlSchemaSet -# Needed only so the schema's relative xmldsig-core-schema.xsd import (4.2.9+) -# resolves from the same directory. +# Resolves a remote schema URL and the schema's relative +# xmldsig-core-schema.xsd import (4.2.9+) from the same location. 
$schemas.XmlResolver = New-Object System.Xml.XmlUrlResolver -[void]$schemas.Add($null, $schemaPath) +[void]$schemas.Add($null, $Schema) $settings = New-Object System.Xml.XmlReaderSettings $settings.ValidationType = [System.Xml.ValidationType]::Schema @@ -101,9 +73,9 @@ catch [System.Xml.XmlException] { } if ($script:failed) { - Write-Error "INVALID: $XmlFile (FundsXML $Version)" + Write-Error "INVALID: $XmlFile (schema $Schema)" exit 1 } -Write-Host "VALID: $XmlFile (FundsXML $Version)" +Write-Host "VALID: $XmlFile (schema $Schema)" exit 0 diff --git a/XSD_Validation/python/validate.py b/XSD_Validation/python/validate.py index a7abdd5..7097ad6 100644 --- a/XSD_Validation/python/validate.py +++ b/XSD_Validation/python/validate.py @@ -4,52 +4,78 @@ Standalone & cross-platform — no bash, no prior tool step (works on Windows). After `pip install -e .` (see pyproject.toml): - python XSD_Validation/python/validate.py + python XSD_Validation/python/validate.py Exit: 0 = valid, 1 = invalid, 2 = usage/setup error -The official released schema is obtained by this program itself via the shared -`fundsxml_schema` resolver: $FUNDSXML_SCHEMA_DIR (offline/corporate escape -hatch) -> .schema-cache/ -> download from the official GitHub release -(following the 302; fetching the relative xmldsig-core-schema.xsd sibling that -FundsXML 4.2.9+ imports). The official release stays the source of truth. +`` is a path to an XSD file OR a remote URL — e.g. the official +release: https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd +You give the validator exactly two things: the schema and the instance. No +version, no env var, no cache, no resolver — whatever you point at is used +as-is. For FundsXML 4.2.9+ the schema imports xmldsig-core-schema.xsd via a +relative path, so that sibling must be reachable next to `` (it is, in +the official release directory and in any complete local copy). 
-Security: the XML parser is hardened against XXE / entity-expansion -(no_network=True, resolve_entities=False, no DTD load). FundsXML needs none -of those features. +A URL schema (and, when it imports it, the xmldsig sibling) is fetched into a +temp dir with urllib first — libxml2 as built into lxml has no HTTP loader, so +`etree.parse` cannot read a URL itself; doing the fetch here also keeps the +official-release 302 handled. Validation then runs against that local copy. + +Security: the *instance* parser is hardened against XXE / entity-expansion +(no_network=True, resolve_entities=False, no DTD load); FundsXML needs none of +those. Only the trusted, user-supplied schema is fetched over the network. """ import sys +import tempfile +import urllib.request +from pathlib import Path from lxml import etree -from fundsxml_schema import resolve_schema + +def _fetch(url: str, dest: Path) -> None: + print(f"schema: fetch {url}", file=sys.stderr) + with urllib.request.urlopen(url, timeout=60) as r: # follows the 302 + dest.write_bytes(r.read()) def main() -> int: if len(sys.argv) != 3: - print("usage: validate.py ", file=sys.stderr) + print("usage: validate.py ", file=sys.stderr) return 2 - version, xml_path = sys.argv[1], sys.argv[2] - - schema_path = resolve_schema(version) + schema_arg, xml_path = sys.argv[1], sys.argv[2] - # Hardened parser: no network, no entity resolution, no huge-tree blowups. - safe = etree.XMLParser(no_network=True, resolve_entities=False, - load_dtd=False, huge_tree=False) + tmp: tempfile.TemporaryDirectory | None = None + schema_path = schema_arg + if schema_arg.startswith(("http://", "https://")): + tmp = tempfile.TemporaryDirectory() + local = Path(tmp.name) / "FundsXML.xsd" + _fetch(schema_arg, local) + # FundsXML 4.2.9+ imports xmldsig-core-schema.xsd via a relative path; + # fetch that sibling from the same URL dir only when referenced. 
+ if "xmldsig-core-schema.xsd" in local.read_text(encoding="utf-8"): + sib = schema_arg.rsplit("/", 1)[0] + "/xmldsig-core-schema.xsd" + _fetch(sib, Path(tmp.name) / "xmldsig-core-schema.xsd") + schema_path = str(local) - # The schema itself is parsed with network access so its relative - # xmldsig-core-schema.xsd import (4.2.9+) resolves from the same dir. - schema_doc = etree.parse(str(schema_path)) - schema = etree.XMLSchema(schema_doc) + try: + # Hardened parser for the instance: no network, no entity resolution, + # no DTD load, no huge-tree blowups. + safe = etree.XMLParser(no_network=True, resolve_entities=False, + load_dtd=False, huge_tree=False) - doc = etree.parse(xml_path, parser=safe) - if schema.validate(doc): - print(f"VALID: {xml_path} (FundsXML {version})") - return 0 + schema = etree.XMLSchema(etree.parse(schema_path)) + doc = etree.parse(xml_path, parser=safe) + if schema.validate(doc): + print(f"VALID: {xml_path} (schema {schema_arg})") + return 0 - print(f"INVALID: {xml_path} (FundsXML {version})", file=sys.stderr) - for err in schema.error_log: - print(f" line {err.line}: {err.message}", file=sys.stderr) - return 1 + print(f"INVALID: {xml_path} (schema {schema_arg})", file=sys.stderr) + for err in schema.error_log: + print(f" line {err.line}: {err.message}", file=sys.stderr) + return 1 + finally: + if tmp is not None: + tmp.cleanup() if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index c1e9072..ce4ac5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,24 +3,21 @@ # WHY THIS EXISTS # =============== # The Python examples used to rely on ad-hoc `pip install lxml` / `pip install -# saxonche` and on tools/fetch-schema.sh (bash) to materialise the XSD. That is -# neither standalone nor cross-platform. This manifest makes the Python stack -# install with one idiomatic, OS-agnostic command and ships the in-language -# schema resolver as an importable module. +# saxonche`. That is neither standalone nor cross-platform. 
This manifest makes +# the Python stack install with one idiomatic, OS-agnostic command. It is a +# dependency-only manifest — no module is packaged; the examples are plain +# scripts run from the repo root. # # STANDALONE BOOTSTRAP (any OS; venv works on Windows too) # ======================================================= # python -m venv .venv # # Linux/macOS: . .venv/bin/activate Windows: .venv\Scripts\activate # pip install -e . -# Then run any example with the venv's Python, e.g.: -# python XSD_Validation/python/validate.py 4.2.9 \ +# Then run any example with the venv's Python, e.g. (the XSD validator takes a +# schema path or URL + the XML file — no version, no resolver): +# python XSD_Validation/python/validate.py \ +# https://github.com/fundsxml/schema/releases/download/4.2.9/FundsXML.xsd \ # FundsXML_Files/4.2.9/positions/Mixed-Fund_Positions.xml -# -# `pip install -e .` exposes the top-level module `fundsxml_schema` -# (tools/fundsxml_schema.py): $FUNDSXML_SCHEMA_DIR -> .schema-cache -> official -# release download. No bash, no prior step, official release stays the source -# of truth (no committed catalog). [build-system] requires = ["setuptools>=64"] @@ -41,9 +38,7 @@ dependencies = [ ] [tool.setuptools] -# Single-module distribution: expose tools/fundsxml_schema.py as the top-level -# importable `fundsxml_schema`. Nothing else in the repo is packaged. -py-modules = ["fundsxml_schema"] - -[tool.setuptools.package-dir] -"" = "tools" +# Dependency-only install: nothing in the repo is packaged or importable, so +# declare an empty module list (this also disables setuptools auto-discovery, +# which would otherwise choke on the repo's flat multi-directory layout). 
+py-modules = [] diff --git a/tools/fundsxml_schema.py b/tools/fundsxml_schema.py deleted file mode 100644 index 8f7144d..0000000 --- a/tools/fundsxml_schema.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Resolve the official FundsXML XSD for a given version — standalone & -cross-platform, no bash, no prior tool step. - -This is the Python counterpart of the in-language resolver used by the Java -example (XSD_Validation/java/XsdValidate.java). Same 3-step convention in every -stack: - - 1. ``$FUNDSXML_SCHEMA_DIR`` — a directory holding ``FundsXML.xsd`` (plus the - ``xmldsig-core-schema.xsd`` sibling for 4.2.9+). Used as-is, NO network. - The escape hatch for locked-down corporate networks / offline use. - 2. ``/.schema-cache//FundsXML.xsd`` — reused if present. - 3. download from the official GitHub release (following the 302), caching - into ``.schema-cache//``; the relative ``xmldsig-core-schema.xsd`` - sibling is fetched only when ``FundsXML.xsd`` actually imports it (4.2.9+). - -The source of truth stays the official release URL — no committed catalog. -``urllib`` follows the GitHub 302 to objects.githubusercontent.com on its own. - -Installed as a top-level module via the repo's ``pyproject.toml`` (``pip -install -e .``), so every Python example can ``from fundsxml_schema import -resolve_schema``. Also runnable directly:: - - python -m fundsxml_schema 4.2.9 # prints the resolved path -""" -from __future__ import annotations - -import os -import sys -import tempfile -import urllib.request -from pathlib import Path - -RELEASE_BASE = "https://github.com/fundsxml/schema/releases/download/" - -# tools/fundsxml_schema.py -> repo root is one level up. An editable install -# keeps __file__ pointing at this source file, so this stays correct. 
-_DEFAULT_REPO_ROOT = Path(__file__).resolve().parents[1] - - -def _download(url: str, out: Path) -> None: - """GET *url* (urllib follows the GitHub 302) atomically into *out*.""" - print(f"schema: fetch {url}", file=sys.stderr) - with urllib.request.urlopen(url, timeout=60) as resp: # noqa: S310 (trusted host) - if resp.status != 200: - raise RuntimeError(f"download failed (HTTP {resp.status}): {url}") - data = resp.read() - out.parent.mkdir(parents=True, exist_ok=True) - fd, tmp = tempfile.mkstemp(dir=str(out.parent), suffix=".part") - try: - with os.fdopen(fd, "wb") as fh: - fh.write(data) - os.replace(tmp, out) - except BaseException: - try: - os.unlink(tmp) - except OSError: - pass - raise - - -def resolve_schema(version: str, repo_root: Path | None = None) -> Path: - """Return a local path to ``FundsXML.xsd`` for *version* (see module doc).""" - # 1. Offline / corporate-network escape hatch: a hand-placed copy. - env_dir = os.environ.get("FUNDSXML_SCHEMA_DIR") - if env_dir: - xsd = Path(env_dir) / "FundsXML.xsd" - if not xsd.is_file(): - raise FileNotFoundError( - f"FUNDSXML_SCHEMA_DIR set but {xsd} not found") - print(f"schema: using $FUNDSXML_SCHEMA_DIR -> {xsd}", file=sys.stderr) - return xsd - - root = Path(repo_root) if repo_root else _DEFAULT_REPO_ROOT - cache_dir = root / ".schema-cache" / version - xsd = cache_dir / "FundsXML.xsd" - - # 2. Local cache (shared by every stack, gitignored). - if xsd.is_file(): - print(f"schema: cached -> {xsd}", file=sys.stderr) - return xsd - - # 3. Download from the official release (source of truth). - _download(f"{RELEASE_BASE}{version}/FundsXML.xsd", xsd) - # From 4.2.9 on, FundsXML.xsd imports xmldsig-core-schema.xsd via a - # relative path — it must sit next to FundsXML.xsd or the schema does not - # compile. Fetch the sibling only when it is actually referenced. 
- if "xmldsig-core-schema.xsd" in xsd.read_text(encoding="utf-8"): - _download(f"{RELEASE_BASE}{version}/xmldsig-core-schema.xsd", - cache_dir / "xmldsig-core-schema.xsd") - return xsd - - -def _main(argv: list[str]) -> int: - if not 1 <= len(argv) <= 2: - print("usage: python -m fundsxml_schema [repo-root]", - file=sys.stderr) - return 2 - version = argv[0] - root = Path(argv[1]) if len(argv) == 2 else None - print(resolve_schema(version, root)) # path on stdout (scriptable) - return 0 - - -if __name__ == "__main__": - raise SystemExit(_main(sys.argv[1:]))