diff --git a/.github/workflows/cross-port-interop.yml b/.github/workflows/cross-port-interop.yml
index 5d7aa97..5f1b5fe 100644
--- a/.github/workflows/cross-port-interop.yml
+++ b/.github/workflows/cross-port-interop.yml
@@ -16,6 +16,8 @@ on:
     paths:
       - 'reference/PCF-SIG-v1.0/**'
       - 'implementations/**/pcf-sig/**'
+      - 'reference/PCF-DCP-v1.0/**'
+      - 'implementations/**/pcf-dcp/**'
       - 'implementations/ts/package.json'
       - 'implementations/ts/package-lock.json'
       - 'implementations/dotnet/Directory.Build.props'
@@ -25,6 +27,8 @@ on:
     paths:
       - 'reference/PCF-SIG-v1.0/**'
       - 'implementations/**/pcf-sig/**'
+      - 'reference/PCF-DCP-v1.0/**'
+      - 'implementations/**/pcf-dcp/**'
       - 'implementations/ts/package.json'
       - 'implementations/ts/package-lock.json'
       - 'implementations/dotnet/Directory.Build.props'
@@ -169,3 +173,111 @@ jobs:
             /tmp/ts.bin
             /tmp/php.bin
             /tmp/dotnet.bin
+
+  cross-port-byte-exact-dcp:
+    name: all DCP writers produce identical bytes
+    runs-on: ubuntu-latest
+    # Expected SHA-256 of the canonical 700-byte DCP vector (spec Section 17).
+    env:
+      EXPECTED_SHA256: b9bb59794abed008863063886d8d0daa810c44939c1c5d29449475ced8156b90
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: npm
+          cache-dependency-path: implementations/ts/package-lock.json
+
+      - uses: shivammathur/setup-php@v2
+        with:
+          php-version: '8.3'
+          extensions: hash, mbstring
+          coverage: none
+          tools: composer:v2
+
+      - uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: '8.0.x'
+
+      # ---- Rust reference writer --------------------------------------------
+      - name: Generate Rust reference vector
+        run: cargo run -p pcf-dcp --example gen_testvector -- /tmp/rust.bin
+
+      # ---- TypeScript writer ------------------------------------------------
+      - name: Install npm deps and build pcf
+        working-directory: implementations/ts
+        run: |
+          npm ci
+          npm run build -w @kduma-oss/pcf
+      - name: Generate TS vector
+        working-directory: implementations/ts
+        run: npm run gen-testvector -w @kduma-oss/pcf-dcp -- /tmp/ts.bin
+
+      # ---- PHP writer -------------------------------------------------------
+      - name: Install composer deps
+        working-directory: implementations/php/pcf-dcp
+        run: composer install --prefer-dist --no-progress --no-interaction
+      - name: Generate PHP vector
+        working-directory: implementations/php/pcf-dcp
+        run: php examples/gen_testvector.php /tmp/php.bin
+
+      # ---- .NET writer ------------------------------------------------------
+      - name: Generate .NET vector
+        run: |
+          mkdir -p /tmp/dotnet-gen
+          cat > /tmp/dotnet-gen/GenTestVector.csproj <<'EOF'
+          <Project Sdk="Microsoft.NET.Sdk">
+            <PropertyGroup>
+              <OutputType>Exe</OutputType>
+              <TargetFramework>net8.0</TargetFramework>
+              <ImplicitUsings>disable</ImplicitUsings>
+            </PropertyGroup>
+            <ItemGroup>
+              <ProjectReference Include="$(GITHUB_WORKSPACE)/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Pcf.Dcp.csproj" />
+            </ItemGroup>
+          </Project>
+          EOF
+          cat > /tmp/dotnet-gen/Program.cs <<'EOF'
+          using System.IO;
+          using Pcf.Dcp;
+          File.WriteAllBytes(args[0], ReferenceVector.Build());
+          EOF
+          dotnet run --project /tmp/dotnet-gen/GenTestVector.csproj -c Release -- /tmp/dotnet.bin
+
+      # ---- Compare ----------------------------------------------------------
+      - name: All four DCP writers agree on byte-exact output
+        run: |
+          set -euo pipefail
+          ls -l /tmp/rust.bin /tmp/ts.bin /tmp/php.bin /tmp/dotnet.bin
+          fail=0
+          for f in /tmp/rust.bin /tmp/ts.bin /tmp/php.bin /tmp/dotnet.bin; do
+            d=$(sha256sum "$f" | awk '{print $1}')
+            echo "$f -> $d"
+            if [ "$d" != "$EXPECTED_SHA256" ]; then
+              echo "::error::$f sha256 = $d (expected $EXPECTED_SHA256)"
+              fail=1
+            fi
+            if ! cmp -s /tmp/rust.bin "$f"; then
+              echo "::error::$f differs from /tmp/rust.bin"
+              fail=1
+            fi
+          done
+          if [ "$fail" != "0" ]; then
+            echo "Cross-port DCP writer interop FAILED"
+            exit 1
+          fi
+          echo "All four DCP writers produced sha256 = $EXPECTED_SHA256"
+
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: cross-port-vectors-dcp
+          path: |
+            /tmp/rust.bin
+            /tmp/ts.bin
+            /tmp/php.bin
+            /tmp/dotnet.bin
diff --git a/.github/workflows/dotnet-ci.yml b/.github/workflows/dotnet-ci.yml
index 25e7c2d..f47abd8 100644
--- a/.github/workflows/dotnet-ci.yml
+++ b/.github/workflows/dotnet-ci.yml
@@ -6,6 +6,7 @@ on:
     paths:
       - 'implementations/dotnet/pcf/**'
       - 'implementations/dotnet/pcf-sig/**'
+      - 'implementations/dotnet/pcf-dcp/**'
       - 'implementations/dotnet/Directory.Build.props'
       - '.github/workflows/dotnet-ci.yml'
   pull_request:
@@ -13,6 +14,7 @@ on:
     paths:
       - 'implementations/dotnet/pcf/**'
       - 'implementations/dotnet/pcf-sig/**'
+      - 'implementations/dotnet/pcf-dcp/**'
       - 'implementations/dotnet/Directory.Build.props'
       - '.github/workflows/dotnet-ci.yml'
 
@@ -24,7 +26,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        package: [pcf, pcf-sig]
+        package: [pcf, pcf-sig, pcf-dcp]
     defaults:
       run:
         working-directory: implementations/dotnet/${{ matrix.package }}
diff --git a/.github/workflows/php-split.yml b/.github/workflows/php-split.yml
index af99241..72a01bb 100644
--- a/.github/workflows/php-split.yml
+++ b/.github/workflows/php-split.yml
@@ -31,6 +31,7 @@ jobs:
         package:
           - { dir: 'implementations/php/pcf', repo: 'PHP-PCF-lib' }
           - {
dir: 'implementations/php/pcf-sig', repo: 'PHP-PCF-SIG-lib' } + - { dir: 'implementations/php/pcf-dcp', repo: 'PHP-PCF-DCP-lib' } steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/php.yml b/.github/workflows/php.yml index a40b41c..bd5c8db 100644 --- a/.github/workflows/php.yml +++ b/.github/workflows/php.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: php: ['8.1', '8.2', '8.3', '8.4'] - package: [pcf, pcf-sig] + package: [pcf, pcf-sig, pcf-dcp] defaults: run: working-directory: implementations/php/${{ matrix.package }} @@ -47,6 +47,7 @@ jobs: include: - { package: pcf, output: pcf_testvector.bin, expected: 395 } - { package: pcf-sig, output: pcfsig_testvector.bin, expected: 966 } + - { package: pcf-dcp, output: pcf_dcp_testvector.bin, expected: 700 } defaults: run: working-directory: implementations/php/${{ matrix.package }} diff --git a/.github/workflows/release-prepare.yml b/.github/workflows/release-prepare.yml index 95bfe21..9f8df34 100644 --- a/.github/workflows/release-prepare.yml +++ b/.github/workflows/release-prepare.yml @@ -79,14 +79,21 @@ jobs: sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' reference/PCF-DCP-v1.0/Cargo.toml sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' tools/pcf-debug/Cargo.toml sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' tools/pcf-compact/Cargo.toml + sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' tools/pcf-sig/Cargo.toml # path-dep version pins on pcf sed -i 's|pcf = { path = "\.\./PCF-v1.0", version = "[^"]*" }|pcf = { path = "../PCF-v1.0", version = "'"$NEW"'" }|' reference/PFS-MS-v1.0/Cargo.toml sed -i 's|pcf = { path = "\.\./PCF-v1.0", version = "[^"]*" }|pcf = { path = "../PCF-v1.0", version = "'"$NEW"'" }|' reference/PCF-SIG-v1.0/Cargo.toml sed -i 's|pcf = { path = "\.\./PCF-v1.0", version = "[^"]*" }|pcf = { path = "../PCF-v1.0", version = "'"$NEW"'" }|' reference/PCF-DCP-v1.0/Cargo.toml + # PFS-MS also pins the PCF-SIG library and the pcf-sig CLI library + sed -i 's|pcf-sig = { 
path = "\.\./PCF-SIG-v1.0", version = "[^"]*" }|pcf-sig = { path = "../PCF-SIG-v1.0", version = "'"$NEW"'" }|' reference/PFS-MS-v1.0/Cargo.toml + sed -i 's|pcf-sig-cli = { path = "\.\./\.\./tools/pcf-sig", version = "[^"]*" }|pcf-sig-cli = { path = "../../tools/pcf-sig", version = "'"$NEW"'" }|' reference/PFS-MS-v1.0/Cargo.toml sed -i 's|pcf = { path = "\.\./\.\./reference/PCF-v1.0", version = "[^"]*" }|pcf = { path = "../../reference/PCF-v1.0", version = "'"$NEW"'" }|' tools/pcf-debug/Cargo.toml sed -i 's|pcf-sig = { path = "\.\./\.\./reference/PCF-SIG-v1.0", version = "[^"]*" }|pcf-sig = { path = "../../reference/PCF-SIG-v1.0", version = "'"$NEW"'" }|' tools/pcf-debug/Cargo.toml sed -i 's|pcf-dcp = { path = "\.\./\.\./reference/PCF-DCP-v1.0", version = "[^"]*" }|pcf-dcp = { path = "../../reference/PCF-DCP-v1.0", version = "'"$NEW"'" }|' tools/pcf-debug/Cargo.toml sed -i 's|pcf = { path = "\.\./\.\./reference/PCF-v1.0", version = "[^"]*" }|pcf = { path = "../../reference/PCF-v1.0", version = "'"$NEW"'" }|' tools/pcf-compact/Cargo.toml + # pcf-sig CLI pins on pcf and pcf-sig + sed -i 's|pcf = { path = "\.\./\.\./reference/PCF-v1.0", version = "[^"]*" }|pcf = { path = "../../reference/PCF-v1.0", version = "'"$NEW"'" }|' tools/pcf-sig/Cargo.toml + sed -i 's|pcf-sig = { path = "\.\./\.\./reference/PCF-SIG-v1.0", version = "[^"]*" }|pcf-sig = { path = "../../reference/PCF-SIG-v1.0", version = "'"$NEW"'" }|' tools/pcf-sig/Cargo.toml - name: Bump TypeScript packages shell: bash @@ -102,6 +109,13 @@ jobs: sed -i 's|"kduma/pcf": "\^[^"]*"|"kduma/pcf": "^'"$NEW"'"|' implementations/php/pcf-sig/composer.json sed -i 's|"versions": { "kduma/pcf": "[^"]*" }|"versions": { "kduma/pcf": "'"$NEW"'" }|' implementations/php/pcf-sig/composer.json + - name: Bump PHP pcf-dcp dependency on pcf + shell: bash + run: | + NEW='${{ steps.version.outputs.version }}' + sed -i 's|"kduma/pcf": "\^[^"]*"|"kduma/pcf": "^'"$NEW"'"|' implementations/php/pcf-dcp/composer.json + sed -i 's|"versions": { 
"kduma/pcf": "[^"]*" }|"versions": { "kduma/pcf": "'"$NEW"'" }|' implementations/php/pcf-dcp/composer.json + - name: Bump .NET Directory.Build.props shell: bash run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index be1a07c..1b45644 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -190,6 +190,7 @@ jobs: - run: npm ci - run: npm run build -w @kduma-oss/pcf - run: npm run build -w @kduma-oss/pcf-sig + - run: npm run build -w @kduma-oss/pcf-dcp - name: npm publish pcf (OIDC trusted publishing, auto-provenance) run: | if [ "${{ needs.resolve.outputs.dry_run }}" = "true" ]; then @@ -204,6 +205,13 @@ jobs: else npm publish -w @kduma-oss/pcf-sig --access public fi + - name: npm publish pcf-dcp + run: | + if [ "${{ needs.resolve.outputs.dry_run }}" = "true" ]; then + npm publish -w @kduma-oss/pcf-dcp --access public --dry-run + else + npm publish -w @kduma-oss/pcf-dcp --access public + fi publish-nuget: name: Publish to NuGet @@ -291,6 +299,49 @@ jobs: name: nuget-package-sig path: implementations/dotnet/pcf-sig/out/*.nupkg + publish-nuget-dcp: + name: Publish KDuma.Pcf.Dcp to NuGet + needs: [resolve, publish-nuget] + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + defaults: + run: + working-directory: implementations/dotnet/pcf-dcp + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-dotnet@v4 + with: + dotnet-version: '8.0.x' + - run: dotnet restore + - name: dotnet pack + run: | + dotnet pack src/Pcf.Dcp/Pcf.Dcp.csproj \ + -c Release \ + -p:Version='${{ needs.resolve.outputs.version }}' \ + -o out + - name: NuGet login (OIDC trusted publishing) + id: nuget-login + if: needs.resolve.outputs.dry_run != 'true' + uses: NuGet/login@v1 + with: + user: krystianduma + - name: dotnet nuget push + if: needs.resolve.outputs.dry_run != 'true' + run: | + dotnet nuget push out/*.nupkg \ + --source https://api.nuget.org/v3/index.json \ + --api-key '${{ 
steps.nuget-login.outputs.NUGET_API_KEY }}' \ + --skip-duplicate + - name: Dry-run note + if: needs.resolve.outputs.dry_run == 'true' + run: 'echo "Dry-run - skipping dotnet nuget push. Package would be out/*.nupkg."' + - uses: actions/upload-artifact@v4 + with: + name: nuget-package-dcp + path: implementations/dotnet/pcf-dcp/out/*.nupkg + split-php: name: Split PHP to packagist source repo needs: resolve diff --git a/.github/workflows/ts-ci.yml b/.github/workflows/ts-ci.yml index 80ae7fb..2ff054a 100644 --- a/.github/workflows/ts-ci.yml +++ b/.github/workflows/ts-ci.yml @@ -24,6 +24,7 @@ jobs: - run: npm ci - run: npm run build -w @kduma-oss/pcf - run: npm run build -w @kduma-oss/pcf-sig + - run: npm run build -w @kduma-oss/pcf-dcp test: name: test (${{ matrix.os }}) @@ -41,10 +42,11 @@ jobs: cache-dependency-path: implementations/ts/package-lock.json - run: npm ci - run: npm test -w @kduma-oss/pcf - # pcf-sig imports the compiled @kduma-oss/pcf dist/; build pcf first - # so the workspace dependency resolves before vitest runs. + # pcf-sig and pcf-dcp import the compiled @kduma-oss/pcf dist/; build pcf + # first so the workspace dependency resolves before vitest runs. 
- run: npm run build -w @kduma-oss/pcf - run: npm test -w @kduma-oss/pcf-sig + - run: npm test -w @kduma-oss/pcf-dcp test-vector: name: regenerate spec test vector @@ -71,12 +73,19 @@ jobs: run: | ls -l pcf-sig/pcfsig_testvector.bin test "$(wc -c < pcf-sig/pcfsig_testvector.bin)" = "966" + - name: Build and run the PCF-DCP test-vector example + run: npm run gen-testvector -w @kduma-oss/pcf-dcp -- pcf_dcp_testvector.bin + - name: Inspect PCF-DCP test vector + run: | + ls -l pcf-dcp/pcf_dcp_testvector.bin + test "$(wc -c < pcf-dcp/pcf_dcp_testvector.bin)" = "700" - uses: actions/upload-artifact@v4 with: name: pcf-testvector-ts path: | implementations/ts/pcf/pcf_testvector.bin implementations/ts/pcf-sig/pcfsig_testvector.bin + implementations/ts/pcf-dcp/pcf_dcp_testvector.bin coverage: name: code coverage @@ -95,9 +104,12 @@ jobs: run: npm run build -w @kduma-oss/pcf - name: Generate PCF-SIG coverage report (enforces >=90% line / 100% function) run: npm run coverage -w @kduma-oss/pcf-sig + - name: Generate PCF-DCP coverage report (enforces >=85% line / 90% function) + run: npm run coverage -w @kduma-oss/pcf-dcp - uses: actions/upload-artifact@v4 with: name: coverage-lcov-ts path: | implementations/ts/pcf/coverage/lcov.info implementations/ts/pcf-sig/coverage/lcov.info + implementations/ts/pcf-dcp/coverage/lcov.info diff --git a/implementations/dotnet/pcf-dcp/README.md b/implementations/dotnet/pcf-dcp/README.md new file mode 100644 index 0000000..0b77df8 --- /dev/null +++ b/implementations/dotnet/pcf-dcp/README.md @@ -0,0 +1,62 @@ +# KDuma.Pcf.Dcp — PCF Dynamic Container Partition (.NET) + +.NET reader/writer for **PCF-DCP v1.0**, an application-level profile that adds +*dynamic*, fragmentable, dedup-friendly sub-partitions to the +[Partitioned Container Format](../pcf) without modifying the PCF byte container. 
+ +This package mirrors the written specification (`PCF-DCP-spec-v1.0.txt`) and the +Rust reference implementation field-for-field, and ships the same byte-exact +700-byte canonical test vector as every other port. It has no cryptographic +dependency — data/table hashing comes from the base `KDuma.Pcf` package. + +## Model at a glance + +One new PCF partition type is defined: + +| Type | Name | Holds | +|--------------|-----------------|----------------------------------------------------| +| `0xAAAC0001` | `DCP_CONTAINER` | An *arena*: a header, an inner partition table, fragment tables, and data extents | + +``` +arena: +[ DCP Header (24 B) | data extents | Fragment Tables | Inner Table Block(s) ] +``` + +Each inner partition's logical content is the concatenation of its DATA extents; +its data hash covers that logical content, so fragmentation, deduplication, +compaction, and promotion all leave the hash (and any PCF-SIG signature over it) +unchanged. A generic PCF reader sees a DCP file as **one opaque partition**; only +a DCP-aware reader looks inside. + +## Example + +```csharp +using System.IO; +using Pcf; +using Pcf.Dcp; + +var arena = new Arena(); +arena.AddInner(0x10, Uid(0xA1), "A", Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7)); +arena.AddInner(0x10, Uid(0xB2), "B", Bytes("World!"), HashAlgo.Sha256, Chunker.Whole()); + +var w = new DcpWriter(); +w.AddContainer(Uid(0xDC), "dcp", arena); +byte[] image = w.ToImage(); + +var r = DcpReader.Open(new MemoryStream(image)); +r.Verify(); +// System.Text.Encoding.UTF8.GetString(r.ReadInner(Uid(0xB2))) == "World!" +``` + +## Operations + +`Arena` supports content-defined deduplication, copy-on-write edits +(`Append` / `Insert` / `Overwrite` / `Delete` / `Truncate`), and +sharing-preserving `Compact`. 
`DcpWriter` adds **promotion** (`Promote`,
+dynamic → fixed) and **demotion** (`Demote`, fixed → dynamic), each preserving
+`uid`, `PartitionType`, `Label`, `DataHashAlgo`, and `DataHash` — the promotion
+invariant, identical to the fields a PCF-SIG signature protects.
+
+## Licence
+
+MIT OR Apache-2.0.
diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Arena.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Arena.cs
new file mode 100644
index 0000000..e68f0eb
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Arena.cs
@@ -0,0 +1,848 @@
+using System;
+using System.Collections.Generic;
+using Pcf;
+
+namespace Pcf.Dcp;
+
+/// <summary>
+/// The DCP arena: the in-memory model of one DCP container and its canonical
+/// byte serialisation.
+///
+/// An <see cref="Arena"/> holds a byte pool plus a list of inner partitions,
+/// each owning a list of fragments. A fragment addresses a byte range in the
+/// pool; two fragments addressing the same range share that extent
+/// (deduplication, spec Section 10.2). Edits work on the fragment list and
+/// append new bytes to the pool, never overwriting bytes a SHARED extent still
+/// names (copy-on-write, spec Section 10.1). <see cref="ToBytes"/> always emits
+/// the canonical layout of the spec's Section 17 test vector.
+/// </summary>
+public sealed class Arena
+{
+    /// <summary>One fragment: a byte range in the pool plus its kind and SHARED flag.</summary>
+    private sealed class Frag
+    {
+        public int Offset;
+        public int Length;
+        public byte Kind;
+        public bool Shared;
+    }
+
+    /// <summary>One inner partition: its identity fields and its ordered fragment list.</summary>
+    private sealed class Inner
+    {
+        public uint PartitionType;
+        public byte[] Uid;
+        public byte[] Label;
+        public HashAlgo DataHashAlgo;
+        public List<Frag> Frags;
+    }
+
+    private byte ProfileVersionMajor = Constants.ProfileVersionMajor;
+    private byte ProfileVersionMinor = Constants.ProfileVersionMinor;
+    private ushort Flags;
+    private HashAlgo _innerTableAlgo = HashAlgo.Sha256;
+    // Backing array for the byte pool; only the first _blobLen bytes are in use.
+    private byte[] _blob = Array.Empty<byte>();
+    private int _blobLen;
+    private readonly List<Inner> _inners = new List<Inner>();
+
+    /// <summary>Choose the hash algorithm used for inner Table Blocks (default SHA-256).</summary>
+    public Arena WithInnerTableAlgo(HashAlgo algo)
+    {
+        _innerTableAlgo = algo;
+        return this;
+    }
+
+    // ---- byte pool ---------------------------------------------------------
+
+    /// <summary>
+    /// Append <paramref name="data"/> to the byte pool, growing the backing
+    /// array by doubling, and return the pool offset the bytes were written at.
+    /// </summary>
+    private int AppendBlob(byte[] data)
+    {
+        int start = _blobLen;
+        // checked: a pool that would exceed int.MaxValue must fail loudly
+        // instead of wrapping to a negative end offset.
+        int end = checked(start + data.Length);
+        if (end > _blob.Length)
+        {
+            // Double in 64-bit arithmetic: doubling an int capacity past 2^30
+            // wraps to a non-positive value and the loop would never finish.
+            long cap = _blob.Length == 0 ? 64 : _blob.Length;
+            while (cap < end)
+            {
+                cap *= 2;
+            }
+            var next = new byte[(int)Math.Min(cap, int.MaxValue)];
+            Buffer.BlockCopy(_blob, 0, next, 0, _blobLen);
+            _blob = next;
+        }
+        Buffer.BlockCopy(data, 0, _blob, start, data.Length);
+        _blobLen = end;
+        return start;
+    }
+
+    /// <summary>
+    /// Whether the pool bytes at [off, off+len) equal <paramref name="chunk"/>.
+    /// </summary>
+    private bool BlobEquals(int off, int len, byte[] chunk)
+    {
+        if (len != chunk.Length)
+        {
+            return false;
+        }
+        for (int i = 0; i < len; i++)
+        {
+            if (_blob[off + i] != chunk[i])
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // ---- parsing -----------------------------------------------------------
+
+    /// <summary>Parse an arena from its on-disk bytes (spec Sections 6–8).</summary>
+    /// <param name="bytes">The arena bytes; they are copied, not retained.</param>
+    /// <returns>An arena whose fragments address ranges inside the copied bytes.</returns>
+    /// <exception cref="PcfDcpException">
+    /// The profile major version is unsupported, or a table block, table entry,
+    /// or extent lies outside the supplied bytes / the declared arena size.
+    /// </exception>
+    public static Arena Parse(byte[] bytes)
+    {
+        DcpHeader header = DcpHeader.Read(bytes);
+        if (header.ProfileVersionMajor != Constants.ProfileVersionMajor)
+        {
+            throw PcfDcpException.UnsupportedProfileMajor(header.ProfileVersionMajor);
+        }
+        ulong arenaUsed = header.ArenaUsed;
+
+        var arena = new Arena
+        {
+            ProfileVersionMajor = header.ProfileVersionMajor,
+            ProfileVersionMinor = header.ProfileVersionMinor,
+            Flags = header.Flags,
+            _blob = (byte[])bytes.Clone(),
+            _blobLen = bytes.Length,
+        };
+
+        int tableHeaderSize = (int)Pcf.Constants.TableHeaderSize;
+        int entrySize = (int)Pcf.Constants.EntrySize;
+
+        bool firstBlock = true;
+        ulong off = header.InnerTableOffset;
+        // Each visited block consumes one unit of budget, so a cyclic
+        // next-pointer chain terminates with an error instead of spinning.
+        int budget = bytes.Length / tableHeaderSize + 1;
+        while (off != Constants.ArenaNone)
+        {
+            if (budget == 0)
+            {
+                throw PcfDcpException.OffsetOutOfRange();
+            }
+            budget -= 1;
+            // Range-check in the unsigned domain before narrowing: an offset
+            // above int.MaxValue is malformed input, not an arithmetic fault.
+            if (off > (ulong)bytes.Length || bytes.Length - (int)off < tableHeaderSize)
+            {
+                throw PcfDcpException.OffsetOutOfRange();
+            }
+            int baseOff = (int)off;
+            var hb = new byte[tableHeaderSize];
+            Buffer.BlockCopy(bytes, baseOff, hb, 0, hb.Length);
+            var h = TableBlockHeader.FromBytes(hb);
+            if (firstBlock)
+            {
+                // The first block's algorithm is remembered for re-serialisation.
+                arena._innerTableAlgo = h.TableHashAlgo;
+                firstBlock = false;
+            }
+            for (int i = 0; i < h.PartitionCount; i++)
+            {
+                // 64-bit arithmetic so the bounds test itself cannot overflow.
+                long eo = (long)baseOff + tableHeaderSize + (long)i * entrySize;
+                if (eo + entrySize > bytes.Length)
+                {
+                    throw PcfDcpException.OffsetOutOfRange();
+                }
+                var eb = new byte[entrySize];
+                Buffer.BlockCopy(bytes, (int)eo, eb, 0, eb.Length);
+                var entry = PartitionEntry.FromBytes(eb);
+                var onDisk = FragmentTable.Walk(bytes, entry.StartOffset);
+                var frags = new List<Frag>(onDisk.Count);
+                foreach (var fe in onDisk)
+                {
+                    if (fe.ExtentOffset > int.MaxValue || fe.ExtentLength > int.MaxValue)
+                    {
+                        throw PcfDcpException.OffsetOutOfRange();
+                    }
+                    frags.Add(new Frag
+                    {
+                        Offset = (int)fe.ExtentOffset,
+                        Length = (int)fe.ExtentLength,
+                        Kind = fe.Kind,
+                        Shared = fe.IsShared(),
+                    });
+                }
+                arena._inners.Add(new Inner
+                {
+                    PartitionType = entry.PartitionType,
+                    Uid = entry.Uid,
+                    Label = entry.Label,
+                    DataHashAlgo = entry.DataHashAlgo,
+                    Frags = frags,
+                });
+            }
+            off = h.NextTableOffset;
+        }
+
+        // An extent must fit both the declared arena size and the bytes we
+        // actually hold; otherwise later reads would run off the pool.
+        ulong limit = Math.Min(arenaUsed, (ulong)bytes.Length);
+        foreach (var inner in arena._inners)
+        {
+            foreach (var f in inner.Frags)
+            {
+                if ((ulong)f.Offset + (ulong)f.Length > limit)
+                {
+                    throw PcfDcpException.OffsetOutOfRange();
+                }
+            }
+        }
+        return arena;
+    }
+
+    // ---- read-only views ---------------------------------------------------
+
+    /// <summary>Number of inner partitions.</summary>
+    public int Count => _inners.Count;
+
+    /// <summary>Whether the arena has no inner partitions.</summary>
+    public bool IsEmpty => _inners.Count == 0;
+
+    /// <summary>The uids of all inner partitions, in stored order.</summary>
+    public List<byte[]> Uids()
+    {
+        var outList = new List<byte[]>(_inners.Count);
+        foreach (var i in _inners)
+        {
+            // Clone so callers cannot mutate the stored uid.
+            outList.Add((byte[])i.Uid.Clone());
+        }
+        return outList;
+    }
+
+    /// <summary>
+    /// Index of the inner partition with the given uid; throws
+    /// <c>PcfDcpException.NotFound()</c> when there is none.
+    /// </summary>
+    private int IndexOf(byte[] uid)
+    {
+        for (int i = 0; i < _inners.Count; i++)
+        {
+            if (BytesEqual(_inners[i].Uid, uid))
+            {
+                return i;
+            }
+        }
+        throw PcfDcpException.NotFound();
+    }
+
+    /// <summary>Logical length: the summed length of the partition's DATA fragments.</summary>
+    private int InnerLogicalLen(Inner inner)
+    {
+        int total = 0;
+        foreach (var f in inner.Frags)
+        {
+            if (f.Kind == Constants.KindData)
+            {
+                // checked: a wrapped total would size the content buffer wrongly.
+                total = checked(total + f.Length);
+            }
+        }
+        return total;
+    }
+
+    /// <summary>Concatenate the partition's DATA fragments, in order, into a new array.</summary>
+    private byte[] InnerContent(Inner inner)
+    {
+        var outBytes = new byte[InnerLogicalLen(inner)];
+        int p = 0;
+        foreach (var f in inner.Frags)
+        {
+            if (f.Kind == Constants.KindData)
+            {
+                Buffer.BlockCopy(_blob, f.Offset, outBytes, p, f.Length);
+                p += f.Length;
+            }
+        }
+        return outBytes;
+    }
+
+    /// <summary>Hash of the partition's logical content under its own data-hash algorithm.</summary>
+    private byte[] InnerDataHash(Inner inner) =>
+        inner.DataHashAlgo.Compute(InnerContent(inner));
+
+    /// <summary>Build the public, detached description of one inner partition.</summary>
+    private InnerInfo View(Inner inner)
+    {
+        var extents = new List<ExtentInfo>(inner.Frags.Count);
+        foreach (var f in inner.Frags)
+        {
+            extents.Add(new ExtentInfo
+            {
+                ExtentOffset = (ulong)f.Offset,
+                ExtentLength = (ulong)f.Length,
+                Kind = f.Kind,
+                Shared = f.Shared,
+            });
+        }
+        return new InnerInfo
+        {
+            PartitionType = inner.PartitionType,
+            Uid = (byte[])inner.Uid.Clone(),
+            Label = PartitionEntry.DecodeLabel(inner.Label),
+            UsedBytes = (ulong)InnerLogicalLen(inner),
+            DataHashAlgo = inner.DataHashAlgo,
+            DataHash = InnerDataHash(inner),
+            Extents = extents,
+        };
+    }
+
+    /// <summary>A read-only view of one inner partition.</summary>
+    public InnerInfo GetInner(byte[] uid) => View(_inners[IndexOf(uid)]);
+
+    /// <summary>Read-only views of every inner partition, in stored order.</summary>
+    public List<InnerInfo> Inners()
+    {
+        var outList = new List<InnerInfo>(_inners.Count);
+        foreach (var i in _inners)
+        {
+            outList.Add(View(i));
+        }
+        return outList;
+    }
+
+    /// <summary>Reconstruct an inner partition's logical content (spec Section 8.3).</summary>
+    public byte[] Content(byte[] uid)
+    {
+        var inner = _inners[IndexOf(uid)];
+        var bytes = InnerContent(inner);
+        int declared = InnerLogicalLen(inner);
+        if (bytes.Length != declared)
+        {
+            throw PcfDcpException.LengthMismatch(declared, bytes.Length);
+        }
+        return bytes;
+    }
+
+    // ---- builder -----------------------------------------------------------
+
+    /// <summary>
+    /// Add an inner partition whose <paramref name="content"/> is split by
+    /// <paramref name="chunker"/> into extents, deduplicating against extents
+    /// already present (spec Section 10.2).
+    /// </summary>
+    /// <exception cref="PcfDcpException">
+    /// The type is 0 or the DCP container type, the uid is nil, or the uid is
+    /// already used by another inner partition.
+    /// </exception>
+    public void AddInner(
+        uint partitionType, byte[] uid, string label, byte[] content,
+        HashAlgo dataHashAlgo, Chunker chunker)
+    {
+        if (partitionType == 0)
+        {
+            throw PcfDcpException.ReservedType();
+        }
+        if (partitionType == Constants.DcpContainerType)
+        {
+            throw PcfDcpException.NestedContainer();
+        }
+        if (BytesEqual(uid, Pcf.Constants.NilUid))
+        {
+            throw PcfDcpException.NilUid();
+        }
+        foreach (var i in _inners)
+        {
+            if (BytesEqual(i.Uid, uid))
+            {
+                throw PcfDcpException.DuplicateUid();
+            }
+        }
+        var labelBytes = PartitionEntry.EncodeLabel(label);
+
+        var frags = new List<Frag>();
+        foreach (var chunk in SplitChunks(chunker, content))
+        {
+            // Prefer an extent some existing partition already holds, then one
+            // produced earlier by this same partition.
+            var hit = FindExtent(chunk);
+            if (hit == null)
+            {
+                hit = FindLocal(frags, chunk);
+            }
+            if (hit != null)
+            {
+                int offset = hit.Value.Item1;
+                int length = hit.Value.Item2;
+                // Every reference to the reused extent becomes SHARED, both in
+                // committed partitions and in the list being built.
+                MarkShared(offset, length);
+                foreach (var f in frags)
+                {
+                    if (f.Offset == offset && f.Length == length)
+                    {
+                        f.Shared = true;
+                    }
+                }
+                frags.Add(new Frag { Offset = offset, Length = length, Kind = Constants.KindData, Shared = true });
+            }
+            else
+            {
+                int offset = AppendBlob(chunk);
+                frags.Add(new Frag { Offset = offset, Length = chunk.Length, Kind = Constants.KindData, Shared = false });
+            }
+        }
+        _inners.Add(new Inner
+        {
+            PartitionType = partitionType,
+            Uid = (byte[])uid.Clone(),
+            Label = labelBytes,
+            DataHashAlgo = dataHashAlgo,
+            Frags = frags,
+        });
+    }
+
+    /// <summary>
+    /// Split <paramref name="content"/> into chunks: nothing for empty content,
+    /// the whole array for a whole-content chunker, otherwise consecutive
+    /// pieces of at most <c>chunker.Size</c> bytes.
+    /// </summary>
+    private static IEnumerable<byte[]> SplitChunks(Chunker chunker, byte[] content)
+    {
+        if (content.Length == 0)
+        {
+            yield break;
+        }
+        if (chunker.IsWhole)
+        {
+            yield return content;
+            yield break;
+        }
+        // A zero step would never advance and a negative one walks backwards;
+        // reject both before the loop instead of spinning or mis-sizing a chunk.
+        if (chunker.Size <= 0)
+        {
+            throw new ArgumentOutOfRangeException(nameof(chunker), "chunk size must be positive");
+        }
+        for (int i = 0; i < content.Length; i += chunker.Size)
+        {
+            int len = Math.Min(chunker.Size, content.Length - i);
+            var chunk = new byte[len];
+            Buffer.BlockCopy(content, i, chunk, 0, len);
+            yield return chunk;
+        }
+    }
+
+    /// <summary>
+    /// First DATA extent of any committed partition whose bytes equal
+    /// <paramref name="chunk"/>, as (offset, length); null when none matches.
+    /// </summary>
+    private (int, int)? FindExtent(byte[] chunk)
+    {
+        foreach (var inner in _inners)
+        {
+            var hit = FindLocal(inner.Frags, chunk);
+            if (hit != null)
+            {
+                return hit;
+            }
+        }
+        return null;
+    }
+
+    /// <summary>
+    /// First DATA fragment in <paramref name="frags"/> whose bytes equal
+    /// <paramref name="chunk"/>, as (offset, length); null for an empty chunk
+    /// or when none matches.
+    /// </summary>
+    private (int, int)? FindLocal(List<Frag> frags, byte[] chunk)
+    {
+        if (chunk.Length == 0)
+        {
+            return null;
+        }
+        foreach (var f in frags)
+        {
+            if (f.Kind == Constants.KindData && f.Length == chunk.Length && BlobEquals(f.Offset, f.Length, chunk))
+            {
+                return (f.Offset, f.Length);
+            }
+        }
+        return null;
+    }
+
+    /// <summary>Set the SHARED flag on every committed fragment naming exactly this extent.</summary>
+    private void MarkShared(int offset, int length)
+    {
+        foreach (var inner in _inners)
+        {
+            foreach (var f in inner.Frags)
+            {
+                if (f.Offset == offset && f.Length == length)
+                {
+                    f.Shared = true;
+                }
+            }
+        }
+    }
+
+    // ---- logical edits (copy-on-write) -------------------------------------
+
+    /// <summary>Append <paramref name="bytes"/> to an inner partition's content.</summary>
+    public void Append(byte[] uid, byte[] bytes)
+    {
+        int idx = IndexOf(uid);
+        if (bytes.Length == 0)
+        {
+            return;
+        }
+        int offset = AppendBlob(bytes);
+        _inners[idx].Frags.Add(new Frag { Offset = offset, Length = bytes.Length, Kind = Constants.KindData, Shared = false });
+    }
+
+    /// <summary>Overwrite the logical range [pos, pos+len) with <paramref name="bytes"/>.</summary>
+    public void Overwrite(byte[] uid, int pos, int len, byte[] bytes)
+    {
+        Delete(uid, pos, len);
+        Insert(uid, pos, bytes);
+    }
+
+    /// <summary>Insert <paramref name="bytes"/> at logical position <paramref name="pos"/>.</summary>
+    public void Insert(byte[] uid, int pos, byte[] bytes)
+    {
+        int idx = IndexOf(uid);
+        int total = InnerLogicalLen(_inners[idx]);
+        // A negative position would make SplitAt emit a negative-length fragment.
+        if (pos < 0 || pos > total)
+        {
+            throw PcfDcpException.PositionOutOfRange();
+        }
+        if (bytes.Length == 0)
+        {
+            return;
+        }
+        int split = SplitAt(idx, pos);
+        int offset = AppendBlob(bytes);
+        _inners[idx].Frags.Insert(split, new Frag { Offset = offset, Length = bytes.Length, Kind = Constants.KindData, Shared = false });
+    }
+
+    /// <summary>Delete the logical range [pos, pos+len).</summary>
+    /// <exception cref="PcfDcpException">
+    /// <paramref name="pos"/> or <paramref name="len"/> is negative, or the
+    /// range extends past the partition's logical length.
+    /// </exception>
+    public void Delete(byte[] uid, int pos, int len)
+    {
+        int idx = IndexOf(uid);
+        int total = InnerLogicalLen(_inners[idx]);
+        // Compare as len > total - pos so pos + len is never computed before
+        // it is known to fit; negatives are rejected outright.
+        if (pos < 0 || len < 0 || pos > total || len > total - pos)
+        {
+            throw PcfDcpException.PositionOutOfRange();
+        }
+        if (len == 0)
+        {
+            return;
+        }
+        int lo = SplitAt(idx, pos);
+        int hi = SplitAt(idx, pos + len);
+        _inners[idx].Frags.RemoveRange(lo, hi - lo);
+    }
+
+    /// <summary>Truncate the partition's logical content to <paramref name="newLen"/> bytes.</summary>
+    public void Truncate(byte[] uid, int newLen)
+    {
+        int idx = IndexOf(uid);
+        int total = InnerLogicalLen(_inners[idx]);
+        if (newLen < 0 || newLen > total)
+        {
+            throw PcfDcpException.PositionOutOfRange();
+        }
+        int cut = SplitAt(idx, newLen);
+        var frags = _inners[idx].Frags;
+        if (cut < frags.Count)
+        {
+            frags.RemoveRange(cut, frags.Count - cut);
+        }
+    }
+
+    /// <summary>
+    /// Ensure a fragment boundary exists at logical position
+    /// <paramref name="pos"/> of inner partition <paramref name="idx"/>,
+    /// splitting the fragment that straddles it, and return the index of the
+    /// first fragment at or after that position.
+    /// </summary>
+    private int SplitAt(int idx, int pos)
+    {
+        // NOTE(review): this walk advances over every fragment regardless of
+        // Kind, while InnerLogicalLen/InnerContent count only KindData. If a
+        // partition can carry non-DATA fragments the two disagree — confirm
+        // against the spec and the reference implementation.
+        var frags = _inners[idx].Frags;
+        int logical = 0;
+        int i = 0;
+        while (i < frags.Count)
+        {
+            var f = frags[i];
+            int flen = f.Length;
+            if (logical == pos)
+            {
+                return i;
+            }
+            if (pos < logical + flen)
+            {
+                // Split in place: both halves keep addressing the same pool
+                // bytes, so nothing is copied.
+                int head = pos - logical;
+                var left = new Frag { Offset = f.Offset, Length = head, Kind = f.Kind, Shared = f.Shared };
+                var right = new Frag { Offset = f.Offset + head, Length = flen - head, Kind = f.Kind, Shared = f.Shared };
+                frags[i] = left;
+                frags.Insert(i + 1, right);
+                return i + 1;
+            }
+            logical += flen;
+            i += 1;
+        }
+        return frags.Count;
+    }
+
+    // ---- promotion support -------------------------------------------------
+
+    /// <summary>
+    /// Remove an inner partition, returning the pieces a promotion needs: its
+    /// type, label, hash algorithm, and reconstructed logical content.
+    /// </summary>
+    public (uint PartitionType, string Label, HashAlgo DataHashAlgo, byte[] Content) RemoveInner(byte[] uid)
+    {
+        int idx = IndexOf(uid);
+        // Reconstruct the content before the partition leaves the list.
+        var content = Content(uid);
+        var inner = _inners[idx];
+        _inners.RemoveAt(idx);
+        return (inner.PartitionType, PartitionEntry.DecodeLabel(inner.Label), inner.DataHashAlgo, content);
+    }
+
+    // ---- deduplication and compaction --------------------------------------
+
+    /// <summary>
+    /// Re-chunk every inner partition with <paramref name="chunker"/> and
+    /// deduplicate identical extents across the whole arena. Returns the
+    /// estimated number of bytes the pool shrank by once re-serialised.
+    /// </summary>
+    public long Dedup(Chunker chunker)
+    {
+        long before = CanonicalExtentBytes();
+        // Build into a scratch arena first; this arena's state is replaced
+        // only after every partition has been re-added.
+        var rebuilt = new Arena
+        {
+            ProfileVersionMajor = ProfileVersionMajor,
+            ProfileVersionMinor = ProfileVersionMinor,
+            Flags = Flags,
+            _innerTableAlgo = _innerTableAlgo,
+        };
+        foreach (var inner in _inners)
+        {
+            rebuilt.AddInner(
+                inner.PartitionType, inner.Uid, PartitionEntry.DecodeLabel(inner.Label),
+                InnerContent(inner), inner.DataHashAlgo, chunker);
+        }
+        _blob = rebuilt._blob;
+        _blobLen = rebuilt._blobLen;
+        _inners.Clear();
+        _inners.AddRange(rebuilt._inners);
+        long after = CanonicalExtentBytes();
+        return Math.Max(0, before - after);
+    }
+
+    /// <summary>
+    /// Compact the arena (spec Section 10.3): drop unreferenced pool bytes and
+    /// normalise the SHARED flag, clearing it on any extent now referenced
+    /// exactly once (rule F2). Returns the number of dead pool bytes reclaimed.
+    /// </summary>
+    public long Compact()
+    {
+        // Count references per distinct (offset, length) extent.
+        var refcount = new Dictionary<(int, int), int>();
+        foreach (var inner in _inners)
+        {
+            foreach (var f in inner.Frags)
+            {
+                var k = (f.Offset, f.Length);
+                refcount[k] = refcount.TryGetValue(k, out int c) ? c + 1 : 1;
+            }
+        }
+        foreach (var inner in _inners)
+        {
+            foreach (var f in inner.Frags)
+            {
+                if (refcount[(f.Offset, f.Length)] <= 1)
+                {
+                    f.Shared = false;
+                }
+            }
+        }
+        long liveBytes = 0;
+        foreach (var k in refcount.Keys)
+        {
+            liveBytes += k.Item2;
+        }
+        long deadBefore = Math.Max(0, _blobLen - liveBytes);
+
+        // Copy each distinct extent once into a fresh pool, in first-seen
+        // order, remembering where it landed.
+        var newPool = new Arena();
+        var remap = new Dictionary<(int, int), int>();
+        foreach (var inner in _inners)
+        {
+            foreach (var f in inner.Frags)
+            {
+                var k = (f.Offset, f.Length);
+                if (!remap.ContainsKey(k))
+                {
+                    var region = new byte[f.Length];
+                    Buffer.BlockCopy(_blob, f.Offset, region, 0, f.Length);
+                    remap[k] = newPool.AppendBlob(region);
+                }
+            }
+        }
+        // Each fragment is looked up by its own old key exactly once, so
+        // rewriting offsets in place cannot collide with later lookups.
+        foreach (var inner in _inners)
+        {
+            foreach (var f in inner.Frags)
+            {
+                f.Offset = remap[(f.Offset, f.Length)];
+            }
+        }
+        _blob = newPool._blob;
+        _blobLen = newPool._blobLen;
+        return deadBefore;
+    }
+
+    /// <summary>Total bytes of the distinct (offset, length) extents referenced by any fragment.</summary>
+    private long CanonicalExtentBytes()
+    {
+        var seen = new HashSet<(int, int)>();
+        long total = 0;
+        foreach (var inner in _inners)
+        {
+            foreach (var f in inner.Frags)
+            {
+                if (seen.Add((f.Offset, f.Length)))
+                {
+                    total += f.Length;
+                }
+            }
+        }
+        return total;
+    }
+
+    // ---- canonical serialisation -------------------------------------------
+
+    /// <summary>Serialise the arena into its canonical on-disk layout (spec Section 17).</summary>
+ public byte[] ToBytes() + { + var extOrder = new List<(int, int)>(); + var extIndex = new Dictionary<(int, int), int>(); + foreach (var inner in _inners) + { + foreach (var f in inner.Frags) + { + var k = (f.Offset, f.Length); + if (!extIndex.ContainsKey(k)) + { + extIndex[k] = extOrder.Count; + extOrder.Add(k); + } + } + } + + int cur = Constants.DcpHeaderSize; + var extArenaOff = new int[extOrder.Count]; + for (int i = 0; i < extOrder.Count; i++) + { + extArenaOff[i] = cur; + cur += extOrder[i].Item2; + } + + var fragOff = new int[_inners.Count]; + for (int ii = 0; ii < _inners.Count; ii++) + { + fragOff[ii] = cur; + cur += FragtableSpan(_inners[ii].Frags.Count); + } + + int innerTableOffset = cur; + var counts = BlockCounts(_inners.Count); + var blockOff = new int[counts.Count]; + for (int b = 0; b < counts.Count; b++) + { + blockOff[b] = cur; + cur += (int)Pcf.Constants.TableHeaderSize + counts[b] * (int)Pcf.Constants.EntrySize; + } + int arenaUsed = cur; + + var buf = new byte[arenaUsed]; + + var header = new DcpHeader + { + ProfileVersionMajor = ProfileVersionMajor, + ProfileVersionMinor = ProfileVersionMinor, + Flags = Flags, + InnerTableOffset = (ulong)innerTableOffset, + ArenaUsed = (ulong)arenaUsed, + }; + Buffer.BlockCopy(header.ToBytes(), 0, buf, 0, Constants.DcpHeaderSize); + + for (int i = 0; i < extOrder.Count; i++) + { + Buffer.BlockCopy(_blob, extOrder[i].Item1, buf, extArenaOff[i], extOrder[i].Item2); + } + + for (int ii = 0; ii < _inners.Count; ii++) + { + WriteFragmentTable(buf, fragOff[ii], _inners[ii].Frags, extIndex, extArenaOff); + } + + var entries = new List(_inners.Count); + for (int ii = 0; ii < _inners.Count; ii++) + { + var inner = _inners[ii]; + ulong used = (ulong)InnerLogicalLen(inner); + entries.Add(new PartitionEntry + { + PartitionType = inner.PartitionType, + Uid = (byte[])inner.Uid.Clone(), + Label = (byte[])inner.Label.Clone(), + StartOffset = (ulong)fragOff[ii], + MaxLength = used, + UsedBytes = used, + DataHashAlgo = 
inner.DataHashAlgo, + DataHash = InnerDataHash(inner), + }); + } + + int idx = 0; + for (int b = 0; b < counts.Count; b++) + { + int c = counts[b]; + ulong next = b + 1 < counts.Count ? (ulong)blockOff[b + 1] : 0; + var slice = entries.GetRange(idx, c); + var th = TableBlockHeader.ComputeTableHash(_innerTableAlgo, next, slice); + var bh = new TableBlockHeader + { + PartitionCount = (byte)c, + NextTableOffset = next, + TableHashAlgo = _innerTableAlgo, + TableHash = th, + }; + int p = blockOff[b]; + Buffer.BlockCopy(bh.ToBytes(), 0, buf, p, (int)Pcf.Constants.TableHeaderSize); + p += (int)Pcf.Constants.TableHeaderSize; + foreach (var e in slice) + { + Buffer.BlockCopy(e.ToBytes(), 0, buf, p, (int)Pcf.Constants.EntrySize); + p += (int)Pcf.Constants.EntrySize; + } + idx += c; + } + + return buf; + } + + private static int FragtableSpan(int n) + { + int span = 0; + foreach (int c in BlockCounts(n)) + { + span += Constants.FragTableHeaderSize + c * Constants.FragmentEntrySize; + } + return span; + } + + private static List BlockCounts(int n) + { + if (n == 0) + { + return new List { 0 }; + } + var outList = new List(); + int rem = n; + while (rem > 0) + { + int c = Math.Min(rem, Constants.MaxEntriesPerBlock); + outList.Add(c); + rem -= c; + } + return outList; + } + + private static void WriteFragmentTable( + byte[] buf, int start, List frags, + Dictionary<(int, int), int> extIndex, int[] extArenaOff) + { + var counts = BlockCounts(frags.Count); + int blockStart = start; + int idx = 0; + for (int b = 0; b < counts.Count; b++) + { + int c = counts[b]; + int span = Constants.FragTableHeaderSize + c * Constants.FragmentEntrySize; + ulong next = b + 1 < counts.Count ? 
(ulong)(blockStart + span) : 0; + var fh = new FragTableHeader { NextFragtableOffset = next, FragmentCount = (byte)c }; + Buffer.BlockCopy(fh.ToBytes(), 0, buf, blockStart, Constants.FragTableHeaderSize); + for (int j = 0; j < c; j++) + { + var f = frags[idx + j]; + int arenaOff = extArenaOff[extIndex[(f.Offset, f.Length)]]; + var fe = new FragmentEntry + { + ExtentOffset = (ulong)arenaOff, + ExtentLength = (ulong)f.Length, + Kind = f.Kind, + Flags = f.Shared ? Constants.FlagShared : (byte)0, + }; + Buffer.BlockCopy(fe.ToBytes(), 0, buf, blockStart + Constants.FragTableHeaderSize + j * Constants.FragmentEntrySize, Constants.FragmentEntrySize); + } + blockStart += span; + idx += c; + } + } + + internal static bool BytesEqual(byte[] a, byte[] b) + { + if (a.Length != b.Length) + { + return false; + } + for (int i = 0; i < a.Length; i++) + { + if (a[i] != b[i]) + { + return false; + } + } + return true; + } +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Chunker.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Chunker.cs new file mode 100644 index 0000000..f4260da --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Chunker.cs @@ -0,0 +1,26 @@ +namespace Pcf.Dcp; + +/// +/// How a Writer splits an inner partition's content into extents (spec Section +/// 10.2; chunking is writer-side policy). +/// +public sealed class Chunker +{ + /// Whether this chunker emits one extent for the whole content. + public bool IsWhole { get; } + + /// Fixed chunk size in bytes (meaningful only when not whole). + public int Size { get; } + + private Chunker(bool whole, int size) + { + IsWhole = whole; + Size = size; + } + + /// One extent for the whole content. + public static Chunker Whole() => new Chunker(true, 0); + + /// Fixed-size chunks of bytes (0 = whole). 
+ public static Chunker Fixed(int n) => new Chunker(n <= 0, n); +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Constants.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Constants.cs new file mode 100644 index 0000000..a0ddd45 --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Constants.cs @@ -0,0 +1,56 @@ +namespace Pcf.Dcp; + +/// +/// On-disk constants defined by PCF-DCP v1.0 (spec Appendix A and B). Every +/// value here is normative. +/// +public static class Constants +{ + /// PCF partition type carrying one DCP arena. + public const uint DcpContainerType = 0xAAAC_0001; + + /// First value reserved by this profile for future types. + public const uint DcpTypeReservedLo = 0xAAAC_0000; + + /// Last value reserved by this profile. + public const uint DcpTypeReservedHi = 0xAAAC_00FF; + + /// 4-byte magic at the start of a DCP arena: "PDCP". + public static readonly byte[] DcpMagic = { 0x50, 0x44, 0x43, 0x50 }; + + /// PCF-DCP profile version implemented by this library (major). + public const byte ProfileVersionMajor = 1; + + /// PCF-DCP profile version implemented by this library (minor). + public const byte ProfileVersionMinor = 0; + + /// Fixed size of the DCP Header, in bytes (spec Section 6). + public const int DcpHeaderSize = 24; + + /// Fixed size of a Fragment Table block header, in bytes. + public const int FragTableHeaderSize = 9; + + /// Fixed size of one Fragment Entry, in bytes. + public const int FragmentEntrySize = 18; + + /// Fragment Entry kind: RESERVED / INVALID guard. + public const byte KindInvalid = 0; + + /// Fragment Entry kind: DATA — literal content (only kind in v1.0). + public const byte KindData = 1; + + /// Fragment Entry kind: HOLE (RESERVED). + public const byte KindHole = 2; + + /// Fragment Entry kind: REF (RESERVED). + public const byte KindRef = 3; + + /// Fragment Entry flags bit 0: SHARED (copy-on-write required). 
+ public const byte FlagShared = 0x01; + + /// The arena-relative offset value reserved as "none" / terminator. + public const ulong ArenaNone = 0; + + /// Max entries per (inner) Table Block and extents per Fragment block. + public const int MaxEntriesPerBlock = 255; +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpHeader.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpHeader.cs new file mode 100644 index 0000000..17b03b2 --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpHeader.cs @@ -0,0 +1,67 @@ +using System; + +namespace Pcf.Dcp; + +/// The fixed 24-byte DCP Header at arena offset 0 (spec Section 6). +public sealed class DcpHeader +{ + /// PCF-DCP profile major version. + public byte ProfileVersionMajor { get; set; } + + /// PCF-DCP profile minor version. + public byte ProfileVersionMinor { get; set; } + + /// Reserved; MUST be 0 in v1.0. + public ushort Flags { get; set; } + + /// Arena-relative offset of the first Inner Table Block (0 = none). + public ulong InnerTableOffset { get; set; } + + /// Bump pointer: arena-relative offset of the first free byte. + public ulong ArenaUsed { get; set; } + + /// Serialise to the on-disk 24-byte layout. + public byte[] ToBytes() + { + var b = new byte[Constants.DcpHeaderSize]; + Buffer.BlockCopy(Constants.DcpMagic, 0, b, 0, 4); + b[4] = ProfileVersionMajor; + b[5] = ProfileVersionMinor; + LittleEndian.WriteU16(b, 6, Flags); + LittleEndian.WriteU64(b, 8, InnerTableOffset); + LittleEndian.WriteU64(b, 16, ArenaUsed); + return b; + } + + /// Parse from the on-disk 24-byte layout, validating the magic. 
+ public static DcpHeader FromBytes(byte[] b) + { + for (int i = 0; i < 4; i++) + { + if (b[i] != Constants.DcpMagic[i]) + { + throw PcfDcpException.BadDcpMagic(); + } + } + return new DcpHeader + { + ProfileVersionMajor = b[4], + ProfileVersionMinor = b[5], + Flags = LittleEndian.ReadU16(b, 6), + InnerTableOffset = LittleEndian.ReadU64(b, 8), + ArenaUsed = LittleEndian.ReadU64(b, 16), + }; + } + + /// Read a DCP Header from the start of an arena byte array. + public static DcpHeader Read(byte[] arena) + { + if (arena.Length < Constants.DcpHeaderSize) + { + throw PcfDcpException.BadDcpMagic(); + } + var fixedBytes = new byte[Constants.DcpHeaderSize]; + Buffer.BlockCopy(arena, 0, fixedBytes, 0, Constants.DcpHeaderSize); + return FromBytes(fixedBytes); + } +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpReader.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpReader.cs new file mode 100644 index 0000000..1148eb0 --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpReader.cs @@ -0,0 +1,235 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using Pcf; + +namespace Pcf.Dcp; + +/// An inner partition together with the container that holds it. +public sealed class InnerLocation +{ + /// uid of the enclosing DCP container partition. + public byte[] ContainerUid { get; set; } + + /// The inner partition's metadata and extents. + public InnerInfo Info { get; set; } +} + +/// The result of resolving a uid against top-level ∪ inner (spec 2.1). +public sealed class Resolved +{ + /// Whether the uid resolved to a top-level PCF partition. + public bool IsTopLevel { get; set; } + + /// The top-level entry (when is true). + public PartitionEntry Entry { get; set; } + + /// The inner partition location (when is false). + public InnerLocation Inner { get; set; } +} + +/// +/// A reader for DCP containers layered over a PCF file. 
It works entirely +/// through the high-level API, so a DCP file written in +/// trailer mode reads back transparently. +/// +public sealed class DcpReader +{ + private readonly Container _c; + + private DcpReader(Container c) + { + _c = c; + } + + /// Open a PCF file for DCP-aware reading. + public static DcpReader Open(Stream storage) => new DcpReader(Container.Open(storage)); + + /// Borrow the underlying PCF container. + public Container Container => _c; + + /// All top-level entries, in chain order. + public List Entries() => _c.Entries(); + + /// The top-level DCP container entries. + public List Containers() + { + var outList = new List(); + foreach (var e in _c.Entries()) + { + if (e.PartitionType == Constants.DcpContainerType) + { + outList.Add(e); + } + } + return outList; + } + + /// Parse the arena of a DCP container entry. + public Arena OpenArena(PartitionEntry entry) + { + if (entry.PartitionType != Constants.DcpContainerType) + { + throw PcfDcpException.NotADcpContainer(); + } + return Arena.Parse(_c.ReadPartitionData(entry)); + } + + /// Every inner partition across every DCP container, in file order. + public List InnerPartitions() + { + var outList = new List(); + foreach (var cont in Containers()) + { + var arena = OpenArena(cont); + foreach (var info in arena.Inners()) + { + outList.Add(new InnerLocation { ContainerUid = (byte[])cont.Uid.Clone(), Info = info }); + } + } + return outList; + } + + /// Resolve a uid against the flattened set top-level ∪ inner (spec 2.1). + public Resolved ResolveUid(byte[] uid) + { + foreach (var e in _c.Entries()) + { + if (Arena.BytesEqual(e.Uid, uid)) + { + return new Resolved { IsTopLevel = true, Entry = e }; + } + } + foreach (var loc in InnerPartitions()) + { + if (Arena.BytesEqual(loc.Info.Uid, uid)) + { + return new Resolved { IsTopLevel = false, Inner = loc }; + } + } + throw PcfDcpException.NotFound(); + } + + /// Reconstruct an inner partition's logical content by uid. 
public byte[] ReadInner(byte[] uid)
{
    // Scan containers in the order Containers() yields them; the first
    // arena that lists the uid supplies the content.
    foreach (var cont in Containers())
    {
        var arena = OpenArena(cont);
        foreach (var u in arena.Uids())
        {
            if (Arena.BytesEqual(u, uid))
            {
                return arena.Content(uid);
            }
        }
    }
    throw PcfDcpException.NotFound();
}

/// <summary>
/// Full DCP-aware verification: PCF integrity, each inner Table Block's
/// table_hash, reconstruction length and (when algorithmic) data_hash, no
/// nested container, and file-wide uid uniqueness.
/// </summary>
/// <exception cref="PcfDcpException">
/// DuplicateUid, NestedContainer, LengthMismatch, HashMismatch or
/// OffsetOutOfRange, depending on which check fails first.
/// </exception>
public void Verify()
{
    _c.Verify();

    // Uids are tracked as hex strings so top-level and inner partitions
    // share one uniqueness set.
    var seen = new HashSet<string>();
    foreach (var e in _c.Entries())
    {
        if (!seen.Add(Hex(e.Uid)))
        {
            throw PcfDcpException.DuplicateUid();
        }
    }

    foreach (var cont in Containers())
    {
        var data = _c.ReadPartitionData(cont);
        VerifyInnerTableHashes(data);

        var arena = Arena.Parse(data);
        foreach (var info in arena.Inners())
        {
            if (info.PartitionType == Constants.DcpContainerType)
            {
                throw PcfDcpException.NestedContainer();
            }
            if (!seen.Add(Hex(info.Uid)))
            {
                throw PcfDcpException.DuplicateUid();
            }
            var content = arena.Content(info.Uid);
            if ((ulong)content.Length != info.UsedBytes)
            {
                throw PcfDcpException.LengthMismatch((long)info.UsedBytes, content.Length);
            }
            if (!info.DataHashAlgo.Verify(content, info.DataHash))
            {
                throw PcfDcpException.HashMismatch();
            }
        }
    }
}

/// <summary>
/// Walk the inner Table Block chain of one arena and check each block's
/// table_hash whenever its hash algorithm reports that it verifies.
/// </summary>
/// <param name="arena">Raw bytes of one DCP container partition.</param>
/// <exception cref="PcfDcpException">
/// OffsetOutOfRange if a block header or its entries fall outside
/// <paramref name="arena"/> or the chain exceeds the block budget;
/// HashMismatch if a stored table hash differs from the recomputed one.
/// </exception>
private static void VerifyInnerTableHashes(byte[] arena)
{
    int headerSize = (int)Pcf.Constants.TableHeaderSize;
    int entrySize = (int)Pcf.Constants.EntrySize;

    DcpHeader header = DcpHeader.Read(arena);
    ulong off = header.InnerTableOffset;

    // A chain cannot contain more blocks than headers fit in the arena, so
    // running out of budget means the chain loops back on itself.
    int budget = arena.Length / headerSize + 1;
    while (off != 0)
    {
        if (budget == 0)
        {
            throw PcfDcpException.OffsetOutOfRange();
        }
        budget -= 1;

        // Bounds-check in 64-bit space BEFORE narrowing to int. The offset
        // comes straight from the file: a value above int.MaxValue must be
        // reported as OffsetOutOfRange rather than surfacing as an
        // OverflowException, and "offset + size" must never be formed in
        // int arithmetic where it could wrap past the guard.
        ulong arenaLen = (ulong)arena.Length;
        if (off > arenaLen || arenaLen - off < (ulong)headerSize)
        {
            throw PcfDcpException.OffsetOutOfRange();
        }
        int baseOff = (int)off;

        var hb = new byte[headerSize];
        Buffer.BlockCopy(arena, baseOff, hb, 0, hb.Length);
        var h = TableBlockHeader.FromBytes(hb);

        // The entries are packed directly after the header; if the last one
        // fits, all of them do. long arithmetic keeps the sum exact.
        long entriesEnd = (long)baseOff + headerSize + (long)h.PartitionCount * entrySize;
        if (entriesEnd > arena.Length)
        {
            throw PcfDcpException.OffsetOutOfRange();
        }

        var entries = new List<PartitionEntry>(h.PartitionCount);
        for (int i = 0; i < h.PartitionCount; i++)
        {
            int eo = baseOff + headerSize + i * entrySize;
            var eb = new byte[entrySize];
            Buffer.BlockCopy(arena, eo, eb, 0, eb.Length);
            entries.Add(PartitionEntry.FromBytes(eb));
        }

        if (h.TableHashAlgo.Verifies())
        {
            var computed = TableBlockHeader.ComputeTableHash(h.TableHashAlgo, h.NextTableOffset, entries);
            int n = h.TableHashAlgo.DigestLen();
            for (int i = 0; i < n; i++)
            {
                if (computed[i] != h.TableHash[i])
                {
                    throw PcfDcpException.HashMismatch();
                }
            }
        }
        off = h.NextTableOffset;
    }
}

/// <summary>Lower-case hexadecimal rendering of <paramref name="b"/>, two digits per byte.</summary>
private static string Hex(byte[] b)
{
    var sb = new StringBuilder(b.Length * 2);
    foreach (var x in b)
    {
        sb.Append(x.ToString("x2"));
    }
    return sb.ToString();
}
}
diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpWriter.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpWriter.cs
new file mode 100644
index 0000000..2f13f39
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/DcpWriter.cs
@@ -0,0 +1,199 @@
using System;
using System.Collections.Generic;
using System.IO;
using Pcf;

namespace Pcf.Dcp;

///
/// Building and rewriting PCF files that carry DCP containers. The writer keeps
/// the whole file as an in-memory list of top-level partitions and emits a
/// fresh, canonical PCF image on demand. Every mutating operation is a logical
/// edit of that list followed by a rebuild — simple and always correct for a
/// reference implementation; the result is a fully conforming PCF v1.0 file.
+/// +public sealed class DcpWriter +{ + private sealed class TopPart + { + public uint PartitionType; + public byte[] Uid; + public string Label; + public HashAlgo DataHashAlgo; + public byte[] PlainData; // non-null for a plain partition + public Arena Arena; // non-null for a DCP container + } + + private readonly List _parts = new List(); + private readonly HashAlgo _tableHashAlgo = HashAlgo.Sha256; + private bool _trailer; + + /// Load an existing PCF file into the writer's model. + public static DcpWriter Open(Stream storage) + { + var c = Container.Open(storage); + var w = new DcpWriter(); + foreach (var e in c.Entries()) + { + var data = c.ReadPartitionData(e); + var label = PartitionEntry.DecodeLabel(e.Label); + var part = new TopPart + { + PartitionType = e.PartitionType, + Uid = (byte[])e.Uid.Clone(), + Label = label, + DataHashAlgo = e.DataHashAlgo, + }; + if (e.PartitionType == Constants.DcpContainerType) + { + part.Arena = Arena.Parse(data); + } + else + { + part.PlainData = data; + } + w._parts.Add(part); + } + return w; + } + + /// Finalise emitted images in trailer mode (append-only host). + public void SetTrailer(bool on) => _trailer = on; + + private void EnsureUnique(byte[] uid) + { + foreach (var p in _parts) + { + if (Arena.BytesEqual(p.Uid, uid)) + { + throw PcfDcpException.DuplicateUid(); + } + } + } + + /// Add a DCP container partition holding . + public void AddContainer(byte[] uid, string label, Arena arena) + { + EnsureUnique(uid); + _parts.Add(new TopPart + { + PartitionType = Constants.DcpContainerType, + Uid = (byte[])uid.Clone(), + Label = label, + DataHashAlgo = HashAlgo.None, + Arena = arena, + }); + } + + /// Add an ordinary top-level partition. 
+ public void AddPlain(uint partitionType, byte[] uid, string label, byte[] data, HashAlgo dataHashAlgo) + { + EnsureUnique(uid); + _parts.Add(new TopPart + { + PartitionType = partitionType, + Uid = (byte[])uid.Clone(), + Label = label, + DataHashAlgo = dataHashAlgo, + PlainData = data, + }); + } + + private Arena ContainerArena(byte[] uid) + { + foreach (var p in _parts) + { + if (Arena.BytesEqual(p.Uid, uid)) + { + if (p.Arena == null) + { + throw PcfDcpException.NotADcpContainer(); + } + return p.Arena; + } + } + throw PcfDcpException.NotFound(); + } + + /// Borrow a container's arena for inspection or in-place editing. + public Arena GetArena(byte[] containerUid) => ContainerArena(containerUid); + + // ---- migration: promotion / demotion ----------------------------------- + + /// + /// Promote an inner partition out of its DCP container to a top-level PCF + /// partition (dynamic → fixed), preserving uid, type, label, hash algorithm + /// and data_hash (the promotion invariant, spec Section 10.4). + /// + public void Promote(byte[] containerUid, byte[] innerUid) + { + var arena = ContainerArena(containerUid); + var piece = arena.RemoveInner(innerUid); + _parts.Add(new TopPart + { + PartitionType = piece.PartitionType, + Uid = (byte[])innerUid.Clone(), + Label = piece.Label, + DataHashAlgo = piece.DataHashAlgo, + PlainData = piece.Content, + }); + } + + /// + /// Demote a top-level partition into a DCP container as an inner partition + /// (fixed → dynamic), preserving uid, type, label, hash algorithm and + /// data_hash. The content becomes a single DATA extent. 
+ /// + public void Demote(byte[] partUid, byte[] containerUid) + { + int pos = -1; + for (int i = 0; i < _parts.Count; i++) + { + if (Arena.BytesEqual(_parts[i].Uid, partUid)) + { + pos = i; + break; + } + } + if (pos < 0) + { + throw PcfDcpException.NotFound(); + } + var p = _parts[pos]; + if (p.PartitionType == Constants.DcpContainerType || p.PlainData == null) + { + throw PcfDcpException.NestedContainer(); + } + var arena = ContainerArena(containerUid); + arena.AddInner(p.PartitionType, partUid, p.Label, p.PlainData, p.DataHashAlgo, Chunker.Whole()); + _parts.RemoveAt(pos); + } + + // ---- container-level maintenance --------------------------------------- + + /// Re-chunk and deduplicate a container's inner partitions. + public long Dedup(byte[] containerUid, Chunker chunker) => ContainerArena(containerUid).Dedup(chunker); + + /// Compact / defragment a container's arena. Returns bytes reclaimed. + public long Defrag(byte[] containerUid) => ContainerArena(containerUid).Compact(); + + // ---- serialisation ----------------------------------------------------- + + /// Build a fresh, canonical PCF image of the whole file. + public byte[] ToImage() + { + uint cap = (uint)Math.Max(1, _parts.Count); + var stream = new MemoryStream(); + var c = Container.CreateWith(stream, cap, _tableHashAlgo); + foreach (var p in _parts) + { + byte[] data = p.Arena != null ? 
p.Arena.ToBytes() : p.PlainData; + c.AddPartition(p.PartitionType, p.Uid, p.Label, data, 0, p.DataHashAlgo); + } + if (_trailer) + { + c.FinalizeWithTrailer(); + } + return ((MemoryStream)c.Storage).ToArray(); + } +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/FragTableHeader.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/FragTableHeader.cs new file mode 100644 index 0000000..e697e47 --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/FragTableHeader.cs @@ -0,0 +1,104 @@ +using System; +using System.Collections.Generic; + +namespace Pcf.Dcp; + +/// The 9-byte header that begins each Fragment Table block (spec 8.1). +public sealed class FragTableHeader +{ + /// Arena-relative offset of the next block of this partition, or 0. + public ulong NextFragtableOffset { get; set; } + + /// Number of Fragment Entries packed immediately after this header. + public byte FragmentCount { get; set; } + + /// Serialise to the on-disk 9-byte layout. + public byte[] ToBytes() + { + var b = new byte[Constants.FragTableHeaderSize]; + LittleEndian.WriteU64(b, 0, NextFragtableOffset); + b[8] = FragmentCount; + return b; + } + + /// Parse from the on-disk 9-byte layout. + public static FragTableHeader FromBytes(byte[] b, int offset = 0) + { + return new FragTableHeader + { + NextFragtableOffset = LittleEndian.ReadU64(b, offset + 0), + FragmentCount = b[offset + 8], + }; + } +} + +/// Static helpers for walking and reconstructing Fragment Tables. +public static class FragmentTable +{ + /// + /// Walk an inner partition's Fragment Table chain starting at arena-relative + /// , returning its entries in logical order. 
///
/// <param name="arena">Raw arena bytes the chain offsets are relative to.</param>
/// <param name="firstOff">Offset of the first Fragment Table block, or 0 for an empty chain.</param>
/// <returns>Every Fragment Entry of the chain, in the order the blocks list them.</returns>
/// <exception cref="PcfDcpException">
/// OffsetOutOfRange if a block header or any of its entries does not lie
/// inside <paramref name="arena"/>, or if the chain has more blocks than
/// the arena could hold (which indicates a cycle).
/// </exception>
public static List<FragmentEntry> Walk(byte[] arena, ulong firstOff)
{
    var outList = new List<FragmentEntry>();
    ulong off = firstOff;
    ulong arenaLen = (ulong)arena.Length;

    // A chain cannot contain more blocks than headers fit in the arena, so
    // running out of budget means the chain loops back on itself.
    int budget = arena.Length / Constants.FragTableHeaderSize + 1;
    while (off != Constants.ArenaNone)
    {
        if (budget == 0)
        {
            throw PcfDcpException.OffsetOutOfRange();
        }
        budget -= 1;

        // Bounds-check in 64-bit space BEFORE narrowing to int. The offset
        // is read from the file: a value above int.MaxValue must be
        // reported as OffsetOutOfRange rather than surfacing as an
        // OverflowException, and "offset + size" must never be formed in
        // int arithmetic where it could wrap past the guard.
        if (off > arenaLen || arenaLen - off < (ulong)Constants.FragTableHeaderSize)
        {
            throw PcfDcpException.OffsetOutOfRange();
        }
        int baseOff = (int)off;
        var h = FragTableHeader.FromBytes(arena, baseOff);

        // The entries are packed directly after the header; if the last one
        // fits, all of them do. long arithmetic keeps the sum exact.
        long entriesEnd = (long)baseOff + Constants.FragTableHeaderSize
            + (long)h.FragmentCount * Constants.FragmentEntrySize;
        if (entriesEnd > arena.Length)
        {
            throw PcfDcpException.OffsetOutOfRange();
        }

        int eo = baseOff + Constants.FragTableHeaderSize;
        for (int i = 0; i < h.FragmentCount; i++)
        {
            outList.Add(FragmentEntry.FromBytes(arena, eo));
            eo += Constants.FragmentEntrySize;
        }
        off = h.NextFragtableOffset;
    }
    return outList;
}

///
/// Reconstruct the logical content from Fragment Entries (spec Section 8.3):
/// concatenate the bytes of the DATA extents in order.
///
/// <param name="arena">Raw arena bytes the extent offsets are relative to.</param>
/// <param name="frags">Fragment Entries in logical order.</param>
/// <param name="arenaUsed">Declared arena_used; no extent may end beyond it.</param>
/// <returns>The bytes of all extents, concatenated in entry order.</returns>
/// <exception cref="PcfDcpException">
/// BadFragmentKind if an entry is not DATA; OffsetOutOfRange if an extent
/// ends beyond <paramref name="arenaUsed"/> or beyond the end of
/// <paramref name="arena"/>.
/// </exception>
public static byte[] Reconstruct(byte[] arena, IReadOnlyList<FragmentEntry> frags, ulong arenaUsed)
{
    // An extent has to end within both the declared arena_used and the
    // bytes actually present, so validate against the smaller of the two.
    ulong limit = Math.Min(arenaUsed, (ulong)arena.Length);

    long total = 0;
    foreach (var f in frags)
    {
        if (!f.IsData())
        {
            throw PcfDcpException.BadFragmentKind(f.Kind);
        }
        // Do not form ExtentOffset + ExtentLength: both come from the file
        // and the unsigned 64-bit sum can wrap to a small value that slips
        // past the range check (or throw OverflowException in a checked
        // build). Comparing against the remaining room cannot overflow.
        if (f.ExtentOffset > limit || f.ExtentLength > limit - f.ExtentOffset)
        {
            throw PcfDcpException.OffsetOutOfRange();
        }
        total += (long)f.ExtentLength;
    }

    var outBytes = new byte[total];
    int p = 0;
    foreach (var f in frags)
    {
        // Narrowing is safe here: the loop above bounded offset and length
        // by arena.Length, which itself fits in an int.
        int length = (int)f.ExtentLength;
        Buffer.BlockCopy(arena, (int)f.ExtentOffset, outBytes, p, length);
        p += length;
    }
    return outBytes;
}
}
diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/FragmentEntry.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/FragmentEntry.cs
new file mode 100644
index 0000000..ae83847
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/FragmentEntry.cs
@@ -0,0 +1,46 @@
namespace Pcf.Dcp;

/// One Fragment Entry: a single extent of an inner partition (spec 8.2).
public sealed class FragmentEntry
{
    /// Arena-relative start of the extent's bytes.
    public ulong ExtentOffset { get; set; }

    /// Length of the extent in bytes.
    public ulong ExtentLength { get; set; }

    /// Extent kind (1 = DATA; 0 invalid; 2/3 reserved).
    public byte Kind { get; set; }

    /// flags byte (bit 0 = SHARED; others reserved 0).
    public byte Flags { get; set; }

    /// Serialise to the on-disk 18-byte layout.
    public byte[] ToBytes()
    {
        var b = new byte[Constants.FragmentEntrySize];
        LittleEndian.WriteU64(b, 0, ExtentOffset);
        LittleEndian.WriteU64(b, 8, ExtentLength);
        b[16] = Kind;
        b[17] = Flags;
        return b;
    }

    /// Parse from the on-disk 18-byte layout.
+ public static FragmentEntry FromBytes(byte[] b, int offset = 0) + { + return new FragmentEntry + { + ExtentOffset = LittleEndian.ReadU64(b, offset + 0), + ExtentLength = LittleEndian.ReadU64(b, offset + 8), + Kind = b[offset + 16], + Flags = b[offset + 17], + }; + } + + /// Whether this entry's kind is DATA. + public bool IsData() => Kind == Constants.KindData; + + /// Whether the SHARED flag (bit 0) is set. + public bool IsShared() => (Flags & Constants.FlagShared) != 0; +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/InnerInfo.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/InnerInfo.cs new file mode 100644 index 0000000..0d00a82 --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/InnerInfo.cs @@ -0,0 +1,45 @@ +using System.Collections.Generic; +using Pcf; + +namespace Pcf.Dcp; + +/// A read-only view of one extent, for tooling and tests. +public sealed class ExtentInfo +{ + /// Arena/pool-relative offset of the extent. + public ulong ExtentOffset { get; set; } + + /// Length of the extent in bytes. + public ulong ExtentLength { get; set; } + + /// Extent kind (1 = DATA). + public byte Kind { get; set; } + + /// Whether the SHARED flag is set. + public bool Shared { get; set; } +} + +/// A read-only view of one inner partition. +public sealed class InnerInfo +{ + /// Application partition type. + public uint PartitionType { get; set; } + + /// 16-byte uid (unique file-wide). + public byte[] Uid { get; set; } + + /// Decoded label. + public string Label { get; set; } + + /// Logical content length (= used_bytes). + public ulong UsedBytes { get; set; } + + /// Hash algorithm protecting the logical content. + public HashAlgo DataHashAlgo { get; set; } + + /// The 64-byte data-hash field over the logical content. + public byte[] DataHash { get; set; } + + /// The partition's extents in logical order. 
+ public List Extents { get; set; } +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/LittleEndian.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/LittleEndian.cs new file mode 100644 index 0000000..f36d93d --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/LittleEndian.cs @@ -0,0 +1,35 @@ +namespace Pcf.Dcp; + +/// +/// Explicit little-endian integer helpers. PCF mandates little-endian for every +/// multi-byte integer; reading/writing the bytes by hand keeps the encoding +/// independent of the host's native byte order. +/// +internal static class LittleEndian +{ + public static void WriteU16(byte[] b, int o, ushort v) + { + b[o] = (byte)(v & 0xFF); + b[o + 1] = (byte)((v >> 8) & 0xFF); + } + + public static void WriteU64(byte[] b, int o, ulong v) + { + for (int i = 0; i < 8; i++) + { + b[o + i] = (byte)((v >> (8 * i)) & 0xFF); + } + } + + public static ushort ReadU16(byte[] b, int o) => (ushort)(b[o] | (b[o + 1] << 8)); + + public static ulong ReadU64(byte[] b, int o) + { + ulong v = 0; + for (int i = 0; i < 8; i++) + { + v |= (ulong)b[o + i] << (8 * i); + } + return v; + } +} diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Pcf.Dcp.csproj b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Pcf.Dcp.csproj new file mode 100644 index 0000000..27af897 --- /dev/null +++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/Pcf.Dcp.csproj @@ -0,0 +1,30 @@ + + + + netstandard2.0 + latest + disable + disable + Pcf.Dcp + Pcf.Dcp + true + Reader/writer for PCF-DCP v1.0, the PCF Dynamic Container Partition profile. 
+ + KDuma.Pcf.Dcp + pcf;pcf-dcp;container;deduplication;fragmentation + README.md + true + snupkg + + + + + + + + + + + +
diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/PcfDcpException.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/PcfDcpException.cs
new file mode 100644
index 0000000..796387a
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/PcfDcpException.cs
@@ -0,0 +1,90 @@
+using System;
+
+namespace Pcf.Dcp;
+
+/// <summary>Discriminant identifying which kind of <see cref="PcfDcpException"/> occurred.</summary>
+public enum PcfDcpErrorKind
+{
+    /// <summary>The arena did not begin with the "PDCP" magic.</summary>
+    BadDcpMagic,
+    /// <summary>The arena's profile major version is not implemented.</summary>
+    UnsupportedProfileMajor,
+    /// <summary>A Fragment Entry carried an unsupported kind (HOLE/REF/unknown).</summary>
+    BadFragmentKind,
+    /// <summary>An extent range escapes [0, arena_used).</summary>
+    OffsetOutOfRange,
+    /// <summary>Reconstructed length did not match used_bytes, or a hash failed.</summary>
+    LengthMismatch,
+    /// <summary>A stored hash (inner table_hash or data_hash) did not verify.</summary>
+    HashMismatch,
+    /// <summary>No inner (or top-level) partition with the requested uid.</summary>
+    NotFound,
+    /// <summary>A uid is used by more than one partition file-wide.</summary>
+    DuplicateUid,
+    /// <summary>An inner partition is itself a DCP container (nesting forbidden).</summary>
+    NestedContainer,
+    /// <summary>A partition uid is the PCF NIL uid.</summary>
+    NilUid,
+    /// <summary>A partition type is the PCF reserved type 0x00000000.</summary>
+    ReservedType,
+    /// <summary>A top-level partition expected to be a DCP container is not one.</summary>
+    NotADcpContainer,
+    /// <summary>A logical edit addressed a position beyond the partition's content.</summary>
+    PositionOutOfRange,
+}
+
+/// <summary>All ways a PCF-DCP operation can fail.</summary>
+public sealed class PcfDcpException : Exception
+{
+    /// <summary>The kind of failure.</summary>
+    public PcfDcpErrorKind Kind { get; }
+
+    // Private: instances are only created through the internal static
+    // factories below, each of which pairs one PcfDcpErrorKind with its message.
+    private PcfDcpException(PcfDcpErrorKind kind, string message) : base(message)
+    {
+        Kind = kind;
+    }
+
+    // ---- factories, one per PcfDcpErrorKind --------------------------------
+
+    internal static PcfDcpException BadDcpMagic() =>
+        new PcfDcpException(PcfDcpErrorKind.BadDcpMagic, "arena does not begin with \"PDCP\" magic");
+
+    // v is the unsupported major version; it is only echoed in the message.
+    internal static PcfDcpException UnsupportedProfileMajor(int v) =>
+        new PcfDcpException(PcfDcpErrorKind.UnsupportedProfileMajor,
+            $"unsupported PCF-DCP profile major version {v}");
+
+    // k is the offending fragment kind; it is only echoed in the message.
+    internal static PcfDcpException BadFragmentKind(int k) =>
+        new PcfDcpException(PcfDcpErrorKind.BadFragmentKind, $"unsupported fragment kind {k}");
+
+    internal static PcfDcpException OffsetOutOfRange() =>
+        new PcfDcpException(PcfDcpErrorKind.OffsetOutOfRange, "extent range escapes the arena");
+
+    // Both lengths are only echoed in the message; they are not stored on the exception.
+    internal static PcfDcpException LengthMismatch(long expected, long got) =>
+        new PcfDcpException(PcfDcpErrorKind.LengthMismatch,
+            $"logical length mismatch: expected {expected}, got {got}");
+
+    internal static PcfDcpException HashMismatch() =>
+        new PcfDcpException(PcfDcpErrorKind.HashMismatch, "stored hash does not verify");
+
+    internal static PcfDcpException NotFound() =>
+        new PcfDcpException(PcfDcpErrorKind.NotFound, "no partition with that uid");
+
+    internal static PcfDcpException DuplicateUid() =>
+        new PcfDcpException(PcfDcpErrorKind.DuplicateUid, "uid is not unique file-wide");
+
+    internal static PcfDcpException NestedContainer() =>
+        new PcfDcpException(PcfDcpErrorKind.NestedContainer,
+            "an inner partition may not be a DCP container");
+
+    internal static PcfDcpException NilUid() =>
+        new PcfDcpException(PcfDcpErrorKind.NilUid, "uid is the NIL uid");
+
+    internal static PcfDcpException ReservedType() =>
+        new PcfDcpException(PcfDcpErrorKind.ReservedType,
+            "partition type is the reserved type 0x00000000");
+
+    internal static PcfDcpException NotADcpContainer() =>
+        new PcfDcpException(PcfDcpErrorKind.NotADcpContainer, "partition is not a DCP container");
+
+    internal static PcfDcpException PositionOutOfRange() =>
+        new PcfDcpException(PcfDcpErrorKind.PositionOutOfRange,
+            "logical position is past end of content");
+}
diff --git a/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/ReferenceVector.cs b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/ReferenceVector.cs
new file mode 100644
index 0000000..e007cd6
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/src/Pcf.Dcp/ReferenceVector.cs
@@ -0,0 +1,38 @@
+using System.Text;
+using Pcf;
+
+namespace Pcf.Dcp;
+
+/// <summary>The canonical PCF-DCP v1.0 test vector (spec Section 17).</summary>
+public static class ReferenceVector
+{
+    /// <summary>Returns a new 16-byte array with every byte set to <paramref name="b"/>.</summary>
+    private static byte[] Fill(byte b)
+    {
+        var u = new byte[16];
+        for (int i = 0; i < 16; i++)
+        {
+            u[i] = b;
+        }
+        return u;
+    }
+
+    /// <summary>
+    /// Build the byte-exact 700-byte reference file from spec Section 17: one
+    /// DCP container ("dcp", uid 16×0xDC) holding inner "A" ("Hello, World!" as
+    /// two extents, the second shared) and inner "B" ("World!" deduplicated onto
+    /// A's second extent). Building this logical container and emitting the
+    /// canonical layout MUST reproduce these exact bytes.
+    /// </summary>
+    public static byte[] Build()
+    {
+        var arena = new Arena();
+        // "A" is split into 7-byte chunks: "Hello, " followed by "World!".
+        arena.AddInner(0x0000_0010, Fill(0xA1), "A",
+            Encoding.UTF8.GetBytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        // "B" is added as a single chunk whose bytes equal A's second chunk.
+        arena.AddInner(0x0000_0010, Fill(0xB2), "B",
+            Encoding.UTF8.GetBytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+
+        var w = new DcpWriter();
+        w.AddContainer(Fill(0xDC), "dcp", arena);
+        return w.ToImage();
+    }
+}
diff --git a/implementations/dotnet/pcf-dcp/testdata/canonical.bin b/implementations/dotnet/pcf-dcp/testdata/canonical.bin
new file mode 100644
index 0000000..834aea4
Binary files /dev/null and b/implementations/dotnet/pcf-dcp/testdata/canonical.bin differ
diff --git a/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/CanonicalVectorTests.cs b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/CanonicalVectorTests.cs
new file mode 100644
index 0000000..b0e1d80
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/CanonicalVectorTests.cs
@@ -0,0 +1,54 @@
+using System.IO;
+using Pcf;
+using Pcf.Dcp;
+using Xunit;
+
+namespace Pcf.Dcp.Tests;
+
+// Checks the shipped testdata/canonical.bin and the output of
+// ReferenceVector.Build() against the expected length (700) and SHA-256.
+public class CanonicalVectorTests
+{
+    // Same digest the CI workflow pins as EXPECTED_SHA256 for the DCP vector.
+    private const string ExpectedSha256 =
+        "b9bb59794abed008863063886d8d0daa810c44939c1c5d29449475ced8156b90";
+
+    // Reads testdata/canonical.bin from the directory that holds the test assembly.
+    private static byte[] Canonical() =>
+        File.ReadAllBytes(Path.Combine(
+            Path.GetDirectoryName(typeof(CanonicalVectorTests).Assembly.Location)!,
+            "testdata", "canonical.bin"));
+
+    // The checked-in file itself has the expected size and digest.
+    [Fact]
+    public void ShipsExpectedSha256AndLength()
+    {
+        var c = Canonical();
+        Assert.Equal(700, c.Length);
+        Assert.Equal(ExpectedSha256, TestSupport.Sha256Hex(c));
+    }
+
+    // The writer regenerates the same bytes as the checked-in file.
+    [Fact]
+    public void RegeneratesByteExact()
+    {
+        var image = ReferenceVector.Build();
+        Assert.Equal(700, image.Length);
+        Assert.Equal(ExpectedSha256, TestSupport.Sha256Hex(image));
+        // Compared as hex strings so a failure prints a readable diff.
+        Assert.Equal(TestSupport.Hex(Canonical()), TestSupport.Hex(image));
+    }
+
+    // The vector opens and verifies with the plain PCF Container reader.
+    [Fact]
+    public void IsValidPcf()
+    {
+        var c = Container.Open(new MemoryStream(Canonical()));
+        c.Verify();
+        var entries = c.Entries();
+
Assert.Single(entries);
+        Assert.Equal(0xAAAC0001u, entries[0].PartitionType);
+        Assert.Equal(465ul, entries[0].UsedBytes);
+    }
+
+    // The vector opens and verifies with the DCP-aware reader, and both inner
+    // partitions read back their logical content.
+    [Fact]
+    public void IsValidDcp()
+    {
+        var r = DcpReader.Open(new MemoryStream(Canonical()));
+        r.Verify();
+        Assert.Equal("Hello, World!", TestSupport.Str(r.ReadInner(TestSupport.Fill(0xA1))));
+        Assert.Equal("World!", TestSupport.Str(r.ReadInner(TestSupport.Fill(0xB2))));
+    }
+}
diff --git a/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/CoverageTests.cs b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/CoverageTests.cs
new file mode 100644
index 0000000..e33d305
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/CoverageTests.cs
@@ -0,0 +1,132 @@
+using System.IO;
+using Pcf;
+using Pcf.Dcp;
+using Xunit;
+
+namespace Pcf.Dcp.Tests;
+
+// Error-path and edge-case tests: each rejection test asserts the
+// PcfDcpErrorKind carried by the thrown PcfDcpException.
+public class CoverageTests
+{
+    // Runs fn, requires it to throw PcfDcpException, and returns the exception's Kind.
+    // The explicit type argument is required: without it `ex` has no Kind property.
+    private static PcfDcpErrorKind KindOf(System.Action fn)
+    {
+        var ex = Assert.Throws<PcfDcpException>(fn);
+        return ex.Kind;
+    }
+
+    // Corrupting the first serialized byte must be reported as BadDcpMagic.
+    [Fact]
+    public void RejectsBadArenaMagic()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(1), "x", TestSupport.Bytes("hi"), HashAlgo.Sha256, Chunker.Whole());
+        var bytes = a.ToBytes();
+        bytes[0] = 0x58;
+        Assert.Equal(PcfDcpErrorKind.BadDcpMagic, KindOf(() => Arena.Parse(bytes)));
+    }
+
+    // Setting serialized byte 4 to 2 must be reported as UnsupportedProfileMajor.
+    [Fact]
+    public void RejectsUnsupportedProfileMajor()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(1), "x", TestSupport.Bytes("hi"), HashAlgo.Sha256, Chunker.Whole());
+        var bytes = a.ToBytes();
+        bytes[4] = 2;
+        Assert.Equal(PcfDcpErrorKind.UnsupportedProfileMajor, KindOf(() => Arena.Parse(bytes)));
+    }
+
+    // AddInner rejects type 0, the DCP container type 0xAAAC0001, and an all-zero uid.
+    [Fact]
+    public void RejectsReservedNestedAndNilUid()
+    {
+        var a = new Arena();
+        Assert.Equal(PcfDcpErrorKind.ReservedType,
+            KindOf(() => a.AddInner(0, TestSupport.Fill(1), "x", TestSupport.Bytes(""), HashAlgo.None, Chunker.Whole())));
+        Assert.Equal(PcfDcpErrorKind.NestedContainer,
+            KindOf(() => a.AddInner(0xAAAC0001, TestSupport.Fill(1), "x", TestSupport.Bytes(""), HashAlgo.None, Chunker.Whole())));
+        Assert.Equal(PcfDcpErrorKind.NilUid,
+            KindOf(() => a.AddInner(0x10, new byte[16], "x", TestSupport.Bytes(""), HashAlgo.None, Chunker.Whole())));
+    }
+
+    // Adding a second inner partition with an already-used uid is rejected.
+    [Fact]
+    public void RejectsDuplicateUidWithinArena()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(1), "x", TestSupport.Bytes("a"), HashAlgo.None, Chunker.Whole());
+        Assert.Equal(PcfDcpErrorKind.DuplicateUid,
+            KindOf(() => a.AddInner(0x10, TestSupport.Fill(1), "y", TestSupport.Bytes("b"), HashAlgo.None, Chunker.Whole())));
+    }
+
+    // Reconstruct rejects a fragment with Kind = 2, and an extent (60 + 100)
+    // that runs past the 64-byte limit passed as the last argument.
+    [Fact]
+    public void RejectsBadKindAndOutOfRangeExtent()
+    {
+        Assert.Equal(PcfDcpErrorKind.BadFragmentKind,
+            KindOf(() => FragmentTable.Reconstruct(new byte[64],
+                new[] { new FragmentEntry { ExtentOffset = 24, ExtentLength = 1, Kind = 2, Flags = 0 } }, 64)));
+        Assert.Equal(PcfDcpErrorKind.OffsetOutOfRange,
+            KindOf(() => FragmentTable.Reconstruct(new byte[64],
+                new[] { new FragmentEntry { ExtentOffset = 60, ExtentLength = 100, Kind = 1, Flags = 0 } }, 64)));
+    }
+
+    // An inner partition with empty content has zero length and no extents,
+    // and survives a ToBytes/Parse round trip.
+    [Fact]
+    public void AllowsEmptyInner()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(1), "empty", TestSupport.Bytes(""), HashAlgo.Sha256, Chunker.Whole());
+        var info = a.GetInner(TestSupport.Fill(1));
+        Assert.Equal(0ul, info.UsedBytes);
+        Assert.Empty(info.Extents);
+        var parsed = Arena.Parse(a.ToBytes());
+        Assert.Empty(parsed.Content(TestSupport.Fill(1)));
+    }
+
+    // 300 inner partitions (more than 255) must round-trip and verify.
+    [Fact]
+    public void ChainsInnerTableBeyond255()
+    {
+        var a = new Arena();
+        for (int i = 0; i < 300; i++)
+        {
+            // uid bytes 0..1 carry i (little-endian); byte 15 keeps the uid non-zero for i == 0.
+            var uid = new byte[16];
+            uid[0] = (byte)(i & 0xFF);
+            uid[1] = (byte)((i >> 8) & 0xFF);
+            uid[15] = 1;
+            a.AddInner(0x10, uid, "n", new byte[] { (byte)(i & 0xFF), (byte)((i >> 8) & 0xFF) }, HashAlgo.Sha256, Chunker.Whole());
+        }
+        Assert.Equal(300, a.Count);
+        Assert.Equal(300, Arena.Parse(a.ToBytes()).Count);
+
+        var w = new DcpWriter();
+        w.AddContainer(TestSupport.Fill(0xDC), "big", a);
+        DcpReader.Open(new MemoryStream(w.ToImage())).Verify();
+    }
+
+    // One inner partition cut into 300 one-byte chunks must parse back to the same bytes.
+    [Fact]
+    public void ChainsFragmentTableBeyond255()
+    {
+        var a = new Arena();
+        // NOTE: values wrap after 255 (i & 0xFF), so the last 44 bytes repeat earlier ones.
+        var distinct = new byte[300];
+        for (int i = 0; i < 300; i++) distinct[i] = (byte)(i & 0xFF);
+        a.AddInner(0x10, TestSupport.Fill(2), "frag", distinct, HashAlgo.Sha256, Chunker.Fixed(1));
+        var parsed = Arena.Parse(a.ToBytes());
+        Assert.Equal(TestSupport.Hex(distinct), TestSupport.Hex(parsed.Content(TestSupport.Fill(2))));
+    }
+
+    // The same uid used by an inner partition and a plain top-level partition
+    // must make Verify fail with DuplicateUid.
+    [Fact]
+    public void VerifyDetectsFileWideUidCollision()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(0xB2), "B", TestSupport.Bytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+        var w = new DcpWriter();
+        w.AddContainer(TestSupport.Fill(0xDC), "dcp", a);
+        w.AddPlain(0x10, TestSupport.Fill(0xB2), "dup", TestSupport.Bytes("x"), HashAlgo.Sha256);
+        var r = DcpReader.Open(new MemoryStream(w.ToImage()));
+        Assert.Equal(PcfDcpErrorKind.DuplicateUid, KindOf(() => r.Verify()));
+    }
+
+    // OpenArena on an ordinary (type 0x10) partition must fail with NotADcpContainer.
+    [Fact]
+    public void OpenArenaRejectsNonDcpPartition()
+    {
+        var c = Container.CreateWith(new MemoryStream(), 4, HashAlgo.Sha256);
+        c.AddPartition(0x10, TestSupport.Fill(7), "plain", TestSupport.Bytes("hi"), 0, HashAlgo.Sha256);
+        var r = DcpReader.Open(c.Storage);
+        var entry = r.Entries()[0];
+        Assert.Equal(PcfDcpErrorKind.NotADcpContainer, KindOf(() => r.OpenArena(entry)));
+    }
+}
diff --git a/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/Pcf.Dcp.Tests.csproj b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/Pcf.Dcp.Tests.csproj
new file mode 100644
index 0000000..ae37d52
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/Pcf.Dcp.Tests.csproj
@@ -0,0 +1,34 @@
+ + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + + + + + testdata\canonical.bin + PreserveNewest + + + +
diff --git a/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/RoundtripTests.cs b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/RoundtripTests.cs
new file mode 100644
index 0000000..95b63b2
--- /dev/null
+++
b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/RoundtripTests.cs
@@ -0,0 +1,130 @@
+using System.IO;
+using Pcf;
+using Pcf.Dcp;
+using Xunit;
+
+namespace Pcf.Dcp.Tests;
+
+// Round-trip tests for arena edits, dedup/compaction, promotion/demotion and trailer mode.
+public class RoundtripTests
+{
+    // Builds a file image with one container 16x0xDC holding inner "A"
+    // ("Hello, World!", 7-byte chunks) and inner "B" ("World!", one chunk).
+    private static byte[] BuildTwoInnerFile()
+    {
+        var arena = new Arena();
+        arena.AddInner(0x10, TestSupport.Fill(0xA1), "A", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        arena.AddInner(0x10, TestSupport.Fill(0xB2), "B", TestSupport.Bytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+        var w = new DcpWriter();
+        w.AddContainer(TestSupport.Fill(0xDC), "dcp", arena);
+        return w.ToImage();
+    }
+
+    // Applies Append, Insert, Delete, Overwrite and Truncate in sequence and
+    // checks the reconstructed content after each step.
+    [Fact]
+    public void EditsReconstructCorrectly()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(1), "f", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+
+        a.Append(TestSupport.Fill(1), TestSupport.Bytes("!!"));
+        Assert.Equal("Hello, World!!!", TestSupport.Str(a.Content(TestSupport.Fill(1))));
+
+        // Position 5 lies inside the first 7-byte chunk, so this splits an extent.
+        a.Insert(TestSupport.Fill(1), 5, TestSupport.Bytes("XYZ"));
+        Assert.Equal("HelloXYZ, World!!!", TestSupport.Str(a.Content(TestSupport.Fill(1))));
+
+        // Deleting the 3 inserted bytes restores the previous content.
+        a.Delete(TestSupport.Fill(1), 5, 3);
+        Assert.Equal("Hello, World!!!", TestSupport.Str(a.Content(TestSupport.Fill(1))));
+
+        a.Overwrite(TestSupport.Fill(1), 0, 5, TestSupport.Bytes("HOWDY"));
+        Assert.Equal("HOWDY, World!!!", TestSupport.Str(a.Content(TestSupport.Fill(1))));
+
+        a.Truncate(TestSupport.Fill(1), 5);
+        Assert.Equal("HOWDY", TestSupport.Str(a.Content(TestSupport.Fill(1))));
+    }
+
+    // Overwriting A's "World!" range must leave B, whose content is the same bytes, unchanged.
+    [Fact]
+    public void CopyOnWriteDoesNotDisturbSharedBytes()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(0xA1), "A", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        a.AddInner(0x10, TestSupport.Fill(0xB2), "B", TestSupport.Bytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+        a.Overwrite(TestSupport.Fill(0xA1), 7, 6, TestSupport.Bytes("PLANET"));
+        Assert.Equal("Hello, PLANET", TestSupport.Str(a.Content(TestSupport.Fill(0xA1))));
+        Assert.Equal("World!", TestSupport.Str(a.Content(TestSupport.Fill(0xB2))));
+    }
+
+    // Dedup with 3-byte chunks must report savings while keeping both contents
+    // and A's data hash; Compact afterwards must keep B's content.
+    [Fact]
+    public void DedupThenDefragPreserveContent()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(1), "A", TestSupport.Bytes("abcabc"), HashAlgo.Sha256, Chunker.Whole());
+        a.AddInner(0x10, TestSupport.Fill(2), "B", TestSupport.Bytes("abcabc"), HashAlgo.Sha256, Chunker.Whole());
+        var h1 = a.GetInner(TestSupport.Fill(1)).DataHash;
+
+        long saved = a.Dedup(Chunker.Fixed(3));
+        Assert.True(saved > 0);
+        Assert.Equal("abcabc", TestSupport.Str(a.Content(TestSupport.Fill(1))));
+        Assert.Equal("abcabc", TestSupport.Str(a.Content(TestSupport.Fill(2))));
+        Assert.Equal(TestSupport.Hex(h1), TestSupport.Hex(a.GetInner(TestSupport.Fill(1)).DataHash));
+
+        a.Compact();
+        Assert.Equal("abcabc", TestSupport.Str(a.Content(TestSupport.Fill(2))));
+    }
+
+    // After B is removed and the arena compacted, none of A's extents may
+    // still be flagged Shared, and A's content is unchanged.
+    [Fact]
+    public void DefragClearsSharedWhenNoLongerAliased()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(0xA1), "A", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        a.AddInner(0x10, TestSupport.Fill(0xB2), "B", TestSupport.Bytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+        a.RemoveInner(TestSupport.Fill(0xB2));
+        a.Compact();
+        var ia = a.GetInner(TestSupport.Fill(0xA1));
+        Assert.All(ia.Extents, e => Assert.False(e.Shared));
+        Assert.Equal("Hello, World!", TestSupport.Str(a.Content(TestSupport.Fill(0xA1))));
+    }
+
+    // Promoting inner B to a top-level partition keeps its uid resolvable,
+    // its data hash and its 6-byte length, and leaves A readable.
+    [Fact]
+    public void PromotePreservesUidAndDataHash()
+    {
+        var w = DcpWriter.Open(new MemoryStream(BuildTwoInnerFile()));
+        // Capture B's data hash before promotion (B is found by its first uid byte).
+        byte[] before;
+        {
+            var r0 = DcpReader.Open(new MemoryStream(w.ToImage()));
+            before = r0.InnerPartitions().Find(l => l.Info.Uid[0] == 0xB2)!.Info.DataHash;
+        }
+
+        w.Promote(TestSupport.Fill(0xDC), TestSupport.Fill(0xB2));
+        var r = DcpReader.Open(new MemoryStream(w.ToImage()));
+        r.Verify();
+        var resolved = r.ResolveUid(TestSupport.Fill(0xB2));
+        Assert.True(resolved.IsTopLevel);
+        Assert.Equal(TestSupport.Hex(before), TestSupport.Hex(resolved.Entry!.DataHash));
+        Assert.Equal(6ul, resolved.Entry.UsedBytes);
+        Assert.Equal("Hello, World!", TestSupport.Str(r.ReadInner(TestSupport.Fill(0xA1))));
+    }
+
+    // NOTE: despite the name, this promotes B first and then demotes it back
+    // into container 16x0xDC; B must end up inner again with the same content.
+    [Fact]
+    public void DemoteThenPromoteIsIdentityForContent()
+    {
+        var w = DcpWriter.Open(new MemoryStream(BuildTwoInnerFile()));
+        w.Promote(TestSupport.Fill(0xDC), TestSupport.Fill(0xB2));
+        w.Demote(TestSupport.Fill(0xB2), TestSupport.Fill(0xDC));
+        var r = DcpReader.Open(new MemoryStream(w.ToImage()));
+        r.Verify();
+        Assert.Equal("World!", TestSupport.Str(r.ReadInner(TestSupport.Fill(0xB2))));
+        Assert.False(r.ResolveUid(TestSupport.Fill(0xB2)).IsTopLevel);
+    }
+
+    // An image written with SetTrailer(true) must verify and expose both inner partitions.
+    [Fact]
+    public void TrailerModeReadsBackIdentically()
+    {
+        var arena = new Arena();
+        arena.AddInner(0x10, TestSupport.Fill(0xA1), "A", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        arena.AddInner(0x10, TestSupport.Fill(0xB2), "B", TestSupport.Bytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+        var w = new DcpWriter();
+        w.AddContainer(TestSupport.Fill(0xDC), "dcp", arena);
+        w.SetTrailer(true);
+        var r = DcpReader.Open(new MemoryStream(w.ToImage()));
+        r.Verify();
+        Assert.Equal("Hello, World!", TestSupport.Str(r.ReadInner(TestSupport.Fill(0xA1))));
+        Assert.Equal(2, r.InnerPartitions().Count);
+    }
+}
diff --git a/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/SpecComplianceTests.cs b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/SpecComplianceTests.cs
new file mode 100644
index 0000000..7253c20
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/SpecComplianceTests.cs
@@ -0,0 +1,103 @@
+using Pcf;
+using Pcf.Dcp;
+using Xunit;
+
+namespace Pcf.Dcp.Tests;
+
+// Pins structure sizes, record round trips and hashing/dedup rules.
+public class SpecComplianceTests
+{
+    // Constants are fully qualified as Pcf.Dcp.Constants; both Pcf and Pcf.Dcp are imported above.
+    [Fact]
+    public void StructureSizesMatchAppendixA()
+    {
+        Assert.Equal(24, Pcf.Dcp.Constants.DcpHeaderSize);
+        Assert.Equal(9, Pcf.Dcp.Constants.FragTableHeaderSize);
+        Assert.Equal(18, Pcf.Dcp.Constants.FragmentEntrySize);
+
Assert.Equal(0xAAAC0001u, Pcf.Dcp.Constants.DcpContainerType);
+    }
+
+    // A header serializes with the ASCII bytes "PDCP" first and parses back
+    // to the same offsets and version numbers.
+    [Fact]
+    public void HeaderRoundTripsAndCarriesMagic()
+    {
+        var h = new DcpHeader
+        {
+            ProfileVersionMajor = 1,
+            ProfileVersionMinor = 0,
+            Flags = 0,
+            InnerTableOffset = 109,
+            ArenaUsed = 465,
+        };
+        var b = h.ToBytes();
+        // 0x50 0x44 0x43 0x50 is "PDCP" in ASCII.
+        Assert.Equal(new byte[] { 0x50, 0x44, 0x43, 0x50 }, new[] { b[0], b[1], b[2], b[3] });
+        var parsed = DcpHeader.FromBytes(b);
+        Assert.Equal(109ul, parsed.InnerTableOffset);
+        Assert.Equal(465ul, parsed.ArenaUsed);
+        Assert.Equal(1, parsed.ProfileVersionMajor);
+        Assert.Equal(0, parsed.ProfileVersionMinor);
+    }
+
+    // FragmentEntry and FragTableHeader survive ToBytes/FromBytes; Flags = 1 reads back as shared.
+    [Fact]
+    public void FragmentRecordsRoundTrip()
+    {
+        var e = new FragmentEntry { ExtentOffset = 31, ExtentLength = 6, Kind = 1, Flags = 1 };
+        var pe = FragmentEntry.FromBytes(e.ToBytes());
+        Assert.Equal(31ul, pe.ExtentOffset);
+        Assert.Equal(6ul, pe.ExtentLength);
+        Assert.Equal(1, pe.Kind);
+        Assert.True(pe.IsShared());
+
+        var fh = new FragTableHeader { NextFragtableOffset = 0, FragmentCount = 2 };
+        var pfh = FragTableHeader.FromBytes(fh.ToBytes());
+        Assert.Equal(0ul, pfh.NextFragtableOffset);
+        Assert.Equal(2, pfh.FragmentCount);
+    }
+
+    // 13 bytes cut into 7-byte chunks yield two extents whose concatenation is the content.
+    [Fact]
+    public void ReconstructionEqualsLogicalContent()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(1), "x", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        Assert.Equal("Hello, World!", TestSupport.Str(a.Content(TestSupport.Fill(1))));
+        var info = a.GetInner(TestSupport.Fill(1));
+        Assert.Equal(13ul, info.UsedBytes);
+        Assert.Equal(2, info.Extents.Count);
+    }
+
+    // The data hash is the same for whole and 3-byte chunking, and equals the
+    // SHA-256 of the raw content.
+    [Fact]
+    public void DataHashIsInvariantUnderFragmentation()
+    {
+        // Builds a one-partition arena with chunker c and returns the hex data hash.
+        string Mk(Chunker c)
+        {
+            var a = new Arena();
+            a.AddInner(0x10, TestSupport.Fill(7), "x", TestSupport.Bytes("abcdefghij"), HashAlgo.Sha256, c);
+            return TestSupport.Hex(a.GetInner(TestSupport.Fill(7)).DataHash);
+        }
+        Assert.Equal(Mk(Chunker.Whole()), Mk(Chunker.Fixed(3)));
+        Assert.Equal(Mk(Chunker.Whole()), TestSupport.Hex(HashAlgo.Sha256.Compute(TestSupport.Bytes("abcdefghij"))));
+    }
+
+    // When B's content equals A's second chunk, both aliases are flagged
+    // Shared while A's first extent is not.
+    [Fact]
+    public void DedupSetsSharedOnAllAliasesRuleF1()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(0xA1), "A", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        a.AddInner(0x10, TestSupport.Fill(0xB2), "B", TestSupport.Bytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+
+        var ia = a.GetInner(TestSupport.Fill(0xA1));
+        var ib = a.GetInner(TestSupport.Fill(0xB2));
+        Assert.False(ia.Extents[0].Shared);
+        Assert.True(ia.Extents[1].Shared);
+        Assert.Single(ib.Extents);
+        Assert.True(ib.Extents[0].Shared);
+        Assert.Equal(TestSupport.Hex(HashAlgo.Sha256.Compute(TestSupport.Bytes("World!"))), TestSupport.Hex(ib.DataHash));
+    }
+
+    // Serializing, parsing and re-serializing an arena yields identical bytes.
+    [Fact]
+    public void ParseRoundTripsCanonicalArenaByteExact()
+    {
+        var a = new Arena();
+        a.AddInner(0x10, TestSupport.Fill(0xA1), "A", TestSupport.Bytes("Hello, World!"), HashAlgo.Sha256, Chunker.Fixed(7));
+        a.AddInner(0x10, TestSupport.Fill(0xB2), "B", TestSupport.Bytes("World!"), HashAlgo.Sha256, Chunker.Whole());
+        var bytes = a.ToBytes();
+        Assert.Equal(TestSupport.Hex(bytes), TestSupport.Hex(Arena.Parse(bytes).ToBytes()));
+    }
+}
diff --git a/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/TestSupport.cs b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/TestSupport.cs
new file mode 100644
index 0000000..26d3f59
--- /dev/null
+++ b/implementations/dotnet/pcf-dcp/tests/Pcf.Dcp.Tests/TestSupport.cs
@@ -0,0 +1,32 @@
+using System.Security.Cryptography;
+using System.Text;
+
+namespace Pcf.Dcp.Tests;
+
+internal static class TestSupport
+{
+    /// <summary>A 16-byte uid with every byte equal to <paramref name="b"/>.</summary>
+    public static byte[] Fill(byte b)
+    {
+        var u = new byte[16];
+        for (int i = 0; i < 16; i++) u[i] = b;
+        return u;
+    }
+
+    /// <summary>UTF-8 encodes <paramref name="s"/>.</summary>
+    public static byte[] Bytes(string s) => Encoding.UTF8.GetBytes(s);
+
+    /// <summary>Decodes <paramref name="b"/> as UTF-8.</summary>
+    public static string Str(byte[] b) => Encoding.UTF8.GetString(b);
+
+    /// <summary>Lowercase hex of <paramref name="b"/>, two digits per byte.</summary>
+    public static string Hex(byte[] b)
+    {
+        var sb = new StringBuilder(b.Length * 2);
+        foreach (var x in b) sb.Append(x.ToString("x2"));
+        return sb.ToString();
+    }
+
+    /// <summary>Lowercase hex SHA-256 digest of <paramref name="b"/>.</summary>
+    public static string Sha256Hex(byte[] b)
+    {
+        using var sha = SHA256.Create();
+        return Hex(sha.ComputeHash(b));
+    }
+}
diff --git a/implementations/php/pcf-dcp/.gitignore b/implementations/php/pcf-dcp/.gitignore
new file mode 100644
index 0000000..977d1ba
--- /dev/null
+++ b/implementations/php/pcf-dcp/.gitignore
@@ -0,0 +1,20 @@
+# --- Composer ---
+/vendor/
+composer.lock
+
+# --- PHPUnit ---
+/.phpunit.cache/
+.phpunit.result.cache
+
+# --- Generated artifacts ---
+*.bin
+!testdata/canonical.bin
+
+# --- Editors ---
+.idea/
+.vscode/
+*.swp
+*~
+
+# --- macOS ---
+.DS_Store
diff --git a/implementations/php/pcf-dcp/README.md b/implementations/php/pcf-dcp/README.md
new file mode 100644
index 0000000..9f4d921
--- /dev/null
+++ b/implementations/php/pcf-dcp/README.md
@@ -0,0 +1,66 @@
+# kduma/pcf-dcp — PCF Dynamic Container Partition (PHP)
+
+PHP reader/writer for **PCF-DCP v1.0**, an application-level profile that adds
+*dynamic*, fragmentable, dedup-friendly sub-partitions to the
+[Partitioned Container Format](https://github.com/kduma-OSS/Partitioned-Container-Format)
+(`kduma/pcf`) without modifying the PCF byte container.
+
+This package mirrors the written specification (`PCF-DCP-spec-v1.0.txt`) and the
+Rust reference implementation field-for-field, and ships the same byte-exact
+700-byte canonical test vector as every other port. It has no cryptographic
+dependency — data/table hashing comes from `kduma/pcf` (`ext-hash`).
+ +## Model at a glance + +One new PCF partition type is defined: + +| Type | Name | Holds | +|--------------|-----------------|----------------------------------------------------| +| `0xAAAC0001` | `DCP_CONTAINER` | An *arena*: a header, an inner partition table, fragment tables, and data extents | + +``` +arena: +[ DCP Header (24 B) | data extents | Fragment Tables | Inner Table Block(s) ] +``` + +Each inner partition's logical content is the concatenation of its DATA extents; +its data hash covers that logical content, so fragmentation, deduplication, +compaction, and promotion all leave the hash (and any PCF-SIG signature over it) +unchanged. A generic PCF reader sees a DCP file as **one opaque partition**; only +a DCP-aware reader looks inside. + +## Example + +```php +use Kduma\PCF\HashAlgo; +use Kduma\PCF\Storage\MemoryStorage; +use Kduma\PCFDCP\Arena; +use Kduma\PCFDCP\Chunker; +use Kduma\PCFDCP\DcpReader; +use Kduma\PCFDCP\DcpWriter; + +$arena = new Arena(); +$arena->addInner(0x10, str_repeat("\xA1", 16), 'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); +$arena->addInner(0x10, str_repeat("\xB2", 16), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + +$w = new DcpWriter(); +$w->addContainer(str_repeat("\xDC", 16), 'dcp', $arena); +$image = $w->toImage(); + +$r = DcpReader::open(new MemoryStorage($image)); +$r->verify(); +echo $r->readInner(str_repeat("\xB2", 16)); // "World!" +``` + +## Operations + +`Arena` supports content-defined deduplication, copy-on-write edits +(`append` / `insert` / `overwrite` / `delete` / `truncate`), and +sharing-preserving `compact`. `DcpWriter` adds **promotion** (`promote`, +dynamic → fixed) and **demotion** (`demote`, fixed → dynamic), each preserving +`uid`, `partitionType`, `label`, `dataHashAlgo`, and `dataHash` — the promotion +invariant, identical to the fields a PCF-SIG signature protects. + +## Licence + +MIT. 
diff --git a/implementations/php/pcf-dcp/composer.json b/implementations/php/pcf-dcp/composer.json
new file mode 100644
index 0000000..9caa546
--- /dev/null
+++ b/implementations/php/pcf-dcp/composer.json
@@ -0,0 +1,47 @@
+{
+    "name": "kduma/pcf-dcp",
+    "description": "PHP implementation of PCF-DCP v1.0, the PCF Dynamic Container Partition profile",
+    "type": "library",
+    "license": "MIT",
+    "keywords": ["pcf", "pcf-dcp", "container", "deduplication", "fragmentation"],
+    "homepage": "https://github.com/kduma-OSS/Partitioned-Container-Format",
+    "support": {
+        "issues": "https://github.com/kduma-OSS/Partitioned-Container-Format/issues",
+        "source": "https://github.com/kduma-OSS-splits/PHP-PCF-DCP-lib"
+    },
+    "require": {
+        "php": ">=8.1",
+        "ext-hash": "*",
+        "kduma/pcf": "^0.0.9"
+    },
+    "require-dev": {
+        "phpunit/phpunit": "^10.5 || ^11.0"
+    },
+    "repositories": [
+        {
+            "type": "path",
+            "url": "../pcf",
+            "options": {
+                "symlink": true,
+                "versions": { "kduma/pcf": "0.0.9" }
+            }
+        }
+    ],
+    "autoload": {
+        "psr-4": {
+            "Kduma\\PCFDCP\\": "src/"
+        }
+    },
+    "autoload-dev": {
+        "psr-4": {
+            "Kduma\\PCFDCP\\Tests\\": "tests/"
+        }
+    },
+    "scripts": {
+        "test": "phpunit",
+        "gen-testvector": "php examples/gen_testvector.php"
+    },
+    "config": {
+        "sort-packages": true
+    }
+}
diff --git a/implementations/php/pcf-dcp/examples/gen_testvector.php b/implementations/php/pcf-dcp/examples/gen_testvector.php
new file mode 100644
index 0000000..9da7892
--- /dev/null
+++ b/implementations/php/pcf-dcp/examples/gen_testvector.php
@@ -0,0 +1,31 @@
+` (defaults to
+ * ./pcf_dcp_testvector.bin). Everything is fixed and deterministic so that
+ * independent implementations can reproduce the file byte-for-byte.
+ */
+
+require __DIR__ . '/../vendor/autoload.php';
+
+use Kduma\PCF\Container;
+use Kduma\PCF\Storage\MemoryStorage;
+use Kduma\PCFDCP\DcpReader;
+use Kduma\PCFDCP\ReferenceVector;
+
+// Output path: first CLI argument, or the default file name in the current directory.
+$path = $argv[1] ?? 'pcf_dcp_testvector.bin';
+
+$image = ReferenceVector::build();
+// file_put_contents() returns false (or a short byte count) on failure. Stop
+// with a non-zero status instead of reporting a vector that was never
+// (fully) written, so callers such as CI fail at this step.
+if (file_put_contents($path, $image) !== \strlen($image)) {
+    fwrite(STDERR, sprintf("failed to write %s\n", $path));
+    exit(1);
+}
+
+// It is a conforming PCF v1.0 file ...
+Container::open(new MemoryStorage($image))->verify();
+
+// ... and a conforming DCP file.
+DcpReader::open(new MemoryStorage($image))->verify();
+
+// Diagnostics go to STDERR so STDOUT stays empty.
+fwrite(STDERR, sprintf("wrote %s (%d bytes)\n", $path, \strlen($image)));
+fwrite(STDERR, 'sha256 = ' . bin2hex(hash('sha256', $image, true)) . "\n");
diff --git a/implementations/php/pcf-dcp/phpunit.xml.dist b/implementations/php/pcf-dcp/phpunit.xml.dist
new file mode 100644
index 0000000..87bda63
--- /dev/null
+++ b/implementations/php/pcf-dcp/phpunit.xml.dist
@@ -0,0 +1,20 @@
+ + + + + tests + + + + + src + + +
diff --git a/implementations/php/pcf-dcp/src/Arena.php b/implementations/php/pcf-dcp/src/Arena.php
new file mode 100644
index 0000000..2d18f76
--- /dev/null
+++ b/implementations/php/pcf-dcp/src/Arena.php
@@ -0,0 +1,742 @@
+}.
+ */
+final class Arena
+{
+    // Header fields; parse() overwrites them from the on-disk header.
+    private int $profileVersionMajor = Consts::PROFILE_VERSION_MAJOR;
+    private int $profileVersionMinor = Consts::PROFILE_VERSION_MINOR;
+    private int $flags = 0;
+    private HashAlgo $innerTableAlgo = HashAlgo::Sha256;
+    // Byte pool that fragment offsets index into; new data is only ever appended.
+    private string $blob = '';
+
+    /**
+     * One stdClass per inner partition with the properties partitionType, uid,
+     * label, dataHashAlgo and frags (a list of stdClass fragments carrying
+     * offset, length, kind and shared).
+     *
+     * @var list<object>
+     */
+    private array $inners = [];
+
+    /** Choose the hash algorithm used for inner Table Blocks (default SHA-256). */
+    public function withInnerTableAlgo(HashAlgo $algo): self
+    {
+        $this->innerTableAlgo = $algo;
+
+        return $this;
+    }
+
+    // ---- byte pool ---------------------------------------------------------
+
+    /** Append $data to the byte pool and return the offset at which it starts. */
+    private function appendBlob(string $data): int
+    {
+        $start = \strlen($this->blob);
+        $this->blob .= $data;
+
+        return $start;
+    }
+
+    /** Return $len bytes of the byte pool starting at $off (substr semantics). */
+    private function blobSlice(int $off, int $len): string
+    {
+        return substr($this->blob, $off, $len);
+    }
+
+    // ---- parsing -----------------------------------------------------------
+
+    /** Parse an arena from its on-disk bytes (spec Sections 6–8).
*/
+    public static function parse(string $bytes): self
+    {
+        $header = DcpHeader::read($bytes);
+        if ($header->profileVersionMajor !== Consts::PROFILE_VERSION_MAJOR) {
+            throw PcfDcpException::unsupportedProfileMajor($header->profileVersionMajor);
+        }
+        $arenaUsed = $header->arenaUsed;
+
+        $arena = new self();
+        $arena->profileVersionMajor = $header->profileVersionMajor;
+        $arena->profileVersionMinor = $header->profileVersionMinor;
+        $arena->flags = $header->flags;
+        // The whole input becomes the byte pool, so parsed offsets stay valid as-is.
+        $arena->blob = $bytes;
+
+        $len = \strlen($bytes);
+        $firstBlock = true;
+        $off = $header->innerTableOffset;
+        // Upper bound on the number of table blocks that can fit in $len bytes;
+        // running out of budget means the next-offset chain does not terminate.
+        $budget = intdiv($len, PcfConsts::TABLE_HEADER_SIZE) + 1;
+        while ($off !== Consts::ARENA_NONE) {
+            if ($budget === 0) {
+                throw PcfDcpException::offsetOutOfRange();
+            }
+            --$budget;
+            // NOTE(review): only the upper bound is checked; presumably offsets
+            // are never negative here — confirm against DcpHeader::read.
+            if ($off + PcfConsts::TABLE_HEADER_SIZE > $len) {
+                throw PcfDcpException::offsetOutOfRange();
+            }
+            $h = TableBlockHeader::fromBytes(substr($bytes, $off, PcfConsts::TABLE_HEADER_SIZE));
+            // The first block's algorithm is adopted for the whole arena.
+            if ($firstBlock) {
+                $arena->innerTableAlgo = $h->tableHashAlgo;
+                $firstBlock = false;
+            }
+            for ($i = 0; $i < $h->partitionCount; ++$i) {
+                // Entries follow the block header back to back.
+                $eo = $off + PcfConsts::TABLE_HEADER_SIZE + $i * PcfConsts::ENTRY_SIZE;
+                if ($eo + PcfConsts::ENTRY_SIZE > $len) {
+                    throw PcfDcpException::offsetOutOfRange();
+                }
+                $entry = PartitionEntry::fromBytes(substr($bytes, $eo, PcfConsts::ENTRY_SIZE));
+                // An inner entry's startOffset points at its fragment table.
+                $onDisk = FragmentTable::walk($bytes, $entry->startOffset);
+                $frags = [];
+                foreach ($onDisk as $fe) {
+                    $frags[] = (object) [
+                        'offset' => $fe->extentOffset,
+                        'length' => $fe->extentLength,
+                        'kind' => $fe->kind,
+                        'shared' => $fe->isShared(),
+                    ];
+                }
+                $arena->inners[] = (object) [
+                    'partitionType' => $entry->partitionType,
+                    'uid' => $entry->uid,
+                    'label' => $entry->label,
+                    'dataHashAlgo' => $entry->dataHashAlgo,
+                    'frags' => $frags,
+                ];
+            }
+            $off = $h->nextTableOffset;
+        }
+
+        // Every extent must end at or before the header's arenaUsed.
+        // NOTE(review): arenaUsed itself is not compared with strlen($bytes)
+        // in this method — confirm whether DcpHeader::read enforces that.
+        foreach ($arena->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                if ($f->offset + $f->length > $arenaUsed) {
+                    throw PcfDcpException::offsetOutOfRange();
+                }
+            }
+        }
+
+        return $arena;
+    }
+
+    // ---- read-only views ---------------------------------------------------
+
+    /** Number of inner partitions. */
+    public function count(): int
+    {
+        return \count($this->inners);
+    }
+
+    /** Whether the arena has no inner partitions. */
+    public function isEmpty(): bool
+    {
+        return \count($this->inners) === 0;
+    }
+
+    /**
+     * The uids of all inner partitions, in stored order.
+     *
+     * @return list<string>
+     */
+    public function uids(): array
+    {
+        $out = [];
+        foreach ($this->inners as $i) {
+            $out[] = $i->uid;
+        }
+
+        return $out;
+    }
+
+    /**
+     * Index of the inner partition with the given uid.
+     *
+     * @throws PcfDcpException notFound when no inner partition has that uid
+     */
+    private function indexOf(string $uid): int
+    {
+        foreach ($this->inners as $i => $inner) {
+            if ($inner->uid === $uid) {
+                return $i;
+            }
+        }
+        throw PcfDcpException::notFound();
+    }
+
+    /** Sum of the lengths of the partition's KIND_DATA fragments. */
+    private function innerLogicalLen(object $inner): int
+    {
+        $total = 0;
+        foreach ($inner->frags as $f) {
+            if ($f->kind === Consts::KIND_DATA) {
+                $total += $f->length;
+            }
+        }
+
+        return $total;
+    }
+
+    /** Concatenation of the partition's KIND_DATA extents, in fragment order. */
+    private function innerContent(object $inner): string
+    {
+        $out = '';
+        foreach ($inner->frags as $f) {
+            if ($f->kind === Consts::KIND_DATA) {
+                $out .= $this->blobSlice($f->offset, $f->length);
+            }
+        }
+
+        return $out;
+    }
+
+    /** Hash of the reconstructed content; recomputed on every call, not cached. */
+    private function innerDataHash(object $inner): string
+    {
+        return $inner->dataHashAlgo->compute($this->innerContent($inner));
+    }
+
+    /** Build the public InnerInfo view (decoded label, length, hash, extents). */
+    private function view(object $inner): InnerInfo
+    {
+        $extents = [];
+        foreach ($inner->frags as $f) {
+            $extents[] = new ExtentInfo($f->offset, $f->length, $f->kind, $f->shared);
+        }
+
+        return new InnerInfo(
+            $inner->partitionType,
+            $inner->uid,
+            PartitionEntry::decodeLabel($inner->label),
+            $this->innerLogicalLen($inner),
+            $inner->dataHashAlgo,
+            $this->innerDataHash($inner),
+            $extents,
+        );
+    }
+
+    /** A read-only view of one inner partition.
*/
+    public function innerInfo(string $uid): InnerInfo
+    {
+        return $this->view($this->inners[$this->indexOf($uid)]);
+    }
+
+    /**
+     * Read-only views of every inner partition, in stored order.
+     *
+     * @return list<InnerInfo>
+     */
+    public function inners(): array
+    {
+        $out = [];
+        foreach ($this->inners as $i) {
+            $out[] = $this->view($i);
+        }
+
+        return $out;
+    }
+
+    /** Reconstruct an inner partition's logical content (spec Section 8.3). */
+    public function content(string $uid): string
+    {
+        $inner = $this->inners[$this->indexOf($uid)];
+        $bytes = $this->innerContent($inner);
+        $declared = $this->innerLogicalLen($inner);
+        // substr() returns fewer bytes when an extent runs past the byte pool,
+        // so a short reconstruction shows up here as a length mismatch.
+        if (\strlen($bytes) !== $declared) {
+            throw PcfDcpException::lengthMismatch($declared, \strlen($bytes));
+        }
+
+        return $bytes;
+    }
+
+    // ---- builder -----------------------------------------------------------
+
+    /**
+     * Add an inner partition whose $content is split by $chunker into extents,
+     * deduplicating against extents already present (spec Section 10.2).
+     *
+     * @throws PcfDcpException reservedType, nestedContainer, nilUid or
+     *                         duplicateUid when the type or uid is not allowed
+     */
+    public function addInner(
+        int $partitionType,
+        string $uid,
+        string $label,
+        string $content,
+        HashAlgo $dataHashAlgo,
+        Chunker $chunker,
+    ): void {
+        if ($partitionType === 0) {
+            throw PcfDcpException::reservedType();
+        }
+        if ($partitionType === Consts::DCP_CONTAINER_TYPE) {
+            throw PcfDcpException::nestedContainer();
+        }
+        if ($uid === PcfConsts::NIL_UID) {
+            throw PcfDcpException::nilUid();
+        }
+        foreach ($this->inners as $i) {
+            if ($i->uid === $uid) {
+                throw PcfDcpException::duplicateUid();
+            }
+        }
+        // Encoded before any bytes are appended to the pool.
+        $labelBytes = PartitionEntry::encodeLabel($label);
+
+        $frags = [];
+        foreach (self::splitChunks($chunker, $content) as $chunk) {
+            // Look for an identical extent in existing partitions first, then
+            // among the fragments already built for this new partition.
+            $hit = $this->findExtent($chunk) ?? self::findLocal($this->blob, $frags, $chunk);
+            if ($hit !== null) {
+                [$offset, $length] = $hit;
+                // Flag every alias of the reused extent, both stored and local.
+                $this->markShared($offset, $length);
+                foreach ($frags as $f) {
+                    if ($f->offset === $offset && $f->length === $length) {
+                        $f->shared = true;
+                    }
+                }
+                $frags[] = (object) ['offset' => $offset, 'length' => $length, 'kind' => Consts::KIND_DATA, 'shared' => true];
+            } else {
+                $offset = $this->appendBlob($chunk);
+                $frags[] = (object) ['offset' => $offset, 'length' => \strlen($chunk), 'kind' => Consts::KIND_DATA, 'shared' => false];
+            }
+        }
+        $this->inners[] = (object) [
+            'partitionType' => $partitionType,
+            'uid' => $uid,
+            'label' => $labelBytes,
+            'dataHashAlgo' => $dataHashAlgo,
+            'frags' => $frags,
+        ];
+    }
+
+    /**
+     * Split $content into chunks: none for empty content, one for a "whole"
+     * chunker, otherwise pieces of $chunker->size bytes (the last may be shorter).
+     *
+     * @return list<string>
+     */
+    private static function splitChunks(Chunker $chunker, string $content): array
+    {
+        if ($content === '') {
+            return [];
+        }
+        if ($chunker->isWhole) {
+            return [$content];
+        }
+
+        return str_split($content, $chunker->size);
+    }
+
+    /**
+     * Find a KIND_DATA extent of any existing inner partition whose bytes
+     * equal $chunk; returns [offset, length] of the first match, or null.
+     *
+     * @return array{0:int,1:int}|null
+     */
+    private function findExtent(string $chunk): ?array
+    {
+        if ($chunk === '') {
+            return null;
+        }
+        $n = \strlen($chunk);
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                // Length is compared first so bytes are only read for candidates.
+                if ($f->kind === Consts::KIND_DATA && $f->length === $n && $this->blobSlice($f->offset, $f->length) === $chunk) {
+                    return [$f->offset, $f->length];
+                }
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Same search as findExtent(), but over a caller-supplied fragment list
+     * (the fragments of a partition that is still being built).
+     *
+     * @param list<object> $frags
+     *
+     * @return array{0:int,1:int}|null
+     */
+    private static function findLocal(string $blob, array $frags, string $chunk): ?array
+    {
+        if ($chunk === '') {
+            return null;
+        }
+        $n = \strlen($chunk);
+        foreach ($frags as $f) {
+            if ($f->kind === Consts::KIND_DATA && $f->length === $n && substr($blob, $f->offset, $f->length) === $chunk) {
+                return [$f->offset, $f->length];
+            }
+        }
+
+        return null;
+    }
+
+    private function markShared(int $offset, int $length): void
+    {
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                if
($f->offset === $offset && $f->length === $length) {
+                    $f->shared = true;
+                }
+            }
+        }
+    }
+
+    // ---- logical edits (copy-on-write) -------------------------------------
+
+    /**
+     * Append $bytes to an inner partition's content.
+     *
+     * The uid lookup runs before the empty-input shortcut, so whatever
+     * indexOf() does for an unknown uid still happens when $bytes is ''.
+     * Non-empty input is written to the blob as a new, unshared DATA extent.
+     */
+    public function append(string $uid, string $bytes): void
+    {
+        $inner = $this->inners[$this->indexOf($uid)];
+        if ($bytes === '') {
+            return;
+        }
+        $offset = $this->appendBlob($bytes);
+        $inner->frags[] = (object) ['offset' => $offset, 'length' => \strlen($bytes), 'kind' => Consts::KIND_DATA, 'shared' => false];
+    }
+
+    /**
+     * Overwrite the logical range [pos, pos+len) with $bytes.
+     *
+     * Implemented as delete() followed by insert(), so $bytes need not be
+     * $len bytes long. The range is validated by delete() before anything is
+     * modified.
+     *
+     * @throws PcfDcpException positionOutOfRange when the range is negative or
+     *                         extends past the current content
+     */
+    public function overwrite(string $uid, int $pos, int $len, string $bytes): void
+    {
+        $this->delete($uid, $pos, $len);
+        $this->insert($uid, $pos, $bytes);
+    }
+
+    /**
+     * Insert $bytes at logical position $pos (0 = front, current length = end).
+     *
+     * The new bytes are appended to the blob and referenced by a fresh,
+     * unshared fragment; a fragment straddling $pos is split in two.
+     *
+     * @throws PcfDcpException positionOutOfRange when $pos is negative or
+     *                         greater than the current content length
+     */
+    public function insert(string $uid, int $pos, string $bytes): void
+    {
+        $inner = $this->inners[$this->indexOf($uid)];
+        $total = $this->innerLogicalLen($inner);
+        // A negative position would make splitAt() emit a negative-length fragment.
+        if ($pos < 0 || $pos > $total) {
+            throw PcfDcpException::positionOutOfRange();
+        }
+        if ($bytes === '') {
+            return;
+        }
+        $split = $this->splitAt($inner, $pos);
+        $offset = $this->appendBlob($bytes);
+        $new = (object) ['offset' => $offset, 'length' => \strlen($bytes), 'kind' => Consts::KIND_DATA, 'shared' => false];
+        array_splice($inner->frags, $split, 0, [$new]);
+    }
+
+    /**
+     * Delete the logical range [pos, pos+len).
+     *
+     * Only fragment references are removed; the blob bytes are left in place.
+     *
+     * @throws PcfDcpException positionOutOfRange when $pos or $len is negative
+     *                         or the range extends past the current content
+     */
+    public function delete(string $uid, int $pos, int $len): void
+    {
+        $inner = $this->inners[$this->indexOf($uid)];
+        $total = $this->innerLogicalLen($inner);
+        $end = $pos + $len;
+        // Negative inputs can satisfy "$end <= $total" and must be rejected explicitly.
+        if ($pos < 0 || $len < 0 || $end > $total) {
+            throw PcfDcpException::positionOutOfRange();
+        }
+        if ($len === 0) {
+            return;
+        }
+        $lo = $this->splitAt($inner, $pos);
+        $hi = $this->splitAt($inner, $end);
+        array_splice($inner->frags, $lo, $hi - $lo);
+    }
+
+    /**
+     * Truncate the partition's logical content to $newLen bytes.
+     *
+     * @throws PcfDcpException positionOutOfRange when $newLen is negative or
+     *                         greater than the current content length
+     */
+    public function truncate(string $uid, int $newLen): void
+    {
+        $inner = $this->inners[$this->indexOf($uid)];
+        $total = $this->innerLogicalLen($inner);
+        if ($newLen < 0 || $newLen > $total) {
+            throw PcfDcpException::positionOutOfRange();
+        }
+        $cut = $this->splitAt($inner, $newLen);
+        $inner->frags = \array_slice($inner->frags, 0, $cut);
+    }
+
+    /**
+     * Make logical position $pos a fragment boundary of $inner and return the
+     * index of the fragment that starts there (count($inner->frags) when $pos
+     * is at or beyond the end).
+     *
+     * A fragment straddling $pos is replaced by two fragments covering the same
+     * bytes; both halves inherit the original's kind and shared flag. Callers
+     * are expected to pass 0 <= $pos <= logical length.
+     */
+    private function splitAt(object $inner, int $pos): int
+    {
+        $logical = 0;
+        $i = 0;
+        while ($i < \count($inner->frags)) {
+            $f = $inner->frags[$i];
+            $flen = $f->length;
+            if ($logical === $pos) {
+                return $i;
+            }
+            if ($pos < $logical + $flen) {
+                $head = $pos - $logical;
+                $left = (object) ['offset' => $f->offset, 'length' => $head, 'kind' => $f->kind, 'shared' => $f->shared];
+                $right = (object) ['offset' => $f->offset + $head, 'length' => $flen - $head, 'kind' => $f->kind, 'shared' => $f->shared];
+                $inner->frags[$i] = $left;
+                array_splice($inner->frags, $i + 1, 0, [$right]);
+
+                return $i + 1;
+            }
+            $logical += $flen;
+            ++$i;
+        }
+
+        return \count($inner->frags);
+    }
+
+    // ---- promotion support -------------------------------------------------
+
+    /**
+     * Remove an inner partition, returning the pieces a promotion needs: its
+     * type, label, hash algorithm, and reconstructed logical content.
+     *
+     * The content is reconstructed (and length-checked by content()) before the
+     * partition is detached, so a failure there leaves the arena unchanged.
+     *
+     * @return array{partitionType:int,label:string,dataHashAlgo:HashAlgo,content:string}
+     */
+    public function removeInner(string $uid): array
+    {
+        $idx = $this->indexOf($uid);
+        $content = $this->content($uid);
+        $inner = $this->inners[$idx];
+        array_splice($this->inners, $idx, 1);
+
+        return [
+            'partitionType' => $inner->partitionType,
+            'label' => PartitionEntry::decodeLabel($inner->label),
+            'dataHashAlgo' => $inner->dataHashAlgo,
+            'content' => $content,
+        ];
+    }
+
+    // ---- deduplication and compaction --------------------------------------
+
+    /**
+     * Re-chunk every inner partition with $chunker and deduplicate identical
+     * extents across the whole arena. Returns estimated bytes saved. 
+     */
+    public function dedup(Chunker $chunker): int
+    {
+        $before = $this->canonicalExtentBytes();
+        // Rebuild into a scratch arena that carries over the profile version,
+        // flags and inner-table hash algorithm; addInner() does the dedup.
+        $rebuilt = new self();
+        $rebuilt->profileVersionMajor = $this->profileVersionMajor;
+        $rebuilt->profileVersionMinor = $this->profileVersionMinor;
+        $rebuilt->flags = $this->flags;
+        $rebuilt->innerTableAlgo = $this->innerTableAlgo;
+        foreach ($this->inners as $inner) {
+            $rebuilt->addInner(
+                $inner->partitionType,
+                $inner->uid,
+                PartitionEntry::decodeLabel($inner->label),
+                $this->innerContent($inner),
+                $inner->dataHashAlgo,
+                $chunker,
+            );
+        }
+        // Adopt the rebuilt pool and fragment lists wholesale.
+        $this->blob = $rebuilt->blob;
+        $this->inners = $rebuilt->inners;
+        $after = $this->canonicalExtentBytes();
+
+        // Re-chunking can also increase the distinct-extent total; report 0 then.
+        return max(0, $before - $after);
+    }
+
+    /**
+     * Compact the arena (spec Section 10.3): drop unreferenced pool bytes and
+     * normalise the SHARED flag, clearing it on any extent now referenced
+     * exactly once (rule F2). Returns dead pool bytes reclaimed.
+     *
+     * Extents are identified by the exact pair "offset:length". The returned
+     * figure is computed before rewriting, as blob size minus the summed length
+     * of the distinct referenced extents, floored at 0.
+     *
+     * NOTE(review): two fragments that overlap but have different
+     * (offset, length) pairs are treated as unrelated extents and are each
+     * copied into the new blob - confirm this is intended for split fragments.
+     */
+    public function compact(): int
+    {
+        // Pass 1: count references per distinct extent.
+        $refcount = [];
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                $k = $f->offset . ':' . $f->length;
+                $refcount[$k] = ($refcount[$k] ?? 0) + 1;
+            }
+        }
+        // Pass 2: clear the shared flag where only one reference remains.
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                if (($refcount[$f->offset . ':' . $f->length] ?? 0) <= 1) {
+                    $f->shared = false;
+                }
+            }
+        }
+        // The length is the part of the key after the colon.
+        $liveBytes = 0;
+        foreach (array_keys($refcount) as $k) {
+            $liveBytes += (int) explode(':', $k)[1];
+        }
+        $deadBefore = max(0, \strlen($this->blob) - $liveBytes);
+
+        // Pass 3: copy each distinct extent once, in first-reference order,
+        // remembering its new offset.
+        $newBlob = '';
+        $remap = [];
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                $k = $f->offset . ':' . $f->length;
+                if (!isset($remap[$k])) {
+                    $remap[$k] = \strlen($newBlob);
+                    $newBlob .= $this->blobSlice($f->offset, $f->length);
+                }
+            }
+        }
+        // Pass 4: point every fragment at its extent's new offset.
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                $f->offset = $remap[$f->offset . ':' . $f->length];
+            }
+        }
+        $this->blob = $newBlob;
+
+        return $deadBefore;
+    }
+
+    /** Sum the lengths of the distinct (offset, length) extents referenced by any fragment. */
+    private function canonicalExtentBytes(): int
+    {
+        $seen = [];
+        $total = 0;
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                $k = $f->offset . ':' . $f->length;
+                if (!isset($seen[$k])) {
+                    $seen[$k] = true;
+                    $total += $f->length;
+                }
+            }
+        }
+
+        return $total;
+    }
+
+    // ---- canonical serialisation -------------------------------------------
+
+    /**
+     * Serialise the arena into its canonical on-disk layout (spec Section 17).
+     *
+     * Emitted order: DCP header, distinct extents, one Fragment Table chain per
+     * inner partition, then the Inner Table Block chain. All offsets are
+     * computed in a first pass so the header and the tables can reference
+     * positions that are written later.
+     */
+    public function toBytes(): string
+    {
+        // 1. distinct extents, first-reference order
+        $extOrder = [];
+        $extIndex = [];
+        foreach ($this->inners as $inner) {
+            foreach ($inner->frags as $f) {
+                $k = $f->offset . ':' . $f->length;
+                if (!isset($extIndex[$k])) {
+                    $extIndex[$k] = \count($extOrder);
+                    $extOrder[] = [$f->offset, $f->length];
+                }
+            }
+        }
+
+        // 2. lay out extents right after the header
+        $cur = Consts::DCP_HEADER_SIZE;
+        $extArenaOff = [];
+        foreach ($extOrder as [$off, $len]) {
+            $extArenaOff[] = $cur;
+            $cur += $len;
+        }
+
+        // 3. Fragment Tables (one chain per inner)
+        $fragOff = [];
+        foreach ($this->inners as $inner) {
+            $fragOff[] = $cur;
+            $cur += self::fragtableSpan(\count($inner->frags));
+        }
+
+        // 4. Inner Table Block(s)
+        $innerTableOffset = $cur;
+        $counts = self::blockCounts(\count($this->inners));
+        $blockOff = [];
+        foreach ($counts as $c) {
+            $blockOff[] = $cur;
+            $cur += PcfConsts::TABLE_HEADER_SIZE + $c * PcfConsts::ENTRY_SIZE;
+        }
+        // Everything up to the end of the last Inner Table Block counts as used.
+        $arenaUsed = $cur;
+
+        // header
+        $buf = (new DcpHeader(
+            $this->profileVersionMajor,
+            $this->profileVersionMinor,
+            $this->flags,
+            $innerTableOffset,
+            $arenaUsed,
+        ))->toBytes();
+
+        // extents (first-reference order)
+        foreach ($extOrder as [$off, $len]) {
+            $buf .= $this->blobSlice($off, $len);
+        }
+
+        // fragment tables
+        foreach ($this->inners as $ii => $inner) {
+            $buf .= self::writeFragmentTable($fragOff[$ii], $inner->frags, $extIndex, $extArenaOff);
+        }
+
+        // inner table block(s)
+        $entries = [];
+        foreach ($this->inners as $ii => $inner) {
+            // The logical length is passed for both size arguments of the entry.
+            $used = $this->innerLogicalLen($inner);
+            $entries[] = new PartitionEntry(
+                $inner->partitionType,
+                $inner->uid,
+                $inner->label,
+                $fragOff[$ii],
+                $used,
+                $used,
+                $inner->dataHashAlgo,
+                $this->innerDataHash($inner),
+            );
+        }
+
+        $idx = 0;
+        foreach ($counts as $b => $c) {
+            // The last block of the chain carries next offset 0.
+            $next = $b + 1 < \count($counts) ? $blockOff[$b + 1] : 0;
+            $slice = \array_slice($entries, $idx, $c);
+            $th = TableBlockHeader::computeTableHash($this->innerTableAlgo, $next, $slice);
+            $bh = new TableBlockHeader($c, $next, $this->innerTableAlgo, $th);
+            $buf .= $bh->toBytes();
+            foreach ($slice as $e) {
+                $buf .= $e->toBytes();
+            }
+            $idx += $c;
+        }
+
+        return $buf;
+    }
+
+    /** Total byte size of a Fragment Table chain holding $n entries (all block headers included). */
+    private static function fragtableSpan(int $n): int
+    {
+        $span = 0;
+        foreach (self::blockCounts($n) as $c) {
+            $span += Consts::FRAGTABLE_HEADER_SIZE + $c * Consts::FRAGMENT_ENTRY_SIZE;
+        }
+
+        return $span;
+    }
+
+    /**
+     * Split $n entries into per-block entry counts, each at most
+     * Consts::MAX_ENTRIES_PER_BLOCK. Zero entries still yield one empty
+     * block ([0]), so a chain always has at least one block.
+     *
+     * @return list<int>
+     */
+    private static function blockCounts(int $n): array
+    {
+        if ($n === 0) {
+            return [0];
+        }
+        $out = [];
+        $rem = $n;
+        while ($rem > 0) {
+            $c = min($rem, Consts::MAX_ENTRIES_PER_BLOCK);
+            $out[] = $c;
+            $rem -= $c;
+        }
+
+        return $out;
+    }
+
+    /**
+     * Serialise one inner partition's Fragment Table chain whose first block
+     * sits at absolute arena offset $start.
+     *
+     * @param list<object>      $frags       fragments in logical order
+     * @param array<string,int> $extIndex    "offset:length" key => index into $extArenaOff
+     * @param list<int>         $extArenaOff arena offset of each distinct extent
+     */
+    private static function writeFragmentTable(int $start, array $frags, array $extIndex, array $extArenaOff): string
+    {
+        $counts = self::blockCounts(\count($frags));
+        $out = '';
+        $blockStart = $start;
+        $idx = 0;
+        foreach ($counts as $b => $c) {
+            $span = Consts::FRAGTABLE_HEADER_SIZE + $c * Consts::FRAGMENT_ENTRY_SIZE;
+            // Blocks are laid out back to back; the last one links to 0.
+            $next = $b + 1 < \count($counts) ? $blockStart + $span : 0;
+            $out .= (new FragTableHeader($next, $c))->toBytes();
+            for ($j = 0; $j < $c; ++$j) {
+                $f = $frags[$idx + $j];
+                // Translate the blob-relative extent into its serialised arena offset.
+                $arenaOff = $extArenaOff[$extIndex[$f->offset . ':' . $f->length]];
+                $out .= (new FragmentEntry($arenaOff, $f->length, $f->kind, $f->shared ? 
Consts::FLAG_SHARED : 0))->toBytes(); + } + $blockStart += $span; + $idx += $c; + } + + return $out; + } +} diff --git a/implementations/php/pcf-dcp/src/Chunker.php b/implementations/php/pcf-dcp/src/Chunker.php new file mode 100644 index 0000000..cfdb013 --- /dev/null +++ b/implementations/php/pcf-dcp/src/Chunker.php @@ -0,0 +1,30 @@ +profileVersionMajor & 0xFF) + . \chr($this->profileVersionMinor & 0xFF) + . pack('v', $this->flags & 0xFFFF) + . pack('P', $this->innerTableOffset) + . pack('P', $this->arenaUsed); + } + + /** Parse from the on-disk 24-byte layout, validating the magic. */ + public static function fromBytes(string $b): self + { + if (\strlen($b) < Consts::DCP_HEADER_SIZE || substr($b, 0, 4) !== Consts::DCP_MAGIC) { + throw PcfDcpException::badDcpMagic(); + } + + return new self( + \ord($b[4]), + \ord($b[5]), + unpack('v', substr($b, 6, 2))[1], + unpack('P', substr($b, 8, 8))[1], + unpack('P', substr($b, 16, 8))[1], + ); + } + + /** Read a DCP Header from the start of an arena byte string. */ + public static function read(string $arena): self + { + if (\strlen($arena) < Consts::DCP_HEADER_SIZE) { + throw PcfDcpException::badDcpMagic(); + } + + return self::fromBytes(substr($arena, 0, Consts::DCP_HEADER_SIZE)); + } +} diff --git a/implementations/php/pcf-dcp/src/DcpReader.php b/implementations/php/pcf-dcp/src/DcpReader.php new file mode 100644 index 0000000..b978d36 --- /dev/null +++ b/implementations/php/pcf-dcp/src/DcpReader.php @@ -0,0 +1,199 @@ +container; + } + + /** + * All top-level entries, in chain order. + * + * @return PartitionEntry[] + */ + public function entries(): array + { + return $this->container->entries(); + } + + /** + * The top-level DCP container entries. 
+     *
+     * @return PartitionEntry[]
+     */
+    public function containers(): array
+    {
+        $out = [];
+        foreach ($this->container->entries() as $e) {
+            if ($e->partitionType === Consts::DCP_CONTAINER_TYPE) {
+                $out[] = $e;
+            }
+        }
+
+        return $out;
+    }
+
+    /**
+     * Parse the arena of a DCP container entry.
+     *
+     * @throws PcfDcpException notADcpContainer when $entry is not of the DCP
+     *                         container partition type
+     */
+    public function openArena(PartitionEntry $entry): Arena
+    {
+        if ($entry->partitionType !== Consts::DCP_CONTAINER_TYPE) {
+            throw PcfDcpException::notADcpContainer();
+        }
+
+        return Arena::parse($this->container->readPartitionData($entry));
+    }
+
+    /**
+     * Every inner partition across every DCP container, in file order.
+     *
+     * Each call re-reads and re-parses every container arena.
+     *
+     * @return InnerLocation[]
+     */
+    public function innerPartitions(): array
+    {
+        $out = [];
+        foreach ($this->containers() as $cont) {
+            $arena = $this->openArena($cont);
+            foreach ($arena->inners() as $info) {
+                $out[] = new InnerLocation($cont->uid, $info);
+            }
+        }
+
+        return $out;
+    }
+
+    /**
+     * Resolve a uid against the flattened set top-level ∪ inner (spec 2.1).
+     *
+     * Top-level entries are searched first, so a top-level match wins over an
+     * inner partition carrying the same uid.
+     *
+     * @throws PcfDcpException notFound when no partition has this uid
+     */
+    public function resolveUid(string $uid): Resolved
+    {
+        foreach ($this->container->entries() as $e) {
+            if ($e->uid === $uid) {
+                return Resolved::topLevel($e);
+            }
+        }
+        foreach ($this->innerPartitions() as $loc) {
+            if ($loc->info->uid === $uid) {
+                return Resolved::innerPartition($loc);
+            }
+        }
+        throw PcfDcpException::notFound();
+    }
+
+    /**
+     * Reconstruct an inner partition's logical content by uid.
+     *
+     * Containers are scanned in file order; the first arena that lists the uid
+     * supplies the content.
+     *
+     * @throws PcfDcpException notFound when no container holds this uid
+     */
+    public function readInner(string $uid): string
+    {
+        foreach ($this->containers() as $cont) {
+            $arena = $this->openArena($cont);
+            foreach ($arena->uids() as $u) {
+                if ($u === $uid) {
+                    return $arena->content($uid);
+                }
+            }
+        }
+        throw PcfDcpException::notFound();
+    }
+
+    /**
+     * Full DCP-aware verification: PCF integrity, each inner Table Block's
+     * table_hash, reconstruction length and (when algorithmic) data_hash, no
+     * nested container, and file-wide uid uniqueness.
+     *
+     * @throws PcfDcpException on the first violation found
+     */
+    public function verify(): void
+    {
+        $this->container->verify();
+
+        // uid set shared by the top-level pass and every arena pass below;
+        // this is what makes the uniqueness check file-wide.
+        $seen = [];
+        foreach ($this->container->entries() as $e) {
+            $k = bin2hex($e->uid);
+            if (isset($seen[$k])) {
+                throw PcfDcpException::duplicateUid();
+            }
+            $seen[$k] = true;
+        }
+
+        foreach ($this->containers() as $cont) {
+            $data = $this->container->readPartitionData($cont);
+            // Check the raw table hashes before the arena is parsed.
+            self::verifyInnerTableHashes($data);
+
+            $arena = Arena::parse($data);
+            foreach ($arena->inners() as $info) {
+                if ($info->partitionType === Consts::DCP_CONTAINER_TYPE) {
+                    throw PcfDcpException::nestedContainer();
+                }
+                $k = bin2hex($info->uid);
+                if (isset($seen[$k])) {
+                    throw PcfDcpException::duplicateUid();
+                }
+                $seen[$k] = true;
+
+                $content = $arena->content($info->uid);
+                if (\strlen($content) !== $info->usedBytes) {
+                    throw PcfDcpException::lengthMismatch($info->usedBytes, \strlen($content));
+                }
+                if (!$info->dataHashAlgo->verify($content, $info->dataHash)) {
+                    throw PcfDcpException::hashMismatch();
+                }
+            }
+        }
+    }
+
+    /**
+     * Walk the Inner Table Block chain of a raw arena and check each block's
+     * table_hash when its algorithm verifies.
+     *
+     * The walk is bounded by a block budget derived from the arena size, so a
+     * cyclic chain ends in offsetOutOfRange instead of looping forever.
+     *
+     * @throws PcfDcpException offsetOutOfRange when a block or entry lies
+     *                         outside the arena, hashMismatch on a bad hash
+     */
+    private static function verifyInnerTableHashes(string $arena): void
+    {
+        $header = DcpHeader::read($arena);
+        $len = \strlen($arena);
+        $off = $header->innerTableOffset;
+        $budget = intdiv($len, PcfConsts::TABLE_HEADER_SIZE) + 1;
+        while ($off !== 0) {
+            if ($budget === 0) {
+                throw PcfDcpException::offsetOutOfRange();
+            }
+            --$budget;
+            // Offsets are decoded with unpack('P'), which yields a negative
+            // int for stored values >= 2^63. A negative offset would pass the
+            // upper-bound test and make substr() read from the arena's end.
+            if ($off < 0 || $off + PcfConsts::TABLE_HEADER_SIZE > $len) {
+                throw PcfDcpException::offsetOutOfRange();
+            }
+            $h = TableBlockHeader::fromBytes(substr($arena, $off, PcfConsts::TABLE_HEADER_SIZE));
+            $entries = [];
+            for ($i = 0; $i < $h->partitionCount; ++$i) {
+                $eo = $off + PcfConsts::TABLE_HEADER_SIZE + $i * PcfConsts::ENTRY_SIZE;
+                if ($eo + PcfConsts::ENTRY_SIZE > $len) {
+                    throw PcfDcpException::offsetOutOfRange();
+                }
+                $entries[] = PartitionEntry::fromBytes(substr($arena, $eo, PcfConsts::ENTRY_SIZE));
+            }
+            if ($h->tableHashAlgo->verifies()) {
+                $computed = TableBlockHeader::computeTableHash($h->tableHashAlgo, $h->nextTableOffset, $entries);
+                $n = 
$h->tableHashAlgo->digestLen(); + if (substr($computed, 0, $n) !== substr($h->tableHash, 0, $n)) { + throw PcfDcpException::hashMismatch(); + } + } + $off = $h->nextTableOffset; + } + } +} diff --git a/implementations/php/pcf-dcp/src/DcpWriter.php b/implementations/php/pcf-dcp/src/DcpWriter.php new file mode 100644 index 0000000..9b8f950 --- /dev/null +++ b/implementations/php/pcf-dcp/src/DcpWriter.php @@ -0,0 +1,193 @@ + */ + private array $parts = []; + private HashAlgo $tableHashAlgo = HashAlgo::Sha256; + private bool $trailer = false; + + /** Load an existing PCF file into the writer's model. */ + public static function open(StorageInterface $storage): self + { + $c = Container::open($storage); + $w = new self(); + foreach ($c->entries() as $e) { + $data = $c->readPartitionData($e); + $label = PartitionEntry::decodeLabel($e->label); + $w->parts[] = (object) [ + 'partitionType' => $e->partitionType, + 'uid' => $e->uid, + 'label' => $label, + 'dataHashAlgo' => $e->dataHashAlgo, + 'plain' => $e->partitionType === Consts::DCP_CONTAINER_TYPE ? null : $data, + 'arena' => $e->partitionType === Consts::DCP_CONTAINER_TYPE ? Arena::parse($data) : null, + ]; + } + + return $w; + } + + /** Finalise emitted images in trailer mode (append-only host). */ + public function setTrailer(bool $on): void + { + $this->trailer = $on; + } + + private function ensureUnique(string $uid): void + { + foreach ($this->parts as $p) { + if ($p->uid === $uid) { + throw PcfDcpException::duplicateUid(); + } + } + } + + /** Add a DCP container partition holding $arena. */ + public function addContainer(string $uid, string $label, Arena $arena): void + { + $this->ensureUnique($uid); + $this->parts[] = (object) [ + 'partitionType' => Consts::DCP_CONTAINER_TYPE, + 'uid' => $uid, + 'label' => $label, + 'dataHashAlgo' => HashAlgo::None, + 'plain' => null, + 'arena' => $arena, + ]; + } + + /** Add an ordinary top-level partition. 
*/
+    public function addPlain(int $partitionType, string $uid, string $label, string $data, HashAlgo $dataHashAlgo): void
+    {
+        $this->ensureUnique($uid);
+        $this->parts[] = (object) [
+            'partitionType' => $partitionType,
+            'uid' => $uid,
+            'label' => $label,
+            'dataHashAlgo' => $dataHashAlgo,
+            'plain' => $data,
+            'arena' => null,
+        ];
+    }
+
+    /**
+     * Find the top-level part with this uid and return its arena.
+     *
+     * @throws PcfDcpException notADcpContainer when the part holds no arena,
+     *                         notFound when no part has this uid
+     */
+    private function containerArena(string $uid): Arena
+    {
+        foreach ($this->parts as $p) {
+            if ($p->uid === $uid) {
+                if ($p->arena === null) {
+                    throw PcfDcpException::notADcpContainer();
+                }
+
+                return $p->arena;
+            }
+        }
+        throw PcfDcpException::notFound();
+    }
+
+    /** Borrow a container's arena for inspection or in-place editing. */
+    public function arena(string $containerUid): Arena
+    {
+        return $this->containerArena($containerUid);
+    }
+
+    // ---- migration: promotion / demotion -----------------------------------
+
+    /**
+     * Promote an inner partition out of its DCP container to a top-level PCF
+     * partition (dynamic → fixed), preserving uid, type, label, hash algorithm
+     * and data_hash (the promotion invariant, spec Section 10.4).
+     *
+     * @throws PcfDcpException duplicateUid when a top-level part already uses
+     *                         $innerUid; the arena is left untouched then
+     */
+    public function promote(string $containerUid, string $innerUid): void
+    {
+        $arena = $this->containerArena($containerUid);
+        // Same top-level uniqueness rule as addContainer()/addPlain(). It is
+        // checked before removeInner() so a collision cannot drop the inner
+        // partition from its arena.
+        $this->ensureUnique($innerUid);
+        $piece = $arena->removeInner($innerUid);
+        $this->parts[] = (object) [
+            'partitionType' => $piece['partitionType'],
+            'uid' => $innerUid,
+            'label' => $piece['label'],
+            'dataHashAlgo' => $piece['dataHashAlgo'],
+            'plain' => $piece['content'],
+            'arena' => null,
+        ];
+    }
+
+    /**
+     * Demote a top-level partition into a DCP container as an inner partition
+     * (fixed → dynamic), preserving uid, type, label, hash algorithm and
+     * data_hash. The content becomes a single DATA extent. 
+ */ + public function demote(string $partUid, string $containerUid): void + { + $pos = -1; + foreach ($this->parts as $i => $p) { + if ($p->uid === $partUid) { + $pos = $i; + break; + } + } + if ($pos < 0) { + throw PcfDcpException::notFound(); + } + $p = $this->parts[$pos]; + if ($p->partitionType === Consts::DCP_CONTAINER_TYPE || $p->plain === null) { + throw PcfDcpException::nestedContainer(); + } + $arena = $this->containerArena($containerUid); + $arena->addInner($p->partitionType, $partUid, $p->label, $p->plain, $p->dataHashAlgo, Chunker::whole()); + array_splice($this->parts, $pos, 1); + } + + // ---- container-level maintenance --------------------------------------- + + /** Re-chunk and deduplicate a container's inner partitions. */ + public function dedup(string $containerUid, Chunker $chunker): int + { + return $this->containerArena($containerUid)->dedup($chunker); + } + + /** Compact / defragment a container's arena. Returns bytes reclaimed. */ + public function defrag(string $containerUid): int + { + return $this->containerArena($containerUid)->compact(); + } + + // ---- serialisation ----------------------------------------------------- + + /** Build a fresh, canonical PCF image of the whole file. */ + public function toImage(): string + { + $cap = max(1, \count($this->parts)); + $storage = new MemoryStorage(); + $c = Container::createWith($storage, $cap, $this->tableHashAlgo); + foreach ($this->parts as $p) { + $data = $p->arena !== null ? $p->arena->toBytes() : $p->plain; + $c->addPartition($p->partitionType, $p->uid, $p->label, $data, 0, $p->dataHashAlgo); + } + if ($this->trailer) { + $c->finalizeWithTrailer(); + } + + return $storage->getContents(); + } +} diff --git a/implementations/php/pcf-dcp/src/ErrorKind.php b/implementations/php/pcf-dcp/src/ErrorKind.php new file mode 100644 index 0000000..fc621a0 --- /dev/null +++ b/implementations/php/pcf-dcp/src/ErrorKind.php @@ -0,0 +1,23 @@ +nextFragtableOffset) . 
\chr($this->fragmentCount & 0xFF); + } + + /** Parse from the on-disk 9-byte layout (optionally at an offset). */ + public static function fromBytes(string $b, int $offset = 0): self + { + return new self( + unpack('P', substr($b, $offset + 0, 8))[1], + \ord($b[$offset + 8]), + ); + } +} diff --git a/implementations/php/pcf-dcp/src/FragmentEntry.php b/implementations/php/pcf-dcp/src/FragmentEntry.php new file mode 100644 index 0000000..a1797eb --- /dev/null +++ b/implementations/php/pcf-dcp/src/FragmentEntry.php @@ -0,0 +1,49 @@ +extentOffset) + . pack('P', $this->extentLength) + . \chr($this->kind & 0xFF) + . \chr($this->flags & 0xFF); + } + + /** Parse from the on-disk 18-byte layout (optionally at an offset). */ + public static function fromBytes(string $b, int $offset = 0): self + { + return new self( + unpack('P', substr($b, $offset + 0, 8))[1], + unpack('P', substr($b, $offset + 8, 8))[1], + \ord($b[$offset + 16]), + \ord($b[$offset + 17]), + ); + } + + /** Whether this entry's kind is DATA. */ + public function isData(): bool + { + return $this->kind === Consts::KIND_DATA; + } + + /** Whether the SHARED flag (bit 0) is set. 
*/ + public function isShared(): bool + { + return ($this->flags & Consts::FLAG_SHARED) !== 0; + } +} diff --git a/implementations/php/pcf-dcp/src/FragmentTable.php b/implementations/php/pcf-dcp/src/FragmentTable.php new file mode 100644 index 0000000..58744bf --- /dev/null +++ b/implementations/php/pcf-dcp/src/FragmentTable.php @@ -0,0 +1,72 @@ + $len) { + throw PcfDcpException::offsetOutOfRange(); + } + $h = FragTableHeader::fromBytes($arena, $off); + $eo = $off + Consts::FRAGTABLE_HEADER_SIZE; + for ($i = 0; $i < $h->fragmentCount; ++$i) { + if ($eo + Consts::FRAGMENT_ENTRY_SIZE > $len) { + throw PcfDcpException::offsetOutOfRange(); + } + $out[] = FragmentEntry::fromBytes($arena, $eo); + $eo += Consts::FRAGMENT_ENTRY_SIZE; + } + $off = $h->nextFragtableOffset; + } + + return $out; + } + + /** + * Reconstruct the logical content from Fragment Entries (spec Section 8.3): + * concatenate the bytes of the DATA extents in order. + * + * @param FragmentEntry[] $frags + */ + public static function reconstruct(string $arena, array $frags, int $arenaUsed): string + { + $len = \strlen($arena); + $out = ''; + foreach ($frags as $f) { + if (!$f->isData()) { + throw PcfDcpException::badFragmentKind($f->kind); + } + $end = $f->extentOffset + $f->extentLength; + if ($end > $arenaUsed || $end > $len) { + throw PcfDcpException::offsetOutOfRange(); + } + $out .= substr($arena, $f->extentOffset, $f->extentLength); + } + + return $out; + } +} diff --git a/implementations/php/pcf-dcp/src/InnerInfo.php b/implementations/php/pcf-dcp/src/InnerInfo.php new file mode 100644 index 0000000..d96240a --- /dev/null +++ b/implementations/php/pcf-dcp/src/InnerInfo.php @@ -0,0 +1,25 @@ +addInner(0x0000_0010, str_repeat("\xA1", 16), 'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + $arena->addInner(0x0000_0010, str_repeat("\xB2", 16), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + + $w = new DcpWriter(); + $w->addContainer(str_repeat("\xDC", 16), 'dcp', $arena); + + return 
$w->toImage(); + } +} diff --git a/implementations/php/pcf-dcp/src/Resolved.php b/implementations/php/pcf-dcp/src/Resolved.php new file mode 100644 index 0000000..c0b9f13 --- /dev/null +++ b/implementations/php/pcf-dcp/src/Resolved.php @@ -0,0 +1,28 @@ +verify(); + $entries = $c->entries(); + self::assertCount(1, $entries); + self::assertSame(0xAAAC0001, $entries[0]->partitionType); + self::assertSame(465, $entries[0]->usedBytes); + } + + public function test_is_valid_dcp(): void + { + $r = DcpReader::open(new MemoryStorage(self::canonical())); + $r->verify(); + self::assertSame('Hello, World!', $r->readInner($this->fill(0xA1))); + self::assertSame('World!', $r->readInner($this->fill(0xB2))); + } +} diff --git a/implementations/php/pcf-dcp/tests/CoverageTest.php b/implementations/php/pcf-dcp/tests/CoverageTest.php new file mode 100644 index 0000000..10ece07 --- /dev/null +++ b/implementations/php/pcf-dcp/tests/CoverageTest.php @@ -0,0 +1,136 @@ +kind; + } + self::fail('expected a PcfDcpException'); + } + + public function test_rejects_bad_arena_magic(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(1), 'x', 'hi', HashAlgo::Sha256, Chunker::whole()); + $bytes = $a->toBytes(); + $bytes[0] = 'X'; + self::assertSame(ErrorKind::BadDcpMagic, $this->kindOf(fn () => Arena::parse($bytes))); + } + + public function test_rejects_unsupported_profile_major(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(1), 'x', 'hi', HashAlgo::Sha256, Chunker::whole()); + $bytes = $a->toBytes(); + $bytes[4] = "\x02"; + self::assertSame(ErrorKind::UnsupportedProfileMajor, $this->kindOf(fn () => Arena::parse($bytes))); + } + + public function test_rejects_reserved_nested_and_nil_uid(): void + { + $a = new Arena(); + self::assertSame(ErrorKind::ReservedType, + $this->kindOf(fn () => $a->addInner(0, $this->fill(1), 'x', '', HashAlgo::None, Chunker::whole()))); + self::assertSame(ErrorKind::NestedContainer, + $this->kindOf(fn () => $a->addInner(0xAAAC0001, 
$this->fill(1), 'x', '', HashAlgo::None, Chunker::whole()))); + self::assertSame(ErrorKind::NilUid, + $this->kindOf(fn () => $a->addInner(0x10, str_repeat("\x00", 16), 'x', '', HashAlgo::None, Chunker::whole()))); + } + + public function test_rejects_duplicate_uid_within_arena(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(1), 'x', 'a', HashAlgo::None, Chunker::whole()); + self::assertSame(ErrorKind::DuplicateUid, + $this->kindOf(fn () => $a->addInner(0x10, $this->fill(1), 'y', 'b', HashAlgo::None, Chunker::whole()))); + } + + public function test_rejects_bad_kind_and_out_of_range_extent(): void + { + self::assertSame(ErrorKind::BadFragmentKind, + $this->kindOf(fn () => FragmentTable::reconstruct(str_repeat("\x00", 64), [new FragmentEntry(24, 1, 2, 0)], 64))); + self::assertSame(ErrorKind::OffsetOutOfRange, + $this->kindOf(fn () => FragmentTable::reconstruct(str_repeat("\x00", 64), [new FragmentEntry(60, 100, 1, 0)], 64))); + } + + public function test_allows_empty_inner(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(1), 'empty', '', HashAlgo::Sha256, Chunker::whole()); + $info = $a->innerInfo($this->fill(1)); + self::assertSame(0, $info->usedBytes); + self::assertCount(0, $info->extents); + self::assertSame('', Arena::parse($a->toBytes())->content($this->fill(1))); + } + + public function test_chains_inner_table_beyond_255(): void + { + $a = new Arena(); + for ($i = 0; $i < 300; ++$i) { + $uid = str_repeat("\x00", 16); + $uid[0] = \chr($i & 0xFF); + $uid[1] = \chr(($i >> 8) & 0xFF); + $uid[15] = "\x01"; + $a->addInner(0x10, $uid, 'n', \chr($i & 0xFF) . 
\chr(($i >> 8) & 0xFF), HashAlgo::Sha256, Chunker::whole()); + } + self::assertSame(300, $a->count()); + self::assertSame(300, Arena::parse($a->toBytes())->count()); + + $w = new DcpWriter(); + $w->addContainer($this->fill(0xDC), 'big', $a); + DcpReader::open(new MemoryStorage($w->toImage()))->verify(); + } + + public function test_chains_fragment_table_beyond_255(): void + { + $a = new Arena(); + $distinct = ''; + for ($i = 0; $i < 300; ++$i) { + $distinct .= \chr($i & 0xFF); + } + $a->addInner(0x10, $this->fill(2), 'frag', $distinct, HashAlgo::Sha256, Chunker::fixed(1)); + $parsed = Arena::parse($a->toBytes()); + self::assertSame(bin2hex($distinct), bin2hex($parsed->content($this->fill(2)))); + } + + public function test_verify_detects_file_wide_uid_collision(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(0xB2), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + $w = new DcpWriter(); + $w->addContainer($this->fill(0xDC), 'dcp', $a); + $w->addPlain(0x10, $this->fill(0xB2), 'dup', 'x', HashAlgo::Sha256); + $r = DcpReader::open(new MemoryStorage($w->toImage())); + self::assertSame(ErrorKind::DuplicateUid, $this->kindOf(fn () => $r->verify())); + } + + public function test_open_arena_rejects_non_dcp_partition(): void + { + $storage = new MemoryStorage(); + $c = Container::createWith($storage, 4, HashAlgo::Sha256); + $c->addPartition(0x10, $this->fill(7), 'plain', 'hi', 0, HashAlgo::Sha256); + $r = DcpReader::open($storage); + $entry = $r->entries()[0]; + self::assertSame(ErrorKind::NotADcpContainer, $this->kindOf(fn () => $r->openArena($entry))); + } +} diff --git a/implementations/php/pcf-dcp/tests/PcfDcpTestCase.php b/implementations/php/pcf-dcp/tests/PcfDcpTestCase.php new file mode 100644 index 0000000..fd80338 --- /dev/null +++ b/implementations/php/pcf-dcp/tests/PcfDcpTestCase.php @@ -0,0 +1,16 @@ +addInner(0x10, $this->fill(0xA1), 'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + $arena->addInner(0x10, $this->fill(0xB2), 'B', 
'World!', HashAlgo::Sha256, Chunker::whole()); + $w = new DcpWriter(); + $w->addContainer($this->fill(0xDC), 'dcp', $arena); + + return $w->toImage(); + } + + public function test_edits_reconstruct_correctly(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(1), 'f', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + + $a->append($this->fill(1), '!!'); + self::assertSame('Hello, World!!!', $a->content($this->fill(1))); + + $a->insert($this->fill(1), 5, 'XYZ'); + self::assertSame('HelloXYZ, World!!!', $a->content($this->fill(1))); + + $a->delete($this->fill(1), 5, 3); + self::assertSame('Hello, World!!!', $a->content($this->fill(1))); + + $a->overwrite($this->fill(1), 0, 5, 'HOWDY'); + self::assertSame('HOWDY, World!!!', $a->content($this->fill(1))); + + $a->truncate($this->fill(1), 5); + self::assertSame('HOWDY', $a->content($this->fill(1))); + } + + public function test_copy_on_write_does_not_disturb_shared_bytes(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(0xA1), 'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + $a->addInner(0x10, $this->fill(0xB2), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + $a->overwrite($this->fill(0xA1), 7, 6, 'PLANET'); + self::assertSame('Hello, PLANET', $a->content($this->fill(0xA1))); + self::assertSame('World!', $a->content($this->fill(0xB2))); + } + + public function test_dedup_then_defrag_preserve_content(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(1), 'A', 'abcabc', HashAlgo::Sha256, Chunker::whole()); + $a->addInner(0x10, $this->fill(2), 'B', 'abcabc', HashAlgo::Sha256, Chunker::whole()); + $h1 = $a->innerInfo($this->fill(1))->dataHash; + + $saved = $a->dedup(Chunker::fixed(3)); + self::assertGreaterThan(0, $saved); + self::assertSame('abcabc', $a->content($this->fill(1))); + self::assertSame('abcabc', $a->content($this->fill(2))); + self::assertSame(bin2hex($h1), bin2hex($a->innerInfo($this->fill(1))->dataHash)); + + $a->compact(); + 
self::assertSame('abcabc', $a->content($this->fill(2))); + } + + public function test_defrag_clears_shared_when_no_longer_aliased(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(0xA1), 'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + $a->addInner(0x10, $this->fill(0xB2), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + $a->removeInner($this->fill(0xB2)); + $a->compact(); + foreach ($a->innerInfo($this->fill(0xA1))->extents as $e) { + self::assertFalse($e->shared); + } + self::assertSame('Hello, World!', $a->content($this->fill(0xA1))); + } + + public function test_promote_preserves_uid_and_data_hash(): void + { + $w = DcpWriter::open(new MemoryStorage($this->buildTwoInnerFile())); + $r0 = DcpReader::open(new MemoryStorage($w->toImage())); + $before = null; + foreach ($r0->innerPartitions() as $loc) { + if ($loc->info->uid === $this->fill(0xB2)) { + $before = $loc->info->dataHash; + } + } + self::assertNotNull($before); + + $w->promote($this->fill(0xDC), $this->fill(0xB2)); + $r = DcpReader::open(new MemoryStorage($w->toImage())); + $r->verify(); + $resolved = $r->resolveUid($this->fill(0xB2)); + self::assertTrue($resolved->isTopLevel); + self::assertSame(bin2hex($before), bin2hex($resolved->entry->dataHash)); + self::assertSame(6, $resolved->entry->usedBytes); + self::assertSame('Hello, World!', $r->readInner($this->fill(0xA1))); + } + + public function test_demote_then_promote_is_identity_for_content(): void + { + $w = DcpWriter::open(new MemoryStorage($this->buildTwoInnerFile())); + $w->promote($this->fill(0xDC), $this->fill(0xB2)); + $w->demote($this->fill(0xB2), $this->fill(0xDC)); + $r = DcpReader::open(new MemoryStorage($w->toImage())); + $r->verify(); + self::assertSame('World!', $r->readInner($this->fill(0xB2))); + self::assertFalse($r->resolveUid($this->fill(0xB2))->isTopLevel); + } + + public function test_trailer_mode_reads_back_identically(): void + { + $arena = new Arena(); + $arena->addInner(0x10, $this->fill(0xA1), 
'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + $arena->addInner(0x10, $this->fill(0xB2), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + $w = new DcpWriter(); + $w->addContainer($this->fill(0xDC), 'dcp', $arena); + $w->setTrailer(true); + $r = DcpReader::open(new MemoryStorage($w->toImage())); + $r->verify(); + self::assertSame('Hello, World!', $r->readInner($this->fill(0xA1))); + self::assertCount(2, $r->innerPartitions()); + } +} diff --git a/implementations/php/pcf-dcp/tests/SpecComplianceTest.php b/implementations/php/pcf-dcp/tests/SpecComplianceTest.php new file mode 100644 index 0000000..9f2319b --- /dev/null +++ b/implementations/php/pcf-dcp/tests/SpecComplianceTest.php @@ -0,0 +1,97 @@ +toBytes(); + self::assertSame('PDCP', substr($b, 0, 4)); + $parsed = DcpHeader::fromBytes($b); + self::assertSame(109, $parsed->innerTableOffset); + self::assertSame(465, $parsed->arenaUsed); + self::assertSame(1, $parsed->profileVersionMajor); + self::assertSame(0, $parsed->profileVersionMinor); + } + + public function test_fragment_records_round_trip(): void + { + $e = new FragmentEntry(31, 6, 1, 1); + $pe = FragmentEntry::fromBytes($e->toBytes()); + self::assertSame(31, $pe->extentOffset); + self::assertSame(6, $pe->extentLength); + self::assertSame(1, $pe->kind); + self::assertTrue($pe->isShared()); + + $fh = new FragTableHeader(0, 2); + $pfh = FragTableHeader::fromBytes($fh->toBytes()); + self::assertSame(0, $pfh->nextFragtableOffset); + self::assertSame(2, $pfh->fragmentCount); + } + + public function test_reconstruction_equals_logical_content(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(1), 'x', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + self::assertSame('Hello, World!', $a->content($this->fill(1))); + $info = $a->innerInfo($this->fill(1)); + self::assertSame(13, $info->usedBytes); + self::assertCount(2, $info->extents); + } + + public function test_data_hash_is_invariant_under_fragmentation(): void + { + $mk = 
function (Chunker $c): string { + $a = new Arena(); + $a->addInner(0x10, $this->fill(7), 'x', 'abcdefghij', HashAlgo::Sha256, $c); + + return bin2hex($a->innerInfo($this->fill(7))->dataHash); + }; + self::assertSame($mk(Chunker::whole()), $mk(Chunker::fixed(3))); + self::assertSame($mk(Chunker::whole()), bin2hex(HashAlgo::Sha256->compute('abcdefghij'))); + } + + public function test_dedup_sets_shared_on_all_aliases_rule_f1(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(0xA1), 'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + $a->addInner(0x10, $this->fill(0xB2), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + + $ia = $a->innerInfo($this->fill(0xA1)); + $ib = $a->innerInfo($this->fill(0xB2)); + self::assertFalse($ia->extents[0]->shared); + self::assertTrue($ia->extents[1]->shared); + self::assertCount(1, $ib->extents); + self::assertTrue($ib->extents[0]->shared); + self::assertSame(bin2hex(HashAlgo::Sha256->compute('World!')), bin2hex($ib->dataHash)); + } + + public function test_parse_round_trips_canonical_arena_byte_exact(): void + { + $a = new Arena(); + $a->addInner(0x10, $this->fill(0xA1), 'A', 'Hello, World!', HashAlgo::Sha256, Chunker::fixed(7)); + $a->addInner(0x10, $this->fill(0xB2), 'B', 'World!', HashAlgo::Sha256, Chunker::whole()); + $bytes = $a->toBytes(); + self::assertSame(bin2hex($bytes), bin2hex(Arena::parse($bytes)->toBytes())); + } +} diff --git a/implementations/ts/.gitignore b/implementations/ts/.gitignore index c67ec45..985c387 100644 --- a/implementations/ts/.gitignore +++ b/implementations/ts/.gitignore @@ -8,6 +8,7 @@ coverage/ pcf_testvector.bin *.bin !pcf-sig/testdata/canonical.bin +!pcf-dcp/testdata/canonical.bin # --- editors --- .idea/ diff --git a/implementations/ts/package-lock.json b/implementations/ts/package-lock.json index 50b5138..9c5686d 100644 --- a/implementations/ts/package-lock.json +++ b/implementations/ts/package-lock.json @@ -8,7 +8,8 @@ "name": "@kduma-oss/implementations-ts", 
"workspaces": [ "pcf", - "pcf-sig" + "pcf-sig", + "pcf-dcp" ] }, "node_modules/@babel/helper-string-parser": { @@ -579,6 +580,10 @@ "resolved": "pcf", "link": true }, + "node_modules/@kduma-oss/pcf-dcp": { + "resolved": "pcf-dcp", + "link": true + }, "node_modules/@kduma-oss/pcf-sig": { "resolved": "pcf-sig", "link": true @@ -2079,6 +2084,24 @@ "node": ">=18" } }, + "pcf-dcp": { + "name": "@kduma-oss/pcf-dcp", + "version": "0.0.9", + "license": "MIT OR Apache-2.0", + "dependencies": { + "@kduma-oss/pcf": "^0.0.9" + }, + "devDependencies": { + "@types/node": "^22.19.19", + "@vitest/coverage-v8": "^4.1.8", + "tsx": "^4.19.0", + "typescript": "^5.6.0", + "vitest": "^4.1.8" + }, + "engines": { + "node": ">=18" + } + }, "pcf-sig": { "name": "@kduma-oss/pcf-sig", "version": "0.0.9", diff --git a/implementations/ts/package.json b/implementations/ts/package.json index 996a6e9..6c50d29 100644 --- a/implementations/ts/package.json +++ b/implementations/ts/package.json @@ -3,6 +3,7 @@ "private": true, "workspaces": [ "pcf", - "pcf-sig" + "pcf-sig", + "pcf-dcp" ] } diff --git a/implementations/ts/pcf-dcp/README.md b/implementations/ts/pcf-dcp/README.md new file mode 100644 index 0000000..e581099 --- /dev/null +++ b/implementations/ts/pcf-dcp/README.md @@ -0,0 +1,70 @@ +# @kduma-oss/pcf-dcp — PCF Dynamic Container Partition (TypeScript) + +TypeScript reader/writer for **PCF-DCP v1.0**, an application-level profile that +adds *dynamic*, fragmentable, dedup-friendly sub-partitions to the +[Partitioned Container Format](../pcf) without modifying the PCF byte container. + +This package mirrors the written specification (`PCF-DCP-spec-v1.0.txt`) and the +Rust reference implementation field-for-field, and ships the same byte-exact +700-byte canonical test vector as every other port. 
+ +## Model at a glance + +One new PCF partition type is defined: + +| Type | Name | Holds | +|--------------|-----------------|----------------------------------------------------| +| `0xAAAC0001` | `DCP_CONTAINER` | An *arena*: a header, an inner partition table, fragment tables, and data extents | + +``` +arena: +[ DCP Header (24 B) | data extents | Fragment Tables | Inner Table Block(s) ] +``` + +Each inner partition's logical content is the concatenation of its DATA extents; +its `dataHash` covers that logical content, so fragmentation, deduplication, +compaction, and promotion all leave the hash (and any PCF-SIG signature over it) +unchanged. A generic PCF reader sees a DCP file as **one opaque partition**; only +a DCP-aware reader looks inside. + +## Example + +```ts +import { Arena, Chunker, DcpReader, DcpWriter, HashAlgo } from "@kduma-oss/pcf-dcp"; +import { MemoryStorage } from "@kduma-oss/pcf"; + +const arena = new Arena(); +arena.addInner(0x10, new Uint8Array(16).fill(0xa1), "A", + new TextEncoder().encode("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7)); +arena.addInner(0x10, new Uint8Array(16).fill(0xb2), "B", + new TextEncoder().encode("World!"), HashAlgo.Sha256, Chunker.whole()); + +const w = new DcpWriter(); +w.addContainer(new Uint8Array(16).fill(0xdc), "dcp", arena); +const image = w.toImage(); + +const r = DcpReader.open(new MemoryStorage(image)); +r.verify(); +new TextDecoder().decode(r.readInner(new Uint8Array(16).fill(0xb2))); // "World!" +``` + +## Operations + +`Arena` supports content-defined deduplication, copy-on-write edits +(`append` / `insert` / `overwrite` / `delete` / `truncate`), and +sharing-preserving `compact`. `DcpWriter` adds **promotion** (`promote`, +dynamic → fixed) and **demotion** (`demote`, fixed → dynamic), each preserving +`uid`, `partitionType`, `label`, `dataHashAlgo`, and `dataHash` — the promotion +invariant, identical to the fields a PCF-SIG signature protects. 
+ +## Build & test + +``` +npm run build -w @kduma-oss/pcf # build the dependency first +npm test -w @kduma-oss/pcf-dcp +npm run gen-testvector -w @kduma-oss/pcf-dcp -- out.bin # the 700-byte vector +``` + +## Licence + +MIT OR Apache-2.0. diff --git a/implementations/ts/pcf-dcp/examples/gen-testvector.ts b/implementations/ts/pcf-dcp/examples/gen-testvector.ts new file mode 100644 index 0000000..7f76f2f --- /dev/null +++ b/implementations/ts/pcf-dcp/examples/gen-testvector.ts @@ -0,0 +1,40 @@ +/** + * Generates the canonical PCF-DCP v1.0 test-vector file (spec Section 17). Run + * with `npm run gen-testvector -- [path]` (the path defaults to + * ./pcf_dcp_testvector.bin). Everything is fixed and deterministic so that + * independent implementations can reproduce the file byte-for-byte. + */ + +import { writeFileSync } from "node:fs"; +import { createHash } from "node:crypto"; + +import { Container, MemoryStorage } from "@kduma-oss/pcf"; + +import { buildReferenceVector, DcpReader } from "../src/index.js"; + +const path = process.argv[2] ?? "pcf_dcp_testvector.bin"; + +const image = buildReferenceVector(); +writeFileSync(path, image); + +// It is a conforming PCF v1.0 file ... +const pcf = Container.open(new MemoryStorage(image)); +pcf.verify(); + +// ... and a conforming DCP file.
+const dcp = DcpReader.open(new MemoryStorage(image)); +dcp.verify(); + +const digest = createHash("sha256").update(image).digest("hex"); +console.error(`wrote ${path} (${image.length} bytes)`); +console.error(`sha256 = ${digest}`); +for (const c of dcp.containers()) { + const arena = dcp.openArena(c); + console.error(` container used=${c.usedBytes} inners=${arena.len()}`); + for (const info of arena.innerInfos()) { + const shared = info.extents.filter((e) => e.shared).length; + console.error( + ` inner ${info.label} type=0x${info.partitionType.toString(16)} used=${info.usedBytes} extents=${info.extents.length} shared=${shared}`, + ); + } +} diff --git a/implementations/ts/pcf-dcp/package.json b/implementations/ts/pcf-dcp/package.json new file mode 100644 index 0000000..fea794d --- /dev/null +++ b/implementations/ts/pcf-dcp/package.json @@ -0,0 +1,60 @@ +{ + "name": "@kduma-oss/pcf-dcp", + "version": "0.0.9", + "description": "TypeScript implementation of PCF-DCP v1.0, the PCF Dynamic Container Partition profile", + "license": "MIT OR Apache-2.0", + "author": "Krystian Duma", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "files": [ + "dist", + "src", + "README.md" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/kduma-OSS/Partitioned-Container-Format.git", + "directory": "implementations/ts/pcf-dcp" + }, + "bugs": { + "url": "https://github.com/kduma-OSS/Partitioned-Container-Format/issues" + }, + "homepage": "https://github.com/kduma-OSS/Partitioned-Container-Format#readme", + "keywords": [ + "pcf", + "pcf-dcp", + "container", + "deduplication", + "fragmentation" + ], + "publishConfig": { + "access": "public" + }, + "engines": { + "node": ">=18" + }, + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "test:watch": "vitest", + "coverage": "vitest run --coverage", + "gen-testvector": "tsx 
examples/gen-testvector.ts" + }, + "dependencies": { + "@kduma-oss/pcf": "^0.0.9" + }, + "devDependencies": { + "@types/node": "^22.19.19", + "@vitest/coverage-v8": "^4.1.8", + "tsx": "^4.19.0", + "typescript": "^5.6.0", + "vitest": "^4.1.8" + } +} diff --git a/implementations/ts/pcf-dcp/src/arena.ts b/implementations/ts/pcf-dcp/src/arena.ts new file mode 100644 index 0000000..1e600a8 --- /dev/null +++ b/implementations/ts/pcf-dcp/src/arena.ts @@ -0,0 +1,823 @@ +/** + * The DCP arena: the in-memory model of one DCP container and its canonical + * byte serialisation. + * + * An {@link Arena} holds a byte pool (`blob`) plus a list of inner partitions, + * each owning a list of fragments. A fragment addresses a byte range in the + * pool; two fragments addressing the *same* range share that extent + * (deduplication, spec Section 10.2). Editing operations work purely on the + * fragment list and append new bytes to the pool, never overwriting bytes a + * `SHARED` extent still names (copy-on-write, spec Section 10.1). + * + * {@link Arena.toBytes} always emits the *canonical* layout of the spec's test + * vector (Section 17): `DCP Header || data extents || Fragment Tables || Inner + * Table Block(s)`, with each distinct extent emitted exactly once. 
/**
 * How a Writer splits an inner partition's content into extents (spec Section
 * 10.2; chunking is writer-side policy).
 *
 * - `whole`: the content becomes a single extent.
 * - `fixed`: the content is cut into consecutive pieces of `size` bytes.
 */
export type Chunker = { type: "whole" } | { type: "fixed"; size: number };

/** Chunker constructors. */
export const Chunker = {
  /** One extent for the whole content. */
  whole(): Chunker {
    return { type: "whole" };
  },
  /**
   * Fixed-size chunks of `n` bytes (final chunk may be shorter).
   *
   * `n` is interpreted as an integer byte count when the content is split;
   * any value that does not yield a positive count (0, negatives, NaN, or a
   * fraction below 1) behaves like {@link Chunker.whole}.
   */
  fixed(n: number): Chunker {
    return { type: "fixed", size: n };
  },
};

/**
 * Cut `content` into the pieces dictated by `chunker`.
 *
 * The returned arrays are views (`subarray`) onto `content`, not copies.
 *
 * @param chunker - splitting policy.
 * @param content - bytes to split.
 * @returns the chunks in order; empty when `content` is empty. Their
 *   concatenation always equals `content`, and no chunk is zero-length.
 */
function splitChunks(chunker: Chunker, content: Uint8Array): Uint8Array[] {
  if (content.length === 0) {
    return [];
  }
  if (chunker.type === "whole") {
    return [content];
  }
  // `size` is an ordinary JS number, so it may be fractional or NaN. Truncate
  // it to an integer byte count and fall back to a single extent unless the
  // result is strictly positive. Without this, NaN produced one *empty* chunk
  // (losing the content) and fractions below 1 produced zero-length extents.
  const size = Math.trunc(chunker.size);
  if (!(size > 0)) {
    return [content];
  }
  const out: Uint8Array[] = [];
  for (let start = 0; start < content.length; start += size) {
    out.push(content.subarray(start, Math.min(start + size, content.length)));
  }
  return out;
}

/** One extent reference inside an inner partition (`offset`/`length` → blob). */
interface Frag {
  /** Start of the extent within the arena's byte pool. */
  offset: number;
  /** Length of the extent in bytes. */
  length: number;
  /** Fragment kind (`KIND_DATA` is the only kind written by this module). */
  kind: number;
  /** Whether the extent carries the SHARED flag. */
  shared: boolean;
}
*/ +interface Inner { + partitionType: number; + uid: Uint8Array; + label: Uint8Array; + dataHashAlgo: HashAlgo; + frags: Frag[]; +} + +/** A read-only view of one extent, for tooling and tests. */ +export interface ExtentInfo { + extentOffset: number; + extentLength: number; + kind: number; + shared: boolean; +} + +/** A read-only view of one inner partition. */ +export interface InnerInfo { + partitionType: number; + uid: Uint8Array; + label: string; + usedBytes: number; + dataHashAlgo: HashAlgo; + dataHash: Uint8Array; + extents: ExtentInfo[]; +} + +function bytesEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) { + return false; + } + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) { + return false; + } + } + return true; +} + +const extKey = (off: number, len: number): string => `${off}:${len}`; + +/** The in-memory model of one DCP container. */ +export class Arena { + private profileVersionMajor = PROFILE_VERSION_MAJOR; + private profileVersionMinor = PROFILE_VERSION_MINOR; + private flags = 0; + private innerTableAlgo: HashAlgo = HashAlgo.Sha256; + private blob = new Uint8Array(0); + private blobLen = 0; + private inners: Inner[] = []; + + /** Choose the hash algorithm used for inner Table Blocks (default SHA-256). */ + withInnerTableAlgo(algo: HashAlgo): this { + this.innerTableAlgo = algo; + return this; + } + + // ---- byte pool --------------------------------------------------------- + + private appendBlob(data: Uint8Array): number { + const start = this.blobLen; + const end = start + data.length; + if (end > this.blob.length) { + let cap = this.blob.length === 0 ? 
64 : this.blob.length; + while (cap < end) { + cap *= 2; + } + const next = new Uint8Array(cap); + next.set(this.blob.subarray(0, this.blobLen), 0); + this.blob = next; + } + this.blob.set(data, start); + this.blobLen = end; + return start; + } + + private blobSlice(off: number, len: number): Uint8Array { + return this.blob.subarray(off, off + len); + } + + // ---- parsing ----------------------------------------------------------- + + /** Parse an arena from its on-disk bytes (spec Sections 6–8). */ + static parse(bytes: Uint8Array): Arena { + const header = readHeader(bytes); + if (header.profileVersionMajor !== PROFILE_VERSION_MAJOR) { + throw PcfDcpError.unsupportedProfileMajor(header.profileVersionMajor); + } + const arenaUsed = header.arenaUsed; + + const arena = new Arena(); + arena.profileVersionMajor = header.profileVersionMajor; + arena.profileVersionMinor = header.profileVersionMinor; + arena.flags = header.flags; + arena.blob = bytes.slice(); + arena.blobLen = bytes.length; + + let firstBlock = true; + let off = header.innerTableOffset; + let budget = Math.floor(bytes.length / TABLE_HEADER_SIZE) + 1; + while (off !== ARENA_NONE) { + if (budget === 0) { + throw PcfDcpError.offsetOutOfRange(); + } + budget -= 1; + if (off + TABLE_HEADER_SIZE > bytes.length) { + throw PcfDcpError.offsetOutOfRange(); + } + const h = tableHeaderFromBytes(bytes.subarray(off, off + TABLE_HEADER_SIZE)); + if (firstBlock) { + arena.innerTableAlgo = h.tableHashAlgo; + firstBlock = false; + } + for (let i = 0; i < h.partitionCount; i++) { + const eo = off + TABLE_HEADER_SIZE + i * ENTRY_SIZE; + if (eo + ENTRY_SIZE > bytes.length) { + throw PcfDcpError.offsetOutOfRange(); + } + const entry = entryFromBytes(bytes.subarray(eo, eo + ENTRY_SIZE)); + const onDisk = walkFragmentTable(bytes, Number(entry.startOffset)); + const frags: Frag[] = onDisk.map((fe) => ({ + offset: fe.extentOffset, + length: fe.extentLength, + kind: fe.kind, + shared: (fe.flags & FLAG_SHARED) !== 0, + })); + 
arena.inners.push({ + partitionType: entry.partitionType, + uid: entry.uid, + label: entry.label, + dataHashAlgo: entry.dataHashAlgo, + frags, + }); + } + off = Number(h.nextTableOffset); + } + + // Bound every extent by the declared arena_used. + for (const inner of arena.inners) { + for (const f of inner.frags) { + if (f.offset + f.length > arenaUsed) { + throw PcfDcpError.offsetOutOfRange(); + } + } + } + return arena; + } + + // ---- read-only views --------------------------------------------------- + + /** Number of inner partitions. */ + len(): number { + return this.inners.length; + } + + /** Whether the arena has no inner partitions. */ + isEmpty(): boolean { + return this.inners.length === 0; + } + + /** The uids of all inner partitions, in stored order. */ + uids(): Uint8Array[] { + return this.inners.map((i) => i.uid.slice()); + } + + private indexOf(uid: Uint8Array): number { + const i = this.inners.findIndex((inner) => bytesEqual(inner.uid, uid)); + if (i < 0) { + throw PcfDcpError.notFound(); + } + return i; + } + + private innerLogicalLen(inner: Inner): number { + let total = 0; + for (const f of inner.frags) { + if (f.kind === KIND_DATA) { + total += f.length; + } + } + return total; + } + + private innerContent(inner: Inner): Uint8Array { + const out = new Uint8Array(this.innerLogicalLen(inner)); + let p = 0; + for (const f of inner.frags) { + if (f.kind === KIND_DATA) { + out.set(this.blobSlice(f.offset, f.length), p); + p += f.length; + } + } + return out; + } + + private innerDataHash(inner: Inner): Uint8Array { + return computeHashField(inner.dataHashAlgo, this.innerContent(inner)); + } + + private view(inner: Inner): InnerInfo { + return { + partitionType: inner.partitionType, + uid: inner.uid.slice(), + label: decodeLabel(inner.label), + usedBytes: this.innerLogicalLen(inner), + dataHashAlgo: inner.dataHashAlgo, + dataHash: this.innerDataHash(inner), + extents: inner.frags.map((f) => ({ + extentOffset: f.offset, + extentLength: f.length, + 
kind: f.kind, + shared: f.shared, + })), + }; + } + + /** A read-only view of one inner partition. */ + innerInfo(uid: Uint8Array): InnerInfo { + return this.view(this.inners[this.indexOf(uid)]!); + } + + /** Read-only views of every inner partition, in stored order. */ + innerInfos(): InnerInfo[] { + return this.inners.map((i) => this.view(i)); + } + + /** Reconstruct an inner partition's logical content (spec Section 8.3). */ + content(uid: Uint8Array): Uint8Array { + const inner = this.inners[this.indexOf(uid)]!; + const bytes = this.innerContent(inner); + const declared = this.innerLogicalLen(inner); + if (bytes.length !== declared) { + throw PcfDcpError.lengthMismatch(declared, bytes.length); + } + return bytes; + } + + // ---- builder ----------------------------------------------------------- + + /** + * Add an inner partition whose `content` is split by `chunker` into extents, + * deduplicating against extents already present (spec Section 10.2). + */ + addInner( + partitionType: number, + uid: Uint8Array, + label: string, + content: Uint8Array, + dataHashAlgo: HashAlgo, + chunker: Chunker, + ): void { + if ((partitionType >>> 0) === 0) { + throw PcfDcpError.reservedType(); + } + if ((partitionType >>> 0) === DCP_CONTAINER_TYPE) { + throw PcfDcpError.nestedContainer(); + } + if (bytesEqual(uid, NIL_UID)) { + throw PcfDcpError.nilUid(); + } + if (this.inners.some((i) => bytesEqual(i.uid, uid))) { + throw PcfDcpError.duplicateUid(); + } + const labelBytes = encodeLabel(label); + + const frags: Frag[] = []; + for (const chunk of splitChunks(chunker, content)) { + const hit = + this.findExtent(chunk) ?? 
this.findLocal(frags, chunk); + if (hit) { + const [offset, length] = hit; + this.markShared(offset, length); + for (const f of frags) { + if (f.offset === offset && f.length === length) { + f.shared = true; + } + } + frags.push({ offset, length, kind: KIND_DATA, shared: true }); + } else { + const offset = this.appendBlob(chunk); + frags.push({ offset, length: chunk.length, kind: KIND_DATA, shared: false }); + } + } + this.inners.push({ + partitionType: partitionType >>> 0, + uid: uid.slice(), + label: labelBytes, + dataHashAlgo, + frags, + }); + } + + private findExtent(chunk: Uint8Array): [number, number] | undefined { + if (chunk.length === 0) { + return undefined; + } + for (const inner of this.inners) { + for (const f of inner.frags) { + if ( + f.kind === KIND_DATA && + f.length === chunk.length && + bytesEqual(this.blobSlice(f.offset, f.length), chunk) + ) { + return [f.offset, f.length]; + } + } + } + return undefined; + } + + private findLocal(frags: readonly Frag[], chunk: Uint8Array): [number, number] | undefined { + if (chunk.length === 0) { + return undefined; + } + for (const f of frags) { + if ( + f.kind === KIND_DATA && + f.length === chunk.length && + bytesEqual(this.blobSlice(f.offset, f.length), chunk) + ) { + return [f.offset, f.length]; + } + } + return undefined; + } + + private markShared(offset: number, length: number): void { + for (const inner of this.inners) { + for (const f of inner.frags) { + if (f.offset === offset && f.length === length) { + f.shared = true; + } + } + } + } + + // ---- logical edits (copy-on-write) ------------------------------------- + + /** Append `bytes` to the end of an inner partition's logical content. 
/**
 * Append `bytes` to the end of an inner partition's logical content.
 *
 * The bytes are copied to the end of the pool and referenced by one new,
 * unshared DATA fragment; nothing already in the pool is modified.
 *
 * @param uid - uid of the inner partition to extend.
 * @param bytes - bytes to add; an empty array leaves the partition untouched.
 * @throws PcfDcpError `NotFound` when no inner partition has `uid`.
 */
append(uid: Uint8Array, bytes: Uint8Array): void {
  const idx = this.indexOf(uid);
  if (bytes.length === 0) {
    return;
  }
  const offset = this.appendBlob(bytes);
  this.inners[idx]!.frags.push({
    offset,
    length: bytes.length,
    kind: KIND_DATA,
    shared: false,
  });
}

/**
 * Overwrite the logical range `[pos, pos+len)` with `bytes`.
 *
 * Implemented as {@link Arena.delete} followed by {@link Arena.insert}, so
 * `bytes` does not have to be `len` bytes long: the content grows or shrinks
 * by the difference.
 *
 * @throws PcfDcpError `NotFound` for an unknown `uid`, or
 *   `PositionOutOfRange` for an invalid range (raised by the delete step
 *   before anything has been modified).
 */
overwrite(uid: Uint8Array, pos: number, len: number, bytes: Uint8Array): void {
  this.delete(uid, pos, len);
  this.insert(uid, pos, bytes);
}

/**
 * Insert `bytes` at logical position `pos` (`pos == length` appends).
 *
 * The fragment straddling `pos` is split (no bytes are copied) and a new
 * unshared DATA fragment holding `bytes` is placed at the boundary.
 *
 * @throws PcfDcpError `NotFound` for an unknown `uid`;
 *   `PositionOutOfRange` when `pos` is not an integer in `[0, length]`.
 */
insert(uid: Uint8Array, pos: number, bytes: Uint8Array): void {
  const idx = this.indexOf(uid);
  const total = this.innerLogicalLen(this.inners[idx]!);
  // Positions are plain JS numbers: reject negatives, fractions and NaN here,
  // otherwise splitAt would build fragments with negative lengths or silently
  // treat NaN as "end of content".
  if (!Number.isSafeInteger(pos) || pos < 0 || pos > total) {
    throw PcfDcpError.positionOutOfRange();
  }
  if (bytes.length === 0) {
    return;
  }
  const split = this.splitAt(idx, pos);
  const offset = this.appendBlob(bytes);
  this.inners[idx]!.frags.splice(split, 0, {
    offset,
    length: bytes.length,
    kind: KIND_DATA,
    shared: false,
  });
}

/**
 * Delete the logical range `[pos, pos+len)`.
 *
 * Only the fragment list changes; the pool bytes stay in place until the
 * arena is compacted.
 *
 * @throws PcfDcpError `NotFound` for an unknown `uid`;
 *   `PositionOutOfRange` when `pos` or `len` is not a non-negative integer
 *   or the range extends past the end of the content.
 */
delete(uid: Uint8Array, pos: number, len: number): void {
  const idx = this.indexOf(uid);
  const total = this.innerLogicalLen(this.inners[idx]!);
  const validPos = Number.isSafeInteger(pos) && pos >= 0;
  const validLen = Number.isSafeInteger(len) && len >= 0;
  const end = pos + len;
  if (!validPos || !validLen || end > total) {
    throw PcfDcpError.positionOutOfRange();
  }
  if (len === 0) {
    return;
  }
  // Create boundaries at both ends, then drop every fragment in between.
  // `hi` is computed after the first split, so it already accounts for the
  // extra fragment that split may have introduced.
  const lo = this.splitAt(idx, pos);
  const hi = this.splitAt(idx, end);
  this.inners[idx]!.frags.splice(lo, hi - lo);
}

/**
 * Truncate the partition's logical content to `newLen` bytes.
 *
 * @throws PcfDcpError `NotFound` for an unknown `uid`;
 *   `PositionOutOfRange` when `newLen` is not an integer in `[0, length]`.
 */
truncate(uid: Uint8Array, newLen: number): void {
  const idx = this.indexOf(uid);
  const total = this.innerLogicalLen(this.inners[idx]!);
  // A negative length previously left a fragment of length -1 behind, and NaN
  // was silently ignored; both are now rejected up front.
  if (!Number.isSafeInteger(newLen) || newLen < 0 || newLen > total) {
    throw PcfDcpError.positionOutOfRange();
  }
  const cut = this.splitAt(idx, newLen);
  this.inners[idx]!.frags.length = cut;
}

/**
 * Ensure a fragment boundary exists at logical position `pos` in inner `idx`,
 * splitting the straddling fragment if needed. Splitting copies no bytes: the
 * two halves address adjacent ranges of the original extent and both inherit
 * its `kind` and `shared` flag.
 *
 * Callers are expected to pass an integer `pos` in `[0, length]`.
 *
 * NOTE(review): positions are accumulated over every fragment's length, while
 * `innerLogicalLen` counts only `KIND_DATA` fragments; the two agree as long
 * as an inner partition holds DATA fragments only.
 *
 * @returns the index of the fragment that starts at `pos`, or
 *   `frags.length` when `pos` lies at or past the end of the content.
 */
private splitAt(idx: number, pos: number): number {
  const frags = this.inners[idx]!.frags;
  let logical = 0;
  for (let i = 0; i < frags.length; i += 1) {
    if (logical === pos) {
      // Already on a boundary: nothing to split.
      return i;
    }
    const f = frags[i]!;
    const flen = f.length;
    if (pos < logical + flen) {
      const head = pos - logical;
      const left: Frag = { offset: f.offset, length: head, kind: f.kind, shared: f.shared };
      const right: Frag = {
        offset: f.offset + head,
        length: flen - head,
        kind: f.kind,
        shared: f.shared,
      };
      frags[i] = left;
      frags.splice(i + 1, 0, right);
      return i + 1;
    }
    logical += flen;
  }
  return frags.length;
}

// ---- promotion support -------------------------------------------------

/**
 * Remove an inner partition, returning the pieces a promotion needs: its
 * type, label, hash algorithm, and reconstructed logical content.
 *
 * The content is reconstructed before the partition is dropped from the
 * list. Pool bytes and the SHARED flags of other partitions are left as they
 * are; `compact` normalises them.
 *
 * @throws PcfDcpError `NotFound` when no inner partition has `uid`.
 */
removeInner(uid: Uint8Array): {
  partitionType: number;
  label: string;
  dataHashAlgo: HashAlgo;
  content: Uint8Array;
} {
  const idx = this.indexOf(uid);
  const content = this.content(uid);
  const inner = this.inners.splice(idx, 1)[0]!;
  return {
    partitionType: inner.partitionType,
    label: decodeLabel(inner.label),
    dataHashAlgo: inner.dataHashAlgo,
    content,
  };
}

// ---- deduplication and compaction --------------------------------------
Returns the estimated + * number of bytes the pool shrank by once re-serialised. + */ + dedup(chunker: Chunker): number { + const before = this.canonicalExtentBytes(); + const rebuilt = new Arena(); + rebuilt.profileVersionMajor = this.profileVersionMajor; + rebuilt.profileVersionMinor = this.profileVersionMinor; + rebuilt.flags = this.flags; + rebuilt.innerTableAlgo = this.innerTableAlgo; + for (const inner of this.inners) { + rebuilt.addInner( + inner.partitionType, + inner.uid, + decodeLabel(inner.label), + this.innerContent(inner), + inner.dataHashAlgo, + chunker, + ); + } + this.adopt(rebuilt); + const after = this.canonicalExtentBytes(); + return Math.max(0, before - after); + } + + /** + * Compact the arena (spec Section 10.3): drop unreferenced pool bytes and + * normalise the SHARED flag, clearing it on any extent now referenced exactly + * once (rule F2). Returns the number of dead pool bytes reclaimed. + */ + compact(): number { + const refcount = new Map(); + for (const inner of this.inners) { + for (const f of inner.frags) { + const k = extKey(f.offset, f.length); + refcount.set(k, (refcount.get(k) ?? 0) + 1); + } + } + for (const inner of this.inners) { + for (const f of inner.frags) { + if ((refcount.get(extKey(f.offset, f.length)) ?? 
0) <= 1) { + f.shared = false; + } + } + } + let liveBytes = 0; + for (const [k] of refcount) { + liveBytes += Number(k.split(":")[1]); + } + const deadBefore = Math.max(0, this.blobLen - liveBytes); + + const newPool = new Arena(); + newPool.innerTableAlgo = this.innerTableAlgo; + const remap = new Map(); + for (const inner of this.inners) { + for (const f of inner.frags) { + const k = extKey(f.offset, f.length); + if (!remap.has(k)) { + remap.set(k, newPool.appendBlob(this.blobSlice(f.offset, f.length))); + } + } + } + for (const inner of this.inners) { + for (const f of inner.frags) { + f.offset = remap.get(extKey(f.offset, f.length))!; + } + } + this.blob = newPool.blob; + this.blobLen = newPool.blobLen; + return deadBefore; + } + + private canonicalExtentBytes(): number { + const seen = new Set(); + let total = 0; + for (const inner of this.inners) { + for (const f of inner.frags) { + const k = extKey(f.offset, f.length); + if (!seen.has(k)) { + seen.add(k); + total += f.length; + } + } + } + return total; + } + + private adopt(other: Arena): void { + this.profileVersionMajor = other.profileVersionMajor; + this.profileVersionMinor = other.profileVersionMinor; + this.flags = other.flags; + this.innerTableAlgo = other.innerTableAlgo; + this.blob = other.blob; + this.blobLen = other.blobLen; + this.inners = other.inners; + } + + // ---- canonical serialisation ------------------------------------------- + + /** Serialise the arena into its canonical on-disk layout (spec Section 17). */ + toBytes(): Uint8Array { + // 1. distinct extents, first-reference order + const extOrder: Array<[number, number]> = []; + const extIndex = new Map(); + for (const inner of this.inners) { + for (const f of inner.frags) { + const k = extKey(f.offset, f.length); + if (!extIndex.has(k)) { + extIndex.set(k, extOrder.length); + extOrder.push([f.offset, f.length]); + } + } + } + + // 2. 
lay out extents right after the header + let cur = DCP_HEADER_SIZE; + const extArenaOff: number[] = []; + for (const [, len] of extOrder) { + extArenaOff.push(cur); + cur += len; + } + + // 3. Fragment Tables (one chain per inner) + const fragOff: number[] = []; + for (const inner of this.inners) { + fragOff.push(cur); + cur += fragtableSpan(inner.frags.length); + } + + // 4. Inner Table Block(s) + const innerTableOffset = cur; + const counts = blockCounts(this.inners.length); + const blockOff: number[] = []; + for (const c of counts) { + blockOff.push(cur); + cur += TABLE_HEADER_SIZE + c * ENTRY_SIZE; + } + const arenaUsed = cur; + + const buf = new Uint8Array(arenaUsed); + + const header: DcpHeader = { + profileVersionMajor: this.profileVersionMajor, + profileVersionMinor: this.profileVersionMinor, + flags: this.flags, + innerTableOffset, + arenaUsed, + }; + buf.set(dcpHeaderToBytes(header), 0); + + for (let i = 0; i < extOrder.length; i++) { + const [boff, len] = extOrder[i]!; + buf.set(this.blobSlice(boff, len), extArenaOff[i]!); + } + + for (let ii = 0; ii < this.inners.length; ii++) { + writeFragmentTable(buf, fragOff[ii]!, this.inners[ii]!.frags, extIndex, extArenaOff); + } + + const entries: PartitionEntry[] = this.inners.map((inner, ii) => { + const used = this.innerLogicalLen(inner); + return { + partitionType: inner.partitionType, + uid: inner.uid.slice(), + label: inner.label.slice(), + startOffset: BigInt(fragOff[ii]!), + maxLength: BigInt(used), + usedBytes: BigInt(used), + dataHashAlgo: inner.dataHashAlgo, + dataHash: this.innerDataHash(inner), + }; + }); + + let idx = 0; + for (let b = 0; b < counts.length; b++) { + const c = counts[b]!; + const next = b + 1 < counts.length ? blockOff[b + 1]! 
: 0; + const slice = entries.slice(idx, idx + c); + const th = computeTableHash(this.innerTableAlgo, BigInt(next), slice); + const bh: TableBlockHeader = { + partitionCount: c, + nextTableOffset: BigInt(next), + tableHashAlgo: this.innerTableAlgo, + tableHash: th, + }; + let p = blockOff[b]!; + buf.set(tableHeaderToBytes(bh), p); + p += TABLE_HEADER_SIZE; + for (const e of slice) { + buf.set(entryToBytes(e), p); + p += ENTRY_SIZE; + } + idx += c; + } + + return buf; + } +} + +/** Span of an inner partition's Fragment Table chain holding `n` extents. */ +function fragtableSpan(n: number): number { + let span = 0; + for (const c of blockCounts(n)) { + span += FRAGTABLE_HEADER_SIZE + c * FRAGMENT_ENTRY_SIZE; + } + return span; +} + +/** Split `n` items into blocks of at most 255 (always at least one block). */ +function blockCounts(n: number): number[] { + if (n === 0) { + return [0]; + } + const out: number[] = []; + let rem = n; + while (rem > 0) { + const c = Math.min(rem, MAX_ENTRIES_PER_BLOCK); + out.push(c); + rem -= c; + } + return out; +} + +/** Write one inner partition's Fragment Table chain at `start`. */ +function writeFragmentTable( + buf: Uint8Array, + start: number, + frags: readonly Frag[], + extIndex: Map, + extArenaOff: number[], +): void { + const counts = blockCounts(frags.length); + let blockStart = start; + let idx = 0; + for (let b = 0; b < counts.length; b++) { + const c = counts[b]!; + const span = FRAGTABLE_HEADER_SIZE + c * FRAGMENT_ENTRY_SIZE; + const next = b + 1 < counts.length ? blockStart + span : 0; + buf.set( + fragTableHeaderToBytes({ nextFragtableOffset: next, fragmentCount: c }), + blockStart, + ); + for (let j = 0; j < c; j++) { + const f = frags[idx + j]!; + const arenaOff = extArenaOff[extIndex.get(extKey(f.offset, f.length))!]!; + buf.set( + fragmentEntryToBytes({ + extentOffset: arenaOff, + extentLength: f.length, + kind: f.kind, + flags: f.shared ? 
FLAG_SHARED : 0, + }), + blockStart + FRAGTABLE_HEADER_SIZE + j * FRAGMENT_ENTRY_SIZE, + ); + } + blockStart += span; + idx += c; + } +} diff --git a/implementations/ts/pcf-dcp/src/consts.ts b/implementations/ts/pcf-dcp/src/consts.ts new file mode 100644 index 0000000..2d2361b --- /dev/null +++ b/implementations/ts/pcf-dcp/src/consts.ts @@ -0,0 +1,47 @@ +/** + * On-disk constants defined by PCF-DCP v1.0. + * + * Every value here is normative and corresponds directly to a figure in the + * specification (`PCF-DCP-spec-v1.0.txt`, Appendix A and B). + */ + +/** PCF partition type carrying one DCP arena (spec Appendix B). */ +export const DCP_CONTAINER_TYPE = 0xaaac_0001; + +/** First value of the block reserved by this profile for future types. */ +export const DCP_TYPE_RESERVED_LO = 0xaaac_0000; +/** Last value of the block reserved by this profile. */ +export const DCP_TYPE_RESERVED_HI = 0xaaac_00ff; + +/** 4-byte magic at the start of a DCP arena (spec Section 6): `"PDCP"`. */ +export const DCP_MAGIC: Uint8Array = new Uint8Array([0x50, 0x44, 0x43, 0x50]); + +/** PCF-DCP profile version implemented by this library (major). */ +export const PROFILE_VERSION_MAJOR = 1; +/** PCF-DCP profile version implemented by this library (minor). */ +export const PROFILE_VERSION_MINOR = 0; + +/** Fixed size of the DCP Header, in bytes (spec Section 6). */ +export const DCP_HEADER_SIZE = 24; +/** Fixed size of a Fragment Table block header, in bytes (spec Section 8.1). */ +export const FRAGTABLE_HEADER_SIZE = 9; +/** Fixed size of one Fragment Entry, in bytes (spec Section 8.2). */ +export const FRAGMENT_ENTRY_SIZE = 18; + +/** Fragment Entry kind: RESERVED / INVALID guard. */ +export const KIND_INVALID = 0; +/** Fragment Entry kind: DATA — literal content bytes (only kind in v1.0). */ +export const KIND_DATA = 1; +/** Fragment Entry kind: HOLE (RESERVED). */ +export const KIND_HOLE = 2; +/** Fragment Entry kind: REF (RESERVED). 
*/ +export const KIND_REF = 3; + +/** Fragment Entry `flags` bit 0: SHARED — no in-place overwrite (copy-on-write). */ +export const FLAG_SHARED = 0x01; + +/** The arena-relative offset value reserved as "none" / chain terminator. */ +export const ARENA_NONE = 0; + +/** Maximum entries per (inner) Table Block and extents per Fragment Table block. */ +export const MAX_ENTRIES_PER_BLOCK = 255; diff --git a/implementations/ts/pcf-dcp/src/errors.ts b/implementations/ts/pcf-dcp/src/errors.ts new file mode 100644 index 0000000..23f1165 --- /dev/null +++ b/implementations/ts/pcf-dcp/src/errors.ts @@ -0,0 +1,126 @@ +/** + * Error type shared across the library (mirrors the reference `Error` enum). + */ + +/** Discriminant identifying which kind of {@link PcfDcpError} occurred. */ +export enum PcfDcpErrorKind { + /** The arena did not begin with the `"PDCP"` magic (spec Section 6). */ + BadDcpMagic = "BadDcpMagic", + /** The arena's `profile_version_major` is not implemented by this library. */ + UnsupportedProfileMajor = "UnsupportedProfileMajor", + /** A Fragment Entry carried an unsupported `kind` (HOLE/REF/unknown). */ + BadFragmentKind = "BadFragmentKind", + /** An extent range escapes `[0, arena_used)`, or a table chain runs past the arena. */ + OffsetOutOfRange = "OffsetOutOfRange", + /** Reconstructed length did not match the inner entry's `used_bytes`. */ + LengthMismatch = "LengthMismatch", + /** A stored hash (inner `table_hash` or inner `data_hash`) did not verify. */ + HashMismatch = "HashMismatch", + /** No inner (or top-level) partition with the requested uid. */ + NotFound = "NotFound", + /** A uid is used by more than one partition file-wide (spec Section 2.1). */ + DuplicateUid = "DuplicateUid", + /** An inner partition is itself a DCP container (nesting forbidden in v1.0). */ + NestedContainer = "NestedContainer", + /** A partition uid is the PCF NIL uid. */ + NilUid = "NilUid", + /** A partition type is the PCF reserved type `0x00000000`. 
*/ + ReservedType = "ReservedType", + /** A top-level partition expected to be a DCP container is not one. */ + NotADcpContainer = "NotADcpContainer", + /** A logical edit addressed a position beyond the partition's content. */ + PositionOutOfRange = "PositionOutOfRange", +} + +/** All ways a PCF-DCP operation can fail. */ +export class PcfDcpError extends Error { + /** The kind of failure. */ + readonly kind: PcfDcpErrorKind; + /** Optional numeric detail (e.g. the unsupported major or bad fragment kind). */ + readonly value?: number; + + constructor(kind: PcfDcpErrorKind, message: string, value?: number) { + super(message); + this.name = "PcfDcpError"; + this.kind = kind; + if (value !== undefined) { + this.value = value; + } + Object.setPrototypeOf(this, PcfDcpError.prototype); + } + + static badDcpMagic(): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.BadDcpMagic, + 'arena does not begin with "PDCP" magic', + ); + } + static unsupportedProfileMajor(v: number): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.UnsupportedProfileMajor, + `unsupported PCF-DCP profile major version ${v}`, + v, + ); + } + static badFragmentKind(k: number): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.BadFragmentKind, + `unsupported fragment kind ${k}`, + k, + ); + } + static offsetOutOfRange(): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.OffsetOutOfRange, + "extent range escapes the arena", + ); + } + static lengthMismatch(expected: number, got: number): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.LengthMismatch, + `logical length mismatch: expected ${expected}, got ${got}`, + ); + } + static hashMismatch(): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.HashMismatch, + "stored hash does not verify", + ); + } + static notFound(): PcfDcpError { + return new PcfDcpError(PcfDcpErrorKind.NotFound, "no partition with that uid"); + } + static duplicateUid(): PcfDcpError { + return new PcfDcpError( + 
PcfDcpErrorKind.DuplicateUid, + "uid is not unique file-wide", + ); + } + static nestedContainer(): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.NestedContainer, + "an inner partition may not be a DCP container", + ); + } + static nilUid(): PcfDcpError { + return new PcfDcpError(PcfDcpErrorKind.NilUid, "uid is the NIL uid"); + } + static reservedType(): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.ReservedType, + "partition type is the reserved type 0x00000000", + ); + } + static notADcpContainer(): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.NotADcpContainer, + "partition is not a DCP container", + ); + } + static positionOutOfRange(): PcfDcpError { + return new PcfDcpError( + PcfDcpErrorKind.PositionOutOfRange, + "logical position is past end of content", + ); + } +} diff --git a/implementations/ts/pcf-dcp/src/fragment.ts b/implementations/ts/pcf-dcp/src/fragment.ts new file mode 100644 index 0000000..d476408 --- /dev/null +++ b/implementations/ts/pcf-dcp/src/fragment.ts @@ -0,0 +1,147 @@ +/** + * The Fragment Table: its 9-byte block header and 18-byte entries + * (spec Section 8). + */ + +import { + ARENA_NONE, + FLAG_SHARED, + FRAGMENT_ENTRY_SIZE, + FRAGTABLE_HEADER_SIZE, + KIND_DATA, +} from "./consts.js"; +import { PcfDcpError } from "./errors.js"; + +/** One Fragment Entry: a single extent of an inner partition (spec 8.2). */ +export interface FragmentEntry { + /** Arena-relative start of the extent's bytes. */ + extentOffset: number; + /** Length of the extent in bytes. */ + extentLength: number; + /** Extent kind (`1` = DATA; `0` invalid; `2`/`3` reserved). */ + kind: number; + /** `flags` byte (bit 0 = SHARED; others reserved 0). */ + flags: number; +} + +/** Serialise a Fragment Entry to its on-disk 18-byte layout. 
*/ +export function fragmentEntryToBytes(e: FragmentEntry): Uint8Array { + const b = new Uint8Array(FRAGMENT_ENTRY_SIZE); + const view = new DataView(b.buffer); + view.setBigUint64(0, BigInt(e.extentOffset), true); + view.setBigUint64(8, BigInt(e.extentLength), true); + b[16] = e.kind & 0xff; + b[17] = e.flags & 0xff; + return b; +} + +/** Parse a Fragment Entry from its on-disk 18-byte layout. */ +export function fragmentEntryFromBytes(b: Uint8Array): FragmentEntry { + const view = new DataView(b.buffer, b.byteOffset, b.byteLength); + return { + extentOffset: Number(view.getBigUint64(0, true)), + extentLength: Number(view.getBigUint64(8, true)), + kind: b[16]!, + flags: b[17]!, + }; +} + +/** Whether a Fragment Entry's `kind` is DATA. */ +export function isData(e: FragmentEntry): boolean { + return e.kind === KIND_DATA; +} + +/** Whether the SHARED flag (bit 0) is set on a Fragment Entry. */ +export function isShared(e: FragmentEntry): boolean { + return (e.flags & FLAG_SHARED) !== 0; +} + +/** The 9-byte header that begins each Fragment Table block (spec 8.1). */ +export interface FragTableHeader { + /** Arena-relative offset of the next block of this partition, or 0. */ + nextFragtableOffset: number; + /** Number of Fragment Entries packed immediately after this header. */ + fragmentCount: number; +} + +/** Serialise a Fragment Table block header to its on-disk 9-byte layout. */ +export function fragTableHeaderToBytes(h: FragTableHeader): Uint8Array { + const b = new Uint8Array(FRAGTABLE_HEADER_SIZE); + const view = new DataView(b.buffer); + view.setBigUint64(0, BigInt(h.nextFragtableOffset), true); + b[8] = h.fragmentCount & 0xff; + return b; +} + +/** Parse a Fragment Table block header from its on-disk 9-byte layout. 
*/ +export function fragTableHeaderFromBytes(b: Uint8Array): FragTableHeader { + const view = new DataView(b.buffer, b.byteOffset, b.byteLength); + return { + nextFragtableOffset: Number(view.getBigUint64(0, true)), + fragmentCount: b[8]!, + }; +} + +/** + * Walk an inner partition's Fragment Table chain starting at arena-relative + * `firstOff`, returning its Fragment Entries in logical order (spec 8.3). + */ +export function walkFragmentTable( + arena: Uint8Array, + firstOff: number, +): FragmentEntry[] { + const out: FragmentEntry[] = []; + let off = firstOff; + let budget = Math.floor(arena.length / FRAGTABLE_HEADER_SIZE) + 1; + while (off !== ARENA_NONE) { + if (budget === 0) { + throw PcfDcpError.offsetOutOfRange(); + } + budget -= 1; + if (off + FRAGTABLE_HEADER_SIZE > arena.length) { + throw PcfDcpError.offsetOutOfRange(); + } + const h = fragTableHeaderFromBytes( + arena.subarray(off, off + FRAGTABLE_HEADER_SIZE), + ); + let eo = off + FRAGTABLE_HEADER_SIZE; + for (let i = 0; i < h.fragmentCount; i++) { + if (eo + FRAGMENT_ENTRY_SIZE > arena.length) { + throw PcfDcpError.offsetOutOfRange(); + } + out.push(fragmentEntryFromBytes(arena.subarray(eo, eo + FRAGMENT_ENTRY_SIZE))); + eo += FRAGMENT_ENTRY_SIZE; + } + off = h.nextFragtableOffset; + } + return out; +} + +/** + * Reconstruct the logical content of a partition from its Fragment Entries + * (spec Section 8.3): concatenate the bytes of its DATA extents in order. 
+ */ +export function reconstruct( + arena: Uint8Array, + frags: readonly FragmentEntry[], + arenaUsed: number, +): Uint8Array { + let total = 0; + for (const f of frags) { + if (!isData(f)) { + throw PcfDcpError.badFragmentKind(f.kind); + } + const end = f.extentOffset + f.extentLength; + if (end > arenaUsed || end > arena.length) { + throw PcfDcpError.offsetOutOfRange(); + } + total += f.extentLength; + } + const out = new Uint8Array(total); + let p = 0; + for (const f of frags) { + out.set(arena.subarray(f.extentOffset, f.extentOffset + f.extentLength), p); + p += f.extentLength; + } + return out; +} diff --git a/implementations/ts/pcf-dcp/src/header.ts b/implementations/ts/pcf-dcp/src/header.ts new file mode 100644 index 0000000..89bca84 --- /dev/null +++ b/implementations/ts/pcf-dcp/src/header.ts @@ -0,0 +1,58 @@ +/** + * The fixed 24-byte DCP Header at arena offset 0 (spec Section 6). + */ + +import { DCP_HEADER_SIZE, DCP_MAGIC } from "./consts.js"; +import { PcfDcpError } from "./errors.js"; + +/** Parsed DCP Header. All offsets it carries are arena-relative. */ +export interface DcpHeader { + /** PCF-DCP profile major version. */ + profileVersionMajor: number; + /** PCF-DCP profile minor version. */ + profileVersionMinor: number; + /** Reserved; MUST be 0 in v1.0. */ + flags: number; + /** Arena-relative offset of the first Inner Table Block (0 = none). */ + innerTableOffset: number; + /** Bump pointer: arena-relative offset of the first free byte. */ + arenaUsed: number; +} + +/** Serialise a DCP Header to its on-disk 24-byte layout. 
*/ +export function dcpHeaderToBytes(h: DcpHeader): Uint8Array { + const b = new Uint8Array(DCP_HEADER_SIZE); + const view = new DataView(b.buffer); + b.set(DCP_MAGIC, 0); + b[4] = h.profileVersionMajor & 0xff; + b[5] = h.profileVersionMinor & 0xff; + view.setUint16(6, h.flags & 0xffff, true); + view.setBigUint64(8, BigInt(h.innerTableOffset), true); + view.setBigUint64(16, BigInt(h.arenaUsed), true); + return b; +} + +/** Parse a DCP Header from its on-disk 24-byte layout, validating the magic (throws `BadDcpMagic`); the profile version is returned as-is, not checked here. */ +export function dcpHeaderFromBytes(b: Uint8Array): DcpHeader { + for (let i = 0; i < 4; i++) { + if (b[i] !== DCP_MAGIC[i]) { + throw PcfDcpError.badDcpMagic(); + } + } + const view = new DataView(b.buffer, b.byteOffset, b.byteLength); + return { + profileVersionMajor: b[4]!, + profileVersionMinor: b[5]!, + flags: view.getUint16(6, true), + innerTableOffset: Number(view.getBigUint64(8, true)), + arenaUsed: Number(view.getBigUint64(16, true)), + }; +} + +/** Read a DCP Header from the start of an arena byte slice; an arena shorter than `DCP_HEADER_SIZE` also throws `BadDcpMagic`. */ +export function readHeader(arena: Uint8Array): DcpHeader { + if (arena.length < DCP_HEADER_SIZE) { + throw PcfDcpError.badDcpMagic(); + } + return dcpHeaderFromBytes(arena.subarray(0, DCP_HEADER_SIZE)); +} diff --git a/implementations/ts/pcf-dcp/src/index.ts b/implementations/ts/pcf-dcp/src/index.ts new file mode 100644 index 0000000..f383da2 --- /dev/null +++ b/implementations/ts/pcf-dcp/src/index.ts @@ -0,0 +1,75 @@ +/** + * # `pcf-dcp` — PCF Dynamic Container Partition (TypeScript implementation) + * + * Adds *dynamic*, fragmentable, dedup-friendly sub-partitions to the + * {@link "@kduma-oss/pcf"} container without changing its byte format. One new + * PCF partition type is defined: + * + * * **`DCP_CONTAINER`** (type `0xAAAC0001`) — a partition whose bytes are an + * *arena*: a {@link DcpHeader}, a chain of reused PCF Table Blocks listing + * *inner* partitions, a Fragment Table per inner partition, and the data + * extents those fragments name. 
+ * + * Each inner partition's logical content is the concatenation of its DATA + * extents (spec Section 8.3); its `dataHash` covers that logical content, so + * fragmentation, deduplication, compaction, and promotion all leave the hash + * (and any PCF-SIG signature over it) unchanged. A generic PCF reader sees a + * DCP file as one opaque, typed partition; only a DCP-aware reader looks inside. + * + * ## Example + * + * ```ts + * import { Arena, Chunker, DcpReader, DcpWriter } from "@kduma-oss/pcf-dcp"; + * import { HashAlgo, MemoryStorage } from "@kduma-oss/pcf"; + * + * const arena = new Arena(); + * arena.addInner(0x10, new Uint8Array(16).fill(0xa1), "A", + * new TextEncoder().encode("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7)); + * arena.addInner(0x10, new Uint8Array(16).fill(0xb2), "B", + * new TextEncoder().encode("World!"), HashAlgo.Sha256, Chunker.whole()); + * + * const w = new DcpWriter(); + * w.addContainer(new Uint8Array(16).fill(0xdc), "dcp", arena); + * const image = w.toImage(); + * + * const r = DcpReader.open(new MemoryStorage(image)); + * r.verify(); + * // new TextDecoder().decode(r.readInner(new Uint8Array(16).fill(0xb2))) === "World!" + * ``` + */ + +export * from "./consts.js"; +export { PcfDcpError, PcfDcpErrorKind } from "./errors.js"; +export { + type DcpHeader, + dcpHeaderToBytes, + dcpHeaderFromBytes, +} from "./header.js"; +export { + type FragmentEntry, + type FragTableHeader, + fragmentEntryToBytes, + fragmentEntryFromBytes, + fragTableHeaderToBytes, + fragTableHeaderFromBytes, + isData, + isShared, + walkFragmentTable, + reconstruct, +} from "./fragment.js"; +export { + Arena, + Chunker, + type ExtentInfo, + type InnerInfo, +} from "./arena.js"; +export { + DcpReader, + type InnerLocation, + type Resolved, +} from "./reader.js"; +export { DcpWriter } from "./writer.js"; +export { buildReferenceVector } from "./vector.js"; + +// Re-export the underlying hash registry for convenience. 
+export { HashAlgo } from "@kduma-oss/pcf"; diff --git a/implementations/ts/pcf-dcp/src/reader.ts b/implementations/ts/pcf-dcp/src/reader.ts new file mode 100644 index 0000000..bfe8a54 --- /dev/null +++ b/implementations/ts/pcf-dcp/src/reader.ts @@ -0,0 +1,209 @@ +/** + * {@link DcpReader}: reading DCP containers from a PCF file. + * + * The reader works entirely through the high-level {@link Container} API, so a + * DCP file written in trailer mode (append-only host) reads back transparently + * — this code never assumes the header's `partitionTableOffset` is a real + * offset (spec Section 2, "Compatibility with the PCF File Trailer"). + */ + +import { + computeTableHash, + Container, + digestLen, + entryFromBytes, + ENTRY_SIZE, + type PartitionEntry, + type Storage, + TABLE_HEADER_SIZE, + tableHeaderFromBytes, + verifies, + verifyHashField, +} from "@kduma-oss/pcf"; + +import { Arena, type InnerInfo } from "./arena.js"; +import { DCP_CONTAINER_TYPE } from "./consts.js"; +import { PcfDcpError } from "./errors.js"; +import { readHeader } from "./header.js"; + +/** An inner partition together with the container that holds it. */ +export interface InnerLocation { + /** uid of the enclosing DCP container partition. */ + containerUid: Uint8Array; + /** The inner partition's metadata and extents. */ + info: InnerInfo; +} + +/** The result of resolving a uid against top-level ∪ inner (spec 2.1). */ +export type Resolved = + | { kind: "top-level"; entry: PartitionEntry } + | { kind: "inner"; location: InnerLocation }; + +function bytesEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) { + return false; + } + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) { + return false; + } + } + return true; +} + +/** A reader for DCP containers layered over a PCF file. */ +export class DcpReader { + private c: Container; + + private constructor(c: Container) { + this.c = c; + } + + /** Open a PCF file for DCP-aware reading. 
*/ + static open(storage: Storage): DcpReader { + return new DcpReader(Container.open(storage)); + } + + /** Borrow the underlying PCF container. */ + container(): Container { + return this.c; + } + + /** All top-level entries, in chain order. */ + entries(): PartitionEntry[] { + return this.c.entries(); + } + + /** The top-level DCP container entries. */ + containers(): PartitionEntry[] { + return this.c.entries().filter((e) => e.partitionType === DCP_CONTAINER_TYPE); + } + + /** Parse the arena of a DCP container entry. */ + openArena(entry: PartitionEntry): Arena { + if (entry.partitionType !== DCP_CONTAINER_TYPE) { + throw PcfDcpError.notADcpContainer(); + } + return Arena.parse(this.c.readPartitionData(entry)); + } + + /** Every inner partition across every DCP container, in file order. */ + innerPartitions(): InnerLocation[] { + const out: InnerLocation[] = []; + for (const cont of this.containers()) { + const arena = this.openArena(cont); + for (const info of arena.innerInfos()) { + out.push({ containerUid: cont.uid.slice(), info }); + } + } + return out; + } + + /** Resolve a uid against the flattened set top-level ∪ inner (spec 2.1). */ + resolveUid(uid: Uint8Array): Resolved { + const top = this.c.entries().find((e) => bytesEqual(e.uid, uid)); + if (top) { + return { kind: "top-level", entry: top }; + } + for (const loc of this.innerPartitions()) { + if (bytesEqual(loc.info.uid, uid)) { + return { kind: "inner", location: loc }; + } + } + throw PcfDcpError.notFound(); + } + + /** Reconstruct an inner partition's logical content by uid. 
*/ + readInner(uid: Uint8Array): Uint8Array { + for (const cont of this.containers()) { + const arena = this.openArena(cont); + if (arena.uids().some((u) => bytesEqual(u, uid))) { + return arena.content(uid); + } + } + throw PcfDcpError.notFound(); + } + + /** + * Full DCP-aware verification: PCF integrity, each inner Table Block's + * `table_hash`, reconstruction length and (when algorithmic) `data_hash`, no + * nested container, and file-wide uid uniqueness. + */ + verify(): void { + this.c.verify(); + + const seen = new Set(); + const seenHex = (uid: Uint8Array): string => + Array.from(uid, (b) => b.toString(16).padStart(2, "0")).join(""); + for (const e of this.c.entries()) { + const k = seenHex(e.uid); + if (seen.has(k)) { + throw PcfDcpError.duplicateUid(); + } + seen.add(k); + } + + for (const cont of this.containers()) { + const data = this.c.readPartitionData(cont); + verifyInnerTableHashes(data); + + const arena = Arena.parse(data); + for (const info of arena.innerInfos()) { + if (info.partitionType === DCP_CONTAINER_TYPE) { + throw PcfDcpError.nestedContainer(); + } + const k = seenHex(info.uid); + if (seen.has(k)) { + throw PcfDcpError.duplicateUid(); + } + seen.add(k); + + const content = arena.content(info.uid); + if (content.length !== info.usedBytes) { + throw PcfDcpError.lengthMismatch(info.usedBytes, content.length); + } + if (!verifyHashField(info.dataHashAlgo, content, info.dataHash)) { + throw PcfDcpError.hashMismatch(); + } + } + } + } +} + +/** + * Walk the inner Table Block chain in an arena and recompute each block's + * `table_hash`, exactly as PCF does (spec Section 9.2). 
+ */ +function verifyInnerTableHashes(arena: Uint8Array): void { + const header = readHeader(arena); + let off = header.innerTableOffset; + let budget = Math.floor(arena.length / TABLE_HEADER_SIZE) + 1; + while (off !== 0) { + if (budget === 0) { + throw PcfDcpError.offsetOutOfRange(); + } + budget -= 1; + if (off + TABLE_HEADER_SIZE > arena.length) { + throw PcfDcpError.offsetOutOfRange(); + } + const h = tableHeaderFromBytes(arena.subarray(off, off + TABLE_HEADER_SIZE)); + const entries: PartitionEntry[] = []; + for (let i = 0; i < h.partitionCount; i++) { + const eo = off + TABLE_HEADER_SIZE + i * ENTRY_SIZE; + if (eo + ENTRY_SIZE > arena.length) { + throw PcfDcpError.offsetOutOfRange(); + } + entries.push(entryFromBytes(arena.subarray(eo, eo + ENTRY_SIZE))); + } + if (verifies(h.tableHashAlgo)) { + const computed = computeTableHash(h.tableHashAlgo, h.nextTableOffset, entries); + const n = digestLen(h.tableHashAlgo); + for (let i = 0; i < n; i++) { + if (computed[i] !== h.tableHash[i]) { + throw PcfDcpError.hashMismatch(); + } + } + } + off = Number(h.nextTableOffset); + } +} diff --git a/implementations/ts/pcf-dcp/src/vector.ts b/implementations/ts/pcf-dcp/src/vector.ts new file mode 100644 index 0000000..11822af --- /dev/null +++ b/implementations/ts/pcf-dcp/src/vector.ts @@ -0,0 +1,43 @@ +/** + * The canonical PCF-DCP v1.0 test vector (spec Section 17). + */ + +import { HashAlgo } from "@kduma-oss/pcf"; + +import { Arena, Chunker } from "./arena.js"; +import { DcpWriter } from "./writer.js"; + +/** + * Build the byte-exact 700-byte reference file from spec Section 17. + * + * The file is one DCP container ("dcp", uid 16×0xDC, unsealed) holding two + * inner partitions: **A** ("Hello, World!", 13 B) stored as two extents — + * `"Hello, "` (7 B, private) and `"World!"` (6 B, shared) — via fixed-7 + * chunking; and **B** ("World!", 6 B) stored as one extent that deduplicates + * onto A's second extent (both SHARED). 
Building the same logical container and + * emitting the canonical layout MUST reproduce these exact bytes. + */ +export function buildReferenceVector(): Uint8Array { + const enc = new TextEncoder(); + const arena = new Arena(); + arena.addInner( + 0x0000_0010, + new Uint8Array(16).fill(0xa1), + "A", + enc.encode("Hello, World!"), + HashAlgo.Sha256, + Chunker.fixed(7), + ); + arena.addInner( + 0x0000_0010, + new Uint8Array(16).fill(0xb2), + "B", + enc.encode("World!"), + HashAlgo.Sha256, + Chunker.whole(), + ); + + const w = new DcpWriter(); + w.addContainer(new Uint8Array(16).fill(0xdc), "dcp", arena); + return w.toImage(); +} diff --git a/implementations/ts/pcf-dcp/src/writer.ts b/implementations/ts/pcf-dcp/src/writer.ts new file mode 100644 index 0000000..4588ea9 --- /dev/null +++ b/implementations/ts/pcf-dcp/src/writer.ts @@ -0,0 +1,202 @@ +/** + * {@link DcpWriter}: building and rewriting PCF files that carry DCP containers. + * + * The writer keeps the whole file as an in-memory list of top-level partitions + * (plain partitions and DCP containers) and emits a fresh, canonical PCF image + * on demand. Every mutating operation is a logical edit of that list followed + * by a rebuild — deliberately simple and always correct for a reference + * implementation; the resulting file is a fully conforming PCF v1.0 file. 
+ */ + +import { + Container, + decodeLabel, + HashAlgo, + MemoryStorage, + type Storage, +} from "@kduma-oss/pcf"; + +import { Arena, type Chunker } from "./arena.js"; +import { DCP_CONTAINER_TYPE } from "./consts.js"; +import { PcfDcpError } from "./errors.js"; + +type Body = + | { kind: "plain"; data: Uint8Array } + | { kind: "container"; arena: Arena }; + +interface TopPart { + partitionType: number; + uid: Uint8Array; + label: string; + dataHashAlgo: HashAlgo; + body: Body; +} + +function bytesEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) { + return false; + } + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) { + return false; + } + } + return true; +} + +/** A writer that assembles a PCF file containing DCP containers. */ +export class DcpWriter { + private parts: TopPart[] = []; + private tableHashAlgo: HashAlgo = HashAlgo.Sha256; + private trailer = false; + + /** Load an existing PCF file into the writer's model. */ + static open(storage: Storage): DcpWriter { + const c = Container.open(storage); + const w = new DcpWriter(); + for (const e of c.entries()) { + const data = c.readPartitionData(e); + const label = decodeLabel(e.label); + const body: Body = + e.partitionType === DCP_CONTAINER_TYPE + ? { kind: "container", arena: Arena.parse(data) } + : { kind: "plain", data }; + w.parts.push({ + partitionType: e.partitionType, + uid: e.uid.slice(), + label, + dataHashAlgo: e.dataHashAlgo, + body, + }); + } + return w; + } + + /** Finalise emitted images in trailer mode (append-only host). */ + setTrailer(on: boolean): void { + this.trailer = on; + } + + private ensureUnique(uid: Uint8Array): void { + if (this.parts.some((p) => bytesEqual(p.uid, uid))) { + throw PcfDcpError.duplicateUid(); + } + } + + /** Add a DCP container partition holding `arena` (data hash algo 0). 
*/ + addContainer(uid: Uint8Array, label: string, arena: Arena): void { + this.ensureUnique(uid); + this.parts.push({ + partitionType: DCP_CONTAINER_TYPE, + uid: uid.slice(), + label, + dataHashAlgo: HashAlgo.None, + body: { kind: "container", arena }, + }); + } + + /** Add an ordinary top-level partition. */ + addPlain( + partitionType: number, + uid: Uint8Array, + label: string, + data: Uint8Array, + dataHashAlgo: HashAlgo, + ): void { + this.ensureUnique(uid); + this.parts.push({ + partitionType: partitionType >>> 0, + uid: uid.slice(), + label, + dataHashAlgo, + body: { kind: "plain", data }, + }); + } + + private containerArena(uid: Uint8Array): Arena { + for (const p of this.parts) { + if (bytesEqual(p.uid, uid)) { + if (p.body.kind !== "container") { + throw PcfDcpError.notADcpContainer(); + } + return p.body.arena; + } + } + throw PcfDcpError.notFound(); + } + + /** Borrow a container's arena for inspection or in-place editing. */ + arena(containerUid: Uint8Array): Arena { + return this.containerArena(containerUid); + } + + // ---- migration: promotion / demotion ----------------------------------- + + /** + * Promote an inner partition out of its DCP container to a top-level PCF + * partition (dynamic → fixed), preserving uid, type, label, hash algorithm + * and `data_hash` (the promotion invariant, spec Section 10.4). + */ + promote(containerUid: Uint8Array, innerUid: Uint8Array): void { + const arena = this.containerArena(containerUid); + const { partitionType, label, dataHashAlgo, content } = arena.removeInner(innerUid); + this.parts.push({ + partitionType, + uid: innerUid.slice(), + label, + dataHashAlgo, + body: { kind: "plain", data: content }, + }); + } + + /** + * Demote a top-level partition into a DCP container as an inner partition + * (fixed → dynamic), preserving uid, type, label, hash algorithm and + * `data_hash`. The content becomes a single DATA extent. 
+ */ + demote(partUid: Uint8Array, containerUid: Uint8Array): void { + const pos = this.parts.findIndex((p) => bytesEqual(p.uid, partUid)); + if (pos < 0) { + throw PcfDcpError.notFound(); + } + const p = this.parts[pos]!; + if (p.partitionType === DCP_CONTAINER_TYPE || p.body.kind !== "plain") { + throw PcfDcpError.nestedContainer(); + } + const { partitionType, label, dataHashAlgo, body } = p; + const content = body.data; + const arena = this.containerArena(containerUid); + arena.addInner(partitionType, partUid, label, content, dataHashAlgo, { + type: "whole", + }); + this.parts.splice(pos, 1); + } + + // ---- container-level maintenance --------------------------------------- + + /** Re-chunk and deduplicate a container's inner partitions. */ + dedup(containerUid: Uint8Array, chunker: Chunker): number { + return this.containerArena(containerUid).dedup(chunker); + } + + /** Compact / defragment a container's arena. Returns bytes reclaimed. */ + defrag(containerUid: Uint8Array): number { + return this.containerArena(containerUid).compact(); + } + + // ---- serialisation ----------------------------------------------------- + + /** Build a fresh, canonical PCF image of the whole file. */ + toImage(): Uint8Array { + const cap = Math.max(1, this.parts.length); + const c = Container.createWith(new MemoryStorage(), cap, this.tableHashAlgo); + for (const p of this.parts) { + const data = p.body.kind === "plain" ? p.body.data : p.body.arena.toBytes(); + c.addPartition(p.partitionType, p.uid, p.label, data, 0, p.dataHashAlgo); + } + if (this.trailer) { + c.finalizeWithTrailer(); + } + return (c.intoStorage() as MemoryStorage).toUint8Array(); + } +} diff --git a/implementations/ts/pcf-dcp/test/canonical-vector.test.ts b/implementations/ts/pcf-dcp/test/canonical-vector.test.ts new file mode 100644 index 0000000..5d3d8e7 --- /dev/null +++ b/implementations/ts/pcf-dcp/test/canonical-vector.test.ts @@ -0,0 +1,56 @@ +/** + * Cross-port test vector parity. 
The same 700-byte canonical container is + * shipped by every PCF-DCP language port (spec Section 17). This test: + * + * 1. Loads the file from disk and asserts SHA-256 + byte-exact regeneration. + * 2. Opens it as a PCF container and verifies the PCF cascade. + * 3. Opens it as a DCP container and verifies it end-to-end. + */ + +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { dirname, resolve } from "node:path"; + +import { describe, expect, it } from "vitest"; +import { Container, MemoryStorage } from "@kduma-oss/pcf"; + +import { buildReferenceVector, DcpReader } from "../src/index.js"; +import { dec, fill, hex, sha256hex } from "./helpers.js"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const CANONICAL = new Uint8Array( + readFileSync(resolve(__dirname, "..", "testdata", "canonical.bin")), +); + +const EXPECTED_SHA256 = + "b9bb59794abed008863063886d8d0daa810c44939c1c5d29449475ced8156b90"; + +describe("canonical test vector", () => { + it("ships the expected SHA-256 and length", () => { + expect(CANONICAL.length).toBe(700); + expect(sha256hex(CANONICAL)).toBe(EXPECTED_SHA256); + }); + + it("regenerates byte-exact", () => { + const image = buildReferenceVector(); + expect(image.length).toBe(700); + expect(sha256hex(image)).toBe(EXPECTED_SHA256); + expect(hex(image)).toBe(hex(CANONICAL)); + }); + + it("is a valid PCF file", () => { + const c = Container.open(new MemoryStorage(CANONICAL)); + c.verify(); + const entries = c.entries(); + expect(entries).toHaveLength(1); + expect(entries[0]!.partitionType).toBe(0xaaac_0001); + expect(Number(entries[0]!.usedBytes)).toBe(465); + }); + + it("is a valid DCP file with reconstructable inners", () => { + const r = DcpReader.open(new MemoryStorage(CANONICAL)); + r.verify(); + expect(dec(r.readInner(fill(0xa1)))).toBe("Hello, World!"); + expect(dec(r.readInner(fill(0xb2)))).toBe("World!"); + }); +}); diff --git a/implementations/ts/pcf-dcp/test/coverage.test.ts 
b/implementations/ts/pcf-dcp/test/coverage.test.ts new file mode 100644 index 0000000..50f376e --- /dev/null +++ b/implementations/ts/pcf-dcp/test/coverage.test.ts @@ -0,0 +1,127 @@ +/** + * Error paths and edge cases (spec Sections 8, 13). + */ + +import { describe, expect, it } from "vitest"; +import { Container, HashAlgo, MemoryStorage } from "@kduma-oss/pcf"; + +import { + Arena, + Chunker, + DcpReader, + DcpWriter, + PcfDcpErrorKind, + reconstruct, +} from "../src/index.js"; +import { enc, expectKind, fill } from "./helpers.js"; + +describe("coverage / error paths", () => { + it("rejects a bad arena magic", () => { + const a = new Arena(); + a.addInner(0x10, fill(1), "x", enc("hi"), HashAlgo.Sha256, Chunker.whole()); + const bytes = a.toBytes(); + bytes[0] = 0x58; // 'X' + expectKind(() => Arena.parse(bytes), PcfDcpErrorKind.BadDcpMagic); + }); + + it("rejects an unsupported profile major", () => { + const a = new Arena(); + a.addInner(0x10, fill(1), "x", enc("hi"), HashAlgo.Sha256, Chunker.whole()); + const bytes = a.toBytes(); + bytes[4] = 2; + expectKind(() => Arena.parse(bytes), PcfDcpErrorKind.UnsupportedProfileMajor); + }); + + it("rejects reserved type, nested container and NIL uid", () => { + const a = new Arena(); + expectKind( + () => a.addInner(0, fill(1), "x", enc(""), HashAlgo.None, Chunker.whole()), + PcfDcpErrorKind.ReservedType, + ); + expectKind( + () => a.addInner(0xaaac_0001, fill(1), "x", enc(""), HashAlgo.None, Chunker.whole()), + PcfDcpErrorKind.NestedContainer, + ); + expectKind( + () => a.addInner(0x10, new Uint8Array(16), "x", enc(""), HashAlgo.None, Chunker.whole()), + PcfDcpErrorKind.NilUid, + ); + }); + + it("rejects a duplicate uid within an arena", () => { + const a = new Arena(); + a.addInner(0x10, fill(1), "x", enc("a"), HashAlgo.None, Chunker.whole()); + expectKind( + () => a.addInner(0x10, fill(1), "y", enc("b"), HashAlgo.None, Chunker.whole()), + PcfDcpErrorKind.DuplicateUid, + ); + }); + + it("rejects a reserved fragment 
kind and out-of-range extent", () => { + expectKind( + () => reconstruct(new Uint8Array(64), [{ extentOffset: 24, extentLength: 1, kind: 2, flags: 0 }], 64), + PcfDcpErrorKind.BadFragmentKind, + ); + expectKind( + () => reconstruct(new Uint8Array(64), [{ extentOffset: 60, extentLength: 100, kind: 1, flags: 0 }], 64), + PcfDcpErrorKind.OffsetOutOfRange, + ); + }); + + it("allows an empty inner partition", () => { + const a = new Arena(); + a.addInner(0x10, fill(1), "empty", enc(""), HashAlgo.Sha256, Chunker.whole()); + const info = a.innerInfo(fill(1)); + expect(info.usedBytes).toBe(0); + expect(info.extents).toHaveLength(0); + const parsed = Arena.parse(a.toBytes()); + expect(parsed.content(fill(1))).toHaveLength(0); + }); + + it("chains the inner table beyond 255 partitions", () => { + const a = new Arena(); + for (let i = 0; i < 300; i++) { + const uid = new Uint8Array(16); + uid[0] = i & 0xff; + uid[1] = (i >> 8) & 0xff; + uid[15] = 1; + const data = new Uint8Array([i & 0xff, (i >> 8) & 0xff]); + a.addInner(0x10, uid, "n", data, HashAlgo.Sha256, Chunker.whole()); + } + expect(a.len()).toBe(300); + const parsed = Arena.parse(a.toBytes()); + expect(parsed.len()).toBe(300); + + const w = new DcpWriter(); + w.addContainer(fill(0xdc), "big", a); + const r = DcpReader.open(new MemoryStorage(w.toImage())); + r.verify(); + }); + + it("chains the fragment table beyond 255 extents", () => { + const a = new Arena(); + const distinct = new Uint8Array(300); + for (let i = 0; i < 300; i++) distinct[i] = i & 0xff; + a.addInner(0x10, fill(2), "frag", distinct, HashAlgo.Sha256, Chunker.fixed(1)); + const parsed = Arena.parse(a.toBytes()); + expect(parsed.content(fill(2))).toEqual(distinct); + }); + + it("verify detects a file-wide uid collision", () => { + const a = new Arena(); + a.addInner(0x10, fill(0xb2), "B", enc("World!"), HashAlgo.Sha256, Chunker.whole()); + const w = new DcpWriter(); + w.addContainer(fill(0xdc), "dcp", a); + w.addPlain(0x10, fill(0xb2), "dup", enc("x"), 
HashAlgo.Sha256); + const r = DcpReader.open(new MemoryStorage(w.toImage())); + expectKind(() => r.verify(), PcfDcpErrorKind.DuplicateUid); + }); + + it("openArena rejects a non-DCP partition", () => { + const c = Container.createWith(new MemoryStorage(), 4, HashAlgo.Sha256); + c.addPartition(0x10, fill(7), "plain", enc("hi"), 0, HashAlgo.Sha256); + const r = DcpReader.open(c.intoStorage()); + const entry = r.entries()[0]!; + expectKind(() => r.openArena(entry), PcfDcpErrorKind.NotADcpContainer); + }); +}); diff --git a/implementations/ts/pcf-dcp/test/helpers.ts b/implementations/ts/pcf-dcp/test/helpers.ts new file mode 100644 index 0000000..b124c96 --- /dev/null +++ b/implementations/ts/pcf-dcp/test/helpers.ts @@ -0,0 +1,42 @@ +import { createHash } from "node:crypto"; + +import { expect } from "vitest"; + +import { PcfDcpError, type PcfDcpErrorKind } from "../src/index.js"; + +/** A 16-byte uid all equal to `b`. */ +export function fill(b: number): Uint8Array { + return new Uint8Array(16).fill(b); +} + +/** ASCII/UTF-8 string to bytes. */ +export function enc(s: string): Uint8Array { + return new TextEncoder().encode(s); +} + +/** Bytes to string. */ +export function dec(b: Uint8Array): string { + return new TextDecoder().decode(b); +} + +/** Lowercase hex of a byte array. */ +export function hex(b: Uint8Array): string { + return Array.from(b, (x) => x.toString(16).padStart(2, "0")).join(""); +} + +/** SHA-256 of `b` as lowercase hex. */ +export function sha256hex(b: Uint8Array): string { + return createHash("sha256").update(b).digest("hex"); +} + +/** Assert that `fn` throws a {@link PcfDcpError} of the given `kind`. 
*/
+export function expectKind(fn: () => unknown, kind: PcfDcpErrorKind): void {
+  try {
+    fn();
+  } catch (e) {
+    expect(e).toBeInstanceOf(PcfDcpError);
+    expect((e as PcfDcpError).kind).toBe(kind);
+    return; // the expected error was thrown; skip the failure below
+  }
+  throw new Error(`expected a PcfDcpError of kind ${kind}, but nothing was thrown`);
+}
diff --git a/implementations/ts/pcf-dcp/test/roundtrip.test.ts b/implementations/ts/pcf-dcp/test/roundtrip.test.ts
new file mode 100644
index 0000000..207680b
--- /dev/null
+++ b/implementations/ts/pcf-dcp/test/roundtrip.test.ts
@@ -0,0 +1,118 @@
+/**
+ * End-to-end round-trips: build, edit, dedup/defrag, promote/demote, trailer.
+ */
+
+import { describe, expect, it } from "vitest";
+import { HashAlgo, MemoryStorage } from "@kduma-oss/pcf";
+
+import { Arena, Chunker, DcpReader, DcpWriter } from "../src/index.js";
+import { dec, enc, fill } from "./helpers.js";
+
+function buildTwoInnerFile(): Uint8Array {
+  const arena = new Arena();
+  arena.addInner(0x10, fill(0xa1), "A", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7));
+  arena.addInner(0x10, fill(0xb2), "B", enc("World!"), HashAlgo.Sha256, Chunker.whole());
+  const w = new DcpWriter();
+  w.addContainer(fill(0xdc), "dcp", arena);
+  return w.toImage();
+}
+
+describe("round-trips", () => {
+  it("edits reconstruct correctly", () => {
+    const a = new Arena();
+    a.addInner(0x10, fill(1), "f", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7));
+
+    a.append(fill(1), enc("!!"));
+    expect(dec(a.content(fill(1)))).toBe("Hello, World!!!");
+
+    a.insert(fill(1), 5, enc("XYZ"));
+    expect(dec(a.content(fill(1)))).toBe("HelloXYZ, World!!!");
+
+    a.delete(fill(1), 5, 3);
+    expect(dec(a.content(fill(1)))).toBe("Hello, World!!!");
+
+    a.overwrite(fill(1), 0, 5, enc("HOWDY"));
+    expect(dec(a.content(fill(1)))).toBe("HOWDY, World!!!");
+
+    a.truncate(fill(1), 5);
+    expect(dec(a.content(fill(1)))).toBe("HOWDY");
+  });
+
+  it("copy-on-write does not disturb shared bytes", () => {
+    const a = new Arena();
+    a.addInner(0x10, fill(0xa1), "A", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7));
+    a.addInner(0x10, fill(0xb2), "B", enc("World!"), HashAlgo.Sha256, Chunker.whole());
+    a.overwrite(fill(0xa1), 7, 6, enc("PLANET"));
+    expect(dec(a.content(fill(0xa1)))).toBe("Hello, PLANET");
+    expect(dec(a.content(fill(0xb2)))).toBe("World!");
+  });
+
+  it("dedup then defrag preserve content", () => {
+    const a = new Arena();
+    a.addInner(0x10, fill(1), "A", enc("abcabc"), HashAlgo.Sha256, Chunker.whole());
+    a.addInner(0x10, fill(2), "B", enc("abcabc"), HashAlgo.Sha256, Chunker.whole());
+    const h1 = a.innerInfo(fill(1)).dataHash;
+
+    const saved = a.dedup(Chunker.fixed(3));
+    expect(saved).toBeGreaterThan(0);
+    expect(dec(a.content(fill(1)))).toBe("abcabc");
+    expect(dec(a.content(fill(2)))).toBe("abcabc");
+    expect(a.innerInfo(fill(1)).dataHash).toEqual(h1);
+
+    a.compact();
+    expect(dec(a.content(fill(2)))).toBe("abcabc");
+  });
+
+  it("defrag clears SHARED when no longer aliased (rule F2)", () => {
+    const a = new Arena();
+    a.addInner(0x10, fill(0xa1), "A", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7));
+    a.addInner(0x10, fill(0xb2), "B", enc("World!"), HashAlgo.Sha256, Chunker.whole());
+    a.removeInner(fill(0xb2));
+    a.compact();
+    const ia = a.innerInfo(fill(0xa1));
+    expect(ia.extents.every((e) => !e.shared)).toBe(true);
+    expect(dec(a.content(fill(0xa1)))).toBe("Hello, World!");
+  });
+
+  it("promote preserves uid and data_hash", () => {
+    const w = DcpWriter.open(new MemoryStorage(buildTwoInnerFile()));
+    const before = (() => {
+      const r = DcpReader.open(new MemoryStorage(w.toImage()));
+      return r.innerPartitions().find((l) => l.info.uid[0] === 0xb2)!.info.dataHash;
+    })();
+
+    w.promote(fill(0xdc), fill(0xb2));
+    const r = DcpReader.open(new MemoryStorage(w.toImage()));
+    r.verify();
+    const resolved = r.resolveUid(fill(0xb2));
+    expect(resolved.kind).toBe("top-level");
+    if (resolved.kind === "top-level") {
+      expect(resolved.entry.dataHash).toEqual(before);
+      expect(Number(resolved.entry.usedBytes)).toBe(6);
+    }
+    expect(dec(r.readInner(fill(0xa1)))).toBe("Hello, World!");
+  });
+
+  it("promote then demote is identity for content", () => {
+    const w = DcpWriter.open(new MemoryStorage(buildTwoInnerFile()));
+    w.promote(fill(0xdc), fill(0xb2)); // inner 0xb2 becomes a top-level partition
+    w.demote(fill(0xb2), fill(0xdc)); // ...and is moved back into container 0xdc
+    const r = DcpReader.open(new MemoryStorage(w.toImage()));
+    r.verify();
+    expect(dec(r.readInner(fill(0xb2)))).toBe("World!");
+    expect(r.resolveUid(fill(0xb2)).kind).toBe("inner");
+  });
+
+  it("trailer mode reads back identically", () => {
+    const arena = new Arena();
+    arena.addInner(0x10, fill(0xa1), "A", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7));
+    arena.addInner(0x10, fill(0xb2), "B", enc("World!"), HashAlgo.Sha256, Chunker.whole());
+    const w = new DcpWriter();
+    w.addContainer(fill(0xdc), "dcp", arena);
+    w.setTrailer(true);
+    const r = DcpReader.open(new MemoryStorage(w.toImage()));
+    r.verify();
+    expect(dec(r.readInner(fill(0xa1)))).toBe("Hello, World!");
+    expect(r.innerPartitions()).toHaveLength(2);
+  });
+});
diff --git a/implementations/ts/pcf-dcp/test/spec-compliance.test.ts b/implementations/ts/pcf-dcp/test/spec-compliance.test.ts
new file mode 100644
index 0000000..53ce699
--- /dev/null
+++ b/implementations/ts/pcf-dcp/test/spec-compliance.test.ts
@@ -0,0 +1,92 @@
+/**
+ * Conformance tests tied to specific sections of `PCF-DCP-spec-v1.0.txt`.
+ */ + +import { describe, expect, it } from "vitest"; +import { computeHashField, HashAlgo } from "@kduma-oss/pcf"; + +import { + Arena, + Chunker, + DCP_CONTAINER_TYPE, + DCP_HEADER_SIZE, + dcpHeaderFromBytes, + dcpHeaderToBytes, + FRAGMENT_ENTRY_SIZE, + FRAGTABLE_HEADER_SIZE, + fragmentEntryFromBytes, + fragmentEntryToBytes, + fragTableHeaderFromBytes, + fragTableHeaderToBytes, +} from "../src/index.js"; +import { dec, enc, fill, hex } from "./helpers.js"; + +describe("spec compliance", () => { + it("structure sizes match Appendix A", () => { + expect(DCP_HEADER_SIZE).toBe(24); + expect(FRAGTABLE_HEADER_SIZE).toBe(9); + expect(FRAGMENT_ENTRY_SIZE).toBe(18); + expect(DCP_CONTAINER_TYPE).toBe(0xaaac_0001); + }); + + it("header round-trips and carries the magic", () => { + const h = { + profileVersionMajor: 1, + profileVersionMinor: 0, + flags: 0, + innerTableOffset: 109, + arenaUsed: 465, + }; + const b = dcpHeaderToBytes(h); + expect(Array.from(b.subarray(0, 4))).toEqual([0x50, 0x44, 0x43, 0x50]); + expect(dcpHeaderFromBytes(b)).toEqual(h); + }); + + it("fragment records round-trip", () => { + const e = { extentOffset: 31, extentLength: 6, kind: 1, flags: 1 }; + expect(fragmentEntryFromBytes(fragmentEntryToBytes(e))).toEqual(e); + const h = { nextFragtableOffset: 0, fragmentCount: 2 }; + expect(fragTableHeaderFromBytes(fragTableHeaderToBytes(h))).toEqual(h); + }); + + it("reconstruction equals the logical content", () => { + const a = new Arena(); + a.addInner(0x10, fill(1), "x", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7)); + expect(dec(a.content(fill(1)))).toBe("Hello, World!"); + const info = a.innerInfo(fill(1)); + expect(info.usedBytes).toBe(13); + expect(info.extents).toHaveLength(2); + }); + + it("data_hash is invariant under fragmentation", () => { + const mk = (c: Chunker): string => { + const a = new Arena(); + a.addInner(0x10, fill(7), "x", enc("abcdefghij"), HashAlgo.Sha256, c); + return hex(a.innerInfo(fill(7)).dataHash); + }; + 
expect(mk(Chunker.whole())).toBe(mk(Chunker.fixed(3)));
+    expect(mk(Chunker.whole())).toBe(hex(computeHashField(HashAlgo.Sha256, enc("abcdefghij"))));
+  });
+
+  it("dedup sets SHARED on all aliases (rule F1)", () => {
+    const a = new Arena();
+    a.addInner(0x10, fill(0xa1), "A", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7));
+    a.addInner(0x10, fill(0xb2), "B", enc("World!"), HashAlgo.Sha256, Chunker.whole());
+
+    const ia = a.innerInfo(fill(0xa1));
+    const ib = a.innerInfo(fill(0xb2));
+    expect(ia.extents[0]!.shared).toBe(false);
+    expect(ia.extents[1]!.shared).toBe(true);
+    expect(ib.extents).toHaveLength(1);
+    expect(ib.extents[0]!.shared).toBe(true);
+    expect(hex(ib.dataHash)).toBe(hex(computeHashField(HashAlgo.Sha256, enc("World!"))));
+  });
+
+  it("parse round-trips the canonical arena byte-exact", () => {
+    const a = new Arena();
+    a.addInner(0x10, fill(0xa1), "A", enc("Hello, World!"), HashAlgo.Sha256, Chunker.fixed(7));
+    a.addInner(0x10, fill(0xb2), "B", enc("World!"), HashAlgo.Sha256, Chunker.whole());
+    const bytes = a.toBytes();
+    expect(hex(Arena.parse(bytes).toBytes())).toBe(hex(bytes));
+  });
+});
diff --git a/implementations/ts/pcf-dcp/testdata/canonical.bin b/implementations/ts/pcf-dcp/testdata/canonical.bin
new file mode 100644
index 0000000..834aea4
Binary files /dev/null and b/implementations/ts/pcf-dcp/testdata/canonical.bin differ
diff --git a/implementations/ts/pcf-dcp/tsconfig.json b/implementations/ts/pcf-dcp/tsconfig.json
new file mode 100644
index 0000000..fd4ad45
--- /dev/null
+++ b/implementations/ts/pcf-dcp/tsconfig.json
@@ -0,0 +1,24 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "lib": ["ES2022"],
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "outDir": "dist",
+    "rootDir": "src",
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noImplicitReturns": true,
+    "noFallthroughCasesInSwitch": true,
+    "forceConsistentCasingInFileNames": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "verbatimModuleSyntax": true
+  },
+  "include": ["src"],
+  "exclude": ["dist", "node_modules", "test", "examples"]
+}
diff --git a/implementations/ts/pcf-dcp/vitest.config.ts b/implementations/ts/pcf-dcp/vitest.config.ts
new file mode 100644
index 0000000..c4264d7
--- /dev/null
+++ b/implementations/ts/pcf-dcp/vitest.config.ts
@@ -0,0 +1,18 @@
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+  test: {
+    environment: "node",
+    include: ["test/**/*.test.ts"],
+    coverage: {
+      provider: "v8",
+      include: ["src/**/*.ts"],
+      exclude: ["src/index.ts"],
+      reporter: ["text", "lcov"],
+      thresholds: {
+        lines: 85,
+        functions: 90,
+      },
+    },
+  },
+});
diff --git a/reference/PCF-DCP-v1.0/tests/cross_port_testdata.rs b/reference/PCF-DCP-v1.0/tests/cross_port_testdata.rs
new file mode 100644
index 0000000..bbfe91a
--- /dev/null
+++ b/reference/PCF-DCP-v1.0/tests/cross_port_testdata.rs
@@ -0,0 +1,84 @@
+//! Cross-port test-vector parity check.
+//!
+//! Every PCF-DCP language port ships its own copy of the canonical 700-byte
+//! container vector under `implementations/<lang>/pcf-dcp/testdata/
+//! canonical.bin`. Each port's own test suite asserts that its writer produces
+//! this byte sequence; this Rust workspace test additionally asserts that the
+//! shipped *files* are byte-identical, so that any future regeneration of the
+//! reference vector cannot leave one port out of sync.
+
+use std::fs;
+use std::path::{Path, PathBuf};
+
+/// The reference vector compiled into the test binary.
+const REFERENCE: &[u8] = include_bytes!("../testdata/canonical.bin");
+
+/// Locate the repository root from this crate's `CARGO_MANIFEST_DIR`.
+/// reference/PCF-DCP-v1.0 → repository root is two levels up.
+fn repo_root() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .expect("PCF-DCP-v1.0 crate has a parent (reference/)")
+        .parent()
+        .expect("reference/ has a parent (repo root)")
+        .to_path_buf()
+}
+
+fn read_port_vector(rel: &str) -> Vec<u8> { // `rel` is joined onto the repository root
+    let path = repo_root().join(rel);
+    fs::read(&path).unwrap_or_else(|e| {
+        panic!(
+            "failed to read {}: {e}\n\
+             every PCF-DCP language port MUST ship a copy of the canonical \
+             test vector identical to reference/PCF-DCP-v1.0/testdata/canonical.bin",
+            path.display(),
+        )
+    })
+}
+
+fn assert_byte_identical(label: &str, port: &[u8]) { // panics with the first differing offset
+    assert_eq!(
+        port.len(),
+        REFERENCE.len(),
+        "{label} ships canonical.bin of length {} bytes; reference is {} bytes",
+        port.len(),
+        REFERENCE.len(),
+    );
+    if port != REFERENCE {
+        let first_diff = port
+            .iter()
+            .zip(REFERENCE.iter())
+            .position(|(a, b)| a != b)
+            .unwrap_or(REFERENCE.len()); // fallback unreachable: lengths match and the slices differ
+        panic!(
+            "{label} canonical.bin diverges from reference at offset {first_diff}: \
+             port byte = 0x{:02x}, reference byte = 0x{:02x}",
+            port.get(first_diff).copied().unwrap_or(0),
+            REFERENCE.get(first_diff).copied().unwrap_or(0),
+        );
+    }
+}
+
+#[test]
+fn typescript_port_testdata_matches_reference() {
+    let port = read_port_vector("implementations/ts/pcf-dcp/testdata/canonical.bin");
+    assert_byte_identical("TypeScript port", &port);
+}
+
+#[test]
+fn php_port_testdata_matches_reference() {
+    let port = read_port_vector("implementations/php/pcf-dcp/testdata/canonical.bin");
+    assert_byte_identical("PHP port", &port);
+}
+
+#[test]
+fn dotnet_port_testdata_matches_reference() {
+    let port = read_port_vector("implementations/dotnet/pcf-dcp/testdata/canonical.bin");
+    assert_byte_identical(".NET port", &port);
+}
+
+/// Sanity: the reference itself is the canonical 700-byte vector we expect.
+#[test] +fn reference_has_canonical_length() { + assert_eq!(REFERENCE.len(), 700); +} diff --git a/reference/PFS-MS-v1.0/Cargo.toml b/reference/PFS-MS-v1.0/Cargo.toml index 10ff413..03aef05 100644 --- a/reference/PFS-MS-v1.0/Cargo.toml +++ b/reference/PFS-MS-v1.0/Cargo.toml @@ -41,10 +41,10 @@ filetime = "0.2" # Shared PCF-SIG command-line logic, so `pfs` can run keygen / verify-sig using # exactly the same implementation as the standalone `pcf-sig` tool. -pcf-sig-cli = { path = "../../tools/pcf-sig", version = "0.0.8" } +pcf-sig-cli = { path = "../../tools/pcf-sig", version = "0.0.9" } # The PCF-SIG signing primitives. Signing a PFS-MS file cannot simply append # partitions (that would break the backward-linked session chain); instead the # PCFSIG_KEY / PCFSIG_SIG partitions are committed as a dedicated PFS session # (see `src/sign.rs`), built from these primitives. -pcf-sig = { path = "../PCF-SIG-v1.0", version = "0.0.8" } +pcf-sig = { path = "../PCF-SIG-v1.0", version = "0.0.9" } diff --git a/tools/pcf-sig/Cargo.toml b/tools/pcf-sig/Cargo.toml index 4e6eeeb..8a29e2f 100644 --- a/tools/pcf-sig/Cargo.toml +++ b/tools/pcf-sig/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pcf-sig-cli" -version = "0.0.8" +version = "0.0.9" edition = "2021" rust-version = "1.75" license = "MIT OR Apache-2.0" @@ -26,10 +26,10 @@ path = "src/main.rs" [dependencies] # Generic PCF container access (enumerate partitions, read/append). -pcf = { path = "../../reference/PCF-v1.0", version = "0.0.8" } +pcf = { path = "../../reference/PCF-v1.0", version = "0.0.9" } # The PCF-SIG signing/verification primitives this tool drives. -pcf-sig = { path = "../../reference/PCF-SIG-v1.0", version = "0.0.8" } +pcf-sig = { path = "../../reference/PCF-SIG-v1.0", version = "0.0.9" } # UUIDv7 for the uids of newly written PCFSIG_SIG / PCFSIG_KEY partitions # (consistent with the recommendation in both the PCF and PFS-MS specs).