From 66445ebf0dadb99835ec9e44ea1e70310a42ee54 Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Tue, 2 Jun 2026 14:16:05 +0100
Subject: [PATCH 01/10] fix: Forward storage_options to parquet metadata reads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Schema/collection/failure-info reads passed `storage_options` (and
`credential_provider`) to the data read via `pl.read_parquet` /
`pl.scan_parquet`, but the separate embedded-schema metadata read called
`pl.read_parquet_metadata` with no options. Against non-AWS S3-compatible
stores reached purely through `storage_options` (lakeFS, MinIO, R2, Tigris,
…) the metadata read fell back to the default AWS credential chain and
endpoint, breaking typed reads.

Thread the storage-related options into all metadata reads in
`_storage/parquet.py` via a small `_metadata_read_options` helper.

Fixes #352

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 dataframely/_storage/parquet.py         | 42 +++++++++++++----
 tests/schema/test_read_write_parquet.py | 62 +++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 9 deletions(-)

diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py
index 655c595..dbf0e24 100644
--- a/dataframely/_storage/parquet.py
+++ b/dataframely/_storage/parquet.py
@@ -53,13 +53,13 @@ def write_frame(
     def scan_frame(self, **kwargs: Any) -> tuple[pl.LazyFrame, SerializedSchema | None]:
         source = kwargs.pop("source")
         lf = pl.scan_parquet(source, **kwargs)
-        metadata = _read_serialized_schema(source)
+        metadata = _read_serialized_schema(source, **_metadata_read_options(kwargs))
         return lf, metadata
 
     def read_frame(self, **kwargs: Any) -> tuple[pl.DataFrame, SerializedSchema | None]:
         source = kwargs.pop("source")
         df = pl.read_parquet(source, **kwargs)
-        metadata = _read_serialized_schema(source)
+        metadata = _read_serialized_schema(source, **_metadata_read_options(kwargs))
         return df, metadata
 
     # ------------------------------ Collections ---------------------------------------
@@ -159,12 +159,18 @@ def _collection_from_parquet(
                     else pl.read_parquet(scan_path, **kwargs).lazy()
                 )
                 if is_file:
-                    collection_types.append(_read_serialized_collection(source_path))
+                    collection_types.append(
+                        _read_serialized_collection(
+                            source_path, **_metadata_read_options(kwargs)
+                        )
+                    )
                 else:
                     prefix = get_file_prefix(fs)
                     for file in fs.glob(fs.sep.join([source_path, "**", "*.parquet"])):
                         collection_types.append(
-                            _read_serialized_collection(f"{prefix}{file}")
+                            _read_serialized_collection(
+                                f"{prefix}{file}", **_metadata_read_options(kwargs)
+                            )
                         )
 
         return data, collection_types
@@ -234,7 +240,7 @@ def scan_failure_info(
         file = kwargs.pop("file")
 
         # Meta data
-        metadata = pl.read_parquet_metadata(file)
+        metadata = pl.read_parquet_metadata(file, **_metadata_read_options(kwargs))
         serialized_schema = assert_failure_info_metadata(
             metadata.get(SCHEMA_METADATA_KEY)
         )
@@ -245,11 +251,29 @@ def scan_failure_info(
         return lf, serialized_rules, serialized_schema
 
 
-def _read_serialized_collection(path: str) -> SerializedCollection | None:
-    meta = pl.read_parquet_metadata(path)
+def _metadata_read_options(kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Extract the options required to reach the data store for a metadata read.
+
+    The parquet metadata is read separately from the data itself. We must forward the
+    same storage-related options (e.g. ``storage_options`` and ``credential_provider``)
+    so that metadata reads against non-AWS S3-compatible stores (lakeFS, MinIO, R2, …)
+    hit the correct endpoint and credentials instead of falling back to the default AWS
+    credential chain and endpoint.
+    """
+    return {
+        key: kwargs[key]
+        for key in ("storage_options", "credential_provider")
+        if key in kwargs
+    }
+
+
+def _read_serialized_collection(
+    path: str, **read_options: Any
+) -> SerializedCollection | None:
+    meta = pl.read_parquet_metadata(path, **read_options)
     return meta.get(COLLECTION_METADATA_KEY)
 
 
-def _read_serialized_schema(path: str) -> SerializedSchema | None:
-    meta = pl.read_parquet_metadata(path)
+def _read_serialized_schema(path: str, **read_options: Any) -> SerializedSchema | None:
+    meta = pl.read_parquet_metadata(path, **read_options)
     return meta.get(SCHEMA_METADATA_KEY)
diff --git a/tests/schema/test_read_write_parquet.py b/tests/schema/test_read_write_parquet.py
index 4b83031..a3a36e0 100644
--- a/tests/schema/test_read_write_parquet.py
+++ b/tests/schema/test_read_write_parquet.py
@@ -1,11 +1,13 @@
 # Copyright (c) QuantCo 2025-2026
 # SPDX-License-Identifier: BSD-3-Clause
 
+import uuid
 from pathlib import Path
 
 import polars as pl
 import pytest
 from fsspec import url_to_fs
+from polars.testing import assert_frame_equal
 
 import dataframely as dy
 from dataframely._storage.parquet import SCHEMA_METADATA_KEY
@@ -59,3 +61,63 @@ def test_write_parquet_fails_without_mkdir(tmp_path: str) -> None:
     # Act / Assert
     with pytest.raises(FileNotFoundError):
         MySchema.write_parquet(df, file=p)
+
+
+# --------------------------------- STORAGE OPTIONS ---------------------------------- #
+
+
+@pytest.mark.s3
+@pytest.mark.parametrize("lazy", [True, False])
+def test_read_parquet_metadata_uses_storage_options(
+    s3_server: str,
+    s3_bucket: str,
+    monkeypatch: pytest.MonkeyPatch,
+    lazy: bool,
+) -> None:
+    """The embedded-schema metadata read must use the same ``storage_options`` as the
+    data read.
+
+    Regression test for https://github.com/Quantco/dataframely/issues/352: against
+    non-AWS S3-compatible stores (lakeFS, MinIO, R2, …) reached purely via
+    ``storage_options``, the metadata read previously fell back to the default AWS
+    credential chain and endpoint, breaking typed reads.
+    """
+    # Arrange: provide credentials and endpoint *only* via `storage_options`, never via
+    # the environment, so the metadata read fails unless the options are forwarded.
+    for var in (
+        "AWS_ENDPOINT_URL",
+        "AWS_ACCESS_KEY_ID",
+        "AWS_SECRET_ACCESS_KEY",
+        "AWS_ALLOW_HTTP",
+        "AWS_S3_ALLOW_UNSAFE_RENAME",
+        "AWS_DEFAULT_REGION",
+        "AWS_REGION",
+    ):
+        monkeypatch.delenv(var, raising=False)
+
+    storage_options = {
+        "aws_access_key_id": "testing",
+        "aws_secret_access_key": "testing",
+        "aws_endpoint_url": s3_server,
+        "aws_region": "us-east-1",
+        "aws_allow_http": "true",
+    }
+    path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
+
+    df = MySchema.create_empty()
+    MySchema.write_parquet(df, file=path, storage_options=storage_options)
+
+    # Act: `validation="forbid"` only returns if the schema stored in the metadata is
+    # read successfully and matches, so a passing read proves the metadata read used the
+    # forwarded `storage_options`.
+    if lazy:
+        out: pl.DataFrame = MySchema.scan_parquet(
+            path, validation="forbid", storage_options=storage_options
+        ).collect()
+    else:
+        out = MySchema.read_parquet(
+            path, validation="forbid", storage_options=storage_options
+        )
+
+    # Assert
+    assert_frame_equal(df, out)

From b5645a86ce6129511f34b1795f6b8d097b795a7a Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Tue, 2 Jun 2026 15:06:25 +0100
Subject: [PATCH 02/10] fix: Forward storage_options in public parquet-metadata
 helpers

`read_parquet_metadata_schema` and `read_parquet_metadata_collection` read
parquet metadata from a (possibly remote) source but accepted no options, so
they could not reach non-AWS S3-compatible stores either. Accept and forward
`**kwargs` (e.g. `storage_options`, `credential_provider`) to
`pl.read_parquet_metadata`, matching `read_parquet`/`scan_parquet`.

Add s3-marked regression tests covering both helpers.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 dataframely/collection/collection.py    |  6 +++-
 dataframely/schema.py                   |  6 +++-
 tests/collection/test_storage.py        | 48 +++++++++++++++++++++++++
 tests/schema/test_read_write_parquet.py |  4 +++
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/dataframely/collection/collection.py b/dataframely/collection/collection.py
index 43aab21..1c8767c 100644
--- a/dataframely/collection/collection.py
+++ b/dataframely/collection/collection.py
@@ -1273,17 +1273,21 @@ def _validate_input_keys(cls, data: Mapping[str, FrameType], /) -> None:
 
 def read_parquet_metadata_collection(
     source: str | Path | IO[bytes] | bytes,
+    **kwargs: Any,
 ) -> type[Collection] | None:
     """Read a dataframely Collection type from the metadata of a parquet file.
 
     Args:
         source: Path to a parquet file or a file-like object that contains the metadata.
+        kwargs: Additional keyword arguments passed directly to
+            :meth:`polars.read_parquet_metadata`, e.g. ``storage_options`` and
+            ``credential_provider`` to reach non-AWS S3-compatible stores.
 
     Returns:
         The collection that was serialized to the metadata. `None` if no collection
         metadata is found or the deserialization fails.
     """
-    metadata = pl.read_parquet_metadata(source)
+    metadata = pl.read_parquet_metadata(source, **kwargs)
     if (schema_metadata := metadata.get(COLLECTION_METADATA_KEY)) is not None:
         return deserialize_collection(schema_metadata, strict=False)
     return None
diff --git a/dataframely/schema.py b/dataframely/schema.py
index c7fc92c..af54920 100644
--- a/dataframely/schema.py
+++ b/dataframely/schema.py
@@ -1416,17 +1416,21 @@ def _rules_match(lhs: dict[str, Rule], rhs: dict[str, Rule]) -> bool:
 
 def read_parquet_metadata_schema(
     source: str | Path | IO[bytes] | bytes,
+    **kwargs: Any,
 ) -> type[Schema] | None:
     """Read a dataframely schema from the metadata of a parquet file.
 
     Args:
         source: Path to a parquet file or a file-like object that contains the metadata.
+        kwargs: Additional keyword arguments passed directly to
+            :meth:`polars.read_parquet_metadata`, e.g. ``storage_options`` and
+            ``credential_provider`` to reach non-AWS S3-compatible stores.
 
     Returns:
         The schema that was serialized to the metadata. `None` if no schema metadata
         is found or the deserialization fails.
     """
-    metadata = pl.read_parquet_metadata(source)
+    metadata = pl.read_parquet_metadata(source, **kwargs)
 
     if (schema_metadata := metadata.get(SCHEMA_METADATA_KEY)) is not None:
         return deserialize_schema(schema_metadata, strict=False)
diff --git a/tests/collection/test_storage.py b/tests/collection/test_storage.py
index 9554ca5..2672414 100644
--- a/tests/collection/test_storage.py
+++ b/tests/collection/test_storage.py
@@ -1,6 +1,7 @@
 # Copyright (c) QuantCo 2025-2026
 # SPDX-License-Identifier: BSD-3-Clause
 
+import uuid
 from typing import Any
 
 import polars as pl
@@ -498,6 +499,53 @@ def test_read_invalid_parquet_metadata_collection(
     assert collection is None
 
 
+@pytest.mark.s3
+def test_read_parquet_metadata_collection_uses_storage_options(
+    s3_server: str,
+    s3_bucket: str,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """`read_parquet_metadata_collection` must forward `storage_options` to the
+    metadata read.
+
+    Regression test for https://github.com/Quantco/dataframely/issues/352.
+    """
+    # Arrange: credentials/endpoint are provided *only* via `storage_options`, never via
+    # the environment, so the metadata read fails unless the options are forwarded.
+    for var in (
+        "AWS_ENDPOINT_URL",
+        "AWS_ACCESS_KEY_ID",
+        "AWS_SECRET_ACCESS_KEY",
+        "AWS_ALLOW_HTTP",
+        "AWS_S3_ALLOW_UNSAFE_RENAME",
+        "AWS_DEFAULT_REGION",
+        "AWS_REGION",
+    ):
+        monkeypatch.delenv(var, raising=False)
+
+    storage_options = {
+        "aws_access_key_id": "testing",
+        "aws_secret_access_key": "testing",
+        "aws_endpoint_url": s3_server,
+        "aws_region": "us-east-1",
+        "aws_allow_http": "true",
+    }
+    path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
+    pl.DataFrame({"a": [1, 2, 3]}).write_parquet(
+        path,
+        metadata={COLLECTION_METADATA_KEY: MyCollection.serialize()},
+        storage_options=storage_options,
+    )
+
+    # Act
+    collection = dy.read_parquet_metadata_collection(
+        path, storage_options=storage_options
+    )
+
+    # Assert
+    assert collection is not None
+
+
 @pytest.mark.parametrize(
     "any_tmp_path",
     ["tmp_path", pytest.param("s3_tmp_path", marks=pytest.mark.s3)],
diff --git a/tests/schema/test_read_write_parquet.py b/tests/schema/test_read_write_parquet.py
index a3a36e0..f0628a3 100644
--- a/tests/schema/test_read_write_parquet.py
+++ b/tests/schema/test_read_write_parquet.py
@@ -121,3 +121,7 @@ def test_read_parquet_metadata_uses_storage_options(
 
     # Assert
     assert_frame_equal(df, out)
+
+    # The standalone metadata helper must forward `storage_options` too.
+    schema = dy.read_parquet_metadata_schema(path, storage_options=storage_options)
+    assert schema is not None

From bb3b0ab834ce805cee1b6ddcb01b003f1628b7d9 Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Tue, 2 Jun 2026 15:17:39 +0100
Subject: [PATCH 03/10] refactor: Align metadata-read options with original
 code and forward retries

Address review feedback on the storage-options forwarding fix:

- Match the file's docstring convention for the public metadata helpers: drop
  the enumerated `storage_options`/`credential_provider` note and use the same
  terse "passed directly to :meth:`polars.read_parquet_metadata`" wording as
  `read_parquet`/`scan_parquet`.
- Forward `retries` alongside `storage_options`/`credential_provider` in
  `_metadata_read_options`, since `read_parquet_metadata` accepts it and it is
  storage-reaching. Clarify in the docstring why the call sites must filter the
  scan/read kwargs (the narrower `read_parquet_metadata` signature rejects
  options like `n_rows`/`columns`) instead of forwarding everything.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 dataframely/_storage/parquet.py      | 18 ++++++++++--------
 dataframely/collection/collection.py |  3 +--
 dataframely/schema.py                |  3 +--
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py
index dbf0e24..e50ff2d 100644
--- a/dataframely/_storage/parquet.py
+++ b/dataframely/_storage/parquet.py
@@ -252,17 +252,19 @@ def scan_failure_info(
 
 
 def _metadata_read_options(kwargs: dict[str, Any]) -> dict[str, Any]:
-    """Extract the options required to reach the data store for a metadata read.
-
-    The parquet metadata is read separately from the data itself. We must forward the
-    same storage-related options (e.g. ``storage_options`` and ``credential_provider``)
-    so that metadata reads against non-AWS S3-compatible stores (lakeFS, MinIO, R2, …)
-    hit the correct endpoint and credentials instead of falling back to the default AWS
-    credential chain and endpoint.
+    """Select the storage-related options to forward to a parquet metadata read.
+
+    The metadata is read separately from the data, via
+    :meth:`polars.read_parquet_metadata`. That function accepts only a narrow subset of
+    the options understood by :meth:`polars.read_parquet`/:meth:`polars.scan_parquet`,
+    so we cannot simply forward all ``kwargs``. We forward exactly the options that
+    determine *how the store is reached* so that metadata reads against non-AWS
+    S3-compatible stores (lakeFS, MinIO, R2, …) hit the correct endpoint and credentials
+    instead of falling back to the default AWS credential chain and endpoint.
     """
     return {
         key: kwargs[key]
-        for key in ("storage_options", "credential_provider")
+        for key in ("storage_options", "credential_provider", "retries")
         if key in kwargs
     }
 
diff --git a/dataframely/collection/collection.py b/dataframely/collection/collection.py
index 1c8767c..6476f20 100644
--- a/dataframely/collection/collection.py
+++ b/dataframely/collection/collection.py
@@ -1280,8 +1280,7 @@ def read_parquet_metadata_collection(
     Args:
         source: Path to a parquet file or a file-like object that contains the metadata.
         kwargs: Additional keyword arguments passed directly to
-            :meth:`polars.read_parquet_metadata`, e.g. ``storage_options`` and
-            ``credential_provider`` to reach non-AWS S3-compatible stores.
+            :meth:`polars.read_parquet_metadata`.
 
     Returns:
         The collection that was serialized to the metadata. `None` if no collection
diff --git a/dataframely/schema.py b/dataframely/schema.py
index af54920..d624ae6 100644
--- a/dataframely/schema.py
+++ b/dataframely/schema.py
@@ -1423,8 +1423,7 @@ def read_parquet_metadata_schema(
     Args:
         source: Path to a parquet file or a file-like object that contains the metadata.
         kwargs: Additional keyword arguments passed directly to
-            :meth:`polars.read_parquet_metadata`, e.g. ``storage_options`` and
-            ``credential_provider`` to reach non-AWS S3-compatible stores.
+            :meth:`polars.read_parquet_metadata`.
 
     Returns:
         The schema that was serialized to the metadata. `None` if no schema metadata

From ceac55b6e98ca5ca68e2ba62c6f04b29f90190d7 Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Tue, 2 Jun 2026 15:27:15 +0100
Subject: [PATCH 04/10] test: Tighten storage_options regression tests and trim
 helper docstring

- Shrink `_metadata_read_options` to a one-line comment matching the other
  private helpers in the module (no oversized docstring).
- Extract an `s3_storage_options` fixture (mirrors `s3_tmp_path`, but strips the
  AWS_* env vars so the store is reachable *only* via `storage_options`) and use
  it across the schema, collection and failure-info regression tests.
- Add a failure-info regression test covering the `scan_failure_info` metadata
  read, and split the schema test so the typed read and the standalone
  `read_parquet_metadata_schema` helper are asserted independently.
- Drop the end-to-end collection typed-read test: it cannot pass via
  `storage_options` alone because member discovery goes through fsspec
  (`url_to_fs`/`fs.exists`), which does not receive `storage_options` -- a
  separate limitation from the polars metadata read this PR fixes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 dataframely/_storage/parquet.py         | 13 ++---
 tests/collection/test_storage.py        | 32 +++---------
 tests/conftest.py                       | 31 ++++++++++++
 tests/failure_info/test_storage.py      | 33 +++++++++++++
 tests/schema/test_read_write_parquet.py | 65 ++++++++++++-------------
 5 files changed, 105 insertions(+), 69 deletions(-)

diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py
index e50ff2d..e0e5087 100644
--- a/dataframely/_storage/parquet.py
+++ b/dataframely/_storage/parquet.py
@@ -252,16 +252,9 @@ def scan_failure_info(
 
 
 def _metadata_read_options(kwargs: dict[str, Any]) -> dict[str, Any]:
-    """Select the storage-related options to forward to a parquet metadata read.
-
-    The metadata is read separately from the data, via
-    :meth:`polars.read_parquet_metadata`. That function accepts only a narrow subset of
-    the options understood by :meth:`polars.read_parquet`/:meth:`polars.scan_parquet`,
-    so we cannot simply forward all ``kwargs``. We forward exactly the options that
-    determine *how the store is reached* so that metadata reads against non-AWS
-    S3-compatible stores (lakeFS, MinIO, R2, …) hit the correct endpoint and credentials
-    instead of falling back to the default AWS credential chain and endpoint.
-    """
+    # Forward only the options that `read_parquet_metadata` accepts (it has a narrower
+    # signature than `read_parquet`/`scan_parquet`) so the metadata read reaches the
+    # same store as the data read.
     return {
         key: kwargs[key]
         for key in ("storage_options", "credential_provider", "retries")
diff --git a/tests/collection/test_storage.py b/tests/collection/test_storage.py
index 2672414..a6debc8 100644
--- a/tests/collection/test_storage.py
+++ b/tests/collection/test_storage.py
@@ -501,45 +501,25 @@ def test_read_invalid_parquet_metadata_collection(
 
 @pytest.mark.s3
 def test_read_parquet_metadata_collection_uses_storage_options(
-    s3_server: str,
     s3_bucket: str,
-    monkeypatch: pytest.MonkeyPatch,
+    s3_storage_options: dict[str, str],
 ) -> None:
-    """`read_parquet_metadata_collection` must forward `storage_options` to the
-    metadata read.
+    """The standalone `read_parquet_metadata_collection` helper must forward
+    `storage_options` to the metadata read.
 
     Regression test for https://github.com/Quantco/dataframely/issues/352.
     """
-    # Arrange: credentials/endpoint are provided *only* via `storage_options`, never via
-    # the environment, so the metadata read fails unless the options are forwarded.
-    for var in (
-        "AWS_ENDPOINT_URL",
-        "AWS_ACCESS_KEY_ID",
-        "AWS_SECRET_ACCESS_KEY",
-        "AWS_ALLOW_HTTP",
-        "AWS_S3_ALLOW_UNSAFE_RENAME",
-        "AWS_DEFAULT_REGION",
-        "AWS_REGION",
-    ):
-        monkeypatch.delenv(var, raising=False)
-
-    storage_options = {
-        "aws_access_key_id": "testing",
-        "aws_secret_access_key": "testing",
-        "aws_endpoint_url": s3_server,
-        "aws_region": "us-east-1",
-        "aws_allow_http": "true",
-    }
+    # Arrange
     path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
     pl.DataFrame({"a": [1, 2, 3]}).write_parquet(
         path,
         metadata={COLLECTION_METADATA_KEY: MyCollection.serialize()},
-        storage_options=storage_options,
+        storage_options=s3_storage_options,
     )
 
     # Act
     collection = dy.read_parquet_metadata_collection(
-        path, storage_options=storage_options
+        path, storage_options=s3_storage_options
     )
 
     # Assert
diff --git a/tests/conftest.py b/tests/conftest.py
index 75828f5..dda85f8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -44,6 +44,37 @@ def s3_tmp_path(s3_server: str, s3_bucket: str, monkeypatch: pytest.MonkeyPatch)
     return f"{s3_bucket}/{str(uuid.uuid4())}"
 
 
+@pytest.fixture()
+def s3_storage_options(
+    s3_server: str, monkeypatch: pytest.MonkeyPatch
+) -> dict[str, str]:
+    """Credentials and endpoint for the moto server, supplied *only* via
+    ``storage_options``.
+
+    Unlike :func:`s3_tmp_path`, this fixture deletes the ``AWS_*`` environment
+    variables so the store is unreachable through the default credential chain. A read
+    therefore succeeds only if ``storage_options`` is forwarded to it -- which is what
+    the regression tests for https://github.com/Quantco/dataframely/issues/352 assert.
+    """
+    for var in (
+        "AWS_ENDPOINT_URL",
+        "AWS_ACCESS_KEY_ID",
+        "AWS_SECRET_ACCESS_KEY",
+        "AWS_ALLOW_HTTP",
+        "AWS_S3_ALLOW_UNSAFE_RENAME",
+        "AWS_DEFAULT_REGION",
+        "AWS_REGION",
+    ):
+        monkeypatch.delenv(var, raising=False)
+    return {
+        "aws_access_key_id": "testing",
+        "aws_secret_access_key": "testing",
+        "aws_endpoint_url": s3_server,
+        "aws_region": "us-east-1",
+        "aws_allow_http": "true",
+    }
+
+
 @pytest.fixture()
 def any_tmp_path(request: pytest.FixtureRequest) -> str:
     return str(request.getfixturevalue(request.param))
diff --git a/tests/failure_info/test_storage.py b/tests/failure_info/test_storage.py
index 10b3e7d..c4580f9 100644
--- a/tests/failure_info/test_storage.py
+++ b/tests/failure_info/test_storage.py
@@ -1,6 +1,8 @@
 # Copyright (c) QuantCo 2025-2026
 # SPDX-License-Identifier: BSD-3-Clause
 
+import uuid
+
 import polars as pl
 import pytest
 from fsspec import AbstractFileSystem, url_to_fs
@@ -165,6 +167,37 @@ def test_write_parquet_custom_metadata(
     assert pl.read_parquet_metadata(p)["custom"] == "test"
 
 
+@pytest.mark.s3
+@pytest.mark.parametrize("lazy", [True, False])
+def test_read_parquet_uses_storage_options_for_metadata(
+    s3_bucket: str,
+    s3_storage_options: dict[str, str],
+    lazy: bool,
+) -> None:
+    """`FailureInfo.scan_parquet`/`read_parquet` must forward `storage_options` to the
+    rule/schema metadata read, not just the data read.
+
+    Regression test for https://github.com/Quantco/dataframely/issues/352.
+    """
+    # Arrange
+    df = pl.DataFrame({"a": [4, 5, 6, 6, 7, 8], "b": [1, 2, 3, 4, 5, 6]})
+    _, failure = MySchema.filter(df)
+    path = f"{s3_bucket}/{uuid.uuid4()}/failure.parquet"
+    failure.write_parquet(path, storage_options=s3_storage_options)
+
+    # Act: reading failure info always reads the rule/schema metadata, so a successful
+    # read proves the metadata read used the forwarded `storage_options`.
+    if lazy:
+        read = dy.FailureInfo.scan_parquet(path, storage_options=s3_storage_options)
+    else:
+        read = dy.FailureInfo.read_parquet(path, storage_options=s3_storage_options)
+
+    # Assert
+    assert_frame_equal(failure._lf, read._lf)
+    assert failure._rule_columns == read._rule_columns
+    assert MySchema.matches(read.schema)
+
+
 def test_write_parquet_fails_without_mkdir(tmp_path: str) -> None:
     # Arrange
     df = pl.DataFrame(
diff --git a/tests/schema/test_read_write_parquet.py b/tests/schema/test_read_write_parquet.py
index f0628a3..8b79199 100644
--- a/tests/schema/test_read_write_parquet.py
+++ b/tests/schema/test_read_write_parquet.py
@@ -68,60 +68,59 @@ def test_write_parquet_fails_without_mkdir(tmp_path: str) -> None:
 
 @pytest.mark.s3
 @pytest.mark.parametrize("lazy", [True, False])
-def test_read_parquet_metadata_uses_storage_options(
-    s3_server: str,
+def test_read_parquet_uses_storage_options_for_metadata(
     s3_bucket: str,
-    monkeypatch: pytest.MonkeyPatch,
+    s3_storage_options: dict[str, str],
     lazy: bool,
 ) -> None:
-    """The embedded-schema metadata read must use the same ``storage_options`` as the
-    data read.
+    """`scan_parquet`/`read_parquet` must forward `storage_options` to the embedded
+    schema metadata read, not just the data read.
 
     Regression test for https://github.com/Quantco/dataframely/issues/352: against
     non-AWS S3-compatible stores (lakeFS, MinIO, R2, …) reached purely via
-    ``storage_options``, the metadata read previously fell back to the default AWS
+    `storage_options`, the metadata read previously fell back to the default AWS
     credential chain and endpoint, breaking typed reads.
     """
-    # Arrange: provide credentials and endpoint *only* via `storage_options`, never via
-    # the environment, so the metadata read fails unless the options are forwarded.
-    for var in (
-        "AWS_ENDPOINT_URL",
-        "AWS_ACCESS_KEY_ID",
-        "AWS_SECRET_ACCESS_KEY",
-        "AWS_ALLOW_HTTP",
-        "AWS_S3_ALLOW_UNSAFE_RENAME",
-        "AWS_DEFAULT_REGION",
-        "AWS_REGION",
-    ):
-        monkeypatch.delenv(var, raising=False)
-
-    storage_options = {
-        "aws_access_key_id": "testing",
-        "aws_secret_access_key": "testing",
-        "aws_endpoint_url": s3_server,
-        "aws_region": "us-east-1",
-        "aws_allow_http": "true",
-    }
+    # Arrange
+    df = MySchema.validate(pl.DataFrame({"a": [1, 2, 3]}), cast=True)
     path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
-
-    df = MySchema.create_empty()
-    MySchema.write_parquet(df, file=path, storage_options=storage_options)
+    MySchema.write_parquet(df, file=path, storage_options=s3_storage_options)
 
     # Act: `validation="forbid"` only returns if the schema stored in the metadata is
     # read successfully and matches, so a passing read proves the metadata read used the
     # forwarded `storage_options`.
     if lazy:
         out: pl.DataFrame = MySchema.scan_parquet(
-            path, validation="forbid", storage_options=storage_options
+            path, validation="forbid", storage_options=s3_storage_options
         ).collect()
     else:
         out = MySchema.read_parquet(
-            path, validation="forbid", storage_options=storage_options
+            path, validation="forbid", storage_options=s3_storage_options
         )
 
     # Assert
     assert_frame_equal(df, out)
 
-    # The standalone metadata helper must forward `storage_options` too.
-    schema = dy.read_parquet_metadata_schema(path, storage_options=storage_options)
+
+@pytest.mark.s3
+def test_read_parquet_metadata_schema_uses_storage_options(
+    s3_bucket: str,
+    s3_storage_options: dict[str, str],
+) -> None:
+    """The standalone `read_parquet_metadata_schema` helper must forward
+    `storage_options` to the metadata read.
+
+    Regression test for https://github.com/Quantco/dataframely/issues/352.
+    """
+    # Arrange
+    path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
+    MySchema.write_parquet(
+        MySchema.create_empty(), file=path, storage_options=s3_storage_options
+    )
+
+    # Act
+    schema = dy.read_parquet_metadata_schema(path, storage_options=s3_storage_options)
+
+    # Assert
     assert schema is not None
+    assert schema.matches(MySchema)

From 34d3b83ca41de8a9853252f9e2de2cefe7b751ac Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Tue, 2 Jun 2026 15:54:08 +0100
Subject: [PATCH 05/10] test: Trim regression-test docstrings and drop ticket
 references

Documentation should stand on its own, so remove the issue-tracker links from
the `s3_storage_options` fixture and the storage-options regression tests, and
shrink their docstrings to a single line in line with the surrounding tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/collection/test_storage.py        |  6 +-----
 tests/conftest.py                       |  9 +++------
 tests/failure_info/test_storage.py      |  7 ++-----
 tests/schema/test_read_write_parquet.py | 21 +++++----------------
 4 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/tests/collection/test_storage.py b/tests/collection/test_storage.py
index a6debc8..7325838 100644
--- a/tests/collection/test_storage.py
+++ b/tests/collection/test_storage.py
@@ -504,11 +504,7 @@ def test_read_parquet_metadata_collection_uses_storage_options(
     s3_bucket: str,
     s3_storage_options: dict[str, str],
 ) -> None:
-    """The standalone `read_parquet_metadata_collection` helper must forward
-    `storage_options` to the metadata read.
-
-    Regression test for https://github.com/Quantco/dataframely/issues/352.
-    """
+    """`read_parquet_metadata_collection` must forward `storage_options` to the read."""
     # Arrange
     path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
     pl.DataFrame({"a": [1, 2, 3]}).write_parquet(
diff --git a/tests/conftest.py b/tests/conftest.py
index dda85f8..03ceccf 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -48,13 +48,10 @@ def s3_tmp_path(s3_server: str, s3_bucket: str, monkeypatch: pytest.MonkeyPatch)
 def s3_storage_options(
     s3_server: str, monkeypatch: pytest.MonkeyPatch
 ) -> dict[str, str]:
-    """Credentials and endpoint for the moto server, supplied *only* via
-    ``storage_options``.
+    """Credentials and endpoint for the moto server, supplied only via storage options.
 
-    Unlike :func:`s3_tmp_path`, this fixture deletes the ``AWS_*`` environment
-    variables so the store is unreachable through the default credential chain. A read
-    therefore succeeds only if ``storage_options`` is forwarded to it -- which is what
-    the regression tests for https://github.com/Quantco/dataframely/issues/352 assert.
+    Unlike :func:`s3_tmp_path`, the ``AWS_*`` environment variables are removed, so the
+    store is reachable only when ``storage_options`` is forwarded to a read.
     """
     for var in (
         "AWS_ENDPOINT_URL",
diff --git a/tests/failure_info/test_storage.py b/tests/failure_info/test_storage.py
index c4580f9..88e67b6 100644
--- a/tests/failure_info/test_storage.py
+++ b/tests/failure_info/test_storage.py
@@ -174,11 +174,8 @@ def test_read_parquet_uses_storage_options_for_metadata(
     s3_storage_options: dict[str, str],
     lazy: bool,
 ) -> None:
-    """`FailureInfo.scan_parquet`/`read_parquet` must forward `storage_options` to the
-    rule/schema metadata read, not just the data read.
-
-    Regression test for https://github.com/Quantco/dataframely/issues/352.
-    """
+    """`storage_options` must reach the rule/schema metadata read, not just the data
+    read."""
     # Arrange
     df = pl.DataFrame({"a": [4, 5, 6, 6, 7, 8], "b": [1, 2, 3, 4, 5, 6]})
     _, failure = MySchema.filter(df)
diff --git a/tests/schema/test_read_write_parquet.py b/tests/schema/test_read_write_parquet.py
index 8b79199..9228efb 100644
--- a/tests/schema/test_read_write_parquet.py
+++ b/tests/schema/test_read_write_parquet.py
@@ -73,22 +73,15 @@ def test_read_parquet_uses_storage_options_for_metadata(
     s3_storage_options: dict[str, str],
     lazy: bool,
 ) -> None:
-    """`scan_parquet`/`read_parquet` must forward `storage_options` to the embedded
-    schema metadata read, not just the data read.
-
-    Regression test for https://github.com/Quantco/dataframely/issues/352: against
-    non-AWS S3-compatible stores (lakeFS, MinIO, R2, …) reached purely via
-    `storage_options`, the metadata read previously fell back to the default AWS
-    credential chain and endpoint, breaking typed reads.
-    """
+    """`storage_options` must reach the embedded schema metadata read, not just the
+    data read."""
     # Arrange
     df = MySchema.validate(pl.DataFrame({"a": [1, 2, 3]}), cast=True)
     path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
     MySchema.write_parquet(df, file=path, storage_options=s3_storage_options)
 
-    # Act: `validation="forbid"` only returns if the schema stored in the metadata is
-    # read successfully and matches, so a passing read proves the metadata read used the
-    # forwarded `storage_options`.
+    # Act: `validation="forbid"` only returns if the metadata schema is read and matches,
+    # so a passing read proves the metadata read used the forwarded `storage_options`.
     if lazy:
         out: pl.DataFrame = MySchema.scan_parquet(
             path, validation="forbid", storage_options=s3_storage_options
@@ -107,11 +100,7 @@ def test_read_parquet_metadata_schema_uses_storage_options(
     s3_bucket: str,
     s3_storage_options: dict[str, str],
 ) -> None:
-    """The standalone `read_parquet_metadata_schema` helper must forward
-    `storage_options` to the metadata read.
-
-    Regression test for https://github.com/Quantco/dataframely/issues/352.
-    """
+    """`read_parquet_metadata_schema` must forward `storage_options` to the read."""
     # Arrange
     path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
     MySchema.write_parquet(

From 1f621c1f7a237868d57eeb86b2c688890df3fd83 Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Tue, 2 Jun 2026 15:59:28 +0100
Subject: [PATCH 06/10] test: Use bare `# Act` marker to match the surrounding
 tests

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/failure_info/test_storage.py      | 5 +++--
 tests/schema/test_read_write_parquet.py | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/failure_info/test_storage.py b/tests/failure_info/test_storage.py
index 88e67b6..d327e45 100644
--- a/tests/failure_info/test_storage.py
+++ b/tests/failure_info/test_storage.py
@@ -182,8 +182,9 @@ def test_read_parquet_uses_storage_options_for_metadata(
     path = f"{s3_bucket}/{uuid.uuid4()}/failure.parquet"
     failure.write_parquet(path, storage_options=s3_storage_options)
 
-    # Act: reading failure info always reads the rule/schema metadata, so a successful
-    # read proves the metadata read used the forwarded `storage_options`.
+    # Act
+    # Reading failure info always reads the rule/schema metadata, so a successful read
+    # proves the metadata read used the forwarded `storage_options`.
     if lazy:
         read = dy.FailureInfo.scan_parquet(path, storage_options=s3_storage_options)
     else:
diff --git a/tests/schema/test_read_write_parquet.py b/tests/schema/test_read_write_parquet.py
index 9228efb..0eec2cd 100644
--- a/tests/schema/test_read_write_parquet.py
+++ b/tests/schema/test_read_write_parquet.py
@@ -80,8 +80,9 @@ def test_read_parquet_uses_storage_options_for_metadata(
     path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
     MySchema.write_parquet(df, file=path, storage_options=s3_storage_options)
 
-    # Act: `validation="forbid"` only returns if the metadata schema is read and matches,
-    # so a passing read proves the metadata read used the forwarded `storage_options`.
+    # Act
+    # `validation="forbid"` only returns if the metadata schema is read and matches, so
+    # a passing read proves the metadata read used the forwarded `storage_options`.
     if lazy:
         out: pl.DataFrame = MySchema.scan_parquet(
             path, validation="forbid", storage_options=s3_storage_options

From 5df26851328f1d130f4656a72f2426b835f9fac9 Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Tue, 2 Jun 2026 16:17:23 +0100
Subject: [PATCH 07/10] refactor: Hoist metadata read options out of the member
 loop

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 dataframely/_storage/parquet.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py
index e0e5087..f0fcadc 100644
--- a/dataframely/_storage/parquet.py
+++ b/dataframely/_storage/parquet.py
@@ -147,6 +147,7 @@ def _collection_from_parquet(
         # between lazy and eager reads
         data = {}
         collection_types = []
+        metadata_options = _metadata_read_options(kwargs)
 
         fs: AbstractFileSystem = url_to_fs(path)[0]
         for key in members:
@@ -160,16 +161,14 @@ def _collection_from_parquet(
                 )
                 if is_file:
                     collection_types.append(
-                        _read_serialized_collection(
-                            source_path, **_metadata_read_options(kwargs)
-                        )
+                        _read_serialized_collection(source_path, **metadata_options)
                     )
                 else:
                     prefix = get_file_prefix(fs)
                     for file in fs.glob(fs.sep.join([source_path, "**", "*.parquet"])):
                         collection_types.append(
                             _read_serialized_collection(
-                                f"{prefix}{file}", **_metadata_read_options(kwargs)
+                                f"{prefix}{file}", **metadata_options
                             )
                         )
 

From 48793a588de172af12ecae82ffa55d8a6152180b Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Wed, 3 Jun 2026 16:37:59 +0100
Subject: [PATCH 08/10] refactor: Derive metadata read options from
 read_parquet_metadata signature

Introspect pl.read_parquet_metadata's parameters instead of hard-coding the
allowlist, so the forwarded options track the installed polars version rather
than drifting against it.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 dataframely/_storage/parquet.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py
index f0fcadc..ffc0208 100644
--- a/dataframely/_storage/parquet.py
+++ b/dataframely/_storage/parquet.py
@@ -1,6 +1,7 @@
 # Copyright (c) QuantCo 2025-2026
 # SPDX-License-Identifier: BSD-3-Clause
 
+import inspect
 from collections.abc import Iterable
 from typing import Any
 
@@ -250,15 +251,14 @@ def scan_failure_info(
         return lf, serialized_rules, serialized_schema
 
 
+# `read_parquet_metadata` has no `**kwargs`, so unrecognized keys raise `TypeError`.
+_METADATA_READ_PARAMS = frozenset(
+    inspect.signature(pl.read_parquet_metadata).parameters
+) - {"source"}
+
+
 def _metadata_read_options(kwargs: dict[str, Any]) -> dict[str, Any]:
-    # Forward only the options that `read_parquet_metadata` accepts (it has a narrower
-    # signature than `read_parquet`/`scan_parquet`) so the metadata read reaches the
-    # same store as the data read.
-    return {
-        key: kwargs[key]
-        for key in ("storage_options", "credential_provider", "retries")
-        if key in kwargs
-    }
+    return {k: v for k, v in kwargs.items() if k in _METADATA_READ_PARAMS}
 
 
 def _read_serialized_collection(

From f8e717a0000d5dcca12fabacf7dfd897b8bbbf9f Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Thu, 4 Jun 2026 19:54:30 +0200
Subject: [PATCH 09/10] test: Isolate storage_options regression tests with a
 unique bucket

Polars caches object stores per bucket on the Rust side, where
monkeypatch.delenv cannot clear them. A bucket configured once from
AWS_* env vars stayed reachable without storage_options, so the
regression tests passed even when forwarding was dropped.

Replace s3_storage_options with s3_isolated, which hands out a
freshly-named bucket that is never env-configured. Reaching it requires
forwarding storage_options to every read, so the tests now fail without
the fix. Also drop a stale comment in parquet.py.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 dataframely/_storage/parquet.py         |  1 -
 tests/collection/test_storage.py        | 10 +++++-----
 tests/conftest.py                       | 20 ++++++++++++++------
 tests/failure_info/test_storage.py      | 12 ++++++------
 tests/schema/test_read_write_parquet.py | 22 +++++++++++-----------
 5 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py
index ffc0208..805efbd 100644
--- a/dataframely/_storage/parquet.py
+++ b/dataframely/_storage/parquet.py
@@ -251,7 +251,6 @@ def scan_failure_info(
         return lf, serialized_rules, serialized_schema
 
 
-# `read_parquet_metadata` has no `**kwargs`, so unrecognized keys raise `TypeError`.
 _METADATA_READ_PARAMS = frozenset(
     inspect.signature(pl.read_parquet_metadata).parameters
 ) - {"source"}
diff --git a/tests/collection/test_storage.py b/tests/collection/test_storage.py
index 7325838..58823a7 100644
--- a/tests/collection/test_storage.py
+++ b/tests/collection/test_storage.py
@@ -501,21 +501,21 @@ def test_read_invalid_parquet_metadata_collection(
 
 @pytest.mark.s3
 def test_read_parquet_metadata_collection_uses_storage_options(
-    s3_bucket: str,
-    s3_storage_options: dict[str, str],
+    s3_isolated: tuple[str, dict[str, str]],
 ) -> None:
     """`read_parquet_metadata_collection` must forward `storage_options` to the read."""
     # Arrange
-    path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
+    bucket, storage_options = s3_isolated
+    path = f"{bucket}/{uuid.uuid4()}/df.parquet"
     pl.DataFrame({"a": [1, 2, 3]}).write_parquet(
         path,
         metadata={COLLECTION_METADATA_KEY: MyCollection.serialize()},
-        storage_options=s3_storage_options,
+        storage_options=storage_options,
     )
 
     # Act
     collection = dy.read_parquet_metadata_collection(
-        path, storage_options=s3_storage_options
+        path, storage_options=storage_options
     )
 
     # Assert
diff --git a/tests/conftest.py b/tests/conftest.py
index 03ceccf..7b8870c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -45,13 +45,17 @@ def s3_tmp_path(s3_server: str, s3_bucket: str, monkeypatch: pytest.MonkeyPatch)
 
 
 @pytest.fixture()
-def s3_storage_options(
+def s3_isolated(
     s3_server: str, monkeypatch: pytest.MonkeyPatch
-) -> dict[str, str]:
-    """Credentials and endpoint for the moto server, supplied only via storage options.
+) -> tuple[str, dict[str, str]]:
+    """A freshly-named bucket that is only reachable via the returned ``storage_options``.
 
-    Unlike :func:`s3_tmp_path`, the ``AWS_*`` environment variables are removed, so the
-    store is reachable only when ``storage_options`` is forwarded to a read.
+    Polars caches object stores per bucket, and these caches live Rust-side and are not
+    cleared by ``monkeypatch.delenv``. A bucket configured once from ``AWS_*`` env vars
+    (e.g. by :func:`s3_tmp_path`) therefore stays reachable without ``storage_options``,
+    which would let a read silently succeed even if ``storage_options`` was dropped. A
+    unique bucket has no such cached store, so reaching it requires forwarding
+    ``storage_options`` to every read.
     """
     for var in (
         "AWS_ENDPOINT_URL",
@@ -63,7 +67,11 @@ def s3_storage_options(
         "AWS_REGION",
     ):
         monkeypatch.delenv(var, raising=False)
-    return {
+    bucket = f"isolated-{uuid.uuid4()}"
+    boto3.client(
+        "s3", endpoint_url=s3_server, aws_access_key_id="", aws_secret_access_key=""
+    ).create_bucket(Bucket=bucket)
+    return f"s3://{bucket}", {
         "aws_access_key_id": "testing",
         "aws_secret_access_key": "testing",
         "aws_endpoint_url": s3_server,
diff --git a/tests/failure_info/test_storage.py b/tests/failure_info/test_storage.py
index d327e45..3823342 100644
--- a/tests/failure_info/test_storage.py
+++ b/tests/failure_info/test_storage.py
@@ -170,25 +170,25 @@ def test_write_parquet_custom_metadata(
 @pytest.mark.s3
 @pytest.mark.parametrize("lazy", [True, False])
 def test_read_parquet_uses_storage_options_for_metadata(
-    s3_bucket: str,
-    s3_storage_options: dict[str, str],
+    s3_isolated: tuple[str, dict[str, str]],
     lazy: bool,
 ) -> None:
     """`storage_options` must reach the rule/schema metadata read, not just the data
     read."""
     # Arrange
+    bucket, storage_options = s3_isolated
     df = pl.DataFrame({"a": [4, 5, 6, 6, 7, 8], "b": [1, 2, 3, 4, 5, 6]})
     _, failure = MySchema.filter(df)
-    path = f"{s3_bucket}/{uuid.uuid4()}/failure.parquet"
-    failure.write_parquet(path, storage_options=s3_storage_options)
+    path = f"{bucket}/{uuid.uuid4()}/failure.parquet"
+    failure.write_parquet(path, storage_options=storage_options)
 
     # Act
     # Reading failure info always reads the rule/schema metadata, so a successful read
     # proves the metadata read used the forwarded `storage_options`.
     if lazy:
-        read = dy.FailureInfo.scan_parquet(path, storage_options=s3_storage_options)
+        read = dy.FailureInfo.scan_parquet(path, storage_options=storage_options)
     else:
-        read = dy.FailureInfo.read_parquet(path, storage_options=s3_storage_options)
+        read = dy.FailureInfo.read_parquet(path, storage_options=storage_options)
 
     # Assert
     assert_frame_equal(failure._lf, read._lf)
diff --git a/tests/schema/test_read_write_parquet.py b/tests/schema/test_read_write_parquet.py
index 0eec2cd..22bae98 100644
--- a/tests/schema/test_read_write_parquet.py
+++ b/tests/schema/test_read_write_parquet.py
@@ -69,27 +69,27 @@ def test_write_parquet_fails_without_mkdir(tmp_path: str) -> None:
 @pytest.mark.s3
 @pytest.mark.parametrize("lazy", [True, False])
 def test_read_parquet_uses_storage_options_for_metadata(
-    s3_bucket: str,
-    s3_storage_options: dict[str, str],
+    s3_isolated: tuple[str, dict[str, str]],
     lazy: bool,
 ) -> None:
     """`storage_options` must reach the embedded schema metadata read, not just the
     data read."""
     # Arrange
+    bucket, storage_options = s3_isolated
     df = MySchema.validate(pl.DataFrame({"a": [1, 2, 3]}), cast=True)
-    path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
-    MySchema.write_parquet(df, file=path, storage_options=s3_storage_options)
+    path = f"{bucket}/{uuid.uuid4()}/df.parquet"
+    MySchema.write_parquet(df, file=path, storage_options=storage_options)
 
     # Act
     # `validation="forbid"` only returns if the metadata schema is read and matches, so
     # a passing read proves the metadata read used the forwarded `storage_options`.
     if lazy:
         out: pl.DataFrame = MySchema.scan_parquet(
-            path, validation="forbid", storage_options=s3_storage_options
+            path, validation="forbid", storage_options=storage_options
         ).collect()
     else:
         out = MySchema.read_parquet(
-            path, validation="forbid", storage_options=s3_storage_options
+            path, validation="forbid", storage_options=storage_options
         )
 
     # Assert
@@ -98,18 +98,18 @@ def test_read_parquet_uses_storage_options_for_metadata(
 
 @pytest.mark.s3
 def test_read_parquet_metadata_schema_uses_storage_options(
-    s3_bucket: str,
-    s3_storage_options: dict[str, str],
+    s3_isolated: tuple[str, dict[str, str]],
 ) -> None:
     """`read_parquet_metadata_schema` must forward `storage_options` to the read."""
     # Arrange
-    path = f"{s3_bucket}/{uuid.uuid4()}/df.parquet"
+    bucket, storage_options = s3_isolated
+    path = f"{bucket}/{uuid.uuid4()}/df.parquet"
     MySchema.write_parquet(
-        MySchema.create_empty(), file=path, storage_options=s3_storage_options
+        MySchema.create_empty(), file=path, storage_options=storage_options
     )
 
     # Act
-    schema = dy.read_parquet_metadata_schema(path, storage_options=s3_storage_options)
+    schema = dy.read_parquet_metadata_schema(path, storage_options=storage_options)
 
     # Assert
     assert schema is not None

From e33a50a2ee4451af65b2541e394940fe77127ce2 Mon Sep 17 00:00:00 2001
From: Mattijs De Paepe <mattijsdepaepe@hotmail.com>
Date: Thu, 4 Jun 2026 21:26:54 +0200
Subject: [PATCH 10/10] test: Delete the isolated S3 bucket on fixture teardown

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/conftest.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 7b8870c..4f84c67 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -47,7 +47,7 @@ def s3_tmp_path(s3_server: str, s3_bucket: str, monkeypatch: pytest.MonkeyPatch)
 @pytest.fixture()
 def s3_isolated(
     s3_server: str, monkeypatch: pytest.MonkeyPatch
-) -> tuple[str, dict[str, str]]:
+) -> Iterator[tuple[str, dict[str, str]]]:
     """A freshly-named bucket that is only reachable via the returned ``storage_options``.
 
     Polars caches object stores per bucket, and these caches live Rust-side and are not
@@ -68,16 +68,23 @@ def s3_isolated(
     ):
         monkeypatch.delenv(var, raising=False)
     bucket = f"isolated-{uuid.uuid4()}"
-    boto3.client(
+    client = boto3.client(
         "s3", endpoint_url=s3_server, aws_access_key_id="", aws_secret_access_key=""
-    ).create_bucket(Bucket=bucket)
-    return f"s3://{bucket}", {
-        "aws_access_key_id": "testing",
-        "aws_secret_access_key": "testing",
-        "aws_endpoint_url": s3_server,
-        "aws_region": "us-east-1",
-        "aws_allow_http": "true",
-    }
+    )
+    client.create_bucket(Bucket=bucket)
+    yield (
+        f"s3://{bucket}",
+        {
+            "aws_access_key_id": "testing",
+            "aws_secret_access_key": "testing",
+            "aws_endpoint_url": s3_server,
+            "aws_region": "us-east-1",
+            "aws_allow_http": "true",
+        },
+    )
+    for obj in client.list_objects_v2(Bucket=bucket).get("Contents", []):
+        client.delete_object(Bucket=bucket, Key=obj["Key"])
+    client.delete_bucket(Bucket=bucket)
 
 
 @pytest.fixture()