diff --git a/src/mdio/api/io.py b/src/mdio/api/io.py
index ea5b22dc..85f8ba36 100644
--- a/src/mdio/api/io.py
+++ b/src/mdio/api/io.py
@@ -30,7 +30,9 @@ def _normalize_path(path: UPath | Path | str) -> UPath:
 
 
 def _normalize_storage_options(path: UPath) -> dict[str, Any] | None:
-    return None if len(path.storage_options) == 0 else path.storage_options
+    # UPath.storage_options returns a read-only mappingproxy which cannot be pickled. Copy it into a
+    # plain dict so callers can safely pass it across process boundaries (e.g. spawned workers).
+    return None if len(path.storage_options) == 0 else dict(path.storage_options)
 
 
 def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dataset:
diff --git a/tests/unit/test_io.py b/tests/unit/test_io.py
new file mode 100644
index 00000000..111b14c2
--- /dev/null
+++ b/tests/unit/test_io.py
@@ -0,0 +1,21 @@
+"""Tests for low-level MDIO API I/O helpers."""
+
+from __future__ import annotations
+
+from types import MappingProxyType
+
+from upath import UPath
+
+from mdio.api.io import _normalize_storage_options
+
+
+def test_normalize_storage_options_is_not_mappingproxy() -> None:
+    """Storage options must not be a mappingproxy.
+
+    `UPath.storage_options` returns a read-only ``mappingproxy`` that cannot be pickled. Blocked-I/O
+    ingestion passes these options into ``ProcessPoolExecutor`` initargs, so a mappingproxy breaks
+    spawned workers with ``TypeError: cannot pickle 'mappingproxy' object``.
+    """
+    storage_options = _normalize_storage_options(UPath("s3://bucket/key", key="access", secret="secret"))  # noqa: S106
+
+    assert not isinstance(storage_options, MappingProxyType)