From 1e0c8542c63ecb39816eeb2640c13324a705feec Mon Sep 17 00:00:00 2001
From: BrianMichell
Date: Wed, 17 Jun 2026 13:27:12 +0000
Subject: [PATCH 1/2] Resolve issue where `segy_to_mdio` ingestion failed in trace ingestion phase if `output` contained a `UPath` with `storage_options`

---
 src/mdio/api/io.py    |  4 +++-
 tests/unit/test_io.py | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/test_io.py

diff --git a/src/mdio/api/io.py b/src/mdio/api/io.py
index ea5b22dc..85f8ba36 100644
--- a/src/mdio/api/io.py
+++ b/src/mdio/api/io.py
@@ -30,7 +30,9 @@ def _normalize_path(path: UPath | Path | str) -> UPath:
 
 
 def _normalize_storage_options(path: UPath) -> dict[str, Any] | None:
-    return None if len(path.storage_options) == 0 else path.storage_options
+    # UPath.storage_options returns a read-only mappingproxy which cannot be pickled. Copy it into a
+    # plain dict so callers can safely pass it across process boundaries (e.g. spawned workers).
+    return None if len(path.storage_options) == 0 else dict(path.storage_options)
 
 
 def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dataset:
diff --git a/tests/unit/test_io.py b/tests/unit/test_io.py
new file mode 100644
index 00000000..9cadc66d
--- /dev/null
+++ b/tests/unit/test_io.py
@@ -0,0 +1,21 @@
+"""Tests for low-level MDIO API I/O helpers."""
+
+from __future__ import annotations
+
+from types import MappingProxyType
+
+from upath import UPath
+
+from mdio.api.io import _normalize_storage_options
+
+
+def test_normalize_storage_options_is_not_mappingproxy() -> None:
+    """Storage options must not be a mappingproxy.
+
+    `UPath.storage_options` returns a read-only ``mappingproxy`` that cannot be pickled. Blocked-I/O
+    ingestion passes these options into ``ProcessPoolExecutor`` initargs, so a mappingproxy breaks
+    spawned workers with ``TypeError: cannot pickle 'mappingproxy' object``.
+    """
+    storage_options = _normalize_storage_options(UPath("s3://bucket/key", key="access", secret="secret"))
+
+    assert not isinstance(storage_options, MappingProxyType)

From 870e3ba6a23a8189f77a6ffa544d0727b56d72c4 Mon Sep 17 00:00:00 2001
From: BrianMichell
Date: Wed, 17 Jun 2026 13:29:12 +0000
Subject: [PATCH 2/2] Ignore false positive for hardcoded "secret"

---
 tests/unit/test_io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_io.py b/tests/unit/test_io.py
index 9cadc66d..111b14c2 100644
--- a/tests/unit/test_io.py
+++ b/tests/unit/test_io.py
@@ -16,6 +16,6 @@ def test_normalize_storage_options_is_not_mappingproxy() -> None:
     ingestion passes these options into ``ProcessPoolExecutor`` initargs, so a mappingproxy breaks
     spawned workers with ``TypeError: cannot pickle 'mappingproxy' object``.
     """
-    storage_options = _normalize_storage_options(UPath("s3://bucket/key", key="access", secret="secret"))
+    storage_options = _normalize_storage_options(UPath("s3://bucket/key", key="access", secret="secret"))  # noqa: S106
 
     assert not isinstance(storage_options, MappingProxyType)