From a814e219b00c597b57bed838d045e614ce69bcb3 Mon Sep 17 00:00:00 2001 From: Graham Findlay Date: Thu, 4 Jun 2026 10:56:23 -0500 Subject: [PATCH 1/2] Fix file size calculation in `write_binary()` `write_binary()` was determining the file size and seek offsets based on the source dtype's itemsize, but the data is cast to the target dtype before writing. This means that if the target dtype has a different itemsize than the source dtype, a corrupted file of the wrong size and layout was produced. The solution was just to use the target dtype's itemsize for all file size and seek offset calculations. --- src/spikeinterface/core/time_series_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/core/time_series_tools.py b/src/spikeinterface/core/time_series_tools.py index fad697f94e..3813b72441 100644 --- a/src/spikeinterface/core/time_series_tools.py +++ b/src/spikeinterface/core/time_series_tools.py @@ -64,7 +64,7 @@ def write_binary( dtype = dtype if dtype is not None else time_series.get_dtype() - sample_size_bytes = time_series.get_sample_size_in_bytes() + sample_size_bytes = np.dtype(dtype).itemsize * time_series.get_num_channels() file_path_dict = {segment_index: file_path for segment_index, file_path in enumerate(file_path_list)} if file_timestamps_paths is not None: @@ -125,7 +125,7 @@ def _write_binary_chunk(segment_index, start_frame, end_frame, worker_ctx): byte_offset = worker_ctx["byte_offset"] file = worker_ctx["file_dict"][segment_index] file_timestamps_dict = worker_ctx["file_timestamps_dict"] - sample_size_bytes = time_series.get_sample_size_in_bytes() + sample_size_bytes = dtype.itemsize * time_series.get_num_channels() # Calculate byte offsets for the start frames relative to the entire recording start_byte = byte_offset + start_frame * sample_size_bytes From bc005fab89e91948c36b68381001767f26b56b73 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Fri, 5 Jun 2026 10:24:14 +0200 Subject: [PATCH 2/2] 
fix: extend get_sample_size_in_bytes() signature to accept dtype --- src/spikeinterface/core/baserecording.py | 11 +++++++++-- src/spikeinterface/core/time_series.py | 2 +- src/spikeinterface/core/time_series_tools.py | 6 ++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index 0a9b26931b..6328199475 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -187,17 +187,24 @@ def add_recording_segment(self, recording_segment: "BaseRecordingSegment") -> No """ super().add_segment(recording_segment) - def get_sample_size_in_bytes(self): + def get_sample_size_in_bytes(self, dtype=None): """ Returns the size of a single sample across all channels in bytes. + Parameters + ---------- + dtype : data-type, optional + The data type to use for calculating the sample size. If None, + the recording's dtype is used. + Returns ------- int The size of a single sample in bytes """ num_channels = self.get_num_channels() - dtype_size_bytes = self.get_dtype().itemsize + dtype = self.get_dtype() if dtype is None else np.dtype(dtype) + dtype_size_bytes = dtype.itemsize sample_size = num_channels * dtype_size_bytes return sample_size diff --git a/src/spikeinterface/core/time_series.py b/src/spikeinterface/core/time_series.py index d4d4717dff..62113dafd2 100644 --- a/src/spikeinterface/core/time_series.py +++ b/src/spikeinterface/core/time_series.py @@ -34,7 +34,7 @@ def get_num_samples(self, segment_index: int | None = None) -> int: raise NotImplementedError @abstractmethod - def get_sample_size_in_bytes(self) -> int: + def get_sample_size_in_bytes(self, dtype=None) -> int: raise NotImplementedError @abstractmethod diff --git a/src/spikeinterface/core/time_series_tools.py b/src/spikeinterface/core/time_series_tools.py index 3813b72441..efdedc8282 100644 --- a/src/spikeinterface/core/time_series_tools.py +++ 
b/src/spikeinterface/core/time_series_tools.py @@ -62,9 +62,7 @@ def write_binary( if add_file_extension: file_path_list = [add_suffix(file_path, ["raw", "bin", "dat"]) for file_path in file_path_list] - dtype = dtype if dtype is not None else time_series.get_dtype() - - sample_size_bytes = np.dtype(dtype).itemsize * time_series.get_num_channels() + sample_size_bytes = time_series.get_sample_size_in_bytes(dtype=dtype) file_path_dict = {segment_index: file_path for segment_index, file_path in enumerate(file_path_list)} if file_timestamps_paths is not None: @@ -125,7 +123,7 @@ def _write_binary_chunk(segment_index, start_frame, end_frame, worker_ctx): byte_offset = worker_ctx["byte_offset"] file = worker_ctx["file_dict"][segment_index] file_timestamps_dict = worker_ctx["file_timestamps_dict"] - sample_size_bytes = dtype.itemsize * time_series.get_num_channels() + sample_size_bytes = time_series.get_sample_size_in_bytes(dtype=dtype) # Calculate byte offsets for the start frames relative to the entire recording start_byte = byte_offset + start_frame * sample_size_bytes