From 32e46006e031de4b175730649b3458b0362d36fa Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 20 Jun 2026 10:29:09 +0100 Subject: [PATCH 1/5] Don't track .hypothesis --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e5f97089..3ae42ff2 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ venv/ .mypy_cache/ .pytest_cache/ .ruff_cache/ +.hypothesis From c63b727d156ce536cb7fd1fc4ae9d0b1d6451100 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 20 Jun 2026 10:42:18 +0100 Subject: [PATCH 2/5] Make ShpWriter.shape return both offset & length in B, not 16 bit words --- src/shapefile.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1dd4859f..bbf86e9a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1607,7 +1607,6 @@ def _write_ms_to_byte_stream( raise ShapefileException( f"Failed to write measure extremes for record {i}. Expected floats" ) - ms_to_encode = replace_None_with_NODATA(s.m) try: num_bytes_written += b_io.write(pack(f"<{len(s.m)}d", *ms_to_encode)) @@ -4200,6 +4199,7 @@ def shape( self, s: Shape | HasGeoInterface | GeoJSONHomogeneousGeometryObject, ) -> tuple[int, int]: + """Returns shape's offset and length in B""" if not isinstance(s, Shape): if isinstance(s, HasGeoInterface): shape_dict = s.__geo_interface__ @@ -4216,6 +4216,7 @@ def shape( return self._shp_record(s) def _shp_record(self, s: Shape) -> tuple[int, int]: + """Returns shape's offset and length in B""" offset = self.file.tell() self.shpNum += 1 @@ -4274,7 +4275,7 @@ def _shp_record(self, s: Shape) -> tuple[int, int]: # Flush to file. b_io.seek(0) self.file.write(b_io.read()) - return offset, length_16bw + return offset, n class ShxWriter(_ShpShxHeaderWriter): @@ -4288,7 +4289,7 @@ def __init__( super().__init__(file=shx) self.shp_writer = shp_writer - def _shx_record(self, offset_B: int, length_16bw: int) -> None: + def _shx_record(self, offset_B: int, length_B: int) -> None: """Writes the shx records.""" f = self.file @@ -4299,7 +4300,7 @@ def _shx_record(self, offset_B: int, length_16bw: int) -> None: "It's over 4GB, perhaps split the .shp or the Shapefile into smaller ones? " ) - offset_16bw = offset_B // 2 + offset_16bw, length_16bw = offset_B // 2, length_B // 2 f.write(pack(">2i", offset_16bw, length_16bw)) def _header(self) -> None: @@ -4454,9 +4455,9 @@ def shape( # Balance if already not balanced if self.autoBalance and self.dbf_writer.recNum < self.shp_writer.shpNum: self.balance() - offset_B, length_16bw = self.shp_writer.shape(s) + offset_B, length_B = self.shp_writer.shape(s) if self._shx: - self.shx_writer._shx_record(offset_B, length_16bw) + self.shx_writer._shx_record(offset_B, length_B) def record( self, From 805be08e8b1989a84587413e02529d3a4e50acca Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 20 Jun 2026 10:59:59 +0100 Subject: [PATCH 3/5] v3.0.14.dev --- README.md | 5 +++++ changelog.txt | 7 ++++++- src/shapefile.py | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e13ef099..ca3cac09 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,11 @@ part of your geospatial project. # Version Changes +## 3.0.14.dev +### ShpWriter.shape API Tweak (small breaking change). + - Make ShpWriter.shape return shape length in bytes (like offset) not in 16 bit words. + + ## 3.0.13 ### Bug fix - Fix bug when reading empty shp files. diff --git a/changelog.txt b/changelog.txt index 9ba02148..298f5c65 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,8 @@ +VERSION 3.0.14.dev + +2026-06-20 + * API Tweak (small breaking change). Make ShpWriter.shape return shape length in bytes (like offset) not in 16 bit words. + VERSION 3.0.13 2026-06-19 @@ -21,7 +26,7 @@ VERSION 3.0.11 2026-06-04 Edge case handling - * Raise ShapefileException i) when creating Non-null Shapes without (or with empty) points + * Raise ShapefileException: i) when creating Non-null Shapes without (or with empty) points and ii) when creating Null Shapes with non-empty points. * Ensure Shape.z and Shape.partTypes are _Arrays. * Make Shape stricter about its args, e.g. only points or lines, only one point for Points. diff --git a/src/shapefile.py b/src/shapefile.py index bbf86e9a..1353f8bf 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -8,7 +8,7 @@ from __future__ import annotations -__version__ = "3.0.13" +__version__ = "3.0.14.dev" import abc import array From c61f5c2430b14141cdbaec01f03df691584ece75 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 20 Jun 2026 15:45:10 +0100 Subject: [PATCH 4/5] Rename size_B and size_16b_words correctly in ShpWriter._shp_file_length_B --- README.md | 3 ++- changelog.txt | 3 ++- src/shapefile.py | 14 ++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ca3cac09..b98c4027 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,8 @@ part of your geospatial project. ## 3.0.14.dev ### ShpWriter.shape API Tweak (small breaking change). - - Make ShpWriter.shape return shape length in bytes (like offset) not in 16 bit words. + - Make ShpWriter.shape return shape length in bytes (the + same as for offset) not in 16 bit words. ## 3.0.13 diff --git a/changelog.txt b/changelog.txt index 298f5c65..3ac75dc2 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,7 +1,8 @@ VERSION 3.0.14.dev 2026-06-20 - * API Tweak (small breaking change). Make ShpWriter.shape return shape length in bytes (like offset) not in 16 bit words. + * API Tweak (small breaking change). Make ShpWriter.shape return shape length in bytes + (the same as for offset) not in 16 bit words. VERSION 3.0.13 diff --git a/src/shapefile.py b/src/shapefile.py index 1353f8bf..36fd05c7 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -4134,7 +4134,11 @@ def _header(self) -> None: def _write_file_length(self) -> None: # self.file required to be at correct position, e.g. # if called by self._header - self.file.write(pack(">i", self._shp_file_length_B())) + + # Calculate size as 16-bit words + size_B = self._shp_file_length_B() + size_16b_words = size_B // 2 + self.file.write(pack(">i", size_16b_words)) def _shp_file_length_B(self) -> int: """Calculates the file length of the shp file.""" @@ -4143,9 +4147,7 @@ def _shp_file_length_B(self) -> int: # Calculate size of all shapes self.file.seek(0, 2) - size_16b_words = self.file.tell() - # Calculate size as 16-bit words - size_B = size_16b_words // 2 + size_B = self.file.tell() # Return to start self.file.seek(start_B) return size_B @@ -4199,7 +4201,7 @@ def shape( self, s: Shape | HasGeoInterface | GeoJSONHomogeneousGeometryObject, ) -> tuple[int, int]: - """Returns shape's offset and length in B""" + """Appends s to the file. Returns shape's offset and length in B""" if not isinstance(s, Shape): if isinstance(s, HasGeoInterface): shape_dict = s.__geo_interface__ @@ -4216,7 +4218,7 @@ def shape( return self._shp_record(s) def _shp_record(self, s: Shape) -> tuple[int, int]: - """Returns shape's offset and length in B""" + """Appends s to the file. Returns shape's offset and length in B""" offset = self.file.tell() self.shpNum += 1 From 8d04f7153cc0ed288044fe53cab832c64932e93a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 20 Jun 2026 15:47:59 +0100 Subject: [PATCH 5/5] Add shx round trip test Reformat --- README.md | 2 +- changelog.txt | 2 +- src/shapefile.py | 2 +- tests/hypothesis_tests.py | 75 ++++++++++++++++++++++++++++++++++----- 4 files changed, 69 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index b98c4027..34cfbeff 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ part of your geospatial project. ## 3.0.14.dev ### ShpWriter.shape API Tweak (small breaking change). - - Make ShpWriter.shape return shape length in bytes (the + - Make ShpWriter.shape return shape length in bytes (the same as for offset) not in 16 bit words. diff --git a/changelog.txt b/changelog.txt index 3ac75dc2..26a4530c 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,7 +1,7 @@ VERSION 3.0.14.dev 2026-06-20 - * API Tweak (small breaking change). Make ShpWriter.shape return shape length in bytes + * API Tweak (small breaking change). Make ShpWriter.shape return shape length in bytes (the same as for offset) not in 16 bit words. VERSION 3.0.13 diff --git a/src/shapefile.py b/src/shapefile.py index 36fd05c7..ac645912 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -4134,7 +4134,7 @@ def _header(self) -> None: def _write_file_length(self) -> None: # self.file required to be at correct position, e.g. # if called by self._header - + # Calculate size as 16-bit words size_B = self._shp_file_length_B() size_16b_words = size_B // 2 diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py index ef74570a..b9b42a14 100644 --- a/tests/hypothesis_tests.py +++ b/tests/hypothesis_tests.py @@ -163,7 +163,6 @@ def multipointM_from_xyms(point_ms: tuple[float, float, float | None], oid_: int multipointm = builds(multipointM_from_xyms, lists(tuples(xs, ys, ms), min_size=1), oid) @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(expected=multipointm, i=integers(min_value=1)) def test_MultiPointM_roundtrips( expected: shp.MultiPointM, @@ -196,7 +195,6 @@ def multipointZ_from_xyzms(pointz_ms: tuple[float, float, float, float | None], @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(expected=multipointz, i=integers(min_value=1)) def test_MultiPointZ_roundtrips( expected: shp.MultiPointZ, @@ -248,7 +246,6 @@ def test_Polyline_roundtrips( assert actual.oid == expected.oid @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(expected=polylinem, i=integers(min_value=1)) def test_PolylineM_roundtrips( expected: shp.PolylineM, @@ -273,7 +270,6 @@ def test_PolylineM_roundtrips( assert actual.oid == expected.oid @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(expected=polylinez, i=integers(min_value=1)) def test_PolylineZ_roundtrips( expected: shp.PolylineZ, @@ -327,7 +323,6 @@ def test_Polygon_roundtrips( assert actual.oid == expected.oid @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(expected=polygonm, i=integers(min_value=1)) def test_PolygonM_roundtrips( expected: shp.PolygonM, @@ -352,7 +347,6 @@ def test_PolygonM_roundtrips( assert actual.oid == expected.oid @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(expected=polygonz, i=integers(min_value=1)) def test_PolygonZ_roundtrips( expected: shp.PolygonZ, @@ -392,7 +386,6 @@ def multipatch_from_xyzms_and_types( @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(expected=multipatch, i=integers(min_value=1)) def test_MultiPatch_roundtrips( expected: shp.MultiPatch, @@ -418,6 +411,12 @@ def test_MultiPatch_roundtrips( assert actual.oid == expected.oid assert actual.partTypes == expected.partTypes, f"{type(actual.partTypes)=}, {type(expected.partTypes)=}" +MAX_FILE_SIZE_16bw = (1 << 31) - 1 # This bound comes from encoding the + # actual file size (in 16 bit words) + # as a 4 byte signed integer. +MAX_NUM_SHAPES = (MAX_FILE_SIZE_16bw - 50) // 6 # Minus 100B header, 12 bytes + # per record (the minimum for + # a Null shape). shape_codes_names_and_strategies = [ # (0, "Null Shape"), @@ -438,7 +437,7 @@ def test_MultiPatch_roundtrips( def code_and_shape_strat_from_triple(t): x, _name, shapes = t - return tuples(just(x), lists(shapes, min_size = 0)) # Empty shp files are in the esri spec. + return tuples(just(x), lists(shapes, min_size = 0, max_size=MAX_NUM_SHAPES)) # Empty shp files are in the esri spec. codes_and_shapes_strats = [ code_and_shape_strat_from_triple(t) @@ -448,7 +447,6 @@ def code_and_shape_strat_from_triple(t): codes_and_shapes = one_of(codes_and_shapes_strats) @pytest.mark.hypothesis -# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large]) @given(codes_and_shapes=codes_and_shapes) def test_shp_reader_writer_roundtrip(codes_and_shapes)-> None: code_ex, expected_shapes = codes_and_shapes @@ -483,3 +481,62 @@ def test_shp_reader_writer_roundtrip(codes_and_shapes)-> None: assert actual.partTypes == expected.partTypes, f"{type(actual.partTypes)=}, {type(expected.partTypes)=}" else: assert not hasattr(expected, "partTypes") + + + +# SHX_UB = MAX_FILE_SIZE_16bw - 50 + + +# ## Surprisingly slow. Doesn't add enough value to merit waiting for +# @composite +# def positive_ints_with_bounded_sum( +# draw, +# min_x: int = 6, +# upper_bound: int = SHX_UB, +# max_len: int = MAX_NUM_SHAPES, +# ): +# assert min_x >= 1 +# assert upper_bound >= max_len +# length = draw(integers(min_value=0, max_value=max_len)) +# if length == 0: +# return [] + +# max_x = upper_bound - (length - 1) +# result = [] + +# for i in range(length): +# if max_x < min_x : +# break +# x = draw(integers(min_value=min_x, max_value=max_x)) +# result.append(x) +# max_x -= x + +# return result + + +@pytest.mark.hypothesis +@given(codes_and_shapes=codes_and_shapes) +def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: + code_ex, expected_shapes = codes_and_shapes + + sizes_B = [] + offsets_B = [] + offset_B = 100 # "Thus, the offset for the first record in the + # main file is 50 (16bw), given the 100-byte header. " + shp_stream = io.BytesIO() + shx_stream = io.BytesIO() + with shp.ShpWriter(shp=shp_stream, shapeType=code_ex) as shp_w: + with shp.ShxWriter(shx=shx_stream, shp_writer = shp_w) as shx_w: + for shape in expected_shapes: + offset_B, size_B = shp_w.shape(shape) + sizes_B.append(size_B) + offsets_B.append(offset_B) + shx_w._shx_record(offset_B, size_B) + + shx_stream.seek(0) + + with shp.ShxReader(shx=shx_stream) as r: + assert r.numShapes == len(expected_shapes) + assert r.offsets == offsets_B + assert r.shape_lengths_B == sizes_B +