Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ venv/
.mypy_cache/
.pytest_cache/
.ruff_cache/
.hypothesis
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ part of your geospatial project.

# Version Changes

## 3.0.14.dev
### ShpWriter.shape API Tweak (small breaking change).
- Make ShpWriter.shape return the shape's length in bytes (the
same unit as its offset), not in 16-bit words.


## 3.0.13
### Bug fix
- Fix bug when reading empty shp files.
Expand Down
8 changes: 7 additions & 1 deletion changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
VERSION 3.0.14.dev

2026-06-20
* API Tweak (small breaking change). Make ShpWriter.shape return the shape's length in bytes
(the same unit as its offset), not in 16-bit words.

VERSION 3.0.13

2026-06-19
Expand All @@ -21,7 +27,7 @@ VERSION 3.0.11

2026-06-04
Edge case handling
* Raise ShapefileException i) when creating Non-null Shapes without (or with empty) points
* Raise ShapefileException: i) when creating Non-null Shapes without (or with empty) points
and ii) when creating Null Shapes with non-empty points.
* Ensure Shape.z and Shape.partTypes are _Arrays.
* Make Shape stricter about its args, e.g. only points or lines, only one point for Points.
Expand Down
25 changes: 14 additions & 11 deletions src/shapefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from __future__ import annotations

__version__ = "3.0.13"
__version__ = "3.0.14.dev"

import abc
import array
Expand Down Expand Up @@ -1607,7 +1607,6 @@ def _write_ms_to_byte_stream(
raise ShapefileException(
f"Failed to write measure extremes for record {i}. Expected floats"
)

ms_to_encode = replace_None_with_NODATA(s.m)
try:
num_bytes_written += b_io.write(pack(f"<{len(s.m)}d", *ms_to_encode))
Expand Down Expand Up @@ -4135,7 +4134,11 @@ def _header(self) -> None:
def _write_file_length(self) -> None:
    """Write the file length to ``self.file`` as a big-endian 4-byte int.

    The length field is expressed in 16-bit words, so the byte count
    returned by ``self._shp_file_length_B()`` is halved before packing.

    ``self.file`` is required to be at the correct position already,
    e.g. if called by ``self._header``.
    """
    size_B = self._shp_file_length_B()
    # The header's length field counts 16-bit words, not bytes.
    size_16b_words = size_B // 2
    # Write the field exactly once; a second write would shift every
    # header field that follows it by four bytes.
    self.file.write(pack(">i", size_16b_words))

def _shp_file_length_B(self) -> int:
"""Calculates the file length of the shp file."""
Expand All @@ -4144,9 +4147,7 @@ def _shp_file_length_B(self) -> int:

# Calculate size of all shapes
self.file.seek(0, 2)
size_16b_words = self.file.tell()
# Calculate size as 16-bit words
size_B = size_16b_words // 2
size_B = self.file.tell()
# Return to start
self.file.seek(start_B)
return size_B
Expand Down Expand Up @@ -4200,6 +4201,7 @@ def shape(
self,
s: Shape | HasGeoInterface | GeoJSONHomogeneousGeometryObject,
) -> tuple[int, int]:
"""Appends s to the file. Returns shape's offset and length in B"""
if not isinstance(s, Shape):
if isinstance(s, HasGeoInterface):
shape_dict = s.__geo_interface__
Expand All @@ -4216,6 +4218,7 @@ def shape(
return self._shp_record(s)

def _shp_record(self, s: Shape) -> tuple[int, int]:
"""Appends s to the file. Returns shape's offset and length in B"""
offset = self.file.tell()
self.shpNum += 1

Expand Down Expand Up @@ -4274,7 +4277,7 @@ def _shp_record(self, s: Shape) -> tuple[int, int]:
# Flush to file.
b_io.seek(0)
self.file.write(b_io.read())
return offset, length_16bw
return offset, n


class ShxWriter(_ShpShxHeaderWriter):
Expand All @@ -4288,7 +4291,7 @@ def __init__(
super().__init__(file=shx)
self.shp_writer = shp_writer

def _shx_record(self, offset_B: int, length_16bw: int) -> None:
def _shx_record(self, offset_B: int, length_B: int) -> None:
"""Writes the shx records."""

f = self.file
Expand All @@ -4299,7 +4302,7 @@ def _shx_record(self, offset_B: int, length_16bw: int) -> None:
"It's over 4GB, perhaps split the .shp or the Shapefile into smaller ones? "
)

offset_16bw = offset_B // 2
offset_16bw, length_16bw = offset_B // 2, length_B // 2
f.write(pack(">2i", offset_16bw, length_16bw))

def _header(self) -> None:
Expand Down Expand Up @@ -4454,9 +4457,9 @@ def shape(
# Balance if already not balanced
if self.autoBalance and self.dbf_writer.recNum < self.shp_writer.shpNum:
self.balance()
offset_B, length_16bw = self.shp_writer.shape(s)
offset_B, length_B = self.shp_writer.shape(s)
if self._shx:
self.shx_writer._shx_record(offset_B, length_16bw)
self.shx_writer._shx_record(offset_B, length_B)

def record(
self,
Expand Down
75 changes: 66 additions & 9 deletions tests/hypothesis_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ def multipointM_from_xyms(point_ms: tuple[float, float, float | None], oid_: int
multipointm = builds(multipointM_from_xyms, lists(tuples(xs, ys, ms), min_size=1), oid)

@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(expected=multipointm, i=integers(min_value=1))
def test_MultiPointM_roundtrips(
expected: shp.MultiPointM,
Expand Down Expand Up @@ -196,7 +195,6 @@ def multipointZ_from_xyzms(pointz_ms: tuple[float, float, float, float | None],


@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(expected=multipointz, i=integers(min_value=1))
def test_MultiPointZ_roundtrips(
expected: shp.MultiPointZ,
Expand Down Expand Up @@ -248,7 +246,6 @@ def test_Polyline_roundtrips(
assert actual.oid == expected.oid

@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(expected=polylinem, i=integers(min_value=1))
def test_PolylineM_roundtrips(
expected: shp.PolylineM,
Expand All @@ -273,7 +270,6 @@ def test_PolylineM_roundtrips(
assert actual.oid == expected.oid

@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(expected=polylinez, i=integers(min_value=1))
def test_PolylineZ_roundtrips(
expected: shp.PolylineZ,
Expand Down Expand Up @@ -327,7 +323,6 @@ def test_Polygon_roundtrips(
assert actual.oid == expected.oid

@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(expected=polygonm, i=integers(min_value=1))
def test_PolygonM_roundtrips(
expected: shp.PolygonM,
Expand All @@ -352,7 +347,6 @@ def test_PolygonM_roundtrips(
assert actual.oid == expected.oid

@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(expected=polygonz, i=integers(min_value=1))
def test_PolygonZ_roundtrips(
expected: shp.PolygonZ,
Expand Down Expand Up @@ -392,7 +386,6 @@ def multipatch_from_xyzms_and_types(


@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(expected=multipatch, i=integers(min_value=1))
def test_MultiPatch_roundtrips(
expected: shp.MultiPatch,
Expand All @@ -418,6 +411,12 @@ def test_MultiPatch_roundtrips(
assert actual.oid == expected.oid
assert actual.partTypes == expected.partTypes, f"{type(actual.partTypes)=}, {type(expected.partTypes)=}"

# The file size (in 16-bit words) is encoded as a 4-byte signed integer,
# which caps it at the largest positive value of that type.
MAX_FILE_SIZE_16bw = 2**31 - 1

# Words left after the 100-byte (50-word) header, divided by the 12 bytes
# (6 words) per record that a Null shape, the smallest record, occupies.
MAX_NUM_SHAPES = (MAX_FILE_SIZE_16bw - 100 // 2) // (12 // 2)

shape_codes_names_and_strategies = [
# (0, "Null Shape"),
Expand All @@ -438,7 +437,7 @@ def test_MultiPatch_roundtrips(

def code_and_shape_strat_from_triple(t):
    """Build a strategy of ``(shape type code, list of shapes)`` pairs.

    ``t`` is a ``(code, name, shapes_strategy)`` triple; the name is not used.
    The generated list may be empty, because empty shp files are in the
    esri spec, and holds at most ``MAX_NUM_SHAPES`` shapes.
    """
    code, _name, shapes = t
    bounded_shape_lists = lists(shapes, min_size=0, max_size=MAX_NUM_SHAPES)
    return tuples(just(code), bounded_shape_lists)

codes_and_shapes_strats = [
code_and_shape_strat_from_triple(t)
Expand All @@ -448,7 +447,6 @@ def code_and_shape_strat_from_triple(t):
codes_and_shapes = one_of(codes_and_shapes_strats)

@pytest.mark.hypothesis
# @settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large])
@given(codes_and_shapes=codes_and_shapes)
def test_shp_reader_writer_roundtrip(codes_and_shapes)-> None:
code_ex, expected_shapes = codes_and_shapes
Expand Down Expand Up @@ -483,3 +481,62 @@ def test_shp_reader_writer_roundtrip(codes_and_shapes)-> None:
assert actual.partTypes == expected.partTypes, f"{type(actual.partTypes)=}, {type(expected.partTypes)=}"
else:
assert not hasattr(expected, "partTypes")



# SHX_UB = MAX_FILE_SIZE_16bw - 50


# ## Surprisingly slow. Doesn't add enough value to merit waiting for
# @composite
# def positive_ints_with_bounded_sum(
# draw,
# min_x: int = 6,
# upper_bound: int = SHX_UB,
# max_len: int = MAX_NUM_SHAPES,
# ):
# assert min_x >= 1
# assert upper_bound >= max_len
# length = draw(integers(min_value=0, max_value=max_len))
# if length == 0:
# return []

# max_x = upper_bound - (length - 1)
# result = []

# for i in range(length):
# if max_x < min_x :
# break
# x = draw(integers(min_value=min_x, max_value=max_x))
# result.append(x)
# max_x -= x

# return result


@pytest.mark.hypothesis
@given(codes_and_shapes=codes_and_shapes)
def test_shx_reader_writer_roundtrip(codes_and_shapes) -> None:
    """Check that shx records written by ShxWriter are read back unchanged.

    Every shape is appended with ``ShpWriter.shape``; the offset and length
    it returns are recorded and passed on to ``ShxWriter._shx_record``.
    The shx stream is then read with ``ShxReader`` and its shape count,
    offsets and shape lengths are compared with the recorded values.
    """
    code_ex, expected_shapes = codes_and_shapes

    # Byte offsets and byte lengths as reported by the shp writer.  Per the
    # spec quoted by the original author: "the offset for the first record
    # in the main file is 50 (16bw), given the 100-byte header."
    offsets_B = []
    sizes_B = []

    shp_stream = io.BytesIO()
    shx_stream = io.BytesIO()
    with shp.ShpWriter(shp=shp_stream, shapeType=code_ex) as shp_w:
        with shp.ShxWriter(shx=shx_stream, shp_writer=shp_w) as shx_w:
            for shape in expected_shapes:
                offset_B, size_B = shp_w.shape(shape)
                offsets_B.append(offset_B)
                sizes_B.append(size_B)
                shx_w._shx_record(offset_B, size_B)

    # Rewind so the reader starts at the beginning of the shx stream.
    shx_stream.seek(0)

    with shp.ShxReader(shx=shx_stream) as r:
        assert r.numShapes == len(expected_shapes)
        assert r.offsets == offsets_B
        assert r.shape_lengths_B == sizes_B

Loading