From 183f969f8bb1cf0d04837b6d10aa4e0119b46f6e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 21 Jun 2026 12:10:36 +0100 Subject: [PATCH 1/5] Add dbf round trip test Fix bugs in test --- tests/hypothesis_tests.py | 97 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py index 00b7505..efab97a 100644 --- a/tests/hypothesis_tests.py +++ b/tests/hypothesis_tests.py @@ -16,6 +16,9 @@ one_of, tuples, sampled_from, + text, + characters, + dates, ) import shapefile as shp @@ -518,3 +521,97 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: assert r.offsets == offsets_B assert r.shape_lengths_B == sizes_B + + +DBF_FIELD_TYPES = { + "C": {"max_decimal" : 0}, + "N": {"max_decimal" : 253}, + "F": {"max_decimal" : 253}, + "L": {"max_length": 1}, + "D": {"min_length": 1, "max_length": 8, "max_decimal" : 0}, +} + +@composite +def dbf_field(draw): + field_type, bounds_dict = draw(sampled_from(list(DBF_FIELD_TYPES.items()))) + + name = draw( + text( + alphabet=characters(whitelist_categories=("Lu", "Nd"), whitelist_characters="_"), + min_size=1, + max_size=10, + ) + ) + + max_length = bounds_dict.get("max_length", 254) + min_length = bounds_dict.get("min_length", 1) + max_decimal = bounds_dict.get("max_decimal", 0) + length = draw(integers(min_value=min_length, max_value=max_length)) + decimal = draw(integers(min_value=0, max_value=min(length - 1, max_decimal))) + + + return {"name": name, "field_type": field_type, "length": length, "decimal": decimal} + + +def record_value_for_field(name: str, field_type: str, length: int, decimal: int = 0): + + if field_type == "C": + return text( + alphabet=characters(blacklist_categories=("Cs",), blacklist_characters="\x00"), + min_size=0, + max_size=length, + ) + if field_type in {"N", "F"}: + + int_digits = length if decimal == 0 else length - decimal - 1 + min_int = -(10 ** (int_digits - 1) - 1) + max_int = 10 ** int_digits - 1 + + if decimal == 0: + return integers(min_value=min_int, max_value=max_int) + + return floats( + min_value=min_int - 1, # + eps + max_value=max_int + 1, # - eps + ) + if field_type == "L": + return sampled_from([True, False, None]) + if field_type == "D": + return dates().map(lambda d: d.strftime("%Y%m%d")) + + raise ValueError(f"Unsupported: {field_type=}") + + +@composite +def dbf_fields_and_records( + draw, + max_fields=10, # In DbfWriter.__init__, max_num_fields: int = 2046, + max_records=20, + ): + + fields = draw(lists(dbf_field(), min_size=1, max_size=max_fields)) + + record_strategy = tuples(*(record_value_for_field(**field) for field in fields)) + + records = draw(lists(record_strategy, min_size=0, max_size=max_records)) + + return fields, records + + + +@pytest.mark.hypothesis +@given(fields_and_records=dbf_fields_and_records()) +def test_dbf_reader_writer_roundtrip(fields_and_records)-> None: + fields, records = fields_and_records + stream = io.BytesIO() + with shp.DbfWriter(dbf=stream) as dbf_w: + for field in fields: + dbf_w.field(field) + for record in records: + dbf_w.record(record) + stream.seek(0) + with shp.DbfReader(dbf=stream) as r: + for f_r, f_w in itertools.zip_longest(r.fields, fields): + assert f_r._asdict() == f_w + for expected, actual in itertools.zip_longest(records, r.records()): + assert actual == list(expected) From a24cfab95a8deeeb5e14e7a76e62e68603bad489 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 21 Jun 2026 12:31:58 +0100 Subject: [PATCH 2/5] Avoid errors due to precision limits and skip deletion flag field --- tests/hypothesis_tests.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py index efab97a..4405238 100644 --- a/tests/hypothesis_tests.py +++ b/tests/hypothesis_tests.py @@ -525,8 +525,10 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: DBF_FIELD_TYPES = { "C": {"max_decimal" : 0}, - "N": {"max_decimal" : 253}, - "F": {"max_decimal" : 253}, + "N": {"max_decimal" : 253, "max_length": 23}, # max length=23 to avoid error due to precision limit, e.g.: + "F": {"max_decimal" : 253, "max_length": 23}, # hypothesis.errors.InvalidArgument: max_value=100000000000000000000000 + # cannot be exactly represented as a float of + # width 64 - use max_value=1e+23 instead. "L": {"max_length": 1}, "D": {"min_length": 1, "max_length": 8, "max_decimal" : 0}, } @@ -571,8 +573,10 @@ def record_value_for_field(name: str, field_type: str, length: int, decimal: int return integers(min_value=min_int, max_value=max_int) return floats( - min_value=min_int - 1, # + eps - max_value=max_int + 1, # - eps + min_value=min_int - 1, + max_value=max_int + 1, + exclude_min=True, + exclude_max=True, ) if field_type == "L": return sampled_from([True, False, None]) @@ -611,7 +615,9 @@ def test_dbf_reader_writer_roundtrip(fields_and_records)-> None: dbf_w.record(record) stream.seek(0) with shp.DbfReader(dbf=stream) as r: - for f_r, f_w in itertools.zip_longest(r.fields, fields): + actual_fields = iter(r.fields) + next(actual_fields) # skip deletion flag + for f_r, f_w in itertools.zip_longest(actual_fields, fields): assert f_r._asdict() == f_w for expected, actual in itertools.zip_longest(records, r.records()): assert actual == list(expected) From 54d617fdc702287f424cf514033216430d5c126d Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 21 Jun 2026 18:14:56 +0100 Subject: [PATCH 3/5] Bug fixes --- tests/hypothesis_tests.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py index 4405238..5b5e15b 100644 --- a/tests/hypothesis_tests.py +++ b/tests/hypothesis_tests.py @@ -524,13 +524,13 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: DBF_FIELD_TYPES = { - "C": {"max_decimal" : 0}, - "N": {"max_decimal" : 253, "max_length": 23}, # max length=23 to avoid error due to precision limit, e.g.: - "F": {"max_decimal" : 253, "max_length": 23}, # hypothesis.errors.InvalidArgument: max_value=100000000000000000000000 + "C": {}, + "N": {"max_decimal" : 253, "max_length": 22}, # max length=23 to avoid error due to precision limit, e.g.: + "F": {"max_decimal" : 253, "max_length": 22}, # hypothesis.errors.InvalidArgument: max_value=100000000000000000000000 # cannot be exactly represented as a float of # width 64 - use max_value=1e+23 instead. "L": {"max_length": 1}, - "D": {"min_length": 1, "max_length": 8, "max_decimal" : 0}, + "D": {"min_length": 8, "max_length": 8}, } @composite @@ -548,30 +548,31 @@ def dbf_field(draw): max_length = bounds_dict.get("max_length", 254) min_length = bounds_dict.get("min_length", 1) max_decimal = bounds_dict.get("max_decimal", 0) - length = draw(integers(min_value=min_length, max_value=max_length)) - decimal = draw(integers(min_value=0, max_value=min(length - 1, max_decimal))) + size = draw(integers(min_value=min_length, max_value=max_length)) + decimal = draw(integers(min_value=0, max_value=min(size - 1, max_decimal))) - return {"name": name, "field_type": field_type, "length": length, "decimal": decimal} + return {"name": name, "field_type": field_type, "size": size, "decimal": decimal} -def record_value_for_field(name: str, field_type: str, length: int, decimal: int = 0): +def record_value_for_field(name: str, field_type: str, size: int, decimal: int = 0): if field_type == "C": return text( alphabet=characters(blacklist_categories=("Cs",), blacklist_characters="\x00"), min_size=0, - max_size=length, + max_size=size, ) if field_type in {"N", "F"}: - int_digits = length if decimal == 0 else length - decimal - 1 + int_digits = size if decimal == 0 else size - decimal - 1 min_int = -(10 ** (int_digits - 1) - 1) max_int = 10 ** int_digits - 1 if decimal == 0: return integers(min_value=min_int, max_value=max_int) + # Max finite float: 2**1023 * (2 - 2**(-52)) return floats( min_value=min_int - 1, max_value=max_int + 1, @@ -610,14 +611,16 @@ def test_dbf_reader_writer_roundtrip(fields_and_records)-> None: stream = io.BytesIO() with shp.DbfWriter(dbf=stream) as dbf_w: for field in fields: - dbf_w.field(field) + dbf_w.field(**field) for record in records: - dbf_w.record(record) + dbf_w.record(*record) stream.seek(0) with shp.DbfReader(dbf=stream) as r: actual_fields = iter(r.fields) next(actual_fields) # skip deletion flag for f_r, f_w in itertools.zip_longest(actual_fields, fields): - assert f_r._asdict() == f_w + actual_field_dict = f_r._asdict() + for k in ("field_type", "size", "decimal"): + assert actual_field_dict[k] == f_w[k], f"{k=}, {actual_field_dict[k]=}, {f_w[k]=}" for expected, actual in itertools.zip_longest(records, r.records()): assert actual == list(expected) From ff9d8fadc4689399766f4af799a2374877858464 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 21 Jun 2026 21:39:35 +0100 Subject: [PATCH 4/5] __dbfRecord => _record --- src/shapefile.py | 4 ++-- tests/hypothesis_tests.py | 31 +++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index ac64591..d66308e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3943,9 +3943,9 @@ def record( else: # Blank fields for empty record record = ["" for _ in range(fieldCount)] - self.__dbfRecord(record) + self._record(record) - def __dbfRecord(self, record: list[RecordValue]) -> None: + def _record(self, record: list[RecordValue]) -> None: """Writes the dbf records.""" f = self.file if self.recNum == 0: diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py index 5b5e15b..e0f0d65 100644 --- a/tests/hypothesis_tests.py +++ b/tests/hypothesis_tests.py @@ -1,7 +1,9 @@ from __future__ import annotations +import datetime import io import itertools +import string import pytest from hypothesis import HealthCheck, given, settings @@ -525,8 +527,8 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: DBF_FIELD_TYPES = { "C": {}, - "N": {"max_decimal" : 253, "max_length": 22}, # max length=23 to avoid error due to precision limit, e.g.: - "F": {"max_decimal" : 253, "max_length": 22}, # hypothesis.errors.InvalidArgument: max_value=100000000000000000000000 + "N": {"max_decimal" : 20, "max_length": 22}, # max length=23 to avoid error due to precision limit, e.g.: + "F": {"max_decimal" : 20, "max_length": 22}, # hypothesis.errors.InvalidArgument: max_value=100000000000000000000000 # cannot be exactly represented as a float of # width 64 - use max_value=1e+23 instead. "L": {"max_length": 1}, @@ -539,7 +541,7 @@ def dbf_field(draw): name = draw( text( - alphabet=characters(whitelist_categories=("Lu", "Nd"), whitelist_characters="_"), + alphabet=characters(codec="ascii"), min_size=1, max_size=10, ) @@ -549,17 +551,18 @@ def dbf_field(draw): min_length = bounds_dict.get("min_length", 1) max_decimal = bounds_dict.get("max_decimal", 0) size = draw(integers(min_value=min_length, max_value=max_length)) - decimal = draw(integers(min_value=0, max_value=min(size - 1, max_decimal))) + decimal = draw(integers(min_value=0, max_value=max(0,min(size - 2, max_decimal)))) return {"name": name, "field_type": field_type, "size": size, "decimal": decimal} +ascii_printable = string.ascii_letters + string.digits + string.punctuation #+ " " def record_value_for_field(name: str, field_type: str, size: int, decimal: int = 0): if field_type == "C": return text( - alphabet=characters(blacklist_categories=("Cs",), blacklist_characters="\x00"), + alphabet=ascii_printable, min_size=0, max_size=size, ) @@ -582,7 +585,7 @@ def record_value_for_field(name: str, field_type: str, size: int, decimal: int = if field_type == "L": return sampled_from([True, False, None]) if field_type == "D": - return dates().map(lambda d: d.strftime("%Y%m%d")) + return one_of(dates(), dates().map(lambda d: d.strftime("%Y%m%d"))) raise ValueError(f"Unsupported: {field_type=}") @@ -622,5 +625,17 @@ def test_dbf_reader_writer_roundtrip(fields_and_records)-> None: actual_field_dict = f_r._asdict() for k in ("field_type", "size", "decimal"): assert actual_field_dict[k] == f_w[k], f"{k=}, {actual_field_dict[k]=}, {f_w[k]=}" - for expected, actual in itertools.zip_longest(records, r.records()): - assert actual == list(expected) + for exp_rec, actual_rec in itertools.zip_longest(records, r.records()): + for expected, actual, field in itertools.zip_longest(exp_rec, actual_rec, fields): + field_type = field["field_type"] + decimal = field["decimal"] + if field_type == "D": + if isinstance(expected, datetime.date): + expected = expected.strftime("%Y%m%d") + if isinstance(actual, datetime.date): + actual = actual.strftime("%Y%m%d") + elif field_type in ("N", "F"): + expected = float(format(expected, f".{decimal}f")) + # elif field_type == "C": + # expected = expected.strip() + assert actual == expected, f"{actual=}, {expected=}, {field_type=}, {type(actual)=}, {type(expected)=}" From 36fc5eb424609f10ee45a54c5f58d473ff3ae256 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 21 Jun 2026 21:42:54 +0100 Subject: [PATCH 5/5] Allow a character for a minus sign in Float field --- tests/hypothesis_tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py index e0f0d65..7a7ebe3 100644 --- a/tests/hypothesis_tests.py +++ b/tests/hypothesis_tests.py @@ -528,8 +528,8 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: DBF_FIELD_TYPES = { "C": {}, "N": {"max_decimal" : 20, "max_length": 22}, # max length=23 to avoid error due to precision limit, e.g.: - "F": {"max_decimal" : 20, "max_length": 22}, # hypothesis.errors.InvalidArgument: max_value=100000000000000000000000 - # cannot be exactly represented as a float of + "F": {"max_decimal" : 20, "max_length": 22}, # hypothesis.errors.InvalidArgument: max_value=100000000000000000000000 + # cannot be exactly represented as a float of # width 64 - use max_value=1e+23 instead. "L": {"max_length": 1}, "D": {"min_length": 8, "max_length": 8}, @@ -551,7 +551,7 @@ def dbf_field(draw): min_length = bounds_dict.get("min_length", 1) max_decimal = bounds_dict.get("max_decimal", 0) size = draw(integers(min_value=min_length, max_value=max_length)) - decimal = draw(integers(min_value=0, max_value=max(0,min(size - 2, max_decimal)))) + decimal = draw(integers(min_value=0, max_value=max(0,min(size - 3, max_decimal)))) return {"name": name, "field_type": field_type, "size": size, "decimal": decimal}