diff --git a/CHANGELOG.md b/CHANGELOG.md index b79950a..38323ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ adheres to [Semantic Versioning](https://semver.org/). ## [Unreleased] +### Fixed + +- **`cvm holdings` ignored bare-digit CNPJs.** The CDA reader compared the + CNPJ argument to the stored value as raw strings, but CVM stores CNPJs + punctuated (`22.187.946/0001-41`). A user passing bare digits + (`22187946000141`) — exactly what the `--help` text advertises — got + "No holdings". Both sides are now normalized to digits before comparing, + so punctuated and bare forms return identical results. + ## [0.3.1] — 2026-04-29 Patch release fixing 5 bugs caught in adversarial review of v0.3.0 by diff --git a/src/findata/sources/cvm/holdings.py b/src/findata/sources/cvm/holdings.py index 2b78d79..58be55a 100644 --- a/src/findata/sources/cvm/holdings.py +++ b/src/findata/sources/cvm/holdings.py @@ -29,6 +29,7 @@ import csv import io +import re import sys import zipfile @@ -43,6 +44,13 @@ CDA_URL = f"{CVM_BASE}/FI/DOC/CDA/DADOS/cda_fi_{{ym}}.zip" +_NON_DIGIT = re.compile(r"\D") + + +def _digits(value: str | None) -> str: + """Strip a CNPJ down to its digits so punctuated and bare forms compare equal.""" + return _NON_DIGIT.sub("", value or "") + class FundHolding(BaseModel): """One holding row (one asset position) inside one fund's portfolio.""" @@ -139,7 +147,7 @@ async def get_fund_holdings( """ ym = f"{year}{month:02d}" raw = await get_bytes(CDA_URL.format(ym=ym), cache_ttl=86400) - cnpj_norm = cnpj.strip() + cnpj_norm = _digits(cnpj) wanted_blocks = {b.upper() for b in blocks} if blocks else None holdings: list[FundHolding] = [] with zipfile.ZipFile(io.BytesIO(raw)) as zf: @@ -152,7 +160,7 @@ async def get_fund_holdings( with zf.open(entry) as f: reader = csv.DictReader(io.StringIO(f.read().decode("iso-8859-1")), delimiter=";") for row in reader: - row_cnpj = (row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO", "")).strip() + row_cnpj = _digits(row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO")) if row_cnpj != cnpj_norm: continue h = _row_to_holding(row, block, include_raw=include_raw) diff --git a/tests/test_cvm_funds.py b/tests/test_cvm_funds.py index faa5da1..f10b211 100644 --- a/tests/test_cvm_funds.py +++ b/tests/test_cvm_funds.py @@ -129,6 +129,22 @@ async def test_holdings_block_whitelist() -> None: assert all(r.bloco == "BLC_4" for r in rows) +@respx.mock +async def test_holdings_bare_digit_cnpj_matches_punctuated() -> None: + """CVM stores CNPJs punctuated; a bare-digit query must still match. + + Regression: the reader used to compare raw strings, so ``12345678000199`` + never matched the stored ``12.345.678/0001-99`` and returned no holdings. + """ + respx.get(re.compile(r"https://.*cda_fi_202603\.zip")).mock( + return_value=httpx.Response(200, content=_make_cda_zip()) + ) + rows = await get_fund_holdings("12345678000199", year=2026, month=3) + assert len(rows) == 3 # identical to the punctuated query + assert {r.bloco for r in rows} == {"BLC_4", "BLC_8"} + assert all(r.cnpj == "12.345.678/0001-99" for r in rows) # output keeps CVM format + + # ── LAMINA ───────────────────────────────────────────────────────