robertoecf · robertoecf · Jun 16, 2026 · Jun 16, 2026 · gemini-code-assist · Jun 16, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,15 @@ adheres to [Semantic Versioning](https://semver.org/).
 
 ## [Unreleased]
 
+### Fixed
+
+- **`cvm holdings` ignored bare-digit CNPJs.** The CDA reader compared the
+  CNPJ argument to the stored value as raw strings, but CVM stores CNPJs
+  punctuated (`22.187.946/0001-41`). A user passing bare digits
+  (`22187946000141`) — exactly what the `--help` text advertises — got
+  "No holdings". Both sides are now normalized to digits before comparing,
+  so punctuated and bare forms return identical results.
+
 ## [0.3.1] — 2026-04-29
 
 Patch release fixing 5 bugs caught in adversarial review of v0.3.0 by

diff --git a/src/findata/sources/cvm/holdings.py b/src/findata/sources/cvm/holdings.py
@@ -29,6 +29,7 @@
 
 import csv
 import io
+import re
 import sys
 import zipfile
 
@@ -43,6 +44,13 @@
 
 CDA_URL = f"{CVM_BASE}/FI/DOC/CDA/DADOS/cda_fi_{{ym}}.zip"
 
+_NON_DIGIT = re.compile(r"\D")
+
+
+def _digits(value: str | None) -> str:
+    """Strip a CNPJ down to its digits so punctuated and bare forms compare equal."""
+    return _NON_DIGIT.sub("", value or "")
+
 
 class FundHolding(BaseModel):
     """One holding row (one asset position) inside one fund's portfolio."""
@@ -139,7 +147,7 @@ async def get_fund_holdings(
     """
     ym = f"{year}{month:02d}"
     raw = await get_bytes(CDA_URL.format(ym=ym), cache_ttl=86400)
-    cnpj_norm = cnpj.strip()
+    cnpj_norm = _digits(cnpj)
     wanted_blocks = {b.upper() for b in blocks} if blocks else None
     holdings: list[FundHolding] = []
     with zipfile.ZipFile(io.BytesIO(raw)) as zf:
@@ -152,7 +160,7 @@ async def get_fund_holdings(
             with zf.open(entry) as f:
                 reader = csv.DictReader(io.StringIO(f.read().decode("iso-8859-1")), delimiter=";")
                 for row in reader:
-                    row_cnpj = (row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO", "")).strip()
+                    row_cnpj = _digits(row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO"))
                     if row_cnpj != cnpj_norm:
                         continue
-                    row_cnpj = _digits(row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO"))
-                    if row_cnpj != cnpj_norm:
-                        continue
+                    row_cnpj = (row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO") or "").strip()
+                    if row_cnpj != cnpj_punctuated and row_cnpj != cnpj_bare:
+                        continue
-                    row_cnpj = _digits(row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO"))
-                    if row_cnpj != cnpj_norm:
-                        continue
+                    row_cnpj = (row.get("CNPJ_FUNDO_CLASSE") or row.get("CNPJ_FUNDO") or "").strip()
+                    if row_cnpj != cnpj_punctuated and row_cnpj != cnpj_bare:
+                        continue
                     h = _row_to_holding(row, block, include_raw=include_raw)

diff --git a/tests/test_cvm_funds.py b/tests/test_cvm_funds.py
@@ -129,6 +129,22 @@ async def test_holdings_block_whitelist() -> None:
     assert all(r.bloco == "BLC_4" for r in rows)
 
 
+@respx.mock
+async def test_holdings_bare_digit_cnpj_matches_punctuated() -> None:
+    """CVM stores CNPJs punctuated; a bare-digit query must still match.
+
+    Regression: the reader used to compare raw strings, so ``12345678000199``
+    never matched the stored ``12.345.678/0001-99`` and returned no holdings.
+    """
+    respx.get(re.compile(r"https://.*cda_fi_202603\.zip")).mock(
+        return_value=httpx.Response(200, content=_make_cda_zip())
+    )
+    rows = await get_fund_holdings("12345678000199", year=2026, month=3)
+    assert len(rows) == 3  # identical to the punctuated query
+    assert {r.bloco for r in rows} == {"BLC_4", "BLC_8"}
+    assert all(r.cnpj == "12.345.678/0001-99" for r in rows)  # output keeps CVM format
+
+
 # ── LAMINA ───────────────────────────────────────────────────────