mindee · felixdittrich92 · Jul 1, 2026 · Jun 25, 2026 · Jun 26, 2026 · Jun 26, 2026
diff --git a/.github/workflows/references.yml b/.github/workflows/references.yml
@@ -344,3 +344,95 @@ jobs:
           pip install -e .[viz,html] --upgrade
       - name: Benchmark latency
         run: python references/layout/latency.py lw_detr_s --it 5 --size 512
+
+  # Smoke test: runs the table structure recognition training script for one short epoch on a toy set.
+  train-table-structure-recognition:
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+        python:
+          - "3.10"
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v7
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          architecture: x64
+          python-version: ${{ matrix.python }}
+      - name: Cache python modules
+        uses: actions/cache@v5
+        with:
+          path: ~/.cache/pip
+          # The key hashes both requirement files; restore-keys falls back to any cache sharing the first hash.
+          key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[viz,html] --upgrade
+          pip install -r references/requirements.txt
+      - name: Download and extract toy set
+        run: |
+          wget https://github.com/mindee/doctr/releases/download/v1.0.1/toy_table_set-ea091e15.zip
+          sudo apt-get update && sudo apt-get install unzip -y
+          unzip toy_table_set-ea091e15.zip -d table_set
+      # The same toy set is used for both training and validation.
+      - name: Train for a short epoch
+        run: python references/table/train.py tablecenternet --train_path ./table_set --val_path ./table_set -b 2 --epochs 1
+
+  # Smoke test: runs the table structure recognition evaluation script on a toy set.
+  evaluate-table-structure-recognition:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python: ["3.10"]
+    steps:
+      - uses: actions/checkout@v7
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python }}
+          architecture: x64
+      - name: Cache python modules
+        uses: actions/cache@v5
+        with:
+          path: ~/.cache/pip
+          # This job installs references/requirements.txt, so that file is hashed into the key as well;
+          # restore-keys falls back to any cache that shares the requirements-pt.txt hash.
+          key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[viz,html] --upgrade
+          pip install -r references/requirements.txt
+      - name: Download and extract toy set
+        run: |
+          wget https://github.com/mindee/doctr/releases/download/v1.0.1/toy_table_set-ea091e15.zip
+          sudo apt-get update && sudo apt-get install unzip -y
+          unzip toy_table_set-ea091e15.zip -d table_set
+      - name: Evaluate table structure recognition
+        run: python references/table/evaluate.py tablecenternet ./table_set
+
+  # Smoke test: runs the table structure recognition latency benchmark for a few iterations.
+  latency-table-structure-recognition:
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+        python:
+          - "3.10"
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v7
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          architecture: x64
+          python-version: ${{ matrix.python }}
+      - name: Cache python modules
+        uses: actions/cache@v5
+        with:
+          key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}
+          path: ~/.cache/pip
+      # Only the package itself is installed here; no dataset is downloaded for this job.
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[viz,html] --upgrade
+      - name: Benchmark latency
+        run: python references/table/latency.py tablecenternet --it 5 --size 512
diff --git a/docs/source/modules/models.rst b/docs/source/modules/models.rst
@@ -88,6 +88,14 @@ doctr.models.layout
 .. autofunction:: doctr.models.layout.layout_predictor
 
 
+doctr.models.table_structure
+----------------------------
+
+.. autofunction:: doctr.models.table_structure.tablecenternet
+
+.. autofunction:: doctr.models.table_structure.table_predictor
+
+
 doctr.models.recognition
 ------------------------
 

diff --git a/docs/source/using_doctr/custom_models_training.rst b/docs/source/using_doctr/custom_models_training.rst
@@ -9,6 +9,7 @@ For details on the training process and the necessary data and data format, refe
 - `detection <https://github.com/mindee/doctr/tree/main/references/detection#readme>`_
 - `recognition <https://github.com/mindee/doctr/tree/main/references/recognition#readme>`_
 - `layout <https://github.com/mindee/doctr/tree/main/references/layout#readme>`_
+- `table structure <https://github.com/mindee/doctr/tree/main/references/table#readme>`_
 
 If you’re looking for a lightweight yet efficient tool to annotate small amounts of data, especially tailored for docTR,
 check out the `docTR Labeling Tool <https://github.com/text2knowledge/docTR-Labeler>`_.

diff --git a/docs/source/using_doctr/sharing_models.rst b/docs/source/using_doctr/sharing_models.rst
@@ -70,6 +70,8 @@ We suggest using the following naming conventions for your models:
 
 **Layout:** ``doctr-<architecture>``
 
+**Table structure:** ``doctr-<architecture>``
+
 
 Classification
 --------------
@@ -113,3 +115,13 @@ Layout
 +=================================+===================================================+========================+
 | lw_detr_s (dummy)               | Felix92/doctr-dummy-torch-lw-detr-s               | PyTorch                |
 +---------------------------------+---------------------------------------------------+------------------------+
+
+
+Table structure
+---------------
+
++---------------------------------+---------------------------------------------------+------------------------+
+|        **Architecture**         |            **Repo_ID**                            |     **Framework**      |
++=================================+===================================================+========================+
+| tablecenternet (dummy)          | Felix92/doctr-dummy-torch-tablecenternet          | PyTorch                |
++---------------------------------+---------------------------------------------------+------------------------+
diff --git a/docs/source/using_doctr/using_models.rst b/docs/source/using_doctr/using_models.rst
@@ -29,6 +29,8 @@ Which predictor should I use?
      - :py:meth:`detection_predictor <doctr.models.detection_predictor>`
    * - Transcribe pre-cropped word images to strings
      - :py:meth:`recognition_predictor <doctr.models.recognition_predictor>`
+   * - Detect the structure of a table (cell bounding-boxes and logical coordinates)
+     - :py:meth:`table_predictor <doctr.models.table_structure.table_predictor>`
 
 For :doc:`custom model loading <custom_models_training>` or sharing models, see the dedicated pages.
 
@@ -121,8 +123,8 @@ Text Recognition
 The task consists of transcribing the character sequence in a given image.
 
 
-Available recognition architectures
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Available architectures
+^^^^^^^^^^^^^^^^^^^^^^^
 
 The following architectures are currently supported:
 
@@ -256,6 +258,52 @@ For instance, this snippet instantiates a layout predictor able to detect text o
     predictor = layout_predictor('lw_detr_s', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)
 
 
+Table Structure Recognition
+---------------------------
+
+The task consists of parsing the structure of a table into a machine-readable representation: localizing every
+cell (the spatial structure) and recovering the rows and columns each cell spans (the logical structure).
+
+Available table architectures
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following architectures are currently supported:
+
+* :py:meth:`tablecenternet <doctr.models.table_structure.tablecenternet>`
+
+For a comprehensive comparison, we have compiled a detailed benchmark on a publicly available dataset:
+
++--------------------------------------------------+-----------------+---------------+--------------+---------------+------------+-------------------+--------------------+
+| **Architecture**                                 | **Input shape** | **# params**  | **Recall**   | **Precision** | **F1**     | **Structure acc** | **sec/it (B: 1)**  |
++==================================================+=================+===============+==============+===============+============+===================+====================+
+| tablecenternet                                   | (1024, 1024, 3) | 7.1 M         | 82.31        | 96.01         | 88.64      | 77.53             | 0.7                |
++--------------------------------------------------+-----------------+---------------+--------------+---------------+------------+-------------------+--------------------+
+
+.. note::
+
+    The reported metrics are produced by ``references/table/evaluate.py`` using the
+    :py:class:`TableCellMetric <doctr.utils.metrics.TableCellMetric>`: cell-detection **Recall**, **Precision** and
+    **F1** (cells matched above an IoU threshold of 0.5), and **Structure acc**, the share of matched cells whose
+    logical (row/column) coordinates are correctly predicted.
+
+Table structure predictors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:py:meth:`table_predictor <doctr.models.table_structure.table_predictor>` wraps your table model so it can be used
+directly on document images. For each page, it returns the list of detected cells (each with its geometry, confidence
+score and logical coordinates) together with the inferred number of rows and columns.
+
+.. code:: python3
+
+    import numpy as np
+    from doctr.models import table_predictor
+    model = table_predictor('tablecenternet', pretrained=True)
+    table_crop = (255 * np.random.rand(800, 600, 3)).astype(np.uint8)
+    out = model([table_crop])
+    # out[0] -> {"cells": [{"geometry": ..., "score": ..., "row_start": 0, "row_end": 0,
+    #            "col_start": 0, "col_end": 0}, ...], "num_rows": ..., "num_cols": ...}
+
+
 End-to-End OCR
 --------------
 
@@ -673,4 +721,3 @@ learned confusions, or a ``{forbidden_char: allowed_char}`` dict to override spe
     handle = add_whitelist(predictor, VOCABS["latin"], strategy="nearest")
     out = predictor(input_page)
     handle.remove()
-
diff --git a/doctr/datasets/table_structure.py b/doctr/datasets/table_structure.py
@@ -32,7 +32,9 @@
             ...
         }
 
-    Each sample yields the image and a ``{"cells": (N, 4, 2) relative polygons, "logic": (N, 4)}`` target.
+    Each sample yields the image and a target containing relative cells and their logical coordinates. Cells have
+    shape `(N, 4)` by default, or `(N, 4, 2)` when `use_polygons=True`. Logical coordinates have shape
+    `(N, 4)`.
 
     >>> from doctr.datasets import TableStructureDataset
     >>> from doctr.transforms import Resize
@@ -45,10 +47,17 @@
     Args:
         img_folder: folder with all the dataset images
         label_path: path to the JSON labels
+        use_polygons: whether to keep cell polygons instead of converting them to straight boxes
         **kwargs: keyword arguments from `AbstractDataset` (e.g. ``img_transforms``, ``sample_transforms``)
     """
 
-    def __init__(self, img_folder: str, label_path: str, **kwargs: Any) -> None:
+    def __init__(
+        self,
+        img_folder: str,
+        label_path: str,
+        use_polygons: bool = False,
+        **kwargs: Any,
+    ) -> None:
         super().__init__(img_folder, **kwargs)
 
         if not os.path.exists(label_path):
@@ -69,6 +78,8 @@
                 raise ValueError(f"cells are expected to have shape (N, 4, 2), got {cells.shape}")
             if logic.shape[0] != cells.shape[0] or logic.shape[1] != 4:  # pragma: no cover
                 raise ValueError(f"logic is expected to have shape (N, 4), got {logic.shape}")
+            if not use_polygons:
+                cells = np.concatenate((cells.min(axis=1), cells.max(axis=1)), axis=1)
             self.data.append((img_name, {"cells": cells, "logic": logic}))
 
     # NOTE: Override basic dataset method __getitem__ to handle table-specific targets

diff --git a/doctr/models/__init__.py b/doctr/models/__init__.py
@@ -2,5 +2,6 @@
 from .detection import *
 from .recognition import *
 from .layout import *
+from .table_structure import *
 from .zoo import *
 from .factory import *
diff --git a/doctr/models/factory/hub.py b/doctr/models/factory/hub.py
@@ -30,6 +30,7 @@
     "detection": models.detection.zoo.ARCHS,
     "recognition": models.recognition.zoo.ARCHS,
     "layout": models.layout.zoo.ARCHS,
+    "table_structure": models.table_structure.zoo.ARCHS,
 }
 
 
@@ -96,8 +97,8 @@ def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:  #
 
     if run_config is None and arch is None:
         raise ValueError("run_config or arch must be specified")
-    if task not in ["classification", "detection", "recognition", "layout"]:
-        raise ValueError("task must be one of classification, detection, recognition, layout")
+    if task not in ["classification", "detection", "recognition", "layout", "table_structure"]:
+        raise ValueError("task must be one of classification, detection, recognition, layout, table_structure")
 
     # default readme
     readme = f"""---
@@ -218,6 +219,8 @@ def from_hub(repo_id: str, **kwargs: Any):
         model = models.recognition.__dict__[arch](pretrained=False, input_shape=cfg["input_shape"], vocab=cfg["vocab"])
     elif task == "layout":
         model = models.layout.__dict__[arch](pretrained=False, class_names=cfg["class_names"])
+    elif task == "table_structure":
+        model = models.table_structure.__dict__[arch](pretrained=False)
 
     # update model cfg
     model.cfg = cfg

diff --git a/doctr/models/layout/lw_detr/pytorch.py b/doctr/models/layout/lw_detr/pytorch.py
@@ -16,7 +16,7 @@
 
 from doctr.models.classification import vit_det_m, vit_det_s
 
-from ...utils import load_pretrained_params
+from ...utils import _bf16_to_float32, load_pretrained_params
 from .base import _LWDETR, LWDETRPostProcessor
 from .layers import (
     LWDETRDecoder,
@@ -556,6 +556,9 @@ def forward(
 
         out: dict[str, Any] = {}
 
+        logits = _bf16_to_float32(logits)
+        pred_boxes = _bf16_to_float32(pred_boxes)
+
         if self.exportable:
             out["logits"] = logits
             out["pred_boxes"] = pred_boxes

diff --git a/doctr/models/modules/layers/pytorch.py b/doctr/models/modules/layers/pytorch.py
@@ -171,6 +171,21 @@ def __init__(
         self.bias = nn.Parameter(torch.empty(out_channels))
         channels_ = deformable_groups * 3 * kernel_size[0] * kernel_size[1]
         self.conv_offset_mask = nn.Conv2d(in_channels, channels_, kernel_size, stride, padding, bias=True)
+        self.reset_parameters()
+
+    def reset_parameters(self) -> None:
+        """Initialize the layer so that it starts out behaving like a plain convolution."""
+        # Standard DCN initialization, part 1: zero the offset/mask predictor so the initial offsets are 0
+        # (and the modulation is 0.5). The output bias is zeroed as well.
+        nn.init.zeros_(self.conv_offset_mask.weight)
+        nn.init.zeros_(self.conv_offset_mask.bias)  # type: ignore[arg-type]
+        nn.init.zeros_(self.bias)
+        # Part 2: draw the regular conv weight like a vanilla conv, uniformly in [-1/sqrt(fan_in), 1/sqrt(fan_in)],
+        # where fan_in is the product of every weight dimension except the first. Skipping this would leave
+        # weight/bias at their uninitialized torch.empty values, which makes the deformable conv explode and
+        # the loss diverge to NaN.
+        fan_in = self.weight.shape[1:].numel()
+        bound = 1.0 / (fan_in**0.5)
+        nn.init.uniform_(self.weight, -bound, bound)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         out = self.conv_offset_mask(x)

diff --git a/doctr/models/table_structure/__init__.py b/doctr/models/table_structure/__init__.py
@@ -0,0 +1,2 @@
+from .zoo import *
+from .tablecenternet import *
diff --git a/doctr/models/table_structure/predictor/__init__.py b/doctr/models/table_structure/predictor/__init__.py
@@ -0,0 +1 @@
+from .pytorch import *
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .zoo import *
Check warning on line 1 in doctr/models/table_structure/__init__.py View check run for this annotation Codacy Production / Codacy Static Code Analysis doctr/models/table_structure/__init__.py#L1 `'.zoo.*' imported but unused (F401)`
		from .tablecenternet import *
Check warning on line 2 in doctr/models/table_structure/__init__.py View check run for this annotation Codacy Production / Codacy Static Code Analysis doctr/models/table_structure/__init__.py#L2 `'.tablecenternet.*' imported but unused (F401)`
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .pytorch import *
Check warning on line 1 in doctr/models/table_structure/predictor/__init__.py View check run for this annotation Codacy Production / Codacy Static Code Analysis doctr/models/table_structure/predictor/__init__.py#L1 `'.pytorch.*' imported but unused (F401)` Check notice on line 1 in doctr/models/table_structure/predictor/__init__.py View check run for this annotation Codacy Production / Codacy Static Code Analysis doctr/models/table_structure/predictor/__init__.py#L1 `Missing docstring in public package (D104)`