From 5f56e44ec4965ac5cdf52572d334b92406cde088 Mon Sep 17 00:00:00 2001
From: Hyungtae Lim <shapelim@mit.edu>
Date: Thu, 21 May 2026 22:05:41 +0900
Subject: [PATCH 1/2] =?UTF-8?q?docs(usage):=20add=20Open3D=20RANSAC=20base?=
 =?UTF-8?q?line=20(=C2=A76)=20+=20format=20polish?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- python/examples/evaluate_ransac_in_semantickitti.py: new eval driver
  built on open3d.geometry.PointCloud.segment_plane. Same metric
  definitions and --eval_protocol flag as evaluate_semantickitti.py,
  with --distance_threshold / --num_iterations knobs and an optional
  --sweep_thresholds / --sweep_iterations grid. Per-frame median ms
  is reported alongside P/R/F1.
- USAGE.md §6: full 6×5 grid (thr ∈ {0.10..0.50}, iter ∈ {100..10000})
  on KITTI seq 00 — F1 saturates between iter=500 and iter=1000 (the
  highest-iter cell only buys +0.07 F1 anywhere in the table), F1 ridge
  is at thr=0.15. Best config (thr=0.15, iter=1000) evaluated on full
  KITTI 00-10 gives macro P=94.18 / R=82.03 / F1=87.11 at 19.5 ms
  median per frame — 9.18 F1 behind Patchwork++ on the macro and
  25.88 F1 behind on the worst sequence (seq 10, rolling rural).
- USAGE.md top: full README-style centered header block with badges,
  demo gif, pip-install banner (matches README.md for consistency).
- USAGE.md section headings now use ## :emoji: N. ... form per the
  format-readme template; existing 70-underscore dividers retained.

No algorithmic change; new script + docs only.
---
 USAGE.md                                      | 162 +++++++-
 .../evaluate_ransac_in_semantickitti.py       | 375 ++++++++++++++++++
 2 files changed, 530 insertions(+), 7 deletions(-)
 create mode 100644 python/examples/evaluate_ransac_in_semantickitti.py
diff --git a/USAGE.md b/USAGE.md
index a43194e..b379bf5 100644
--- a/USAGE.md
+++ b/USAGE.md
@@ -1,4 +1,44 @@
-# Patchwork++ — Usage Guide
+<div align="center">
+    <h1>Patchwork++</h1>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master/patchworkpp"><img src="https://img.shields.io/badge/-C++-blue?logo=cplusplus" /></a>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master"><img src="https://img.shields.io/badge/Python-3670A0?logo=python&logoColor=ffdd54" /></a>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master/ros"><img src="https://img.shields.io/badge/ROS2-Humble-blue" /></a>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master"><img src="https://img.shields.io/badge/Ubuntu-E95420?logo=ubuntu&logoColor=white" /></a>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master"><img src="https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=white" /></a>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master"><img src="https://img.shields.io/badge/Windows-0078D6?logo=windows&logoColor=white" /></a>
+    <a href="https://arxiv.org/abs/2207.11919"><img src="https://img.shields.io/badge/arXiv-b33737?logo=arXiv" /></a>
+    <a href="https://ieeexplore.ieee.org/document/9981561"><img src="https://img.shields.io/badge/DOI-10.1109/IROS47612.2022.9981561-004088.svg"/></a>
+    <br />
+    <a href="https://github.com/url-kaist/patchwork-plusplus/actions/workflows/cpp.yml"><img src="https://github.com/url-kaist/patchwork-plusplus/actions/workflows/cpp.yml/badge.svg?branch=master" alt="C++ API" /></a>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/actions/workflows/python.yml"><img src="https://github.com/url-kaist/patchwork-plusplus/actions/workflows/python.yml/badge.svg?branch=master" alt="Python API" /></a>
+    <br />
+    <br />
+    <a href="https://www.youtube.com/watch?v=fogCM159GRk">Video</a>
+    <span>&nbsp;&nbsp;•&nbsp;&nbsp;</span>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master/README.md#python">Install</a>
+    <span>&nbsp;&nbsp;•&nbsp;&nbsp;</span>
+    <a href="https://github.com/url-kaist/patchwork-plusplus/tree/master/ros">ROS2</a>
+    <span>&nbsp;&nbsp;•&nbsp;&nbsp;</span>
+    <a href="https://arxiv.org/abs/2207.11919">Paper</a>
+    <span>&nbsp;&nbsp;•&nbsp;&nbsp;</span>
+    <a href=https://github.com/url-kaist/patchwork-plusplus/issues>Contact Us</a>
+  <br />
+  <br />
+  <p align="center"><img src=pictures/patchwork++.gif alt="animated" /></p>
+
+<p align="center">
+    <strong>(May 19, 2026)</strong> pip installation is now live:
+    <br/>
+    <a href="https://pypi.org/project/pypatchworkpp/"><img src="https://readme-typing-svg.demolab.com?background=0D1117&color=22C55E&font=Fira+Code&size=18&duration=2500&pause=800&center=true&vCenter=true&width=320&height=30&lines=%24+pip+install+pypatchworkpp" alt="pip install pypatchworkpp"/></a>
+</p>
+
+[Patchwork++][arxivlink], an extension of [Patchwork][patchworklink], is **a fast, robust, and self-adaptive ground segmentation algorithm** for 3D point clouds.
+
+</div>
+
+______________________________________________________________________
+
+# :books: Usage Guide
 
 This guide covers three things that are easy to get wrong on first contact:
 
@@ -10,7 +50,7 @@ For a quick start, jump to [§3](#3-reproducing-paper-table-i).
 
 ______________________________________________________________________
 
-## 1. Evaluation protocols
+## :scroll: 1. Evaluation protocols
 
 The Patchwork and Patchwork++ papers use **different** ground-truth definitions on SemanticKITTI. The eval driver `python/examples/evaluate_semantickitti.py` supports both via `--eval_protocol {patchwork, patchworkpp}`.
 
@@ -54,7 +94,7 @@ Same Patchwork++ inference, KITTI 00–10 macro average, two protocols:
 
 ______________________________________________________________________
 
-## 2. Parameter tuning
+## :wrench: 2. Parameter tuning
 
 If results look wrong on a new sensor (Velodyne 16/32, Ouster 64/128, Livox, etc.), tune in roughly this order. Defaults are in `cpp/patchworkpp/include/patchwork/patchworkpp.h` (Patchwork++) and `cpp/patchwork/include/patchwork/patchwork.h` (classic Patchwork).
 
@@ -109,7 +149,7 @@ Rule of thumb: scale them ∝ `expected_terrain_undulation / 1.723 m` if your se
 
 ______________________________________________________________________
 
-## 3. Reproducing paper Table I
+## :rocket: 3. Reproducing paper Table I
 
 ```bash
 # 1. Install once
@@ -156,7 +196,7 @@ python python/examples/evaluate_semantickitti.py \
 
 ______________________________________________________________________
 
-## 4. Official benchmarks
+## :bar_chart: 4. Official benchmarks
 
 KITTI 00-10 full sweep, **23,201 frames**, macro-average across the eleven sequences. All numbers are produced by `python/examples/evaluate_semantickitti.py` on current `master` (v1.3.1) with paper-matched parameters (the script already sets `uprightness_thr=0.707` and `using_global_thr=false` for `--method patchwork`; `--method patchworkpp` uses library defaults).
 
@@ -208,7 +248,7 @@ python python/examples/evaluate_semantickitti.py \
 
 ______________________________________________________________________
 
-## 5. Per-sequence performance
+## :chart_with_upwards_trend: 5. Per-sequence performance
 
 All numbers below are produced by `python/examples/evaluate_semantickitti.py` on v1.3.1 (current `master`), KITTI 00-10, paper-matched parameters. Use them to debug per-sequence regressions: if seq 05 looks fine but seq 10 is 3 F1 below the table, you have a parameter problem, not a code problem.
 
@@ -288,8 +328,116 @@ All numbers below are produced by `python/examples/evaluate_semantickitti.py` on
 
 ______________________________________________________________________
 
-## See also
+## :vs: 6. RANSAC baseline (Open3D `segment_plane`)
+
+A common first instinct on a new dataset is to fit a single plane with RANSAC and call the inliers "ground". `python/examples/evaluate_ransac_in_semantickitti.py` does exactly that, on top of Open3D's `segment_plane`, with the same metric definitions and `--eval_protocol` flag as `evaluate_semantickitti.py`, so the numbers drop directly into the same comparison frame as §5.
+
+```bash
+# Single (thr, iter) point — defaults to thr=0.15, iter=500
+python python/examples/evaluate_ransac_in_semantickitti.py \
+    --distance_threshold 0.15 --num_iterations 1000 \
+    --eval_protocol patchworkpp
+
+# Full sweep across a (thr × iter) grid
+python python/examples/evaluate_ransac_in_semantickitti.py \
+    --seqs 00 \
+    --sweep_thresholds 0.10,0.15,0.25,0.30,0.40,0.50 \
+    --sweep_iterations 100,500,1000,5000,10000 \
+    --eval_protocol patchworkpp \
+    --output_csv summary_ransac_seq00_grid.csv
+```
+
+### Grid sweep on KITTI seq 00 (4541 frames, `--eval_protocol patchworkpp`)
+
+`distance_threshold` (rows) is the max point-to-plane distance counted as inlier (metres). `num_iterations` (columns) is the RANSAC hypothesis cap; Open3D's `segment_plane` early-terminates when a hypothesis crosses an internal confidence bound, so this is a **maximum** not an exact iteration count. `ransac_n=3` throughout (plane). Cell value is **F1 (%)**; second line is the **median wall-clock ms** of `segment_plane` per frame.
+
+| thr \\ iter | 100              | 500              | 1000             | 5000             | 10000               |
+| ----------- | ---------------- | ---------------- | ---------------- | ---------------- | ------------------- |
+| 0.10        | 82.67 (16.5 ms)  | 88.69 (34.6 ms)  | 89.31 (37.5 ms)  | 89.31 (56.6 ms)  | 89.33 (56.7 ms)     |
+| **0.15**    | 89.34 (17.1 ms)  | 93.12 (29.3 ms)  | **93.28 (29.3 ms)** | 93.30 (40.7 ms) | **93.35 (40.8 ms)** |
+| 0.25        | 90.94 (17.4 ms)  | 92.34 (24.0 ms)  | 92.72 (24.2 ms)  | 92.52 (30.4 ms)  | 92.52 (30.5 ms)     |
+| 0.30        | 89.54 (17.5 ms)  | 90.16 (22.6 ms)  | 90.20 (22.4 ms)  | 90.35 (27.5 ms)  | 90.21 (27.7 ms)     |
+| 0.40        | 84.38 (15.8 ms)  | 84.72 (18.6 ms)  | 84.78 (20.4 ms)  | 84.75 (22.8 ms)  | 84.71 (23.0 ms)     |
+| 0.50        | 79.43 (18.3 ms)  | 80.25 (17.8 ms)  | 80.16 (18.1 ms)  | 80.24 (18.4 ms)  | 80.02 (18.6 ms)     |
+
+Wall-clock numbers are median per-frame ms of `segment_plane` on an i7-12700; the 24-thread parallel default of Open3D is used for iter ≤ 1000, and 8 threads (`OMP_NUM_THREADS=8`) for iter ≥ 5000 (the 24-thread iter=10000 run exhausted system memory). Compare F1 numbers across columns freely; absolute ms across iter≤1000 and iter≥5000 columns are not directly comparable.
+
+### Reading the grid
+
+- **`distance_threshold` is the dominant knob, and every F1 column has a clear inverted-U.** Tight thresholds (0.10 m) over-reject — precision saturates near 96.7 but recall caps at 83. Loose thresholds (0.40–0.50 m) over-accept — precision falls below 81. The F1 ridge sits at **thr=0.15** for every iteration budget of 500 or more; only at iter=100 does the peak shift to thr=0.25 (90.94 vs. 89.34).
+- **`num_iterations` saturates between 500 and 1000.** Going from 100 → 500 buys 3.8–6.0 F1 at thr ≤ 0.15 but only 0.3–1.4 F1 at thr ≥ 0.25; 500 → 1000 buys between −0.1 and +0.6; 1000 → 10000 buys at most **+0.07 F1** anywhere in the table — well inside run-to-run noise. Open3D's early-termination is the likely cause: between iter=5000 and iter=10000 (both measured with 8 threads) the wall-clock moves by at most 0.2 ms in every row, so the inner loop stops on its own well before the cap. The 1000 → 10000 wall-clock increase (largest at thr=0.10, 37.5 → 56.7 ms) also crosses the 24-thread → 8-thread boundary, so it is not a like-for-like measurement, and even there F1 changes by only 0.02.
+- **The dominant-plane assumption is the ceiling.** The best cell on the entire grid is `thr=0.15, iter=10000 → F1=93.35`, indistinguishable from `thr=0.15, iter=1000 → F1=93.28`. Practically there is no high-iter config that meaningfully improves on the cheap one; the algorithmic ceiling is set by the single-plane model, not by RANSAC's iteration budget.
+
+### Best config on the full KITTI 00–10 sweep
+
+Picking `thr=0.15, iter=1000` (ties the highest-iter F1 at this threshold, runs faster) and evaluating on all 23,201 frames under the Patchwork++ paper protocol:
+
+| seq     | frames    | Precision | Recall    | F1        |
+| ------- | --------- | --------- | --------- | --------- |
+| 00      | 4541      | 95.37     | 91.63     | 93.31     |
+| 01      | 1101      | 98.33     | 87.74     | 92.52     |
+| 02      | 4661      | 94.34     | 80.44     | 86.27     |
+| 03      | 801       | 97.92     | 77.49     | 85.79     |
+| 04      | 271       | 97.70     | 87.90     | 92.42     |
+| 05      | 2761      | 93.01     | 88.09     | 90.26     |
+| 06      | 1101      | 97.29     | 79.67     | 87.52     |
+| 07      | 1101      | 92.68     | 89.33     | 90.81     |
+| 08      | 4071      | 93.33     | 78.20     | 83.88     |
+| 09      | 1591      | 96.75     | 80.68     | 87.65     |
+| 10      | 1201      | 79.23     | 61.17     | 67.75     |
+| **Avg** | **23201** | **94.18** | **82.03** | **87.11** |
+
+Median wall-clock 19.5 ms / frame (51.2 Hz) with Open3D's default 24-thread parallelism on an i7-12700.
+
+### Macro comparison — RANSAC vs. Patchwork / Patchwork++ on KITTI 00–10
+
+Side-by-side with the §5 numbers, under `--eval_protocol patchworkpp` on the same 23,201 frames:
+
+| Method                                                  | Precision | Recall    | F1        | Median ms |
+| ------------------------------------------------------- | --------- | --------- | --------- | --------- |
+| Open3D RANSAC (best: thr=0.15, iter=1000)               | 94.18     | 82.03     | 87.11     | ~19.5     |
+| Classic Patchwork (this repo, v1.4.0)                   | 94.64     | 97.58     | 96.02     | ~9        |
+| **Patchwork++ (this repo, v1.4.0)**                     | **95.55** | **97.16** | **96.29** | ~18       |
+
+**Patchwork++ wins by +9.18 F1 on the macro average** and roughly **matches** RANSAC on wall-clock per frame (~18 ms vs. ~19.5 ms), even though Patchwork++ is currently single-threaded on v1.4.0 (TBB intentionally disabled; see #96) while Open3D's `segment_plane` is using all 24 threads. The recall column is where the gap concentrates: RANSAC's 82.03 vs. Patchwork++'s 97.16 — a single global plane simply cannot cover the multiple ground patches that the concentric-zone partition handles natively.
+
+### Per-sequence gap to Patchwork++
+
+The macro gap is not uniform; it is dragged down by the hard sequences:
+
+| seq | scene                       | RANSAC F1 | Patchwork++ F1 | Δ         |
+| --- | --------------------------- | --------- | -------------- | --------- |
+| 00  | residential, mild slope     | 93.31     | 96.62          | -3.31     |
+| 01  | highway                     | 92.52     | 97.34          | -4.82     |
+| 02  | residential, parked cars    | 86.27     | 96.35          | -10.08    |
+| 03  | short urban                 | 85.79     | 97.21          | -11.42    |
+| 04  | short highway               | 92.42     | 97.25          | -4.83     |
+| 05  | undulating road             | 90.26     | 94.84          | -4.58     |
+| 06  | open road                   | 87.52     | 97.61          | -10.09    |
+| 07  | inner-city                  | 90.81     | 95.56          | -4.75     |
+| 08  | dense urban                 | 83.88     | 96.74          | -12.86    |
+| 09  | rural                       | 87.65     | 96.06          | -8.41     |
+| 10  | rough rural / rolling roads | **67.75** | **93.63**      | **-25.88** |
+
+Sequences with a gap below 5 F1 (00, 01, 04, 05, 07) are essentially flat with a single dominant ground plane — exactly where the single-plane assumption holds. Sequences with a gap above 10 F1 (02, 03, 06, 08, 10) all have rolling shoulders, multi-tier sidewalks, or rough off-road terrain — multiple ground patches that one plane cannot represent. Seq 10 is the extreme case: rolling rural terrain where one global plane is so wrong RANSAC drops below 70 F1 while Patchwork++ stays above 93 F1.
+
+### Takeaway
+
+RANSAC is the obvious sanity-check baseline for ground segmentation. On KITTI it is **9 F1 behind the macro Patchwork++ row, 26 F1 behind on the worst sequence, and no improvement at higher iteration counts can close that gap** — the bottleneck is the model, not the optimiser. The concentric-zone partition that Patchwork and Patchwork++ both use turns this from a hard problem (one plane for the whole scan) into many easy ones (one plane per patch, with per-patch flatness and elevation gates), which is what closes the gap.
+
+### Caveats
+
+- `ransac_n=3` (the minimum for a plane) is the only value tested. A larger `ransac_n` still fits a plane — each hypothesis is just estimated from more sampled points — and is out of scope here.
+- The grid timing uses Open3D's default thread pool at iter ≤ 1000 and an 8-thread cap at iter ≥ 5000 (memory pressure at 24 threads × iter=10000 forced the cap). F1 numbers are insensitive to thread count; wall-clock numbers between low-iter and high-iter columns are **not** directly comparable. In the full-KITTI comparison, RANSAC runs with Open3D's default 24 threads while Patchwork++ runs single-threaded.
+- The Patchwork++ wall-clock row above (~18 ms median, single-threaded) is conservative. Enabling TBB on the Patchwork++ side (currently disabled — see #96) is expected to roughly halve it and widen the Hz gap further. Classic Patchwork (v1.4.0) is already TBB-parallel and runs at ~9 ms median on this machine.
+
+______________________________________________________________________
+
+## :link: See also
 
 - [`python/examples/demo_visualize.py`](python/examples/demo_visualize.py) — single-frame visualisation.
 - [`python/examples/demo_sequential.py`](python/examples/demo_sequential.py) — iterate over a folder of `.bin` files.
 - Issues: [#87](https://github.com/url-kaist/patchwork-plusplus/issues/87) (reproduce paper), [#88](https://github.com/url-kaist/patchwork-plusplus/issues/88) (evaluation protocol), [#89](https://github.com/url-kaist/patchwork-plusplus/issues/89) (performance enhancement).
+
+[arxivlink]: https://arxiv.org/abs/2207.11919
+[patchworklink]: https://github.com/LimHyungTae/patchwork
diff --git a/python/examples/evaluate_ransac_in_semantickitti.py b/python/examples/evaluate_ransac_in_semantickitti.py
new file mode 100644
index 0000000..587e417
--- /dev/null
+++ b/python/examples/evaluate_ransac_in_semantickitti.py
@@ -0,0 +1,375 @@
+"""Evaluate an Open3D RANSAC `segment_plane` ground baseline on SemanticKITTI.
+
+This is the apples-to-apples companion of `evaluate_semantickitti.py`. The
+metric definition and protocol (`patchwork` vs `patchworkpp`) are kept
+identical, so the resulting Precision / Recall / F1 / median ms numbers
+can be compared directly against the Patchwork / Patchwork++ rows in
+`USAGE.md` §5.
+
+The only differences vs `evaluate_semantickitti.py` are:
+
+1. The ground segmenter is `open3d.geometry.PointCloud.segment_plane`
+   (RANSAC plane fit on the whole frame, single-plane assumption) instead
+   of Patchwork / Patchwork++.
+2. Two extra knobs: `--distance_threshold` and `--num_iterations`. These
+   are the headline RANSAC parameters; pass them on the command line or
+   sweep them with `--sweep_thresholds` / `--sweep_iterations`.
+3. Per-frame RANSAC time (point-cloud build + `segment_plane`) is recorded;
+   the median ms / Hz are reported alongside P / R / F1 so the
+   speed/quality trade-off shows up in the same table.
+"""
+
+import argparse
+import csv
+import os
+import sys
+import time
+
+import numpy as np
+import open3d as o3d
+
+GROUND_CLASSES_PATCHWORK = np.array([40, 44, 48, 49, 60, 70, 72], dtype=np.uint16)
+GROUND_CLASSES_PP = np.array([40, 44, 48, 49, 60, 72], dtype=np.uint16)
+OUTLIER_CLASSES = np.array([0, 1], dtype=np.uint16)
+VEGETATION = 70
+SENSOR_HEIGHT = 1.73
+VEGETATION_THR = -SENSOR_HEIGHT * 3.0 / 4.0
+DEFAULT_SEQS = [f"{i:02d}" for i in range(11)]
+
+
+def is_ground_mask_patchwork(labels: np.ndarray, z: np.ndarray) -> np.ndarray:
+    in_ground = np.isin(labels, GROUND_CLASSES_PATCHWORK)
+    veg_mask = labels == VEGETATION
+    veg_keep = veg_mask & (z < VEGETATION_THR)
+    return (in_ground & ~veg_mask) | veg_keep
+
+
+def is_ground_mask_pp(labels: np.ndarray) -> np.ndarray:
+    return np.isin(labels, GROUND_CLASSES_PP)
+
+
+def is_excluded_mask_pp(labels: np.ndarray) -> np.ndarray:
+    return (labels == VEGETATION) | np.isin(labels, OUTLIER_CLASSES)
+
+
+def is_outlier_mask(labels: np.ndarray) -> np.ndarray:
+    return np.isin(labels, OUTLIER_CLASSES)
+
+
+def f1(p: float, r: float) -> float:
+    return 2.0 * p * r / (p + r) if (p + r) > 0 else 0.0
+
+
+def load_bin(path: str) -> np.ndarray:
+    return np.fromfile(path, dtype=np.float32).reshape(-1, 4)
+
+
+def load_label(path: str, num_points: int) -> np.ndarray:
+    raw = np.fromfile(path, dtype=np.uint32)
+    if raw.size != num_points:
+        raise ValueError(
+            f"Label count {raw.size} != point count {num_points} for {path}"
+        )
+    return (raw & 0xFFFF).astype(np.uint16)
+
+
+def ransac_ground_indices(
+    points_xyz: np.ndarray,
+    distance_threshold: float,
+    num_iterations: int,
+    ransac_n: int,
+) -> np.ndarray:
+    """Return the inlier indices of the dominant plane found by RANSAC."""
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(points_xyz.astype(np.float64))
+    _, inliers = pcd.segment_plane(
+        distance_threshold=distance_threshold,
+        ransac_n=ransac_n,
+        num_iterations=num_iterations,
+    )
+    return np.asarray(inliers, dtype=np.int64)
+
+
+def evaluate_sequence(
+    seq_dir: str,
+    distance_threshold: float,
+    num_iterations: int,
+    ransac_n: int,
+    max_frames: int | None,
+    verbose: bool,
+    eval_protocol: str,
+) -> dict:
+    velodyne_dir = os.path.join(seq_dir, "velodyne")
+    labels_dir = os.path.join(seq_dir, "labels")
+    if not os.path.isdir(velodyne_dir) or not os.path.isdir(labels_dir):
+        raise FileNotFoundError(f"Missing velodyne/ or labels/ in {seq_dir}")
+
+    bin_files = sorted(f for f in os.listdir(velodyne_dir) if f.endswith(".bin"))
+    if max_frames is not None:
+        bin_files = bin_files[:max_frames]
+
+    precisions, recalls = [], []
+    precisions_naive, recalls_naive = [], []
+    f1s, f1s_naive = [], []
+    per_frame_ms: list[float] = []
+    skipped = 0
+
+    for i, fname in enumerate(bin_files):
+        cloud = load_bin(os.path.join(velodyne_dir, fname))
+        label_path = os.path.join(labels_dir, fname.replace(".bin", ".label"))
+        labels = load_label(label_path, cloud.shape[0])
+        z = cloud[:, 2]
+
+        t0 = time.perf_counter()
+        gnd_idx = ransac_ground_indices(
+            cloud[:, :3], distance_threshold, num_iterations, ransac_n
+        )
+        per_frame_ms.append(1000.0 * (time.perf_counter() - t0))
+
+        if gnd_idx.size == 0:
+            skipped += 1
+            continue
+
+        gnd_labels = labels[gnd_idx]
+        gnd_z = z[gnd_idx]
+
+        if eval_protocol == "patchworkpp":
+            gt_ground = is_ground_mask_pp(labels)
+            num_ground_gt = int(gt_ground.sum())
+            est_excluded = is_excluded_mask_pp(gnd_labels)
+            num_ground_est = int((~est_excluded).sum())
+            num_TP = int(is_ground_mask_pp(gnd_labels).sum())
+            denom = num_ground_est
+            p_n = p = 100.0 * num_TP / denom if denom > 0 else 0.0
+            r_n = r = 100.0 * num_TP / num_ground_gt if num_ground_gt > 0 else 0.0
+        else:
+            num_ground_gt = int(is_ground_mask_patchwork(labels, z).sum())
+            num_ground_est = int(gnd_idx.size)
+            num_TP = int(is_ground_mask_patchwork(gnd_labels, gnd_z).sum())
+            num_outliers_est = int(is_outlier_mask(gnd_labels).sum())
+            denom = num_ground_est - num_outliers_est
+            if num_ground_gt == 0 or denom <= 0 or num_ground_est == 0:
+                skipped += 1
+                continue
+            p = 100.0 * num_TP / denom
+            r = 100.0 * num_TP / num_ground_gt
+            p_n = 100.0 * num_TP / num_ground_est
+            r_n = r
+
+        precisions.append(p)
+        recalls.append(r)
+        f1s.append(f1(p, r))
+        precisions_naive.append(p_n)
+        recalls_naive.append(r_n)
+        f1s_naive.append(f1(p_n, r_n))
+
+        if verbose:
+            print(
+                f"  [{i:05d}] P={p:6.2f} R={r:6.2f} F1={f1s[-1]:6.2f} "
+                f"| {per_frame_ms[-1]:6.1f} ms"
+            )
+
+    if not precisions:
+        raise RuntimeError(f"No valid frames evaluated in {seq_dir}")
+
+    return {
+        "num_frames": len(precisions),
+        "skipped": skipped,
+        "precision": float(np.mean(precisions)),
+        "recall": float(np.mean(recalls)),
+        "f1": float(np.mean(f1s)),
+        "precision_naive": float(np.mean(precisions_naive)),
+        "recall_naive": float(np.mean(recalls_naive)),
+        "f1_naive": float(np.mean(f1s_naive)),
+        "median_ms": float(np.median(per_frame_ms)),
+        "mean_ms": float(np.mean(per_frame_ms)),
+        "p95_ms": float(np.percentile(per_frame_ms, 95)),
+    }
+
+
+def print_row(label: str, m: dict) -> None:
+    print(
+        f"{label:>24} | {m['num_frames']:>6d} | "
+        f"{m['precision']:6.2f} {m['recall']:6.2f} {m['f1']:6.2f} | "
+        f"{m['median_ms']:6.1f} ms (median) {1000.0 / m['median_ms']:6.1f} Hz"
+    )
+
+
+def write_csv(path: str, rows: list[tuple[str, dict]]) -> None:
+    with open(path, "w", newline="") as fp:
+        writer = csv.writer(fp)
+        writer.writerow(
+            [
+                "config",
+                "num_frames",
+                "precision",
+                "recall",
+                "f1",
+                "precision_naive",
+                "recall_naive",
+                "f1_naive",
+                "median_ms",
+                "mean_ms",
+                "p95_ms",
+            ]
+        )
+        for name, m in rows:
+            writer.writerow(
+                [
+                    name,
+                    m["num_frames"],
+                    f"{m['precision']:.4f}",
+                    f"{m['recall']:.4f}",
+                    f"{m['f1']:.4f}",
+                    f"{m['precision_naive']:.4f}",
+                    f"{m['recall_naive']:.4f}",
+                    f"{m['f1_naive']:.4f}",
+                    f"{m['median_ms']:.3f}",
+                    f"{m['mean_ms']:.3f}",
+                    f"{m['p95_ms']:.3f}",
+                ]
+            )
+
+
+def parse_float_list(text: str) -> list[float]:
+    return [float(x) for x in text.split(",") if x.strip()]
+
+
+def parse_int_list(text: str) -> list[int]:
+    return [int(x) for x in text.split(",") if x.strip()]
+
+
+def aggregate(rows: list[tuple[str, dict]]) -> dict:
+    if not rows:
+        raise ValueError("nothing to aggregate")
+    keys = (
+        "precision",
+        "recall",
+        "f1",
+        "precision_naive",
+        "recall_naive",
+        "f1_naive",
+        "median_ms",
+        "mean_ms",
+        "p95_ms",
+    )
+    out = {k: float(np.mean([m[k] for _, m in rows])) for k in keys}
+    out["num_frames"] = int(sum(m["num_frames"] for _, m in rows))
+    out["skipped"] = int(sum(m["skipped"] for _, m in rows))
+    return out
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--dataset_path",
+        default="/home/url/datasets/kitti/dataset/sequences",
+        help="Path containing seq subdirectories (00, 01, ...).",
+    )
+    parser.add_argument(
+        "--seqs",
+        nargs="+",
+        default=DEFAULT_SEQS,
+        help="Sequence ids to evaluate (default: 00..10).",
+    )
+    parser.add_argument("--output_csv", default="summary_ransac.csv")
+    parser.add_argument(
+        "--distance_threshold",
+        type=float,
+        default=0.15,
+        help="RANSAC distance threshold in meters (default: 0.15).",
+    )
+    parser.add_argument(
+        "--num_iterations",
+        type=int,
+        default=500,
+        help="RANSAC iteration count (default: 500).",
+    )
+    parser.add_argument(
+        "--ransac_n",
+        type=int,
+        default=3,
+        help="Points sampled per RANSAC hypothesis (default: 3; plane fit).",
+    )
+    parser.add_argument(
+        "--sweep_thresholds",
+        type=parse_float_list,
+        default=None,
+        help="Comma-separated thresholds to sweep, e.g. 0.1,0.15,0.25",
+    )
+    parser.add_argument(
+        "--sweep_iterations",
+        type=parse_int_list,
+        default=None,
+        help="Comma-separated iteration counts to sweep, e.g. 100,500,1000",
+    )
+    parser.add_argument(
+        "--eval_protocol",
+        choices=["patchwork", "patchworkpp"],
+        default="patchworkpp",
+        help="patchwork = original Patchwork repo protocol "
+        "(VEGETATION-low-z counted as ground). "
+        "patchworkpp = Patchwork++ paper Sec IV.A (VEGETATION excluded).",
+    )
+    parser.add_argument("--max_frames", type=int, default=None)
+    parser.add_argument("--verbose", action="store_true")
+    args = parser.parse_args()
+
+    if (args.sweep_thresholds is None) != (args.sweep_iterations is None):
+        parser.error("Pass --sweep_thresholds AND --sweep_iterations together, or neither.")
+
+    if args.sweep_thresholds is None:
+        configs = [(args.distance_threshold, args.num_iterations)]
+    else:
+        configs = [(t, n) for t in args.sweep_thresholds for n in args.sweep_iterations]
+
+    all_rows: list[tuple[str, dict]] = []
+
+    for thr, iters in configs:
+        cfg_label = f"thr={thr:.3f}_iter={iters}"
+        print(f"\n=== {cfg_label} (ransac_n={args.ransac_n}) ===")
+        rows: list[tuple[str, dict]] = []
+        for seq in args.seqs:
+            seq_dir = os.path.join(args.dataset_path, seq)
+            if not os.path.isdir(seq_dir):
+                print(f"[WARN] Skipping {seq}: {seq_dir} does not exist", file=sys.stderr)
+                continue
+            print(f"[seq {seq}] evaluating ...")
+            t0 = time.time()
+            metrics = evaluate_sequence(
+                seq_dir,
+                thr,
+                iters,
+                args.ransac_n,
+                args.max_frames,
+                args.verbose,
+                args.eval_protocol,
+            )
+            dt = time.time() - t0
+            print(
+                f"[seq {seq}] {metrics['num_frames']} frames in {dt:.1f}s | "
+                f"P={metrics['precision']:.2f} R={metrics['recall']:.2f} "
+                f"F1={metrics['f1']:.2f} | median {metrics['median_ms']:.1f} ms"
+            )
+            rows.append((seq, metrics))
+
+        if not rows:
+            print("No sequences evaluated for this config.", file=sys.stderr)
+            continue
+
+        avg = aggregate(rows)
+        print()
+        print_row(f"{cfg_label} Avg", avg)
+        all_rows.append((cfg_label, avg))
+        for seq_name, m in rows:
+            all_rows.append((f"{cfg_label}::{seq_name}", m))
+
+    if not all_rows:
+        sys.exit("Nothing evaluated.")
+
+    write_csv(args.output_csv, all_rows)
+    print(f"\nSummary written to {args.output_csv}")
+
+
+if __name__ == "__main__":
+    main()

From 7df4296352a25cd2782eec9f6e0d8c6706ea398f Mon Sep 17 00:00:00 2001
From: Hyungtae Lim <shapelim@mit.edu>
Date: Fri, 22 May 2026 15:35:56 +0900
Subject: [PATCH 2/2] style: apply black to evaluate_ransac_in_semantickitti.py

---
 python/examples/evaluate_ransac_in_semantickitti.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/examples/evaluate_ransac_in_semantickitti.py b/python/examples/evaluate_ransac_in_semantickitti.py
index 587e417..51052cf 100644
--- a/python/examples/evaluate_ransac_in_semantickitti.py
+++ b/python/examples/evaluate_ransac_in_semantickitti.py
@@ -316,7 +316,9 @@ def main():
     args = parser.parse_args()
 
     if (args.sweep_thresholds is None) != (args.sweep_iterations is None):
-        parser.error("Pass --sweep_thresholds AND --sweep_iterations together, or neither.")
+        parser.error(
+            "Pass --sweep_thresholds AND --sweep_iterations together, or neither."
+        )
 
     if args.sweep_thresholds is None:
         configs = [(args.distance_threshold, args.num_iterations)]
@@ -332,7 +334,9 @@ def main():
         for seq in args.seqs:
             seq_dir = os.path.join(args.dataset_path, seq)
             if not os.path.isdir(seq_dir):
-                print(f"[WARN] Skipping {seq}: {seq_dir} does not exist", file=sys.stderr)
+                print(
+                    f"[WARN] Skipping {seq}: {seq_dir} does not exist", file=sys.stderr
+                )
                 continue
             print(f"[seq {seq}] evaluating ...")
             t0 = time.time()