From 2eb1631512252fb8bd37a20e514306a1519bf189 Mon Sep 17 00:00:00 2001 From: Vladimir Babin Date: Tue, 16 Jun 2026 14:50:16 +0800 Subject: [PATCH 1/2] fix(database): progress-aware stall accessor for external restart logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #119. The monitor added there is progress-aware internally (it warns only when the write lock is held > WARN_SEC AND no operation was applied in that window), but the public accessor it exposed, push_block_lock_held_ms(), reports raw hold time only. Its doc comment and #119's suggested follow-up invite an orchestrator to auto-restart on hold time alone — which would restart on a legitimately heavy, DETERMINISTIC block (hardfork batch, large reorg) at the same height on every node simultaneously. That is the same synchronized network-wide self-destruct #119 deliberately avoided by rejecting std::_Exit, just moved one level out to the orchestrator. Add push_block_appears_stalled(): a lock-free, progress-aware verdict (held long AND no progress) published by the monitor each poll and backed by a new _push_block_stalled atomic. External healthcheck / restart logic should key off this, never off raw hold time. Correct the push_block_lock_held_ms() doc comment to say so explicitly. Also documents two review notes as comments (no behavior change): the single-caller assumption behind the lazy start/stop CAS, and that a future hardfork block running long in pure C++ without apply_operation could trip the verdict network-wide (log-only, so noise not restart, provided consumers gate on a SUSTAINED verdict). --- libraries/chain/database.cpp | 25 +++++++++++++- .../chain/include/graphene/chain/database.hpp | 33 +++++++++++++++++-- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index ce4cfd0b24..db6996bc90 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -1095,6 +1095,14 @@ namespace graphene { namespace chain { return held_us > 0 ? held_us / 1000 : 0; } + bool database::push_block_appears_stalled() const { + // Progress-aware verdict maintained by the monitor thread (held long AND + // no operation applied in that window). Use THIS, not raw hold time, to + // drive any external restart — a heavy deterministic block holds the lock + // long but keeps making progress, and is identical network-wide. + return _push_block_stalled.load(std::memory_order_acquire); + } + void database::start_push_block_monitor() { bool expected = false; if (!_push_block_monitor_run.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) @@ -1117,7 +1125,14 @@ namespace graphene { namespace chain { // diagnostic (stderr, so the monitor itself can never block on node locks). // LOG-ONLY: it never calls std::_Exit and never mutates state — a false // positive can only produce an extra log line. Auto-restart, if wanted, is - // an external decision driven by push_block_lock_held_ms(). + // an external decision driven by push_block_appears_stalled() (the same + // held-AND-frozen verdict this loop publishes), never raw hold time. + // NB: a future hardfork block whose migration runs long in pure C++ + // without calling apply_operation could trip the WARNING (and the stalled + // verdict) on every node at that height. It is log-only, so the cost is + // synchronized log noise, not a synchronized restart — provided external + // consumers gate on a SUSTAINED stalled verdict rather than acting on the + // first true reading. const int64_t warn_us = (int64_t)PUSH_BLOCK_STALL_WARN_SEC * 1000000; const int64_t relog_us = (int64_t)PUSH_BLOCK_STALL_RELOG_SEC * 1000000; uint64_t last_progress = _apply_progress.load(std::memory_order_acquire); @@ -1132,6 +1147,7 @@ namespace graphene { namespace chain { if (cur != last_progress) { last_progress = cur; last_progress_us = now_us; } const int64_t since = _push_block_lock_since_us.load(std::memory_order_acquire); if (since == 0) { + _push_block_stalled.store(false, std::memory_order_release); if (warned) { std::cerr << "[push_block-monitor] write lock released; the node has recovered from the stall." << std::endl; @@ -1142,6 +1158,9 @@ namespace graphene { namespace chain { const int64_t held_us = now_us - since; const int64_t frozen_us = now_us - last_progress_us; if (held_us > warn_us && frozen_us > warn_us) { + // Held long AND no progress in that window: publish the wedged + // verdict for push_block_appears_stalled() and log (throttled). + _push_block_stalled.store(true, std::memory_order_release); if (!warned || (now_us - last_log_us) > relog_us) { const uint32_t num = _push_block_watch_num.load(std::memory_order_acquire); std::cerr << "[push_block-monitor] WARNING: push_block() has held the chainbase write lock for " @@ -1152,6 +1171,10 @@ namespace graphene { namespace chain { warned = true; last_log_us = now_us; } + } else { + // Holding the lock but still under threshold, or progress is + // recent (heavy-but-healthy block): not wedged. + _push_block_stalled.store(false, std::memory_order_release); } } } diff --git a/libraries/chain/include/graphene/chain/database.hpp b/libraries/chain/include/graphene/chain/database.hpp index 599f8623e6..326e2642e5 100644 --- a/libraries/chain/include/graphene/chain/database.hpp +++ b/libraries/chain/include/graphene/chain/database.hpp @@ -501,11 +501,27 @@ namespace graphene { namespace chain { /// Milliseconds the current push_block() has held the chainbase write /// lock, or 0 if push_block is not currently holding it. Lock-free and - /// safe to call from any thread (e.g. a healthcheck/RPC). A value that - /// stays high (tens of seconds) means the node is wedged under the write - /// lock — see the push_block stall monitor. + /// safe to call from any thread (e.g. a healthcheck/RPC). + /// + /// NOTE: a high value alone is NOT a wedge signal — a legitimately heavy + /// block (a hardfork batch, a large reorg) can hold the lock for a long + /// time while making real progress. Such blocks are DETERMINISTIC at the + /// same height on every node, so restarting purely on hold time would + /// restart the whole network at once. Drive restart decisions off + /// push_block_appears_stalled() (held long AND no progress), never off + /// this raw timer. int64_t push_block_lock_held_ms() const; + /// True only when push_block() has held the write lock past + /// PUSH_BLOCK_STALL_WARN_SEC AND no operation has been applied in that + /// window — i.e. the node is wedged, not just doing heavy work. This is + /// the progress-aware signal an external healthcheck/orchestrator should + /// use to decide whether to restart; it cannot false-positive on a heavy + /// deterministic block the way raw hold time can. Lock-free; safe from any + /// thread. Updated by the stall monitor, so it lags reality by up to its + /// ~1s poll interval (irrelevant at the 60s threshold). + bool push_block_appears_stalled() const; + uint32_t last_non_undoable_block_num() const; //////////////////// db_init.cpp //////////////////// @@ -712,7 +728,18 @@ namespace graphene { namespace chain { std::atomic _push_block_lock_since_us{0}; std::atomic _push_block_watch_num{0}; std::atomic _push_block_monitor_run{false}; + // Progress-aware "wedged" verdict published by the monitor each poll: + // true only while (held > WARN_SEC AND no operation applied in that + // window). Backs push_block_appears_stalled() for external consumers so + // they never key a restart off raw hold time. (Maintained solely by the + // monitor thread; read lock-free elsewhere.) + std::atomic _push_block_stalled{false}; std::thread _push_block_monitor; + // start/stop are expected to be called only from the single block-apply + // path (start, on each push_block) and shutdown (stop, in close()/dtor), + // which are never concurrent. The lazy start CAS + thread assignment is + // not hardened against a concurrent stop; keep that single-caller + // assumption if this is ever reused. void start_push_block_monitor(); void stop_push_block_monitor(); void push_block_monitor_loop(); From d418fb5b7ad17f574e839a5bd1664aee9db78eae Mon Sep 17 00:00:00 2001 From: Vladimir Babin Date: Tue, 16 Jun 2026 15:26:25 +0800 Subject: [PATCH 2/2] fix(database): clear stall verdict on monitor start/stop Addresses review of #120: _push_block_stalled retained its last value across a close()->open() recovery cycle, so push_block_appears_stalled() could return a stale `true` for up to the ~1s poll interval before the lazily-restarted monitor re-evaluated. Reset the verdict to false in both start_push_block_monitor() (before launching the new thread) and stop_push_block_monitor() (a stopped monitor reports not-stalled), fully closing the window. --- libraries/chain/database.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index db6996bc90..d7f097ec9e 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -1107,6 +1107,11 @@ namespace graphene { namespace chain { bool expected = false; if (!_push_block_monitor_run.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) return; // already running + // Clear any stale verdict before the new monitor's first poll. Without + // this, after a close()->open() recovery cycle push_block_appears_stalled() + // could return a stale `true` for ~1s (the poll interval) until the loop + // re-evaluates. + _push_block_stalled.store(false, std::memory_order_release); _push_block_monitor = std::thread([this]() { push_block_monitor_loop(); }); } @@ -1115,6 +1120,9 @@ namespace graphene { namespace chain { if (_push_block_monitor.joinable()) _push_block_monitor.join(); } + // A stopped monitor reports not-stalled: clear the verdict so a reader + // between stop and the next lazy start never sees a stale `true`. + _push_block_stalled.store(false, std::memory_order_release); } void database::push_block_monitor_loop() {