fix: restore connectivity alerts for overdue/unknown/down hosts on startup
restore_connection_timers now calls _set_connectivity_alert("CRITICAL")
for DOWN, OVERDUE, and UNKNOWN connections, ensuring alerts are present
even if hbd was shut down before the transition callbacks recorded them.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -266,10 +266,15 @@ def restore_connection_timers(hbdclass, ctx):
|
||||
for afam, conn in list(host.connections.items()):
|
||||
state = conn.getstate()
|
||||
if state == hbdclass.Connection.DOWN:
|
||||
_set_connectivity_alert(host, afam, "CRITICAL")
|
||||
continue
|
||||
|
||||
on_overdue, on_unknown = _make_timer_callbacks(uname, host, ctx)
|
||||
|
||||
if state == hbdclass.Connection.UNKNOWN:
|
||||
_set_connectivity_alert(host, afam, "CRITICAL")
|
||||
continue
|
||||
|
||||
if state == hbdclass.Connection.UP and interval > 0:
|
||||
elapsed = now - conn.lastbeat
|
||||
# Give hosts one full (interval + grace) of extra time on startup
|
||||
@@ -300,6 +305,10 @@ def restore_connection_timers(hbdclass, ctx):
|
||||
"Restored OVERDUE timer %s/%s: %.0fs remaining",
|
||||
uname, afam, remaining,
|
||||
)
|
||||
# Ensure the connectivity alert is set — it may be missing if
|
||||
# hbd was shut down before the on_overdue callback had a chance
|
||||
# to record it.
|
||||
_set_connectivity_alert(host, afam, "CRITICAL")
|
||||
restored += 1
|
||||
|
||||
logger.info("Restored timers for %d connection(s)", restored)
|
||||
@@ -470,6 +479,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
boot = msg.get("boot", 0)
|
||||
|
||||
if boot:
|
||||
# hbc was started with the -b flag
|
||||
eventlog(uname, "INFO", "booted")
|
||||
if host.watched:
|
||||
asyncio.create_task(notify_mod.send_notification(
|
||||
@@ -480,6 +490,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
eventlog(uname, "INFO", message, service=service)
|
||||
|
||||
if conn.getstate() != hbdcls.Connection.UP:
|
||||
# Transition to UP and log/notify if appropriate
|
||||
lasts = conn.state
|
||||
d = conn.newstate(hbdcls.Connection.UP, now)
|
||||
# On reboot, pre-boot plugin data and derived alerts are stale.
|
||||
|
||||
Reference in New Issue
Block a user