diff --git a/hbd/server/udp.py b/hbd/server/udp.py index 2616666..e664185 100644 --- a/hbd/server/udp.py +++ b/hbd/server/udp.py @@ -266,10 +266,15 @@ def restore_connection_timers(hbdclass, ctx): for afam, conn in list(host.connections.items()): state = conn.getstate() if state == hbdclass.Connection.DOWN: + _set_connectivity_alert(host, afam, "CRITICAL") continue on_overdue, on_unknown = _make_timer_callbacks(uname, host, ctx) + if state == hbdclass.Connection.UNKNOWN: + _set_connectivity_alert(host, afam, "CRITICAL") + continue + if state == hbdclass.Connection.UP and interval > 0: elapsed = now - conn.lastbeat # Give hosts one full (interval + grace) of extra time on startup @@ -300,6 +305,10 @@ def restore_connection_timers(hbdclass, ctx): "Restored OVERDUE timer %s/%s: %.0fs remaining", uname, afam, remaining, ) + # Ensure the connectivity alert is set — it may be missing if + # hbd was shut down before the on_overdue callback had a chance + # to record it. + _set_connectivity_alert(host, afam, "CRITICAL") restored += 1 logger.info("Restored timers for %d connection(s)", restored) @@ -470,6 +479,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict): boot = msg.get("boot", 0) if boot: + # hbc was started with a -b flag eventlog(uname, "INFO", "booted") if host.watched: asyncio.create_task(notify_mod.send_notification( @@ -480,6 +490,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict): eventlog(uname, "INFO", message, service=service) if conn.getstate() != hbdcls.Connection.UP: + # Transition to UP and log/notify if appropriate lasts = conn.state d = conn.newstate(hbdcls.Connection.UP, now) # On reboot, pre-boot plugin data and derived alerts are stale.