From 3da6976b53aa7f6f11861907a5bad6e0125adff6 Mon Sep 17 00:00:00 2001
From: Andreas Wrede <aew.git@wrede.ca>
Date: Sat, 6 Jun 2026 14:45:47 -0400
Subject: [PATCH] fix: don't purge connectivity/rtt alerts in
 purge_stale_alerts

The rtt and connectivity.* alert states are set by the connection state
machine, not by threshold config, so they have no threshold entry and
were being deleted on every startup. Skip them explicitly in the purge
loop so that overdue/down alerts survive the purge.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 hbd/server/threshold.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hbd/server/threshold.py b/hbd/server/threshold.py
index ec50f03..9d88d83 100644
--- a/hbd/server/threshold.py
+++ b/hbd/server/threshold.py
@@ -1554,6 +1554,10 @@ class ThresholdChecker:
             configured = self.get_thresholds_for_host(hostname)
             stale = []
             for mp in host.alert_states:
+                # connectivity.* and rtt are managed by the connection state
+                # machine, not by threshold config — never purge them.
+                if mp == "rtt" or mp.startswith("connectivity."):
+                    continue
                 if self._find_threshold(configured, mp)[0] is not None:
                     continue
                 # Also match wildcard pool/partition thresholds (e.g. "zfs_monitor.*.status"