diff --git a/hbd/server/threshold.py b/hbd/server/threshold.py index 19cc78a..3572e19 100644 --- a/hbd/server/threshold.py +++ b/hbd/server/threshold.py @@ -1389,6 +1389,9 @@ class ThresholdChecker: host_name, lvl, message, metric_path, AlertLevel.OK, alert_state.level, value ) alert_state.pending_since = None + now = time.time() + alert_state.last_notification = now + alert_state.notification_count = 1 # else: still within grace window, do nothing else: self._check_renotify(host_name, alert_state, metric_path, value, threshold, plugin_data, check_name=check_name, metric_name=metric_name) @@ -1497,7 +1500,16 @@ class ThresholdChecker: if not host.alert_states: continue configured = self.get_thresholds_for_host(hostname) - stale = [mp for mp in host.alert_states if self._find_threshold(configured, mp)[0] is None] + stale = [] + for mp in host.alert_states: + if self._find_threshold(configured, mp)[0] is not None: + continue + # Also match wildcard pool/partition thresholds (e.g. "zfs_monitor.*.status" + # covers alert state "zfs_monitor.tank.status"). + parts = mp.split(".") + if len(parts) == 3 and f"{parts[0]}.*.{parts[2]}" in configured: + continue + stale.append(mp) for mp in stale: logger.info( "Purging stale alert state for %s / %s (no threshold configured)",