diff --git a/hbd/server/udp.py b/hbd/server/udp.py index 831948e..4cc359e 100644 --- a/hbd/server/udp.py +++ b/hbd/server/udp.py @@ -398,6 +398,16 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict): plugin_interval = max(history[-1][0] - history[-2][0], 1) host.reset_plugin_timer(plugin_name, plugin_interval * 3, _make_plugin_stale_callback(uname, ctx)) + # Remove alert states for metrics present in the previous sample + # but absent now (e.g. a nagios check removed from configuration). + prev_keys = set(history[-2][1].keys()) + curr_keys = set(plugin_data.keys()) + for metric_name in prev_keys - curr_keys: + metric_path = f"{plugin_name}.{metric_name}" + if host.alert_states.pop(metric_path, None) is not None: + eventlog(uname, "INFO", f"stale check removed: {metric_path}") + if (prev_keys - curr_keys) and msg_to_websockets: + msg_to_websockets("host", host.stateinfo()) else: host.cancel_plugin_timer(plugin_name)