feat: clear alerts for individual plugin metrics that disappear between samples

When a PLG message omits keys that were present in the previous sample,
alert states for the missing metrics are removed immediately. This handles
Nagios checks that are removed from configuration while the runner plugin
continues to report.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Andreas Wrede
2026-06-06 11:32:38 -04:00
parent 32680d34a4
commit b0addd7c67
+10
View File
@@ -398,6 +398,16 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
plugin_interval = max(history[-1][0] - history[-2][0], 1) plugin_interval = max(history[-1][0] - history[-2][0], 1)
host.reset_plugin_timer(plugin_name, plugin_interval * 3, host.reset_plugin_timer(plugin_name, plugin_interval * 3,
_make_plugin_stale_callback(uname, ctx)) _make_plugin_stale_callback(uname, ctx))
# Remove alert states for metrics present in the previous sample
# but absent now (e.g. a nagios check removed from configuration).
prev_keys = set(history[-2][1].keys())
curr_keys = set(plugin_data.keys())
for metric_name in prev_keys - curr_keys:
metric_path = f"{plugin_name}.{metric_name}"
if host.alert_states.pop(metric_path, None) is not None:
eventlog(uname, "INFO", f"stale check removed: {metric_path}")
if (prev_keys - curr_keys) and msg_to_websockets:
msg_to_websockets("host", host.stateinfo())
else: else:
host.cancel_plugin_timer(plugin_name) host.cancel_plugin_timer(plugin_name)