feat: nagios_runner improvements and alerts page fixes
- nagios_runner: remove overall_status/overall_status_code/plugin_count fields;
each command still reports its own <name>_status and <name>_status_code
- threshold: expose {output} and {status} aliases in display templates for
nagios_runner generic matches (mapped from <check_name>_output/status)
- alerts.html: fix scrolling by overriding html,body height/overflow (style.css
sets both); make hostname a link to /plugins/<hostname>
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -299,6 +299,8 @@ Available variables:
|
||||
| `{op_symbol}` | Comparison operator (`>`, `<`, `>=`, …) |
|
||||
| `{check_name}` | Prefix stripped by generic matching (see below) |
|
||||
| `{metric_name}` | Full field name within the plugin data |
|
||||
| `{output}` | For `nagios_runner` generic matches: the matched check's output text (alias for `{<check_name>_output}`, e.g. `{check_disk_root_output}`) |
|
||||
| `{status}` | For `nagios_runner` generic matches: the matched check's status name — OK/WARNING/CRITICAL/UNKNOWN (alias for `{<check_name>_status}`, e.g. `{check_disk_root_status}`) |
|
||||
| any plugin field | Any other field present in the plugin's data |
|
||||
|
||||
### Generic Threshold Matching
|
||||
|
||||
@@ -31,16 +31,13 @@ from hbd.client.plugin import MonitorPlugin
|
||||
|
||||
|
||||
# Nagios exit codes
|
||||
NAGIOS_OK = 0
|
||||
NAGIOS_WARNING = 1
|
||||
NAGIOS_CRITICAL = 2
|
||||
NAGIOS_UNKNOWN = 3
|
||||
|
||||
STATUS_NAMES = {
|
||||
NAGIOS_OK: "OK",
|
||||
NAGIOS_WARNING: "WARNING",
|
||||
NAGIOS_CRITICAL: "CRITICAL",
|
||||
NAGIOS_UNKNOWN: "UNKNOWN"
|
||||
0: "OK",
|
||||
1: "WARNING",
|
||||
2: "CRITICAL",
|
||||
3: "UNKNOWN",
|
||||
}
|
||||
|
||||
|
||||
@@ -128,52 +125,39 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
||||
Dictionary with results from all plugins
|
||||
"""
|
||||
results = {}
|
||||
|
||||
# Track overall status (worst status wins)
|
||||
worst_status = NAGIOS_OK
|
||||
|
||||
|
||||
for cmd_config in self.commands:
|
||||
name = cmd_config.get("name")
|
||||
command = cmd_config.get("command")
|
||||
|
||||
|
||||
if not name or not command:
|
||||
self.logger.warning("Skipping command with missing name or command")
|
||||
continue
|
||||
|
||||
|
||||
# Execute plugin
|
||||
try:
|
||||
status_code, output, perfdata = await self._run_nagios_plugin(command)
|
||||
|
||||
|
||||
# Store results
|
||||
results[f"{name}_status"] = STATUS_NAMES.get(status_code, "UNKNOWN")
|
||||
results[f"{name}_status_code"] = status_code
|
||||
results[f"{name}_output"] = output
|
||||
|
||||
# Track worst status
|
||||
if status_code > worst_status:
|
||||
worst_status = status_code
|
||||
|
||||
|
||||
# Parse and add performance data
|
||||
if perfdata:
|
||||
for metric_name, metric_value in perfdata.items():
|
||||
results[f"{name}_{metric_name}"] = metric_value
|
||||
|
||||
|
||||
self.logger.info(
|
||||
f"Executed {name}: {STATUS_NAMES.get(status_code, 'UNKNOWN')} - {output[:50]}"
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error running {name}: {e}", exc_info=True)
|
||||
results[f"{name}_status"] = "ERROR"
|
||||
results[f"{name}_status_code"] = NAGIOS_UNKNOWN
|
||||
results[f"{name}_output"] = str(e)
|
||||
worst_status = NAGIOS_UNKNOWN
|
||||
|
||||
# Add overall status
|
||||
results["overall_status"] = STATUS_NAMES.get(worst_status, "UNKNOWN")
|
||||
results["overall_status_code"] = worst_status
|
||||
results["plugin_count"] = len(self.commands)
|
||||
|
||||
|
||||
return results
|
||||
|
||||
async def _run_nagios_plugin(
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
<style>
|
||||
|
||||
body {
|
||||
html, body {
|
||||
height: auto;
|
||||
overflow-y: auto;
|
||||
}
|
||||
@@ -175,8 +175,12 @@
|
||||
|
||||
.alert-hostname {
|
||||
font-weight: bold;
|
||||
color: #333;
|
||||
color: #0066cc;
|
||||
font-size: 1.1em;
|
||||
text-decoration: none;
|
||||
}
|
||||
.alert-hostname:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.alert-metric {
|
||||
@@ -433,7 +437,7 @@
|
||||
<div class="alert-main">
|
||||
<div class="alert-header">
|
||||
<span class="alert-level ${level}">${alert.level}</span>
|
||||
<span class="alert-hostname">${alert.hostname}</span>
|
||||
<a class="alert-hostname" href="/plugins/${alert.hostname}">${alert.hostname}</a>
|
||||
</div>
|
||||
<div class="alert-metric">${alert.metric_path}</div>
|
||||
<div class="alert-details">
|
||||
|
||||
@@ -1152,6 +1152,19 @@ class ThresholdChecker:
|
||||
# Add all plugin data fields if available
|
||||
if plugin_data:
|
||||
format_context.update(plugin_data)
|
||||
|
||||
# For nagios_runner generic matches, expose the matched check's output
|
||||
# and status as short aliases {output} and {status} so display templates
|
||||
# don't need to use the full {check_disk_root_output} form.
|
||||
if check_name and plugin_data:
|
||||
if 'output' not in format_context:
|
||||
output = plugin_data.get(f"{check_name}_output")
|
||||
if output is not None:
|
||||
format_context['output'] = output
|
||||
if 'status' not in format_context:
|
||||
status = plugin_data.get(f"{check_name}_status")
|
||||
if status is not None:
|
||||
format_context['status'] = status
|
||||
|
||||
try:
|
||||
# Format the display string
|
||||
|
||||
Reference in New Issue
Block a user