feat: nagios operator for direct exit-code severity mapping

Add ComparisonOperator.NAGIOS ("nagios") that maps Nagios exit codes
directly to alert levels (0=OK 1=WARNING 2=CRITICAL 3=UNKNOWN) without
requiring numeric warning/critical thresholds. Hysteresis is bypassed for
discrete codes. Display template defaults to "{check_name}: {output}".
_format_display() handles None threshold_value gracefully.

Add nagios_runner.status_code as a built-in default threshold config so
nagios checks alert out of the box.

Also: fix alerts.html scrolling (override html,body), make hostname a link
to /plugins#<hostname>, remove overall_status/overall_status_code/plugin_count
from nagios_runner and hbc_mini, replace with computed worst-status in
plugins.html via nagiosWorstStatus() helper.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 12:26:56 -04:00
parent d7b5c97a4e
commit a534c06b26
9 changed files with 100 additions and 114 deletions
+68 -60
View File
@@ -30,12 +30,13 @@ class AlertLevel(Enum):
class ComparisonOperator(Enum):
"""Supported comparison operators for threshold checks."""
GT = ">" # Greater than
GTE = ">=" # Greater than or equal
LT = "<" # Less than
LTE = "<=" # Less than or equal
EQ = "==" # Equal to
NEQ = "!=" # Not equal to
GT = ">" # Greater than
GTE = ">=" # Greater than or equal
LT = "<" # Less than
LTE = "<=" # Less than or equal
EQ = "==" # Equal to
NEQ = "!=" # Not equal to
NAGIOS = "nagios" # Nagios exit-code semantics: 0=OK 1=WARNING 2=CRITICAL 3=UNKNOWN
class AlertState:
@@ -229,33 +230,43 @@ class ThresholdConfig:
def evaluate(self, value: float) -> AlertLevel:
"""
Evaluate a value against this threshold.
Args:
value: Metric value to check
Returns:
AlertLevel indicating the severity
"""
if not self.enabled:
return AlertLevel.OK
# Nagios exit-code semantics: value IS the severity
if self.operator == ComparisonOperator.NAGIOS:
try:
code = int(value)
except (TypeError, ValueError):
return AlertLevel.UNKNOWN
return {0: AlertLevel.OK, 1: AlertLevel.WARNING, 2: AlertLevel.CRITICAL}.get(
code, AlertLevel.UNKNOWN
)
try:
# Convert value to float for comparison
value = float(value)
except (TypeError, ValueError):
logger.warning("Cannot convert value %s to float for %s", value, self.metric_path)
return AlertLevel.UNKNOWN
# Check critical threshold first
if self.critical is not None:
if self._compare(value, self.critical):
return AlertLevel.CRITICAL
# Then check warning threshold
if self.warning is not None:
if self._compare(value, self.warning):
return AlertLevel.WARNING
return AlertLevel.OK
def evaluate_with_hysteresis(
@@ -274,7 +285,11 @@ class ThresholdConfig:
New alert level considering hysteresis
"""
new_level = self.evaluate(value)
# Nagios exit codes are discrete integers — hysteresis doesn't apply
if self.operator == ComparisonOperator.NAGIOS:
return new_level
# If no hysteresis, return new level
if self.hysteresis == 0.0:
return new_level
@@ -557,11 +572,14 @@ class ThresholdChecker:
warning = threshold_config.get("warning")
critical = threshold_config.get("critical")
operator = threshold_config.get("operator", ">")
display = threshold_config.get("display", "(threshold: {op_symbol} {threshold_value})")
hysteresis = threshold_config.get("hysteresis", 0.02) # 2% default
# Nagios operator maps exit codes directly; no numeric thresholds needed
is_nagios_op = (operator == "nagios")
default_display = "{check_name}: {output}" if is_nagios_op else "(threshold: {op_symbol} {threshold_value})"
display = threshold_config.get("display", default_display)
hysteresis = threshold_config.get("hysteresis", 0.0 if is_nagios_op else 0.02)
enabled = threshold_config.get("enabled", True)
if warning is None and critical is None:
if warning is None and critical is None and not is_nagios_op:
logger.warning("No thresholds defined for %s, skipping", metric_path)
continue
@@ -1016,48 +1034,12 @@ class ThresholdChecker:
import math
display_value = "overdue" if isinstance(value, float) and math.isinf(value) else value
# Format message
if new_level == AlertLevel.OK:
lvl = "RECOVER"
message = f"{metric_path} = {display_value} ({old_level.name} -> OK)"
elif new_level == AlertLevel.WARNING:
lvl = "WARNING"
if threshold_value is not None:
threshold_info = self._format_display(
threshold.display,
value=display_value,
threshold_value=threshold_value,
op_symbol=op_symbol,
plugin_data=plugin_data,
check_name=check_name,
metric_name=metric_name,
)
message = f"{metric_path} = {display_value} {threshold_info}"
else:
message = f"{metric_path} = {display_value}"
elif new_level == AlertLevel.CRITICAL:
lvl = "CRITICAL"
if threshold_value is not None:
threshold_info = self._format_display(
threshold.display,
value=display_value,
threshold_value=threshold_value,
op_symbol=op_symbol,
plugin_data=plugin_data,
check_name=check_name,
metric_name=metric_name,
)
message = f"{metric_path} = {display_value} {threshold_info}"
else:
message = f"{metric_path} = {display_value}"
else:
lvl = "UNKNOWN"
message = f"{metric_path} = {display_value}"
# Format message — for the nagios operator there is no numeric threshold_value;
# render the display template whenever one is available.
has_display = threshold_value is not None or threshold.operator == ComparisonOperator.NAGIOS
# Return the formatted threshold info for storing in AlertState
formatted_threshold_msg = None
if threshold_value is not None and new_level != AlertLevel.OK:
formatted_threshold_msg = self._format_display(
def _fmt():
return self._format_display(
threshold.display,
value=display_value,
threshold_value=threshold_value,
@@ -1067,6 +1049,31 @@ class ThresholdChecker:
metric_name=metric_name,
)
if new_level == AlertLevel.OK:
lvl = "RECOVER"
message = f"{metric_path} = {display_value} ({old_level.name} -> OK)"
elif new_level == AlertLevel.WARNING:
lvl = "WARNING"
if has_display:
message = f"{metric_path} = {display_value} {_fmt()}"
else:
message = f"{metric_path} = {display_value}"
elif new_level == AlertLevel.CRITICAL:
lvl = "CRITICAL"
if has_display:
message = f"{metric_path} = {display_value} {_fmt()}"
else:
message = f"{metric_path} = {display_value}"
else:
lvl = "UNKNOWN"
if has_display:
message = f"{metric_path} = {display_value} {_fmt()}"
else:
message = f"{metric_path} = {display_value}"
# Formatted threshold info stored on AlertState for the UI
formatted_threshold_msg = _fmt() if has_display and new_level != AlertLevel.OK else None
return lvl, message, formatted_threshold_msg
def _send_notification(
@@ -1114,7 +1121,7 @@ class ThresholdChecker:
self,
display_format: str,
value: Any,
threshold_value: float,
threshold_value: Optional[float],
op_symbol: str,
plugin_data: Optional[Dict[str, Any]] = None,
check_name: Optional[str] = None,
@@ -1139,9 +1146,10 @@ class ThresholdChecker:
# Build format context with standard variables
format_context = {
'value': value,
'threshold_value': threshold_value,
'op_symbol': op_symbol,
}
if threshold_value is not None:
format_context['threshold_value'] = threshold_value
# Add generic-match context variables when available
if check_name is not None: