fix: change health_ok to status

2026-05-08 16:57:45 -04:00
parent 967e05ed74
commit 217bba1b76
5 changed files with 29 additions and 22 deletions
@@ -90,9 +90,17 @@ class ZFSMonitorPlugin(MonitorPlugin):
            if self._pools_filter and name not in self._pools_filter:
                continue
            health = parts[1].strip()
+            if health == "ONLINE":
+                status = 0
+            elif health in ("DEGRADED", "ONLINE with errors"):
+                status = 1
+            elif health in ("FAULTED", "OFFLINE", "UNAVAIL"):
+                status = 2
+            else:
+                status = 3  # unknown status
            pools[name] = {
                "health":    health,
-                "health_ok": 1 if health == "ONLINE" else 0,
+                "status": status,
                "size":      _int(parts[2]),
                "alloc":     _int(parts[3]),
                "free":      _int(parts[4]),
@@ -139,13 +139,13 @@ thresholds:
  # ----------------------------------------------------------------------------
  zfs_monitor:
    # Pool health check — built-in default; shown here for reference/override.
-    # health_ok is 1 (ONLINE) or 0 (DEGRADED, SUSPENDED, FAULTED, UNAVAIL…).
+    # status is 0 (ONLINE), 1 (DEGRADED), 2 (FAULTED, OFFLINE, UNAVAIL) or 3 (any other state, e.g. SUSPENDED).
    # Use '*' to apply the same rule to every pool, or name a specific pool.
    pools:
      '*':
-        health_ok:
-          critical: 1           # Alert CRITICAL when pool is not ONLINE
-          operator: "<"
+        status:
+          critical: 0           # Alert CRITICAL when pool is not ONLINE
+          operator: ">"
          hysteresis: 0.0       # No hysteresis — a degraded pool is always critical
          display: "ZFS pool {pool_name} is {health}"

@@ -108,9 +108,8 @@ THRESHOLD_DEFAULTS = {
            'zfs_monitor': {
                'pools': {
                    '*': {
-                        'health_ok': {
-                            'critical': 1,
-                            'operator': '<',
+                        'status': {0,
+                            'operator': '>',
                            'hysteresis': 0.0,
                            'display': 'ZFS pool {pool_name} is {health}',
                        }
@@ -575,7 +575,7 @@ class ThresholdChecker:
            if not isinstance(threshold_config, dict):
                continue
            
-            # Handle nested metrics (e.g., partitions./.percent or pools.*.health_ok)
+            # Handle nested metrics (e.g., partitions./.percent or pools.*.status)
            if metric_name == "partitions":
                self._parse_partition_thresholds(plugin_name, threshold_config, target_dict)
                continue
@@ -680,9 +680,9 @@ class ThresholdChecker:
            zfs_monitor:
              pools:
                '*':
-                  health_ok:
-                    critical: 1
-                    operator: '<'
+                  status:
+                    critical: 0
+                    operator: '>'
                tank:
                  capacity:
                    warning: 80
@@ -1026,11 +1026,11 @@ class ThresholdChecker:
                for pool_name, pool_metrics in pools.items():
                    if not isinstance(pool_metrics, dict):
                        continue
-                    # Synthesize health_ok from health string for older clients
-                    # that predate the health_ok field.
+                    # Synthesize status from the health string for older clients that
+                    # predate the status field: 0 for ONLINE, 1 for any other health value.
                    pool_metrics_effective = dict(pool_metrics)
-                    if "health" in pool_metrics and "health_ok" not in pool_metrics:
-                        pool_metrics_effective["health_ok"] = 1 if pool_metrics["health"] == "ONLINE" else 0
+                    if "health" in pool_metrics and "status" not in pool_metrics:
+                        pool_metrics_effective["status"] = 0 if pool_metrics["health"] == "ONLINE" else 1
                    for metric_name, value in pool_metrics_effective.items():
                        # Try specific pool name first, then wildcard '*'
                        metric_path = f"{plugin_name}.{pool_name}.{metric_name}"