fix: change health_ok to status

This commit is contained in:
2026-05-08 16:57:45 -04:00
parent 967e05ed74
commit 217bba1b76
5 changed files with 29 additions and 22 deletions
+6 -6
View File
@@ -268,9 +268,9 @@ The default threshold is equivalent to:
zfs_monitor: zfs_monitor:
pools: pools:
'*': '*':
health_ok: status:
critical: 1 critical: 0
operator: "<" operator: ">"
hysteresis: 0.0 hysteresis: 0.0
display: "ZFS pool {pool_name} is {health}" display: "ZFS pool {pool_name} is {health}"
``` ```
@@ -285,7 +285,7 @@ zfs_monitor:
pools: pools:
# Suppress health alerts for a scratch pool (not mission-critical) # Suppress health alerts for a scratch pool (not mission-critical)
scratch: scratch:
health_ok: status:
enabled: false enabled: false
# Capacity threshold for a specific pool # Capacity threshold for a specific pool
@@ -297,12 +297,12 @@ zfs_monitor:
hysteresis: 0.05 hysteresis: 0.05
``` ```
**Alert state paths** follow the pattern `zfs_monitor.<pool_name>.health_ok`, **Alert state paths** follow the pattern `zfs_monitor.<pool_name>.status`,
so acknowledgements and silences target individual pools: so acknowledgements and silences target individual pools:
``` ```
zfs_monitor.tank.health_ok zfs_monitor.tank.status
zfs_monitor.backup.health_ok zfs_monitor.backup.status
``` ```
### Network Monitor ### Network Monitor
+9 -1
View File
@@ -90,9 +90,17 @@ class ZFSMonitorPlugin(MonitorPlugin):
if self._pools_filter and name not in self._pools_filter: if self._pools_filter and name not in self._pools_filter:
continue continue
health = parts[1].strip() health = parts[1].strip()
if health == "ONLINE":
status = 0
elif health in ("DEGRADED", "ONLINE with errors"):
status = 1
elif health in ("FAULTED", "OFFLINE", "UNAVAIL"):
status = 2
else:
status = 3 # unknown status
pools[name] = { pools[name] = {
"health": health, "health": health,
"health_ok": 1 if health == "ONLINE" else 0, "status": status,
"size": _int(parts[2]), "size": _int(parts[2]),
"alloc": _int(parts[3]), "alloc": _int(parts[3]),
"free": _int(parts[4]), "free": _int(parts[4]),
+4 -4
View File
@@ -139,13 +139,13 @@ thresholds:
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
zfs_monitor: zfs_monitor:
# Pool health check — built-in default; shown here for reference/override. # Pool health check — built-in default; shown here for reference/override.
# health_ok is 1 (ONLINE) or 0 (DEGRADED, SUSPENDED, FAULTED, UNAVAIL…). # status is 0 (ONLINE), 1 (DEGRADED), 2 (FAULTED, OFFLINE, UNAVAIL), or 3 (any other state, e.g. SUSPENDED).
# Use '*' to apply the same rule to every pool, or name a specific pool. # Use '*' to apply the same rule to every pool, or name a specific pool.
pools: pools:
'*': '*':
health_ok: status:
critical: 1 # Alert CRITICAL when pool is not ONLINE critical: 0 # Alert CRITICAL when pool is not ONLINE
operator: "<" operator: ">"
hysteresis: 0.0 # No hysteresis — a degraded pool is always critical hysteresis: 0.0 # No hysteresis — a degraded pool is always critical
display: "ZFS pool {pool_name} is {health}" display: "ZFS pool {pool_name} is {health}"
+2 -3
View File
@@ -108,9 +108,8 @@ THRESHOLD_DEFAULTS = {
'zfs_monitor': { 'zfs_monitor': {
'pools': { 'pools': {
'*': { '*': {
'health_ok': { 'status': {
'critical': 1, 'critical': 0,
'operator': '<', 'operator': '>',
'hysteresis': 0.0, 'hysteresis': 0.0,
'display': 'ZFS pool {pool_name} is {health}', 'display': 'ZFS pool {pool_name} is {health}',
} }
+8 -8
View File
@@ -575,7 +575,7 @@ class ThresholdChecker:
if not isinstance(threshold_config, dict): if not isinstance(threshold_config, dict):
continue continue
# Handle nested metrics (e.g., partitions./.percent or pools.*.health_ok) # Handle nested metrics (e.g., partitions./.percent or pools.*.status)
if metric_name == "partitions": if metric_name == "partitions":
self._parse_partition_thresholds(plugin_name, threshold_config, target_dict) self._parse_partition_thresholds(plugin_name, threshold_config, target_dict)
continue continue
@@ -680,9 +680,9 @@ class ThresholdChecker:
zfs_monitor: zfs_monitor:
pools: pools:
'*': '*':
health_ok: status:
critical: 1 critical: 0
operator: '<' operator: '>'
tank: tank:
capacity: capacity:
warning: 80 warning: 80
@@ -1026,11 +1026,11 @@ class ThresholdChecker:
for pool_name, pool_metrics in pools.items(): for pool_name, pool_metrics in pools.items():
if not isinstance(pool_metrics, dict): if not isinstance(pool_metrics, dict):
continue continue
# Synthesize health_ok from health string for older clients # Synthesize status from health string for older clients
# that predate the health_ok field. # that predate the status field.
pool_metrics_effective = dict(pool_metrics) pool_metrics_effective = dict(pool_metrics)
if "health" in pool_metrics and "health_ok" not in pool_metrics: if "health" in pool_metrics and "status" not in pool_metrics:
pool_metrics_effective["health_ok"] = 1 if pool_metrics["health"] == "ONLINE" else 0 pool_metrics_effective["status"] = 0 if pool_metrics["health"] == "ONLINE" else 1
for metric_name, value in pool_metrics_effective.items(): for metric_name, value in pool_metrics_effective.items():
# Try specific pool name first, then wildcard '*' # Try specific pool name first, then wildcard '*'
metric_path = f"{plugin_name}.{pool_name}.{metric_name}" metric_path = f"{plugin_name}.{pool_name}.{metric_name}"