fix: change health_ok to status
This commit is contained in:
@@ -268,9 +268,9 @@ The default threshold is equivalent to:
|
|||||||
zfs_monitor:
|
zfs_monitor:
|
||||||
pools:
|
pools:
|
||||||
'*':
|
'*':
|
||||||
health_ok:
|
status:
|
||||||
critical: 1
|
critical: 0
|
||||||
operator: "<"
|
operator: ">"
|
||||||
hysteresis: 0.0
|
hysteresis: 0.0
|
||||||
display: "ZFS pool {pool_name} is {health}"
|
display: "ZFS pool {pool_name} is {health}"
|
||||||
```
|
```
|
||||||
@@ -285,7 +285,7 @@ zfs_monitor:
|
|||||||
pools:
|
pools:
|
||||||
# Suppress health alerts for a scratch pool (not mission-critical)
|
# Suppress health alerts for a scratch pool (not mission-critical)
|
||||||
scratch:
|
scratch:
|
||||||
health_ok:
|
status:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
||||||
# Capacity threshold for a specific pool
|
# Capacity threshold for a specific pool
|
||||||
@@ -297,12 +297,12 @@ zfs_monitor:
|
|||||||
hysteresis: 0.05
|
hysteresis: 0.05
|
||||||
```
|
```
|
||||||
|
|
||||||
**Alert state paths** follow the pattern `zfs_monitor.<pool_name>.health_ok`,
|
**Alert state paths** follow the pattern `zfs_monitor.<pool_name>.status`,
|
||||||
so acknowledgements and silences target individual pools:
|
so acknowledgements and silences target individual pools:
|
||||||
|
|
||||||
```
|
```
|
||||||
zfs_monitor.tank.health_ok
|
zfs_monitor.tank.status
|
||||||
zfs_monitor.backup.health_ok
|
zfs_monitor.backup.status
|
||||||
```
|
```
|
||||||
|
|
||||||
### Network Monitor
|
### Network Monitor
|
||||||
|
|||||||
@@ -90,9 +90,17 @@ class ZFSMonitorPlugin(MonitorPlugin):
|
|||||||
if self._pools_filter and name not in self._pools_filter:
|
if self._pools_filter and name not in self._pools_filter:
|
||||||
continue
|
continue
|
||||||
health = parts[1].strip()
|
health = parts[1].strip()
|
||||||
|
if health == "ONLINE":
|
||||||
|
status = 0
|
||||||
|
elif health in ("DEGRADED", "ONLINE with errors"):
|
||||||
|
status = 1
|
||||||
|
elif health in ("FAULTED", "OFFLINE", "UNAVAIL"):
|
||||||
|
status = 2
|
||||||
|
else:
|
||||||
|
status = 3 # unknown status
|
||||||
pools[name] = {
|
pools[name] = {
|
||||||
"health": health,
|
"health": health,
|
||||||
"health_ok": 1 if health == "ONLINE" else 0,
|
"status": status,
|
||||||
"size": _int(parts[2]),
|
"size": _int(parts[2]),
|
||||||
"alloc": _int(parts[3]),
|
"alloc": _int(parts[3]),
|
||||||
"free": _int(parts[4]),
|
"free": _int(parts[4]),
|
||||||
|
|||||||
@@ -139,13 +139,13 @@ thresholds:
|
|||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
zfs_monitor:
|
zfs_monitor:
|
||||||
# Pool health check — built-in default; shown here for reference/override.
|
# Pool health check — built-in default; shown here for reference/override.
|
||||||
# health_ok is 1 (ONLINE) or 0 (DEGRADED, SUSPENDED, FAULTED, UNAVAIL…).
|
# status is 0 (ONLINE), 1 (DEGRADED, ONLINE with errors), 2 (FAULTED, OFFLINE, UNAVAIL), or 3 (any other state, e.g. SUSPENDED).
|
||||||
# Use '*' to apply the same rule to every pool, or name a specific pool.
|
# Use '*' to apply the same rule to every pool, or name a specific pool.
|
||||||
pools:
|
pools:
|
||||||
'*':
|
'*':
|
||||||
health_ok:
|
status:
|
||||||
critical: 1 # Alert CRITICAL when pool is not ONLINE
|
critical: 0 # Alert CRITICAL when pool is not ONLINE
|
||||||
operator: "<"
|
operator: ">"
|
||||||
hysteresis: 0.0 # No hysteresis — a degraded pool is always critical
|
hysteresis: 0.0 # No hysteresis — a degraded pool is always critical
|
||||||
display: "ZFS pool {pool_name} is {health}"
|
display: "ZFS pool {pool_name} is {health}"
|
||||||
|
|
||||||
|
|||||||
@@ -108,9 +108,8 @@ THRESHOLD_DEFAULTS = {
|
|||||||
'zfs_monitor': {
|
'zfs_monitor': {
|
||||||
'pools': {
|
'pools': {
|
||||||
'*': {
|
'*': {
|
||||||
'health_ok': {
|
'status': {0,
|
||||||
'critical': 1,
|
'operator': '>',
|
||||||
'operator': '<',
|
|
||||||
'hysteresis': 0.0,
|
'hysteresis': 0.0,
|
||||||
'display': 'ZFS pool {pool_name} is {health}',
|
'display': 'ZFS pool {pool_name} is {health}',
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -575,7 +575,7 @@ class ThresholdChecker:
|
|||||||
if not isinstance(threshold_config, dict):
|
if not isinstance(threshold_config, dict):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Handle nested metrics (e.g., partitions./.percent or pools.*.health_ok)
|
# Handle nested metrics (e.g., partitions./.percent or pools.*.status)
|
||||||
if metric_name == "partitions":
|
if metric_name == "partitions":
|
||||||
self._parse_partition_thresholds(plugin_name, threshold_config, target_dict)
|
self._parse_partition_thresholds(plugin_name, threshold_config, target_dict)
|
||||||
continue
|
continue
|
||||||
@@ -680,9 +680,9 @@ class ThresholdChecker:
|
|||||||
zfs_monitor:
|
zfs_monitor:
|
||||||
pools:
|
pools:
|
||||||
'*':
|
'*':
|
||||||
health_ok:
|
status:
|
||||||
critical: 1
|
critical: 0
|
||||||
operator: '<'
|
operator: '>'
|
||||||
tank:
|
tank:
|
||||||
capacity:
|
capacity:
|
||||||
warning: 80
|
warning: 80
|
||||||
@@ -1026,11 +1026,11 @@ class ThresholdChecker:
|
|||||||
for pool_name, pool_metrics in pools.items():
|
for pool_name, pool_metrics in pools.items():
|
||||||
if not isinstance(pool_metrics, dict):
|
if not isinstance(pool_metrics, dict):
|
||||||
continue
|
continue
|
||||||
# Synthesize health_ok from health string for older clients
|
# Synthesize status from health string for older clients
|
||||||
# that predate the health_ok field.
|
# that predate the status field.
|
||||||
pool_metrics_effective = dict(pool_metrics)
|
pool_metrics_effective = dict(pool_metrics)
|
||||||
if "health" in pool_metrics and "health_ok" not in pool_metrics:
|
if "health" in pool_metrics and "status" not in pool_metrics:
|
||||||
pool_metrics_effective["health_ok"] = 1 if pool_metrics["health"] == "ONLINE" else 0
|
pool_metrics_effective["status"] = 0 if pool_metrics["health"] == "ONLINE" else 1
|
||||||
for metric_name, value in pool_metrics_effective.items():
|
for metric_name, value in pool_metrics_effective.items():
|
||||||
# Try specific pool name first, then wildcard '*'
|
# Try specific pool name first, then wildcard '*'
|
||||||
metric_path = f"{plugin_name}.{pool_name}.{metric_name}"
|
metric_path = f"{plugin_name}.{pool_name}.{metric_name}"
|
||||||
|
|||||||
Reference in New Issue
Block a user