From b95f1a5bb72399aa7ab0a956c3295b839ae58d24 Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Fri, 8 May 2026 17:18:41 -0400 Subject: [PATCH] fix: agree: zpool ONLINE=OK, DEGRADED=WARNING, all else is CRITICAL --- docs/THRESHOLD_ALERTING.md | 3 ++- hbd/config_thresholds_example.yaml | 3 ++- hbd/server/config.py | 3 ++- hbd/server/threshold.py | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/THRESHOLD_ALERTING.md b/docs/THRESHOLD_ALERTING.md index cb35b12..b104436 100644 --- a/docs/THRESHOLD_ALERTING.md +++ b/docs/THRESHOLD_ALERTING.md @@ -269,7 +269,8 @@ zfs_monitor: pools: '*': status: - critical: 1 + warning: 1 + critical: 2 operator: ">" hysteresis: 0.0 display: "ZFS pool {pool_name} is {health}" diff --git a/hbd/config_thresholds_example.yaml b/hbd/config_thresholds_example.yaml index 698efec..aceb593 100644 --- a/hbd/config_thresholds_example.yaml +++ b/hbd/config_thresholds_example.yaml @@ -144,7 +144,8 @@ thresholds: pools: '*': status: - critical: 0 # Alert CRITICAL when pool is not ONLINE + warning: 1 # Alert WARNING when pool is DEGRADED + critical: 2 # Alert CRITICAL when pool is SUSPENDED/FAULTED/UNAVAIL operator: ">" hysteresis: 0.0 # No hysteresis — a degraded pool is always critical display: "ZFS pool {pool_name} is {health}" diff --git a/hbd/server/config.py b/hbd/server/config.py index b99adff..5d1d5cf 100644 --- a/hbd/server/config.py +++ b/hbd/server/config.py @@ -109,7 +109,8 @@ THRESHOLD_DEFAULTS = { 'pools': { '*': { 'status': { - 'critical': 1, # Alert if pool health is not ONLINE + 'warning': 1, + 'critical': 2, 'operator': '>', 'hysteresis': 0.0, 'display': 'ZFS pool {pool_name} is {health}' diff --git a/hbd/server/threshold.py b/hbd/server/threshold.py index 5310f5c..81d37dc 100644 --- a/hbd/server/threshold.py +++ b/hbd/server/threshold.py @@ -681,7 +681,8 @@ class ThresholdChecker: pools: '*': status: - critical: 0 + warning: 1 + critical: 2 operator: '>' tank: capacity: