feat: composable threshold_config list for per-host threshold layering

threshold_config in the hosts section now accepts a list of named
configs applied left-to-right on top of the defaults, so focused
override profiles can be mixed without duplication. Single-string
and legacy host_threshold_mapping forms are unchanged.

- Add threshold_raw_configs to store per-config overrides separately
- Normalise threshold_config to list on parse (string or list)
- get_thresholds_for_host folds the list over the default base
- Update README and docs/THRESHOLD_ALERTING.md with examples

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Andreas Wrede
2026-05-02 10:35:23 -04:00
parent 2bd3a9beb6
commit 917d6a401b
3 changed files with 299 additions and 95 deletions
+74 -41
View File
@@ -328,15 +328,18 @@ class ThresholdChecker:
renotify_interval: Seconds between repeat notifications (default: 1 hour)
journal: Optional MessageJournal instance for logging threshold events
"""
# Named threshold configurations: {config_name: {metric_path: ThresholdConfig}}
# Named threshold configurations (pre-merged: defaults + overrides): {config_name: {metric_path: ThresholdConfig}}
self.threshold_configs = {}
# Raw overrides only for each named config (no defaults baked in): {config_name: {metric_path: ThresholdConfig}}
self.threshold_raw_configs: Dict[str, Dict[str, ThresholdConfig]] = {}
# Single threshold set for backward compatibility: {metric_path: ThresholdConfig}
self.thresholds = {}
# Host to config name mapping: {host_name: config_name}
self.host_config_mapping = {}
# Host to ordered list of config names: {host_name: [config_name, ...]}
self.host_config_mapping: Dict[str, List[str]] = {}
# Default config name to use when no mapping exists
self.default_config = "default"
@@ -372,6 +375,7 @@ class ThresholdChecker:
# Clear old configuration
self.threshold_configs.clear()
self.threshold_raw_configs.clear()
self.thresholds.clear()
self.host_config_mapping.clear()
self.grace_seconds = float(config.get("grace", 2))
@@ -424,9 +428,10 @@ class ThresholdChecker:
self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=effective_defaults)
self.threshold_configs["default"] = dict(effective_defaults)
self.threshold_raw_configs["default"] = {}
logger.info("Registered 'default' threshold config with %d metrics", len(effective_defaults))
# Parse each named configuration, seeding it with effective_defaults first
# Parse each named configuration
for config_name, config_data in threshold_configs.items():
if config_name == "default":
continue # already handled above
@@ -440,33 +445,41 @@ class ThresholdChecker:
continue
logger.info("Parsing threshold configuration: %s", config_name)
self.threshold_configs[config_name] = dict(effective_defaults)
# Raw overrides only (used for multi-config layering)
raw_overrides: Dict[str, ThresholdConfig] = {}
thresholds_config = config_data["thresholds"]
for plugin_name, plugin_thresholds in thresholds_config.items():
if not isinstance(plugin_thresholds, dict):
continue
if isinstance(plugin_thresholds, dict):
self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=raw_overrides)
self.threshold_raw_configs[config_name] = raw_overrides
self._parse_plugin_thresholds(
plugin_name,
plugin_thresholds,
target_dict=self.threshold_configs[config_name]
)
# Parse host to config mapping from two possible sources
# 1. New format: hosts section with threshold_config attribute
# Pre-merged version (defaults + overrides) for single-config fast path
self.threshold_configs[config_name] = dict(effective_defaults)
self.threshold_configs[config_name].update(raw_overrides)
# Parse host → config list mapping from two possible sources
def _normalise(value) -> List[str]:
"""Accept a string or list; always return a list."""
if isinstance(value, list):
return [str(v) for v in value]
return [str(value)]
# 1. hosts section with threshold_config attribute (string or list)
if "hosts" in config:
hosts_config = config["hosts"]
if isinstance(hosts_config, dict):
for host_name, host_attrs in hosts_config.items():
if isinstance(host_attrs, dict) and "threshold_config" in host_attrs:
self.host_config_mapping[host_name] = host_attrs["threshold_config"]
# 2. Legacy format: host_threshold_mapping section (for backward compatibility)
self.host_config_mapping[host_name] = _normalise(host_attrs["threshold_config"])
# 2. Legacy host_threshold_mapping section (typically string values; each value is normalised to a list)
if "host_threshold_mapping" in config:
legacy_mapping = config.get("host_threshold_mapping", {})
if isinstance(legacy_mapping, dict):
self.host_config_mapping.update(legacy_mapping)
for host_name, value in legacy_mapping.items():
self.host_config_mapping[host_name] = _normalise(value)
# Set default config (explicit "default_threshold_config" key, otherwise "default")
self.default_config = config.get("default_threshold_config", "default")
@@ -664,35 +677,55 @@ class ThresholdChecker:
)
def get_thresholds_for_host(self, host_name: str) -> Dict[str, ThresholdConfig]:
"""Get the appropriate threshold configuration for a host.
"""Get the effective threshold configuration for a host.
When threshold_config is a list, configs are applied left-to-right on top
of the default thresholds so earlier entries can be overridden by later ones.
Args:
host_name: Name of the host
Returns:
Dictionary of thresholds for this host
"""
# Legacy mode: single threshold set for all hosts
if self.thresholds and not self.threshold_configs:
return self.thresholds
# Multi-config mode: look up host-specific configuration
if self.threshold_configs:
config_name = self.host_config_mapping.get(host_name, self.default_config)
if config_name in self.threshold_configs:
return self.threshold_configs[config_name]
else:
if not self.threshold_configs:
return {}
config_names = self.host_config_mapping.get(host_name)
# No host-specific mapping → return pre-merged default
if not config_names:
return self.threshold_configs.get(self.default_config, {})
# Single config → fast path using pre-merged copy
if len(config_names) == 1:
name = config_names[0]
if name in self.threshold_configs:
return self.threshold_configs[name]
logger.warning(
"Threshold config '%s' not found for host '%s', using default '%s'",
name, host_name, self.default_config,
)
return self.threshold_configs.get(self.default_config, {})
# Multiple configs → start from defaults, layer raw overrides in order
result = dict(self.threshold_configs.get(self.default_config, {}))
for name in config_names:
if name == self.default_config:
continue # defaults already the base
raw = self.threshold_raw_configs.get(name)
if raw is None:
logger.warning(
"Threshold config '%s' not found for host '%s', using default '%s'",
config_name,
host_name,
self.default_config
"Threshold config '%s' not found for host '%s', skipping",
name, host_name,
)
return self.threshold_configs.get(self.default_config, {})
# No thresholds configured
return {}
else:
result.update(raw)
return result
def check_value(
self,