per-client threshold config

This commit is contained in:
Andreas Wrede
2026-04-01 15:22:42 -04:00
parent 079e84f729
commit 090d341244
7 changed files with 873 additions and 77 deletions
+202
View File
@@ -0,0 +1,202 @@
# ==============================================================================
# Heartbeat Daemon Multi-Threshold Configuration Example
# ==============================================================================
# This file demonstrates the new multi-threshold configuration feature that allows
# different threshold settings for different hosts/clients.
#
# Features:
# - Define multiple named threshold configurations
# - Map specific hosts to specific threshold configurations
# - Set a default configuration for unmapped hosts
# - Backward compatible with single threshold configuration
# ==============================================================================
# Global threshold settings
# Name of the threshold config applied to hosts without an explicit mapping.
# Optional: when omitted it defaults to "default".
default_threshold_config: "default"
# Interval, in seconds, between repeat notifications for ongoing alerts
# (3600 = re-notify every hour).
threshold_renotify_interval: 3600
# ----------------------------------------------------------------------------
# Multiple Named Threshold Configurations
# ----------------------------------------------------------------------------
# Define multiple threshold configurations with different sensitivity levels
# Named threshold configurations. Each entry under 'threshold_configs' is a
# config name; hosts select one by name via 'host_threshold_mapping', and
# unmapped hosts fall back to 'default_threshold_config'.
#
# Layout of each config:
#   <config_name>.thresholds.<monitor>.<metric>.{warning,critical,operator,...}
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. In particular, 'rtt' is assumed to sit under
# 'thresholds' alongside the monitors -- confirm against the daemon's loader.
threshold_configs:
  # Default configuration - moderate thresholds for most servers
  default:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 80.0
          critical: 90.0
          operator: ">"
        load_1min:
          warning: 4.0
          critical: 8.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 85.0
          critical: 95.0
          operator: ">"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 85.0
              critical: 95.0
              operator: ">"
      rtt:
        # RTT thresholds per remote host
        router:
          warning: 50.0  # ms
          critical: 200.0
        server1:
          warning: 100.0
          critical: 500.0

  # High sensitivity configuration - lower thresholds for critical systems
  high_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 60.0  # Alert earlier
          critical: 75.0
          operator: ">"
          hysteresis: 0.15  # More hysteresis to reduce flapping
        load_1min:
          warning: 2.0
          critical: 4.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 75.0  # Alert at lower memory usage
          critical: 85.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB)"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 75.0
              critical: 85.0
              operator: ">"
          /var:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
      rtt:
        router:
          warning: 30.0
          critical: 100.0
        server1:
          warning: 50.0
          critical: 200.0

  # Low sensitivity configuration - higher thresholds for development/test systems
  low_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 90.0  # Only alert at very high usage
          critical: 95.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 90.0
          critical: 98.0
          operator: ">"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 90.0
              critical: 95.0
              operator: ">"
      rtt:
        router:
          warning: 100.0
          critical: 500.0

  # Production database servers - specialized thresholds
  database:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 70.0
          critical: 85.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 90.0  # Databases can use high memory
          critical: 97.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB, available: {available_gb} GB)"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
          /var/lib/mysql:  # Database data partition
            percent:
              warning: 75.0  # Alert earlier for DB partition
              critical: 85.0
              operator: ">"
      rtt:
        router:
          warning: 20.0  # Stricter latency requirements
          critical: 50.0
# ----------------------------------------------------------------------------
# Host to Threshold Configuration Mapping
# ----------------------------------------------------------------------------
# Map specific hosts to specific threshold configurations
# Hosts not listed here will use the default_threshold_config
# Maps a host name (key) to the name of one of the configurations defined under
# 'threshold_configs' (value). Entries must be nested under
# 'host_threshold_mapping'; left unindented they would become unrelated
# top-level keys and the mapping itself would be empty.
host_threshold_mapping:
  # Critical production servers
  prod-web-01: high_sensitivity
  prod-web-02: high_sensitivity
  prod-api-01: high_sensitivity

  # Database servers
  prod-db-01: database
  prod-db-02: database
  prod-db-replica: database

  # Development and test systems
  dev-server-01: low_sensitivity
  dev-server-02: low_sensitivity
  test-server-01: low_sensitivity
  test-server-02: low_sensitivity

  # Everything else uses 'default' (no need to list explicitly)
# ----------------------------------------------------------------------------
# Backward Compatibility Example
# ----------------------------------------------------------------------------
# The old single-threshold format is still supported: use 'thresholds:'
# directly, without 'threshold_configs:'.
#
# thresholds:
#   cpu_monitor:
#     cpu_percent:
#       warning: 80.0
#       critical: 90.0
#
# This will apply the same thresholds to all hosts.