---
# ==============================================================================
# Heartbeat Daemon Multi-Threshold Configuration Example
# ==============================================================================
# This file demonstrates the multi-threshold configuration feature, which
# allows different threshold settings for different hosts/clients.
#
# Features:
# - Define multiple named threshold configurations
# - Map specific hosts to specific threshold configurations
# - Set a default configuration for unmapped hosts
# - Backward compatible with single threshold configuration
#
# NOTE(review): this file was restored from a copy whose line breaks and
# indentation had been lost. The nesting below (in particular `rtt` as a
# sibling of the *_monitor sections, and `hysteresis` / `display` as siblings
# of `warning` / `critical`) is reconstructed from key order - confirm it
# against the daemon's configuration schema.
# ==============================================================================

# Global threshold settings
# Re-notify every hour for ongoing alerts (value is in seconds)
threshold_renotify_interval: 3600

# Optional: name of the threshold config applied to hosts that are not listed
# in host_threshold_mapping (defaults to "default" if not specified)
default_threshold_config: "default"

# ----------------------------------------------------------------------------
# Multiple Named Threshold Configurations
# ----------------------------------------------------------------------------
# Define multiple threshold configurations with different sensitivity levels.
# Each entry is referenced by name from default_threshold_config and
# host_threshold_mapping.
threshold_configs:
  # Default configuration - moderate thresholds for most servers
  default:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 80.0
          critical: 90.0
          operator: ">"
        load_1min:
          warning: 4.0
          critical: 8.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 85.0
          critical: 95.0
          operator: ">"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 85.0
              critical: 95.0
              operator: ">"
      rtt:  # RTT thresholds per remote host
        router:
          warning: 50.0  # ms
          critical: 200.0
        server1:
          warning: 100.0
          critical: 500.0

  # High sensitivity configuration - lower thresholds for critical systems
  high_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 60.0  # Alert earlier
          critical: 75.0
          operator: ">"
          hysteresis: 0.15  # More hysteresis to reduce flapping
        load_1min:
          warning: 2.0
          critical: 4.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 75.0  # Alert at lower memory usage
          critical: 85.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB)"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 75.0
              critical: 85.0
              operator: ">"
          /var:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
      rtt:
        router:
          warning: 30.0
          critical: 100.0
        server1:
          warning: 50.0
          critical: 200.0

  # Low sensitivity configuration - higher thresholds for development/test
  # systems
  low_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 90.0  # Only alert at very high usage
          critical: 95.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 90.0
          critical: 98.0
          operator: ">"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 90.0
              critical: 95.0
              operator: ">"
      rtt:
        router:
          warning: 100.0
          critical: 500.0

  # Production database servers - specialized thresholds
  database:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 70.0
          critical: 85.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 90.0  # Databases can use high memory
          critical: 97.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB, available: {available_gb} GB)"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
          /var/lib/mysql:  # Database data partition
            percent:
              warning: 75.0  # Alert earlier for DB partition
              critical: 85.0
              operator: ">"
      rtt:
        router:
          warning: 20.0  # Stricter latency requirements
          critical: 50.0

# ----------------------------------------------------------------------------
# Host to Threshold Configuration Mapping
# ----------------------------------------------------------------------------
# Map specific hosts to specific threshold configurations.
# Hosts not listed here will use the default_threshold_config.
host_threshold_mapping:
  # Critical production servers
  prod-web-01: high_sensitivity
  prod-web-02: high_sensitivity
  prod-api-01: high_sensitivity

  # Database servers
  prod-db-01: database
  prod-db-02: database
  prod-db-replica: database

  # Development and test systems
  dev-server-01: low_sensitivity
  dev-server-02: low_sensitivity
  test-server-01: low_sensitivity
  test-server-02: low_sensitivity

  # Everything else uses 'default' (no need to list explicitly)

# ----------------------------------------------------------------------------
# Backward Compatibility Example
# ----------------------------------------------------------------------------
# The old single threshold format is still supported:
# Just use 'thresholds:' directly without 'threshold_configs:'
#
# thresholds:
#   cpu_monitor:
#     cpu_percent:
#       warning: 80.0
#       critical: 90.0
#
# This will apply the same thresholds to all hosts.