---
# ==============================================================================
# Heartbeat Daemon Multi-Threshold Configuration Example
# ==============================================================================
# This file demonstrates the new multi-threshold configuration feature that
# allows different threshold settings for different hosts/clients.
#
# Features:
# - Define multiple named threshold configurations
# - Map specific hosts to specific threshold configurations
# - Set a default configuration for unmapped hosts
# - Backward compatible with single threshold configuration
#
# NOTE(review): the nesting below was reconstructed from a whitespace-flattened
# copy of this file. Key order and values are unchanged, but confirm the
# indentation (in particular where `rtt` sits) against the daemon's schema.
# ==============================================================================

# Global threshold settings
# Re-notify every hour for ongoing alerts (seconds)
threshold_renotify_interval: 3600

# Optional: Set default threshold config (defaults to "default" if not specified)
default_threshold_config: "default"

# ----------------------------------------------------------------------------
# Multiple Named Threshold Configurations
# ----------------------------------------------------------------------------
# Define multiple threshold configurations with different sensitivity levels
threshold_configs:
  # Default configuration - moderate thresholds for most servers
  default:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 80.0
          critical: 90.0
          operator: ">"
        load_1min:
          warning: 4.0
          critical: 8.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 85.0
          critical: 95.0
          operator: ">"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 85.0
              critical: 95.0
              operator: ">"
    # RTT thresholds (applies to all hosts)
    # NOTE(review): placed beside `thresholds`; move it inside `thresholds`
    # if that is where the daemon looks for it - TODO confirm.
    rtt:
      warning: 50.0  # ms
      critical: 200.0

  # High sensitivity configuration - lower thresholds for critical systems
  high_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 60.0  # Alert earlier
          critical: 75.0
          operator: ">"
          hysteresis: 0.15  # More hysteresis to reduce flapping
        load_1min:
          warning: 2.0
          critical: 4.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 75.0  # Alert at lower memory usage
          critical: 85.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB)"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 75.0
              critical: 85.0
              operator: ">"
          /var:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
    rtt:
      warning: 30.0
      critical: 100.0

  # Low sensitivity configuration - higher thresholds for development/test systems
  low_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 90.0  # Only alert at very high usage
          critical: 95.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 90.0
          critical: 98.0
          operator: ">"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 90.0
              critical: 95.0
              operator: ">"
    rtt:
      warning: 100.0
      critical: 500.0

  # Production database servers - specialized thresholds
  database:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 70.0
          critical: 85.0
          operator: ">"
      memory_monitor:
        percent:
          warning: 90.0  # Databases can use high memory
          critical: 97.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB, available: {available_gb} GB)"
      disk_monitor:
        partitions:
          /:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
          /var/lib/mysql:  # Database data partition
            percent:
              warning: 75.0  # Alert earlier for DB partition
              critical: 85.0
              operator: ">"
    rtt:
      warning: 20.0  # Stricter latency requirements
      critical: 50.0

# ----------------------------------------------------------------------------
# Host to Threshold Configuration Mapping
# ----------------------------------------------------------------------------
# Map specific hosts to specific threshold configurations.
# In this example the mapping is expressed per host through the
# `threshold_config` key of each entry in the `hosts` section below.

# ----------------------------------------------------------------------------
# Notification Channels
# ----------------------------------------------------------------------------
# Define notification providers centrally with their credentials.
# Each channel has a type (pushover, email, signal, mattermost) and
# type-specific config.
# The credentials below are placeholders; do not commit real secrets to
# version control.
notification_channels:
  # Signal notifications
  # Phone numbers are quoted so YAML loaders keep them as strings; unquoted,
  # a value such as +1234567890 is resolved as an integer and loses its "+".
  signal_ops:
    type: signal
    cli_path: /usr/local/bin/signal-cli
    user: "+1234567890"
    recipient: "+1234567890"

  signal_oncall:
    type: signal
    cli_path: /usr/local/bin/signal-cli
    user: "+1234567890"
    recipient: "+0987654321"

  # Email notifications
  email_ops:
    type: email
    recipients: [ops@example.com, alerts@example.com]
    sender: heartbeat@example.com
    smtp_server: smtp.example.com
    smtp_port: 587
    smtp_user: heartbeat@example.com
    smtp_password: your-smtp-password

  # Pushover notifications
  pushover_urgent:
    type: pushover
    token: your-pushover-app-token
    user: your-pushover-user-key

  # Mattermost notifications
  mattermost_devops:
    type: mattermost
    host: mattermost.example.com
    token: your-webhook-token
    channel: devops-alerts
    username: heartbeat-bot
    icon: https://example.com/heartbeat-icon.png

# Default notification channels (used if host doesn't specify channels)
default_notification_channels: [email_ops]

# ----------------------------------------------------------------------------
# Host Definitions (New Unified Format)
# ----------------------------------------------------------------------------
# Define hosts with threshold configs, monitoring, DNS, and notification settings
hosts:
  # Critical production servers - high sensitivity, multiple notification channels
  prod-web-01:
    threshold_config: high_sensitivity
    watch: true
    notification_channels: [signal_oncall, pushover_urgent, email_ops]
    dyndns: false

  prod-web-02:
    threshold_config: high_sensitivity
    watch: true
    notification_channels: [signal_oncall, pushover_urgent, email_ops]
    dyndns: false

  prod-api-01:
    threshold_config: high_sensitivity
    watch: true
    notification_channels: [signal_oncall, email_ops]
    dyndns: false

  # Database servers - database-specific thresholds
  prod-db-01:
    threshold_config: database
    watch: true
    notification_channels: [signal_ops, email_ops]
    dyndns: false

  prod-db-02:
    threshold_config: database
    watch: true
    notification_channels: [signal_ops, email_ops]
    dyndns: false

  prod-db-replica:
    threshold_config: database
    watch: true
    notification_channels: [email_ops]  # Replica gets email only
    dyndns: false

  # Development servers - low sensitivity, minimal notifications
  dev-server-01:
    threshold_config: low_sensitivity
    watch: false  # Don't monitor dev servers closely
    notification_channels: [email_ops]
    dyndns: false

  dev-server-02:
    threshold_config: low_sensitivity
    watch: false
    notification_channels: [email_ops]
    dyndns: false

  # Test servers
  test-server-01:
    threshold_config: low_sensitivity
    watch: false
    dyndns: false
    # No notification channels - uses default_notification_channels

  # Home server with dynamic DNS
  home-server:
    threshold_config: default
    watch: true
    notification_channels: [signal_ops]
    dyndns: true  # Update DNS when IP changes

# Hosts not listed in the hosts section will use:
# - default_threshold_config for thresholds (falls back to "default")
# - default_notification_channels for notifications

# ----------------------------------------------------------------------------
# Notes on Configuration Structure
# ----------------------------------------------------------------------------
#
# All configuration is centralized in the hosts section. Each host can specify:
# - threshold_config: Name of threshold configuration to use
# - watch: Whether to monitor this host actively (send notifications)
# - notification_channels: List of channels to use for this host
# - dyndns: Whether to update DNS when IP address changes
#
# Notification channels are defined once at the top level and referenced
# by name in host definitions, allowing easy reuse and updates.
#
# For hosts not explicitly listed, the system will still accept heartbeats
# and track their state, but won't apply thresholds or send notifications
# unless default settings are configured.