---
# ==============================================================================
# Heartbeat Daemon Multi-Threshold Configuration Example
# ==============================================================================
# This file demonstrates the multi-threshold configuration feature, which
# allows different threshold settings for different hosts/clients.
#
# Features:
# - Define multiple named threshold configurations
# - Map specific hosts to specific threshold configurations
# - Set a default configuration for unmapped hosts
# - Backward compatible with the single-threshold configuration format
# ==============================================================================

# Global threshold settings
# Re-notification interval for ongoing alerts, in seconds (3600 = one hour).
threshold_renotify_interval: 3600

# Optional: name of the threshold configuration applied to hosts that have no
# entry in host_threshold_mapping (defaults to "default" if not specified).
default_threshold_config: "default"

# ----------------------------------------------------------------------------
# Multiple Named Threshold Configurations
# ----------------------------------------------------------------------------
# Define multiple threshold configurations with different sensitivity levels.
# Each named entry holds a `thresholds` mapping; every metric below carries a
# `warning` and a `critical` level, and most also carry a comparison `operator`.
#
# NOTE(review): the nesting below was reconstructed from key order after the
# original indentation was lost - confirm it against the daemon's config schema.
threshold_configs:

  # Default configuration - moderate thresholds for most servers
  default:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 80.0
          critical: 90.0
          operator: ">"
        load_1min:
          warning: 4.0
          critical: 8.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 85.0
          critical: 95.0
          operator: ">"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 85.0
              critical: 95.0
              operator: ">"

      rtt:
        # RTT thresholds per remote host
        router:
          warning: 50.0  # ms
          critical: 200.0
        server1:
          warning: 100.0
          critical: 500.0

  # High sensitivity configuration - lower thresholds for critical systems
  high_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 60.0  # Alert earlier
          critical: 75.0
          operator: ">"
          hysteresis: 0.15  # More hysteresis to reduce flapping
        load_1min:
          warning: 2.0
          critical: 4.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 75.0  # Alert at lower memory usage
          critical: 85.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB)"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 75.0
              critical: 85.0
              operator: ">"
          /var:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"

      rtt:
        router:
          warning: 30.0
          critical: 100.0
        server1:
          warning: 50.0
          critical: 200.0

  # Low sensitivity configuration - higher thresholds for development/test systems
  low_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 90.0  # Only alert at very high usage
          critical: 95.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 90.0
          critical: 98.0
          operator: ">"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 90.0
              critical: 95.0
              operator: ">"

      rtt:
        router:
          warning: 100.0
          critical: 500.0

  # Production database servers - specialized thresholds
  database:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 70.0
          critical: 85.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 90.0  # Databases can use high memory
          critical: 97.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB, available: {available_gb} GB)"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
          /var/lib/mysql:  # Database data partition
            percent:
              warning: 75.0  # Alert earlier for DB partition
              critical: 85.0
              operator: ">"

      rtt:
        router:
          warning: 20.0  # Stricter latency requirements
          critical: 50.0

# ----------------------------------------------------------------------------
# Host to Threshold Configuration Mapping
# ----------------------------------------------------------------------------
# Map specific hosts to specific threshold configurations.
# Each value is the name of an entry under threshold_configs.
# Hosts not listed here will use the default_threshold_config.
host_threshold_mapping:
  # Critical production servers
  prod-web-01: high_sensitivity
  prod-web-02: high_sensitivity
  prod-api-01: high_sensitivity

  # Database servers
  prod-db-01: database
  prod-db-02: database
  prod-db-replica: database

  # Development and test systems
  dev-server-01: low_sensitivity
  dev-server-02: low_sensitivity
  test-server-01: low_sensitivity
  test-server-02: low_sensitivity

  # Everything else uses 'default' (no need to list explicitly)

# ----------------------------------------------------------------------------
# Backward Compatibility Example
# ----------------------------------------------------------------------------
# The old single-threshold format is still supported:
# just use 'thresholds:' directly, without 'threshold_configs:'.
#
# thresholds:
#   cpu_monitor:
#     cpu_percent:
#       warning: 80.0
#       critical: 90.0
#
# This will apply the same thresholds to all hosts.