per-client threshold config
This commit is contained in:
@@ -0,0 +1,202 @@
|
||||
# ==============================================================================
|
||||
# Heartbeat Daemon Multi-Threshold Configuration Example
|
||||
# ==============================================================================
|
||||
# This file demonstrates the new multi-threshold configuration feature that allows
|
||||
# different threshold settings for different hosts/clients.
|
||||
#
|
||||
# Features:
|
||||
# - Define multiple named threshold configurations
|
||||
# - Map specific hosts to specific threshold configurations
|
||||
# - Set a default configuration for unmapped hosts
|
||||
# - Backward compatible with single threshold configuration
|
||||
# ==============================================================================
|
||||
|
||||
# Global threshold settings
|
||||
# Interval, in seconds, between repeat notifications for an alert that is
# still ongoing (3600 = re-notify every hour).
threshold_renotify_interval: 3600

# Optional: name of the threshold config used as the default
# (defaults to "default" if not specified).
default_threshold_config: "default"
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Multiple Named Threshold Configurations
|
||||
# ----------------------------------------------------------------------------
|
||||
# Define multiple threshold configurations with different sensitivity levels
|
||||
threshold_configs:
  # Each child key below is a named threshold configuration.
  # NOTE(review): the nesting was reconstructed from a flattened listing;
  # confirm that `hysteresis` and `display` belong beside `operator`.

  # Default configuration - moderate thresholds for most servers
  default:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 80.0
          critical: 90.0
          operator: ">"
        load_1min:
          warning: 4.0
          critical: 8.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 85.0
          critical: 95.0
          operator: ">"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 85.0
              critical: 95.0
              operator: ">"

      rtt:
        # RTT thresholds per remote host
        router:
          warning: 50.0  # ms
          critical: 200.0
        server1:
          warning: 100.0
          critical: 500.0

  # High sensitivity configuration - lower thresholds for critical systems
  high_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 60.0  # Alert earlier
          critical: 75.0
          operator: ">"
          hysteresis: 0.15  # More hysteresis to reduce flapping
        load_1min:
          warning: 2.0
          critical: 4.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 75.0  # Alert at lower memory usage
          critical: 85.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB)"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 75.0
              critical: 85.0
              operator: ">"
          /var:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"

      rtt:
        router:
          warning: 30.0
          critical: 100.0
        server1:
          warning: 50.0
          critical: 200.0

  # Low sensitivity configuration - higher thresholds for development/test systems
  low_sensitivity:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 90.0  # Only alert at very high usage
          critical: 95.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 90.0
          critical: 98.0
          operator: ">"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 90.0
              critical: 95.0
              operator: ">"

      rtt:
        router:
          warning: 100.0
          critical: 500.0

  # Production database servers - specialized thresholds
  database:
    thresholds:
      cpu_monitor:
        cpu_percent:
          warning: 70.0
          critical: 85.0
          operator: ">"

      memory_monitor:
        percent:
          warning: 90.0  # Databases can use high memory
          critical: 97.0
          operator: ">"
          display: "(threshold: {op_symbol} {threshold_value}%, total: {total_gb} GB, available: {available_gb} GB)"

      disk_monitor:
        partitions:
          /:
            percent:
              warning: 80.0
              critical: 90.0
              operator: ">"
          /var/lib/mysql:  # Database data partition
            percent:
              warning: 75.0  # Alert earlier for DB partition
              critical: 85.0
              operator: ">"

      rtt:
        router:
          warning: 20.0  # Stricter latency requirements
          critical: 50.0
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Host to Threshold Configuration Mapping
|
||||
# ----------------------------------------------------------------------------
|
||||
# Map specific hosts to specific threshold configurations
|
||||
# Hosts not listed here will use the default_threshold_config
|
||||
host_threshold_mapping:
  # Keys are host names; values are names of threshold configurations.

  # Critical production servers
  prod-web-01: high_sensitivity
  prod-web-02: high_sensitivity
  prod-api-01: high_sensitivity

  # Database servers
  prod-db-01: database
  prod-db-02: database
  prod-db-replica: database

  # Development and test systems
  dev-server-01: low_sensitivity
  dev-server-02: low_sensitivity
  test-server-01: low_sensitivity
  test-server-02: low_sensitivity

  # Everything else uses 'default' (no need to list explicitly)
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Backward Compatibility Example
|
||||
# ----------------------------------------------------------------------------
|
||||
# The old single threshold format is still supported:
|
||||
# Just use 'thresholds:' directly without 'threshold_configs:'
|
||||
#
|
||||
# thresholds:
#   cpu_monitor:
#     cpu_percent:
#       warning: 80.0
#       critical: 90.0
|
||||
#
|
||||
# This will apply the same thresholds to all hosts.
|
||||
Reference in New Issue
Block a user