Fix RTT handling, including a bug in the time computation
This commit is contained in:
@@ -51,13 +51,9 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
# RTT thresholds per remote host
|
||||
router:
|
||||
warning: 50.0 # ms
|
||||
critical: 200.0
|
||||
server1:
|
||||
warning: 100.0
|
||||
critical: 500.0
|
||||
# RTT thresholds (applies to all hosts)
|
||||
warning: 50.0 # ms
|
||||
critical: 200.0
|
||||
|
||||
# High sensitivity configuration - lower thresholds for critical systems
|
||||
high_sensitivity:
|
||||
@@ -94,12 +90,8 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
router:
|
||||
warning: 30.0
|
||||
critical: 100.0
|
||||
server1:
|
||||
warning: 50.0
|
||||
critical: 200.0
|
||||
warning: 30.0
|
||||
critical: 100.0
|
||||
|
||||
# Low sensitivity configuration - higher thresholds for development/test systems
|
||||
low_sensitivity:
|
||||
@@ -125,9 +117,8 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
router:
|
||||
warning: 100.0
|
||||
critical: 500.0
|
||||
warning: 100.0
|
||||
critical: 500.0
|
||||
|
||||
# Production database servers - specialized thresholds
|
||||
database:
|
||||
@@ -159,44 +150,147 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
router:
|
||||
warning: 20.0 # Stricter latency requirements
|
||||
critical: 50.0
|
||||
warning: 20.0 # Stricter latency requirements
|
||||
critical: 50.0
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Host to Threshold Configuration Mapping
|
||||
# ----------------------------------------------------------------------------
|
||||
# Map specific hosts to specific threshold configurations
|
||||
# Hosts not listed here will use the default_threshold_config
|
||||
host_threshold_mapping:
|
||||
# Critical production servers
|
||||
prod-web-01: high_sensitivity
|
||||
prod-web-02: high_sensitivity
|
||||
prod-api-01: high_sensitivity
|
||||
# ----------------------------------------------------------------------------
|
||||
# Notification Channels
|
||||
# ----------------------------------------------------------------------------
|
||||
# Define notification providers centrally with their credentials
|
||||
# Each channel has a type (pushover, email, signal, mattermost) and type-specific config
|
||||
notification_channels:
|
||||
# Signal notifications
|
||||
signal_ops:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +1234567890
|
||||
recipient: +1234567890
|
||||
|
||||
# Database servers
|
||||
prod-db-01: database
|
||||
prod-db-02: database
|
||||
prod-db-replica: database
|
||||
signal_oncall:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +1234567890
|
||||
recipient: +0987654321
|
||||
|
||||
# Development and test systems
|
||||
dev-server-01: low_sensitivity
|
||||
dev-server-02: low_sensitivity
|
||||
test-server-01: low_sensitivity
|
||||
test-server-02: low_sensitivity
|
||||
# Email notifications
|
||||
email_ops:
|
||||
type: email
|
||||
recipients: [ops@example.com, alerts@example.com]
|
||||
sender: heartbeat@example.com
|
||||
smtp_server: smtp.example.com
|
||||
smtp_port: 587
|
||||
smtp_user: heartbeat@example.com
|
||||
smtp_password: your-smtp-password
|
||||
|
||||
# Everything else uses 'default' (no need to list explicitly)
|
||||
# Pushover notifications
|
||||
pushover_urgent:
|
||||
type: pushover
|
||||
token: your-pushover-app-token
|
||||
user: your-pushover-user-key
|
||||
|
||||
# Mattermost notifications
|
||||
mattermost_devops:
|
||||
type: mattermost
|
||||
host: mattermost.example.com
|
||||
token: your-webhook-token
|
||||
channel: devops-alerts
|
||||
username: heartbeat-bot
|
||||
icon: https://example.com/heartbeat-icon.png
|
||||
|
||||
# Default notification channels (used if host doesn't specify channels)
|
||||
default_notification_channels: [email_ops]
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Backward Compatibility Example
|
||||
# Host Definitions (New Unified Format)
|
||||
# ----------------------------------------------------------------------------
|
||||
# The old single threshold format is still supported:
|
||||
# Just use 'thresholds:' directly without 'threshold_configs:'
|
||||
# Define hosts with threshold configs, monitoring, DNS, and notification settings
|
||||
hosts:
|
||||
# Critical production servers - high sensitivity, multiple notification channels
|
||||
prod-web-01:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_oncall, pushover_urgent, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-web-02:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_oncall, pushover_urgent, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-api-01:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_oncall, email_ops]
|
||||
dyndns: false
|
||||
|
||||
# Database servers - database-specific thresholds
|
||||
prod-db-01:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels: [signal_ops, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-db-02:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels: [signal_ops, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-db-replica:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels: [email_ops] # Replica gets email only
|
||||
dyndns: false
|
||||
|
||||
# Development servers - low sensitivity, minimal notifications
|
||||
dev-server-01:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false # Don't monitor dev servers closely
|
||||
notification_channels: [email_ops]
|
||||
dyndns: false
|
||||
|
||||
dev-server-02:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false
|
||||
notification_channels: [email_ops]
|
||||
dyndns: false
|
||||
|
||||
# Test servers
|
||||
test-server-01:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false
|
||||
dyndns: false
|
||||
# No notification channels - uses default_notification_channels
|
||||
|
||||
# Home server with dynamic DNS
|
||||
home-server:
|
||||
threshold_config: default
|
||||
watch: true
|
||||
notification_channels: [signal_ops]
|
||||
dyndns: true # Update DNS when IP changes
|
||||
|
||||
# Hosts not listed in the hosts section will use:
|
||||
# - default_threshold_config for thresholds (falls back to "default")
|
||||
# - default_notification_channels for notifications
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Notes on Configuration Structure
|
||||
# ----------------------------------------------------------------------------
|
||||
#
|
||||
# All configuration is centralized in the hosts section. Each host can specify:
|
||||
# - threshold_config: Name of threshold configuration to use
|
||||
# - watch: Whether to monitor this host actively (send notifications)
|
||||
# - notification_channels: List of channels to use for this host
|
||||
# - dyndns: Whether to update DNS when IP address changes
|
||||
#
|
||||
# thresholds:
|
||||
# cpu_monitor:
|
||||
# cpu_percent:
|
||||
# warning: 80.0
|
||||
# critical: 90.0
|
||||
# Notification channels are defined once at the top level and referenced
|
||||
# by name in host definitions, allowing easy reuse and updates.
|
||||
#
|
||||
# This will apply the same thresholds to all hosts.
|
||||
# For hosts not explicitly listed, the system will still accept heartbeats
|
||||
# and track their state, but won't apply thresholds or send notifications
|
||||
# unless default settings are configured.
|
||||
|
||||
Reference in New Issue
Block a user