Fix rtt, including bug in time compute
This commit is contained in:
@@ -7,33 +7,125 @@ logfile: "/home/andreas/logs/heartbeat/heartbeat.log"
|
||||
logfmt: "msg"
|
||||
grace: 40
|
||||
interval: 10
|
||||
watchhosts:
|
||||
# "localhost":
|
||||
# "haschloss" :
|
||||
# "cotgate":
|
||||
"wentworth":
|
||||
notify: +4915123456789
|
||||
src: "signal"
|
||||
"y":
|
||||
notify: +4915123456789
|
||||
src: "signal"
|
||||
"winter":
|
||||
notify: +14168226179
|
||||
src: "signal"
|
||||
dyndnshosts: {"haschloss", "wayback", "wertvoll", "weekend", "cotgate", "rvgate", "draper", "eris"}
|
||||
|
||||
# Notification Channels - Define notification providers centrally
|
||||
# Each channel has a type (pushover, email, signal, mattermost) and type-specific configuration
|
||||
notification_channels:
|
||||
|
||||
pushover_standard:
|
||||
type: pushover
|
||||
token: ac7NLX2rPjXFareeDgLpXNoDf4iFmf
|
||||
user: uDhH33UjQQDYtNzJb1ThRiWb9ingGK
|
||||
|
||||
signal_andreas:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +14168226179
|
||||
recipient: +14168226179
|
||||
|
||||
email_andreas:
|
||||
type: email
|
||||
recipients: [aew.hbd.notify@wrede.ca]
|
||||
sender: aew.hbd@wrede.ca
|
||||
smtp_server: smtp.fastmail.com
|
||||
smtp_port: 587
|
||||
smtp_user: andreas@wrede.ca
|
||||
smtp_password: pvtvefyp5gbhnch2
|
||||
|
||||
# Example additional channels (commented out)
|
||||
# pushover_urgent:
|
||||
# type: pushover
|
||||
# token: your-app-token
|
||||
# user: your-user-key
|
||||
#
|
||||
mattermost_devops:
|
||||
type: mattermost
|
||||
host: mattermost.example.com
|
||||
token: webhook-token
|
||||
channel: devops-alerts
|
||||
username: heartbeat-bot
|
||||
icon: https://example.com/heartbeat-icon.png
|
||||
|
||||
# Default notification channels (used if host doesn't specify channels)
|
||||
default_notification_channels: [pushover_standard]
|
||||
|
||||
# Host definitions - combines threshold mapping, watch status, DNS updates, and notifications
|
||||
hosts:
|
||||
wentworth:
|
||||
threshold_config: default
|
||||
watch: true
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: false
|
||||
|
||||
y:
|
||||
threshold_config: default
|
||||
watch: true
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: false
|
||||
|
||||
winter:
|
||||
threshold_config: default
|
||||
watch: true
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: false
|
||||
|
||||
wally:
|
||||
threshold_config: freebsd_server
|
||||
watch: false
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: false
|
||||
|
||||
eris:
|
||||
threshold_config: truenas_server
|
||||
watch: false
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: false
|
||||
|
||||
haschloss:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
dyndns: true
|
||||
|
||||
wayback:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: true
|
||||
|
||||
wertvoll:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: true
|
||||
|
||||
weekend:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: true
|
||||
|
||||
cotgate:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
dyndns: true
|
||||
|
||||
rvgate:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
dyndns: true
|
||||
|
||||
draper:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
notification_channels: [pushover_standard]
|
||||
dyndns: true
|
||||
|
||||
# Hosts to drop/ignore
|
||||
drophosts: {"unknown", "wookie15", "wort"}
|
||||
|
||||
nsupdate_bin: "/usr/local/bin/nsupdate"
|
||||
pushover_token: "ac7NLX2rPjXFareeDgLpXNoDf4iFmf"
|
||||
pushover_user: "uDhH33UjQQDYtNzJb1ThRiWb9ingGK"
|
||||
pushsrv: "pushover"
|
||||
|
||||
dyndomains: {"wrede.org"}
|
||||
toemail: ["aew.hbd.notify@wrede.ca"]
|
||||
fromemail: "aew.hbd@wrede.ca"
|
||||
smtpserver: "smtp.fastmail.com"
|
||||
smtpuser: "andreas@wrede.ca"
|
||||
smtppassword: "r8psra6wj6gcakkp"
|
||||
smtpport: 587
|
||||
|
||||
ws_port: 50005
|
||||
# wss_port: 50006 # Commented out - use plain WebSocket instead of secure WSS
|
||||
@@ -49,15 +141,16 @@ journal_file: messages.journal # Base filename
|
||||
journal_max_size: 104857600 # Max size (100MB default)
|
||||
journal_max_backups: 10 # Number of backups to keep
|
||||
|
||||
thresholds:
|
||||
threshold_configs:
|
||||
default:
|
||||
thresholds:
|
||||
cpu_monitor:
|
||||
cpu_percent:
|
||||
warning: 80.0
|
||||
critical: 90.0
|
||||
memory_monitor:
|
||||
percent:
|
||||
warning: 3.0
|
||||
warning: 85.0
|
||||
critical: 95.0
|
||||
disk_monitor:
|
||||
partitions:
|
||||
@@ -66,12 +159,12 @@ thresholds:
|
||||
warning: 85.0
|
||||
critical: 90.0
|
||||
rtt:
|
||||
y:
|
||||
warning: 30
|
||||
critical: 250.0
|
||||
|
||||
|
||||
freebsd_server:
|
||||
thresholds:
|
||||
cpu_monitor:
|
||||
cpu_percent:
|
||||
warning: 80.0
|
||||
@@ -111,11 +204,11 @@ thresholds:
|
||||
critical: 2
|
||||
operator: ">="
|
||||
rtt:
|
||||
y:
|
||||
warning: 30
|
||||
critical: 250.0
|
||||
|
||||
truenas_server:
|
||||
thresholds:
|
||||
cpu_monitor:
|
||||
cpu_percent:
|
||||
warning: 80.0
|
||||
@@ -155,14 +248,7 @@ thresholds:
|
||||
critical: 2
|
||||
operator: ">="
|
||||
rtt:
|
||||
y:
|
||||
warning: 30
|
||||
critical: 250.0
|
||||
|
||||
|
||||
host_threshold_mapping:
|
||||
# Critical production servers
|
||||
|
||||
wally: freebsd_server
|
||||
eris: truenas_server
|
||||
|
||||
|
||||
Binary file not shown.
@@ -0,0 +1,533 @@
|
||||
# Notification System
|
||||
|
||||
## Overview
|
||||
|
||||
The Heartbeat Monitoring System includes a flexible notification system that can send alerts through multiple channels including Email, Pushover, Signal, and Mattermost. The system supports centralized channel definitions with per-host routing, allowing fine-grained control over notification delivery.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Components
|
||||
|
||||
1. **Notification Channels** (`notification_channels` in config)
|
||||
- Centralized definitions of notification providers
|
||||
- Each channel has a type and type-specific credentials
|
||||
- Reusable across multiple hosts
|
||||
|
||||
2. **Channel Dispatcher** (`hbd/server/notify.py`)
|
||||
- `pushmsg_for_host(hostname, message)`: Main entry point for host-specific notifications
|
||||
- `_dispatch_to_channel(channel_name, channel_config, message)`: Routes to specific provider
|
||||
- Provider functions: `pushover()`, `pushsignal()`, `pushmattermost()`, `send_email()`
|
||||
|
||||
3. **Configuration Utilities** (`hbd/server/config.py`)
|
||||
- `get_notification_channels_for_host(config, hostname)`: Retrieves channel names for a host
|
||||
- `get_notification_channels_config(config, hostname)`: Retrieves full channel configurations
|
||||
- `get_channel_config(config, channel_name)`: Gets configuration for a specific channel
|
||||
|
||||
4. **Integration Points**
|
||||
- **Threshold alerts**: `threshold.py` calls `notify_mod.pushmsg_for_host()`
|
||||
- **Heartbeat events**: `udp.py` calls `notify_mod.pushmsg_for_host()` for boot/shutdown/overdue
|
||||
- **Custom alerts**: Any code can call `notify_mod.pushmsg_for_host(hostname, message)`
|
||||
|
||||
## Configuration
|
||||
|
||||
### Centralized Channel Definitions
|
||||
|
||||
Define notification channels once in your configuration file:
|
||||
|
||||
```yaml
|
||||
notification_channels:
|
||||
# Signal notifications
|
||||
signal_ops:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +1234567890 # Your Signal number
|
||||
recipient: +1234567890 # Recipient number
|
||||
|
||||
signal_oncall:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +1234567890
|
||||
recipient: +0987654321 # Different recipient
|
||||
|
||||
# Email notifications
|
||||
email_ops:
|
||||
type: email
|
||||
recipients:
|
||||
- ops@example.com
|
||||
- alerts@example.com
|
||||
sender: heartbeat@example.com
|
||||
smtp_server: smtp.example.com
|
||||
smtp_port: 587
|
||||
smtp_user: heartbeat@example.com
|
||||
smtp_password: your-smtp-password
|
||||
|
||||
email_devteam:
|
||||
type: email
|
||||
recipients: [dev-alerts@example.com]
|
||||
sender: heartbeat-dev@example.com
|
||||
smtp_server: smtp.example.com
|
||||
smtp_port: 587
|
||||
smtp_user: heartbeat-dev@example.com
|
||||
smtp_password: your-smtp-password
|
||||
|
||||
# Pushover notifications
|
||||
pushover_urgent:
|
||||
type: pushover
|
||||
token: your-pushover-app-token
|
||||
user: your-pushover-user-key
|
||||
|
||||
pushover_normal:
|
||||
type: pushover
|
||||
token: your-pushover-app-token
|
||||
user: another-user-key
|
||||
|
||||
# Mattermost notifications
|
||||
mattermost_devops:
|
||||
type: mattermost
|
||||
host: mattermost.example.com
|
||||
token: your-webhook-token
|
||||
channel: devops-alerts
|
||||
username: heartbeat-bot
|
||||
icon: https://example.com/heartbeat-icon.png
|
||||
```
|
||||
|
||||
### Default Notification Channels
|
||||
|
||||
Specify default channels for hosts that don't have specific channel assignments:
|
||||
|
||||
```yaml
|
||||
default_notification_channels:
|
||||
- email_ops
|
||||
- mattermost_devops
|
||||
```
|
||||
|
||||
Hosts without `notification_channels` defined will use these defaults.
|
||||
|
||||
### Per-Host Channel Assignment
|
||||
|
||||
Assign specific channels to each host in the `hosts` section:
|
||||
|
||||
```yaml
|
||||
hosts:
|
||||
# Critical production web server - multiple channels for redundancy
|
||||
prod-web-01:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels:
|
||||
- signal_oncall # Immediate mobile notification
|
||||
- pushover_urgent # Secondary mobile notification
|
||||
- email_ops # Email for record keeping
|
||||
dyndns: false
|
||||
|
||||
# Database server - ops team notifications only
|
||||
prod-db-01:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels:
|
||||
- signal_ops
|
||||
- email_ops
|
||||
dyndns: false
|
||||
|
||||
# Development server - email only, no urgent notifications
|
||||
dev-server-01:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false
|
||||
notification_channels:
|
||||
- email_devteam
|
||||
dyndns: false
|
||||
|
||||
# Test server - uses default_notification_channels
|
||||
test-server-01:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
dyndns: false
|
||||
# No notification_channels specified = uses default_notification_channels
|
||||
```
|
||||
|
||||
## Channel Types
|
||||
|
||||
### Email
|
||||
|
||||
Sends notifications via SMTP.
|
||||
|
||||
**Configuration fields:**
|
||||
```yaml
|
||||
type: email
|
||||
recipients: [email1@example.com, email2@example.com] # Required: List of recipients
|
||||
sender: heartbeat@example.com # Required: From address
|
||||
smtp_server: smtp.example.com # Required: SMTP server hostname
|
||||
smtp_port: 587 # Optional: Default 587
|
||||
smtp_user: heartbeat@example.com # Optional: For authenticated SMTP
|
||||
smtp_password: your-password # Optional: For authenticated SMTP
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Supports multiple recipients
|
||||
- TLS/STARTTLS support on port 587
|
||||
- Authenticated and unauthenticated SMTP
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
notification_channels:
|
||||
email_critical:
|
||||
type: email
|
||||
recipients: [admin@example.com, oncall@example.com]
|
||||
sender: alerts@example.com
|
||||
smtp_server: smtp.fastmail.com
|
||||
smtp_port: 587
|
||||
smtp_user: alerts@example.com
|
||||
smtp_password: app-specific-password
|
||||
```
|
||||
|
||||
### Pushover
|
||||
|
||||
Sends push notifications to mobile devices via Pushover API.
|
||||
|
||||
**Configuration fields:**
|
||||
```yaml
|
||||
type: pushover
|
||||
token: your-application-token # Required: Your Pushover app token
|
||||
user: your-user-key # Required: Recipient's user key
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Instant mobile push notifications
|
||||
- Works on iOS and Android
|
||||
- Supports delivery confirmations
|
||||
|
||||
**Setup:**
|
||||
1. Create a Pushover account at https://pushover.net
|
||||
2. Create an application to get your app token
|
||||
3. Note your user key from your account dashboard
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
notification_channels:
|
||||
pushover_admin:
|
||||
type: pushover
|
||||
token: azGDORePK8gMaC0QOYAMyEEuzJnyUi
|
||||
user: uQiRzpo4DXghDmr9QzzfQu27cmVRsG
|
||||
```
|
||||
|
||||
### Signal
|
||||
|
||||
Sends notifications via Signal messenger using signal-cli.
|
||||
|
||||
**Configuration fields:**
|
||||
```yaml
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli # Optional: Path to signal-cli binary
|
||||
user: +1234567890 # Required: Your Signal phone number
|
||||
recipient: +0987654321 # Required: Recipient phone number
|
||||
```
|
||||
|
||||
**Prerequisites:**
|
||||
1. Install signal-cli: https://github.com/AsamK/signal-cli
|
||||
2. Register signal-cli with your phone number:
|
||||
```bash
|
||||
signal-cli -u +1234567890 register
|
||||
signal-cli -u +1234567890 verify CODE
|
||||
```
|
||||
3. Ensure signal-cli is in PATH or specify full path in config
|
||||
|
||||
**Features:**
|
||||
- End-to-end encrypted messaging
|
||||
- Works without the phone being online
|
||||
- No API fees or rate limits
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
notification_channels:
|
||||
signal_admin:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +12025551234
|
||||
recipient: +12025559999
|
||||
```
|
||||
|
||||
### Mattermost
|
||||
|
||||
Sends notifications to Mattermost team chat via incoming webhooks.
|
||||
|
||||
**Configuration fields:**
|
||||
```yaml
|
||||
type: mattermost
|
||||
host: mattermost.example.com # Required: Mattermost server hostname
|
||||
token: your-webhook-token # Required: Incoming webhook token
|
||||
channel: channel-name # Required: Target channel name
|
||||
username: heartbeat-bot # Optional: Bot display name
|
||||
icon: https://example.com/icon.png # Optional: Bot icon URL
|
||||
```
|
||||
|
||||
**Prerequisites:**
|
||||
1. Enable incoming webhooks in Mattermost
|
||||
2. Create an incoming webhook for your team
|
||||
3. Note the webhook token from the webhook URL
|
||||
|
||||
**Features:**
|
||||
- Team-wide visibility
|
||||
- Rich formatting support
|
||||
- Message threading
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
notification_channels:
|
||||
mattermost_ops:
|
||||
type: mattermost
|
||||
host: chat.example.com
|
||||
token: abc123def456ghi789
|
||||
channel: infrastructure-alerts
|
||||
username: heartbeat-monitor
|
||||
icon: https://example.com/heartbeat-icon.png
|
||||
```
|
||||
|
||||
## Notification Events
|
||||
|
||||
The system sends notifications for various events:
|
||||
|
||||
### Threshold Alerts
|
||||
|
||||
When monitored metrics exceed configured thresholds:
|
||||
|
||||
- **State changes**: OK → WARNING, WARNING → CRITICAL, CRITICAL → OK
|
||||
- **Format**: `{LEVEL}: {hostname} - {metric_path} = {value} {threshold_info}`
|
||||
- **Example**: `CRITICAL: prod-web-01 - cpu_monitor.cpu_percent = 95.2 (threshold: > 90.0)`
|
||||
- **Re-notifications**: Periodic reminders for ongoing alerts (default: hourly)
|
||||
|
||||
### Heartbeat Events
|
||||
|
||||
Host lifecycle events:
|
||||
|
||||
- **Host boot**: `{hostname} booted`
|
||||
- **Host shutdown**: `{hostname} {connection_type} shutdown`
|
||||
- **Host recovery**: `{hostname} {connection_type} is back`
|
||||
- **Connection issues**: `{hostname} {message}`
|
||||
- **Host overdue**: `{hostname} {connection_type} overdue`
|
||||
|
||||
Only hosts with `watch: true` send heartbeat event notifications.
|
||||
|
||||
### Custom Alerts
|
||||
|
||||
Application code can send custom notifications:
|
||||
|
||||
```python
|
||||
from hbd.server import notify as notify_mod
|
||||
|
||||
# Send to host-specific channels
|
||||
notify_mod.pushmsg_for_host("prod-web-01", "Custom alert message")
|
||||
|
||||
# Send using global config
|
||||
notify_mod.pushmsg_from_config("Global notification")
|
||||
|
||||
# Send to specific config
|
||||
notify_mod.pushmsg(custom_config_dict, "Targeted notification")
|
||||
```
|
||||
|
||||
## Design Principles
|
||||
|
||||
The notification system follows these core principles:
|
||||
|
||||
- **Centralization**: Define notification providers once, reference them by name
|
||||
- **Flexibility**: Each host can use different channels for different notification needs
|
||||
- **Redundancy**: Critical hosts can specify multiple channels; every configured channel receives the alert, so it still gets through if one provider fails
|
||||
- **Clarity**: Clean separation between channel definition and channel assignment
|
||||
- **Type Safety**: Provider-specific validation at configuration time
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Channel Organization
|
||||
|
||||
- **Create purpose-specific channels**: `email_ops`, `signal_oncall`, `pushover_urgent`
|
||||
- **Separate by team/role**: `email_devteam`, `signal_dbateam`, `mattermost_security`
|
||||
- **Use descriptive names**: Channel names appear in logs and debugging
|
||||
|
||||
### Redundancy
|
||||
|
||||
For critical hosts, use multiple notification channels:
|
||||
|
||||
```yaml
|
||||
hosts:
|
||||
critical-db:
|
||||
notification_channels:
|
||||
- signal_oncall # Primary: Mobile alert
|
||||
- pushover_urgent # Backup: Different mobile platform
|
||||
- email_ops # Tertiary: Email for record-keeping
|
||||
```
|
||||
|
||||
### Notification Fatigue Prevention
|
||||
|
||||
- **Use `watch: false`** for non-critical hosts
|
||||
- **Configure appropriate thresholds** to avoid false positives
|
||||
- **Set different channels for different severities**
|
||||
- **Use `default_notification_channels`** for baseline, add more for critical systems
|
||||
|
||||
### Security
|
||||
|
||||
- **Protect credentials**: Use file permissions to protect config files with passwords/tokens
|
||||
- **Rotate tokens**: Periodically rotate API tokens and passwords
|
||||
- **Use app-specific passwords**: For email, use app-specific passwords instead of main account password
|
||||
- **Separate accounts**: Consider separate notification accounts for different environments (prod vs dev)
|
||||
|
||||
### Testing
|
||||
|
||||
Test notification channels before relying on them:
|
||||
|
||||
```bash
|
||||
# Test signal-cli directly
|
||||
signal-cli -u +1234567890 send -m "Test message" +0987654321
|
||||
|
||||
# Test SMTP
|
||||
echo "Test" | mail -s "Test Subject" admin@example.com
|
||||
|
||||
# Test through heartbeat system (run the lines below in a Python REPL, not in the shell)
|
||||
from hbd.server import notify as notify_mod, config as config_mod
|
||||
cfg = config_mod.load_config(".hb.yaml")
|
||||
notify_mod.setup(cfg)
|
||||
notify_mod.pushmsg_for_host("test-host", "Test notification")
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Notifications Not Sending
|
||||
|
||||
1. **Check logs**: Look for "Failed to send notification" errors
|
||||
2. **Verify host is watched**: Ensure `watch: true` in host definition
|
||||
3. **Check channel configuration**: Verify credentials and settings
|
||||
4. **Test channel directly**: Use command-line tools to test provider
|
||||
5. **Check network**: Ensure server can reach notification endpoints
|
||||
|
||||
### Signal Issues
|
||||
|
||||
- **signal-cli not found**: Specify full path in `cli_path`
|
||||
- **Not registered**: Run `signal-cli -u +NUMBER register`, then confirm with `signal-cli -u +NUMBER verify CODE`
|
||||
- **Trust issues**: Run `signal-cli -u +NUMBER receive` to sync trust store
|
||||
- **Recipient not found**: Ensure recipient is in your Signal contacts
|
||||
|
||||
### Email Issues
|
||||
|
||||
- **Authentication failed**: Check SMTP username/password
|
||||
- **TLS errors**: Verify SMTP port (587 for STARTTLS, 465 for SSL)
|
||||
- **Relay denied**: Ensure SMTP server allows relay from your IP
|
||||
- **Timeout**: Check firewall rules for SMTP ports
|
||||
|
||||
### Pushover Issues
|
||||
|
||||
- **Invalid token/user**: Verify token and user key from Pushover dashboard
|
||||
- **API rate limits**: Pushover has monthly message limits on the free tier
|
||||
- **HTTP errors**: Check Pushover API status page
|
||||
|
||||
### Mattermost Issues
|
||||
|
||||
- **Webhook not found**: Verify webhook token and ensure webhook is enabled
|
||||
- **Channel not found**: Check channel name spelling and permissions
|
||||
- **Driver import error**: Install mattermostdriver: `pip install mattermostdriver`
|
||||
|
||||
## API Reference
|
||||
|
||||
### Main Functions
|
||||
|
||||
#### `pushmsg_for_host(hostname: str, msg: str, debug: int = 0) -> dict`
|
||||
|
||||
Send notification to host-specific channels.
|
||||
|
||||
**Parameters:**
|
||||
- `hostname`: Name of the host (used to look up notification channels)
|
||||
- `msg`: Message to send
|
||||
- `debug`: Debug level (0=no debug, 1+=debug output)
|
||||
|
||||
**Returns:** Dictionary of results per channel: `{"signal_ops": True, "email_ops": False}`
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
from hbd.server import notify as notify_mod
|
||||
|
||||
notify_mod.pushmsg_for_host("prod-web-01", "Server CPU at 95%")
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
1. Looks up notification channels configured for the host
|
||||
2. If no host-specific channels, uses `default_notification_channels`
|
||||
3. Dispatches to each channel in parallel
|
||||
4. Returns dict of results keyed by channel name
|
||||
5. Logs success/failure for each channel
|
||||
|
||||
## Examples
|
||||
|
||||
### Complete Configuration Example
|
||||
|
||||
```yaml
|
||||
# Notification channel definitions
|
||||
notification_channels:
|
||||
signal_oncall:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +12025551234
|
||||
recipient: +12025555678
|
||||
|
||||
email_ops:
|
||||
type: email
|
||||
recipients: [ops@example.com, alerts@example.com]
|
||||
sender: heartbeat@example.com
|
||||
smtp_server: smtp.fastmail.com
|
||||
smtp_port: 587
|
||||
smtp_user: heartbeat@example.com
|
||||
smtp_password: app-password-here
|
||||
|
||||
# Default channels
|
||||
default_notification_channels: [email_ops]
|
||||
|
||||
# Host definitions with channel assignments
|
||||
hosts:
|
||||
prod-web-01:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_oncall, email_ops]
|
||||
dyndns: false
|
||||
|
||||
dev-server-01:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false
|
||||
notification_channels: [email_ops]
|
||||
dyndns: false
|
||||
```
|
||||
|
||||
### Multiple Environments Example
|
||||
|
||||
```yaml
|
||||
notification_channels:
|
||||
# Production channels
|
||||
signal_prod_oncall:
|
||||
type: signal
|
||||
user: +12025551234
|
||||
recipient: +12025551111 # On-call phone
|
||||
|
||||
email_prod_ops:
|
||||
type: email
|
||||
recipients: [prod-ops@example.com]
|
||||
sender: prod-heartbeat@example.com
|
||||
smtp_server: smtp.example.com
|
||||
|
||||
# Staging channels
|
||||
email_staging:
|
||||
type: email
|
||||
recipients: [staging-alerts@example.com]
|
||||
sender: staging-heartbeat@example.com
|
||||
smtp_server: smtp.example.com
|
||||
|
||||
# Development channels
|
||||
mattermost_dev:
|
||||
type: mattermost
|
||||
host: chat.example.com
|
||||
token: dev-webhook-token
|
||||
channel: dev-alerts
|
||||
|
||||
hosts:
|
||||
prod-api-01:
|
||||
notification_channels: [signal_prod_oncall, email_prod_ops]
|
||||
|
||||
staging-api-01:
|
||||
notification_channels: [email_staging]
|
||||
|
||||
dev-api-01:
|
||||
notification_channels: [mattermost_dev]
|
||||
```
|
||||
+90
-22
@@ -335,43 +335,111 @@ threshold_renotify_interval: 3600 # Re-notify every hour for ongoing alerts
|
||||
|
||||
### Notification Channels
|
||||
|
||||
Thresholds use the same notification infrastructure as heartbeat monitoring:
|
||||
The system supports centralized notification channel definitions, allowing different hosts to use different notification providers and credentials. This provides fine-grained control over who gets notified about what.
|
||||
|
||||
#### Supported Channel Types
|
||||
|
||||
- **Email** (via SMTP)
|
||||
- **Pushover** (mobile notifications)
|
||||
- **Mattermost** (team chat)
|
||||
- **Custom webhooks**
|
||||
- **Signal** (via signal-cli)
|
||||
- **Mattermost** (team chat webhooks)
|
||||
|
||||
Configuration:
|
||||
#### Centralized Channel Configuration
|
||||
|
||||
Define notification channels once in the configuration file:
|
||||
|
||||
```yaml
|
||||
# Email
|
||||
toemail:
|
||||
- admin@example.com
|
||||
- oncall@example.com
|
||||
fromemail: heartbeat@example.com
|
||||
smtpserver: smtp.example.com
|
||||
smtpport: 587
|
||||
smtpuser: heartbeat@example.com
|
||||
smtppassword: your-password
|
||||
notification_channels:
|
||||
# Signal notifications
|
||||
signal_ops:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +1234567890
|
||||
recipient: +1234567890
|
||||
|
||||
# Pushover
|
||||
pushover_token: your-app-token
|
||||
pushover_user: your-user-key
|
||||
# Email notifications
|
||||
email_ops:
|
||||
type: email
|
||||
recipients: [ops@example.com, alerts@example.com]
|
||||
sender: heartbeat@example.com
|
||||
smtp_server: smtp.example.com
|
||||
smtp_port: 587
|
||||
smtp_user: heartbeat@example.com
|
||||
smtp_password: your-smtp-password
|
||||
|
||||
# Pushover notifications
|
||||
pushover_urgent:
|
||||
type: pushover
|
||||
token: your-pushover-app-token
|
||||
user: your-pushover-user-key
|
||||
|
||||
# Mattermost notifications
|
||||
mattermost_devops:
|
||||
type: mattermost
|
||||
host: mattermost.example.com
|
||||
token: your-webhook-token
|
||||
channel: devops-alerts
|
||||
username: heartbeat-bot
|
||||
icon: https://example.com/heartbeat-icon.png
|
||||
|
||||
# Default channels for hosts that don't specify channels
|
||||
default_notification_channels: [email_ops]
|
||||
```
|
||||
|
||||
#### Per-Host Channel Assignment
|
||||
|
||||
Assign notification channels to specific hosts in the `hosts` section:
|
||||
|
||||
```yaml
|
||||
hosts:
|
||||
# Critical server - multiple notification channels
|
||||
prod-web-01:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_ops, pushover_urgent, email_ops]
|
||||
dyndns: false
|
||||
|
||||
# Database server - ops team only
|
||||
prod-db-01:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels: [signal_ops, email_ops]
|
||||
dyndns: false
|
||||
|
||||
# Development server - email only
|
||||
dev-server-01:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false
|
||||
notification_channels: [email_ops]
|
||||
dyndns: false
|
||||
|
||||
# Uses default_notification_channels if not specified
|
||||
test-server-01:
|
||||
threshold_config: default
|
||||
watch: false
|
||||
dyndns: false
|
||||
```
|
||||
|
||||
### Watched Hosts
|
||||
|
||||
Only hosts in the `watchhosts` list will trigger notifications:
|
||||
Only hosts with `watch: true` in the `hosts` section will trigger notifications:
|
||||
|
||||
```yaml
|
||||
watchhosts:
|
||||
- webserver01
|
||||
- database01
|
||||
- mailserver
|
||||
hosts:
|
||||
webserver01:
|
||||
watch: true
|
||||
notification_channels: [email_ops]
|
||||
|
||||
database01:
|
||||
watch: true
|
||||
notification_channels: [signal_ops, email_ops]
|
||||
|
||||
mailserver:
|
||||
watch: true
|
||||
notification_channels: [pushover_urgent]
|
||||
```
|
||||
|
||||
Hosts not in this list will still have thresholds checked and alert states tracked, but won't send notifications.
|
||||
Hosts not marked for watching will still have thresholds checked and alert states tracked, but won't send notifications.
|
||||
|
||||
## Alert State Tracking
|
||||
|
||||
|
||||
+7
-6
@@ -115,13 +115,14 @@ class AsyncConnection:
|
||||
self.logger.debug(f"Sent {msg_id} message ({len(data)} bytes)")
|
||||
|
||||
def handle_ack(self, msg: dict, now: float):
|
||||
"""Handle ACK message from server."""
|
||||
try:
|
||||
self.lastack = msg.get("time", now)
|
||||
rtt = (self.lastack - self.lastsend) * 2000.0 # Convert to ms
|
||||
except Exception:
|
||||
"""Handle ACK message from server.
|
||||
|
||||
RTT is calculated as: (time ACK received) - (time HTB sent)
|
||||
"""
|
||||
self.lastack = now
|
||||
rtt = (self.lastack - self.lastsend) * 1000.0
|
||||
|
||||
# Calculate RTT: time ACK received minus time HTB sent
|
||||
rtt = (now - self.lastsend) * 1000.0 # Convert to ms
|
||||
|
||||
self.rtts.append(rtt)
|
||||
if len(self.rtts) > 10:
|
||||
|
||||
@@ -51,13 +51,9 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
# RTT thresholds per remote host
|
||||
router:
|
||||
# RTT thresholds (applies to all hosts)
|
||||
warning: 50.0 # ms
|
||||
critical: 200.0
|
||||
server1:
|
||||
warning: 100.0
|
||||
critical: 500.0
|
||||
|
||||
# High sensitivity configuration - lower thresholds for critical systems
|
||||
high_sensitivity:
|
||||
@@ -94,12 +90,8 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
router:
|
||||
warning: 30.0
|
||||
critical: 100.0
|
||||
server1:
|
||||
warning: 50.0
|
||||
critical: 200.0
|
||||
|
||||
# Low sensitivity configuration - higher thresholds for development/test systems
|
||||
low_sensitivity:
|
||||
@@ -125,7 +117,6 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
router:
|
||||
warning: 100.0
|
||||
critical: 500.0
|
||||
|
||||
@@ -159,7 +150,6 @@ threshold_configs:
|
||||
operator: ">"
|
||||
|
||||
rtt:
|
||||
router:
|
||||
warning: 20.0 # Stricter latency requirements
|
||||
critical: 50.0
|
||||
|
||||
@@ -167,36 +157,140 @@ threshold_configs:
|
||||
# Host to Threshold Configuration Mapping
|
||||
# ----------------------------------------------------------------------------
|
||||
# Map specific hosts to specific threshold configurations
|
||||
# Hosts not listed here will use the default_threshold_config
|
||||
host_threshold_mapping:
|
||||
# Critical production servers
|
||||
prod-web-01: high_sensitivity
|
||||
prod-web-02: high_sensitivity
|
||||
prod-api-01: high_sensitivity
|
||||
# ----------------------------------------------------------------------------
|
||||
# Notification Channels
|
||||
# ----------------------------------------------------------------------------
|
||||
# Define notification providers centrally with their credentials
|
||||
# Each channel has a type (pushover, email, signal, mattermost) and type-specific config
|
||||
notification_channels:
|
||||
# Signal notifications
|
||||
signal_ops:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +1234567890
|
||||
recipient: +1234567890
|
||||
|
||||
# Database servers
|
||||
prod-db-01: database
|
||||
prod-db-02: database
|
||||
prod-db-replica: database
|
||||
signal_oncall:
|
||||
type: signal
|
||||
cli_path: /usr/local/bin/signal-cli
|
||||
user: +1234567890
|
||||
recipient: +0987654321
|
||||
|
||||
# Development and test systems
|
||||
dev-server-01: low_sensitivity
|
||||
dev-server-02: low_sensitivity
|
||||
test-server-01: low_sensitivity
|
||||
test-server-02: low_sensitivity
|
||||
# Email notifications
|
||||
email_ops:
|
||||
type: email
|
||||
recipients: [ops@example.com, alerts@example.com]
|
||||
sender: heartbeat@example.com
|
||||
smtp_server: smtp.example.com
|
||||
smtp_port: 587
|
||||
smtp_user: heartbeat@example.com
|
||||
smtp_password: your-smtp-password
|
||||
|
||||
# Everything else uses 'default' (no need to list explicitly)
|
||||
# Pushover notifications
|
||||
pushover_urgent:
|
||||
type: pushover
|
||||
token: your-pushover-app-token
|
||||
user: your-pushover-user-key
|
||||
|
||||
# Mattermost notifications
|
||||
mattermost_devops:
|
||||
type: mattermost
|
||||
host: mattermost.example.com
|
||||
token: your-webhook-token
|
||||
channel: devops-alerts
|
||||
username: heartbeat-bot
|
||||
icon: https://example.com/heartbeat-icon.png
|
||||
|
||||
# Default notification channels (used if host doesn't specify channels)
|
||||
default_notification_channels: [email_ops]
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Backward Compatibility Example
|
||||
# Host Definitions (New Unified Format)
|
||||
# ----------------------------------------------------------------------------
|
||||
# Define hosts with threshold configs, monitoring, DNS, and notification settings
|
||||
hosts:
|
||||
# Critical production servers - high sensitivity, multiple notification channels
|
||||
prod-web-01:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_oncall, pushover_urgent, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-web-02:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_oncall, pushover_urgent, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-api-01:
|
||||
threshold_config: high_sensitivity
|
||||
watch: true
|
||||
notification_channels: [signal_oncall, email_ops]
|
||||
dyndns: false
|
||||
|
||||
# Database servers - database-specific thresholds
|
||||
prod-db-01:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels: [signal_ops, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-db-02:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels: [signal_ops, email_ops]
|
||||
dyndns: false
|
||||
|
||||
prod-db-replica:
|
||||
threshold_config: database
|
||||
watch: true
|
||||
notification_channels: [email_ops] # Replica gets email only
|
||||
dyndns: false
|
||||
|
||||
# Development servers - low sensitivity, minimal notifications
|
||||
dev-server-01:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false # Don't monitor dev servers closely
|
||||
notification_channels: [email_ops]
|
||||
dyndns: false
|
||||
|
||||
dev-server-02:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false
|
||||
notification_channels: [email_ops]
|
||||
dyndns: false
|
||||
|
||||
# Test servers
|
||||
test-server-01:
|
||||
threshold_config: low_sensitivity
|
||||
watch: false
|
||||
dyndns: false
|
||||
# No notification channels - uses default_notification_channels
|
||||
|
||||
# Home server with dynamic DNS
|
||||
home-server:
|
||||
threshold_config: default
|
||||
watch: true
|
||||
notification_channels: [signal_ops]
|
||||
dyndns: true # Update DNS when IP changes
|
||||
|
||||
# Hosts not listed in the hosts section will use:
|
||||
# - default_threshold_config for thresholds (falls back to "default")
|
||||
# - default_notification_channels for notifications
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Notes on Configuration Structure
|
||||
# ----------------------------------------------------------------------------
|
||||
# The old single threshold format is still supported:
|
||||
# Just use 'thresholds:' directly without 'threshold_configs:'
|
||||
#
|
||||
# thresholds:
|
||||
# cpu_monitor:
|
||||
# cpu_percent:
|
||||
# warning: 80.0
|
||||
# critical: 90.0
|
||||
# All configuration is centralized in the hosts section. Each host can specify:
|
||||
# - threshold_config: Name of threshold configuration to use
|
||||
# - watch: Whether to monitor this host actively (send notifications)
|
||||
# - notification_channels: List of channels to use for this host
|
||||
# - dyndns: Whether to update DNS when IP address changes
|
||||
#
|
||||
# This will apply the same thresholds to all hosts.
|
||||
# Notification channels are defined once at the top level and referenced
|
||||
# by name in host definitions, allowing easy reuse and updates.
|
||||
#
|
||||
# For hosts not explicitly listed, the system will still accept heartbeats
|
||||
# and track their state, but won't apply thresholds or send notifications
|
||||
# unless default settings are configured.
|
||||
|
||||
+165
-14
@@ -21,10 +21,9 @@ SERVER_DEFAULTS = {
|
||||
"logfile": "/var/log/heartbeat.log",
|
||||
"logfmt": "text", # text or msg or json
|
||||
|
||||
# Notification settings
|
||||
"pushsrv": "pushover", # pushover, mattermost, or all
|
||||
"pushover_token": "",
|
||||
"pushover_user": "",
|
||||
# Notification channels
|
||||
"notification_channels": {}, # Named channels with type and credentials
|
||||
"default_notification_channels": [], # Default channels if host doesn't specify
|
||||
|
||||
# Monitoring settings
|
||||
"interval": 20, # Expected heartbeat interval (for server checks)
|
||||
@@ -32,22 +31,15 @@ SERVER_DEFAULTS = {
|
||||
"threshold_renotify_interval": 3600, # Seconds between threshold re-notifications
|
||||
|
||||
# Host management
|
||||
"watchhosts": [], # Hosts to monitor and notify about
|
||||
"dyndnshosts": [], # Hosts with dynamic DNS
|
||||
"hosts": {}, # New unified host definitions (optional)
|
||||
"watchhosts": [], # Hosts to monitor and notify about (legacy)
|
||||
"dyndnshosts": [], # Hosts with dynamic DNS (legacy)
|
||||
"drophosts": [], # Hosts to ignore
|
||||
"dyndomains": ["wrede.org"],
|
||||
|
||||
# DNS updates
|
||||
"nsupdate_bin": "/usr/bin/nsupdate",
|
||||
|
||||
# Email settings
|
||||
"smtpserver": "smtp.fastmail.com",
|
||||
"smtpuser": "andreas@wrede.ca",
|
||||
"smtppassword": "pvtvefyp5gbhnch2",
|
||||
"smtpport": 587,
|
||||
"toemail": ["aew.hbd.notify@wrede.ca"],
|
||||
"fromemail": "aew.hbd@wrede.ca",
|
||||
|
||||
# WebSocket settings
|
||||
"ws_port": 50005,
|
||||
"wss_port": None,
|
||||
@@ -101,3 +93,162 @@ def load_config(path=None):
|
||||
# yaml not installed: do not attempt to parse; user must ensure defaults
|
||||
pass
|
||||
return cfg
|
||||
|
||||
|
||||
def get_watchhosts(config):
    """Return the names of all hosts that should be watched.

    Two configuration layouts are merged:

    * Unified format: every entry of ``config["hosts"]`` whose attribute
      dict has a truthy ``watch`` value.
    * Legacy format: ``config["watchhosts"]`` given either as a list/set
      of names or as a dict keyed by host name.

    Args:
        config: Configuration dictionary

    Returns:
        List of unique hostnames to watch, in first-seen order (unified
        entries first, then legacy ones) so the result is stable between
        runs.
    """
    # A dict serves as an ordered set here. list(set(...)) de-duplicates as
    # well, but in an order that varies with string hash randomisation.
    seen = {}

    # New format: hosts section with watch attribute
    hosts_config = config.get("hosts")
    if isinstance(hosts_config, dict):
        for host_name, host_attrs in hosts_config.items():
            if isinstance(host_attrs, dict) and host_attrs.get("watch", False):
                seen[host_name] = None

    # Legacy format: list/set of names, or {"host1": {attrs}, ...}
    legacy_watchhosts = config.get("watchhosts")
    if isinstance(legacy_watchhosts, (list, set, dict)):
        # Iterating a dict yields its keys, i.e. the host names.
        for host_name in legacy_watchhosts:
            seen[host_name] = None

    return list(seen)
|
||||
|
||||
|
||||
def get_dyndnshosts(config):
    """Return the names of all hosts that use dynamic DNS.

    Two configuration layouts are merged:

    * Unified format: every entry of ``config["hosts"]`` whose attribute
      dict has a truthy ``dyndns`` value.
    * Legacy format: ``config["dyndnshosts"]`` given as a list/set of
      names or as a dict keyed by host name.

    Args:
        config: Configuration dictionary

    Returns:
        List of unique hostnames with dynamic DNS, in first-seen order
        (unified entries first, then legacy ones).
    """
    # A dict serves as an ordered set so the result order is stable.
    seen = {}

    # New format: hosts section with dyndns attribute
    hosts_config = config.get("hosts")
    if isinstance(hosts_config, dict):
        for host_name, host_attrs in hosts_config.items():
            if isinstance(host_attrs, dict) and host_attrs.get("dyndns", False):
                seen[host_name] = None

    # Legacy format. The dict case matters: a YAML flow mapping such as
    # `dyndnshosts: {"a", "b"}` is loaded as {"a": None, "b": None}, and
    # ignoring it would silently drop every legacy dyndns host.
    legacy_dyndnshosts = config.get("dyndnshosts")
    if isinstance(legacy_dyndnshosts, (list, set, dict)):
        # Iterating a dict yields its keys, i.e. the host names.
        for host_name in legacy_dyndnshosts:
            seen[host_name] = None

    return list(seen)
|
||||
|
||||
|
||||
def get_host_config(config, hostname):
    """Look up the attribute dictionary for one host.

    The unified ``hosts`` section takes precedence. If the host is listed
    there, its entry is returned as-is (or an empty dict when the entry is
    not a dict) and the legacy section is not consulted. Otherwise a
    dict-style legacy ``watchhosts`` entry is translated into the new
    attribute names.

    Args:
        config: Configuration dictionary
        hostname: Host name

    Returns:
        Dictionary with host attributes or empty dict
    """
    unified = config.get("hosts")
    if isinstance(unified, dict) and hostname in unified:
        entry = unified[hostname]
        return entry if isinstance(entry, dict) else {}

    # Legacy watchhosts may carry per-host notification settings.
    legacy = config.get("watchhosts")
    if not isinstance(legacy, dict) or hostname not in legacy:
        return {}

    attrs = legacy[hostname]
    if not isinstance(attrs, dict):
        return {}

    # Map the legacy keys onto the new attribute names.
    return {
        "watch": True,
        "notify": attrs.get("notify"),
        "notify_src": attrs.get("src"),
    }
|
||||
|
||||
|
||||
def get_notification_channels_for_host(config, hostname):
    """Resolve the notification channel names that apply to a host.

    The host's own ``notification_channels`` attribute is preferred; when
    it is empty or of an unsupported type, the top-level
    ``default_notification_channels`` value is used instead. A bare string
    is treated as a single channel name.

    Args:
        config: Configuration dictionary
        hostname: Host name

    Returns:
        List of channel names to use for this host (empty when neither the
        host nor the defaults name any channel)
    """
    host_level = get_host_config(config, hostname).get("notification_channels", [])
    fallback = config.get("default_notification_channels", [])

    # Host-specific channels win; defaults are only consulted when the host
    # value is falsy or is neither a string nor a list.
    for candidate in (host_level, fallback):
        if not candidate:
            continue
        if isinstance(candidate, str):
            return [candidate]
        if isinstance(candidate, list):
            return candidate

    # No channels configured, return empty list (will use legacy global config)
    return []
|
||||
|
||||
|
||||
def get_channel_config(config, channel_name):
    """Fetch the settings of one named notification channel.

    Args:
        config: Configuration dictionary
        channel_name: Name of the notification channel

    Returns:
        The value stored under ``notification_channels[channel_name]``, or
        None when the section is not a dict or has no such channel
    """
    registry = config.get("notification_channels", {})
    if not isinstance(registry, dict):
        return None
    if channel_name not in registry:
        return None
    return registry[channel_name]
|
||||
|
||||
|
||||
def get_notification_channels_config(config, hostname):
    """Collect the usable notification channel definitions for a host.

    Channel names resolved for the host are looked up in the channel
    registry. Names with no definition, or whose definition has no truthy
    ``type``, are dropped.

    Args:
        config: Configuration dictionary
        hostname: Host name

    Returns:
        List of (channel_name, channel_config) tuples
    """
    resolved = (
        (name, get_channel_config(config, name))
        for name in get_notification_channels_for_host(config, hostname)
    )
    return [
        (name, definition)
        for name, definition in resolved
        if definition and definition.get("type")
    ]
|
||||
|
||||
+2
-12
@@ -136,16 +136,7 @@ async def dns_update_worker(
|
||||
)
|
||||
if err:
|
||||
m += f", DNS update failed: {err}"
|
||||
if pushmsg:
|
||||
try:
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
pushmsg,
|
||||
"error: nsupdate failed",
|
||||
f"{name}.dy.{dyndomain}: {m}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
logger.error("DNS update failed for %s: %s", name, err)
|
||||
else:
|
||||
m += ", DNS updated."
|
||||
|
||||
@@ -171,7 +162,6 @@ def start_dns_worker(
|
||||
hbdclass,
|
||||
cfg: dict,
|
||||
log: Optional[callable] = None,
|
||||
pushmsg: Optional[callable] = None,
|
||||
loop: Optional[asyncio.AbstractEventLoop] = None,
|
||||
):
|
||||
"""Start the async DNS worker and return the Task.
|
||||
@@ -218,7 +208,7 @@ def start_dns_worker(
|
||||
|
||||
task = loop.create_task(
|
||||
dns_update_worker(
|
||||
hbdclass, cfg, async_queue=async_q, log=log, pushmsg=pushmsg, loop=loop
|
||||
hbdclass, cfg, async_queue=async_q, log=log, loop=loop
|
||||
)
|
||||
)
|
||||
return task
|
||||
|
||||
@@ -25,12 +25,7 @@ async def start(
|
||||
port: int,
|
||||
config,
|
||||
hbdclass,
|
||||
log=None,
|
||||
email=None,
|
||||
pushmsg=None,
|
||||
msg_to_websockets=None,
|
||||
tcss=None,
|
||||
DEBUG=0,
|
||||
verbose=False,
|
||||
get_now=None,
|
||||
VER="",
|
||||
|
||||
+4
-11
@@ -79,14 +79,11 @@ async def _run_async(config):
|
||||
# Initialize threshold checker
|
||||
threshold_checker = threshold_mod.ThresholdChecker(
|
||||
config=config,
|
||||
notification_callback=notify_mod.pushmsg_from_config,
|
||||
renotify_interval=config.get("threshold_renotify_interval", 3600),
|
||||
journal=msg_journal,
|
||||
)
|
||||
logger.info("Threshold checker initialized")
|
||||
|
||||
pushmsg = notify_mod.pushmsg_from_config
|
||||
|
||||
sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
|
||||
# Disable IPV6_V6ONLY option to enable dual-stack (listen on IPv4 as well)
|
||||
# This option is system-dependent; on many systems, setting it to False enables
|
||||
@@ -110,7 +107,6 @@ async def _run_async(config):
|
||||
config=config,
|
||||
hbdclass=hbdclass,
|
||||
log=eventlog,
|
||||
pushmsg=pushmsg,
|
||||
msg_to_websockets=msg_to_websockets,
|
||||
msg_journal=msg_journal,
|
||||
threshold_checker=threshold_checker,
|
||||
@@ -132,12 +128,8 @@ async def _run_async(config):
|
||||
port=config.get("hbd_port", 50004),
|
||||
config=config,
|
||||
hbdclass=hbdclass,
|
||||
log=eventlog,
|
||||
pushmsg=pushmsg,
|
||||
msg_to_websockets=msg_to_websockets,
|
||||
threshold_checker=threshold_checker,
|
||||
tcss=None,
|
||||
DEBUG=config.get("debug", 0),
|
||||
verbose=config.get("verbose", False),
|
||||
get_now=lambda: time.time(),
|
||||
VER="",
|
||||
@@ -155,7 +147,7 @@ async def _run_async(config):
|
||||
dns_task = None
|
||||
try:
|
||||
dns_task = dns_mod.start_dns_worker(
|
||||
hbdclass, config, log=eventlog, pushmsg=pushmsg, loop=loop
|
||||
hbdclass, config, log=eventlog, loop=loop
|
||||
)
|
||||
logger.info("dns update worker started")
|
||||
except Exception as e:
|
||||
@@ -273,10 +265,11 @@ def load_pickled_hosts(config, hbdclass):
|
||||
"""Load pickled hosts from file, if available."""
|
||||
import os
|
||||
import pickle
|
||||
from . import config as config_mod
|
||||
|
||||
pickfile = config.get("pickfile", "hbd.pickle")
|
||||
dyndnshosts = config.get("dyndnshosts", [])
|
||||
watchhosts = config.get("watchhosts", [])
|
||||
dyndnshosts = config_mod.get_dyndnshosts(config)
|
||||
watchhosts = config_mod.get_watchhosts(config)
|
||||
drophosts = config.get("drophosts", [])
|
||||
if 1 and os.path.exists(pickfile):
|
||||
if config.get("verbose", False):
|
||||
|
||||
+114
-46
@@ -190,55 +190,123 @@ def pushsignal(
|
||||
return False
|
||||
|
||||
|
||||
def pushmsg(cfg: dict, msg: str, debug: int = 0):
|
||||
"""Dispatch push notifications according to `cfg['pushsrv']`.
|
||||
def _dispatch_to_channel(channel_name: str, channel_config: dict, msg: str, debug: int = 0) -> bool:
|
||||
"""Dispatch a message to a specific notification channel.
|
||||
|
||||
cfg is expected to contain keys for different services when needed, e.g.
|
||||
- cfg['pushsrv'] : one of 'all', 'pushover', 'mattermost', 'signal'
|
||||
- cfg['pushover_token'], cfg['pushover_user']
|
||||
- cfg['matter_host'], cfg['matter_token'], cfg['matter_channel']
|
||||
- cfg['signal_cli'], cfg['signal_user'], cfg['signal_recipient']
|
||||
Args:
|
||||
channel_name: Name of the channel (for logging)
|
||||
channel_config: Channel configuration dictionary with 'type' and type-specific fields
|
||||
msg: Message to send
|
||||
debug: Debug level
|
||||
|
||||
Returns a dict of results per provider.
|
||||
Returns:
|
||||
True if notification sent successfully, False otherwise
|
||||
"""
|
||||
channel_type = channel_config.get("type")
|
||||
|
||||
if channel_type == "pushover":
|
||||
return pushover(
|
||||
channel_config.get("token", ""),
|
||||
channel_config.get("user", ""),
|
||||
msg,
|
||||
debug=debug
|
||||
)
|
||||
|
||||
elif channel_type == "email":
|
||||
# Build email from channel config
|
||||
recipients = channel_config.get("recipients", [])
|
||||
sender = channel_config.get("sender", "")
|
||||
smtp_server = channel_config.get("smtp_server", "")
|
||||
smtp_port = channel_config.get("smtp_port", 587)
|
||||
smtp_user = channel_config.get("smtp_user")
|
||||
smtp_password = channel_config.get("smtp_password")
|
||||
|
||||
if not recipients or not sender or not smtp_server:
|
||||
logger.warning(
|
||||
"Email channel '%s' missing required fields: recipients=%s, sender=%s, smtp_server=%s",
|
||||
channel_name, recipients, sender, smtp_server
|
||||
)
|
||||
return False
|
||||
|
||||
# Temporarily update _config for email() function
|
||||
old_config = dict(_config)
|
||||
_config["toemail"] = recipients
|
||||
_config["fromemail"] = sender
|
||||
_config["smtpserver"] = smtp_server
|
||||
_config["smtpport"] = smtp_port
|
||||
if smtp_user:
|
||||
_config["smtpuser"] = smtp_user
|
||||
if smtp_password:
|
||||
_config["smtppassword"] = smtp_password
|
||||
|
||||
result = email("Heartbeat notification", msg, debug=debug)
|
||||
|
||||
# Restore config
|
||||
_config.clear()
|
||||
_config.update(old_config)
|
||||
|
||||
return result
|
||||
|
||||
elif channel_type == "signal":
|
||||
return pushsignal(
|
||||
channel_config.get("cli_path", "/usr/local/bin/signal-cli"),
|
||||
channel_config.get("user", ""),
|
||||
channel_config.get("recipient", ""),
|
||||
msg,
|
||||
debug=debug
|
||||
)
|
||||
|
||||
elif channel_type == "mattermost":
|
||||
return pushmattermost(
|
||||
channel_config.get("host", ""),
|
||||
channel_config.get("token", ""),
|
||||
channel_config.get("channel", ""),
|
||||
msg,
|
||||
username=channel_config.get("username", "hbd"),
|
||||
icon=channel_config.get("icon"),
|
||||
debug=debug
|
||||
)
|
||||
|
||||
else:
|
||||
logger.warning("Unknown channel type '%s' for channel '%s'", channel_type, channel_name)
|
||||
return False
|
||||
|
||||
|
||||
def pushmsg_for_host(hostname: str, msg: str, debug: int = 0) -> dict:
|
||||
"""Send notification for a specific host using its configured channels.
|
||||
|
||||
This function looks up the host's notification channels from the config
|
||||
and sends the message to those channels.
|
||||
|
||||
Args:
|
||||
hostname: Name of the host to send notification for
|
||||
msg: Message to send
|
||||
debug: Debug level
|
||||
|
||||
Returns:
|
||||
Dictionary of results per channel: {"channel_name": True/False}
|
||||
"""
|
||||
from . import config as config_mod
|
||||
|
||||
# Get notification channels for this host
|
||||
channels = config_mod.get_notification_channels_config(_config, hostname)
|
||||
|
||||
if not channels:
|
||||
logger.warning("No notification channels configured for host '%s'", hostname)
|
||||
return {}
|
||||
|
||||
# Dispatch to each channel
|
||||
results = {}
|
||||
p = cfg.get("pushsrv", "pushover")
|
||||
if p in ("all", "pushover"):
|
||||
ok = pushover(
|
||||
cfg.get("pushover_token", ""),
|
||||
cfg.get("pushover_user", ""),
|
||||
msg,
|
||||
debug=debug,
|
||||
)
|
||||
results["pushover"] = ok
|
||||
if p in ("all", "mattermost"):
|
||||
ok = pushmattermost(
|
||||
cfg.get("matter_host", ""),
|
||||
cfg.get("matter_token", ""),
|
||||
cfg.get("matter_channel", ""),
|
||||
msg,
|
||||
username=cfg.get("matter_username", "hbd"),
|
||||
icon=cfg.get("matter_icon"),
|
||||
debug=debug,
|
||||
)
|
||||
results["mattermost"] = ok
|
||||
if p in ("all", "signal"):
|
||||
ok = pushsignal(
|
||||
cfg.get("signal_cli", "/usr/local/bin/signal-cli"),
|
||||
cfg.get("signal_user", ""),
|
||||
cfg.get("signal_recipient", ""),
|
||||
msg,
|
||||
debug=debug,
|
||||
)
|
||||
results["signal"] = ok
|
||||
if p in ("all", "email"):
|
||||
ok = email("Heartbeat notification", msg, debug=debug)
|
||||
results["email"] = ok
|
||||
logger.debug("push results: %s", results)
|
||||
for channel_name, channel_config in channels:
|
||||
try:
|
||||
success = _dispatch_to_channel(channel_name, channel_config, msg, debug=debug)
|
||||
results[channel_name] = success
|
||||
if success:
|
||||
logger.info("Notification sent to channel '%s': %s", channel_name, msg)
|
||||
else:
|
||||
logger.warning("Failed to send notification to channel '%s'", channel_name)
|
||||
except Exception as e:
|
||||
logger.error("Error sending to channel '%s': %s", channel_name, e)
|
||||
results[channel_name] = False
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def pushmsg_from_config(msg: str, debug: int = 0) -> dict:
|
||||
"""Use the module-level configuration dict to dispatch a push message."""
|
||||
return pushmsg(_config, msg, debug=debug)
|
||||
|
||||
+32
-26
@@ -275,7 +275,6 @@ class ThresholdChecker:
|
||||
def __init__(
|
||||
self,
|
||||
config: Dict[str, Any],
|
||||
notification_callback: Optional[Callable] = None,
|
||||
renotify_interval: int = 3600,
|
||||
journal: Optional[Any] = None,
|
||||
):
|
||||
@@ -284,7 +283,6 @@ class ThresholdChecker:
|
||||
|
||||
Args:
|
||||
config: Threshold configuration dictionary from YAML
|
||||
notification_callback: Function to call for notifications
|
||||
renotify_interval: Seconds between repeat notifications (default: 1 hour)
|
||||
journal: Optional MessageJournal instance for logging threshold events
|
||||
"""
|
||||
@@ -300,7 +298,6 @@ class ThresholdChecker:
|
||||
# Default config name to use when no mapping exists
|
||||
self.default_config = "default"
|
||||
|
||||
self.notification_callback = notification_callback
|
||||
self.renotify_interval = renotify_interval
|
||||
self.journal = journal
|
||||
|
||||
@@ -367,8 +364,20 @@ class ThresholdChecker:
|
||||
target_dict=self.threshold_configs[config_name]
|
||||
)
|
||||
|
||||
# Parse host to config mapping
|
||||
self.host_config_mapping = config.get("host_threshold_mapping", {})
|
||||
# Parse host to config mapping from two possible sources
|
||||
# 1. New format: hosts section with threshold_config attribute
|
||||
if "hosts" in config:
|
||||
hosts_config = config["hosts"]
|
||||
if isinstance(hosts_config, dict):
|
||||
for host_name, host_attrs in hosts_config.items():
|
||||
if isinstance(host_attrs, dict) and "threshold_config" in host_attrs:
|
||||
self.host_config_mapping[host_name] = host_attrs["threshold_config"]
|
||||
|
||||
# 2. Legacy format: host_threshold_mapping section (for backward compatibility)
|
||||
if "host_threshold_mapping" in config:
|
||||
legacy_mapping = config.get("host_threshold_mapping", {})
|
||||
if isinstance(legacy_mapping, dict):
|
||||
self.host_config_mapping.update(legacy_mapping)
|
||||
|
||||
# Set default config (first one alphabetically or explicitly set)
|
||||
self.default_config = config.get("default_threshold_config", "default")
|
||||
@@ -513,12 +522,11 @@ class ThresholdChecker:
|
||||
rtt_thresholds: Dict[str, Any],
|
||||
target_dict: Optional[Dict[str, ThresholdConfig]] = None
|
||||
):
|
||||
"""Parse RTT thresholds (per-host network latency thresholds).
|
||||
"""Parse RTT thresholds (network latency thresholds).
|
||||
|
||||
RTT thresholds are configured as:
|
||||
thresholds:
|
||||
rtt:
|
||||
hostname1:
|
||||
warning: 100.0 # ms
|
||||
critical: 500.0 # ms
|
||||
|
||||
@@ -529,23 +537,22 @@ class ThresholdChecker:
|
||||
if target_dict is None:
|
||||
target_dict = self.thresholds
|
||||
|
||||
for hostname, threshold_config in rtt_thresholds.items():
|
||||
if not isinstance(threshold_config, dict):
|
||||
continue
|
||||
if not isinstance(rtt_thresholds, dict):
|
||||
return
|
||||
|
||||
# Metric path is "rtt.<hostname>"
|
||||
metric_path = f"rtt.{hostname}"
|
||||
# Metric path is simply "rtt" (not per-host)
|
||||
metric_path = "rtt"
|
||||
|
||||
warning = threshold_config.get("warning")
|
||||
critical = threshold_config.get("critical")
|
||||
operator = threshold_config.get("operator", ">")
|
||||
hysteresis = threshold_config.get("hysteresis", 0.1) # 10% default
|
||||
enabled = threshold_config.get("enabled", True)
|
||||
display = threshold_config.get("display")
|
||||
warning = rtt_thresholds.get("warning")
|
||||
critical = rtt_thresholds.get("critical")
|
||||
operator = rtt_thresholds.get("operator", ">")
|
||||
hysteresis = rtt_thresholds.get("hysteresis", 0.1) # 10% default
|
||||
enabled = rtt_thresholds.get("enabled", True)
|
||||
display = rtt_thresholds.get("display")
|
||||
|
||||
if warning is None and critical is None:
|
||||
logger.warning("No RTT thresholds defined for %s, skipping", hostname)
|
||||
continue
|
||||
logger.warning("No RTT thresholds defined, skipping")
|
||||
return
|
||||
|
||||
threshold = ThresholdConfig(
|
||||
metric_path=metric_path,
|
||||
@@ -559,8 +566,7 @@ class ThresholdChecker:
|
||||
|
||||
target_dict[metric_path] = threshold
|
||||
logger.debug(
|
||||
"Registered RTT threshold for %s: warn=%s ms, crit=%s ms",
|
||||
hostname,
|
||||
"Registered RTT threshold: warn=%s ms, crit=%s ms",
|
||||
warning,
|
||||
critical
|
||||
)
|
||||
@@ -887,9 +893,9 @@ class ThresholdChecker:
|
||||
value: Any,
|
||||
):
|
||||
"""Send notification and log to journal/eventlog."""
|
||||
if self.notification_callback is not None:
|
||||
# Send notification using host-specific channels
|
||||
try:
|
||||
self.notification_callback(f"{lvl}: {host_name} - {message}")
|
||||
notify_mod.pushmsg_for_host(host_name, f"{lvl}: {host_name} - {message}")
|
||||
logger.info("Notification sent: %s", message)
|
||||
except Exception as e:
|
||||
logger.error("Failed to send notification: %s", e)
|
||||
@@ -1017,9 +1023,9 @@ class ThresholdChecker:
|
||||
else:
|
||||
message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} (ongoing for {int(now - alert_state.since)}s)"
|
||||
|
||||
if self.notification_callback:
|
||||
# Send re-notification using host-specific channels
|
||||
try:
|
||||
self.notification_callback(message)
|
||||
notify_mod.pushmsg_for_host(host_name, message)
|
||||
alert_state.last_notification = now
|
||||
alert_state.notification_count += 1
|
||||
logger.info("Re-notification sent: %s", message)
|
||||
|
||||
+33
-24
@@ -68,7 +68,6 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
- config: dict of configuration
|
||||
- hbdclass: module providing Host/Connection classes
|
||||
- log: callable(loghost, message)
|
||||
- pushmsg: callable(message)
|
||||
- msg_to_websockets: callable(typ, data)
|
||||
- msg_journal: MessageJournal instance for logging all messages
|
||||
- DEBUG, verbose
|
||||
@@ -91,7 +90,6 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
cfg = ctx.get("config", {})
|
||||
hbdcls = ctx.get("hbdclass")
|
||||
log = ctx.get("log")
|
||||
pushmsg = ctx.get("pushmsg")
|
||||
msg_to_websockets = ctx.get("msg_to_websockets")
|
||||
DEBUG = ctx.get("DEBUG", 0)
|
||||
verbose = ctx.get("verbose", False)
|
||||
@@ -100,12 +98,15 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
ip = addr[0] if isinstance(addr, (list, tuple)) else addr
|
||||
name = msg.get("name", "unknown")
|
||||
from ..common.utils import shortname
|
||||
from . import config as config_mod
|
||||
|
||||
uname = shortname(name)
|
||||
|
||||
if uname not in hbdcls.Host.hosts:
|
||||
host = hbdcls.Host(uname)
|
||||
host.dyn = uname in cfg.get("dyndnshosts", [])
|
||||
# Use new config function to check dyndns
|
||||
dyndnshosts = config_mod.get_dyndnshosts(cfg)
|
||||
host.dyn = uname in dyndnshosts
|
||||
if verbose:
|
||||
print(("XX: New host, num now %s" % (len(hbdcls.Host.hosts))))
|
||||
newh = True
|
||||
@@ -113,6 +114,9 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
host = hbdcls.Host.hosts[uname]
|
||||
newh = False
|
||||
|
||||
# Get watchhosts once for use throughout message handling
|
||||
watchhosts = config_mod.get_watchhosts(cfg)
|
||||
|
||||
cid = msg.get("id", 0)
|
||||
try:
|
||||
rtt = float(msg.get("rtt"))
|
||||
@@ -181,9 +185,8 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
|
||||
if res:
|
||||
eventlog(uname, "WARNING", res)
|
||||
if uname in cfg.get("watchhosts", []):
|
||||
if pushmsg:
|
||||
pushmsg("%s %s" % (host.name, res))
|
||||
if uname in watchhosts:
|
||||
notify_mod.pushmsg_for_host(uname, "%s %s" % (host.name, res))
|
||||
|
||||
interval = int(msg.get("interval", 0) or 0)
|
||||
shutdown = msg.get("shutdown", 0)
|
||||
@@ -193,15 +196,13 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
|
||||
if boot:
|
||||
eventlog(uname, "INFO", "booted")
|
||||
if uname in cfg.get("watchhosts", []):
|
||||
if uname in watchhosts:
|
||||
m = "%s booted" % (host.name)
|
||||
if pushmsg:
|
||||
pushmsg(m)
|
||||
notify_mod.pushmsg_for_host(uname, m)
|
||||
if message:
|
||||
eventlog(uname, "INFO", "msg: %s" % message, service=service)
|
||||
if uname in cfg.get("watchhosts", []):
|
||||
if pushmsg:
|
||||
pushmsg(message)
|
||||
if uname in watchhosts:
|
||||
notify_mod.pushmsg_for_host(uname, message)
|
||||
|
||||
if conn.getstate() != hbdcls.Connection.UP:
|
||||
lasts = conn.state
|
||||
@@ -211,9 +212,8 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
else:
|
||||
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
|
||||
eventlog(uname, "RECOVER", m)
|
||||
if uname in cfg.get("watchhosts", []):
|
||||
if pushmsg:
|
||||
pushmsg("%s %s is back" % (uname, conn.afam))
|
||||
if uname in watchhosts:
|
||||
notify_mod.pushmsg_for_host(uname, "%s %s is back" % (uname, conn.afam))
|
||||
|
||||
if boot or newh:
|
||||
host.upcount = host.doesack
|
||||
@@ -222,9 +222,8 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
|
||||
if shutdown:
|
||||
eventlog(uname, "INFO", "%s shutdown" % conn.afam)
|
||||
if uname in cfg.get("watchhosts", []):
|
||||
if pushmsg:
|
||||
pushmsg("%s %s shutdown" % (uname, conn.afam))
|
||||
if uname in watchhosts:
|
||||
notify_mod.pushmsg_for_host(uname, "%s %s shutdown" % (uname, conn.afam))
|
||||
conn.newstate(hbdcls.Connection.DOWN, now)
|
||||
|
||||
if interval > 0:
|
||||
@@ -247,11 +246,21 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
connection.newstate(hbdcls.Connection.OVERDUE, now, cfg.get("grace", 2))
|
||||
|
||||
msg = f"{connection.afam} overdue"
|
||||
eventlog(uname, "CRITICAL" if uname in cfg.get("watchhosts", []) else "WARNING", msg)
|
||||
eventlog(uname, "CRITICAL" if uname in watchhosts else "WARNING", msg)
|
||||
|
||||
if uname in cfg.get("watchhosts", []):
|
||||
if pushmsg:
|
||||
pushmsg(f"{uname} {msg}")
|
||||
if uname in watchhosts:
|
||||
notify_mod.pushmsg_for_host(uname, f"{uname} {msg}")
|
||||
|
||||
# Check RTT thresholds with infinite RTT for overdue hosts
|
||||
threshold_checker = ctx.get("threshold_checker")
|
||||
if threshold_checker:
|
||||
metric_path = "rtt"
|
||||
threshold_checker.check_value(
|
||||
host_name=uname,
|
||||
metric_path=metric_path,
|
||||
value=float('inf'),
|
||||
alert_states=host.alert_states
|
||||
)
|
||||
|
||||
# Notify websockets
|
||||
if msg_to_websockets:
|
||||
@@ -274,8 +283,8 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
||||
# Check RTT thresholds using the threshold checker
|
||||
threshold_checker = ctx.get("threshold_checker")
|
||||
if threshold_checker and rtt and rtt > 0:
|
||||
# Metric path for RTT is "rtt.<hostname>"
|
||||
metric_path = f"rtt.{uname}"
|
||||
# Metric path for RTT is simply "rtt"
|
||||
metric_path = "rtt"
|
||||
|
||||
# Check against configured thresholds (handles alerts, notifications, etc.)
|
||||
threshold_checker.check_value(
|
||||
|
||||
Reference in New Issue
Block a user