Compare commits
180 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 756b2323be | |||
| 6e7156b42d | |||
| 928035df50 | |||
| 0f90be659e | |||
| 4160e34a96 | |||
| 6430d2ddf3 | |||
| 4b87a90e76 | |||
| 450814daca | |||
| e7786ac5da | |||
| fed71d97d6 | |||
| ba96da9622 | |||
| 7f17ddc2ff | |||
| 7750c5a303 | |||
| e58530df7d | |||
| fe7143759c | |||
| 236b40cfe4 | |||
| 4e5bafd26c | |||
| 817ae064af | |||
| a00282913b | |||
| d699a29fa9 | |||
| 4ce7eacfdd | |||
| 1cefc2676e | |||
| 668a135e53 | |||
| 59e256a042 | |||
| 708508157f | |||
| f67fa9baff | |||
| 588eb2a792 | |||
| b907343e36 | |||
| e50a3996ae | |||
| e1056a0365 | |||
| 1dbe0f8e64 | |||
| 12e8812070 | |||
| 9b5d8ac9b1 | |||
| 500d256d76 | |||
| a7a45bf8c3 | |||
| 3e9b052f71 | |||
| 7444262985 | |||
| 3401cc0dbb | |||
| ab0132a38d | |||
| 9e389736f8 | |||
| b64a2a9313 | |||
| a52744a448 | |||
| 5e2b04b811 | |||
| 8e07b09d7e | |||
| 653e018e4f | |||
| c7326da7d9 | |||
| 0426a75d8c | |||
| 539f25d877 | |||
| 3e3099fc6d | |||
| c9f15a3f1c | |||
| 6e396ad760 | |||
| 2800de0b4a | |||
| 15f7e6a64d | |||
| 9768d13b88 | |||
| 8640d731aa | |||
| de81751e59 | |||
| 60c692cefc | |||
| 9a0baf3c78 | |||
| 55bdb9593a | |||
| 2009626fb4 | |||
| 18769afd37 | |||
| 31db5cf35e | |||
| 326f53f23d | |||
| 4f9bc8c868 | |||
| 259b4a3594 | |||
| 8646f68957 | |||
| a4a6c1e3d9 | |||
| 0e8250362e | |||
| 2f5da9fc5e | |||
| 87aeec5999 | |||
| f24500a6b5 | |||
| a7bb183222 | |||
| 8207cd7b5f | |||
| 11f1eefa8c | |||
| 62f496e9f8 | |||
| aef9e7769b | |||
| 58c2b9d996 | |||
| 2e8bcb630d | |||
| 338711181b | |||
| 43487f17e7 | |||
| 40205bf5c7 | |||
| b95f1a5bb7 | |||
| 12f7eb722b | |||
| 217bba1b76 | |||
| 967e05ed74 | |||
| c20245b0ab | |||
| b9db0c552e | |||
| 05045bafa2 | |||
| 39f1b5de30 | |||
| b06de6fdd3 | |||
| 940d0af35e | |||
| d6d31aa2e3 | |||
| 76edfe7577 | |||
| d190029728 | |||
| b8307e7a9d | |||
| a2fdf091f5 | |||
| 1914e6f28e | |||
| 82cbce9615 | |||
| dbb779b013 | |||
| ca908ee967 | |||
| 73c697b6c5 | |||
| 3e2357380b | |||
| cc4a103bae | |||
| 53fb10fdf5 | |||
| 2df2ad18c9 | |||
| b81a0d2a6c | |||
| 1a19088cfe | |||
| 172f6e950f | |||
| 4349ae217a | |||
| b3aa7b585f | |||
| 88a3c09b51 | |||
| 0504402a8a | |||
| ca58c18802 | |||
| 1ddc4b8132 | |||
| 5e1720ed32 | |||
| 77f127fe60 | |||
| 54fbd8d73d | |||
| 7ab17e26e2 | |||
| 28f5fa951c | |||
| 37f1c58969 | |||
| f006077a71 | |||
| d9fc8d632f | |||
| f640574e4f | |||
| 9a19424279 | |||
| ca8ba84e65 | |||
| f3d08d1c9e | |||
| 1e4263b793 | |||
| e931acb9f5 | |||
| 018409e71d | |||
| 1824f637b4 | |||
| a534c06b26 | |||
| d7b5c97a4e | |||
| ae447ac4a6 | |||
| d44ce3d124 | |||
| b1985d0eb2 | |||
| de778f680f | |||
| d7b368c7c6 | |||
| e790663f9f | |||
| 475319e248 | |||
| ca5ef384a8 | |||
| c93dbdc0f4 | |||
| 3a546a1e5c | |||
| 74c89d098c | |||
| 3301dbfe34 | |||
| d00d903e7d | |||
| babb5d61aa | |||
| 11d1c718b3 | |||
| a99b6b54c7 | |||
| 8da3d550eb | |||
| a76d0fc840 | |||
| 94cbb31c48 | |||
| ae60844a8a | |||
| 49fa310361 | |||
| 28e2180f7b | |||
| ce0590f015 | |||
| f50acca509 | |||
| 72fc82b91f | |||
| 46f8c32c0b | |||
| 691f62aa69 | |||
| cffc9805f9 | |||
| 917d6a401b | |||
| 2bd3a9beb6 | |||
| 5523c60866 | |||
| ab37ac7194 | |||
| f811a19d80 | |||
| 6239825f43 | |||
| b56245bb23 | |||
| 331c4e804d | |||
| 9fd945a481 | |||
| 26df08eeff | |||
| 5819dd6b25 | |||
| 6fb67f8615 | |||
| e70ae6f176 | |||
| a77f6d380c | |||
| 6aae2a1dab | |||
| 85ee0e1040 | |||
| c4f09e9ced | |||
| 64710fd4cd | |||
| 1f5e7465a3 | |||
| b290b21e23 |
@@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Edit(*)",
|
||||||
|
"Bash(pytest *)",
|
||||||
|
"Bash(python *)",
|
||||||
|
"Bash(python3 *)",
|
||||||
|
"Bash(.venv/bin/pytest *)",
|
||||||
|
"Bash(npm *)",
|
||||||
|
"Bash(git *)",
|
||||||
|
"Bash(ls *)",
|
||||||
|
"Bash(cat *)",
|
||||||
|
"Bash(grep *)",
|
||||||
|
"Bash(find *)",
|
||||||
|
"Bash(mkdir *)",
|
||||||
|
"Bash(touch *)",
|
||||||
|
"Bash(uv *)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -11,13 +11,7 @@ jobs:
|
|||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
# - name: Set up Python
|
|
||||||
# uses: actions/setup-python@v5
|
|
||||||
# with:
|
|
||||||
# python-version: '3.11'
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
# Use a generic run step for FreeBSD if actions/setup-python
|
|
||||||
# fails in restricted environments.
|
|
||||||
run: |
|
run: |
|
||||||
python3 --version
|
python3 --version
|
||||||
python3 -m ensurepip --upgrade
|
python3 -m ensurepip --upgrade
|
||||||
|
|||||||
@@ -12,3 +12,6 @@ dist/
|
|||||||
ssl/
|
ssl/
|
||||||
uv.lock
|
uv.lock
|
||||||
.hb.yaml
|
.hb.yaml
|
||||||
|
.superpowers/
|
||||||
|
rndc-key
|
||||||
|
docs/superpowers/
|
||||||
|
|||||||
+21
@@ -0,0 +1,21 @@
|
|||||||
|
# MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2002 - 2026 Andreas Wrede
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
@@ -53,6 +53,17 @@ See [User Management](USERS.md) for full authentication documentation.
|
|||||||
|--------|------|-------------|------|
|
|--------|------|-------------|------|
|
||||||
| `GET` | `/api/0/users` | List all users | Admin |
|
| `GET` | `/api/0/users` | List all users | Admin |
|
||||||
| `GET` | `/api/0/users/me` | Own profile | Authenticated |
|
| `GET` | `/api/0/users/me` | Own profile | Authenticated |
|
||||||
|
| `PUT` | `/api/0/users/me` | Update own profile | Authenticated |
|
||||||
|
|
||||||
|
### Notification Channels
|
||||||
|
|
||||||
|
| Method | Path | Description | Role |
|
||||||
|
|--------|------|-------------|------|
|
||||||
|
| `GET` | `/api/0/notification_channel_types` | Channel type schemas | Authenticated |
|
||||||
|
| `GET` | `/api/0/notification_channels` | List visible channels | Authenticated |
|
||||||
|
| `POST` | `/api/0/notification_channels` | Create a channel | Authenticated |
|
||||||
|
| `PUT` | `/api/0/notification_channels/{name}` | Update a channel | Owner or Admin |
|
||||||
|
| `DELETE` | `/api/0/notification_channels/{name}` | Delete a channel | Owner or Admin |
|
||||||
|
|
||||||
### Host Management
|
### Host Management
|
||||||
|
|
||||||
@@ -203,6 +214,101 @@ Changes take effect immediately but are not written back to the config file. Upd
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Notification Channel Endpoints
|
||||||
|
|
||||||
|
Channels are visible to all users by default. Channels marked `private: true` are only visible to their owner. Admins see all channels.
|
||||||
|
|
||||||
|
#### GET /api/0/notification_channel_types
|
||||||
|
Return the schema for every supported notifier type. Used by the web UI to dynamically render the channel creation form.
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"pushover": {
|
||||||
|
"label": "Pushover",
|
||||||
|
"fields": [
|
||||||
|
{"key": "token", "label": "App token", "type": "secret", "required": true},
|
||||||
|
{"key": "user", "label": "User key", "type": "secret", "required": true},
|
||||||
|
{"key": "sound", "label": "Sound", "type": "text", "required": false}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"email": { "label": "E-mail", "fields": [ ... ] },
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### GET /api/0/notification_channels
|
||||||
|
List channels visible to the current user (public channels + own private channels). Admins receive all channels.
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "pushover_ops",
|
||||||
|
"type": "pushover",
|
||||||
|
"type_label": "Pushover",
|
||||||
|
"owner": null,
|
||||||
|
"private": false,
|
||||||
|
"min_level": "WARNING",
|
||||||
|
"fields": [
|
||||||
|
{"key": "token", "label": "App token", "value": "•••", "sensitive": true},
|
||||||
|
{"key": "user", "label": "User key", "value": "•••", "sensitive": true}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Sensitive fields (`type: "secret"`) are always returned as `"•••"`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### POST /api/0/notification_channels
|
||||||
|
Create a new channel. The creating user becomes the channel's `owner`.
|
||||||
|
|
||||||
|
**Request body:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"name": "my_pushover",
|
||||||
|
"type": "pushover",
|
||||||
|
"token": "app-token",
|
||||||
|
"user": "user-key",
|
||||||
|
"min_level": "WARNING",
|
||||||
|
"private": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response:** `{"ok": true, "name": "my_pushover"}`
|
||||||
|
|
||||||
|
**Status codes:** `200 OK`, `400 Bad Request` (missing required field or unknown type), `409 Conflict` (name already exists)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### PUT /api/0/notification_channels/{name}
|
||||||
|
Update an existing channel. Only the channel owner or an admin may update it.
|
||||||
|
|
||||||
|
Secret fields sent as `"•••"` are left unchanged: the server keeps the value already stored in the existing config (same pattern as OAuth secrets in the admin config editor).
|
||||||
|
|
||||||
|
**Request body:** same shape as POST, `name` ignored (taken from URL).
|
||||||
|
|
||||||
|
**Response:** `{"ok": true}`
|
||||||
|
|
||||||
|
**Status codes:** `200 OK`, `403 Forbidden`, `404 Not Found`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### DELETE /api/0/notification_channels/{name}
|
||||||
|
Delete a channel. Only the channel owner or an admin may delete it.
|
||||||
|
|
||||||
|
**Response:** `{"ok": true}`
|
||||||
|
|
||||||
|
**Status codes:** `200 OK`, `403 Forbidden`, `404 Not Found`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
### Alert Endpoints
|
### Alert Endpoints
|
||||||
|
|
||||||
#### GET /api/0/hosts/{hostname}/alerts
|
#### GET /api/0/hosts/{hostname}/alerts
|
||||||
|
|||||||
@@ -104,11 +104,6 @@ The `nagios_runner` plugin collects:
|
|||||||
- `{name}_{metric}_min` - Minimum value (if present)
|
- `{name}_{metric}_min` - Minimum value (if present)
|
||||||
- `{name}_{metric}_max` - Maximum value (if present)
|
- `{name}_{metric}_max` - Maximum value (if present)
|
||||||
|
|
||||||
**Overall:**
|
|
||||||
- `overall_status` - Worst status from all commands
|
|
||||||
- `overall_status_code` - Worst status code
|
|
||||||
- `plugin_count` - Number of Nagios plugins executed
|
|
||||||
|
|
||||||
## Configuration Options
|
## Configuration Options
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
|||||||
+37
-7
@@ -30,9 +30,17 @@ Set `base_url` so notification links point to your hbd instance:
|
|||||||
base_url: https://hbd.example.com
|
base_url: https://hbd.example.com
|
||||||
```
|
```
|
||||||
|
|
||||||
### Global channel definitions
|
### Channel definitions
|
||||||
|
|
||||||
Define channels once; reference them by name from user configs:
|
Channels are defined under `notification_channels`. Each entry specifies a delivery type and its credentials. Three optional metadata fields control ownership, visibility, and alert filtering:
|
||||||
|
|
||||||
|
| Field | Default | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `owner` | *(absent)* | Username of the user who created and owns this channel. Absent = admin-created. |
|
||||||
|
| `private` | `false` | When `true`, only the owner can see and select this channel. |
|
||||||
|
| `min_level` | `WARNING` | Minimum alert level this channel receives. |
|
||||||
|
|
||||||
|
**Admin-created channels** (set in the config file or via the admin settings UI) are public by default — all users can select them:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
notification_channels:
|
notification_channels:
|
||||||
@@ -41,7 +49,7 @@ notification_channels:
|
|||||||
type: pushover
|
type: pushover
|
||||||
token: your-app-token
|
token: your-app-token
|
||||||
user: your-user-key
|
user: your-user-key
|
||||||
min_level: WARNING # optional, default: WARNING
|
min_level: WARNING
|
||||||
|
|
||||||
email_ops:
|
email_ops:
|
||||||
type: email
|
type: email
|
||||||
@@ -58,14 +66,14 @@ notification_channels:
|
|||||||
homeserver: https://matrix.example.org
|
homeserver: https://matrix.example.org
|
||||||
access_token: syt_xxx
|
access_token: syt_xxx
|
||||||
room_id: "!abc:matrix.example.org"
|
room_id: "!abc:matrix.example.org"
|
||||||
min_level: CRITICAL # only send critical alerts to this room
|
min_level: CRITICAL
|
||||||
|
|
||||||
sms_oncall:
|
sms_oncall:
|
||||||
type: sms_voipms
|
type: sms_voipms
|
||||||
api_user: me@example.com
|
api_user: me@example.com
|
||||||
api_password: secret
|
api_password: secret
|
||||||
did: "5551234567" # your voip.ms DID number
|
did: "5551234567"
|
||||||
dst: "5559876543" # destination number
|
dst: "5559876543"
|
||||||
min_level: CRITICAL
|
min_level: CRITICAL
|
||||||
|
|
||||||
signal_ops:
|
signal_ops:
|
||||||
@@ -82,9 +90,30 @@ notification_channels:
|
|||||||
username: heartbeat-bot
|
username: heartbeat-bot
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**User-created channels** are written by authenticated users through the API or their profile page. They carry an `owner` field and optionally `private: true`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
notification_channels:
|
||||||
|
|
||||||
|
alice_personal:
|
||||||
|
type: pushover
|
||||||
|
token: personal-token
|
||||||
|
user: personal-key
|
||||||
|
owner: alice # created by alice
|
||||||
|
private: true # only alice can see this channel
|
||||||
|
```
|
||||||
|
|
||||||
|
### Channel visibility
|
||||||
|
|
||||||
|
| Channel | Who can see / select it |
|
||||||
|
|---|---|
|
||||||
|
| No `private` field (or `private: false`) | All users |
|
||||||
|
| `private: true` | Only the `owner` |
|
||||||
|
| Any channel | Admins always see everything |
|
||||||
|
|
||||||
### Users with notification channels
|
### Users with notification channels
|
||||||
|
|
||||||
Each user lists which global channels they receive notifications on:
|
Each user lists which channels they receive notifications on. Users can manage their own selection from the profile page:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
users:
|
users:
|
||||||
@@ -270,6 +299,7 @@ Called once at startup from `main.py`. Pass the running asyncio event loop so Ma
|
|||||||
- Check that the host has an `owner` or `managers` set
|
- Check that the host has an `owner` or `managers` set
|
||||||
- Check that users have `notification_channels` listed
|
- Check that users have `notification_channels` listed
|
||||||
- Check that the channel names in user config match keys under `notification_channels:`
|
- Check that the channel names in user config match keys under `notification_channels:`
|
||||||
|
- If a user can't select a channel, check whether it is `private: true` and owned by someone else
|
||||||
|
|
||||||
**min_level filtering too aggressive:**
|
**min_level filtering too aggressive:**
|
||||||
- Default is `WARNING` — both WARNING and CRITICAL are sent
|
- Default is `WARNING` — both WARNING and CRITICAL are sent
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ This guide explains how to create custom plugins for the Heartbeat monitoring sy
|
|||||||
- [Plugin Types](#plugin-types)
|
- [Plugin Types](#plugin-types)
|
||||||
- [Creating a Plugin](#creating-a-plugin)
|
- [Creating a Plugin](#creating-a-plugin)
|
||||||
- [Plugin Lifecycle](#plugin-lifecycle)
|
- [Plugin Lifecycle](#plugin-lifecycle)
|
||||||
|
- [Server-initiated InfoPlugin refresh](#server-initiated-infoplugin-refresh)
|
||||||
- [Configuration](#configuration)
|
- [Configuration](#configuration)
|
||||||
- [Best Practices](#best-practices)
|
- [Best Practices](#best-practices)
|
||||||
- [Examples](#examples)
|
- [Examples](#examples)
|
||||||
@@ -250,6 +251,28 @@ Understanding the plugin lifecycle helps you implement plugins correctly:
|
|||||||
└─> Plugin releases resources, closes connections
|
└─> Plugin releases resources, closes connections
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Server-initiated InfoPlugin refresh
|
||||||
|
|
||||||
|
When a heartbeat packet arrives from a host the server has no plugin data for (e.g. after a server restart), the server sets `request_update = 1` in the ACK reply. The client detects this flag and immediately re-runs all InfoPlugins — clearing their cached results first — then resends the data as PLG messages.
|
||||||
|
|
||||||
|
This means InfoPlugin data will always reach the server as soon as possible without requiring a client restart. No action is needed from plugin authors: the framework handles cache invalidation and re-collection automatically.
|
||||||
|
|
||||||
|
The lifecycle for this case looks like:
|
||||||
|
|
||||||
|
```
|
||||||
|
Server restarts, host reconnects
|
||||||
|
└─> hbd receives HTB with no existing plugin_data for host
|
||||||
|
└─> hbd sets request_update=1 in ACK
|
||||||
|
|
||||||
|
Client receives ACK
|
||||||
|
└─> Detects request_update flag
|
||||||
|
└─> Clears _cache on every registered InfoPlugin
|
||||||
|
└─> Calls collect() on each InfoPlugin
|
||||||
|
└─> Sends fresh PLG messages to server
|
||||||
|
```
|
||||||
|
|
||||||
|
If you write an `InfoPlugin` with side effects in `_collect_info()` (opening connections, writing files, etc.), be aware that it may be called more than once per client session when this mechanism triggers.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
### Plugin-Specific Configuration
|
### Plugin-Specific Configuration
|
||||||
|
|||||||
+224
-65
@@ -256,6 +256,56 @@ disk_monitor:
|
|||||||
operator: "<"
|
operator: "<"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### ZFS Monitor
|
||||||
|
|
||||||
|
ZFS pool health is checked automatically for every pool. A pool in any state
|
||||||
|
other than `ONLINE` (e.g. `DEGRADED`, `SUSPENDED`, `FAULTED`, `UNAVAIL`) raises
|
||||||
|
a **CRITICAL** alert by default — no configuration required.
|
||||||
|
|
||||||
|
The default threshold is equivalent to:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
zfs_monitor:
|
||||||
|
pools:
|
||||||
|
'*':
|
||||||
|
status:
|
||||||
|
warning: 1
|
||||||
|
critical: 2
|
||||||
|
operator: ">"
|
||||||
|
hysteresis: 0.0
|
||||||
|
display: "ZFS pool {pool_name} is {health}"
|
||||||
|
```
|
||||||
|
|
||||||
|
`'*'` matches every pool on the host. The notification message includes the pool
|
||||||
|
name and its current health string, e.g. `ZFS pool tank is DEGRADED`.
|
||||||
|
|
||||||
|
**Override for specific pools** — named pool entries take priority over `'*'`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
zfs_monitor:
|
||||||
|
pools:
|
||||||
|
# Suppress health alerts for a scratch pool (not mission-critical)
|
||||||
|
scratch:
|
||||||
|
status:
|
||||||
|
enabled: false
|
||||||
|
|
||||||
|
# Capacity threshold for a specific pool
|
||||||
|
tank:
|
||||||
|
capacity:
|
||||||
|
warning: 75.0
|
||||||
|
critical: 90.0
|
||||||
|
operator: ">"
|
||||||
|
hysteresis: 0.05
|
||||||
|
```
|
||||||
|
|
||||||
|
**Alert state paths** follow the pattern `zfs_monitor.<pool_name>.status`,
|
||||||
|
so acknowledgements and silences target individual pools:
|
||||||
|
|
||||||
|
```
|
||||||
|
zfs_monitor.tank.status
|
||||||
|
zfs_monitor.backup.status
|
||||||
|
```
|
||||||
|
|
||||||
### Network Monitor
|
### Network Monitor
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -814,34 +864,32 @@ Planned features:
|
|||||||
|
|
||||||
## Multi-Threshold Configuration
|
## Multi-Threshold Configuration
|
||||||
|
|
||||||
**New in version 2.0**: Support for multiple named threshold configurations with per-host mapping.
|
Support for multiple named threshold configurations with per-host mapping and composable layering.
|
||||||
|
|
||||||
### Overview
|
### Overview
|
||||||
|
|
||||||
The multi-threshold feature allows you to:
|
The multi-threshold feature allows you to:
|
||||||
- Define multiple sets of threshold configurations
|
- Define multiple named threshold configurations
|
||||||
- Map different hosts to different threshold sets
|
- Assign one or more configurations to each host
|
||||||
|
- Compose configurations by layering — each named config's overrides are applied in order on top of the defaults
|
||||||
- Use different sensitivity levels for different environments
|
- Use different sensitivity levels for different environments
|
||||||
- Maintain a default configuration for unmapped hosts
|
|
||||||
|
|
||||||
### Configuration Structure
|
### Configuration Structure
|
||||||
|
|
||||||
|
Named configurations are defined under `threshold_configs`. Each host selects which ones to use via `threshold_config` in the `hosts` section (a string for a single config, or a list to layer multiple):
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# Optional: Set the default configuration name (defaults to "default")
|
# Optional: set the default configuration name (defaults to "default")
|
||||||
default_threshold_config: "default"
|
default_threshold_config: "default"
|
||||||
|
|
||||||
# Define multiple named threshold configurations
|
|
||||||
threshold_configs:
|
threshold_configs:
|
||||||
# Configuration name 1
|
|
||||||
default:
|
default:
|
||||||
thresholds:
|
thresholds:
|
||||||
# Standard threshold definitions
|
|
||||||
cpu_monitor:
|
cpu_monitor:
|
||||||
cpu_percent:
|
cpu_percent:
|
||||||
warning: 80.0
|
warning: 80.0
|
||||||
critical: 90.0
|
critical: 90.0
|
||||||
|
|
||||||
# Configuration name 2
|
|
||||||
high_sensitivity:
|
high_sensitivity:
|
||||||
thresholds:
|
thresholds:
|
||||||
cpu_monitor:
|
cpu_monitor:
|
||||||
@@ -849,7 +897,6 @@ threshold_configs:
|
|||||||
warning: 60.0
|
warning: 60.0
|
||||||
critical: 75.0
|
critical: 75.0
|
||||||
|
|
||||||
# Configuration name 3
|
|
||||||
low_sensitivity:
|
low_sensitivity:
|
||||||
thresholds:
|
thresholds:
|
||||||
cpu_monitor:
|
cpu_monitor:
|
||||||
@@ -857,14 +904,77 @@ threshold_configs:
|
|||||||
warning: 90.0
|
warning: 90.0
|
||||||
critical: 95.0
|
critical: 95.0
|
||||||
|
|
||||||
# Map specific hosts to specific configurations
|
hosts:
|
||||||
host_threshold_mapping:
|
prod-web-01:
|
||||||
prod-web-01: high_sensitivity
|
threshold_config: high_sensitivity # single config
|
||||||
prod-web-02: high_sensitivity
|
|
||||||
dev-server-01: low_sensitivity
|
dev-server-01:
|
||||||
# Unmapped hosts use default_threshold_config
|
threshold_config: low_sensitivity
|
||||||
|
|
||||||
|
# Hosts with no threshold_config use default_threshold_config
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Composable Configurations (list form)
|
||||||
|
|
||||||
|
`threshold_config` can be a list. Configs are applied **left to right**: the defaults are the base, then each named config's overrides are layered on top. Later entries in the list win on any metric they define.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
threshold_configs:
|
||||||
|
default:
|
||||||
|
thresholds:
|
||||||
|
cpu_monitor:
|
||||||
|
cpu_percent: {warning: 80, critical: 90}
|
||||||
|
memory_monitor:
|
||||||
|
memory_percent: {warning: 85, critical: 95}
|
||||||
|
disk_monitor:
|
||||||
|
partitions:
|
||||||
|
/:
|
||||||
|
percent: {warning: 80, critical: 90}
|
||||||
|
|
||||||
|
# Tighter CPU limits for busy servers
|
||||||
|
high_cpu_load:
|
||||||
|
thresholds:
|
||||||
|
cpu_monitor:
|
||||||
|
cpu_percent: {warning: 60, critical: 75}
|
||||||
|
|
||||||
|
# Tighter disk limits for data-heavy servers
|
||||||
|
busy_disk:
|
||||||
|
thresholds:
|
||||||
|
disk_monitor:
|
||||||
|
partitions:
|
||||||
|
/:
|
||||||
|
percent: {warning: 70, critical: 85}
|
||||||
|
|
||||||
|
hosts:
|
||||||
|
# Gets default thresholds only
|
||||||
|
web-01:
|
||||||
|
threshold_config: default
|
||||||
|
|
||||||
|
# Gets tighter CPU limits, default memory and disk
|
||||||
|
build-server:
|
||||||
|
threshold_config: high_cpu_load
|
||||||
|
|
||||||
|
# Layers both: tighter CPU AND tighter disk, default memory
|
||||||
|
db-01:
|
||||||
|
threshold_config: [high_cpu_load, busy_disk]
|
||||||
|
|
||||||
|
# Three layers: busy_disk overrides high_cpu_load if they conflict
|
||||||
|
storage-01:
|
||||||
|
threshold_config: [default, high_cpu_load, busy_disk]
|
||||||
|
```
|
||||||
|
|
||||||
|
**How layering works:**
|
||||||
|
|
||||||
|
Starting from the `default` thresholds:
|
||||||
|
|
||||||
|
| Layer | Applied config | Effect |
|
||||||
|
|-------|---------------|--------|
|
||||||
|
| Base | `default` | all default thresholds |
|
||||||
|
| +1 | `high_cpu_load` | cpu_percent overridden to 60/75 |
|
||||||
|
| +2 | `busy_disk` | disk percent overridden to 70/85; cpu_percent stays at 60/75 |
|
||||||
|
|
||||||
|
Each named config only overrides the metrics it explicitly defines. Metrics not mentioned in a config inherit from the layers beneath.
|
||||||
|
|
||||||
### Use Cases
|
### Use Cases
|
||||||
|
|
||||||
#### 1. Environment-Based Thresholds
|
#### 1. Environment-Based Thresholds
|
||||||
@@ -887,11 +997,15 @@ threshold_configs:
|
|||||||
warning: 90.0 # More relaxed for dev
|
warning: 90.0 # More relaxed for dev
|
||||||
critical: 98.0
|
critical: 98.0
|
||||||
|
|
||||||
host_threshold_mapping:
|
hosts:
|
||||||
prod-web-01: production
|
prod-web-01:
|
||||||
prod-web-02: production
|
threshold_config: production
|
||||||
dev-web-01: development
|
prod-web-02:
|
||||||
dev-web-02: development
|
threshold_config: production
|
||||||
|
dev-web-01:
|
||||||
|
threshold_config: development
|
||||||
|
dev-web-02:
|
||||||
|
threshold_config: development
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 2. Server Role-Based Thresholds
|
#### 2. Server Role-Based Thresholds
|
||||||
@@ -914,7 +1028,7 @@ threshold_configs:
|
|||||||
warning: 70.0
|
warning: 70.0
|
||||||
critical: 85.0
|
critical: 85.0
|
||||||
memory_monitor:
|
memory_monitor:
|
||||||
percent:
|
memory_percent:
|
||||||
warning: 90.0 # Databases can use high memory
|
warning: 90.0 # Databases can use high memory
|
||||||
critical: 97.0
|
critical: 97.0
|
||||||
disk_monitor:
|
disk_monitor:
|
||||||
@@ -927,17 +1041,23 @@ threshold_configs:
|
|||||||
cache:
|
cache:
|
||||||
thresholds:
|
thresholds:
|
||||||
memory_monitor:
|
memory_monitor:
|
||||||
percent:
|
memory_percent:
|
||||||
warning: 95.0 # Redis/Memcached can use very high memory
|
warning: 95.0 # Redis/Memcached can use very high memory
|
||||||
critical: 99.0
|
critical: 99.0
|
||||||
|
|
||||||
host_threshold_mapping:
|
hosts:
|
||||||
web-01: webserver
|
web-01:
|
||||||
web-02: webserver
|
threshold_config: webserver
|
||||||
db-01: database
|
web-02:
|
||||||
db-02: database
|
threshold_config: webserver
|
||||||
redis-01: cache
|
db-01:
|
||||||
memcached-01: cache
|
threshold_config: database
|
||||||
|
db-02:
|
||||||
|
threshold_config: database
|
||||||
|
redis-01:
|
||||||
|
threshold_config: cache
|
||||||
|
memcached-01:
|
||||||
|
threshold_config: cache
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 3. Sensitivity Levels
|
#### 3. Sensitivity Levels
|
||||||
@@ -952,7 +1072,7 @@ threshold_configs:
|
|||||||
partitions:
|
partitions:
|
||||||
/:
|
/:
|
||||||
percent:
|
percent:
|
||||||
warning: 70.0 # Very sensitive
|
warning: 70.0
|
||||||
critical: 80.0
|
critical: 80.0
|
||||||
hysteresis: 0.15
|
hysteresis: 0.15
|
||||||
|
|
||||||
@@ -976,52 +1096,91 @@ threshold_configs:
|
|||||||
critical: 98.0
|
critical: 98.0
|
||||||
hysteresis: 0.05
|
hysteresis: 0.05
|
||||||
|
|
||||||
host_threshold_mapping:
|
hosts:
|
||||||
payment-gateway: critical
|
payment-gateway:
|
||||||
auth-server: critical
|
threshold_config: critical
|
||||||
web-01: standard
|
auth-server:
|
||||||
web-02: standard
|
threshold_config: critical
|
||||||
test-server: relaxed
|
web-01:
|
||||||
|
threshold_config: standard
|
||||||
|
web-02:
|
||||||
|
threshold_config: standard
|
||||||
|
test-server:
|
||||||
|
threshold_config: relaxed
|
||||||
```
|
```
|
||||||
|
|
||||||
### Backward Compatibility
|
#### 4. Composable Profiles
|
||||||
|
|
||||||
The legacy single threshold configuration is fully supported:
|
Build host-specific thresholds by combining small, focused configs:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# Old format - still works
|
|
||||||
thresholds:
|
|
||||||
cpu_monitor:
|
|
||||||
cpu_percent:
|
|
||||||
warning: 80.0
|
|
||||||
critical: 90.0
|
|
||||||
```
|
|
||||||
|
|
||||||
This is equivalent to:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# New format
|
|
||||||
threshold_configs:
|
threshold_configs:
|
||||||
|
# Baseline — everything at default levels
|
||||||
default:
|
default:
|
||||||
thresholds:
|
thresholds:
|
||||||
cpu_monitor:
|
cpu_monitor:
|
||||||
cpu_percent:
|
cpu_percent: {warning: 80, critical: 90}
|
||||||
warning: 80.0
|
memory_monitor:
|
||||||
critical: 90.0
|
memory_percent: {warning: 85, critical: 95}
|
||||||
```
|
|
||||||
|
|
||||||
|
# Overlay: tighter CPU only
|
||||||
|
tight_cpu:
|
||||||
|
thresholds:
|
||||||
|
cpu_monitor:
|
||||||
|
cpu_percent: {warning: 60, critical: 75}
|
||||||
|
|
||||||
|
# Overlay: tighter memory only
|
||||||
|
tight_memory:
|
||||||
|
thresholds:
|
||||||
|
memory_monitor:
|
||||||
|
memory_percent: {warning: 70, critical: 85}
|
||||||
|
|
||||||
|
# Overlay: extra disk partition for database servers
|
||||||
|
db_disk:
|
||||||
|
thresholds:
|
||||||
|
disk_monitor:
|
||||||
|
partitions:
|
||||||
|
/var/lib/postgresql:
|
||||||
|
percent: {warning: 75, critical: 88}
|
||||||
|
|
||||||
|
hosts:
|
||||||
|
# Plain web server
|
||||||
|
web-01:
|
||||||
|
threshold_config: default
|
||||||
|
|
||||||
|
# Build server: tight CPU, default memory and disk
|
||||||
|
build-01:
|
||||||
|
threshold_config: tight_cpu
|
||||||
|
|
||||||
|
# Database: tight CPU + tight memory + extra disk partition
|
||||||
|
db-01:
|
||||||
|
threshold_config: [tight_cpu, tight_memory, db_disk]
|
||||||
|
|
||||||
|
# Replica database: tight memory + extra disk, normal CPU
|
||||||
|
db-02:
|
||||||
|
threshold_config: [tight_memory, db_disk]
|
||||||
|
```
|
||||||
### Configuration Priority
|
### Configuration Priority
|
||||||
|
|
||||||
1. **Host-specific mapping**: If host is in `host_threshold_mapping`, use that config
|
1. **Host `threshold_config` (list)**: Layer each named config's overrides left-to-right on top of the defaults
|
||||||
2. **Default config**: Use `default_threshold_config`
|
2. **Host `threshold_config` (string)**: Use that single named config directly
|
||||||
3. **First alphabetically**: If default not found, use first config alphabetically
|
3. **`host_threshold_mapping`** (legacy): Maps a host to a single named config, exactly like a string `threshold_config`; the list form is not supported
|
||||||
4. **Legacy fallback**: If `threshold_configs` not present, use `thresholds`
|
4. **`default_threshold_config`**: Used for hosts with no `threshold_config` and no `host_threshold_mapping` entry
|
||||||
|
5. **First alphabetically**: If the default config is not found, use the first config alphabetically
|
||||||
|
6. **Legacy `thresholds` section**: Used when `threshold_configs` is absent entirely
|
||||||
|
|
||||||
### Example: Complete Multi-Threshold Setup
|
### Backward Compatibility
|
||||||
|
|
||||||
See `hbd/config_multi_threshold_example.yaml` for a complete example with:
|
The legacy `host_threshold_mapping` top-level key and the flat `thresholds` section are still fully supported:
|
||||||
- 4 named configurations (default, high_sensitivity, low_sensitivity, database)
|
|
||||||
- Host-to-config mappings for production, development, and test systems
|
```yaml
|
||||||
- Specialized database server thresholds
|
# Still works — equivalent to hosts: {prod-web-01: {threshold_config: high_sensitivity}}
|
||||||
- Custom display messages with plugin data
|
host_threshold_mapping:
|
||||||
|
prod-web-01: high_sensitivity
|
||||||
|
|
||||||
|
# Still works — equivalent to threshold_configs: {default: {thresholds: ...}}
|
||||||
|
thresholds:
|
||||||
|
cpu_monitor:
|
||||||
|
cpu_percent: {warning: 80, critical: 90}
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
+45
-1
@@ -36,7 +36,7 @@ users:
|
|||||||
bob:
|
bob:
|
||||||
full_name: Bob Smith
|
full_name: Bob Smith
|
||||||
password: pbkdf2:sha256:...
|
password: pbkdf2:sha256:...
|
||||||
notification_channels: [pushover_standard]
|
notification_channels: [pushover_standard] # channels bob has selected
|
||||||
|
|
||||||
carol:
|
carol:
|
||||||
full_name: Carol Jones
|
full_name: Carol Jones
|
||||||
@@ -46,6 +46,24 @@ default_owner: andreas # owns hosts with no explicit owner
|
|||||||
# falls back to the first admin user if omitted
|
# falls back to the first admin user if omitted
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Client-declared host ownership
|
||||||
|
|
||||||
|
A host can declare its own owner directly in the hbc or hbc_mini client configuration. This is useful for hosts that are not listed in the server config, or during initial setup before a server-side config entry has been created.
|
||||||
|
|
||||||
|
**`~/.hbc.yaml`** (hbc):
|
||||||
|
```yaml
|
||||||
|
owner: andreas
|
||||||
|
```
|
||||||
|
|
||||||
|
**`~/.hbc.json`** (hbc_mini):
|
||||||
|
```json
|
||||||
|
{ "owner": "andreas" }
|
||||||
|
```
|
||||||
|
|
||||||
|
When set, the value is included in the `os_info` plugin data sent to the server. The server applies it as `host.owner` the first time `os_info` arrives, provided no owner has been configured server-side for that host. Server-configured ownership always takes precedence.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
### Assigning roles to hosts
|
### Assigning roles to hosts
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -170,6 +188,32 @@ Return the currently authenticated user's profile.
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
#### PUT /api/0/users/me
|
||||||
|
Update the current user's profile. All fields are optional — send only what you want to change.
|
||||||
|
|
||||||
|
**Update display name and avatar:**
|
||||||
|
```json
|
||||||
|
{ "full_name": "Carol Jones", "avatar": "/avatars/carol.png" }
|
||||||
|
```
|
||||||
|
|
||||||
|
**Change notification channel selection:**
|
||||||
|
```json
|
||||||
|
{ "notification_channels": ["pushover_ops", "email_ops"] }
|
||||||
|
```
|
||||||
|
Only channels visible to the user (public + own private) are accepted; others are silently dropped.
|
||||||
|
|
||||||
|
**Change password:**
|
||||||
|
```json
|
||||||
|
{ "password": { "current": "oldpass", "new": "newpass" } }
|
||||||
|
```
|
||||||
|
Requires the correct current password. New password is hashed before storage.
|
||||||
|
|
||||||
|
**Response:** `{"ok": true}`
|
||||||
|
|
||||||
|
**Status codes:** `200 OK`, `400` (missing/invalid field), `401` (unauthenticated), `403` (wrong current password)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
### Host Access
|
### Host Access
|
||||||
|
|
||||||
#### GET /api/0/hosts/{hostname}/access
|
#### GET /api/0/hosts/{hostname}/access
|
||||||
|
|||||||
@@ -1,602 +0,0 @@
|
|||||||
# Plugin Error Checking Implementation Plan
|
|
||||||
|
|
||||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
|
||||||
|
|
||||||
**Goal:** Improve plugin error checking in hbc, especially for nagios_runner, and fix logger messages silently discarded in daemon mode.
|
|
||||||
|
|
||||||
**Architecture:** Three focused changes across three files: (1) `hbd/client/plugin.py` gains a `skip_reason` attribute on Plugin and updated PluginLoader messaging; (2) `hbd/client/plugins/nagios_runner.py` gains async subprocess execution, stderr capture, signal-killed process handling, and init-time command path validation; (3) `hbd/client/main.py` gains proper post-fork logging reconfiguration to syslog.
|
|
||||||
|
|
||||||
**Tech Stack:** Python 3.11+, asyncio, `logging.handlers.SysLogHandler`, pytest
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## File Map
|
|
||||||
|
|
||||||
| Action | Path | What changes |
|
|
||||||
|---|---|---|
|
|
||||||
| Modify | `hbd/client/plugin.py` | `Plugin.__init__` gains `skip_reason`; `PluginLoader` checks it |
|
|
||||||
| Modify | `hbd/client/plugins/nagios_runner.py` | async subprocess, stderr, signal codes, init validation, `skip_reason` |
|
|
||||||
| Modify | `hbd/client/main.py` | `_reconfigure_logging_for_daemon()` helper; remove redundant syslog calls |
|
|
||||||
| Create | `tests/test_plugin.py` | PluginLoader messaging tests |
|
|
||||||
| Create | `tests/test_nagios_runner.py` | NagiosRunnerPlugin behaviour tests |
|
|
||||||
|
|
||||||
Run tests throughout with:
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_plugin.py tests/test_nagios_runner.py -v
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task 1: Plugin.skip_reason + PluginLoader messaging
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `hbd/client/plugin.py:40-48` (Plugin.__init__)
|
|
||||||
- Modify: `hbd/client/plugin.py:369-381` (PluginLoader.load_from_directory)
|
|
||||||
- Create: `tests/test_plugin.py`
|
|
||||||
|
|
||||||
- [ ] **Step 1: Write failing tests**
|
|
||||||
|
|
||||||
Create `tests/test_plugin.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import textwrap
|
|
||||||
|
|
||||||
from hbd.client.plugin import Plugin, PluginLoader, PluginRegistry
|
|
||||||
|
|
||||||
|
|
||||||
def test_plugin_skip_reason_defaults_none(tmp_path):
|
|
||||||
plugin_code = textwrap.dedent("""
|
|
||||||
from hbd.client.plugin import MonitorPlugin
|
|
||||||
|
|
||||||
class MinimalPlugin(MonitorPlugin):
|
|
||||||
name = "minimal"
|
|
||||||
version = "1.0.0"
|
|
||||||
interval = 60
|
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
return True
|
|
||||||
|
|
||||||
async def _collect_metrics(self):
|
|
||||||
return {}
|
|
||||||
""")
|
|
||||||
(tmp_path / "minimal.py").write_text(plugin_code)
|
|
||||||
registry = PluginRegistry()
|
|
||||||
loader = PluginLoader(registry)
|
|
||||||
asyncio.run(loader.load_from_directory(tmp_path))
|
|
||||||
plugin = registry.get("minimal")
|
|
||||||
assert plugin is not None
|
|
||||||
assert plugin.skip_reason is None
|
|
||||||
|
|
||||||
|
|
||||||
def test_loader_logs_info_when_skip_reason_set(tmp_path, caplog):
|
|
||||||
plugin_code = textwrap.dedent("""
|
|
||||||
from hbd.client.plugin import MonitorPlugin
|
|
||||||
|
|
||||||
class SkippablePlugin(MonitorPlugin):
|
|
||||||
name = "skippable"
|
|
||||||
version = "1.0.0"
|
|
||||||
interval = 60
|
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
self.skip_reason = "not configured in yaml"
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _collect_metrics(self):
|
|
||||||
return {}
|
|
||||||
""")
|
|
||||||
(tmp_path / "skippable.py").write_text(plugin_code)
|
|
||||||
registry = PluginRegistry()
|
|
||||||
loader = PluginLoader(registry)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.INFO, logger="plugin.loader"):
|
|
||||||
count = asyncio.run(loader.load_from_directory(tmp_path))
|
|
||||||
|
|
||||||
assert count == 0
|
|
||||||
assert any("skipped: not configured in yaml" in r.message for r in caplog.records)
|
|
||||||
assert not any("failed initialization" in r.message for r in caplog.records)
|
|
||||||
|
|
||||||
|
|
||||||
def test_loader_logs_warning_when_no_skip_reason(tmp_path, caplog):
|
|
||||||
plugin_code = textwrap.dedent("""
|
|
||||||
from hbd.client.plugin import MonitorPlugin
|
|
||||||
|
|
||||||
class FailPlugin(MonitorPlugin):
|
|
||||||
name = "fail"
|
|
||||||
version = "1.0.0"
|
|
||||||
interval = 60
|
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _collect_metrics(self):
|
|
||||||
return {}
|
|
||||||
""")
|
|
||||||
(tmp_path / "fail_plugin.py").write_text(plugin_code)
|
|
||||||
registry = PluginRegistry()
|
|
||||||
loader = PluginLoader(registry)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.WARNING, logger="plugin.loader"):
|
|
||||||
count = asyncio.run(loader.load_from_directory(tmp_path))
|
|
||||||
|
|
||||||
assert count == 0
|
|
||||||
assert any("failed initialization" in r.message for r in caplog.records)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 2: Run tests to verify they fail**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_plugin.py -v
|
|
||||||
```
|
|
||||||
Expected: `test_plugin_skip_reason_defaults_none` FAILS (attribute missing), others may error.
|
|
||||||
|
|
||||||
- [ ] **Step 3: Add `skip_reason` to `Plugin.__init__`**
|
|
||||||
|
|
||||||
In `hbd/client/plugin.py`, in `Plugin.__init__` (around line 46), add one line:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
|
||||||
self.config = config or {}
|
|
||||||
self.logger = logging.getLogger(f"plugin.{self.name}")
|
|
||||||
self._initialized = False
|
|
||||||
self.skip_reason: Optional[str] = None
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 4: Update PluginLoader messaging**
|
|
||||||
|
|
||||||
In `hbd/client/plugin.py`, replace the `if not initialized:` block (around line 372):
|
|
||||||
|
|
||||||
```python
|
|
||||||
if not initialized:
|
|
||||||
if plugin.skip_reason:
|
|
||||||
self.logger.info(
|
|
||||||
f"Plugin {plugin.name} skipped: {plugin.skip_reason}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
self.logger.warning(
|
|
||||||
f"Plugin {plugin.name} failed initialization, skipping"
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 5: Run tests to verify they pass**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_plugin.py -v
|
|
||||||
```
|
|
||||||
Expected: all 3 tests PASS.
|
|
||||||
|
|
||||||
- [ ] **Step 6: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add hbd/client/plugin.py tests/test_plugin.py
|
|
||||||
git commit -m "feat: add skip_reason to Plugin; improve PluginLoader init messaging"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task 2: NagiosRunnerPlugin — skip_reason when no commands
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `hbd/client/plugins/nagios_runner.py:88-105` (initialize)
|
|
||||||
- Create: `tests/test_nagios_runner.py`
|
|
||||||
|
|
||||||
- [ ] **Step 1: Write failing test**
|
|
||||||
|
|
||||||
Create `tests/test_nagios_runner.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import stat
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from hbd.client.plugins.nagios_runner import (
|
|
||||||
NagiosRunnerPlugin,
|
|
||||||
NAGIOS_OK,
|
|
||||||
NAGIOS_WARNING,
|
|
||||||
NAGIOS_CRITICAL,
|
|
||||||
NAGIOS_UNKNOWN,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_no_commands_sets_skip_reason():
|
|
||||||
plugin = NagiosRunnerPlugin(config={"commands": []})
|
|
||||||
result = asyncio.run(plugin.initialize())
|
|
||||||
assert result is False
|
|
||||||
assert plugin.skip_reason is not None
|
|
||||||
assert "nagios_runner.commands" in plugin.skip_reason
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 2: Run test to verify it fails**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_nagios_runner.py::test_no_commands_sets_skip_reason -v
|
|
||||||
```
|
|
||||||
Expected: FAIL — `plugin.skip_reason` is `None`.
|
|
||||||
|
|
||||||
- [ ] **Step 3: Set skip_reason in NagiosRunnerPlugin.initialize()**
|
|
||||||
|
|
||||||
In `hbd/client/plugins/nagios_runner.py`, replace the early-return block in `initialize()` (around line 96):
|
|
||||||
|
|
||||||
```python
|
|
||||||
if not self.commands:
|
|
||||||
self.skip_reason = "no commands configured (add nagios_runner.commands to config)"
|
|
||||||
self.logger.info("No Nagios commands configured")
|
|
||||||
return False
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 4: Run test to verify it passes**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_nagios_runner.py::test_no_commands_sets_skip_reason -v
|
|
||||||
```
|
|
||||||
Expected: PASS.
|
|
||||||
|
|
||||||
- [ ] **Step 5: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add hbd/client/plugins/nagios_runner.py tests/test_nagios_runner.py
|
|
||||||
git commit -m "feat: set skip_reason on nagios_runner when no commands configured"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task 3: NagiosRunnerPlugin — async subprocess, stderr capture, negative return codes
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `hbd/client/plugins/nagios_runner.py` (imports + `_run_nagios_plugin`)
|
|
||||||
- Modify: `tests/test_nagios_runner.py`
|
|
||||||
|
|
||||||
- [ ] **Step 1: Write failing tests**
|
|
||||||
|
|
||||||
Append to `tests/test_nagios_runner.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def test_stderr_used_when_stdout_empty(tmp_path):
|
|
||||||
script = tmp_path / "check_err.sh"
|
|
||||||
script.write_text("#!/bin/sh\necho 'error from stderr' >&2\nexit 2\n")
|
|
||||||
script.chmod(script.stat().st_mode | stat.S_IEXEC)
|
|
||||||
|
|
||||||
config = {"commands": [{"name": "t", "command": str(script)}], "timeout": 5}
|
|
||||||
plugin = NagiosRunnerPlugin(config=config)
|
|
||||||
asyncio.run(plugin.initialize())
|
|
||||||
data = asyncio.run(plugin._collect_metrics())
|
|
||||||
|
|
||||||
assert "error from stderr" in data["t_output"]
|
|
||||||
assert data["t_status_code"] == NAGIOS_CRITICAL
|
|
||||||
|
|
||||||
|
|
||||||
def test_stderr_appended_when_both_present(tmp_path):
|
|
||||||
script = tmp_path / "check_both.sh"
|
|
||||||
script.write_text("#!/bin/sh\necho 'OK - all good'\necho 'extra detail' >&2\nexit 0\n")
|
|
||||||
script.chmod(script.stat().st_mode | stat.S_IEXEC)
|
|
||||||
|
|
||||||
config = {"commands": [{"name": "t", "command": str(script)}], "timeout": 5}
|
|
||||||
plugin = NagiosRunnerPlugin(config=config)
|
|
||||||
asyncio.run(plugin.initialize())
|
|
||||||
data = asyncio.run(plugin._collect_metrics())
|
|
||||||
|
|
||||||
assert "OK - all good" in data["t_output"]
|
|
||||||
assert "extra detail" in data["t_output"]
|
|
||||||
assert data["t_status_code"] == NAGIOS_OK
|
|
||||||
|
|
||||||
|
|
||||||
def test_negative_returncode_maps_to_unknown():
|
|
||||||
# kill -9 $$ kills the shell itself; asyncio sees returncode -9
|
|
||||||
config = {"commands": [{"name": "t", "command": "kill -9 $$"}], "timeout": 5}
|
|
||||||
plugin = NagiosRunnerPlugin(config=config)
|
|
||||||
asyncio.run(plugin.initialize())
|
|
||||||
data = asyncio.run(plugin._collect_metrics())
|
|
||||||
|
|
||||||
assert data["t_status_code"] == NAGIOS_UNKNOWN
|
|
||||||
assert "signal" in data["t_output"].lower()
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 2: Run tests to verify they fail**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_nagios_runner.py::test_stderr_used_when_stdout_empty \
|
|
||||||
tests/test_nagios_runner.py::test_stderr_appended_when_both_present \
|
|
||||||
tests/test_nagios_runner.py::test_negative_returncode_maps_to_unknown -v
|
|
||||||
```
|
|
||||||
Expected: all FAIL — current implementation ignores stderr and doesn't handle negative codes.
|
|
||||||
|
|
||||||
- [ ] **Step 3: Update imports in nagios_runner.py**
|
|
||||||
|
|
||||||
Replace the import block at the top of `hbd/client/plugins/nagios_runner.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
|
||||||
|
|
||||||
from hbd.client.plugin import MonitorPlugin
|
|
||||||
```
|
|
||||||
|
|
||||||
(Remove `import subprocess`; add `import asyncio` and `import os`.)
|
|
||||||
|
|
||||||
- [ ] **Step 4: Upgrade collection log level from DEBUG to INFO**
|
|
||||||
|
|
||||||
In `hbd/client/plugins/nagios_runner.py`, in `_collect_metrics()`, change the debug log (around line 144) so results are visible at INFO level:
|
|
||||||
|
|
||||||
```python
|
|
||||||
self.logger.info(
|
|
||||||
f"Executed {name}: {STATUS_NAMES.get(status_code, 'UNKNOWN')} - {output[:50]}"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 5: Replace `_run_nagios_plugin` with async implementation**
|
|
||||||
|
|
||||||
Replace the entire `_run_nagios_plugin` method in `hbd/client/plugins/nagios_runner.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
async def _run_nagios_plugin(
|
|
||||||
self,
|
|
||||||
command: str
|
|
||||||
) -> Tuple[int, str, Dict[str, Any]]:
|
|
||||||
"""Execute a Nagios plugin and parse its output."""
|
|
||||||
try:
|
|
||||||
proc = await asyncio.create_subprocess_shell(
|
|
||||||
command,
|
|
||||||
stdout=asyncio.subprocess.PIPE,
|
|
||||||
stderr=asyncio.subprocess.PIPE,
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
|
||||||
proc.communicate(), timeout=self.timeout
|
|
||||||
)
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
proc.kill()
|
|
||||||
await proc.communicate()
|
|
||||||
self.logger.error(f"Command timed out: {command}")
|
|
||||||
return NAGIOS_UNKNOWN, f"Command timed out after {self.timeout}s", {}
|
|
||||||
|
|
||||||
status_code = proc.returncode
|
|
||||||
|
|
||||||
if status_code < 0:
|
|
||||||
return NAGIOS_UNKNOWN, f"Process killed by signal {-status_code}", {}
|
|
||||||
|
|
||||||
if status_code > 3:
|
|
||||||
status_code = NAGIOS_UNKNOWN
|
|
||||||
|
|
||||||
stdout = stdout_bytes.decode(errors="replace").strip()
|
|
||||||
stderr = stderr_bytes.decode(errors="replace").strip()
|
|
||||||
|
|
||||||
# Parse perfdata from stdout before mixing in stderr
|
|
||||||
perfdata = self._parse_perfdata(stdout)
|
|
||||||
|
|
||||||
# Build status message
|
|
||||||
status_part = stdout.split('|')[0].strip() if '|' in stdout else stdout
|
|
||||||
|
|
||||||
if not stdout and stderr:
|
|
||||||
output_msg = stderr
|
|
||||||
elif stdout and stderr:
|
|
||||||
output_msg = f"{status_part} [stderr: {stderr}]"
|
|
||||||
else:
|
|
||||||
output_msg = status_part
|
|
||||||
|
|
||||||
return status_code, output_msg, perfdata
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error executing command: {e}")
|
|
||||||
return NAGIOS_UNKNOWN, f"Execution error: {str(e)}", {}
|
|
||||||
```
|
|
||||||
|
|
||||||
Also remove the now-unused `self.shell` line from `__init__` (the `shell` config key is no longer used since `create_subprocess_shell` always uses a shell):
|
|
||||||
|
|
||||||
In `NagiosRunnerPlugin.__init__`, remove:
|
|
||||||
```python
|
|
||||||
self.shell: bool = config.get("shell", True) if config else True
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 6: Run tests to verify they pass**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_nagios_runner.py -v
|
|
||||||
```
|
|
||||||
Expected: all tests PASS including the 3 new ones.
|
|
||||||
|
|
||||||
- [ ] **Step 7: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add hbd/client/plugins/nagios_runner.py tests/test_nagios_runner.py
|
|
||||||
git commit -m "feat: async subprocess in nagios_runner with stderr capture and signal handling"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task 4: NagiosRunnerPlugin — command path validation at init
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `hbd/client/plugins/nagios_runner.py` (initialize)
|
|
||||||
- Modify: `tests/test_nagios_runner.py`
|
|
||||||
|
|
||||||
- [ ] **Step 1: Write failing tests**
|
|
||||||
|
|
||||||
Append to `tests/test_nagios_runner.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def test_absolute_path_not_found_warns(caplog):
|
|
||||||
fake_cmd = "/nonexistent_hbc_test_path/check_something"
|
|
||||||
config = {"commands": [{"name": "t", "command": fake_cmd}]}
|
|
||||||
plugin = NagiosRunnerPlugin(config=config)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.WARNING, logger="plugin.nagios_runner"):
|
|
||||||
asyncio.run(plugin.initialize())
|
|
||||||
|
|
||||||
assert any("not found" in r.message for r in caplog.records)
|
|
||||||
|
|
||||||
|
|
||||||
def test_absolute_path_not_executable_warns(caplog, tmp_path):
|
|
||||||
non_exec = tmp_path / "check_test"
|
|
||||||
non_exec.write_text("#!/bin/sh\necho OK\n")
|
|
||||||
non_exec.chmod(0o644) # readable but not executable
|
|
||||||
|
|
||||||
config = {"commands": [{"name": "t", "command": str(non_exec)}]}
|
|
||||||
plugin = NagiosRunnerPlugin(config=config)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.WARNING, logger="plugin.nagios_runner"):
|
|
||||||
asyncio.run(plugin.initialize())
|
|
||||||
|
|
||||||
assert any("not executable" in r.message for r in caplog.records)
|
|
||||||
|
|
||||||
|
|
||||||
def test_relative_path_not_checked(caplog):
|
|
||||||
# Relative paths (resolved via PATH) must not generate warnings
|
|
||||||
config = {"commands": [{"name": "t", "command": "echo OK"}]}
|
|
||||||
plugin = NagiosRunnerPlugin(config=config)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.WARNING, logger="plugin.nagios_runner"):
|
|
||||||
asyncio.run(plugin.initialize())
|
|
||||||
|
|
||||||
assert not any(
|
|
||||||
"not found" in r.message or "not executable" in r.message
|
|
||||||
for r in caplog.records
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 2: Run tests to verify they fail**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_nagios_runner.py::test_absolute_path_not_found_warns \
|
|
||||||
tests/test_nagios_runner.py::test_absolute_path_not_executable_warns \
|
|
||||||
tests/test_nagios_runner.py::test_relative_path_not_checked -v
|
|
||||||
```
|
|
||||||
Expected: `test_absolute_path_not_found_warns` and `test_absolute_path_not_executable_warns` FAIL (no warnings logged); `test_relative_path_not_checked` may pass.
|
|
||||||
|
|
||||||
- [ ] **Step 3: Add command path validation to `initialize()`**
|
|
||||||
|
|
||||||
In `hbd/client/plugins/nagios_runner.py`, extend `initialize()` by adding validation after the existing "log each command" loop (after line 103, before `return True`):
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Validate absolute command paths early
|
|
||||||
for cmd_config in self.commands:
|
|
||||||
name = cmd_config.get("name", "unnamed")
|
|
||||||
command = cmd_config.get("command", "")
|
|
||||||
if not command:
|
|
||||||
continue
|
|
||||||
exe = command.split()[0]
|
|
||||||
if os.path.isabs(exe):
|
|
||||||
if not os.path.isfile(exe):
|
|
||||||
self.logger.warning(
|
|
||||||
f"Command '{name}': executable not found: {exe}"
|
|
||||||
)
|
|
||||||
elif not os.access(exe, os.X_OK):
|
|
||||||
self.logger.warning(
|
|
||||||
f"Command '{name}': executable not executable: {exe}"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 4: Run full test suite to verify all pass**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_plugin.py tests/test_nagios_runner.py -v
|
|
||||||
```
|
|
||||||
Expected: all tests PASS.
|
|
||||||
|
|
||||||
- [ ] **Step 5: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add hbd/client/plugins/nagios_runner.py tests/test_nagios_runner.py
|
|
||||||
git commit -m "feat: validate absolute command paths at nagios_runner init"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task 5: Daemon mode logging — route to syslog after fork
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `hbd/client/main.py` (new helper + updated daemon block)
|
|
||||||
|
|
||||||
No automated test for daemonization itself (fork behaviour is hard to unit-test). Manual verification steps are provided below.
|
|
||||||
|
|
||||||
- [ ] **Step 1: Add `_reconfigure_logging_for_daemon` helper**
|
|
||||||
|
|
||||||
In `hbd/client/main.py`, add this function just before `def build_parser()` (around line 589):
|
|
||||||
|
|
||||||
```python
|
|
||||||
def _reconfigure_logging_for_daemon(log_level: int) -> None:
|
|
||||||
"""Replace StreamHandlers (now writing to /dev/null) with a SysLogHandler."""
|
|
||||||
from logging.handlers import SysLogHandler
|
|
||||||
|
|
||||||
root = logging.getLogger()
|
|
||||||
for handler in root.handlers[:]:
|
|
||||||
root.removeHandler(handler)
|
|
||||||
handler.close()
|
|
||||||
|
|
||||||
try:
|
|
||||||
syslog_handler = SysLogHandler(
|
|
||||||
address="/dev/log",
|
|
||||||
facility=SysLogHandler.LOG_DAEMON,
|
|
||||||
)
|
|
||||||
except OSError:
|
|
||||||
syslog_handler = SysLogHandler(
|
|
||||||
address=("localhost", 514),
|
|
||||||
facility=SysLogHandler.LOG_DAEMON,
|
|
||||||
)
|
|
||||||
# Attach the fallback first so the warning reaches syslog
|
|
||||||
syslog_handler.setFormatter(
|
|
||||||
logging.Formatter("hbc[%(process)d]: %(name)s %(levelname)s: %(message)s")
|
|
||||||
)
|
|
||||||
root.addHandler(syslog_handler)
|
|
||||||
root.setLevel(log_level)
|
|
||||||
logging.warning("/dev/log not found, using syslog UDP localhost:514")
|
|
||||||
return
|
|
||||||
|
|
||||||
syslog_handler.setFormatter(
|
|
||||||
logging.Formatter("hbc[%(process)d]: %(name)s %(levelname)s: %(message)s")
|
|
||||||
)
|
|
||||||
root.addHandler(syslog_handler)
|
|
||||||
root.setLevel(log_level)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 2: Update the daemon block in `main()`**
|
|
||||||
|
|
||||||
In `hbd/client/main.py`, replace the entire `if args.daemon:` block (lines 664–675):
|
|
||||||
|
|
||||||
```python
|
|
||||||
if args.daemon:
|
|
||||||
print("Daemonizing...")
|
|
||||||
daemonize()
|
|
||||||
_reconfigure_logging_for_daemon(log_level)
|
|
||||||
logging.info(f"hbc starting, sending heartbeat to {', '.join(args.hosts)}")
|
|
||||||
```
|
|
||||||
|
|
||||||
This removes the `import syslog`, `syslog.openlog()`, and `syslog.syslog()` calls (now handled by the logging system) and removes the no-op second `logging.basicConfig()` call.
|
|
||||||
|
|
||||||
- [ ] **Step 3: Run existing test suite to confirm no regressions**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m pytest tests/test_plugin.py tests/test_nagios_runner.py -v
|
|
||||||
```
|
|
||||||
Expected: all tests still PASS.
|
|
||||||
|
|
||||||
- [ ] **Step 4: Manual smoke test — verify syslog output in daemon mode**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# In one terminal, tail syslog
|
|
||||||
sudo journalctl -f -t hbc
|
|
||||||
|
|
||||||
# In another terminal, start hbc in daemon mode (replace localhost with a real or dummy host)
|
|
||||||
python -m hbd.client.main -d -v localhost
|
|
||||||
|
|
||||||
# Expected in journalctl output:
|
|
||||||
# hbc[<pid>]: hbc.main INFO: Starting hbc for <hostname> -> ['localhost']
|
|
||||||
# hbc[<pid>]: hbc.main INFO: hbc starting, sending heartbeat to localhost
|
|
||||||
# hbc[<pid>]: plugin.loader INFO: ...
|
|
||||||
|
|
||||||
# Stop the daemon
|
|
||||||
pkill -f "hbd.client.main"
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 5: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add hbd/client/main.py
|
|
||||||
git commit -m "fix: reconfigure logging to syslog after daemonize() instead of no-op basicConfig"
|
|
||||||
```
|
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
# Plugin Error Checking & Daemon Logging — Design Spec
|
|
||||||
|
|
||||||
**Date:** 2026-04-25
|
|
||||||
**Scope:** hbc client — daemon mode logging, nagios_runner plugin robustness, PluginLoader messaging
|
|
||||||
**Files affected:** `hbd/client/main.py`, `hbd/client/plugins/nagios_runner.py`, `hbd/client/plugin.py`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. Daemon Mode Logging
|
|
||||||
|
|
||||||
### Problem
|
|
||||||
In `main()`, `logging.basicConfig()` is called before `daemonize()` (establishing a StreamHandler to stderr), then called again after `daemonize()`. The second call is a no-op — Python ignores `basicConfig()` when handlers are already configured. After daemonization, stderr is redirected to `/dev/null`, so all subsequent log output is silently discarded.
|
|
||||||
|
|
||||||
The existing `syslog.openlog()` / `syslog.syslog()` calls (lines 666–668) write a single startup message but do not integrate with the `logging` system, so plugin and connection log messages never reach syslog.
|
|
||||||
|
|
||||||
### Fix
|
|
||||||
After `daemonize()`, explicitly reconfigure the root logger:
|
|
||||||
|
|
||||||
1. Remove all existing handlers (they now write to `/dev/null`).
|
|
||||||
2. Add `logging.handlers.SysLogHandler(address='/dev/log', facility=LOG_DAEMON)`.
|
|
||||||
3. Set formatter: `hbc[%(process)d]: %(name)s %(levelname)s: %(message)s`
|
|
||||||
4. Preserve the `log_level` already determined from `-v`/`-x` CLI flags.
|
|
||||||
|
|
||||||
Remove the redundant `syslog.openlog()` / `syslog.syslog()` calls — the logging system handles routing.
|
|
||||||
|
|
||||||
**Fallback:** If `/dev/log` does not exist (containers, some BSDs), fall back to `SysLogHandler(address=('localhost', 514))`. Log one warning through the fallback handler once it is attached (after `daemonize()`, stderr already points to `/dev/null`) so the operator knows.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 2. Nagios Runner Improvements
|
|
||||||
|
|
||||||
### 2a — Async Subprocess
|
|
||||||
`_run_nagios_plugin()` is declared `async def` but calls `subprocess.run()` synchronously, blocking the event loop for the full command duration.
|
|
||||||
|
|
||||||
**Fix:** Replace with `asyncio.create_subprocess_shell()` + `await proc.communicate()`. Enforce timeout with `asyncio.wait_for(..., timeout=self.timeout)` and catch `asyncio.TimeoutError`.
|
|
||||||
|
|
||||||
### 2b — Stderr Capture
|
|
||||||
Subprocess stderr is currently ignored: `capture_output=True` captures both stdout and stderr in the sync call, but only stdout is used, so stderr content is lost.
|
|
||||||
|
|
||||||
**Fix:** Pass `stderr=asyncio.subprocess.PIPE` to `create_subprocess_shell`. After `communicate()`, if stdout is empty but stderr has content, use stderr as the output message. If both have content, append stderr to the output for visibility.
|
|
||||||
|
|
||||||
### 2c — Negative Return Codes
|
|
||||||
A negative `returncode` means the process was killed by a signal (for example, SIGKILL sent by the kernel OOM killer). The current code passes these values through unchanged, which may produce unexpected status values.
|
|
||||||
|
|
||||||
**Fix:** If `returncode < 0`, map to `NAGIOS_UNKNOWN` with message `"Process killed by signal {-returncode}"`.
|
|
||||||
|
|
||||||
### 2d — Command Path Validation at Init
|
|
||||||
`initialize()` currently only checks that the commands list is non-empty.
|
|
||||||
|
|
||||||
**Fix:** For each command entry during `initialize()`:
|
|
||||||
- Warn and skip the entry if `name` or `command` is missing.
|
|
||||||
- Extract the executable (first whitespace-delimited token of the command string).
|
|
||||||
- If the executable is an absolute path, check `os.path.isfile()` and `os.access(..., os.X_OK)`. Log a `WARNING` if either check fails.
|
|
||||||
- Commands with relative paths or shell builtins are not checked (they may be on PATH) — just noted.
|
|
||||||
- Validation warns only; all original entries in `self.commands` are retained and still attempted at collection time (where the existing missing-name/command guard already skips them). The plugin initializes successfully as long as the commands list is non-empty.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 3. PluginLoader Messaging
|
|
||||||
|
|
||||||
### Problem
|
|
||||||
When `initialize()` returns `False`, the loader always logs:
|
|
||||||
> `WARNING: Plugin X failed initialization, skipping`
|
|
||||||
|
|
||||||
This is alarming when the real reason is simply "no commands configured". There is no API to distinguish "not configured" from "genuinely broken".
|
|
||||||
|
|
||||||
### Fix
|
|
||||||
Add an optional `skip_reason` attribute to `Plugin.__init__()` (defaults to `None`).
|
|
||||||
|
|
||||||
In `PluginLoader.load_from_directory()`, after `initialize()` returns `False`:
|
|
||||||
- If `plugin.skip_reason` is set → `logger.info(f"Plugin {plugin.name} skipped: {plugin.skip_reason}")`
|
|
||||||
- If `plugin.skip_reason` is `None` → `logger.warning(f"Plugin {plugin.name} failed initialization, skipping")` (existing behaviour)
|
|
||||||
|
|
||||||
In `NagiosRunnerPlugin.initialize()`, when no commands are configured:
|
|
||||||
```python
|
|
||||||
self.skip_reason = "no commands configured (add nagios_runner.commands to config)"
|
|
||||||
return False
|
|
||||||
```
|
|
||||||
|
|
||||||
Genuine failures (exceptions) continue to go through the existing `except` block in the loader, logging at `ERROR` with traceback — unchanged.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Decisions
|
|
||||||
|
|
||||||
| Topic | Decision |
|
|
||||||
|---|---|
|
|
||||||
| Daemon log destination | syslog only (LOG_DAEMON facility) |
|
|
||||||
| Syslog fallback | localhost:514 UDP if `/dev/log` absent |
|
|
||||||
| Nagios result log level | INFO for all statuses (OK/WARNING/CRITICAL/UNKNOWN) |
|
|
||||||
| Invalid command handling at init | Warn and continue; still attempt at collection time |
|
|
||||||
| PluginLoader API change | `skip_reason` attribute on Plugin base class, checked by loader |
|
|
||||||
+1
-1
@@ -14,4 +14,4 @@ Install options:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
__all__ = ["__version__"]
|
__all__ = ["__version__"]
|
||||||
__version__ = "5.1.7"
|
__version__ = "5.3.6"
|
||||||
|
|||||||
@@ -16,6 +16,9 @@ CLIENT_DEFAULTS = {
|
|||||||
"hb_port": 50003, # Port where hbd servers listen
|
"hb_port": 50003, # Port where hbd servers listen
|
||||||
"interval": 10, # Heartbeat interval in seconds
|
"interval": 10, # Heartbeat interval in seconds
|
||||||
|
|
||||||
|
# Host identity
|
||||||
|
"owner": None, # Optional username to set as this host's owner on the server
|
||||||
|
|
||||||
# Runtime flags
|
# Runtime flags
|
||||||
"foreground": False,
|
"foreground": False,
|
||||||
"verbose": False,
|
"verbose": False,
|
||||||
|
|||||||
+116
-37
@@ -21,6 +21,7 @@ from typing import Dict, List, Optional
|
|||||||
# Import protocol and config
|
# Import protocol and config
|
||||||
from .config import load_config
|
from .config import load_config
|
||||||
from ..common.proto import dicttos, stodict
|
from ..common.proto import dicttos, stodict
|
||||||
|
from .. import __version__
|
||||||
|
|
||||||
# Import plugin system
|
# Import plugin system
|
||||||
from .plugin import PluginRegistry, PluginLoader, InfoPlugin, MonitorPlugin
|
from .plugin import PluginRegistry, PluginLoader, InfoPlugin, MonitorPlugin
|
||||||
@@ -56,6 +57,9 @@ class AsyncConnection:
|
|||||||
self.transport: Optional[asyncio.DatagramTransport] = None
|
self.transport: Optional[asyncio.DatagramTransport] = None
|
||||||
self.protocol: Optional[asyncio.DatagramProtocol] = None
|
self.protocol: Optional[asyncio.DatagramProtocol] = None
|
||||||
self._dead = False
|
self._dead = False
|
||||||
|
self._ever_opened = False
|
||||||
|
self._open_fail_count = 0 # consecutive failures before first success
|
||||||
|
self.request_info_event: asyncio.Event = asyncio.Event()
|
||||||
|
|
||||||
self.logger = logging.getLogger(f"hbc.conn.{addr}")
|
self.logger = logging.getLogger(f"hbc.conn.{addr}")
|
||||||
|
|
||||||
@@ -73,6 +77,7 @@ class AsyncConnection:
|
|||||||
lambda: HeartbeatProtocol(self),
|
lambda: HeartbeatProtocol(self),
|
||||||
family=self.af
|
family=self.af
|
||||||
)
|
)
|
||||||
|
self._ever_opened = True
|
||||||
self.logger.debug(f"Opened connection to {self.addr}:{self.port}")
|
self.logger.debug(f"Opened connection to {self.addr}:{self.port}")
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -134,6 +139,9 @@ class AsyncConnection:
|
|||||||
|
|
||||||
self.ackcount += 1
|
self.ackcount += 1
|
||||||
self.logger.debug(f"ACK received, RTT: {rtt:.1f}ms")
|
self.logger.debug(f"ACK received, RTT: {rtt:.1f}ms")
|
||||||
|
if msg.get("request_update"):
|
||||||
|
self.logger.info("server requested plugin info refresh")
|
||||||
|
self.request_info_event.set()
|
||||||
|
|
||||||
|
|
||||||
class HeartbeatProtocol(asyncio.DatagramProtocol):
|
class HeartbeatProtocol(asyncio.DatagramProtocol):
|
||||||
@@ -169,9 +177,8 @@ class HeartbeatProtocol(asyncio.DatagramProtocol):
|
|||||||
self.logger.error(f"Error processing datagram: {e}", exc_info=True)
|
self.logger.error(f"Error processing datagram: {e}", exc_info=True)
|
||||||
|
|
||||||
def error_received(self, exc):
|
def error_received(self, exc):
|
||||||
"""Handle protocol errors."""
|
"""Handle protocol errors — close transport so the heartbeat sender retries."""
|
||||||
self.logger.warning(f"Protocol error on {self.connection.addr}: {exc} — dropping connection")
|
self.logger.warning(f"Protocol error on {self.connection.addr}: {exc} — will retry")
|
||||||
self.connection._dead = True
|
|
||||||
self.connection.close()
|
self.connection.close()
|
||||||
|
|
||||||
|
|
||||||
@@ -262,15 +269,51 @@ async def handle_update(conn: AsyncConnection, _msg: dict): # pyright: ignore[r
|
|||||||
|
|
||||||
|
|
||||||
async def heartbeat_sender(conn: AsyncConnection, interval: int):
|
async def heartbeat_sender(conn: AsyncConnection, interval: int):
|
||||||
"""Send periodic heartbeats.
|
"""Send periodic heartbeats, retrying the connection if it is not open.
|
||||||
|
|
||||||
|
IPv6 connections that fail to open before their first successful send are
|
||||||
|
dropped after IPV6_EARLY_FAIL_LIMIT attempts so that a network without IPv6
|
||||||
|
does not keep a dead sender alive. IPv4 connections are retried indefinitely.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
conn: Connection to send on
|
conn: Connection to send on
|
||||||
interval: Heartbeat interval in seconds
|
interval: Heartbeat interval in seconds
|
||||||
"""
|
"""
|
||||||
logger = logging.getLogger("hbc.heartbeat")
|
logger = logging.getLogger("hbc.heartbeat")
|
||||||
|
IPV6_EARLY_FAIL_LIMIT = 3
|
||||||
|
|
||||||
|
while running and not conn._dead:
|
||||||
|
# Ensure transport is open before attempting to send.
|
||||||
|
if not conn.transport:
|
||||||
|
opened = await conn.open()
|
||||||
|
if opened:
|
||||||
|
conn._open_fail_count = 0
|
||||||
|
else:
|
||||||
|
conn._open_fail_count += 1
|
||||||
|
# Drop an IPv6 connection that has never come up within the
|
||||||
|
# first few attempts — it is likely unavailable on this network.
|
||||||
|
if (not conn._ever_opened
|
||||||
|
and conn.af == socket.AF_INET6
|
||||||
|
and conn._open_fail_count >= IPV6_EARLY_FAIL_LIMIT):
|
||||||
|
logger.warning(
|
||||||
|
f"IPv6 connection to {conn.addr} unreachable after "
|
||||||
|
f"{conn._open_fail_count} attempts, disabling"
|
||||||
|
)
|
||||||
|
conn._dead = True
|
||||||
|
break
|
||||||
|
# Retry after the normal interval; IPv4 retries forever.
|
||||||
|
try:
|
||||||
|
if shutdown_event:
|
||||||
|
await asyncio.wait_for(shutdown_event.wait(), timeout=interval)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
await asyncio.sleep(interval)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
pass
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
raise
|
||||||
|
continue
|
||||||
|
|
||||||
while running:
|
|
||||||
try:
|
try:
|
||||||
msg = {
|
msg = {
|
||||||
"acks": conn.ackcount,
|
"acks": conn.ackcount,
|
||||||
@@ -279,19 +322,16 @@ async def heartbeat_sender(conn: AsyncConnection, interval: int):
|
|||||||
}
|
}
|
||||||
await conn.sendto(msg, "HTB")
|
await conn.sendto(msg, "HTB")
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error sending heartbeat: {e}", exc_info=True)
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
logger.debug("Heartbeat sender cancelled")
|
logger.debug("Heartbeat sender cancelled")
|
||||||
raise
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error sending heartbeat: {e}", exc_info=True)
|
||||||
|
|
||||||
# Wait for next interval or shutdown event
|
# Wait for next interval or shutdown event
|
||||||
try:
|
try:
|
||||||
if shutdown_event:
|
if shutdown_event:
|
||||||
await asyncio.wait_for(
|
await asyncio.wait_for(shutdown_event.wait(), timeout=interval)
|
||||||
shutdown_event.wait(),
|
|
||||||
timeout=interval
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
await asyncio.sleep(interval)
|
await asyncio.sleep(interval)
|
||||||
@@ -302,6 +342,26 @@ async def heartbeat_sender(conn: AsyncConnection, interval: int):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
async def _info_plugin_refresh_loop(conn: AsyncConnection, info_plugins: List):
    """Re-collect and re-send InfoPlugin data each time the server asks for it.

    Blocks on ``conn.request_info_event``; every time the event is set the
    event is cleared and each plugin in ``info_plugins`` is collected again,
    with any non-empty result sent to the server as a "PLG" message.

    Args:
        conn: Connection whose ``request_info_event`` is awaited and on which
            the refreshed plugin data is sent.
        info_plugins: Plugins to re-collect on every request.
    """
    log = logging.getLogger("hbc.plugins")

    while running:
        await conn.request_info_event.wait()
        # The wait may have spanned a shutdown; re-check before doing work.
        if not running:
            break

        conn.request_info_event.clear()
        log.info("refreshing InfoPlugins on server request")

        for plugin in info_plugins:
            # NOTE(review): presumably resetting the cache makes collect()
            # gather fresh data instead of returning a stored result - confirm
            # against InfoPlugin.
            plugin._cache = None
            try:
                payload = await plugin.collect()
                if not payload:
                    continue
                await conn.sendto({"plugin": plugin.name, **payload}, "PLG")
                log.info(f"Resent {plugin.name} data")
            except Exception as exc:
                # One failing plugin must not stop the remaining ones.
                log.error(f"Error re-collecting {plugin.name}: {exc}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
async def plugin_collector(conn: AsyncConnection, registry: PluginRegistry):
|
async def plugin_collector(conn: AsyncConnection, registry: PluginRegistry):
|
||||||
"""Collect and send plugin data.
|
"""Collect and send plugin data.
|
||||||
|
|
||||||
@@ -333,16 +393,13 @@ async def plugin_collector(conn: AsyncConnection, registry: PluginRegistry):
|
|||||||
for plugin in monitor_plugins:
|
for plugin in monitor_plugins:
|
||||||
by_interval[plugin.interval].append(plugin)
|
by_interval[plugin.interval].append(plugin)
|
||||||
|
|
||||||
# Create tasks for each interval
|
# Create tasks for each interval; always include the info-refresh watcher
|
||||||
tasks = []
|
tasks = [asyncio.create_task(_info_plugin_refresh_loop(conn, info_plugins))]
|
||||||
for interval, plugins in by_interval.items():
|
for interval, plugins in by_interval.items():
|
||||||
task = asyncio.create_task(
|
tasks.append(asyncio.create_task(
|
||||||
plugin_collector_interval(conn, plugins, interval)
|
plugin_collector_interval(conn, plugins, interval)
|
||||||
)
|
))
|
||||||
tasks.append(task)
|
|
||||||
|
|
||||||
# Wait for all tasks
|
|
||||||
if tasks:
|
|
||||||
try:
|
try:
|
||||||
await asyncio.gather(*tasks, return_exceptions=True)
|
await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
@@ -427,16 +484,13 @@ async def cleanup(connections: List[AsyncConnection]):
|
|||||||
logger = logging.getLogger("hbc.cleanup")
|
logger = logging.getLogger("hbc.cleanup")
|
||||||
logger.info("Cleaning up connections")
|
logger.info("Cleaning up connections")
|
||||||
|
|
||||||
for conn in connections:
|
target = next((c for c in connections if c.transport), connections[0] if connections else None)
|
||||||
|
if target and send_shutdown:
|
||||||
try:
|
try:
|
||||||
msg = {
|
await target.sendto({"shutdown": 1, "acks": target.ackcount})
|
||||||
"shutdown": 1,
|
|
||||||
"acks": conn.ackcount
|
|
||||||
}
|
|
||||||
await conn.sendto(msg)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error sending shutdown: {e}")
|
logger.error(f"Error sending shutdown: {e}")
|
||||||
|
for conn in connections:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
# Give messages time to send
|
# Give messages time to send
|
||||||
@@ -445,7 +499,7 @@ async def cleanup(connections: List[AsyncConnection]):
|
|||||||
|
|
||||||
async def async_main(args, config):
|
async def async_main(args, config):
|
||||||
"""Async main function."""
|
"""Async main function."""
|
||||||
global running, shutdown_event, active_tasks
|
global running, shutdown_event, active_tasks, send_shutdown
|
||||||
|
|
||||||
# Create shutdown event
|
# Create shutdown event
|
||||||
shutdown_event = asyncio.Event()
|
shutdown_event = asyncio.Event()
|
||||||
@@ -462,47 +516,62 @@ async def async_main(args, config):
|
|||||||
hb_port = config.get("hb_port", PORT)
|
hb_port = config.get("hb_port", PORT)
|
||||||
interval = config.get("interval", INTERVAL)
|
interval = config.get("interval", INTERVAL)
|
||||||
|
|
||||||
logger.info(f"Starting hbc for {iam} -> {hb_hosts}")
|
logger.info(f"hbc {__version__} on {iam} -> {hb_hosts} port={hb_port}, interval={interval}s")
|
||||||
logger.info(f"Port: {hb_port}, Interval: {interval}s")
|
|
||||||
|
af_filter = (socket.AF_INET if getattr(args, "ipv4_only", False)
|
||||||
|
else socket.AF_INET6 if getattr(args, "ipv6_only", False)
|
||||||
|
else 0)
|
||||||
|
|
||||||
# Create connections
|
# Create connections
|
||||||
connections = []
|
connections = []
|
||||||
conn_id = 1
|
conn_id = 1
|
||||||
|
_retry_delay = 5
|
||||||
|
|
||||||
|
while running and not connections:
|
||||||
for host in hb_hosts:
|
for host in hb_hosts:
|
||||||
try:
|
try:
|
||||||
addrs = socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
|
addrs = socket.getaddrinfo(host, hb_port, af_filter, 0, socket.SOL_UDP)
|
||||||
except socket.gaierror as e:
|
except socket.gaierror as e:
|
||||||
logger.error(f"Cannot resolve {host}: {e}")
|
logger.warning(f"Cannot resolve {host}: {e} — retrying in {_retry_delay}s")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for addr_info in addrs:
|
for addr_info in addrs:
|
||||||
af = addr_info[0]
|
af = addr_info[0]
|
||||||
addr = addr_info[4][0]
|
addr = addr_info[4][0]
|
||||||
|
|
||||||
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
|
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
|
||||||
if await conn.open():
|
if not await conn.open():
|
||||||
|
logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
|
||||||
connections.append(conn)
|
connections.append(conn)
|
||||||
conn_id += 1
|
conn_id += 1
|
||||||
|
if not connections:
|
||||||
|
try:
|
||||||
|
if shutdown_event:
|
||||||
|
await asyncio.wait_for(shutdown_event.wait(), timeout=_retry_delay)
|
||||||
|
else:
|
||||||
|
await asyncio.sleep(_retry_delay)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
pass
|
||||||
|
_retry_delay = min(_retry_delay * 2, 60)
|
||||||
|
|
||||||
if not connections:
|
if not connections:
|
||||||
logger.error("No connections established")
|
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
logger.info(f"Created {len(connections)} connections")
|
logger.info(f"Created {len(connections)} connections")
|
||||||
|
|
||||||
# Send boot/message if requested
|
# Send boot/message if requested
|
||||||
|
send_shutdown = False
|
||||||
if args.boot or args.message:
|
if args.boot or args.message:
|
||||||
boot_msg = {}
|
boot_msg = {}
|
||||||
if args.boot:
|
if args.boot:
|
||||||
boot_msg["boot"] = 1
|
boot_msg["boot"] = 1
|
||||||
|
args.boot = False # Clear boot flag so we don't send it again in main loop
|
||||||
|
send_shutdown = True
|
||||||
if args.message:
|
if args.message:
|
||||||
boot_msg["service"] = "service"
|
boot_msg["service"] = "service"
|
||||||
boot_msg["msg"] = args.message
|
boot_msg["msg"] = args.message
|
||||||
|
|
||||||
boot_msg["acks"] = 0
|
boot_msg["acks"] = 0
|
||||||
for conn in connections:
|
target = next((c for c in connections if c.transport), connections[0])
|
||||||
await conn.sendto(boot_msg)
|
await target.sendto(boot_msg)
|
||||||
|
|
||||||
if args.message and not args.daemon:
|
if args.message and not args.daemon:
|
||||||
# Message-only mode
|
# Message-only mode
|
||||||
@@ -525,6 +594,13 @@ async def async_main(args, config):
|
|||||||
for sig in (signal.SIGTERM, signal.SIGINT):
|
for sig in (signal.SIGTERM, signal.SIGINT):
|
||||||
loop.add_signal_handler(sig, stop)
|
loop.add_signal_handler(sig, stop)
|
||||||
|
|
||||||
|
def _sighup():
|
||||||
|
global dorestart
|
||||||
|
dorestart = True
|
||||||
|
stop()
|
||||||
|
|
||||||
|
loop.add_signal_handler(signal.SIGHUP, _sighup)
|
||||||
|
|
||||||
# Start async tasks
|
# Start async tasks
|
||||||
# Heartbeat senders (one per connection)
|
# Heartbeat senders (one per connection)
|
||||||
for conn in connections:
|
for conn in connections:
|
||||||
@@ -662,6 +738,9 @@ def build_parser():
|
|||||||
default=0,
|
default=0,
|
||||||
help="Increase debug level"
|
help="Increase debug level"
|
||||||
)
|
)
|
||||||
|
af_group = parser.add_mutually_exclusive_group()
|
||||||
|
af_group.add_argument("-4", dest="ipv4_only", action="store_true", help="Use IPv4 only")
|
||||||
|
af_group.add_argument("-6", dest="ipv6_only", action="store_true", help="Use IPv6 only")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"hosts",
|
"hosts",
|
||||||
nargs="+",
|
nargs="+",
|
||||||
@@ -695,7 +774,7 @@ def main(argv=None):
|
|||||||
|
|
||||||
# Daemonize if requested
|
# Daemonize if requested
|
||||||
if args.daemon:
|
if args.daemon:
|
||||||
print("Daemonizing...")
|
logging.info("Daemonizing...")
|
||||||
daemonize()
|
daemonize()
|
||||||
_reconfigure_logging_for_daemon(log_level)
|
_reconfigure_logging_for_daemon(log_level)
|
||||||
logging.info(f"hbc starting, sending heartbeat to {', '.join(args.hosts)}")
|
logging.info(f"hbc starting, sending heartbeat to {', '.join(args.hosts)}")
|
||||||
|
|||||||
@@ -364,7 +364,10 @@ class PluginLoader:
|
|||||||
|
|
||||||
# Instantiate plugin with config — check plugins subdict first,
|
# Instantiate plugin with config — check plugins subdict first,
|
||||||
# then top-level keys (e.g. nagios_runner: ... at root of config).
|
# then top-level keys (e.g. nagios_runner: ... at root of config).
|
||||||
plugin_instance_config = plugins_subconfig.get(obj.name) or raw_config.get(obj.name, {})
|
plugin_instance_config = dict(plugins_subconfig.get(obj.name) or raw_config.get(obj.name) or {})
|
||||||
|
# Propagate top-level owner so os_info (and any future plugin) can report it.
|
||||||
|
if "owner" in raw_config and "owner" not in plugin_instance_config:
|
||||||
|
plugin_instance_config["owner"] = raw_config["owner"]
|
||||||
plugin = obj(config=plugin_instance_config)
|
plugin = obj(config=plugin_instance_config)
|
||||||
|
|
||||||
# Initialize plugin
|
# Initialize plugin
|
||||||
|
|||||||
@@ -119,6 +119,13 @@ class CPUMonitorPlugin(MonitorPlugin):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.debug(f"Could not get CPU times: {e}")
|
self.logger.debug(f"Could not get CPU times: {e}")
|
||||||
|
|
||||||
|
# Uptime in seconds
|
||||||
|
try:
|
||||||
|
import time
|
||||||
|
data["uptime_seconds"] = int(time.time() - self.psutil.boot_time())
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.debug(f"Could not get uptime: {e}")
|
||||||
|
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
f"Collected CPU metrics: {data.get('cpu_percent', 'N/A')}% usage"
|
f"Collected CPU metrics: {data.get('cpu_percent', 'N/A')}% usage"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -14,6 +14,24 @@ except ImportError:
|
|||||||
|
|
||||||
from hbd.client.plugin import MonitorPlugin
|
from hbd.client.plugin import MonitorPlugin
|
||||||
|
|
||||||
|
|
||||||
|
def _zfs_arc_bytes() -> int:
|
||||||
|
"""Return current ZFS ARC size in bytes, or 0 if ZFS is not present.
|
||||||
|
|
||||||
|
ZFS ARC is reclaimable but is not included in MemAvailable by the Linux
|
||||||
|
kernel (it is not in SReclaimable), so it would otherwise be counted as
|
||||||
|
used memory.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open("/proc/spl/kstat/zfs/arcstats") as fh:
|
||||||
|
for line in fh:
|
||||||
|
parts = line.split()
|
||||||
|
if len(parts) >= 3 and parts[0] == "size":
|
||||||
|
return int(parts[2])
|
||||||
|
except (OSError, ValueError):
|
||||||
|
pass
|
||||||
|
return 0
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -101,11 +119,21 @@ class MemoryMonitorPlugin(MonitorPlugin):
|
|||||||
|
|
||||||
# Virtual (physical) memory statistics
|
# Virtual (physical) memory statistics
|
||||||
vmem = psutil.virtual_memory()
|
vmem = psutil.virtual_memory()
|
||||||
|
|
||||||
|
# psutil's available already excludes page cache / file buffers
|
||||||
|
# (uses MemAvailable on Linux). Add ZFS ARC on top because the kernel
|
||||||
|
# does not include it in SReclaimable / MemAvailable even though it is
|
||||||
|
# reclaimable.
|
||||||
|
arc_bytes = _zfs_arc_bytes()
|
||||||
|
available = min(vmem.available + arc_bytes, vmem.total)
|
||||||
|
used = vmem.total - available
|
||||||
|
percent = round(used / vmem.total * 100, 1) if vmem.total else 0.0
|
||||||
|
|
||||||
metrics['memory_total'] = vmem.total
|
metrics['memory_total'] = vmem.total
|
||||||
metrics['memory_available'] = vmem.available
|
metrics['memory_available'] = available
|
||||||
metrics['memory_used'] = vmem.used
|
metrics['memory_used'] = used
|
||||||
metrics['memory_free'] = vmem.free
|
metrics['memory_free'] = vmem.free
|
||||||
metrics['memory_percent'] = vmem.percent
|
metrics['memory_percent'] = percent
|
||||||
|
|
||||||
# Platform-specific memory details
|
# Platform-specific memory details
|
||||||
if hasattr(vmem, 'active'):
|
if hasattr(vmem, 'active'):
|
||||||
|
|||||||
@@ -31,16 +31,13 @@ from hbd.client.plugin import MonitorPlugin
|
|||||||
|
|
||||||
|
|
||||||
# Nagios exit codes
|
# Nagios exit codes
|
||||||
NAGIOS_OK = 0
|
|
||||||
NAGIOS_WARNING = 1
|
|
||||||
NAGIOS_CRITICAL = 2
|
|
||||||
NAGIOS_UNKNOWN = 3
|
NAGIOS_UNKNOWN = 3
|
||||||
|
|
||||||
STATUS_NAMES = {
|
STATUS_NAMES = {
|
||||||
NAGIOS_OK: "OK",
|
0: "OK",
|
||||||
NAGIOS_WARNING: "WARNING",
|
1: "WARNING",
|
||||||
NAGIOS_CRITICAL: "CRITICAL",
|
2: "CRITICAL",
|
||||||
NAGIOS_UNKNOWN: "UNKNOWN"
|
3: "UNKNOWN",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -129,9 +126,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
|||||||
"""
|
"""
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
# Track overall status (worst status wins)
|
|
||||||
worst_status = NAGIOS_OK
|
|
||||||
|
|
||||||
for cmd_config in self.commands:
|
for cmd_config in self.commands:
|
||||||
name = cmd_config.get("name")
|
name = cmd_config.get("name")
|
||||||
command = cmd_config.get("command")
|
command = cmd_config.get("command")
|
||||||
@@ -149,10 +143,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
|||||||
results[f"{name}_status_code"] = status_code
|
results[f"{name}_status_code"] = status_code
|
||||||
results[f"{name}_output"] = output
|
results[f"{name}_output"] = output
|
||||||
|
|
||||||
# Track worst status
|
|
||||||
if status_code > worst_status:
|
|
||||||
worst_status = status_code
|
|
||||||
|
|
||||||
# Parse and add performance data
|
# Parse and add performance data
|
||||||
if perfdata:
|
if perfdata:
|
||||||
for metric_name, metric_value in perfdata.items():
|
for metric_name, metric_value in perfdata.items():
|
||||||
@@ -167,12 +157,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
|||||||
results[f"{name}_status"] = "ERROR"
|
results[f"{name}_status"] = "ERROR"
|
||||||
results[f"{name}_status_code"] = NAGIOS_UNKNOWN
|
results[f"{name}_status_code"] = NAGIOS_UNKNOWN
|
||||||
results[f"{name}_output"] = str(e)
|
results[f"{name}_output"] = str(e)
|
||||||
worst_status = NAGIOS_UNKNOWN
|
|
||||||
|
|
||||||
# Add overall status
|
|
||||||
results["overall_status"] = STATUS_NAMES.get(worst_status, "UNKNOWN")
|
|
||||||
results["overall_status_code"] = worst_status
|
|
||||||
results["plugin_count"] = len(self.commands)
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|||||||
@@ -60,7 +60,11 @@ class OSInfoPlugin(InfoPlugin):
|
|||||||
"python_version": platform.python_version(),
|
"python_version": platform.python_version(),
|
||||||
"python_implementation": platform.python_implementation(),
|
"python_implementation": platform.python_implementation(),
|
||||||
"hbc_version": hbc_version,
|
"hbc_version": hbc_version,
|
||||||
|
"hbc_type": "full",
|
||||||
}
|
}
|
||||||
|
if self.config.get("owner"):
|
||||||
|
self.logger.debug(f"Adding owner from config: {self.config['owner']}")
|
||||||
|
data["owner"] = self.config["owner"]
|
||||||
|
|
||||||
# Add Linux-specific distribution info
|
# Add Linux-specific distribution info
|
||||||
if platform.system() == "Linux":
|
if platform.system() == "Linux":
|
||||||
|
|||||||
@@ -13,12 +13,8 @@ plugins:
|
|||||||
count: 3 # ICMP packets per ping run (default 3)
|
count: 3 # ICMP packets per ping run (default 3)
|
||||||
timeout: 5 # seconds before a host is considered unreachable (default 5)
|
timeout: 5 # seconds before a host is considered unreachable (default 5)
|
||||||
hosts:
|
hosts:
|
||||||
8.8.8.8:
|
- 8.8.8.8
|
||||||
warning: 20.0 # ms
|
- 192.168.1.1
|
||||||
critical: 100.0 # ms
|
|
||||||
192.168.1.1:
|
|
||||||
warning: 5.0
|
|
||||||
critical: 20.0
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Reported metrics per host (metric key uses the hostname with dots/colons replaced
|
Reported metrics per host (metric key uses the hostname with dots/colons replaced
|
||||||
|
|||||||
@@ -0,0 +1,140 @@
|
|||||||
|
"""
|
||||||
|
ZFS pool monitoring plugin for Heartbeat.
|
||||||
|
|
||||||
|
Collects per-pool health, capacity, and cumulative I/O statistics via zpool(8).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from hbd.client.plugin import MonitorPlugin
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _int(s: str) -> Optional[int]:
|
||||||
|
try:
|
||||||
|
return int(s.strip().rstrip("KMGTkBkmgt%x"))
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _float(s: str) -> Optional[float]:
|
||||||
|
try:
|
||||||
|
return float(s.strip().rstrip("%x"))
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class ZFSMonitorPlugin(MonitorPlugin):
    """Monitor ZFS pool health, capacity, and I/O statistics.

    Collects per pool:
    - health: ONLINE, DEGRADED, FAULTED, etc.
    - status: numeric form of health (0, 1, 2, or 3 for anything unrecognised)
    - size / alloc / free: total, allocated and free bytes
    - capacity: percentage used (0-100)
    - frag: fragmentation percentage
    - dedup: deduplication ratio
    - read_ops / write_ops: cumulative I/O operations since last boot/clear
    - read_bw / write_bw: cumulative bytes transferred since last boot/clear

    Configuration:
        interval: collection interval in seconds (default: 300)
        pools: list of pool names to monitor (default: all); a single
            pool name given as a plain string is accepted as well
    """

    name = "zfs_monitor"
    description = "ZFS pool health, capacity, and I/O statistics"
    interval = 300

    # Seconds to wait for one zpool(8) invocation before giving up.
    _COMMAND_TIMEOUT = 15

    # Health string -> numeric status; any other health value maps to 3.
    _HEALTH_STATUS = {
        "ONLINE": 0,
        "DEGRADED": 1,
        "ONLINE with errors": 1,
        "FAULTED": 2,
        "OFFLINE": 2,
        "UNAVAIL": 2,
    }
    _UNKNOWN_STATUS = 3

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Read ``interval`` and the optional ``pools`` filter from *config*."""
        super().__init__(config)
        self.interval = self.config.get("interval", 300)
        pools = self.config.get("pools", None)
        if isinstance(pools, str):
            # A bare string would otherwise be matched by substring
            # (``name not in "tank"``) instead of by exact pool name.
            pools = [pools]
        self._pools_filter: Optional[List[str]] = pools

    async def initialize(self) -> bool:
        """Return True when ``zpool`` is on PATH; otherwise set skip_reason."""
        if not shutil.which("zpool"):
            self.skip_reason = "zpool not found"
            return False
        logger.info("ZFS monitor initialized (interval: %ds)", self.interval)
        return True

    async def _run(self, *args: str) -> List[str]:
        """Run a command and return its stdout lines, or [] on error.

        A command that cannot be started (missing binary, permission denied,
        ...) or that does not finish within ``_COMMAND_TIMEOUT`` seconds is
        logged as a warning and yields an empty list. On timeout the child
        process is killed and reaped so it does not outlive the collection.
        """
        proc = None
        try:
            proc = await asyncio.create_subprocess_exec(
                *args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.DEVNULL,
            )
            stdout, _ = await asyncio.wait_for(
                proc.communicate(), timeout=self._COMMAND_TIMEOUT
            )
        except asyncio.TimeoutError as exc:
            # Must precede the OSError handler: on Python 3.11+
            # asyncio.TimeoutError is the builtin TimeoutError (an OSError).
            logger.warning("zfs_monitor: %s: %s", args[0], exc)
            if proc is not None and proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    pass  # exited between the timeout and the kill
                await proc.wait()
            return []
        except OSError as exc:
            # Covers FileNotFoundError as well as e.g. PermissionError.
            logger.warning("zfs_monitor: %s: %s", args[0], exc)
            return []
        return stdout.decode(errors="replace").splitlines()

    async def _zpool_list(self) -> Dict[str, Dict]:
        """Return per-pool health and capacity from `zpool list`.

        Lines with fewer than eight tab-separated fields are ignored, as are
        pools excluded by the configured ``pools`` filter.
        """
        lines = await self._run(
            "zpool", "list", "-H", "-p",
            "-o", "name,health,size,alloc,free,cap,frag,dedup",
        )
        pools: Dict[str, Dict] = {}
        for line in lines:
            parts = line.split("\t")
            if len(parts) < 8:
                continue
            name = parts[0].strip()
            if self._pools_filter and name not in self._pools_filter:
                continue
            health = parts[1].strip()
            pools[name] = {
                "health": health,
                "status": self._HEALTH_STATUS.get(health, self._UNKNOWN_STATUS),
                "size": _int(parts[2]),
                "alloc": _int(parts[3]),
                "free": _int(parts[4]),
                "capacity": _float(parts[5]),
                "frag": _float(parts[6]),
                "dedup": _float(parts[7]),
            }
        return pools

    async def _zpool_iostat(self) -> Dict[str, Dict]:
        """Return per-pool cumulative I/O counters from `zpool iostat`.

        Lines with fewer than seven tab-separated fields or an empty name
        are ignored. Fields 3-6 are read as read ops, write ops, read
        bandwidth and write bandwidth.
        """
        lines = await self._run("zpool", "iostat", "-H", "-p")
        io: Dict[str, Dict] = {}
        for line in lines:
            parts = line.split("\t")
            if len(parts) < 7:
                continue
            name = parts[0].strip()
            if not name:
                continue
            io[name] = {
                "read_ops": _int(parts[3]),
                "write_ops": _int(parts[4]),
                "read_bw": _int(parts[5]),
                "write_bw": _int(parts[6]),
            }
        return io

    async def _collect_metrics(self) -> Dict[str, Any]:
        """Run both zpool queries concurrently and merge them per pool.

        I/O counters are attached only to pools already present in the
        ``zpool list`` result, so the ``pools`` filter applies to both.
        """
        pools, io = await asyncio.gather(self._zpool_list(), self._zpool_iostat())
        for name, stats in io.items():
            if name in pools:
                pools[name].update(stats)
        return {"pools": pools}
|
||||||
|
|
||||||
|
|
||||||
|
plugin = ZFSMonitorPlugin
|
||||||
@@ -134,6 +134,31 @@ thresholds:
|
|||||||
hysteresis: 0.1
|
hysteresis: 0.1
|
||||||
enabled: true
|
enabled: true
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# ZFS Monitor Thresholds
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
zfs_monitor:
|
||||||
|
# Pool health check — built-in default; shown here for reference/override.
|
||||||
|
# status is 0 (ONLINE), 1 (DEGRADED), 2 (FAULTED, OFFLINE, UNAVAIL) or 3 (any other state, e.g. SUSPENDED).
|
||||||
|
# Use '*' to apply the same rule to every pool, or name a specific pool.
|
||||||
|
pools:
|
||||||
|
'*':
|
||||||
|
status:
|
||||||
|
warning: 1 # Alert WARNING when pool is DEGRADED
|
||||||
|
critical: 2 # Alert CRITICAL when pool is FAULTED/OFFLINE/UNAVAIL (or in any unrecognised state, reported as 3)
|
||||||
|
operator: ">="
|
||||||
|
hysteresis: 0.0 # No hysteresis — a degraded pool is always alerting
|
||||||
|
grace: 0 # Fire immediately — don't wait for a second collection
|
||||||
|
display: "ZFS pool {pool_name} is {health}"
|
||||||
|
|
||||||
|
# Per-pool capacity thresholds (optional; add pools you care about)
|
||||||
|
# tank:
|
||||||
|
# capacity:
|
||||||
|
# warning: 75.0 # Warn at 75% used
|
||||||
|
# critical: 90.0 # Critical at 90% used
|
||||||
|
# operator: ">"
|
||||||
|
# hysteresis: 0.05
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
# Network Monitor Thresholds
|
# Network Monitor Thresholds
|
||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
|
|||||||
+5
-6
@@ -144,17 +144,16 @@ def cmd_notify(args):
|
|||||||
url=f"{base_url}/plugins" if base_url else "",
|
url=f"{base_url}/plugins" if base_url else "",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Bypass min_level for explicit test sends; run async channels directly
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from .notify import _send_matrix_async, _send_sms_voipms_async, _DRIVERS
|
||||||
ch_type = channel_cfg.get("type", "")
|
ch_type = channel_cfg.get("type", "")
|
||||||
print(f"Sending via {args.channel} ({ch_type}): {title} — {args.message}")
|
print(f"Sending via {args.channel} ({ch_type}): {title} — {args.message}")
|
||||||
|
|
||||||
if ch_type in ("matrix", "sms_voipms"):
|
if ch_type == "matrix":
|
||||||
from .notify import _send_matrix_async, _send_sms_voipms_async
|
ok = asyncio.run(_send_matrix_async(channel_cfg, notif))
|
||||||
driver_async = _send_matrix_async if ch_type == "matrix" else _send_sms_voipms_async
|
elif ch_type == "sms_voipms":
|
||||||
ok = asyncio.run(driver_async(channel_cfg, notif))
|
ok = asyncio.run(_send_sms_voipms_async(channel_cfg, notif))
|
||||||
else:
|
else:
|
||||||
from .notify import _DRIVERS
|
|
||||||
driver = _DRIVERS.get(ch_type)
|
driver = _DRIVERS.get(ch_type)
|
||||||
if driver is None:
|
if driver is None:
|
||||||
print(f"Error: unknown channel type '{ch_type}'", file=sys.stderr)
|
print(f"Error: unknown channel type '{ch_type}'", file=sys.stderr)
|
||||||
|
|||||||
+46
-34
@@ -27,21 +27,22 @@ SERVER_DEFAULTS = {
|
|||||||
|
|
||||||
# Monitoring settings
|
# Monitoring settings
|
||||||
"interval": 20, # Expected heartbeat interval (for server checks)
|
"interval": 20, # Expected heartbeat interval (for server checks)
|
||||||
"grace": 2, # Grace multiplier (interval * grace = timeout)
|
"grace": 2, # Grace period (extra seconds before notifying after a missed heartbeat)
|
||||||
"threshold_renotify_interval": 3600, # Seconds between threshold re-notifications
|
"threshold_renotify_interval": 3600, # Seconds between threshold re-notifications
|
||||||
|
|
||||||
# User management
|
# User management
|
||||||
"users": {}, # username -> {full_name, avatar, password, admin, notification_channels}
|
"users": {}, # username -> {full_name, avatar, password, admin, notification_channels}
|
||||||
"default_owner": None, # Username that owns hosts with no explicit owner
|
"default_owner": None, # Username that owns hosts with no explicit owner
|
||||||
|
|
||||||
|
# OAuth2 providers
|
||||||
|
"oauth": {}, # oauth.gitea.{url,client_id,client_secret}
|
||||||
|
|
||||||
# Host management
|
# Host management
|
||||||
"hosts": {}, # Unified host definitions
|
"hosts": {}, # Unified host definitions
|
||||||
"dyndnshosts": [], # Hosts with dynamic DNS (legacy)
|
"dyndomains": ["example.org"], # Domains to update via nsupdate when a host with dyndns: true is updated
|
||||||
"drophosts": [], # Hosts to ignore
|
|
||||||
"dyndomains": ["wrede.org"],
|
|
||||||
|
|
||||||
# DNS updates
|
# DNS updates
|
||||||
"nsupdate_bin": "/usr/bin/nsupdate",
|
"nsupdate_bin": "/usr/bin/nsupdate", # Path to nsupdate binary
|
||||||
|
|
||||||
# WebSocket settings
|
# WebSocket settings
|
||||||
"ws_port": 50005,
|
"ws_port": 50005,
|
||||||
@@ -76,9 +77,13 @@ THRESHOLD_DEFAULTS = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
'memory_monitor': {
|
'memory_monitor': {
|
||||||
'percent': {
|
'memory_percent': {
|
||||||
'warning': 85.0,
|
'warning': 85.0,
|
||||||
'critical': 95.0
|
'critical': 95.0
|
||||||
|
},
|
||||||
|
'swap_percent': {
|
||||||
|
'warning': 40.0,
|
||||||
|
'critical': 75.0
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'disk_monitor': {
|
'disk_monitor': {
|
||||||
@@ -95,7 +100,31 @@ THRESHOLD_DEFAULTS = {
|
|||||||
'warning': 200,
|
'warning': 200,
|
||||||
'critical': 250.0,
|
'critical': 250.0,
|
||||||
'count': 3 # Optional: number of consecutive breaches before alerting
|
'count': 3 # Optional: number of consecutive breaches before alerting
|
||||||
|
},
|
||||||
|
'nagios_runner': {
|
||||||
|
'status_code': {
|
||||||
|
'display': '{check_name} {output}',
|
||||||
|
'operator': "nagios"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
'zfs_monitor': {
|
||||||
|
'pools': {
|
||||||
|
'*': {
|
||||||
|
'status': {
|
||||||
|
'warning': 1,
|
||||||
|
'critical': 2,
|
||||||
|
'operator': '>=',
|
||||||
|
'hysteresis': 0.0,
|
||||||
|
'grace': 0,
|
||||||
|
'display': 'ZFS pool {pool_name} is {health}'
|
||||||
|
},
|
||||||
|
'capacity': {
|
||||||
|
'warning': 80.0,
|
||||||
|
'critical': 90.0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -219,43 +248,26 @@ def get_watchhosts(config):
|
|||||||
"""Extract watched hostnames from config (hosts with watch: true).
|
"""Extract watched hostnames from config (hosts with watch: true).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of hostnames to watch
|
# List of hostnames to watch
|
||||||
"""
|
"""
|
||||||
watchhosts = []
|
watchhosts = []
|
||||||
hosts_config = config.get("hosts", {})
|
hosts_config = config.get("hosts", {})
|
||||||
if isinstance(hosts_config, dict):
|
if isinstance(hosts_config, dict):
|
||||||
for host_name, host_attrs in hosts_config.items():
|
for host_name, host_attrs in hosts_config.items():
|
||||||
if isinstance(host_attrs, dict) and host_attrs.get("watch", False):
|
if isinstance(host_attrs, dict) and host_attrs.get("watch", True):
|
||||||
watchhosts.append(host_name)
|
watchhosts.append(host_name)
|
||||||
return watchhosts
|
return watchhosts
|
||||||
|
|
||||||
|
|
||||||
def get_dyndnshosts(config):
|
def get_dyndnshosts(config):
|
||||||
"""Extract dyndnshosts from config, supporting both new and legacy formats.
|
"""Return hostnames that have a dyndns setting in the hosts section."""
|
||||||
|
hosts_config = config.get("hosts", {})
|
||||||
Args:
|
if not isinstance(hosts_config, dict):
|
||||||
config: Configuration dictionary
|
return []
|
||||||
|
return [
|
||||||
Returns:
|
name for name, attrs in hosts_config.items()
|
||||||
List of hostnames with dynamic DNS
|
if isinstance(attrs, dict) and attrs.get("dyndns")
|
||||||
"""
|
]
|
||||||
dyndnshosts = []
|
|
||||||
|
|
||||||
# New format: hosts section with dyndns attribute
|
|
||||||
if "hosts" in config:
|
|
||||||
hosts_config = config["hosts"]
|
|
||||||
if isinstance(hosts_config, dict):
|
|
||||||
for host_name, host_attrs in hosts_config.items():
|
|
||||||
if isinstance(host_attrs, dict) and host_attrs.get("dyndns", False):
|
|
||||||
dyndnshosts.append(host_name)
|
|
||||||
|
|
||||||
# Legacy format: dyndnshosts list/set
|
|
||||||
if "dyndnshosts" in config:
|
|
||||||
legacy_dyndnshosts = config.get("dyndnshosts", [])
|
|
||||||
if isinstance(legacy_dyndnshosts, (list, set)):
|
|
||||||
dyndnshosts.extend(legacy_dyndnshosts)
|
|
||||||
|
|
||||||
return list(set(dyndnshosts)) # Remove duplicates
|
|
||||||
|
|
||||||
|
|
||||||
def get_host_config(config, hostname):
|
def get_host_config(config, hostname):
|
||||||
@@ -303,7 +315,7 @@ def get_host_access(config, hostname) -> dict:
|
|||||||
"""
|
"""
|
||||||
host_cfg = get_host_config(config, hostname)
|
host_cfg = get_host_config(config, hostname)
|
||||||
|
|
||||||
owner = host_cfg.get("owner") or get_default_owner(config)
|
owner = host_cfg.get("owner") # or get_default_owner(config)
|
||||||
|
|
||||||
managers = host_cfg.get("managers", [])
|
managers = host_cfg.get("managers", [])
|
||||||
if isinstance(managers, str):
|
if isinstance(managers, str):
|
||||||
|
|||||||
@@ -0,0 +1,130 @@
|
|||||||
|
"""YAML round-trip read/write for .hb.yaml, with backup and atomic writes."""
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from ruamel.yaml import YAML
|
||||||
|
|
||||||
|
_write_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _make_yaml() -> YAML:
|
||||||
|
y = YAML()
|
||||||
|
y.preserve_quotes = True
|
||||||
|
return y
|
||||||
|
|
||||||
|
# Top-level keys managed by the 'server' logical section
|
||||||
|
_SERVER_KEYS = [
|
||||||
|
"hbd_port", "hbd_host", "ws_port", "wss_port", "hb_port",
|
||||||
|
"interval", "grace", "base_url", "threshold_renotify_interval",
|
||||||
|
"logfile", "pidfile", "pickfile", "journal_enabled", "journal_dir",
|
||||||
|
"journal_max_size", "journal_max_backups", "default_owner",
|
||||||
|
"default_threshold_config",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Top-level keys managed by the 'dns' logical section
|
||||||
|
_DNS_KEYS = ["nsupdate_bin", "rndc_key", "dyndomains"]
|
||||||
|
|
||||||
|
|
||||||
|
def read_roundtrip(path: str):
|
||||||
|
"""Load .hb.yaml with ruamel.yaml, preserving comments and ordering."""
|
||||||
|
with open(path, "r", encoding="utf-8") as f:
|
||||||
|
return _make_yaml().load(f)
|
||||||
|
|
||||||
|
|
||||||
|
def write_config(path: str, data) -> None:
|
||||||
|
"""Backup current file then atomically write data.
|
||||||
|
|
||||||
|
Backup naming: {path}.bak.YYYYMMDD-HHMMSS
|
||||||
|
Rotation: keep the 10 most recent backups, delete older ones.
|
||||||
|
Atomic write: write to {path}.tmp then os.replace({path}.tmp, path).
|
||||||
|
Acquires _write_lock for the full backup+write sequence.
|
||||||
|
"""
|
||||||
|
with _write_lock:
|
||||||
|
ts = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||||
|
backup_path = f"{path}.bak.{ts}"
|
||||||
|
n = 0
|
||||||
|
while os.path.exists(backup_path):
|
||||||
|
n += 1
|
||||||
|
backup_path = f"{path}.bak.{ts}-{n}"
|
||||||
|
orig_mode = None
|
||||||
|
if os.path.exists(path):
|
||||||
|
orig_mode = os.stat(path).st_mode
|
||||||
|
with open(path, "rb") as src, open(backup_path, "wb") as dst:
|
||||||
|
dst.write(src.read())
|
||||||
|
os.chmod(backup_path, orig_mode)
|
||||||
|
backups = sorted(glob.glob(f"{path}.bak.*"), reverse=True)
|
||||||
|
for old in backups[10:]:
|
||||||
|
os.unlink(old)
|
||||||
|
tmp = f"{path}.tmp"
|
||||||
|
try:
|
||||||
|
with open(tmp, "w", encoding="utf-8") as f:
|
||||||
|
_make_yaml().dump(data, f)
|
||||||
|
if orig_mode is not None:
|
||||||
|
os.chmod(tmp, orig_mode)
|
||||||
|
os.replace(tmp, path)
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
os.unlink(tmp)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def list_backups(path: str) -> list:
|
||||||
|
"""Return backup paths sorted newest-first."""
|
||||||
|
return sorted(glob.glob(f"{path}.bak.*"), reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_structured_section(data, section: str, values: dict) -> None:
|
||||||
|
"""Merge a dict of scalar/list values into data for the named logical section.
|
||||||
|
|
||||||
|
For 'server': updates each known key individually, preserving comments on
|
||||||
|
unchanged keys. For 'users': replaces the entire users dict.
|
||||||
|
"""
|
||||||
|
if section == "server":
|
||||||
|
for key in _SERVER_KEYS:
|
||||||
|
if key in values:
|
||||||
|
data[key] = values[key]
|
||||||
|
elif section == "users":
|
||||||
|
data["users"] = values
|
||||||
|
elif section == "hosts":
|
||||||
|
data["hosts"] = values
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown structured section: {section!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def apply_channel(data, name: str, channel_cfg: dict) -> None:
|
||||||
|
"""Insert or replace a single notification channel entry, preserving others."""
|
||||||
|
if not data.get("notification_channels"):
|
||||||
|
data["notification_channels"] = {}
|
||||||
|
data["notification_channels"][name] = channel_cfg
|
||||||
|
|
||||||
|
|
||||||
|
def delete_channel(data, name: str) -> None:
|
||||||
|
"""Remove a notification channel by name. No-op if not found."""
|
||||||
|
nc = data.get("notification_channels") or {}
|
||||||
|
nc.pop(name, None)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_yaml_section(data, section: str, yaml_text: str) -> None:
|
||||||
|
"""Replace the named logical section by parsing yaml_text."""
|
||||||
|
parsed = _make_yaml().load(yaml_text)
|
||||||
|
if section == "notification_channels":
|
||||||
|
data["notification_channels"] = parsed
|
||||||
|
elif section == "thresholds":
|
||||||
|
data["threshold_configs"] = parsed
|
||||||
|
elif section == "hosts":
|
||||||
|
data["hosts"] = parsed
|
||||||
|
elif section == "dns":
|
||||||
|
if parsed:
|
||||||
|
for key in _DNS_KEYS:
|
||||||
|
if key in parsed:
|
||||||
|
data[key] = parsed[key]
|
||||||
|
else:
|
||||||
|
for key in _DNS_KEYS:
|
||||||
|
data.pop(key, None)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown YAML section: {section!r}")
|
||||||
+18
-15
@@ -4,6 +4,9 @@ from __future__ import annotations
|
|||||||
from subprocess import Popen, PIPE, STDOUT
|
from subprocess import Popen, PIPE, STDOUT
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def create_nsupdate_payload(
|
def create_nsupdate_payload(
|
||||||
@@ -123,7 +126,6 @@ async def dns_update_worker(
|
|||||||
pass
|
pass
|
||||||
continue
|
continue
|
||||||
|
|
||||||
m = f"changed address to {addr}"
|
|
||||||
for dyndomain in cfg.get("dyndomains", []):
|
for dyndomain in cfg.get("dyndomains", []):
|
||||||
err = await loop.run_in_executor(
|
err = await loop.run_in_executor(
|
||||||
None,
|
None,
|
||||||
@@ -135,28 +137,29 @@ async def dns_update_worker(
|
|||||||
cfg.get("rndc_key", "/etc/dhcpc/rndc-key"),
|
cfg.get("rndc_key", "/etc/dhcpc/rndc-key"),
|
||||||
)
|
)
|
||||||
if err:
|
if err:
|
||||||
m += f", DNS update failed: {err}"
|
m = f"DNS update failed for {addr} ({dyndomain}): {err}"
|
||||||
logger.error("DNS update failed for %s: %s", name, err)
|
logger.error("DNS update failed for %s: %s", name, err)
|
||||||
|
if log:
|
||||||
|
try:
|
||||||
|
await loop.run_in_executor(None, log, name, "ERROR", m)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
m += ", DNS updated."
|
m = f"DNS updated {name}.dy.{dyndomain} → {addr}"
|
||||||
|
if log:
|
||||||
|
try:
|
||||||
|
await loop.run_in_executor(None, log, name, "INFO", m)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not cfg.get("dyndomains"):
|
||||||
|
logger.warning("DNS update triggered for %s but no dyndomains configured", name)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
dnsq.task_done()
|
dnsq.task_done()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if log:
|
|
||||||
try:
|
|
||||||
await loop.run_in_executor(None, log, name, m)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if log:
|
|
||||||
try:
|
|
||||||
await loop.run_in_executor(None, log, None, "dns_update_worker exiting")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def start_dns_worker(
|
def start_dns_worker(
|
||||||
hbdclass,
|
hbdclass,
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ class Connection:
|
|||||||
if not Null:
|
if not Null:
|
||||||
d["addr"] = self.addr
|
d["addr"] = self.addr
|
||||||
if self.rtts[-1]:
|
if self.rtts[-1]:
|
||||||
d["rtt"] = "%0.1f" % self.rtts[-1]
|
d["rtt"] = "%d" % round(self.rtts[-1])
|
||||||
elif self.state == Connection.UNKNOWN:
|
elif self.state == Connection.UNKNOWN:
|
||||||
d["rtt"] = ""
|
d["rtt"] = ""
|
||||||
else:
|
else:
|
||||||
@@ -286,7 +286,7 @@ class Host:
|
|||||||
Host.hosts[name] = self
|
Host.hosts[name] = self
|
||||||
self.num = num
|
self.num = num
|
||||||
self.dyn = False
|
self.dyn = False
|
||||||
self.watched = False
|
self.watched = True
|
||||||
self.upcount = 0
|
self.upcount = 0
|
||||||
self.interval = 0
|
self.interval = 0
|
||||||
self.doesack = -1
|
self.doesack = -1
|
||||||
@@ -304,6 +304,7 @@ class Host:
|
|||||||
|
|
||||||
def statedict(self):
|
def statedict(self):
|
||||||
d = {}
|
d = {}
|
||||||
|
d["raw_name"] = self.name
|
||||||
d["name"] = self.name
|
d["name"] = self.name
|
||||||
if self.dyn:
|
if self.dyn:
|
||||||
d["name"] += "*"
|
d["name"] += "*"
|
||||||
|
|||||||
+852
-4
File diff suppressed because it is too large
Load Diff
+14
-10
@@ -78,9 +78,7 @@ async def reload_configuration(config_obj, config_path, components):
|
|||||||
True if reload succeeded, False otherwise
|
True if reload succeeded, False otherwise
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
logger.info("=" * 60)
|
|
||||||
logger.info("Starting configuration reload...")
|
logger.info("Starting configuration reload...")
|
||||||
logger.info("=" * 60)
|
|
||||||
|
|
||||||
# Reload config file
|
# Reload config file
|
||||||
new_config = await config_obj.reload(config_path)
|
new_config = await config_obj.reload(config_path)
|
||||||
@@ -101,9 +99,10 @@ async def reload_configuration(config_obj, config_path, components):
|
|||||||
access = config_mod.get_host_access(new_config, hostname)
|
access = config_mod.get_host_access(new_config, hostname)
|
||||||
host.apply_access(access["owner"], access["managers"], access["monitors"])
|
host.apply_access(access["owner"], access["managers"], access["monitors"])
|
||||||
|
|
||||||
# Reload threshold checker
|
# Reload threshold checker and prune alerts orphaned by the new config
|
||||||
if 'threshold_checker' in components:
|
if 'threshold_checker' in components:
|
||||||
components['threshold_checker'].reload(new_config)
|
components['threshold_checker'].reload(new_config)
|
||||||
|
components['threshold_checker'].purge_stale_alerts(hbdclass)
|
||||||
|
|
||||||
# Note: Changes to the following require restart:
|
# Note: Changes to the following require restart:
|
||||||
# - hb_port, hbd_port, ws_port (already bound)
|
# - hb_port, hbd_port, ws_port (already bound)
|
||||||
@@ -114,13 +113,11 @@ async def reload_configuration(config_obj, config_path, components):
|
|||||||
# These are reloadable and effective immediately:
|
# These are reloadable and effective immediately:
|
||||||
# - notification_channels
|
# - notification_channels
|
||||||
# - threshold_configs
|
# - threshold_configs
|
||||||
# - hosts (watchhosts, dyndnshosts, notification_channels)
|
# - hosts (watchhosts, dyndns, notification_channels)
|
||||||
# - grace period (used on next heartbeat)
|
# - grace period (used on next heartbeat)
|
||||||
# - debug/verbose flags (used on next message)
|
# - debug/verbose flags (used on next message)
|
||||||
|
|
||||||
logger.info("=" * 60)
|
|
||||||
logger.info("Configuration reload completed successfully")
|
logger.info("Configuration reload completed successfully")
|
||||||
logger.info("=" * 60)
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -241,6 +238,13 @@ async def _run_async(config, config_path=None):
|
|||||||
)
|
)
|
||||||
udp.restore_connection_timers(hbdclass, restore_ctx)
|
udp.restore_connection_timers(hbdclass, restore_ctx)
|
||||||
|
|
||||||
|
# Drop alert states that no longer have a matching threshold (stale after
|
||||||
|
# upgrade or config change between runs).
|
||||||
|
threshold_checker.purge_stale_alerts(hbdclass)
|
||||||
|
|
||||||
|
async def _http_reload_callback():
|
||||||
|
await reload_configuration(config, config_path, components)
|
||||||
|
|
||||||
# HTTP server (asyncio-based via aiohttp)
|
# HTTP server (asyncio-based via aiohttp)
|
||||||
try:
|
try:
|
||||||
http_task = asyncio.create_task(
|
http_task = asyncio.create_task(
|
||||||
@@ -250,9 +254,11 @@ async def _run_async(config, config_path=None):
|
|||||||
config=config,
|
config=config,
|
||||||
hbdclass=hbdclass,
|
hbdclass=hbdclass,
|
||||||
tcss=None,
|
tcss=None,
|
||||||
|
threshold_checker=threshold_checker,
|
||||||
verbose=config.get("verbose", False),
|
verbose=config.get("verbose", False),
|
||||||
get_now=lambda: time.time(),
|
get_now=lambda: time.time(),
|
||||||
VER="",
|
VER="",
|
||||||
|
reload_callback=_http_reload_callback,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -416,7 +422,6 @@ def load_pickled_hosts(config, hbdclass):
|
|||||||
pickfile = config.get("pickfile", "hbd.pickle")
|
pickfile = config.get("pickfile", "hbd.pickle")
|
||||||
dyndnshosts = config_mod.get_dyndnshosts(config)
|
dyndnshosts = config_mod.get_dyndnshosts(config)
|
||||||
watchhosts = config_mod.get_watchhosts(config)
|
watchhosts = config_mod.get_watchhosts(config)
|
||||||
drophosts = config.get("drophosts", [])
|
|
||||||
if 1 and os.path.exists(pickfile):
|
if 1 and os.path.exists(pickfile):
|
||||||
if config.get("verbose", False):
|
if config.get("verbose", False):
|
||||||
logger.info("opening pickls %s", pickfile)
|
logger.info("opening pickls %s", pickfile)
|
||||||
@@ -442,9 +447,6 @@ def load_pickled_hosts(config, hbdclass):
|
|||||||
hbdclass.Host.hosts[h].apply_access(
|
hbdclass.Host.hosts[h].apply_access(
|
||||||
access["owner"], access["managers"], access["monitors"]
|
access["owner"], access["managers"], access["monitors"]
|
||||||
)
|
)
|
||||||
for h in drophosts:
|
|
||||||
if h in hbdclass.Host.hosts:
|
|
||||||
del hbdclass.Host.hosts[h]
|
|
||||||
if config.get("verbose", False):
|
if config.get("verbose", False):
|
||||||
logger.info("%s pickled hosts loaded", len(hbdclass.Host.hosts))
|
logger.info("%s pickled hosts loaded", len(hbdclass.Host.hosts))
|
||||||
else:
|
else:
|
||||||
@@ -469,6 +471,8 @@ def run(config, config_path=None):
|
|||||||
if config.get("debug", 0) > 0:
|
if config.get("debug", 0) > 0:
|
||||||
log_level = logging.DEBUG
|
log_level = logging.DEBUG
|
||||||
logging.basicConfig(level=log_level)
|
logging.basicConfig(level=log_level)
|
||||||
|
if not config.get("debug", 0):
|
||||||
|
logging.getLogger("aiohttp.access").propagate = False
|
||||||
load_pickled_hosts(config, hbdclass)
|
load_pickled_hosts(config, hbdclass)
|
||||||
|
|
||||||
notify_mod.initlog(logfile=config.get("logfile", "messages.log"))
|
notify_mod.initlog(logfile=config.get("logfile", "messages.log"))
|
||||||
|
|||||||
+42
-58
@@ -15,7 +15,6 @@ their own ``notification_channels`` list. When no users are configured the
|
|||||||
server runs silently (no notifications sent).
|
server runs silently (no notifications sent).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import smtplib
|
import smtplib
|
||||||
@@ -30,13 +29,10 @@ from . import ws as ws_mod
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
msg_to_websockets = ws_mod.broadcast
|
msg_to_websockets = ws_mod.broadcast
|
||||||
|
|
||||||
# Module-level state set via setup()
|
# Module-level state set via setup()
|
||||||
_config: dict = {}
|
_config: dict = {}
|
||||||
_loop: Optional[asyncio.AbstractEventLoop] = None
|
|
||||||
|
|
||||||
# Tracks which channels fired a WARNING/CRITICAL per host.
|
# Tracks which channels fired a WARNING/CRITICAL per host.
|
||||||
# {host_name: set of channel_names} — used to route RECOVER to the same channels.
|
# {host_name: set of channel_names} — used to route RECOVER to the same channels.
|
||||||
@@ -73,11 +69,9 @@ class Notification:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def setup(cfg: dict, loop: Optional[asyncio.AbstractEventLoop] = None):
|
def setup(cfg: dict, loop: Optional[asyncio.AbstractEventLoop] = None):
|
||||||
"""Initialize notifier from configuration dict and event loop."""
|
"""Initialize notifier from configuration dict."""
|
||||||
global _config, _loop
|
global _config
|
||||||
_config = dict(cfg)
|
_config = dict(cfg)
|
||||||
if loop is not None:
|
|
||||||
_loop = loop
|
|
||||||
|
|
||||||
|
|
||||||
def reload_config(cfg: dict):
|
def reload_config(cfg: dict):
|
||||||
@@ -112,11 +106,18 @@ def closelog():
|
|||||||
|
|
||||||
def eventlog(host, lvl, m, service=None):
|
def eventlog(host, lvl, m, service=None):
|
||||||
ts = time.time()
|
ts = time.time()
|
||||||
|
msg = {
|
||||||
|
"ts": ts,
|
||||||
|
"host": host or None,
|
||||||
|
"level": lvl,
|
||||||
|
"service": service,
|
||||||
|
"message": m,
|
||||||
|
}
|
||||||
|
data.msgs.append(msg)
|
||||||
s = f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))} {lvl} "
|
s = f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))} {lvl} "
|
||||||
if host:
|
if host:
|
||||||
s += f"{host} "
|
s += f"{host} "
|
||||||
s += m
|
s += m
|
||||||
data.msgs.append(s)
|
|
||||||
logger.info(s)
|
logger.info(s)
|
||||||
if logf:
|
if logf:
|
||||||
try:
|
try:
|
||||||
@@ -124,7 +125,7 @@ def eventlog(host, lvl, m, service=None):
|
|||||||
logf.flush()
|
logf.flush()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("failed to write to logfile: %s", e)
|
logger.warning("failed to write to logfile: %s", e)
|
||||||
msg_to_websockets("message", s)
|
msg_to_websockets("message", msg)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -140,9 +141,11 @@ def _send_pushover(channel_cfg: dict, notif: Notification) -> bool:
|
|||||||
logger.warning("pushover: missing token or user")
|
logger.warning("pushover: missing token or user")
|
||||||
return False
|
return False
|
||||||
params: dict = {"token": token, "user": user, "title": notif.title, "message": notif.body}
|
params: dict = {"token": token, "user": user, "title": notif.title, "message": notif.body}
|
||||||
|
if channel_cfg.get("sound"):
|
||||||
|
params["sound"] = channel_cfg["sound"]
|
||||||
if notif.url:
|
if notif.url:
|
||||||
params["url"] = notif.url
|
params["url"] = notif.url
|
||||||
params["url_title"] = "Plugin metrics"
|
params["url_title"] = "Heartbeat"
|
||||||
conn = http.client.HTTPSConnection("api.pushover.net:443")
|
conn = http.client.HTTPSConnection("api.pushover.net:443")
|
||||||
try:
|
try:
|
||||||
conn.request(
|
conn.request(
|
||||||
@@ -215,7 +218,7 @@ def _send_mattermost(channel_cfg: dict, notif: Notification) -> bool:
|
|||||||
return False
|
return False
|
||||||
text = f"**{notif.title}**\n{notif.body}"
|
text = f"**{notif.title}**\n{notif.body}"
|
||||||
if notif.url:
|
if notif.url:
|
||||||
text += f"\n[Plugin metrics]({notif.url})"
|
text += f"\n[Plugin metrics] {notif.url}"
|
||||||
ses = {"url": host, "scheme": "http", "basepath": "/api/v4", "port": 8065}
|
ses = {"url": host, "scheme": "http", "basepath": "/api/v4", "port": 8065}
|
||||||
mm = Driver(ses)
|
mm = Driver(ses)
|
||||||
payload: dict = {"text": text, "channel": channel, "username": channel_cfg.get("username", "hbd")}
|
payload: dict = {"text": text, "channel": channel, "username": channel_cfg.get("username", "hbd")}
|
||||||
@@ -299,17 +302,6 @@ async def _send_sms_voipms_async(channel_cfg: dict, notif: Notification) -> bool
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _send_sms_voipms(channel_cfg: dict, notif: Notification) -> bool:
|
|
||||||
"""Dispatch voip.ms SMS send onto the shared event loop."""
|
|
||||||
if _loop is None:
|
|
||||||
logger.warning("sms_voipms: event loop not available")
|
|
||||||
return False
|
|
||||||
future = asyncio.run_coroutine_threadsafe(_send_sms_voipms_async(channel_cfg, notif), _loop)
|
|
||||||
try:
|
|
||||||
return future.result(timeout=15)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("sms_voipms send timed out or failed: %s", e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
async def _send_matrix_async(channel_cfg: dict, notif: Notification) -> bool:
|
async def _send_matrix_async(channel_cfg: dict, notif: Notification) -> bool:
|
||||||
@@ -357,40 +349,26 @@ async def _send_matrix_async(channel_cfg: dict, notif: Notification) -> bool:
|
|||||||
await client.close()
|
await client.close()
|
||||||
|
|
||||||
|
|
||||||
def _send_matrix(channel_cfg: dict, notif: Notification) -> bool:
|
|
||||||
"""Dispatch matrix send onto the shared event loop."""
|
|
||||||
if _loop is None:
|
|
||||||
logger.warning("matrix: event loop not available")
|
|
||||||
return False
|
|
||||||
future = asyncio.run_coroutine_threadsafe(_send_matrix_async(channel_cfg, notif), _loop)
|
|
||||||
try:
|
|
||||||
return future.result(timeout=15)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("matrix send timed out or failed: %s", e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Channel dispatcher
|
# Channel dispatcher (all async — sync drivers run in a thread executor)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Sync drivers kept for `hbd notify` CLI usage (asyncio.run wraps them there).
|
||||||
_DRIVERS = {
|
_DRIVERS = {
|
||||||
"pushover": _send_pushover,
|
"pushover": _send_pushover,
|
||||||
"email": _send_email,
|
"email": _send_email,
|
||||||
"mattermost": _send_mattermost,
|
"mattermost": _send_mattermost,
|
||||||
"signal": _send_signal,
|
"signal": _send_signal,
|
||||||
"sms_voipms": _send_sms_voipms,
|
|
||||||
"matrix": _send_matrix,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_TIMEOUT = 15 # seconds per channel send
|
||||||
|
|
||||||
def _dispatch_to_channel(channel_name: str, channel_cfg: dict, notif: Notification) -> bool:
|
|
||||||
"""Send *notif* to a single named channel, honouring min_level.
|
|
||||||
|
|
||||||
RECOVER always bypasses min_level — a recovery is always relevant if the
|
async def _dispatch_to_channel(channel_name: str, channel_cfg: dict, notif: Notification) -> bool:
|
||||||
channel was configured for any alerting (handles the restart-then-recover case
|
"""Send *notif* to a single named channel, honouring min_level."""
|
||||||
where _alerted_channels is empty and we fall through to the normal loop).
|
# Strip ownership metadata — notifier drivers only need delivery credentials.
|
||||||
"""
|
channel_cfg = {k: v for k, v in channel_cfg.items() if k not in ("owner", "private")}
|
||||||
|
|
||||||
level = notif.level.upper()
|
level = notif.level.upper()
|
||||||
if level != "RECOVER":
|
if level != "RECOVER":
|
||||||
min_level = channel_cfg.get("min_level", "WARNING").upper()
|
min_level = channel_cfg.get("min_level", "WARNING").upper()
|
||||||
@@ -398,14 +376,24 @@ def _dispatch_to_channel(channel_name: str, channel_cfg: dict, notif: Notificati
|
|||||||
logger.debug(
|
logger.debug(
|
||||||
"channel '%s': skipping level %s (min_level=%s)", channel_name, level, min_level
|
"channel '%s': skipping level %s (min_level=%s)", channel_name, level, min_level
|
||||||
)
|
)
|
||||||
return True # not an error — filtered intentionally
|
return True # filtered intentionally
|
||||||
|
|
||||||
ch_type = channel_cfg.get("type", "")
|
ch_type = channel_cfg.get("type", "")
|
||||||
driver = _DRIVERS.get(ch_type)
|
try:
|
||||||
if driver is None:
|
if ch_type == "matrix":
|
||||||
|
return await asyncio.wait_for(_send_matrix_async(channel_cfg, notif), timeout=_TIMEOUT)
|
||||||
|
if ch_type == "sms_voipms":
|
||||||
|
return await asyncio.wait_for(_send_sms_voipms_async(channel_cfg, notif), timeout=_TIMEOUT)
|
||||||
|
sync_driver = _DRIVERS.get(ch_type)
|
||||||
|
if sync_driver is None:
|
||||||
logger.warning("unknown channel type '%s' for channel '%s'", ch_type, channel_name)
|
logger.warning("unknown channel type '%s' for channel '%s'", ch_type, channel_name)
|
||||||
return False
|
return False
|
||||||
return driver(channel_cfg, notif)
|
return await asyncio.wait_for(
|
||||||
|
asyncio.to_thread(sync_driver, channel_cfg, notif), timeout=_TIMEOUT
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.error("channel '%s' timed out after %ds", channel_name, _TIMEOUT)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -416,10 +404,10 @@ def _build_url(host_name: str) -> str:
|
|||||||
base_url = _config.get("base_url", "").rstrip("/")
|
base_url = _config.get("base_url", "").rstrip("/")
|
||||||
if not base_url:
|
if not base_url:
|
||||||
return ""
|
return ""
|
||||||
return f"{base_url}/plugins#{host_name}"
|
return f"{base_url}/alerts?filter={host_name}"
|
||||||
|
|
||||||
|
|
||||||
def send_notification(host_name: str, notif: Notification) -> dict:
|
async def send_notification(host_name: str, notif: Notification) -> dict:
|
||||||
"""Dispatch *notif* to all managers/owner of *host_name*.
|
"""Dispatch *notif* to all managers/owner of *host_name*.
|
||||||
|
|
||||||
Looks up the host's owner + managers, resolves each user's
|
Looks up the host's owner + managers, resolves each user's
|
||||||
@@ -469,16 +457,12 @@ def send_notification(host_name: str, notif: Notification) -> dict:
|
|||||||
if not channel_cfg:
|
if not channel_cfg:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
ch_type = channel_cfg.get("type", "")
|
ok = await _dispatch_to_channel(channel_name, channel_cfg, notif)
|
||||||
driver = _DRIVERS.get(ch_type)
|
|
||||||
if driver:
|
|
||||||
ok = driver(channel_cfg, notif)
|
|
||||||
results[channel_name] = ok
|
results[channel_name] = ok
|
||||||
if ok:
|
if ok:
|
||||||
logger.info("recover sent to channel '%s': %s", channel_name, notif.title)
|
logger.info("recover sent to channel '%s': %s", channel_name, notif.title)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("error sending recover to channel '%s': %s", channel_name, e)
|
logger.error("error sending recover to channel '%s': %s", channel_name, e)
|
||||||
# Clear the alerted set once recovery is delivered
|
|
||||||
del _alerted_channels[host_name]
|
del _alerted_channels[host_name]
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -489,14 +473,14 @@ def send_notification(host_name: str, notif: Notification) -> dict:
|
|||||||
continue
|
continue
|
||||||
for channel_name in user.notification_channels:
|
for channel_name in user.notification_channels:
|
||||||
if channel_name in results:
|
if channel_name in results:
|
||||||
continue # already dispatched to this channel this notification
|
continue
|
||||||
channel_cfg = global_channels.get(channel_name)
|
channel_cfg = global_channels.get(channel_name)
|
||||||
if not channel_cfg:
|
if not channel_cfg:
|
||||||
logger.warning("channel '%s' not defined in notification_channels", channel_name)
|
logger.warning("channel '%s' not defined in notification_channels", channel_name)
|
||||||
results[channel_name] = False
|
results[channel_name] = False
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
ok = _dispatch_to_channel(channel_name, channel_cfg, notif)
|
ok = await _dispatch_to_channel(channel_name, channel_cfg, notif)
|
||||||
results[channel_name] = ok
|
results[channel_name] = ok
|
||||||
if ok:
|
if ok:
|
||||||
logger.info("notification sent to channel '%s': %s", channel_name, notif.title)
|
logger.info("notification sent to channel '%s': %s", channel_name, notif.title)
|
||||||
|
|||||||
@@ -0,0 +1,254 @@
|
|||||||
|
"""OAuth2 provider support.
|
||||||
|
|
||||||
|
Config shape (in ~/.hb.yaml):
|
||||||
|
|
||||||
|
oauth:
|
||||||
|
my-gitea: # route slug → /login/oauth/my-gitea
|
||||||
|
type: gitea # "gitea" | "github" | "nextcloud"
|
||||||
|
# omit type to default to "gitea"
|
||||||
|
url: https://git.example.com # required for gitea and nextcloud
|
||||||
|
client_id: <client-id>
|
||||||
|
client_secret: <client-secret>
|
||||||
|
label: "Work Gitea" # optional display name on login button
|
||||||
|
logo: https://example.com/logo.png # optional logo URL
|
||||||
|
|
||||||
|
github:
|
||||||
|
type: github
|
||||||
|
client_id: <client-id>
|
||||||
|
client_secret: <client-secret>
|
||||||
|
|
||||||
|
nextcloud:
|
||||||
|
type: nextcloud
|
||||||
|
url: https://cloud.example.com
|
||||||
|
client_id: <client-id>
|
||||||
|
client_secret: <client-secret>
|
||||||
|
|
||||||
|
Register the OAuth app with each provider and set the redirect URI to:
|
||||||
|
https://<hbd-host>/login/oauth/<name>/callback
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import secrets
|
||||||
|
import time
|
||||||
|
import urllib.parse
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Lifetime of a CSRF state token in seconds (10 minutes).
STATE_TTL = 10 * 60

# Outstanding CSRF state tokens, kept in process memory only.
# Maps state token -> expiry time expressed as a time.time() timestamp.
_states: dict[str, float] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def make_state() -> str:
    """Create, register and return a fresh CSRF ``state`` token.

    Expired entries are purged from the in-memory store first.  The new
    token is 32 random bytes rendered as hex and is recorded with an
    expiry ``STATE_TTL`` seconds from now.
    """
    _purge_states()
    state = secrets.token_hex(32)
    expires_at = time.time() + STATE_TTL
    _states[state] = expires_at
    return state
|
||||||
|
|
||||||
|
|
||||||
|
def validate_state(state: str) -> bool:
    """Consume *state* and report whether it was a live token.

    The entry is removed from the store whether or not it has expired, so
    any given state value can validate successfully at most once.

    Returns:
        True if *state* was known and its expiry lies in the future,
        False if it was unknown or has already expired.
    """
    try:
        expires_at = _states.pop(state)
    except KeyError:
        # Unknown (or already consumed) token.
        return False
    return expires_at > time.time()
|
||||||
|
|
||||||
|
|
||||||
|
def _purge_states() -> None:
    """Drop every CSRF state token whose expiry time has already passed."""
    cutoff = time.time()
    # Collect the stale keys from a snapshot first so the dict is never
    # mutated while it is being iterated.
    stale = [
        token
        for token, expires_at in tuple(_states.items())
        if expires_at < cutoff
    ]
    for token in stale:
        _states.pop(token)
|
||||||
|
|
||||||
|
|
||||||
|
class OAuthError(Exception):
    """Signals that an OAuth2 login attempt could not be completed.

    Raised by the token-exchange and profile-fetch helpers in this module.
    """
|
||||||
|
|
||||||
|
|
||||||
|
# Static description of every supported provider type, keyed by the value of
# the ``type`` field in the config.
#
#   *_url_tmpl         endpoint templates; "{url}" is filled in with the
#                      configured base URL via str.format(url=...)
#   scope              OAuth2 scope to request ("" = send no scope parameter)
#   field_map          our profile key -> key in the provider's profile JSON
#                      (None = the provider offers no such field)
#   profile_data_path  keys to descend through in the profile JSON before
#                      applying field_map
#   requires_url       whether a base ``url`` must be present in the config
#   default_label      label used when the config entry sets none
PROVIDER_DEFS: dict = {
    "gitea": dict(
        authorize_url_tmpl="{url}/login/oauth/authorize",
        token_url_tmpl="{url}/login/oauth/access_token",
        profile_url_tmpl="{url}/api/v1/user",
        scope="user:email",
        field_map=dict(username="login", full_name="full_name", avatar="avatar_url"),
        profile_data_path=[],
        requires_url=True,
        default_label="Gitea",
    ),
    "github": dict(
        authorize_url_tmpl="https://github.com/login/oauth/authorize",
        token_url_tmpl="https://github.com/login/oauth/access_token",
        profile_url_tmpl="https://api.github.com/user",
        scope="read:user",
        field_map=dict(username="login", full_name="name", avatar="avatar_url"),
        profile_data_path=[],
        requires_url=False,
        default_label="GitHub",
    ),
    "nextcloud": dict(
        authorize_url_tmpl="{url}/apps/oauth2/authorize",
        token_url_tmpl="{url}/apps/oauth2/api/v1/token",
        profile_url_tmpl="{url}/ocs/v2.php/cloud/user?format=json",
        scope="",
        field_map=dict(username="id", full_name="display-name", avatar=None),
        profile_data_path=["ocs", "data"],
        requires_url=True,
        default_label="Nextcloud",
    ),
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ResolvedProvider:
    """One configured OAuth2 provider with all endpoints already expanded.

    Field order is part of the constructor signature and must not change.
    """

    # Identity / presentation
    name: str           # config key of the entry
    type: str           # provider type, a key of PROVIDER_DEFS
    label: str          # display label
    logo: str           # logo URL, "" when none configured

    # Endpoints
    authorize_url: str
    token_url: str
    profile_url: str

    # Client registration
    scope: str          # "" means no scope parameter is sent
    client_id: str
    client_secret: str

    # Profile decoding
    field_map: dict             # our profile key -> provider JSON key
    profile_data_path: list     # keys to descend before applying field_map
|
||||||
|
|
||||||
|
|
||||||
|
def get_providers(config: dict) -> list[ResolvedProvider]:
    """Return a ResolvedProvider for every valid entry in config['oauth'].

    Entries that are malformed, have an unknown type, or lack a required
    field are skipped with a warning log rather than raising.  Order follows
    config declaration order.

    Args:
        config: The loaded configuration mapping.  A missing or non-mapping
            ``oauth`` key yields an empty list.

    Returns:
        The usable providers, possibly empty.
    """
    result: list[ResolvedProvider] = []
    oauth_cfg = config.get("oauth", {})
    if not isinstance(oauth_cfg, dict):
        return result

    for name, entry in oauth_cfg.items():
        if not isinstance(entry, dict):
            logger.warning("OAuth: entry %r is not a mapping, skipping", name)
            continue

        provider_type = entry.get("type", "gitea")
        # Guard the lookup: a non-string type (e.g. a YAML list) may be
        # unhashable and would otherwise raise TypeError.
        defn = PROVIDER_DEFS.get(provider_type) if isinstance(provider_type, str) else None
        if defn is None:
            logger.warning("OAuth: unknown provider type %r for %r, skipping", provider_type, name)
            continue

        client_id = entry.get("client_id", "")
        client_secret = entry.get("client_secret", "")
        if not client_id or not client_secret:
            logger.warning("OAuth: %r missing client_id or client_secret, skipping", name)
            continue

        # A key present with an empty value (``url:``) loads as None, so
        # ``.get("url", "")`` is not enough to guarantee a string here.
        raw_url = entry.get("url") or ""
        if not isinstance(raw_url, str):
            logger.warning("OAuth: %r has a non-string url, skipping", name)
            continue
        url = raw_url.rstrip("/")
        if defn["requires_url"] and not url:
            logger.warning("OAuth: %r requires url but none configured, skipping", name)
            continue

        result.append(ResolvedProvider(
            name=name,
            type=provider_type,
            label=entry.get("label") or defn["default_label"],
            logo=entry.get("logo") or "",
            authorize_url=defn["authorize_url_tmpl"].format(url=url),
            token_url=defn["token_url_tmpl"].format(url=url),
            profile_url=defn["profile_url_tmpl"].format(url=url),
            scope=defn["scope"],
            client_id=client_id,
            client_secret=client_secret,
            # Copy so callers cannot mutate the shared definitions.
            field_map=dict(defn["field_map"]),
            profile_data_path=list(defn["profile_data_path"]),
        ))
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def is_enabled(config: dict) -> bool:
    """Tell whether *config* yields at least one usable OAuth provider."""
    providers = get_providers(config)
    return len(providers) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def build_auth_url(provider: ResolvedProvider, state: str, redirect_uri: str) -> str:
    """Compose the authorization URL the browser should be redirected to.

    Args:
        provider: The provider whose authorize endpoint and client id are used.
        state: CSRF state token to round-trip through the provider.
        redirect_uri: Callback URL to send with the request.

    Returns:
        ``provider.authorize_url`` followed by the URL-encoded query string.
        A ``scope`` parameter is included only when the provider defines one.
    """
    query = [
        ("client_id", provider.client_id),
        ("redirect_uri", redirect_uri),
        ("response_type", "code"),
        ("state", state),
    ]
    if provider.scope:
        query.append(("scope", provider.scope))
    return provider.authorize_url + "?" + urllib.parse.urlencode(query)
|
||||||
|
|
||||||
|
|
||||||
|
async def exchange_code(provider: ResolvedProvider, code: str, redirect_uri: str) -> str:
    """Exchange an authorization *code* for an access token.

    Args:
        provider: Provider whose token endpoint and client credentials to use.
        code: Authorization code received on the callback.
        redirect_uri: The redirect URI to send with the token request.

    Returns:
        The access token string.

    Raises:
        OAuthError: On any failure — non-200 status, network error, timeout,
            undecodable or unexpected response body, or a missing token.
    """
    # Local import: only needed for the timeout exception type.
    import asyncio

    payload = {
        "client_id": provider.client_id,
        "client_secret": provider.client_secret,
        "code": code,
        "grant_type": "authorization_code",
        "redirect_uri": redirect_uri,
    }
    timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(
                provider.token_url,
                json=payload,
                headers={"Accept": "application/json"},
            ) as resp:
                if resp.status != 200:
                    text = await resp.text()
                    raise OAuthError(f"Token exchange failed ({resp.status}): {text}")
                data = await resp.json()
    except asyncio.TimeoutError as exc:
        # The total-timeout expiry is not an aiohttp.ClientError, so it has
        # to be translated separately to honour the documented contract.
        raise OAuthError("Token exchange timed out") from exc
    except aiohttp.ClientError as exc:
        raise OAuthError(f"Token exchange network error: {exc}") from exc
    except ValueError as exc:
        # Body could not be decoded as text/JSON.
        raise OAuthError(f"Token exchange returned an undecodable response: {exc}") from exc

    if not isinstance(data, dict):
        raise OAuthError(f"Unexpected token response structure from {provider.type}")
    token = data.get("access_token")
    if not token:
        raise OAuthError(f"No access_token in response: {data}")
    return token
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_user(provider: ResolvedProvider, token: str) -> dict:
    """Fetch the authenticated user's profile from the provider.

    Args:
        provider: Provider whose profile endpoint and field mapping to use.
        token: Access token sent as a Bearer credential.

    Returns:
        A dict with keys ``login``, ``full_name`` and ``avatar_url``.  A field
        the provider omits, or reports as JSON null, is returned as ``""``.

    Raises:
        OAuthError: On any failure — non-200 status, network error, timeout,
            undecodable body, or an unexpected response structure.
    """
    # Local import: only needed for the timeout exception type.
    import asyncio

    timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(
                provider.profile_url,
                headers={
                    "Authorization": f"Bearer {token}",
                    "Accept": "application/json",
                },
            ) as resp:
                if resp.status != 200:
                    text = await resp.text()
                    raise OAuthError(f"User fetch failed ({resp.status}): {text}")
                data = await resp.json()
    except asyncio.TimeoutError as exc:
        # The total-timeout expiry is not an aiohttp.ClientError, so it has
        # to be translated separately to honour the documented contract.
        raise OAuthError("User fetch timed out") from exc
    except aiohttp.ClientError as exc:
        raise OAuthError(f"User fetch network error: {exc}") from exc
    except ValueError as exc:
        # Body could not be decoded as text/JSON.
        raise OAuthError(f"User fetch returned an undecodable response: {exc}") from exc

    # Descend to the object that actually holds the profile fields.  A
    # missing key yields an empty profile; a non-object value is an error.
    for key in provider.profile_data_path:
        if not isinstance(data, dict):
            break
        data = data.get(key, {})
    if not isinstance(data, dict):
        raise OAuthError(f"Unexpected profile response structure from {provider.type}")

    def _pick(field):
        """Return the profile value for *field*, mapping absent/null to ''."""
        value = data.get(field) if field else None
        return "" if value is None else value

    return {
        "login": _pick(provider.field_map["username"]),
        "full_name": _pick(provider.field_map["full_name"]),
        "avatar_url": _pick(provider.field_map.get("avatar")),
    }
|
||||||
+197
-35
@@ -24,16 +24,68 @@ sensitive bool True when the raw value must never be shown
|
|||||||
# Credential field names that should always be masked.
|
# Credential field names that should always be masked.
|
||||||
_SECRET_KEYS = frozenset({
|
_SECRET_KEYS = frozenset({
|
||||||
"password", "token", "user_key", "api_key", "secret",
|
"password", "token", "user_key", "api_key", "secret",
|
||||||
"smtp_password", "smtp_user",
|
"smtp_password", "smtp_user", "api_password", "access_token",
|
||||||
})
|
})
|
||||||
|
|
||||||
_CHANNEL_TYPE_LABELS = {
|
CHANNEL_TYPE_SCHEMAS = {
|
||||||
"pushover": "Pushover",
|
"pushover": {
|
||||||
"email": "E-mail",
|
"label": "Pushover",
|
||||||
"signal": "Signal",
|
"fields": [
|
||||||
"mattermost": "Mattermost",
|
{"key": "token", "label": "App token", "type": "secret", "required": True},
|
||||||
|
{"key": "user", "label": "User key", "type": "secret", "required": True},
|
||||||
|
{"key": "sound", "label": "Sound", "type": "text", "required": False},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"email": {
|
||||||
|
"label": "E-mail",
|
||||||
|
"fields": [
|
||||||
|
{"key": "recipients", "label": "Recipients (comma-separated)", "type": "list", "required": True},
|
||||||
|
{"key": "sender", "label": "From address", "type": "text", "required": True},
|
||||||
|
{"key": "smtp_server", "label": "SMTP server", "type": "text", "required": True},
|
||||||
|
{"key": "smtp_port", "label": "SMTP port", "type": "port", "required": False},
|
||||||
|
{"key": "smtp_user", "label": "SMTP username", "type": "text", "required": False},
|
||||||
|
{"key": "smtp_password", "label": "SMTP password", "type": "secret", "required": False},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"signal": {
|
||||||
|
"label": "Signal",
|
||||||
|
"fields": [
|
||||||
|
{"key": "user", "label": "Sender number", "type": "text", "required": True},
|
||||||
|
{"key": "recipient", "label": "Recipient number", "type": "text", "required": True},
|
||||||
|
{"key": "cli_path", "label": "signal-cli path", "type": "text", "required": False},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"matrix": {
|
||||||
|
"label": "Matrix",
|
||||||
|
"fields": [
|
||||||
|
{"key": "homeserver", "label": "Homeserver URL", "type": "text", "required": True},
|
||||||
|
{"key": "access_token", "label": "Access token", "type": "secret", "required": True},
|
||||||
|
{"key": "room_id", "label": "Room ID", "type": "text", "required": True},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"sms_voipms": {
|
||||||
|
"label": "SMS (voip.ms)",
|
||||||
|
"fields": [
|
||||||
|
{"key": "api_user", "label": "API username", "type": "text", "required": True},
|
||||||
|
{"key": "api_password", "label": "API password", "type": "secret", "required": True},
|
||||||
|
{"key": "did", "label": "DID (from)", "type": "text", "required": True},
|
||||||
|
{"key": "dst", "label": "Destination", "type": "text", "required": True},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"mattermost": {
|
||||||
|
"label": "Mattermost",
|
||||||
|
"fields": [
|
||||||
|
{"key": "host", "label": "Host", "type": "text", "required": True},
|
||||||
|
{"key": "token", "label": "Webhook token", "type": "secret", "required": True},
|
||||||
|
{"key": "channel", "label": "Channel", "type": "text", "required": True},
|
||||||
|
{"key": "username", "label": "Bot username", "type": "text", "required": False},
|
||||||
|
{"key": "icon", "label": "Icon URL", "type": "text", "required": False},
|
||||||
|
],
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_CHANNEL_TYPE_LABELS = {k: v["label"] for k, v in CHANNEL_TYPE_SCHEMAS.items()}
|
||||||
|
|
||||||
|
|
||||||
def _mask(value):
|
def _mask(value):
|
||||||
"""Return a masked placeholder for sensitive values."""
|
"""Return a masked placeholder for sensitive values."""
|
||||||
@@ -88,7 +140,7 @@ def _sanitize_channel(name, cfg):
|
|||||||
# Public API
|
# Public API
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def get_settings_sections(config: dict) -> list:
|
def get_settings_sections(config: dict, threshold_checker=None) -> list:
|
||||||
"""Return ordered list of setting sections for the settings page.
|
"""Return ordered list of setting sections for the settings page.
|
||||||
|
|
||||||
Each section:
|
Each section:
|
||||||
@@ -143,6 +195,7 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# ---- Notification channels (complex, built separately) ----------------
|
# ---- Notification channels (complex, built separately) ----------------
|
||||||
|
_METADATA_KEYS = {"type", "owner", "private", "min_level"}
|
||||||
notif_channels = []
|
notif_channels = []
|
||||||
for ch_name, ch_cfg in (config.get("notification_channels") or {}).items():
|
for ch_name, ch_cfg in (config.get("notification_channels") or {}).items():
|
||||||
if not isinstance(ch_cfg, dict):
|
if not isinstance(ch_cfg, dict):
|
||||||
@@ -150,7 +203,7 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
ch_type = ch_cfg.get("type", "")
|
ch_type = ch_cfg.get("type", "")
|
||||||
fields = []
|
fields = []
|
||||||
for k, v in ch_cfg.items():
|
for k, v in ch_cfg.items():
|
||||||
if k == "type":
|
if k in _METADATA_KEYS:
|
||||||
continue
|
continue
|
||||||
sensitive = k in _SECRET_KEYS
|
sensitive = k in _SECRET_KEYS
|
||||||
fields.append({
|
fields.append({
|
||||||
@@ -165,6 +218,9 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
"name": ch_name,
|
"name": ch_name,
|
||||||
"type": ch_type,
|
"type": ch_type,
|
||||||
"type_label": _CHANNEL_TYPE_LABELS.get(ch_type, ch_type.title()),
|
"type_label": _CHANNEL_TYPE_LABELS.get(ch_type, ch_type.title()),
|
||||||
|
"owner": ch_cfg.get("owner"),
|
||||||
|
"private": bool(ch_cfg.get("private", False)),
|
||||||
|
"min_level": ch_cfg.get("min_level", "WARNING"),
|
||||||
"fields": fields,
|
"fields": fields,
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -181,6 +237,43 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
"notification_channels": attrs.get("notification_channels", []),
|
"notification_channels": attrs.get("notification_channels", []),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# ---- Threshold configurations -----------------------------------------
|
||||||
|
def _tc_to_row(tc):
|
||||||
|
return {
|
||||||
|
"metric": tc.metric_path,
|
||||||
|
"operator": tc.operator.value,
|
||||||
|
"warning": tc.warning,
|
||||||
|
"critical": tc.critical,
|
||||||
|
"hysteresis": tc.hysteresis,
|
||||||
|
"count": tc.count,
|
||||||
|
"enabled": tc.enabled,
|
||||||
|
"display": tc.display or "",
|
||||||
|
"grace": tc.grace,
|
||||||
|
}
|
||||||
|
|
||||||
|
threshold_config_list = []
|
||||||
|
if threshold_checker is not None:
|
||||||
|
if threshold_checker.threshold_configs:
|
||||||
|
for cfg_name, cfg_metrics in sorted(threshold_checker.threshold_configs.items()):
|
||||||
|
# For the default config use the merged effective set;
|
||||||
|
# for named overrides use only the explicitly defined metrics
|
||||||
|
# (threshold_raw_configs) so inherited defaults are not repeated.
|
||||||
|
if cfg_name == "default":
|
||||||
|
display_metrics = cfg_metrics
|
||||||
|
else:
|
||||||
|
display_metrics = threshold_checker.threshold_raw_configs.get(cfg_name, cfg_metrics)
|
||||||
|
metrics = sorted(
|
||||||
|
[_tc_to_row(tc) for tc in display_metrics.values()],
|
||||||
|
key=lambda m: m["metric"],
|
||||||
|
)
|
||||||
|
threshold_config_list.append({"name": cfg_name, "metrics": metrics})
|
||||||
|
elif threshold_checker.thresholds:
|
||||||
|
metrics = sorted(
|
||||||
|
[_tc_to_row(tc) for tc in threshold_checker.thresholds.values()],
|
||||||
|
key=lambda m: m["metric"],
|
||||||
|
)
|
||||||
|
threshold_config_list.append({"name": "default", "metrics": metrics})
|
||||||
|
|
||||||
# ---- Hosts summary ----------------------------------------------------
|
# ---- Hosts summary ----------------------------------------------------
|
||||||
hosts_list = []
|
hosts_list = []
|
||||||
for hname, hcfg in (config.get("hosts") or {}).items():
|
for hname, hcfg in (config.get("hosts") or {}).items():
|
||||||
@@ -188,37 +281,60 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
continue
|
continue
|
||||||
hosts_list.append({
|
hosts_list.append({
|
||||||
"name": hname,
|
"name": hname,
|
||||||
"watch": bool(hcfg.get("watch", False)),
|
"watch": bool(hcfg.get("watch", True)),
|
||||||
"dyndns": bool(hcfg.get("dyndns", False)),
|
"dyndns": bool(hcfg.get("dyndns", False)),
|
||||||
"owner": hcfg.get("owner", ""),
|
"owner": hcfg.get("owner", ""),
|
||||||
"managers": hcfg.get("managers", []),
|
"managers": hcfg.get("managers", []),
|
||||||
"monitors": hcfg.get("monitors", []),
|
"monitors": hcfg.get("monitors", []),
|
||||||
"threshold_config": hcfg.get("threshold_config", ""),
|
"threshold_configs": (
|
||||||
|
list(v) if isinstance(v := hcfg.get("threshold_config"), list)
|
||||||
|
else ([v] if v else [])
|
||||||
|
),
|
||||||
"notification_channels": hcfg.get("notification_channels", []),
|
"notification_channels": hcfg.get("notification_channels", []),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# ---- OAuth providers -------------------------------------------------------
|
||||||
|
oauth_providers = []
|
||||||
|
for pname, pattrs in (config.get("oauth") or {}).items():
|
||||||
|
if not isinstance(pattrs, dict):
|
||||||
|
continue
|
||||||
|
cs = pattrs.get("client_secret", "")
|
||||||
|
oauth_providers.append({
|
||||||
|
"name": pname,
|
||||||
|
"type": pattrs.get("type", "gitea"),
|
||||||
|
"url": pattrs.get("url", ""),
|
||||||
|
"client_id": pattrs.get("client_id", ""),
|
||||||
|
"client_secret": "•••" if cs else "",
|
||||||
|
"label": pattrs.get("label", ""),
|
||||||
|
"logo": pattrs.get("logo", ""),
|
||||||
|
})
|
||||||
|
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
"id": "network",
|
"id": "network",
|
||||||
"title": "Network",
|
"title": "Network",
|
||||||
"description": "Ports and bind addresses for all server sockets.",
|
"description": "Ports and bind addresses for all server sockets.",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": "server",
|
||||||
"fields": [
|
"fields": [
|
||||||
field("hb_port", "Heartbeat UDP port", "port",
|
field("hb_port", "Heartbeat UDP port", "port",
|
||||||
"UDP port the server listens on for heartbeat datagrams."),
|
"UDP port the server listens on for heartbeat datagrams.", editable=True),
|
||||||
field("hbd_host", "HTTP bind address", "text",
|
field("hbd_host", "HTTP bind address", "text",
|
||||||
"Interface to bind the HTTP server to. Empty = all interfaces."),
|
"Interface to bind the HTTP server to. Empty = all interfaces.", editable=True),
|
||||||
field("hbd_port", "HTTP API port", "port",
|
field("hbd_port", "HTTP API port", "port",
|
||||||
"TCP port for the HTTP API and web UI."),
|
"TCP port for the HTTP API and web UI.", editable=True),
|
||||||
field("ws_port", "WebSocket port", "port",
|
field("ws_port", "WebSocket port", "port",
|
||||||
"TCP port for the plain WebSocket server."),
|
"TCP port for the plain WebSocket server.", editable=True),
|
||||||
field("wss_port", "Secure WebSocket port", "port",
|
field("wss_port", "Secure WebSocket port", "port",
|
||||||
"TCP port for WSS (TLS WebSocket). Leave empty to disable."),
|
"TCP port for WSS (TLS WebSocket). Leave empty to disable.", editable=True),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "tls",
|
"id": "tls",
|
||||||
"title": "TLS / WebSocket Security",
|
"title": "TLS / WebSocket Security",
|
||||||
"description": "Certificate paths used when wss_port is set.",
|
"description": "Certificate paths used when wss_port is set.",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": None,
|
||||||
"fields": [
|
"fields": [
|
||||||
field("cert_path", "Certificate directory", "path",
|
field("cert_path", "Certificate directory", "path",
|
||||||
"Directory containing the TLS certificate and key files."),
|
"Directory containing the TLS certificate and key files."),
|
||||||
@@ -232,73 +348,89 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
"id": "monitoring",
|
"id": "monitoring",
|
||||||
"title": "Monitoring",
|
"title": "Monitoring",
|
||||||
"description": "Heartbeat timing and alert re-notification behaviour.",
|
"description": "Heartbeat timing and alert re-notification behaviour.",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": "server",
|
||||||
"fields": [
|
"fields": [
|
||||||
field("interval", "Heartbeat interval", "duration",
|
field("interval", "Heartbeat interval", "duration",
|
||||||
"Expected time between heartbeat messages from each client."),
|
"Expected time between heartbeat messages from each client.", editable=True),
|
||||||
field("grace", "Grace multiplier", "number",
|
field("grace", "Grace period", "number",
|
||||||
"A host is marked overdue after interval × grace seconds of silence."),
|
"Extra seconds to wait after a missed heartbeat before sending notifications.", editable=True),
|
||||||
field("threshold_renotify_interval", "Re-notify interval", "duration",
|
field("threshold_renotify_interval", "Re-notify interval", "duration",
|
||||||
"How often to re-send notifications for ongoing threshold alerts."),
|
"How often to re-send notifications for ongoing threshold alerts.", editable=True),
|
||||||
field("autosave_interval", "Autosave interval", "duration",
|
field("autosave_interval", "Autosave interval", "duration",
|
||||||
"How often the server saves its state to disk."),
|
"How often the server saves its state to disk."),
|
||||||
|
field("base_url", "Base URL", "text",
|
||||||
|
"Base URL for notification links.", editable=True),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "persistence",
|
"id": "persistence",
|
||||||
"title": "Persistence & Logging",
|
"title": "Persistence & Logging",
|
||||||
"description": "State file and event log settings.",
|
"description": "State file and event log settings.",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": "server",
|
||||||
"fields": [
|
"fields": [
|
||||||
field("pickfile", "State file", "path",
|
field("pickfile", "State file", "path",
|
||||||
"Path to the pickle file used to persist host state across restarts."),
|
"Path to the pickle file used to persist host state across restarts.", editable=True),
|
||||||
field("logfile", "Event log", "path",
|
field("logfile", "Event log", "path",
|
||||||
"Path to the event log file."),
|
"Path to the event log file.", editable=True),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "journal",
|
"id": "journal",
|
||||||
"title": "Message Journal",
|
"title": "Message Journal",
|
||||||
"description": "All received heartbeat and plugin messages are journalled here.",
|
"description": "All received heartbeat and plugin messages are journalled here.",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": "server",
|
||||||
"fields": [
|
"fields": [
|
||||||
field("journal_enabled", "Enabled", "boolean",
|
field("journal_enabled", "Enabled", "boolean",
|
||||||
"Turn journalling on or off."),
|
"Turn journalling on or off.", editable=True),
|
||||||
field("journal_dir", "Journal directory","path",
|
field("journal_dir", "Journal directory","path",
|
||||||
"Directory where journal files are written."),
|
"Directory where journal files are written.", editable=True),
|
||||||
field("journal_file", "Journal filename", "text",
|
field("journal_file", "Journal filename", "text",
|
||||||
"Base filename for the journal (rotated copies get a numeric suffix)."),
|
"Base filename for the journal (rotated copies get a numeric suffix)."),
|
||||||
field("journal_max_size", "Max file size", "size",
|
field("journal_max_size", "Max file size", "size",
|
||||||
"Rotate the journal when it exceeds this size."),
|
"Rotate the journal when it exceeds this size.", editable=True),
|
||||||
field("journal_max_backups", "Backup count", "number",
|
field("journal_max_backups", "Backup count", "number",
|
||||||
"Number of rotated journal files to keep."),
|
"Number of rotated journal files to keep.", editable=True),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "dns",
|
"id": "dns",
|
||||||
"title": "Dynamic DNS",
|
"title": "Dynamic DNS",
|
||||||
"description": "nsupdate-based DNS registration for dynamic hosts.",
|
"description": "nsupdate-based DNS registration — edit raw YAML.",
|
||||||
"fields": [
|
"section_mode": "yaml",
|
||||||
field("nsupdate_bin", "nsupdate binary", "path",
|
"api_section": "dns",
|
||||||
"Full path to the nsupdate executable."),
|
"fields": [],
|
||||||
field("dyndomains", "Dynamic domains", "list",
|
|
||||||
"DNS zones managed by nsupdate for dynamic hosts."),
|
|
||||||
field("drophosts", "Drop hosts", "list",
|
|
||||||
"Hostnames to silently ignore — no state, no alerts."),
|
|
||||||
],
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "users",
|
"id": "users",
|
||||||
"title": "Users",
|
"title": "Users",
|
||||||
"description": "Accounts defined in the config file. Password hashes are never shown.",
|
"description": "Accounts defined in the config file. Password hashes are never shown.",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": "users",
|
||||||
"users": users_list,
|
"users": users_list,
|
||||||
"fields": [
|
"fields": [
|
||||||
field("default_owner", "Default owner", "text",
|
field("default_owner", "Default owner", "text",
|
||||||
"Username that owns hosts with no explicit owner. "
|
"Username that owns hosts with no explicit owner. "
|
||||||
"Falls back to the first admin user."),
|
"Falls back to the first admin user.", editable=True),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "oauth",
|
||||||
|
"title": "OAuth Providers",
|
||||||
|
"description": "OAuth2 login providers. Client secrets are masked.",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": "oauth",
|
||||||
|
"providers": oauth_providers,
|
||||||
|
"fields": [],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "channels",
|
"id": "channels",
|
||||||
"title": "Notification Channels",
|
"title": "Notification Channels",
|
||||||
"description": "Named notification providers. Credentials are masked.",
|
"description": "Named notification providers. Credentials are masked.",
|
||||||
|
"section_mode": "channels",
|
||||||
|
"api_section": "notification_channels",
|
||||||
"channels": notif_channels,
|
"channels": notif_channels,
|
||||||
"fields": [
|
"fields": [
|
||||||
field("default_notification_channels", "Default channels", "list",
|
field("default_notification_channels", "Default channels", "list",
|
||||||
@@ -309,13 +441,29 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
"id": "hosts",
|
"id": "hosts",
|
||||||
"title": "Hosts",
|
"title": "Hosts",
|
||||||
"description": "Host definitions loaded from the config file.",
|
"description": "Host definitions loaded from the config file.",
|
||||||
|
"section_mode": "hosts",
|
||||||
|
"api_section": "hosts",
|
||||||
"hosts": hosts_list,
|
"hosts": hosts_list,
|
||||||
"fields": [],
|
"fields": [],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "thresholds",
|
||||||
|
"title": "Threshold Configurations",
|
||||||
|
"description": "Named alert threshold sets. Each defines warning/critical levels per metric.",
|
||||||
|
"section_mode": "thresholds",
|
||||||
|
"api_section": "thresholds",
|
||||||
|
"threshold_configs": threshold_config_list,
|
||||||
|
"fields": [
|
||||||
|
field("default_threshold_config", "Default config", "text",
|
||||||
|
"Threshold config used for hosts with no explicit mapping.", editable=True),
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "runtime",
|
"id": "runtime",
|
||||||
"title": "Runtime",
|
"title": "Runtime",
|
||||||
"description": "Flags set at startup (require restart to change).",
|
"description": "Flags set at startup (require restart to change).",
|
||||||
|
"section_mode": "form",
|
||||||
|
"api_section": None,
|
||||||
"fields": [
|
"fields": [
|
||||||
field("foreground", "Foreground mode", "boolean",
|
field("foreground", "Foreground mode", "boolean",
|
||||||
"Run in the foreground instead of daemonising."),
|
"Run in the foreground instead of daemonising."),
|
||||||
@@ -326,3 +474,17 @@ def get_settings_sections(config: dict) -> list:
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_settings_data(config: dict, threshold_checker=None) -> dict:
    """Assemble the context consumed by the settings template.

    The result holds the settings ``sections`` list together with three
    alphabetically sorted name lists taken from the top-level config keys
    ``notification_channels``, ``users`` and ``threshold_configs``.

    ``threshold_checker`` is passed through unchanged to
    ``get_settings_sections``.
    """

    def _sorted_names(key):
        # A missing key or a falsy value (e.g. None) counts as "no entries".
        return sorted((config.get(key) or {}).keys())

    return {
        "sections": get_settings_sections(config, threshold_checker=threshold_checker),
        "all_channel_names": _sorted_names("notification_channels"),
        "all_usernames": _sorted_names("users"),
        "all_threshold_configs": _sorted_names("threshold_configs"),
    }
|
||||||
|
|||||||
@@ -0,0 +1,199 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
{% include 'head.html' %}
|
||||||
|
|
||||||
|
<style>
|
||||||
|
html, body { overflow: visible; }
|
||||||
|
|
||||||
|
.container {
|
||||||
|
max-width: 700px;
|
||||||
|
margin: 0 auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 {
|
||||||
|
color: #333;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
font-size: 1.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.subtitle {
|
||||||
|
color: #666;
|
||||||
|
margin-bottom: 24px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section {
|
||||||
|
background: #fff;
|
||||||
|
border-radius: 8px;
|
||||||
|
box-shadow: 0 1px 6px rgba(0,0,0,0.1);
|
||||||
|
padding: 20px 24px;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section h2 {
|
||||||
|
font-size: 1em;
|
||||||
|
font-weight: 700;
|
||||||
|
color: #333;
|
||||||
|
margin: 0 0 16px;
|
||||||
|
padding-bottom: 10px;
|
||||||
|
border-bottom: 1px solid #eee;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-row {
|
||||||
|
display: flex;
|
||||||
|
align-items: baseline;
|
||||||
|
padding: 8px 0;
|
||||||
|
border-bottom: 1px solid #f5f5f5;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
.info-row:last-child { border-bottom: none; }
|
||||||
|
|
||||||
|
.info-label {
|
||||||
|
width: 160px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
color: #666;
|
||||||
|
font-size: 0.88em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-value {
|
||||||
|
color: #222;
|
||||||
|
word-break: break-all;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-value a {
|
||||||
|
color: #0066cc;
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
.info-value a:hover { text-decoration: underline; }
|
||||||
|
|
||||||
|
.version-badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 3px 12px;
|
||||||
|
background: #e8f0fe;
|
||||||
|
color: #1a73e8;
|
||||||
|
border-radius: 12px;
|
||||||
|
font-size: 0.85em;
|
||||||
|
font-weight: 600;
|
||||||
|
font-family: monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hb-logo {
|
||||||
|
font-size: 2.5em;
|
||||||
|
font-weight: 700;
|
||||||
|
color: #0066cc;
|
||||||
|
letter-spacing: -1px;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hb-tagline {
|
||||||
|
color: #555;
|
||||||
|
font-size: 0.95em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.logo-section {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 20px;
|
||||||
|
padding: 8px 0 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.logo-text { flex: 1; }
|
||||||
|
</style>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
{% include 'nav.html' %}
|
||||||
|
|
||||||
|
<div class="container">
|
||||||
|
<h1>{{ header }}</h1>
|
||||||
|
<p class="subtitle">Heartbeat monitoring system</p>
|
||||||
|
|
||||||
|
<div class="section">
|
||||||
|
<div class="logo-section">
|
||||||
|
<div class="logo-text">
|
||||||
|
<div class="hb-logo">Heartbeat</div>
|
||||||
|
<div class="hb-tagline">Lightweight host monitoring over UDP</div>
|
||||||
|
</div>
|
||||||
|
<span class="version-badge">v{{ hbd_version }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="section">
|
||||||
|
<h2>Version</h2>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Server version</span>
|
||||||
|
<span class="info-value">{{ hbd_version }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Python</span>
|
||||||
|
<span class="info-value">{{ python_version }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">License</span>
|
||||||
|
<span class="info-value">MIT</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="section">
|
||||||
|
<h2>Runtime</h2>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Host</span>
|
||||||
|
<span class="info-value">{{ server_hostname }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Started</span>
|
||||||
|
<span class="info-value">{{ start_time_str }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Uptime</span>
|
||||||
|
<span class="info-value" id="uptime-value">{{ uptime_str }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Hosts monitored</span>
|
||||||
|
<span class="info-value">{{ host_count }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="section">
|
||||||
|
<h2>Contact & Source</h2>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Author</span>
|
||||||
|
<span class="info-value">Andreas Wrede</span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Email</span>
|
||||||
|
<span class="info-value"><a href="mailto:aew.hbd@wrede.ca">aew.hbd@wrede.ca</a></span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">Repository</span>
|
||||||
|
<span class="info-value"><a href="https://git.wrede.ca/andreas/heartbeat" target="_blank" rel="noopener">git.wrede.ca/andreas/heartbeat</a></span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
(function() {
|
||||||
|
var startEpoch = {{ start_epoch }};
|
||||||
|
var el = document.getElementById('uptime-value');
|
||||||
|
if (!el) return;
|
||||||
|
function fmt(s) {
|
||||||
|
var d = Math.floor(s / 86400);
|
||||||
|
var h = Math.floor((s % 86400) / 3600);
|
||||||
|
var m = Math.floor((s % 3600) / 60);
|
||||||
|
var sec = s % 60;
|
||||||
|
if (d > 0) return d + 'd ' + h + 'h ' + m + 'm';
|
||||||
|
if (h > 0) return h + 'h ' + m + 'm ' + sec + 's';
|
||||||
|
return m + 'm ' + sec + 's';
|
||||||
|
}
|
||||||
|
function tick() {
|
||||||
|
var up = Math.floor(Date.now() / 1000 - startEpoch);
|
||||||
|
el.textContent = fmt(up);
|
||||||
|
}
|
||||||
|
tick();
|
||||||
|
setInterval(tick, 1000);
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -4,12 +4,17 @@
|
|||||||
|
|
||||||
<style>
|
<style>
|
||||||
|
|
||||||
|
html, body {
|
||||||
|
height: auto;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
.container {
|
.container {
|
||||||
max-width: 1400px;
|
max-width: 1400px;
|
||||||
margin: 0 auto;
|
margin: 0 auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
h1 { color: #333; margin-bottom: 10px; font-size: 1.5em; }
|
h1 { color: #333; margin-bottom: 5px; margin-top: 15px; font-size: 1.5em; }
|
||||||
|
|
||||||
.subtitle {
|
.subtitle {
|
||||||
color: #666;
|
color: #666;
|
||||||
@@ -89,6 +94,24 @@
|
|||||||
border-color: #2196f3;
|
border-color: #2196f3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.filter-input {
|
||||||
|
padding: 7px 12px;
|
||||||
|
border: 2px solid #ddd;
|
||||||
|
border-radius: 20px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
outline: none;
|
||||||
|
width: 200px;
|
||||||
|
transition: border-color 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-input:focus {
|
||||||
|
border-color: #2196f3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-input.invalid {
|
||||||
|
border-color: #f44336;
|
||||||
|
}
|
||||||
|
|
||||||
.alerts-container {
|
.alerts-container {
|
||||||
background: white;
|
background: white;
|
||||||
border-radius: 8px;
|
border-radius: 8px;
|
||||||
@@ -170,14 +193,18 @@
|
|||||||
|
|
||||||
.alert-hostname {
|
.alert-hostname {
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
color: #333;
|
color: #0066cc;
|
||||||
font-size: 1.1em;
|
font-size: 1.1em;
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
.alert-hostname:hover {
|
||||||
|
text-decoration: underline;
|
||||||
}
|
}
|
||||||
|
|
||||||
.alert-metric {
|
.alert-metric {
|
||||||
color: #666;
|
color: #0066cc;
|
||||||
font-family: 'Courier New', monospace;
|
font-size: 1.1em;
|
||||||
font-size: 0.9em;
|
font-weight: normal;
|
||||||
}
|
}
|
||||||
|
|
||||||
.alert-details {
|
.alert-details {
|
||||||
@@ -307,6 +334,7 @@
|
|||||||
<button class="filter-button active" onclick="filterAlerts('all')">All</button>
|
<button class="filter-button active" onclick="filterAlerts('all')">All</button>
|
||||||
<button class="filter-button" onclick="filterAlerts('critical')">Critical Only</button>
|
<button class="filter-button" onclick="filterAlerts('critical')">Critical Only</button>
|
||||||
<button class="filter-button" onclick="filterAlerts('warning')">Warning Only</button>
|
<button class="filter-button" onclick="filterAlerts('warning')">Warning Only</button>
|
||||||
|
<input id="host-filter" class="filter-input" type="text" placeholder="host filter (regex)" oninput="onHostFilterInput(this)">
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="alerts-container">
|
<div class="alerts-container">
|
||||||
@@ -323,6 +351,7 @@
|
|||||||
<script>
|
<script>
|
||||||
let currentFilter = 'all';
|
let currentFilter = 'all';
|
||||||
let allAlerts = [];
|
let allAlerts = [];
|
||||||
|
let hostFilterRe = null;
|
||||||
|
|
||||||
async function loadAlerts() {
|
async function loadAlerts() {
|
||||||
try {
|
try {
|
||||||
@@ -357,10 +386,13 @@
|
|||||||
// Filter alerts based on current filter
|
// Filter alerts based on current filter
|
||||||
let filteredAlerts = alerts;
|
let filteredAlerts = alerts;
|
||||||
if (currentFilter !== 'all') {
|
if (currentFilter !== 'all') {
|
||||||
filteredAlerts = alerts.filter(alert =>
|
filteredAlerts = filteredAlerts.filter(alert =>
|
||||||
alert.level.toLowerCase() === currentFilter
|
alert.level.toLowerCase() === currentFilter
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
if (hostFilterRe) {
|
||||||
|
filteredAlerts = filteredAlerts.filter(alert => hostFilterRe.test(alert.hostname));
|
||||||
|
}
|
||||||
|
|
||||||
if (filteredAlerts.length === 0) {
|
if (filteredAlerts.length === 0) {
|
||||||
if (currentFilter === 'all' && alerts.length === 0) {
|
if (currentFilter === 'all' && alerts.length === 0) {
|
||||||
@@ -400,6 +432,10 @@
|
|||||||
} else if (alert.threshold_value !== undefined && alert.threshold_value !== null && alert.operator) {
|
} else if (alert.threshold_value !== undefined && alert.threshold_value !== null && alert.operator) {
|
||||||
valueText += ` <span class="threshold-info">(threshold: ${alert.operator} ${formatValue(alert.threshold_value)})</span>`;
|
valueText += ` <span class="threshold-info">(threshold: ${alert.operator} ${formatValue(alert.threshold_value)})</span>`;
|
||||||
}
|
}
|
||||||
|
if (alert.recovery_threshold !== undefined && alert.recovery_threshold !== null) {
|
||||||
|
const recOp = (alert.operator === '>' || alert.operator === '>=') ? '<' : '>';
|
||||||
|
valueText += ` <span class="threshold-info" style="color:#888">(recovers ${recOp} ${formatValue(alert.recovery_threshold)})</span>`;
|
||||||
|
}
|
||||||
|
|
||||||
// Build actions section
|
// Build actions section
|
||||||
let actionsHtml = '';
|
let actionsHtml = '';
|
||||||
@@ -424,9 +460,9 @@
|
|||||||
<div class="alert-main">
|
<div class="alert-main">
|
||||||
<div class="alert-header">
|
<div class="alert-header">
|
||||||
<span class="alert-level ${level}">${alert.level}</span>
|
<span class="alert-level ${level}">${alert.level}</span>
|
||||||
<span class="alert-hostname">${alert.hostname}</span>
|
<a class="alert-hostname" href="/plugins#${alert.hostname}">${alert.hostname}</a>
|
||||||
|
<span class="alert-metric">${(alert.metric_path.includes('.') ? alert.metric_path.slice(alert.metric_path.indexOf('.') + 1) : alert.metric_path).replace(/_status_code$/, '')}</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="alert-metric">${alert.metric_path}</div>
|
|
||||||
<div class="alert-details">
|
<div class="alert-details">
|
||||||
<span>${valueText}</span>
|
<span>${valueText}</span>
|
||||||
<span class="alert-duration">Active for ${duration}</span>
|
<span class="alert-duration">Active for ${duration}</span>
|
||||||
@@ -525,9 +561,36 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle edits to the host filter box: compile the text as a
// case-insensitive regular expression, flag the input when the pattern
// does not compile, and re-render the alert list.
function onHostFilterInput(input) {
    const pattern = input.value.trim();
    let compiled = null;
    let broken = false;

    if (pattern) {
        try {
            compiled = new RegExp(pattern, 'i');
        } catch (_) {
            // Invalid pattern: no host filter is applied until it is fixed.
            broken = true;
        }
    }

    hostFilterRe = compiled;
    if (broken) {
        input.classList.add('invalid');
    } else {
        input.classList.remove('invalid');
    }

    renderAlerts(allAlerts);
}
|
||||||
|
|
||||||
// Auto-refresh every 15 seconds
|
// Auto-refresh every 15 seconds
|
||||||
setInterval(loadAlerts, 15000);
|
setInterval(loadAlerts, 15000);
|
||||||
|
|
||||||
|
// Initialise filter from URL query string (?filter=...)
|
||||||
|
(function () {
    // Read ?filter=... from the page URL; nothing to do when it is absent or empty.
    const query = new URLSearchParams(window.location.search);
    const initial = query.get('filter');
    if (!initial) return;

    // Pre-fill the host filter box and apply it exactly as if it had been typed.
    const box = document.getElementById('host-filter');
    box.value = initial;
    onHostFilterInput(box);
})();
|
||||||
|
|
||||||
// Initial load
|
// Initial load
|
||||||
loadAlerts();
|
loadAlerts();
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<footer>
|
<footer>
|
||||||
<div id="copyright">
|
<div id="copyright">
|
||||||
©2002-2026 <A HREF="mailto:andreas@wrede.ca">Andreas Wrede</A> All Rights Reserved.</p>
|
©2002-2026 <A HREF="mailto:aew.hbd@wrede.ca">Andreas Wrede</A> All Rights Reserved.</p>
|
||||||
</div>
|
</div>
|
||||||
</footer>
|
</footer>
|
||||||
@@ -15,6 +15,7 @@
|
|||||||
body {
|
body {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
padding: 10px;
|
padding: 10px;
|
||||||
|
padding-top: 60px;
|
||||||
background: #f5f5f5;
|
background: #f5f5f5;
|
||||||
}
|
}
|
||||||
h1 { font-size: 1.5em; color: #333; margin: 0 0 5px; }
|
h1 { font-size: 1.5em; color: #333; margin: 0 0 5px; }
|
||||||
@@ -23,11 +24,14 @@
|
|||||||
|
|
||||||
/* Navigation bar — shared across all pages */
|
/* Navigation bar — shared across all pages */
|
||||||
.nav {
|
.nav {
|
||||||
|
position: fixed;
|
||||||
|
top: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
z-index: 200;
|
||||||
background: #fff;
|
background: #fff;
|
||||||
padding: 6px 12px;
|
padding: 6px 12px;
|
||||||
margin-bottom: 10px;
|
|
||||||
box-shadow: 0 2px 4px rgba(0,0,0,.1);
|
box-shadow: 0 2px 4px rgba(0,0,0,.1);
|
||||||
border-radius: 4px;
|
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: space-between;
|
justify-content: space-between;
|
||||||
@@ -121,12 +125,35 @@
|
|||||||
.nav-links a { margin-right: 0; padding: 6px 0; font-size: 1em; }
|
.nav-links a { margin-right: 0; padding: 6px 0; font-size: 1em; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Pending config publish button */
|
||||||
|
.nav-publish-btn {
|
||||||
|
background: #e65100;
|
||||||
|
color: #fff;
|
||||||
|
border: none;
|
||||||
|
border-radius: 4px;
|
||||||
|
padding: 4px 10px;
|
||||||
|
font-size: 0.82em;
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
flex-shrink: 0;
|
||||||
|
white-space: nowrap;
|
||||||
|
margin-left: auto;
|
||||||
|
}
|
||||||
|
.nav-publish-btn:hover { background: #bf360c; }
|
||||||
|
.nav-publish-btn:disabled { opacity: 0.7; cursor: default; }
|
||||||
|
|
||||||
/* Swiss railway clock — nav */
|
/* Swiss railway clock — nav */
|
||||||
.nav-clock {
|
.nav-pie {
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
line-height: 0;
|
line-height: 0;
|
||||||
margin-left: auto;
|
margin-left: auto;
|
||||||
padding: 4px 4px 4px 0;
|
padding: 4px 4px 4px 0;
|
||||||
|
}
|
||||||
|
#alert-pie { display: block; cursor: default; }
|
||||||
|
.nav-clock {
|
||||||
|
flex-shrink: 0;
|
||||||
|
line-height: 0;
|
||||||
|
padding: 4px 4px 4px 0;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
#swiss-clock { display: block; }
|
#swiss-clock { display: block; }
|
||||||
@@ -204,7 +231,7 @@
|
|||||||
ctx.restore();
|
ctx.restore();
|
||||||
}
|
}
|
||||||
|
|
||||||
hand((m + s / 60) / 60 * Math.PI * 2 - Math.PI / 2,
|
hand((sFrac >= 58.5 ? m + 1 : m) / 60 * Math.PI * 2 - Math.PI / 2,
|
||||||
R * 0.88, -R * 0.12, SIZE * 0.027, '#222'); /* minute */
|
R * 0.88, -R * 0.12, SIZE * 0.027, '#222'); /* minute */
|
||||||
hand((h + m / 60) / 12 * Math.PI * 2 - Math.PI / 2,
|
hand((h + m / 60) / 12 * Math.PI * 2 - Math.PI / 2,
|
||||||
R * 0.58, -R * 0.12, SIZE * 0.039, '#222'); /* hour */
|
R * 0.58, -R * 0.12, SIZE * 0.039, '#222'); /* hour */
|
||||||
|
|||||||
@@ -45,6 +45,7 @@
|
|||||||
h1 {
|
h1 {
|
||||||
color: #333;
|
color: #333;
|
||||||
margin-bottom: 5px;
|
margin-bottom: 5px;
|
||||||
|
margin-top: 15px;
|
||||||
font-size: 1.5em;
|
font-size: 1.5em;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -182,11 +183,61 @@
|
|||||||
line-height: 1.0;
|
line-height: 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#messages div {
|
#messages .log-entry {
|
||||||
padding: 5px 0;
|
padding: 5px 0;
|
||||||
border-bottom: 1px solid #f0f0f0;
|
border-bottom: 1px solid #f0f0f0;
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5em;
|
||||||
|
align-items: baseline;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.log-ts { color: #888; white-space: nowrap; }
|
||||||
|
.log-level { font-weight: bold; min-width: 6em; }
|
||||||
|
.log-host { font-weight: 600; }
|
||||||
|
.log-service { color: #888; }
|
||||||
|
|
||||||
|
.log-warning .log-level { color: #b8860b; }
|
||||||
|
.log-critical .log-level { color: #c00; }
|
||||||
|
.log-recover .log-level { color: #2a7a2a; }
|
||||||
|
.log-info .log-level { color: #555; }
|
||||||
|
|
||||||
|
.log-section-header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 12px;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
background: white;
|
||||||
|
border-radius: 6px;
|
||||||
|
box-shadow: 0 1px 4px rgba(0,0,0,0.1);
|
||||||
|
padding: 8px 15px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.log-section-title {
|
||||||
|
font-size: 1.2em;
|
||||||
|
font-weight: bold;
|
||||||
|
color: #333;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.log-filter-bar {
|
||||||
|
display: flex;
|
||||||
|
gap: 6px;
|
||||||
|
align-items: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.log-filter-bar input[type="text"],
|
||||||
|
.log-filter-bar select {
|
||||||
|
padding: 3px 7px;
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 0.85em;
|
||||||
|
color: #333;
|
||||||
|
}
|
||||||
|
|
||||||
|
.log-filter-bar input[type="text"] { width: 110px; }
|
||||||
|
|
||||||
/* Modal for connection status messages */
|
/* Modal for connection status messages */
|
||||||
.connection-modal {
|
.connection-modal {
|
||||||
display: none;
|
display: none;
|
||||||
@@ -235,6 +286,8 @@
|
|||||||
color: #ff9800;
|
color: #ff9800;
|
||||||
font-weight: 700;
|
font-weight: 700;
|
||||||
}
|
}
|
||||||
|
#ntable a.host-link { color: inherit; text-decoration: none; }
|
||||||
|
#ntable a.host-link:hover { text-decoration: underline; }
|
||||||
</style>
|
</style>
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
var cnt = 0;
|
var cnt = 0;
|
||||||
@@ -244,11 +297,13 @@
|
|||||||
var HBD_VERSION = "{{ hbd_version }}";
|
var HBD_VERSION = "{{ hbd_version }}";
|
||||||
|
|
||||||
function hostNameHtml(data) {
|
function hostNameHtml(data) {
|
||||||
|
var rawName = data.raw_name || data.name.replace(/<[^>]+>/g, '').replace('*', '').trim();
|
||||||
var nameHtml = data.name;
|
var nameHtml = data.name;
|
||||||
if (!data.hbc_version || data.hbc_version !== HBD_VERSION) {
|
if (!data.hbc_version || data.hbc_version !== HBD_VERSION) {
|
||||||
nameHtml += ' 🥀';
|
nameHtml += ' 🥀';
|
||||||
}
|
}
|
||||||
return data.dyn ? '<b>' + nameHtml + '</b>' : nameHtml;
|
var display = data.dyn ? '<b>' + nameHtml + '</b>' : nameHtml;
|
||||||
|
return '<a class="host-link" href="/plugins#' + encodeURIComponent(rawName) + '">' + display + '</a>';
|
||||||
}
|
}
|
||||||
|
|
||||||
function setup() {
|
function setup() {
|
||||||
@@ -403,7 +458,7 @@
|
|||||||
);
|
);
|
||||||
if (data.connections[i].state == "up") {
|
if (data.connections[i].state == "up") {
|
||||||
state = '<span class="state-up">up</span>';
|
state = '<span class="state-up">up</span>';
|
||||||
latency = Number.parseFloat(data.connections[i].rtts[0]).toFixed(2);
|
latency = String(Math.round(Number.parseFloat(data.connections[i].rtts[0])));
|
||||||
} else {
|
} else {
|
||||||
if (data.connections[i].state == "unknown") {
|
if (data.connections[i].state == "unknown") {
|
||||||
state = "";
|
state = "";
|
||||||
@@ -427,6 +482,22 @@
|
|||||||
updateRowAlert(name_idx[data.name], data);
|
updateRowAlert(name_idx[data.name], data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Show or hide every log entry under #messages according to the three
// filter controls (host substring, exact level, message substring).
// An empty control places no restriction.
function applyLogFilters() {
    var wantedHost = document.getElementById('filter-host').value.toLowerCase().trim();
    var wantedLevel = document.getElementById('filter-level').value;
    var wantedText = document.getElementById('filter-msg').value.toLowerCase().trim();

    // True when the entry passes every active filter.
    function passes(row) {
        if (wantedHost) {
            var rowHost = (row.dataset.host || '').toLowerCase();
            if (!rowHost.includes(wantedHost)) return false;
        }
        if (wantedLevel && row.dataset.level !== wantedLevel) return false;
        if (wantedText) {
            var body = row.querySelector('.log-msg');
            // An entry without a message element cannot match a text filter.
            if (!body || !body.textContent.toLowerCase().includes(wantedText)) return false;
        }
        return true;
    }

    var rows = document.querySelectorAll('#messages .log-entry');
    for (var i = 0; i < rows.length; i++) {
        rows[i].style.display = passes(rows[i]) ? '' : 'none';
    }
}
|
||||||
|
|
||||||
function WS_Connect() {
|
function WS_Connect() {
|
||||||
if ("WebSocket" in window) {
|
if ("WebSocket" in window) {
|
||||||
//N.B: subprotocol field causes chrome to error 1006
|
//N.B: subprotocol field causes chrome to error 1006
|
||||||
@@ -455,7 +526,22 @@
|
|||||||
update_table(state.data);
|
update_table(state.data);
|
||||||
} else if (state.type == "message") {
|
} else if (state.type == "message") {
|
||||||
var msgs = document.getElementById("messages");
|
var msgs = document.getElementById("messages");
|
||||||
msgs.insertAdjacentHTML("afterbegin", "<div>" + state.data + "</div>");
|
var msg = state.data;
|
||||||
|
var _d = new Date(msg.ts * 1000);
|
||||||
|
function _p(n) { return n < 10 ? '0' + n : '' + n; }
|
||||||
|
var ts_str = _d.getFullYear() + '-' + _p(_d.getMonth()+1) + '-' + _p(_d.getDate())
|
||||||
|
+ ' ' + _p(_d.getHours()) + ':' + _p(_d.getMinutes()) + ':' + _p(_d.getSeconds());
|
||||||
|
var lvl = (msg.level || "INFO").toLowerCase();
|
||||||
|
var hostVal = msg.host || '';
|
||||||
|
var html = '<div class="log-entry log-' + lvl + '" data-level="' + lvl + '" data-host="' + hostVal.replace(/"/g, '"') + '">';
|
||||||
|
html += '<span class="log-ts">' + ts_str + '</span>';
|
||||||
|
html += '<span class="log-level">' + (msg.level || "") + '</span>';
|
||||||
|
if (msg.host) html += '<span class="log-host">' + msg.host + '</span>';
|
||||||
|
if (msg.service) html += '<span class="log-service">' + msg.service + '</span>';
|
||||||
|
html += '<span class="log-msg">' + msg.message + '</span>';
|
||||||
|
html += '</div>';
|
||||||
|
msgs.insertAdjacentHTML("afterbegin", html);
|
||||||
|
applyLogFilters();
|
||||||
}
|
}
|
||||||
cnt++;
|
cnt++;
|
||||||
};
|
};
|
||||||
@@ -510,7 +596,7 @@
|
|||||||
<tbody id="ntablebody">
|
<tbody id="ntablebody">
|
||||||
{% for host in hosts %}
|
{% for host in hosts %}
|
||||||
<tr class="{% if host.alert_critical_unacked > 0 or host.alert_critical_acked > 0 %}row-critical{% elif host.alert_warning_unacked > 0 or host.alert_warning_acked > 0 %}row-warning{% endif %}">
|
<tr class="{% if host.alert_critical_unacked > 0 or host.alert_critical_acked > 0 %}row-critical{% elif host.alert_warning_unacked > 0 or host.alert_warning_acked > 0 %}row-warning{% endif %}">
|
||||||
<td data-name="{{ host.name }}">{{ host.name }}{% if not host.hbc_version or host.hbc_version != hbd_version %} 🥀{% endif %}</td>
|
<td data-name="{{ host.name }}"><a class="host-link" href="/plugins#{{ host.raw_name | urlencode }}">{{ host.name }}{% if not host.hbc_version or host.hbc_version != hbd_version %} 🥀{% endif %}</a></td>
|
||||||
<td style="text-align: center; color: #ff9800; font-weight: bold;">
|
<td style="text-align: center; color: #ff9800; font-weight: bold;">
|
||||||
{%- set warning_unacked = host.alert_warning_unacked -%}
|
{%- set warning_unacked = host.alert_warning_unacked -%}
|
||||||
{%- set warning_acked = host.alert_warning_acked -%}
|
{%- set warning_acked = host.alert_warning_acked -%}
|
||||||
@@ -544,7 +630,20 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="log-section">
|
<div class="log-section">
|
||||||
<h2>Log of Events</h2>
|
<div class="log-section-header">
|
||||||
|
<span class="log-section-title">Log of Events</span>
|
||||||
|
<div class="log-filter-bar">
|
||||||
|
<input type="text" id="filter-host" placeholder="Host…" title="Filter by host" />
|
||||||
|
<select id="filter-level" title="Filter by level">
|
||||||
|
<option value="">All levels</option>
|
||||||
|
<option value="info">INFO</option>
|
||||||
|
<option value="warning">WARNING</option>
|
||||||
|
<option value="critical">CRITICAL</option>
|
||||||
|
<option value="recover">RECOVER</option>
|
||||||
|
</select>
|
||||||
|
<input type="text" id="filter-msg" placeholder="Message…" title="Filter by message text" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div id="messages"></div>
|
<div id="messages"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -560,6 +659,9 @@
|
|||||||
|
|
||||||
<script>
|
<script>
|
||||||
setup();
|
setup();
|
||||||
|
document.getElementById('filter-host').addEventListener('input', applyLogFilters);
|
||||||
|
document.getElementById('filter-level').addEventListener('change', applyLogFilters);
|
||||||
|
document.getElementById('filter-msg').addEventListener('input', applyLogFilters);
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -9,6 +9,13 @@
|
|||||||
{% if current_user and current_user.admin %}
|
{% if current_user and current_user.admin %}
|
||||||
<a href="/settings"{% if active_page == "settings" %} class="active"{% endif %}>Settings</a>
|
<a href="/settings"{% if active_page == "settings" %} class="active"{% endif %}>Settings</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
<a href="/about"{% if active_page == "about" %} class="active"{% endif %}>About</a>
|
||||||
|
</div>
|
||||||
|
{% if current_user and current_user.admin %}
|
||||||
|
<button id="nav-publish-btn" class="nav-publish-btn" onclick="navPublishConfig()" style="display:none" title="Publish pending config changes to .hb.yaml">⚠ Publish Config</button>
|
||||||
|
{% endif %}
|
||||||
|
<div class="nav-pie" title="Host alert status">
|
||||||
|
<canvas id="alert-pie" width="44" height="44"></canvas>
|
||||||
</div>
|
</div>
|
||||||
<div class="nav-clock" title="Click for full-screen clock">
|
<div class="nav-clock" title="Click for full-screen clock">
|
||||||
<canvas id="swiss-clock" width="44" height="44"></canvas>
|
<canvas id="swiss-clock" width="44" height="44"></canvas>
|
||||||
@@ -41,4 +48,87 @@
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
})();
|
})();
|
||||||
|
|
||||||
|
// Paint the #alert-pie canvas as a pie chart of the three counts:
// critical (red), warning (amber) and ok (green). With all counts at zero
// a plain grey disc is drawn instead. Does nothing if the canvas is absent.
function drawAlertPie(critical, warning, ok) {
    var pie = document.getElementById('alert-pie');
    if (!pie) return;

    var g = pie.getContext('2d');
    var side = pie.width;
    var centre = side / 2;
    var radius = centre - 1;
    g.clearRect(0, 0, side, side);

    var sum = critical + warning + ok;
    if (sum === 0) {
        g.beginPath();
        g.arc(centre, centre, radius, 0, Math.PI * 2);
        g.fillStyle = '#ccc';
        g.fill();
        return;
    }

    var counts = [critical, warning, ok];
    var colours = ['#e53935', '#ffb300', '#43a047'];

    // Wedges are laid out one after another starting from -PI/2.
    var angle = -Math.PI / 2;
    for (var i = 0; i < counts.length; i++) {
        if (counts[i] === 0) continue;
        var span = (counts[i] / sum) * Math.PI * 2;
        g.beginPath();
        g.moveTo(centre, centre);
        g.arc(centre, centre, radius, angle, angle + span);
        g.closePath();
        g.fillStyle = colours[i];
        g.fill();
        angle += span;
    }
}
|
||||||
|
|
||||||
|
// Fetch the alert summary and redraw the pie from it. A non-OK response
// or any fetch/parse failure leaves the current drawing untouched.
function updateAlertPie() {
    fetch('/api/0/alert_summary')
        .then(function(resp) {
            return resp.ok ? resp.json() : undefined;
        })
        .then(function(summary) {
            if (!summary) return;
            // Missing or falsy counts are drawn as zero.
            drawAlertPie(summary.critical || 0, summary.warning || 0, summary.ok || 0);
        })
        .catch(function() {});
}
|
||||||
|
|
||||||
|
document.addEventListener('DOMContentLoaded', function() {
    var PIE_REFRESH_MS = 30000;

    // Alert pie: draw once now, then refresh on a fixed interval.
    updateAlertPie();
    setInterval(updateAlertPie, PIE_REFRESH_MS);

    // Publish button: set its visibility now and again on every 'storage' event.
    navCheckPendingConfig();
    window.addEventListener('storage', navCheckPendingConfig);
});
|
||||||
|
|
||||||
|
// Show the nav publish button only while localStorage holds a
// non-empty 'hbd_pending_config' entry. No-op when the button is not
// present on the page.
function navCheckPendingConfig() {
    var publishBtn = document.getElementById('nav-publish-btn');
    if (publishBtn) {
        var hasPending = Boolean(localStorage.getItem('hbd_pending_config'));
        publishBtn.style.display = hasPending ? '' : 'none';
    }
}
|
||||||
|
|
||||||
|
/**
 * POST the staged config stored in localStorage (`hbd_pending_config`) to
 * /api/0/config. On success the staged copy is removed and the page is
 * reloaded; on failure an alert is shown and the button is restored.
 *
 * Does nothing when no config is staged or the staged text is not valid JSON.
 */
async function navPublishConfig() {
    var btn = document.getElementById('nav-publish-btn');
    var pending = localStorage.getItem('hbd_pending_config');
    if (!pending) return;

    // Parse only to validate; the raw string is what gets sent.
    try {
        JSON.parse(pending);
    } catch (e) {
        console.error('hbd_pending_config is not valid JSON; not publishing', e);
        return;
    }

    function restoreButton() {
        if (btn) { btn.disabled = false; btn.textContent = '⚠ Publish Config'; }
    }

    if (btn) { btn.disabled = true; btn.textContent = 'Saving…'; }

    try {
        var resp = await fetch('/api/0/config', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: pending
        });

        if (resp.ok) {
            localStorage.removeItem('hbd_pending_config');
            window.location.reload();
            return;
        }

        // The error body may be missing, non-JSON, or literally `null`.
        var err = await resp.json().catch(function () { return {}; });
        alert('Error: ' + ((err && err.error) || resp.statusText));
        restoreButton();
    } catch (e) {
        alert('Network error: ' + e.message);
        restoreButton();
    }
}
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
h1 {
|
h1 {
|
||||||
color: #333;
|
color: #333;
|
||||||
margin-bottom: 5px;
|
margin-bottom: 5px;
|
||||||
|
margin-top: 15px;
|
||||||
font-size: 1.5em;
|
font-size: 1.5em;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -130,6 +131,52 @@
|
|||||||
text-overflow: ellipsis;
|
text-overflow: ellipsis;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.host-action-btn {
|
||||||
|
font-size: 0.75em;
|
||||||
|
font-weight: bold;
|
||||||
|
padding: 3px 10px;
|
||||||
|
border-radius: 4px;
|
||||||
|
border: none;
|
||||||
|
cursor: pointer;
|
||||||
|
text-decoration: none;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
.host-action-btn.update-btn {
|
||||||
|
background: #e3f2fd;
|
||||||
|
color: #1565c0;
|
||||||
|
}
|
||||||
|
.host-action-btn.update-btn:hover { background: #bbdefb; }
|
||||||
|
.host-action-btn.delete-btn {
|
||||||
|
background: #ffebee;
|
||||||
|
color: #c62828;
|
||||||
|
}
|
||||||
|
.host-action-btn.delete-btn:hover { background: #ffcdd2; }
|
||||||
|
|
||||||
|
/* ── Action result toast ───────────────────────────────────── */
|
||||||
|
#action-toast {
|
||||||
|
position: fixed;
|
||||||
|
bottom: 24px;
|
||||||
|
left: 50%;
|
||||||
|
transform: translateX(-50%) translateY(20px);
|
||||||
|
background: #323232;
|
||||||
|
color: #fff;
|
||||||
|
padding: 12px 22px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
max-width: 480px;
|
||||||
|
text-align: center;
|
||||||
|
opacity: 0;
|
||||||
|
pointer-events: none;
|
||||||
|
transition: opacity 0.25s, transform 0.25s;
|
||||||
|
z-index: 9000;
|
||||||
|
white-space: pre-wrap;
|
||||||
|
}
|
||||||
|
#action-toast.show {
|
||||||
|
opacity: 1;
|
||||||
|
transform: translateX(-50%) translateY(0);
|
||||||
|
}
|
||||||
|
#action-toast.error { background: #c62828; }
|
||||||
|
|
||||||
/* ── Host body ──────────────────────────────────────────────── */
|
/* ── Host body ──────────────────────────────────────────────── */
|
||||||
|
|
||||||
.host-body {
|
.host-body {
|
||||||
@@ -341,6 +388,30 @@
|
|||||||
.container::-webkit-scrollbar-track { background: #f1f1f1; border-radius: 4px; }
|
.container::-webkit-scrollbar-track { background: #f1f1f1; border-radius: 4px; }
|
||||||
.container::-webkit-scrollbar-thumb { background: #ccc; border-radius: 4px; }
|
.container::-webkit-scrollbar-thumb { background: #ccc; border-radius: 4px; }
|
||||||
.container::-webkit-scrollbar-thumb:hover { background: #999; }
|
.container::-webkit-scrollbar-thumb:hover { background: #999; }
|
||||||
|
|
||||||
|
/* ── Host info section ──────────────────────────────────────────────────── */
|
||||||
|
.host-info-section {
|
||||||
|
padding: 12px 16px;
|
||||||
|
background: #fafafa;
|
||||||
|
border-bottom: 1px solid #e0e0e0;
|
||||||
|
font-size: 0.85em;
|
||||||
|
}
|
||||||
|
.info-meta {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: max-content 1fr;
|
||||||
|
gap: 3px 14px;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
}
|
||||||
|
.info-label { font-weight: 600; color: #555; white-space: nowrap; }
|
||||||
|
.info-value { color: #222; }
|
||||||
|
.info-thresholds-title {
|
||||||
|
font-weight: 600;
|
||||||
|
color: #555;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
}
|
||||||
|
.info-note { color: #888; font-style: italic; }
|
||||||
|
.info-loading { color: #bbb; font-style: italic; }
|
||||||
|
.threshold-covers { font-size: 0.85em; color: #777; font-style: italic; }
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
@@ -369,7 +440,8 @@
|
|||||||
<span class="host-name">{{ host.name }}</span>
|
<span class="host-name">{{ host.name }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="glance-strip" id="glance-{{ host.name }}">
|
<div class="glance-strip" id="glance-{{ host.name }}" data-owner="{{ host.owner or '' }}">
|
||||||
|
{% if current_user and current_user.admin and host.owner %}<span class="glance-chip neutral">{{ host.owner }}</span>{% endif %}
|
||||||
<span class="glance-loading">—</span>
|
<span class="glance-loading">—</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -378,11 +450,20 @@
|
|||||||
<span class="nagios-badge" id="nagios-badge-{{ host.name }}">—</span>
|
<span class="nagios-badge" id="nagios-badge-{{ host.name }}">—</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<span class="os-label" id="os-label-{{ host.name }}"></span>
|
<span class="os-label" id="os-label-{{ host.name }}"></span>
|
||||||
|
{% if host.is_owner %}
|
||||||
|
<button class="host-action-btn update-btn"
|
||||||
|
onclick="event.stopPropagation(); hostAction(this, '/u?h={{ host.name }}')">Update</button>
|
||||||
|
<button class="host-action-btn delete-btn"
|
||||||
|
onclick="event.stopPropagation(); hostDelete(this, '{{ host.name }}')">Delete</button>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="host-body">
|
<div class="host-body">
|
||||||
{% set plugin_order = ['os_info','cpu_monitor','memory_monitor','disk_monitor','network_monitor','nagios_runner','filesystem_info'] %}
|
<div class="host-info-section" id="info-{{ host.name }}">
|
||||||
|
<div class="info-loading">Loading…</div>
|
||||||
|
</div>
|
||||||
|
{% set plugin_order = ['os_info','cpu_monitor','memory_monitor','disk_monitor','network_monitor','zfs_monitor','nagios_runner','filesystem_info'] %}
|
||||||
{% for plugin in plugin_order if plugin in host.plugins %}
|
{% for plugin in plugin_order if plugin in host.plugins %}
|
||||||
<div class="plugin-accordion collapsed"
|
<div class="plugin-accordion collapsed"
|
||||||
data-hostname="{{ host.name }}"
|
data-hostname="{{ host.name }}"
|
||||||
@@ -427,12 +508,16 @@
|
|||||||
const GLANCE_PLUGINS = ['cpu_monitor','memory_monitor','disk_monitor',
|
const GLANCE_PLUGINS = ['cpu_monitor','memory_monitor','disk_monitor',
|
||||||
'network_monitor','nagios_runner','os_info'];
|
'network_monitor','nagios_runner','os_info'];
|
||||||
const SKIP_FIELDS = new Set(['id','name']);
|
const SKIP_FIELDS = new Set(['id','name']);
|
||||||
|
const CURRENT_USER_ADMIN = {{ 'true' if current_user and current_user.admin else 'false' }};
|
||||||
|
|
||||||
// ── Cache ───────────────────────────────────────────────────────────────
|
// ── Cache ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
// pluginCache[hostname][pluginName] = { data, timestamp, fetchedAt }
|
// pluginCache[hostname][pluginName] = { data, timestamp, fetchedAt }
|
||||||
const pluginCache = {};
|
const pluginCache = {};
|
||||||
|
|
||||||
|
// infoCache[hostname] = info data object from /api/0/hosts/{hostname}/info
|
||||||
|
const infoCache = {};
|
||||||
|
|
||||||
function setCache(hostname, pluginName, sample) {
|
function setCache(hostname, pluginName, sample) {
|
||||||
if (!pluginCache[hostname]) pluginCache[hostname] = {};
|
if (!pluginCache[hostname]) pluginCache[hostname] = {};
|
||||||
pluginCache[hostname][pluginName] = {
|
pluginCache[hostname][pluginName] = {
|
||||||
@@ -446,6 +531,17 @@
|
|||||||
return pluginCache[hostname]?.[pluginName] ?? null;
|
return pluginCache[hostname]?.[pluginName] ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Return the worst Nagios exit code found in a nagios_runner data object.
 *
 * Only keys ending in `_status_code` whose value is a number are considered.
 * "Worst" is the numerically highest code; 0 is returned when `data` is
 * null/undefined or contains no such keys.
 *
 * @param {Object|null|undefined} data  nagios_runner sample data.
 * @returns {number} Highest status code seen, or 0.
 */
function nagiosWorstStatus(data) {
    let worst = 0;
    for (const [key, value] of Object.entries(data || {})) {
        if (key.endsWith('_status_code') && typeof value === 'number' && value > worst) {
            worst = value;
        }
    }
    return worst;
}
|
||||||
|
|
||||||
// ── Fetch helpers ───────────────────────────────────────────────────────
|
// ── Fetch helpers ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
async function fetchPlugin(hostname, pluginName) {
|
async function fetchPlugin(hostname, pluginName) {
|
||||||
@@ -455,6 +551,61 @@
|
|||||||
return json.samples?.[0] ?? null;
|
return json.samples?.[0] ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch the info document for one host from /api/0/hosts/{hostname}/info.
 *
 * @param {string} hostname  Host name; URL-encoded before use.
 * @returns {Promise<Object>} Parsed JSON body.
 * @throws {Error} `HTTP <status>` when the response is not OK.
 */
async function fetchHostInfo(hostname) {
    const r = await fetch(`/api/0/hosts/${encodeURIComponent(hostname)}/info`);
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    return r.json();
}
|
||||||
|
|
||||||
|
/**
 * Render the host-info panel (`#info-<hostname>`): owner, managers, agent
 * version/type, last packet time, and the effective threshold table.
 *
 * All server-provided text is passed through escHtml before being inserted.
 * Does nothing if the panel element is not in the document.
 *
 * @param {string} hostname  Host whose panel is filled.
 * @param {Object} data      Info object as returned by fetchHostInfo.
 */
function renderInfoSection(hostname, data) {
    const el = document.getElementById(`info-${hostname}`);
    if (!el) return;

    const owner = data.owner ? escHtml(data.owner) : '—';
    const managers = data.managers && data.managers.length
        ? data.managers.map(m => escHtml(m)).join(', ') : '—';
    const hbcVer = data.hbc_version ? escHtml(String(data.hbc_version)) : '—';
    const hbcType = data.hbc_type ? escHtml(String(data.hbc_type)) : '—';
    // NOTE(review): multiplied by 1000, so last_packet is presumably epoch
    // seconds — confirm against the API.
    const lastPkt = data.last_packet != null
        ? new Date(data.last_packet * 1000).toLocaleString() : '—';

    let html = `<div class="info-meta">
<span class="info-label">Owner</span><span class="info-value">${owner}</span>
<span class="info-label">Managers</span><span class="info-value">${managers}</span>
<span class="info-label">Agent Version</span><span class="info-value">${hbcVer}</span>
<span class="info-label">Agent Type</span><span class="info-value">${hbcType}</span>
<span class="info-label">Last Packet</span><span class="info-value">${lastPkt}</span>
</div>`;

    const thresholds = data.thresholds;
    if (!Array.isArray(thresholds)) {
        // null, or the field is missing/malformed: previously only an exact
        // null was handled and anything else threw on `.length`.
        html += `<div class="info-note">Threshold alerting not configured.</div>`;
    } else if (thresholds.length === 0) {
        html += `<div class="info-note">No thresholds defined.</div>`;
    } else {
        html += `<div class="info-thresholds-title">Effective Thresholds</div>
<table class="data-table"><thead><tr>
<th>Metric</th><th>Op</th><th>Warning</th><th>Critical</th>
</tr></thead><tbody>`;
        for (const t of thresholds) {
            const w = t.warning != null ? escHtml(String(t.warning)) : '—';
            const c = t.critical != null ? escHtml(String(t.critical)) : '—';
            let metricCell = escHtml(t.metric);
            if (t.covers && t.covers.length > 0) {
                metricCell += `<br><span class="threshold-covers">↳ ${t.covers.map(x => escHtml(x)).join(', ')}</span>`;
            }
            html += `<tr>
<td class="key">${metricCell}</td>
<td>${escHtml(t.operator)}</td>
<td>${w}</td>
<td>${c}</td>
</tr>`;
        }
        html += `</tbody></table>`;
    }

    el.innerHTML = html;
}
|
||||||
|
|
||||||
async function fetchHostGlance(hostname) {
|
async function fetchHostGlance(hostname) {
|
||||||
const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
|
const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
|
||||||
const availablePlugins = (card?.dataset.plugins || '').split(',').filter(Boolean);
|
const availablePlugins = (card?.dataset.plugins || '').split(',').filter(Boolean);
|
||||||
@@ -494,6 +645,12 @@
|
|||||||
|
|
||||||
const chips = [];
|
const chips = [];
|
||||||
|
|
||||||
|
// Owner (admin only, static from server)
|
||||||
|
const owner = strip.dataset.owner;
|
||||||
|
if (CURRENT_USER_ADMIN && owner) {
|
||||||
|
chips.push(`<span class="glance-chip neutral">${owner}</span>`);
|
||||||
|
}
|
||||||
|
|
||||||
// CPU
|
// CPU
|
||||||
const cpu = getCache(hostname, 'cpu_monitor');
|
const cpu = getCache(hostname, 'cpu_monitor');
|
||||||
if (cpu) {
|
if (cpu) {
|
||||||
@@ -547,13 +704,13 @@
|
|||||||
? chips.join('')
|
? chips.join('')
|
||||||
: '<span class="glance-loading">—</span>';
|
: '<span class="glance-loading">—</span>';
|
||||||
|
|
||||||
// Nagios badge
|
// Nagios badge — derive worst status from individual check codes
|
||||||
const nagios = getCache(hostname, 'nagios_runner');
|
const nagios = getCache(hostname, 'nagios_runner');
|
||||||
if (nagosBadge && nagios) {
|
if (nagosBadge && nagios) {
|
||||||
const status = (nagios.data.overall_status || '—').toUpperCase();
|
const worst = nagiosWorstStatus(nagios.data);
|
||||||
const cls = status === 'OK' ? 'ok'
|
const names = {0:'OK', 1:'WARNING', 2:'CRITICAL', 3:'UNKNOWN'};
|
||||||
: status === 'WARNING' ? 'warning'
|
const status = names[worst] || '—';
|
||||||
: status === 'CRITICAL' ? 'critical' : '';
|
const cls = worst === 0 ? 'ok' : worst === 1 ? 'warning' : worst >= 2 ? 'critical' : '';
|
||||||
nagosBadge.className = `nagios-badge ${cls}`;
|
nagosBadge.className = `nagios-badge ${cls}`;
|
||||||
nagosBadge.textContent = status;
|
nagosBadge.textContent = status;
|
||||||
}
|
}
|
||||||
@@ -572,9 +729,22 @@
|
|||||||
const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
|
const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
|
||||||
const wasCollapsed = card.classList.contains('collapsed');
|
const wasCollapsed = card.classList.contains('collapsed');
|
||||||
card.classList.toggle('collapsed');
|
card.classList.toggle('collapsed');
|
||||||
if (wasCollapsed && !pluginCache[hostname]) {
|
if (wasCollapsed) {
|
||||||
|
if (!pluginCache[hostname]) {
|
||||||
fetchHostGlance(hostname);
|
fetchHostGlance(hostname);
|
||||||
}
|
}
|
||||||
|
if (!infoCache[hostname]) {
|
||||||
|
const infoEl = document.getElementById(`info-${hostname}`);
|
||||||
|
if (infoEl) infoEl.innerHTML = '<div class="info-loading">Loading…</div>';
|
||||||
|
fetchHostInfo(hostname).then(data => {
|
||||||
|
infoCache[hostname] = data;
|
||||||
|
renderInfoSection(hostname, data);
|
||||||
|
}).catch(() => {
|
||||||
|
const el = document.getElementById(`info-${hostname}`);
|
||||||
|
if (el) el.innerHTML = '<div class="info-loading">Could not load host info.</div>';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Toggle plugin accordion ─────────────────────────────────────────────
|
// ── Toggle plugin accordion ─────────────────────────────────────────────
|
||||||
@@ -662,9 +832,10 @@
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'nagios_runner': {
|
case 'nagios_runner': {
|
||||||
const status = (d.overall_status || '?').toUpperCase();
|
const worst = nagiosWorstStatus(d);
|
||||||
const count = d.plugin_count;
|
const names = {0:'OK', 1:'WARNING', 2:'CRITICAL', 3:'UNKNOWN'};
|
||||||
text = status + (count != null ? ` — ${count} checks` : '');
|
const codes = Object.keys(d).filter(k => k.endsWith('_status_code'));
|
||||||
|
text = (names[worst] || '?') + (codes.length ? ` — ${codes.length} checks` : '');
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'filesystem_info': {
|
case 'filesystem_info': {
|
||||||
@@ -672,6 +843,19 @@
|
|||||||
text = `${count} filesystem${count !== 1 ? 's' : ''}`;
|
text = `${count} filesystem${count !== 1 ? 's' : ''}`;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 'zfs_monitor': {
|
||||||
|
const pools = d.pools || {};
|
||||||
|
const names = Object.keys(pools);
|
||||||
|
if (names.length === 0) { text = 'No pools'; break; }
|
||||||
|
const degraded = names.filter(n => pools[n].health && pools[n].health !== 'ONLINE');
|
||||||
|
text = names.map(n => {
|
||||||
|
const p = pools[n];
|
||||||
|
const cap = p.capacity != null ? ` ${p.capacity.toFixed(0)}%` : '';
|
||||||
|
return `${n}${cap}`;
|
||||||
|
}).join(' · ');
|
||||||
|
if (degraded.length) text += ` ⚠ ${degraded.map(n => pools[n].health).join(',')}`;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
text = 'Loaded';
|
text = 'Loaded';
|
||||||
}
|
}
|
||||||
@@ -693,6 +877,7 @@
|
|||||||
case 'memory_monitor': html = renderMemoryTable(cached.data); break;
|
case 'memory_monitor': html = renderMemoryTable(cached.data); break;
|
||||||
case 'disk_monitor': html = renderDiskTables(cached.data); break;
|
case 'disk_monitor': html = renderDiskTables(cached.data); break;
|
||||||
case 'network_monitor':html = renderNetworkTables(cached.data); break;
|
case 'network_monitor':html = renderNetworkTables(cached.data); break;
|
||||||
|
case 'zfs_monitor': html = renderZfsTables(cached.data); break;
|
||||||
case 'nagios_runner': html = renderNagiosTable(cached.data); break;
|
case 'nagios_runner': html = renderNagiosTable(cached.data); break;
|
||||||
case 'filesystem_info':html = renderFilesystemTable(cached.data); break;
|
case 'filesystem_info':html = renderFilesystemTable(cached.data); break;
|
||||||
default: html = renderGenericTable(cached.data); break;
|
default: html = renderGenericTable(cached.data); break;
|
||||||
@@ -707,10 +892,11 @@
|
|||||||
function renderOsInfoTable(d) {
|
function renderOsInfoTable(d) {
|
||||||
const ORDER = ['distro_pretty_name','system','release','version','machine',
|
const ORDER = ['distro_pretty_name','system','release','version','machine',
|
||||||
'processor','architecture','node','python_version',
|
'processor','architecture','node','python_version',
|
||||||
'python_implementation','hbc_version',
|
'python_implementation',
|
||||||
'distro_name','distro_version','distro_id','distro_version_id'];
|
'distro_name','distro_version','distro_id','distro_version_id'];
|
||||||
|
const INFO_FIELDS = new Set(['hbc_version', 'hbc_type']);
|
||||||
const shown = new Set(ORDER);
|
const shown = new Set(ORDER);
|
||||||
const keys = [...ORDER, ...Object.keys(d).filter(k => !shown.has(k) && !SKIP_FIELDS.has(k))];
|
const keys = [...ORDER, ...Object.keys(d).filter(k => !shown.has(k) && !SKIP_FIELDS.has(k) && !INFO_FIELDS.has(k))];
|
||||||
|
|
||||||
let html = '<table class="data-table"><thead><tr><th>Field</th><th>Value</th></tr></thead><tbody>';
|
let html = '<table class="data-table"><thead><tr><th>Field</th><th>Value</th></tr></thead><tbody>';
|
||||||
for (const k of keys) {
|
for (const k of keys) {
|
||||||
@@ -1023,6 +1209,66 @@
|
|||||||
return html;
|
return html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the HTML for a zfs_monitor sample: a pool table (health, size, used,
 * free, capacity, fragmentation, dedup) and, when any pool reports
 * `read_ops`, a second table with I/O counters.
 *
 * Pool names and health strings are escaped with escHtml; byte values are
 * formatted with formatBytes.
 *
 * @param {Object} d  Sample data; pools are read from `d.pools` keyed by name.
 * @returns {string} HTML markup.
 */
function renderZfsTables(d) {
    const pools = (d && d.pools) || {};
    const names = Object.keys(pools);
    if (names.length === 0) return '<div class="no-data">No ZFS pools found</div>';

    // Tolerate null pool entries instead of throwing on property access.
    const poolOf = n => pools[n] || {};

    // Fixed-point formatting that never throws on missing/non-numeric input.
    const fixed = (v, digits, suffix) => {
        const n = Number(v);
        return v != null && isFinite(n) ? n.toFixed(digits) + suffix : '—';
    };

    const healthCls = h => {
        if (!h || h === 'ONLINE') return 'pct-ok';
        if (h === 'DEGRADED') return 'pct-warn';
        return 'pct-crit';
    };

    let pt = '<table class="data-table"><thead><tr>'
        + '<th>Pool</th><th>Health</th>'
        + '<th class="num">Size</th><th class="num">Used</th>'
        + '<th class="num">Free</th><th class="num">Cap %</th>'
        + '<th class="num">Frag %</th><th class="num">Dedup</th>'
        + '</tr></thead><tbody>';
    for (const name of names) {
        const p = poolOf(name);
        // Missing or non-numeric capacity is shown as 0.0%.
        const cap = Number(p.capacity) || 0;
        const capCls = cap > 90 ? 'pct-crit' : cap > 75 ? 'pct-warn' : 'pct-ok';
        pt += `<tr>
<td class="iface-name">${escHtml(name)}</td>
<td class="${healthCls(p.health)}">${escHtml(p.health || '—')}</td>
<td class="num">${formatBytes(p.size || 0)}</td>
<td class="num">${formatBytes(p.alloc || 0)}</td>
<td class="num">${formatBytes(p.free || 0)}</td>
<td class="num ${capCls}">${cap.toFixed(1)}%</td>
<td class="num">${fixed(p.frag, 1, '%')}</td>
<td class="num">${fixed(p.dedup, 2, 'x')}</td>
</tr>`;
    }
    pt += '</tbody></table>';

    const hasIo = names.some(n => poolOf(n).read_ops != null);
    if (!hasIo) return pt;

    let iot = '<table class="data-table"><thead><tr>'
        + '<th>Pool</th>'
        + '<th class="num">Read ops</th><th class="num">Write ops</th>'
        + '<th class="num">Read BW</th><th class="num">Write BW</th>'
        + '</tr></thead><tbody>';
    for (const name of names) {
        const p = poolOf(name);
        iot += `<tr>
<td class="iface-name">${escHtml(name)}</td>
<td class="num">${p.read_ops != null ? p.read_ops.toLocaleString() : '—'}</td>
<td class="num">${p.write_ops != null ? p.write_ops.toLocaleString() : '—'}</td>
<td class="num">${p.read_bw != null ? formatBytes(p.read_bw) : '—'}</td>
<td class="num">${p.write_bw != null ? formatBytes(p.write_bw) : '—'}</td>
</tr>`;
    }
    iot += '</tbody></table>';

    return `<div class="flex-tables">
<div><div class="table-section-label">Pools</div>${pt}</div>
<div><div class="table-section-label">I/O (cumulative)</div>${iot}</div>
</div>`;
}
|
||||||
|
|
||||||
function renderGenericTable(d) {
|
function renderGenericTable(d) {
|
||||||
let html = '<table class="data-table"><thead><tr><th>Field</th><th>Value</th></tr></thead><tbody>';
|
let html = '<table class="data-table"><thead><tr><th>Field</th><th>Value</th></tr></thead><tbody>';
|
||||||
for (const [k, v] of Object.entries(d)) {
|
for (const [k, v] of Object.entries(d)) {
|
||||||
@@ -1059,9 +1305,12 @@
|
|||||||
// ── Auto-refresh (30 s) ─────────────────────────────────────────────────
|
// ── Auto-refresh (30 s) ─────────────────────────────────────────────────
|
||||||
|
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
|
document.querySelectorAll('.host-card').forEach(card => {
|
||||||
|
fetchHostGlance(card.dataset.hostname);
|
||||||
|
});
|
||||||
|
|
||||||
document.querySelectorAll('.host-card:not(.collapsed)').forEach(card => {
|
document.querySelectorAll('.host-card:not(.collapsed)').forEach(card => {
|
||||||
const hostname = card.dataset.hostname;
|
const hostname = card.dataset.hostname;
|
||||||
fetchHostGlance(hostname);
|
|
||||||
|
|
||||||
card.querySelectorAll('.plugin-accordion:not(.collapsed)').forEach(acc => {
|
card.querySelectorAll('.plugin-accordion:not(.collapsed)').forEach(acc => {
|
||||||
const pname = acc.dataset.plugin;
|
const pname = acc.dataset.plugin;
|
||||||
@@ -1081,12 +1330,83 @@
|
|||||||
// ── Init ────────────────────────────────────────────────────────────────
|
// ── Init ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
document.addEventListener('DOMContentLoaded', () => {
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
const first = document.querySelector('.host-card');
|
// Fetch glance data for every host immediately so the strip is always populated.
|
||||||
if (first) {
|
document.querySelectorAll('.host-card').forEach(card => {
|
||||||
first.classList.remove('collapsed');
|
fetchHostGlance(card.dataset.hostname);
|
||||||
fetchHostGlance(first.dataset.hostname);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Expand and load info for the target host (URL hash or first host).
|
||||||
|
function expandHost(hostname) {
|
||||||
|
const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
|
||||||
|
if (!card) return false;
|
||||||
|
card.classList.remove('collapsed');
|
||||||
|
fetchHostInfo(hostname).then(data => {
|
||||||
|
infoCache[hostname] = data;
|
||||||
|
renderInfoSection(hostname, data);
|
||||||
|
}).catch(() => {
|
||||||
|
const el = document.getElementById(`info-${hostname}`);
|
||||||
|
if (el) el.innerHTML = '<div class="info-loading">Could not load host info.</div>';
|
||||||
|
});
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const hash = window.location.hash;
|
||||||
|
if (hash) {
|
||||||
|
const hostname = decodeURIComponent(hash.slice(1));
|
||||||
|
if (expandHost(hostname)) {
|
||||||
|
setTimeout(() => {
|
||||||
|
const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
|
||||||
|
if (card) card.scrollIntoView({ behavior: 'smooth', block: 'start' });
|
||||||
|
}, 150);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const first = document.querySelector('.host-card');
|
||||||
|
if (first) expandHost(first.dataset.hostname);
|
||||||
|
});
|
||||||
|
// ── Host action helpers ──────────────────────────────────────
|
||||||
|
|
||||||
|
// Handle of the pending auto-hide timeout for the toast, if any.
let _toastTimer = null;

/**
 * Show a message in the `#action-toast` element for 4 seconds.
 *
 * A new call replaces the text and restarts the hide timer. The message is
 * set via textContent, so it is never interpreted as HTML.
 *
 * @param {string} msg       Text to display.
 * @param {boolean} isError  Truthy to apply the `error` style.
 */
function showToast(msg, isError) {
    const toast = document.getElementById('action-toast');
    if (!toast) return; // toast container not in this page

    toast.textContent = msg;
    toast.classList.toggle('error', !!isError);
    toast.classList.add('show');

    clearTimeout(_toastTimer);
    _toastTimer = setTimeout(() => toast.classList.remove('show'), 4000);
}
|
||||||
|
|
||||||
|
/**
 * Run a host action by requesting `url` and show the response text in a
 * toast (styled as an error when the response is not OK). The triggering
 * button is disabled for the duration of the request.
 *
 * @param {HTMLButtonElement} btn  Button that triggered the action.
 * @param {string} url             Endpoint to request.
 */
async function hostAction(btn, url) {
    btn.disabled = true;
    try {
        const res = await fetch(url);
        const text = await res.text();
        showToast(text, !res.ok);
    } catch (e) {
        showToast('Request failed: ' + e.message, true);
    } finally {
        btn.disabled = false;
    }
}
|
||||||
|
|
||||||
|
/**
 * Ask for confirmation, then delete a host via `/d?h=<hostname>`.
 *
 * The response text is shown in a toast. On success the host's card is
 * removed from the page; in every other outcome the button is re-enabled so
 * the user can retry.
 *
 * @param {HTMLButtonElement} btn  Delete button inside the host card.
 * @param {string} hostname        Host to delete.
 */
async function hostDelete(btn, hostname) {
    if (!confirm('Delete host ' + hostname + '?')) return;

    btn.disabled = true;
    let cardRemoved = false;
    try {
        const res = await fetch('/d?h=' + encodeURIComponent(hostname));
        const text = await res.text();
        showToast(text, !res.ok);
        if (res.ok) {
            // Escape the name so quotes/backslashes cannot produce an invalid
            // selector (which would throw after the delete already succeeded).
            const card = document.querySelector(
                `.host-card[data-hostname="${CSS.escape(hostname)}"]`);
            if (card) {
                card.remove();
                cardRemoved = true;
            }
        }
    } catch (e) {
        showToast('Request failed: ' + e.message, true);
    } finally {
        // Previously the button stayed disabled after a non-OK response.
        if (!cardRemoved) btn.disabled = false;
    }
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
<div id="action-toast"></div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -204,6 +204,70 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.channel-name { color: #333; }
|
.channel-name { color: #333; }
|
||||||
|
|
||||||
|
.edit-section { margin-top: 20px; }
|
||||||
|
.edit-section h4 { font-size: .88em; font-weight: 600; color: #333; margin: 0 0 10px; text-transform: uppercase; letter-spacing: .04em; border-bottom: 1px solid #eee; padding-bottom: 6px; }
|
||||||
|
.edit-field { margin-bottom: 10px; }
|
||||||
|
.edit-field label { display: block; font-size: .82em; color: #666; margin-bottom: 3px; }
|
||||||
|
.edit-input { width: 100%; border: 1px solid #ccc; border-radius: 4px; padding: 5px 8px; font-size: .88em; box-sizing: border-box; }
|
||||||
|
.edit-input:focus { border-color: #0066cc; outline: none; }
|
||||||
|
.status-msg { font-size: .82em; margin-left: 8px; }
|
||||||
|
.save-row { display: flex; align-items: center; margin-top: 8px; }
|
||||||
|
.btn-save { background: #0066cc; color: #fff; border: none; border-radius: 4px; padding: 5px 14px; font-size: .85em; cursor: pointer; }
|
||||||
|
.btn-save:hover { background: #0055aa; }
|
||||||
|
/* ---- Channel chip picker ---- */
|
||||||
|
.ch-picker { }
|
||||||
|
.ch-picker-label { font-size: .8em; font-weight: 600; color: #888; text-transform: uppercase; letter-spacing: .04em; margin-bottom: 6px; }
|
||||||
|
.ch-chips { display: flex; flex-wrap: wrap; gap: 6px; min-height: 32px; margin-bottom: 10px; }
|
||||||
|
.ch-chip {
|
||||||
|
display: inline-flex; align-items: center; gap: 5px;
|
||||||
|
padding: 4px 10px; border-radius: 14px; font-size: .85em; font-weight: 500; cursor: pointer;
|
||||||
|
border: none; font-family: inherit;
|
||||||
|
}
|
||||||
|
.ch-chip.selected { background: #e3f2fd; color: #1565c0; }
|
||||||
|
.ch-chip.selected:hover { background: #bbdefb; }
|
||||||
|
.ch-chip.available { background: #f1f3f4; color: #555; }
|
||||||
|
.ch-chip.available:hover { background: #e8eaf6; color: #283593; }
|
||||||
|
.ch-chip-x { font-size: .9em; line-height: 1; color: inherit; opacity: .7; }
|
||||||
|
|
||||||
|
/* ---- My Channels card list ---- */
|
||||||
|
.my-ch-card {
|
||||||
|
border: 1px solid #e8eaf6; border-radius: 6px; margin-bottom: 8px; overflow: hidden;
|
||||||
|
}
|
||||||
|
.my-ch-header {
|
||||||
|
display: flex; align-items: center; gap: 8px; padding: 8px 12px;
|
||||||
|
background: #f8f9ff; border-bottom: 1px solid #e8eaf6;
|
||||||
|
}
|
||||||
|
.my-ch-name { font-weight: 600; font-size: .9em; color: #222; }
|
||||||
|
.my-ch-type { padding: 2px 7px; border-radius: 8px; font-size: .72em; font-weight: 600; background: #e8eaf6; color: #3949ab; }
|
||||||
|
.my-ch-private { padding: 2px 7px; border-radius: 8px; font-size: .72em; font-weight: 600; background: #fce4ec; color: #c62828; }
|
||||||
|
.my-ch-actions { margin-left: auto; display: flex; gap: 5px; }
|
||||||
|
.btn-sm-edit { background: #888; color: #fff; border: none; border-radius: 4px; padding: 2px 8px; font-size: .78em; cursor: pointer; }
|
||||||
|
.btn-sm-edit:hover { background: #666; }
|
||||||
|
.btn-sm-del { background: transparent; color: #c62828; border: 1px solid #e0e0e0; border-radius: 4px; padding: 2px 7px; font-size: .78em; cursor: pointer; }
|
||||||
|
.btn-sm-del:hover { background: #fce4ec; }
|
||||||
|
|
||||||
|
/* ---- Channel modal (for My Channels CRUD) ---- */
|
||||||
|
.ch-modal-overlay {
|
||||||
|
position: fixed; inset: 0; background: rgba(0,0,0,.4);
|
||||||
|
display: flex; align-items: center; justify-content: center; z-index: 1001;
|
||||||
|
}
|
||||||
|
.ch-modal-box {
|
||||||
|
background: #fff; border-radius: 8px; padding: 24px;
|
||||||
|
min-width: 360px; max-width: 520px; width: 95%;
|
||||||
|
box-shadow: 0 8px 32px rgba(0,0,0,.2);
|
||||||
|
}
|
||||||
|
.ch-modal-box h3 { margin: 0 0 16px; font-size: 1em; }
|
||||||
|
.ch-form-row { margin-bottom: 12px; }
|
||||||
|
.ch-form-row label { display: block; font-size: .83em; font-weight: 600; color: #555; margin-bottom: 3px; }
|
||||||
|
.ch-form-row input[type=text], .ch-form-row input[type=password], .ch-form-row select {
|
||||||
|
width: 100%; border: 1px solid #ccc; border-radius: 4px; padding: 5px 8px;
|
||||||
|
font-size: .88em; box-sizing: border-box; font-family: inherit;
|
||||||
|
}
|
||||||
|
.ch-form-row input:focus, .ch-form-row select:focus { border-color: #0066cc; outline: none; }
|
||||||
|
.ch-form-divider { font-size: .78em; font-weight: 700; text-transform: uppercase; letter-spacing: .05em; color: #888; margin: 14px 0 8px; border-top: 1px solid #eee; padding-top: 10px; }
|
||||||
|
.ch-modal-footer { display: flex; justify-content: flex-end; gap: 8px; margin-top: 18px; }
|
||||||
|
.ch-modal-status { font-size: .83em; margin-top: 8px; }
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
@@ -266,20 +330,152 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Notification channels -->
|
{% if current_user %}
|
||||||
|
<!-- ---- Editable identity ---- -->
|
||||||
|
<div class="section edit-section">
|
||||||
|
<h4>Identity</h4>
|
||||||
|
<div class="edit-field">
|
||||||
|
<label for="profile-fullname">Display name</label>
|
||||||
|
<input id="profile-fullname" class="edit-input" type="text" value="{{ current_user.full_name | e }}" placeholder="Full name">
|
||||||
|
</div>
|
||||||
|
<div class="edit-field">
|
||||||
|
<label for="profile-avatar">Avatar URL or path</label>
|
||||||
|
<input id="profile-avatar" class="edit-input" type="text" value="{{ current_user.avatar | e }}" placeholder="/path/to/avatar.png or https://…">
|
||||||
|
</div>
|
||||||
|
<div class="save-row">
|
||||||
|
<button class="btn-save" onclick="saveIdentity()">Save</button>
|
||||||
|
<span id="identity-status" class="status-msg"></span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- ---- Change password ---- -->
|
||||||
|
<div class="section edit-section">
|
||||||
|
<h4>Change password</h4>
|
||||||
|
<div class="edit-field">
|
||||||
|
<label for="profile-current-pw">Current password</label>
|
||||||
|
<input id="profile-current-pw" class="edit-input" type="password" autocomplete="current-password">
|
||||||
|
</div>
|
||||||
|
<div class="edit-field">
|
||||||
|
<label for="profile-new-pw">New password</label>
|
||||||
|
<input id="profile-new-pw" class="edit-input" type="password" autocomplete="new-password">
|
||||||
|
</div>
|
||||||
|
<div class="save-row">
|
||||||
|
<button class="btn-save" onclick="changePassword()">Change password</button>
|
||||||
|
<span id="password-status" class="status-msg"></span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<!-- Notification channels — chip picker -->
|
||||||
<div class="section">
|
<div class="section">
|
||||||
<h2>Notification Channels</h2>
|
<h2>Notification Channels</h2>
|
||||||
{% if notification_channels %}
|
{% if current_user %}
|
||||||
{% for ch in notification_channels %}
|
<p style="font-size:.82em;color:#888;margin:0 0 12px">Click a channel to add or remove it from your alert list.</p>
|
||||||
<div class="channel-row">
|
{% if all_channels %}
|
||||||
<span class="channel-type">{{ ch.type }}</span>
|
<div class="ch-picker">
|
||||||
<span class="channel-name">{{ ch.name }}</span>
|
<div class="ch-picker-label">Selected</div>
|
||||||
</div>
|
<div id="selected-chips" class="ch-chips">
|
||||||
|
{% for ch in all_channels %}
|
||||||
|
{% if ch.name in (current_user.notification_channels or []) %}
|
||||||
|
<button class="ch-chip selected" data-ch="{{ ch.name | e }}" onclick="toggleChip(this)">
|
||||||
|
{{ ch.name | e }} <span class="ch-chip-x">×</span>
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% else %}
|
{% set selected_set = current_user.notification_channels or [] %}
|
||||||
<span class="no-hosts">No personal notification channels configured.</span>
|
{% set has_selected = selected_set | length > 0 %}
|
||||||
|
{% if not has_selected %}
|
||||||
|
<span style="font-size:.83em;color:#bbb;font-style:italic;align-self:center">None selected</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
<div class="ch-picker-label">Available</div>
|
||||||
|
<div id="available-chips" class="ch-chips">
|
||||||
|
{% for ch in all_channels %}
|
||||||
|
{% if ch.name not in (current_user.notification_channels or []) %}
|
||||||
|
<button class="ch-chip available" data-ch="{{ ch.name | e }}" onclick="toggleChip(this)">
|
||||||
|
+ {{ ch.name | e }}
|
||||||
|
</button>
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<p style="font-size:.83em;color:#bbb;font-style:italic">No notification channels available. You can create your own below.</p>
|
||||||
|
{% endif %}
|
||||||
|
<div class="save-row">
|
||||||
|
<button class="btn-save" onclick="saveChannels()">Save channels</button>
|
||||||
|
<span id="channels-status" class="status-msg"></span>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<span class="no-hosts">Log in to manage notification channels.</span>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- My Channels — create/edit/delete own channels -->
|
||||||
|
{% if current_user %}
|
||||||
|
<div class="section">
|
||||||
|
<h2>My Channels</h2>
|
||||||
|
<p style="font-size:.82em;color:#888;margin:0 0 12px">Channels you own. Public channels are available to all users; private channels are visible only to you.</p>
|
||||||
|
<div id="my-channels-list">
|
||||||
|
{% set my_channels = all_channels | selectattr('owner', 'equalto', current_user.username) | list %}
|
||||||
|
{% for ch in my_channels %}
|
||||||
|
<div class="my-ch-card" id="mychcard-{{ ch.name | e }}">
|
||||||
|
<div class="my-ch-header">
|
||||||
|
<span class="my-ch-name">{{ ch.name | e }}</span>
|
||||||
|
<span class="my-ch-type">{{ ch.type | e }}</span>
|
||||||
|
{% if ch.private %}<span class="my-ch-private">private</span>{% endif %}
|
||||||
|
<span class="my-ch-actions">
|
||||||
|
<button class="btn-sm-edit" onclick="openMyChModal('{{ ch.name | e }}')">Edit</button>
|
||||||
|
<button class="btn-sm-del" onclick="deleteMyChannel('{{ ch.name | e }}')">✕</button>
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
{% if not my_channels %}
|
||||||
|
<p id="my-channels-empty" style="font-size:.83em;color:#bbb;font-style:italic">No channels yet.</p>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
<div class="save-row" style="margin-top:8px">
|
||||||
|
<button class="btn-save" onclick="openMyChModal()">+ New channel</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- My Channels modal -->
|
||||||
|
<div id="my-ch-modal" class="ch-modal-overlay" style="display:none" onclick="if(event.target===this)closeMyChModal()">
|
||||||
|
<div class="ch-modal-box">
|
||||||
|
<h3 id="my-ch-modal-title">New Channel</h3>
|
||||||
|
<div class="ch-form-row">
|
||||||
|
<label>Channel name</label>
|
||||||
|
<input type="text" id="my-ch-name" placeholder="e.g. my_pushover" autocomplete="off">
|
||||||
|
</div>
|
||||||
|
<div class="ch-form-row">
|
||||||
|
<label>Type</label>
|
||||||
|
<select id="my-ch-type" onchange="onMyChTypeChange()">
|
||||||
|
<option value="">— select —</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div id="my-ch-type-fields"></div>
|
||||||
|
<div class="ch-form-divider">Options</div>
|
||||||
|
<div class="ch-form-row">
|
||||||
|
<label>Minimum alert level</label>
|
||||||
|
<select id="my-ch-min-level">
|
||||||
|
<option value="WARNING">WARNING (and above)</option>
|
||||||
|
<option value="CRITICAL">CRITICAL only</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="ch-form-row">
|
||||||
|
<label style="display:flex;align-items:center;gap:6px;cursor:pointer">
|
||||||
|
<input type="checkbox" id="my-ch-private"> Private — visible only to you
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div id="my-ch-modal-status" class="ch-modal-status"></div>
|
||||||
|
<div class="ch-modal-footer">
|
||||||
|
<button class="btn-save" style="background:#888" onclick="closeMyChModal()">Cancel</button>
|
||||||
|
<button class="btn-save" onclick="saveMyChannel()">Save</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<!-- Host access -->
|
<!-- Host access -->
|
||||||
<div class="section">
|
<div class="section">
|
||||||
@@ -326,5 +522,236 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
<script>
|
||||||
|
// ---- Identity ----
|
||||||
|
/**
 * Save the display name and avatar entered in the Identity section.
 *
 * Reads #profile-fullname and #profile-avatar, sends them as a JSON
 * `{full_name, avatar}` body in a PUT to /api/0/users/me, and reports the
 * outcome in #identity-status via showStatus().
 */
async function saveIdentity() {
  const full_name = document.getElementById('profile-fullname').value;
  const avatar = document.getElementById('profile-avatar').value;
  try {
    const resp = await fetch('/api/0/users/me', {
      method: 'PUT',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({full_name, avatar}),
    });
    if (resp.ok) {
      showStatus('identity-status', 'Saved', '#2e7d32');
    } else {
      // The error body may not be JSON; fall back to a generic message.
      const err = await resp.json().catch(() => ({}));
      showStatus('identity-status', err.error || 'Error saving', '#c62828');
    }
  } catch (e) {
    // fetch() rejects on network failure; surface it instead of failing silently.
    showStatus('identity-status', 'Network error: ' + e.message, '#c62828');
  }
}
|
||||||
|
|
||||||
|
// ---- Password ----
|
||||||
|
/**
 * Change the current user's password.
 *
 * Requires both #profile-current-pw and #profile-new-pw to be non-empty,
 * then sends `{password: {current, new}}` as JSON in a PUT to
 * /api/0/users/me. On success both inputs are cleared. The outcome is
 * reported in #password-status via showStatus().
 */
async function changePassword() {
  const currentInput = document.getElementById('profile-current-pw');
  const newInput = document.getElementById('profile-new-pw');
  const current = currentInput.value;
  const newpw = newInput.value;
  if (!current || !newpw) {
    showStatus('password-status', 'Both fields are required', '#c62828');
    return;
  }
  try {
    const resp = await fetch('/api/0/users/me', {
      method: 'PUT',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({password: {current, new: newpw}}),
    });
    if (resp.ok) {
      // Do not leave passwords sitting in the form after a successful change.
      currentInput.value = '';
      newInput.value = '';
      showStatus('password-status', 'Password changed', '#2e7d32');
    } else {
      // The error body may not be JSON; fall back to a generic message.
      const err = await resp.json().catch(() => ({}));
      showStatus('password-status', err.error || 'Error', '#c62828');
    }
  } catch (e) {
    // fetch() rejects on network failure; surface it instead of failing silently.
    showStatus('password-status', 'Network error: ' + e.message, '#c62828');
  }
}
|
||||||
|
|
||||||
|
// ---- Channel chip picker ----
|
||||||
|
/**
 * Move a channel chip between the "Selected" and "Available" lists.
 *
 * The channel name is read from the chip's `data-ch` attribute. A selected
 * chip is relabelled "+ name" and appended to #available-chips; an available
 * chip is relabelled "name ×" and appended to #selected-chips. Afterwards the
 * "None selected" placeholder inside #selected-chips is shown or hidden.
 *
 * @param {HTMLElement} btn - The chip button that was clicked.
 */
function toggleChip(btn) {
  const name = btn.dataset.ch;
  const selectedBox = document.getElementById('selected-chips');
  const availableBox = document.getElementById('available-chips');

  if (btn.classList.contains('selected')) {
    // Move to available
    btn.classList.remove('selected');
    btn.classList.add('available');
    // textContent instead of innerHTML: the channel name is user-supplied
    // data and must never be parsed as markup.
    btn.textContent = '+ ' + name;
    availableBox.appendChild(btn);
  } else {
    // Move to selected
    btn.classList.remove('available');
    btn.classList.add('selected');
    btn.textContent = name + ' ';
    const removeMark = document.createElement('span');
    removeMark.className = 'ch-chip-x';
    removeMark.textContent = '\u00D7'; // multiplication sign used as the "remove" glyph
    btn.appendChild(removeMark);
    selectedBox.appendChild(btn);
  }
  // The chip keeps its inline onclick handler when it is re-parented, so the
  // handler does not need to be re-attached here.

  // Update placeholder visibility. The template only renders the placeholder
  // when nothing is selected at page load, so create it on demand when the
  // last selected chip is removed.
  const hasChips = selectedBox.querySelector('.ch-chip.selected') !== null;
  let placeholder = selectedBox.querySelector('span[style]');
  if (!placeholder && !hasChips) {
    placeholder = document.createElement('span');
    placeholder.style.cssText = 'font-size:.83em;color:#bbb;font-style:italic;align-self:center';
    placeholder.textContent = 'None selected';
    selectedBox.appendChild(placeholder);
  }
  if (placeholder) placeholder.style.display = hasChips ? 'none' : '';
}
|
||||||
|
|
||||||
|
/**
 * Persist the user's selected notification channels.
 *
 * Collects the `data-ch` value of every `.ch-chip.selected` inside
 * #selected-chips, sends them as `{notification_channels: [...]}` in a PUT
 * to /api/0/users/me, and reports the outcome in #channels-status via
 * showStatus().
 */
async function saveChannels() {
  const notification_channels = Array.from(
    document.querySelectorAll('#selected-chips .ch-chip.selected'),
    (chip) => chip.dataset.ch
  );
  try {
    const resp = await fetch('/api/0/users/me', {
      method: 'PUT',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({notification_channels}),
    });
    if (resp.ok) {
      showStatus('channels-status', 'Saved', '#2e7d32');
    } else {
      // The error body may not be JSON; fall back to a generic message.
      const err = await resp.json().catch(() => ({}));
      showStatus('channels-status', err.error || 'Error saving', '#c62828');
    }
  } catch (e) {
    // fetch() rejects on network failure; surface it instead of failing silently.
    showStatus('channels-status', 'Network error: ' + e.message, '#c62828');
  }
}
|
||||||
|
|
||||||
|
// ---- My Channels CRUD ----
|
||||||
|
let _myChSchemas = {};
|
||||||
|
let _myChEditName = null;
|
||||||
|
|
||||||
|
/**
 * Fetch the channel-type schemas and populate the #my-ch-type <select>.
 *
 * The response from /api/0/notification_channel_types is stored in the
 * file-level `_myChSchemas` and one <option> is appended per entry
 * (value = key, text = entry label). Failures are logged with console.warn
 * and leave `_myChSchemas` untouched.
 */
async function _loadMyChSchemas() {
  try {
    const r = await fetch('/api/0/notification_channel_types');
    // fetch() resolves on HTTP errors too; without this check an error body
    // would be stored as the schema map and rendered as <option>s.
    if (!r.ok) throw new Error('HTTP ' + r.status);
    const schemas = await r.json();
    // The rest of the page indexes schemas by type name, so anything other
    // than a plain object is unusable.
    if (!schemas || typeof schemas !== 'object' || Array.isArray(schemas)) {
      throw new Error('Unexpected schema response');
    }
    _myChSchemas = schemas;
    const sel = document.getElementById('my-ch-type');
    if (!sel) return;
    Object.entries(schemas).forEach(([k, v]) => {
      const opt = document.createElement('option');
      opt.value = k;
      // Fall back to the type key if an entry carries no label.
      opt.textContent = (v && v.label) || k;
      sel.appendChild(opt);
    });
  } catch (e) { console.warn('Could not load channel schemas', e); }
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the type-specific inputs of the channel modal.
 *
 * Clears #my-ch-type-fields, then, if the type chosen in #my-ch-type has an
 * entry in `_myChSchemas`, adds a "<label> settings" divider followed by one
 * labelled input per schema field. Fields of type 'secret' become password
 * inputs; every input gets the id 'mychf-' + field key.
 */
function onMyChTypeChange() {
  const chosenType = document.getElementById('my-ch-type').value;
  const fieldsBox = document.getElementById('my-ch-type-fields');
  fieldsBox.innerHTML = '';

  const schema = chosenType ? _myChSchemas[chosenType] : undefined;
  if (!schema) return;

  const heading = document.createElement('div');
  heading.className = 'ch-form-divider';
  heading.textContent = schema.label + ' settings';
  fieldsBox.appendChild(heading);

  for (const field of schema.fields || []) {
    const label = document.createElement('label');
    label.textContent = field.label + (field.required ? ' *' : '');

    const input = document.createElement('input');
    input.type = field.type === 'secret' ? 'password' : 'text';
    input.id = 'mychf-' + field.key;
    input.placeholder = field.required ? '(required)' : '(optional)';
    input.autocomplete = 'off';

    const formRow = document.createElement('div');
    formRow.className = 'ch-form-row';
    formRow.appendChild(label);
    formRow.appendChild(input);
    fieldsBox.appendChild(formRow);
  }
}
|
||||||
|
|
||||||
|
/**
 * Open the "My Channels" modal for creating or editing a channel.
 *
 * Without `name` the form is reset for a new channel. With `name` the name
 * input is pre-filled and disabled, the channel list is fetched from
 * /api/0/notification_channels, and the entry whose `name` matches is used
 * to fill type, minimum level, private flag and type-specific fields.
 * A failed load is logged with console.warn; the modal is shown either way.
 *
 * @param {string} [name] - Name of the channel to edit; omit to create one.
 */
async function openMyChModal(name) {
  const byId = (id) => document.getElementById(id);
  const editing = Boolean(name);

  // Remember which channel is being edited so saveMyChannel() can pick PUT vs POST.
  _myChEditName = name || null;

  // Reset the form to its defaults.
  byId('my-ch-modal-status').textContent = '';
  byId('my-ch-modal-title').textContent = editing ? 'Edit Channel' : 'New Channel';
  const nameInput = byId('my-ch-name');
  nameInput.value = name || '';
  nameInput.disabled = editing;
  byId('my-ch-type').value = '';
  byId('my-ch-type-fields').innerHTML = '';
  byId('my-ch-min-level').value = 'WARNING';
  byId('my-ch-private').checked = false;

  if (editing) {
    try {
      const resp = await fetch('/api/0/notification_channels');
      const list = await resp.json();
      const match = list.find((entry) => entry.name === name);
      if (match) {
        byId('my-ch-type').value = match.type;
        // Build the type-specific inputs before filling them in.
        onMyChTypeChange();
        byId('my-ch-min-level').value = match.min_level || 'WARNING';
        byId('my-ch-private').checked = match.private || false;
        for (const field of match.fields || []) {
          const input = byId('mychf-' + field.key);
          if (input) input.value = field.value || '';
        }
      }
    } catch (e) {
      console.warn('Failed to load channel', e);
    }
  }

  byId('my-ch-modal').style.display = 'flex';
}
|
||||||
|
|
||||||
|
/** Hide the "My Channels" modal overlay (#my-ch-modal). */
function closeMyChModal() {
  const overlay = document.getElementById('my-ch-modal');
  overlay.style.display = 'none';
}
|
||||||
|
|
||||||
|
/**
 * Create or update a channel from the values in the "My Channels" modal.
 *
 * Validates that a name and a type are present, builds a JSON body of
 * `{name, type, min_level, private}` plus one property per schema field of
 * the chosen type, then sends it with POST to /api/0/notification_channels
 * (new channel) or PUT to /api/0/notification_channels/<name> (when
 * `_myChEditName` is set). On success the modal is closed and the page
 * reloaded; on failure the message is written to #my-ch-modal-status.
 */
async function saveMyChannel() {
  const name = document.getElementById('my-ch-name').value.trim();
  const type = document.getElementById('my-ch-type').value;
  const minLevel = document.getElementById('my-ch-min-level').value;
  const isPrivate = document.getElementById('my-ch-private').checked;
  const statusEl = document.getElementById('my-ch-modal-status');

  // Show an error message in the modal's status line.
  const fail = (message) => {
    statusEl.textContent = message;
    statusEl.style.color = '#c62828';
  };

  statusEl.textContent = '';
  if (!name) { fail('Name is required.'); return; }
  if (!type) { fail('Please select a type.'); return; }

  const payload = { name, type, min_level: minLevel, private: isPrivate };
  const schema = _myChSchemas[type];
  if (schema) {
    for (const field of schema.fields || []) {
      const input = document.getElementById('mychf-' + field.key);
      if (input) payload[field.key] = input.value;
    }
  }

  // Editing targets the existing channel by name; creating posts to the collection.
  let url = '/api/0/notification_channels';
  let method = 'POST';
  if (_myChEditName) {
    url = '/api/0/notification_channels/' + encodeURIComponent(_myChEditName);
    method = 'PUT';
  }

  try {
    const resp = await fetch(url, {
      method,
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify(payload),
    });
    if (resp.ok) {
      closeMyChModal();
      window.location.reload();
      return;
    }
    // The error body may not be JSON; fall back to a generic message.
    const detail = await resp.json().catch(() => ({}));
    fail(detail.error || 'Error saving.');
  } catch (e) {
    fail('Network error: ' + e.message);
  }
}
|
||||||
|
|
||||||
|
/**
 * Delete one of the user's channels after a confirm() prompt.
 *
 * Sends DELETE to /api/0/notification_channels/<name>. The page is reloaded
 * on success; otherwise the error is shown with alert().
 *
 * @param {string} name - Name of the channel to delete.
 */
async function deleteMyChannel(name) {
  const confirmed = confirm('Delete channel "' + name + '"?');
  if (!confirmed) return;

  try {
    const endpoint = '/api/0/notification_channels/' + encodeURIComponent(name);
    const resp = await fetch(endpoint, { method: 'DELETE' });
    if (resp.ok) {
      window.location.reload();
      return;
    }
    // The error body may not be JSON; fall back to a generic message.
    const detail = await resp.json().catch(() => ({}));
    alert('Error: ' + (detail.error || 'Could not delete.'));
  } catch (e) {
    alert('Network error: ' + e.message);
  }
}
|
||||||
|
|
||||||
|
// ---- Utilities ----
|
||||||
|
/**
 * Show a transient status message in the element with the given id.
 *
 * The message is displayed in the given colour and cleared after 3 seconds.
 * Does nothing if the element does not exist.
 *
 * @param {string} id - Id of the status element.
 * @param {string} msg - Text to display.
 * @param {string} color - CSS colour for the text.
 */
function showStatus(id, msg, color) {
  const el = document.getElementById(id);
  if (!el) return;
  el.textContent = msg;
  el.style.color = color;
  // Cancel any pending clear from an earlier call; otherwise the older timer
  // would wipe this newer message before its own 3 seconds have elapsed.
  clearTimeout(el._statusTimer);
  el._statusTimer = setTimeout(() => {
    el.textContent = '';
    el._statusTimer = undefined;
  }, 3000);
}
|
||||||
|
|
||||||
|
/**
 * Escape a value for safe insertion into HTML text or attribute values.
 *
 * The value is converted with String() and the characters & < > " ' are
 * replaced by their HTML entities. The ampersand is replaced first so the
 * entities produced by the later replacements are not double-escaped.
 *
 * @param {*} s - Value to escape.
 * @returns {string} The escaped string.
 */
function escHtml(s) {
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
||||||
|
|
||||||
|
document.addEventListener('DOMContentLoaded', _loadMyChSchemas);
|
||||||
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
+1235
-145
File diff suppressed because it is too large
Load Diff
+468
-109
@@ -9,10 +9,11 @@ This module provides a flexible threshold checking system that:
|
|||||||
- Supports multiple comparison operators
|
- Supports multiple comparison operators
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Dict, Any, Optional, Tuple, Callable
|
from typing import Dict, List, Any, Optional, Tuple, Callable
|
||||||
from . import notify as notify_mod
|
from . import notify as notify_mod
|
||||||
from .config import THRESHOLD_DEFAULTS
|
from .config import THRESHOLD_DEFAULTS
|
||||||
|
|
||||||
@@ -35,6 +36,7 @@ class ComparisonOperator(Enum):
|
|||||||
LTE = "<=" # Less than or equal
|
LTE = "<=" # Less than or equal
|
||||||
EQ = "==" # Equal to
|
EQ = "==" # Equal to
|
||||||
NEQ = "!=" # Not equal to
|
NEQ = "!=" # Not equal to
|
||||||
|
NAGIOS = "nagios" # Nagios exit-code semantics: 0=OK 1=WARNING 2=CRITICAL 3=UNKNOWN
|
||||||
|
|
||||||
|
|
||||||
class AlertState:
|
class AlertState:
|
||||||
@@ -56,6 +58,7 @@ class AlertState:
|
|||||||
self.last_notification = None
|
self.last_notification = None
|
||||||
self.threshold_value = None # The threshold value that triggered alert
|
self.threshold_value = None # The threshold value that triggered alert
|
||||||
self.operator = None # The comparison operator (>, <, >=, etc.)
|
self.operator = None # The comparison operator (>, <, >=, etc.)
|
||||||
|
self.hysteresis: Optional[float] = None # Hysteresis fraction used for recovery
|
||||||
self.formatted_message = None # Formatted display message for UI
|
self.formatted_message = None # Formatted display message for UI
|
||||||
self.acknowledged = False # Whether alert has been acknowledged
|
self.acknowledged = False # Whether alert has been acknowledged
|
||||||
self.acknowledged_at = None # Timestamp when acknowledged
|
self.acknowledged_at = None # Timestamp when acknowledged
|
||||||
@@ -151,6 +154,15 @@ class AlertState:
|
|||||||
if self.formatted_message is not None:
|
if self.formatted_message is not None:
|
||||||
result["formatted_message"] = self.formatted_message
|
result["formatted_message"] = self.formatted_message
|
||||||
|
|
||||||
|
# Compute and expose the recovery threshold so the UI can display it
|
||||||
|
if (self.hysteresis and self.threshold_value is not None
|
||||||
|
and self.operator is not None):
|
||||||
|
ha = abs(self.threshold_value * self.hysteresis)
|
||||||
|
if self.operator in ('>', '>='):
|
||||||
|
result["recovery_threshold"] = round(self.threshold_value - ha, 4)
|
||||||
|
elif self.operator in ('<', '<='):
|
||||||
|
result["recovery_threshold"] = round(self.threshold_value + ha, 4)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def __setstate__(self, state):
|
def __setstate__(self, state):
|
||||||
@@ -158,6 +170,8 @@ class AlertState:
|
|||||||
self.__dict__.update(state)
|
self.__dict__.update(state)
|
||||||
if not hasattr(self, 'consecutive_count'):
|
if not hasattr(self, 'consecutive_count'):
|
||||||
self.consecutive_count = 0
|
self.consecutive_count = 0
|
||||||
|
if not hasattr(self, 'hysteresis'):
|
||||||
|
self.hysteresis = None
|
||||||
|
|
||||||
def acknowledge(self):
|
def acknowledge(self):
|
||||||
"""Acknowledge this alert to stop reminder notifications."""
|
"""Acknowledge this alert to stop reminder notifications."""
|
||||||
@@ -181,6 +195,7 @@ class ThresholdConfig:
|
|||||||
hysteresis: float = 0.0,
|
hysteresis: float = 0.0,
|
||||||
enabled: bool = True,
|
enabled: bool = True,
|
||||||
count: int = 1,
|
count: int = 1,
|
||||||
|
grace: Optional[float] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize threshold configuration.
|
Initialize threshold configuration.
|
||||||
@@ -193,6 +208,7 @@ class ThresholdConfig:
|
|||||||
hysteresis: Hysteresis percentage to prevent flapping (0.0-1.0)
|
hysteresis: Hysteresis percentage to prevent flapping (0.0-1.0)
|
||||||
enabled: Whether this threshold is enabled
|
enabled: Whether this threshold is enabled
|
||||||
count: Number of consecutive exceedances required before alerting (default 1)
|
count: Number of consecutive exceedances required before alerting (default 1)
|
||||||
|
grace: Per-metric grace period in seconds; overrides global grace when set
|
||||||
"""
|
"""
|
||||||
self.metric_path = metric_path
|
self.metric_path = metric_path
|
||||||
self.warning = warning
|
self.warning = warning
|
||||||
@@ -201,6 +217,7 @@ class ThresholdConfig:
|
|||||||
self.hysteresis = hysteresis
|
self.hysteresis = hysteresis
|
||||||
self.display = display
|
self.display = display
|
||||||
self.count = max(1, int(count))
|
self.count = max(1, int(count))
|
||||||
|
self.grace = float(grace) if grace is not None else None
|
||||||
|
|
||||||
# Parse operator
|
# Parse operator
|
||||||
try:
|
try:
|
||||||
@@ -226,6 +243,16 @@ class ThresholdConfig:
|
|||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return AlertLevel.OK
|
return AlertLevel.OK
|
||||||
|
|
||||||
|
# Nagios exit-code semantics: value IS the severity
|
||||||
|
if self.operator == ComparisonOperator.NAGIOS:
|
||||||
|
try:
|
||||||
|
code = int(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return AlertLevel.UNKNOWN
|
||||||
|
return {0: AlertLevel.OK, 1: AlertLevel.WARNING, 2: AlertLevel.CRITICAL}.get(
|
||||||
|
code, AlertLevel.UNKNOWN
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Convert value to float for comparison
|
# Convert value to float for comparison
|
||||||
value = float(value)
|
value = float(value)
|
||||||
@@ -262,6 +289,10 @@ class ThresholdConfig:
|
|||||||
"""
|
"""
|
||||||
new_level = self.evaluate(value)
|
new_level = self.evaluate(value)
|
||||||
|
|
||||||
|
# Nagios exit codes are discrete integers — hysteresis doesn't apply
|
||||||
|
if self.operator == ComparisonOperator.NAGIOS:
|
||||||
|
return new_level
|
||||||
|
|
||||||
# If no hysteresis, return new level
|
# If no hysteresis, return new level
|
||||||
if self.hysteresis == 0.0:
|
if self.hysteresis == 0.0:
|
||||||
return new_level
|
return new_level
|
||||||
@@ -328,14 +359,17 @@ class ThresholdChecker:
|
|||||||
renotify_interval: Seconds between repeat notifications (default: 1 hour)
|
renotify_interval: Seconds between repeat notifications (default: 1 hour)
|
||||||
journal: Optional MessageJournal instance for logging threshold events
|
journal: Optional MessageJournal instance for logging threshold events
|
||||||
"""
|
"""
|
||||||
# Named threshold configurations: {config_name: {metric_path: ThresholdConfig}}
|
# Named threshold configurations (pre-merged: defaults + overrides): {config_name: {metric_path: ThresholdConfig}}
|
||||||
self.threshold_configs = {}
|
self.threshold_configs = {}
|
||||||
|
|
||||||
|
# Raw overrides only for each named config (no defaults baked in): {config_name: {metric_path: ThresholdConfig}}
|
||||||
|
self.threshold_raw_configs: Dict[str, Dict[str, ThresholdConfig]] = {}
|
||||||
|
|
||||||
# Single threshold set for backward compatibility: {metric_path: ThresholdConfig}
|
# Single threshold set for backward compatibility: {metric_path: ThresholdConfig}
|
||||||
self.thresholds = {}
|
self.thresholds = {}
|
||||||
|
|
||||||
# Host to config name mapping: {host_name: config_name}
|
# Host to ordered list of config names: {host_name: [config_name, ...]}
|
||||||
self.host_config_mapping = {}
|
self.host_config_mapping: Dict[str, List[str]] = {}
|
||||||
|
|
||||||
# Default config name to use when no mapping exists
|
# Default config name to use when no mapping exists
|
||||||
self.default_config = "default"
|
self.default_config = "default"
|
||||||
@@ -372,6 +406,7 @@ class ThresholdChecker:
|
|||||||
|
|
||||||
# Clear old configuration
|
# Clear old configuration
|
||||||
self.threshold_configs.clear()
|
self.threshold_configs.clear()
|
||||||
|
self.threshold_raw_configs.clear()
|
||||||
self.thresholds.clear()
|
self.thresholds.clear()
|
||||||
self.host_config_mapping.clear()
|
self.host_config_mapping.clear()
|
||||||
self.grace_seconds = float(config.get("grace", 2))
|
self.grace_seconds = float(config.get("grace", 2))
|
||||||
@@ -391,10 +426,24 @@ class ThresholdChecker:
|
|||||||
Supports two formats:
|
Supports two formats:
|
||||||
1. Legacy format with direct 'thresholds' section
|
1. Legacy format with direct 'thresholds' section
|
||||||
2. New format with 'threshold_configs' and 'host_threshold_mapping'
|
2. New format with 'threshold_configs' and 'host_threshold_mapping'
|
||||||
|
|
||||||
|
In all cases, THRESHOLD_DEFAULTS are seeded into threshold_configs["default"]
|
||||||
|
so the Settings page always shows the built-in defaults.
|
||||||
|
_parse_multi_config() overwrites this with the fully-merged effective defaults.
|
||||||
"""
|
"""
|
||||||
|
# Always expose built-in defaults through threshold_configs["default"] so
|
||||||
|
# the Settings page has something to display even in legacy/no-config mode.
|
||||||
|
seed: Dict[str, ThresholdConfig] = {}
|
||||||
|
for plugin_name, plugin_thresholds in THRESHOLD_DEFAULTS.get("thresholds", {}).items():
|
||||||
|
if isinstance(plugin_thresholds, dict):
|
||||||
|
self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=seed)
|
||||||
|
if seed:
|
||||||
|
self.threshold_configs["default"] = seed
|
||||||
|
self.threshold_raw_configs["default"] = {}
|
||||||
|
|
||||||
# Check for new multi-config format
|
# Check for new multi-config format
|
||||||
if "threshold_configs" in config:
|
if "threshold_configs" in config:
|
||||||
self._parse_multi_config(config)
|
self._parse_multi_config(config) # overwrites threshold_configs["default"]
|
||||||
elif "thresholds" in config:
|
elif "thresholds" in config:
|
||||||
# Legacy single threshold configuration
|
# Legacy single threshold configuration
|
||||||
self._parse_legacy_config(config)
|
self._parse_legacy_config(config)
|
||||||
@@ -424,9 +473,10 @@ class ThresholdChecker:
|
|||||||
self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=effective_defaults)
|
self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=effective_defaults)
|
||||||
|
|
||||||
self.threshold_configs["default"] = dict(effective_defaults)
|
self.threshold_configs["default"] = dict(effective_defaults)
|
||||||
|
self.threshold_raw_configs["default"] = {}
|
||||||
logger.info("Registered 'default' threshold config with %d metrics", len(effective_defaults))
|
logger.info("Registered 'default' threshold config with %d metrics", len(effective_defaults))
|
||||||
|
|
||||||
# Parse each named configuration, seeding it with effective_defaults first
|
# Parse each named configuration
|
||||||
for config_name, config_data in threshold_configs.items():
|
for config_name, config_data in threshold_configs.items():
|
||||||
if config_name == "default":
|
if config_name == "default":
|
||||||
continue # already handled above
|
continue # already handled above
|
||||||
@@ -440,33 +490,61 @@ class ThresholdChecker:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
logger.info("Parsing threshold configuration: %s", config_name)
|
logger.info("Parsing threshold configuration: %s", config_name)
|
||||||
self.threshold_configs[config_name] = dict(effective_defaults)
|
|
||||||
|
|
||||||
|
# Raw overrides only (used for multi-config layering)
|
||||||
|
raw_overrides: Dict[str, ThresholdConfig] = {}
|
||||||
thresholds_config = config_data["thresholds"]
|
thresholds_config = config_data["thresholds"]
|
||||||
for plugin_name, plugin_thresholds in thresholds_config.items():
|
for plugin_name, plugin_thresholds in thresholds_config.items():
|
||||||
if not isinstance(plugin_thresholds, dict):
|
if not isinstance(plugin_thresholds, dict):
|
||||||
continue
|
continue
|
||||||
|
plugin_enabled = plugin_thresholds.get('enabled', plugin_thresholds.get('enable', True))
|
||||||
self._parse_plugin_thresholds(
|
if not plugin_enabled:
|
||||||
plugin_name,
|
# raw_overrides is empty at this point so there's nothing to delete.
|
||||||
plugin_thresholds,
|
# Instead, inject disabled stubs for every matching effective_default so
|
||||||
target_dict=self.threshold_configs[config_name]
|
# the merge step overwrites the inherited defaults.
|
||||||
|
for key, tc in effective_defaults.items():
|
||||||
|
if key.startswith(f"{plugin_name}."):
|
||||||
|
raw_overrides[key] = ThresholdConfig(
|
||||||
|
metric_path=key,
|
||||||
|
warning=tc.warning,
|
||||||
|
critical=tc.critical,
|
||||||
|
operator=tc.operator.value,
|
||||||
|
enabled=False,
|
||||||
)
|
)
|
||||||
|
logger.info(
|
||||||
|
"Plugin-level disable in config '%s': disabled all thresholds for %s",
|
||||||
|
config_name, plugin_name,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=raw_overrides)
|
||||||
|
self.threshold_raw_configs[config_name] = raw_overrides
|
||||||
|
|
||||||
# Parse host to config mapping from two possible sources
|
# Pre-merged version (defaults + overrides) for single-config fast path
|
||||||
# 1. New format: hosts section with threshold_config attribute
|
self.threshold_configs[config_name] = dict(effective_defaults)
|
||||||
|
self.threshold_configs[config_name].update(raw_overrides)
|
||||||
|
|
||||||
|
# Parse host → config list mapping from two possible sources
|
||||||
|
|
||||||
|
def _normalise(value) -> List[str]:
|
||||||
|
"""Accept a string or list; always return a list."""
|
||||||
|
if isinstance(value, list):
|
||||||
|
return [str(v) for v in value]
|
||||||
|
return [str(value)]
|
||||||
|
|
||||||
|
# 1. hosts section with threshold_config attribute (string or list)
|
||||||
if "hosts" in config:
|
if "hosts" in config:
|
||||||
hosts_config = config["hosts"]
|
hosts_config = config["hosts"]
|
||||||
if isinstance(hosts_config, dict):
|
if isinstance(hosts_config, dict):
|
||||||
for host_name, host_attrs in hosts_config.items():
|
for host_name, host_attrs in hosts_config.items():
|
||||||
if isinstance(host_attrs, dict) and "threshold_config" in host_attrs:
|
if isinstance(host_attrs, dict) and "threshold_config" in host_attrs:
|
||||||
self.host_config_mapping[host_name] = host_attrs["threshold_config"]
|
self.host_config_mapping[host_name] = _normalise(host_attrs["threshold_config"])
|
||||||
|
|
||||||
# 2. Legacy format: host_threshold_mapping section (for backward compatibility)
|
# 2. Legacy host_threshold_mapping section (string values only)
|
||||||
if "host_threshold_mapping" in config:
|
if "host_threshold_mapping" in config:
|
||||||
legacy_mapping = config.get("host_threshold_mapping", {})
|
legacy_mapping = config.get("host_threshold_mapping", {})
|
||||||
if isinstance(legacy_mapping, dict):
|
if isinstance(legacy_mapping, dict):
|
||||||
self.host_config_mapping.update(legacy_mapping)
|
for host_name, value in legacy_mapping.items():
|
||||||
|
self.host_config_mapping[host_name] = _normalise(value)
|
||||||
|
|
||||||
# Set default config (first one alphabetically or explicitly set)
|
# Set default config (first one alphabetically or explicitly set)
|
||||||
self.default_config = config.get("default_threshold_config", "default")
|
self.default_config = config.get("default_threshold_config", "default")
|
||||||
@@ -516,14 +594,26 @@ class ThresholdChecker:
|
|||||||
self._parse_rtt_thresholds(thresholds, target_dict)
|
self._parse_rtt_thresholds(thresholds, target_dict)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Plugin-level enabled: false (also accept 'enable' as a common typo) removes all
|
||||||
|
# thresholds for this plugin — e.g. memory_monitor: {enabled: false}.
|
||||||
|
plugin_enabled = thresholds.get('enabled', thresholds.get('enable', True))
|
||||||
|
if not plugin_enabled:
|
||||||
|
for key in [k for k in target_dict if k.startswith(f"{plugin_name}.")]:
|
||||||
|
del target_dict[key]
|
||||||
|
logger.info("Plugin-level disable: removed all thresholds for %s", plugin_name)
|
||||||
|
return
|
||||||
|
|
||||||
for metric_name, threshold_config in thresholds.items():
|
for metric_name, threshold_config in thresholds.items():
|
||||||
if not isinstance(threshold_config, dict):
|
if not isinstance(threshold_config, dict):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Handle nested metrics (e.g., partitions./.percent)
|
# Handle nested metrics (e.g., partitions./.percent or pools.*.status)
|
||||||
if metric_name == "partitions":
|
if metric_name == "partitions":
|
||||||
self._parse_partition_thresholds(plugin_name, threshold_config, target_dict)
|
self._parse_partition_thresholds(plugin_name, threshold_config, target_dict)
|
||||||
continue
|
continue
|
||||||
|
if metric_name == "pools":
|
||||||
|
self._parse_pool_thresholds(plugin_name, threshold_config, target_dict)
|
||||||
|
continue
|
||||||
|
|
||||||
metric_path = f"{plugin_name}.{metric_name}"
|
metric_path = f"{plugin_name}.{metric_name}"
|
||||||
|
|
||||||
@@ -531,11 +621,15 @@ class ThresholdChecker:
|
|||||||
warning = threshold_config.get("warning")
|
warning = threshold_config.get("warning")
|
||||||
critical = threshold_config.get("critical")
|
critical = threshold_config.get("critical")
|
||||||
operator = threshold_config.get("operator", ">")
|
operator = threshold_config.get("operator", ">")
|
||||||
display = threshold_config.get("display", "(threshold: {op_symbol} {threshold_value})")
|
# Nagios operator maps exit codes directly; no numeric thresholds needed
|
||||||
hysteresis = threshold_config.get("hysteresis", 0.1) # 10% default
|
is_nagios_op = (operator == "nagios")
|
||||||
|
default_display = "{check_name}: {output}" if is_nagios_op else "(threshold: {op_symbol} {threshold_value})"
|
||||||
|
display = threshold_config.get("display", default_display)
|
||||||
|
hysteresis = threshold_config.get("hysteresis", 0.0 if is_nagios_op else 0.02)
|
||||||
enabled = threshold_config.get("enabled", True)
|
enabled = threshold_config.get("enabled", True)
|
||||||
|
grace = threshold_config.get("grace", None)
|
||||||
|
|
||||||
if warning is None and critical is None:
|
if warning is None and critical is None and not is_nagios_op:
|
||||||
logger.warning("No thresholds defined for %s, skipping", metric_path)
|
logger.warning("No thresholds defined for %s, skipping", metric_path)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -546,7 +640,8 @@ class ThresholdChecker:
|
|||||||
operator=operator,
|
operator=operator,
|
||||||
hysteresis=hysteresis,
|
hysteresis=hysteresis,
|
||||||
enabled=enabled,
|
enabled=enabled,
|
||||||
display=display
|
display=display,
|
||||||
|
grace=grace,
|
||||||
)
|
)
|
||||||
|
|
||||||
target_dict[metric_path] = threshold
|
target_dict[metric_path] = threshold
|
||||||
@@ -591,6 +686,7 @@ class ThresholdChecker:
|
|||||||
hysteresis = threshold_config.get("hysteresis", 0.1)
|
hysteresis = threshold_config.get("hysteresis", 0.1)
|
||||||
enabled = threshold_config.get("enabled", True)
|
enabled = threshold_config.get("enabled", True)
|
||||||
display = threshold_config.get("display")
|
display = threshold_config.get("display")
|
||||||
|
grace = threshold_config.get("grace", None)
|
||||||
if warning is None and critical is None:
|
if warning is None and critical is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -601,11 +697,64 @@ class ThresholdChecker:
|
|||||||
operator=operator,
|
operator=operator,
|
||||||
hysteresis=hysteresis,
|
hysteresis=hysteresis,
|
||||||
enabled=enabled,
|
enabled=enabled,
|
||||||
display=display
|
display=display,
|
||||||
|
grace=grace,
|
||||||
)
|
)
|
||||||
|
|
||||||
target_dict[metric_path] = threshold
|
target_dict[metric_path] = threshold
|
||||||
|
|
||||||
|
def _parse_pool_thresholds(
|
||||||
|
self,
|
||||||
|
plugin_name: str,
|
||||||
|
pools: Dict[str, Any],
|
||||||
|
target_dict: Optional[Dict[str, ThresholdConfig]] = None,
|
||||||
|
):
|
||||||
|
"""Parse ZFS pool thresholds. Pool names may be literal or '*' (all pools).
|
||||||
|
|
||||||
|
Config shape::
|
||||||
|
|
||||||
|
zfs_monitor:
|
||||||
|
pools:
|
||||||
|
'*':
|
||||||
|
status:
|
||||||
|
warning: 1
|
||||||
|
critical: 2
|
||||||
|
operator: '>'
|
||||||
|
tank:
|
||||||
|
capacity:
|
||||||
|
warning: 80
|
||||||
|
critical: 90
|
||||||
|
"""
|
||||||
|
if target_dict is None:
|
||||||
|
target_dict = self.thresholds
|
||||||
|
|
||||||
|
for pool_name, metrics in pools.items():
|
||||||
|
if not isinstance(metrics, dict):
|
||||||
|
continue
|
||||||
|
for metric_name, threshold_config in metrics.items():
|
||||||
|
if not isinstance(threshold_config, dict):
|
||||||
|
continue
|
||||||
|
metric_path = f"{plugin_name}.{pool_name}.{metric_name}"
|
||||||
|
warning = threshold_config.get("warning")
|
||||||
|
critical = threshold_config.get("critical")
|
||||||
|
operator = threshold_config.get("operator", ">")
|
||||||
|
hysteresis = threshold_config.get("hysteresis", 0.02)
|
||||||
|
enabled = threshold_config.get("enabled", True)
|
||||||
|
display = threshold_config.get("display")
|
||||||
|
grace = threshold_config.get("grace", None)
|
||||||
|
if warning is None and critical is None:
|
||||||
|
continue
|
||||||
|
target_dict[metric_path] = ThresholdConfig(
|
||||||
|
metric_path=metric_path,
|
||||||
|
warning=warning,
|
||||||
|
critical=critical,
|
||||||
|
operator=operator,
|
||||||
|
hysteresis=hysteresis,
|
||||||
|
enabled=enabled,
|
||||||
|
display=display,
|
||||||
|
grace=grace,
|
||||||
|
)
|
||||||
|
|
||||||
def _parse_rtt_thresholds(
|
def _parse_rtt_thresholds(
|
||||||
self,
|
self,
|
||||||
rtt_thresholds: Dict[str, Any],
|
rtt_thresholds: Dict[str, Any],
|
||||||
@@ -635,10 +784,11 @@ class ThresholdChecker:
|
|||||||
warning = rtt_thresholds.get("warning")
|
warning = rtt_thresholds.get("warning")
|
||||||
critical = rtt_thresholds.get("critical")
|
critical = rtt_thresholds.get("critical")
|
||||||
operator = rtt_thresholds.get("operator", ">")
|
operator = rtt_thresholds.get("operator", ">")
|
||||||
hysteresis = rtt_thresholds.get("hysteresis", 0.1) # 10% default
|
hysteresis = rtt_thresholds.get("hysteresis", 0.02) # 2% default
|
||||||
enabled = rtt_thresholds.get("enabled", True)
|
enabled = rtt_thresholds.get("enabled", True)
|
||||||
display = rtt_thresholds.get("display")
|
display = rtt_thresholds.get("display")
|
||||||
count = rtt_thresholds.get("count", 1)
|
count = rtt_thresholds.get("count", 1)
|
||||||
|
grace = rtt_thresholds.get("grace", None)
|
||||||
|
|
||||||
if warning is None and critical is None:
|
if warning is None and critical is None:
|
||||||
logger.warning("No RTT thresholds defined, skipping")
|
logger.warning("No RTT thresholds defined, skipping")
|
||||||
@@ -653,6 +803,7 @@ class ThresholdChecker:
|
|||||||
enabled=enabled,
|
enabled=enabled,
|
||||||
display=display,
|
display=display,
|
||||||
count=count,
|
count=count,
|
||||||
|
grace=grace,
|
||||||
)
|
)
|
||||||
|
|
||||||
target_dict[metric_path] = threshold
|
target_dict[metric_path] = threshold
|
||||||
@@ -664,7 +815,10 @@ class ThresholdChecker:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def get_thresholds_for_host(self, host_name: str) -> Dict[str, ThresholdConfig]:
|
def get_thresholds_for_host(self, host_name: str) -> Dict[str, ThresholdConfig]:
|
||||||
"""Get the appropriate threshold configuration for a host.
|
"""Get the effective threshold configuration for a host.
|
||||||
|
|
||||||
|
When threshold_config is a list, configs are applied left-to-right on top
|
||||||
|
of the default thresholds so earlier entries can be overridden by later ones.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
host_name: Name of the host
|
host_name: Name of the host
|
||||||
@@ -676,23 +830,40 @@ class ThresholdChecker:
|
|||||||
if self.thresholds and not self.threshold_configs:
|
if self.thresholds and not self.threshold_configs:
|
||||||
return self.thresholds
|
return self.thresholds
|
||||||
|
|
||||||
# Multi-config mode: look up host-specific configuration
|
if not self.threshold_configs:
|
||||||
if self.threshold_configs:
|
return {}
|
||||||
config_name = self.host_config_mapping.get(host_name, self.default_config)
|
|
||||||
|
|
||||||
if config_name in self.threshold_configs:
|
config_names = self.host_config_mapping.get(host_name)
|
||||||
return self.threshold_configs[config_name]
|
|
||||||
else:
|
# No host-specific mapping → return pre-merged default
|
||||||
|
if not config_names:
|
||||||
|
return self.threshold_configs.get(self.default_config, {})
|
||||||
|
|
||||||
|
# Single config → fast path using pre-merged copy
|
||||||
|
if len(config_names) == 1:
|
||||||
|
name = config_names[0]
|
||||||
|
if name in self.threshold_configs:
|
||||||
|
return self.threshold_configs[name]
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Threshold config '%s' not found for host '%s', using default '%s'",
|
"Threshold config '%s' not found for host '%s', using default '%s'",
|
||||||
config_name,
|
name, host_name, self.default_config,
|
||||||
host_name,
|
|
||||||
self.default_config
|
|
||||||
)
|
)
|
||||||
return self.threshold_configs.get(self.default_config, {})
|
return self.threshold_configs.get(self.default_config, {})
|
||||||
|
|
||||||
# No thresholds configured
|
# Multiple configs → start from defaults, layer raw overrides in order
|
||||||
return {}
|
result = dict(self.threshold_configs.get(self.default_config, {}))
|
||||||
|
for name in config_names:
|
||||||
|
if name == self.default_config:
|
||||||
|
continue # defaults already the base
|
||||||
|
raw = self.threshold_raw_configs.get(name)
|
||||||
|
if raw is None:
|
||||||
|
logger.warning(
|
||||||
|
"Threshold config '%s' not found for host '%s', skipping",
|
||||||
|
name, host_name,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result.update(raw)
|
||||||
|
return result
|
||||||
|
|
||||||
def check_value(
|
def check_value(
|
||||||
self,
|
self,
|
||||||
@@ -760,6 +931,12 @@ class ThresholdChecker:
|
|||||||
elif new_level == AlertLevel.WARNING and threshold.warning is not None:
|
elif new_level == AlertLevel.WARNING and threshold.warning is not None:
|
||||||
threshold_value = threshold.warning
|
threshold_value = threshold.warning
|
||||||
|
|
||||||
|
# Keep hysteresis on the state so the UI can show the recovery threshold
|
||||||
|
if new_level != AlertLevel.OK:
|
||||||
|
alert_state.hysteresis = threshold.hysteresis
|
||||||
|
else:
|
||||||
|
alert_state.hysteresis = None
|
||||||
|
|
||||||
# Update state and check for changes
|
# Update state and check for changes
|
||||||
old_level = alert_state.level
|
old_level = alert_state.level
|
||||||
if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
|
if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
|
||||||
@@ -769,6 +946,36 @@ class ThresholdChecker:
|
|||||||
self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, None)
|
self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, None)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
def _find_threshold(
|
||||||
|
self, thresholds: Dict[str, "ThresholdConfig"], metric_path: str
|
||||||
|
) -> Tuple[Optional["ThresholdConfig"], Optional[str]]:
|
||||||
|
"""Return (threshold, check_name) for *metric_path*, falling back to suffix matches.
|
||||||
|
|
||||||
|
Allows generic thresholds like ``nagios_runner.status_code`` to match
|
||||||
|
fully-qualified paths like ``nagios_runner.check_disk_root_status_code``.
|
||||||
|
The exact match is always tried first; then successive leading
|
||||||
|
underscore-delimited segments are stripped from the field name until
|
||||||
|
a match is found or no segments remain.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(ThresholdConfig, None) for an exact match.
|
||||||
|
(ThresholdConfig, "check_disk_root") for a suffix match — the second
|
||||||
|
element is the stripped prefix, available as ``{check_name}`` in
|
||||||
|
display format templates.
|
||||||
|
(None, None) when no threshold is found.
|
||||||
|
"""
|
||||||
|
if metric_path in thresholds:
|
||||||
|
return thresholds[metric_path], None
|
||||||
|
plugin, sep, field = metric_path.partition(".")
|
||||||
|
if not sep:
|
||||||
|
return None, None
|
||||||
|
parts = field.split("_")
|
||||||
|
for i in range(1, len(parts)):
|
||||||
|
candidate = plugin + "." + "_".join(parts[i:])
|
||||||
|
if candidate in thresholds:
|
||||||
|
return thresholds[candidate], "_".join(parts[:i])
|
||||||
|
return None, None
|
||||||
|
|
||||||
def check_plugin_data(
|
def check_plugin_data(
|
||||||
self,
|
self,
|
||||||
host_name: str,
|
host_name: str,
|
||||||
@@ -797,11 +1004,10 @@ class ThresholdChecker:
|
|||||||
for metric_name, value in data.items():
|
for metric_name, value in data.items():
|
||||||
metric_path = f"{plugin_name}.{metric_name}"
|
metric_path = f"{plugin_name}.{metric_name}"
|
||||||
|
|
||||||
if metric_path not in thresholds:
|
threshold, check_name = self._find_threshold(thresholds, metric_path)
|
||||||
|
if threshold is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
threshold = thresholds[metric_path]
|
|
||||||
|
|
||||||
# Get or create alert state
|
# Get or create alert state
|
||||||
if metric_path not in alert_states:
|
if metric_path not in alert_states:
|
||||||
alert_states[metric_path] = AlertState(metric_path)
|
alert_states[metric_path] = AlertState(metric_path)
|
||||||
@@ -821,13 +1027,15 @@ class ThresholdChecker:
|
|||||||
elif new_level == AlertLevel.WARNING and threshold.warning is not None:
|
elif new_level == AlertLevel.WARNING and threshold.warning is not None:
|
||||||
threshold_value = threshold.warning
|
threshold_value = threshold.warning
|
||||||
|
|
||||||
|
alert_state.hysteresis = threshold.hysteresis if new_level != AlertLevel.OK else None
|
||||||
|
|
||||||
# Update state and check for changes
|
# Update state and check for changes
|
||||||
old_level = alert_state.level
|
old_level = alert_state.level
|
||||||
if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
|
if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
|
||||||
state_changes.append((metric_path, old_level, new_level, value))
|
state_changes.append((metric_path, old_level, new_level, value))
|
||||||
self._apply_grace(host_name, alert_state, metric_path, old_level, new_level, value, threshold, data)
|
self._apply_grace(host_name, alert_state, metric_path, old_level, new_level, value, threshold, data, check_name=check_name, metric_name=metric_name)
|
||||||
elif new_level != AlertLevel.OK:
|
elif new_level != AlertLevel.OK:
|
||||||
self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, data)
|
self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, data, check_name=check_name, metric_name=metric_name)
|
||||||
|
|
||||||
# Check nested metrics (e.g., partition data in disk_monitor)
|
# Check nested metrics (e.g., partition data in disk_monitor)
|
||||||
self._check_nested_metrics(
|
self._check_nested_metrics(
|
||||||
@@ -852,6 +1060,44 @@ class ThresholdChecker:
|
|||||||
# Get host-specific thresholds
|
# Get host-specific thresholds
|
||||||
thresholds = self.get_thresholds_for_host(host_name)
|
thresholds = self.get_thresholds_for_host(host_name)
|
||||||
|
|
||||||
|
# ZFS pool health checks
|
||||||
|
if plugin_name == "zfs_monitor" and "pools" in data:
|
||||||
|
pools = data["pools"]
|
||||||
|
if isinstance(pools, dict):
|
||||||
|
for pool_name, pool_metrics in pools.items():
|
||||||
|
if not isinstance(pool_metrics, dict):
|
||||||
|
continue
|
||||||
|
# Synthesize status from health string for older clients
|
||||||
|
# that predate the status field.
|
||||||
|
pool_metrics_effective = dict(pool_metrics)
|
||||||
|
if "health" in pool_metrics and "status" not in pool_metrics:
|
||||||
|
pool_metrics_effective["status"] = 0 if pool_metrics["health"] == "ONLINE" else 1
|
||||||
|
for metric_name, value in pool_metrics_effective.items():
|
||||||
|
# Try specific pool name first, then wildcard '*'
|
||||||
|
metric_path = f"{plugin_name}.{pool_name}.{metric_name}"
|
||||||
|
wildcard_path = f"{plugin_name}.*.{metric_name}"
|
||||||
|
threshold = thresholds.get(metric_path) or thresholds.get(wildcard_path)
|
||||||
|
if threshold is None:
|
||||||
|
continue
|
||||||
|
if metric_path not in alert_states:
|
||||||
|
alert_states[metric_path] = AlertState(metric_path)
|
||||||
|
alert_state = alert_states[metric_path]
|
||||||
|
new_level = threshold.evaluate_with_hysteresis(value, alert_state.level)
|
||||||
|
threshold_value = None
|
||||||
|
if new_level == AlertLevel.CRITICAL and threshold.critical is not None:
|
||||||
|
threshold_value = threshold.critical
|
||||||
|
elif new_level == AlertLevel.WARNING and threshold.warning is not None:
|
||||||
|
threshold_value = threshold.warning
|
||||||
|
alert_state.hysteresis = threshold.hysteresis if new_level != AlertLevel.OK else None
|
||||||
|
pool_context = dict(pool_metrics_effective)
|
||||||
|
pool_context["pool_name"] = pool_name
|
||||||
|
old_level = alert_state.level
|
||||||
|
if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
|
||||||
|
state_changes.append((metric_path, old_level, new_level, value))
|
||||||
|
self._apply_grace(host_name, alert_state, metric_path, old_level, new_level, value, threshold, pool_context, metric_name=pool_name)
|
||||||
|
elif new_level != AlertLevel.OK:
|
||||||
|
self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, pool_context, metric_name=pool_name)
|
||||||
|
|
||||||
# Look for partition data in disk_monitor
|
# Look for partition data in disk_monitor
|
||||||
if plugin_name == "disk_monitor" and "partitions" in data:
|
if plugin_name == "disk_monitor" and "partitions" in data:
|
||||||
partitions = data["partitions"]
|
partitions = data["partitions"]
|
||||||
@@ -887,6 +1133,8 @@ class ThresholdChecker:
|
|||||||
elif new_level == AlertLevel.WARNING and threshold.warning is not None:
|
elif new_level == AlertLevel.WARNING and threshold.warning is not None:
|
||||||
threshold_value = threshold.warning
|
threshold_value = threshold.warning
|
||||||
|
|
||||||
|
alert_state.hysteresis = threshold.hysteresis if new_level != AlertLevel.OK else None
|
||||||
|
|
||||||
old_level = alert_state.level
|
old_level = alert_state.level
|
||||||
if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
|
if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
|
||||||
state_changes.append((metric_path, old_level, new_level, value))
|
state_changes.append((metric_path, old_level, new_level, value))
|
||||||
@@ -903,6 +1151,8 @@ class ThresholdChecker:
|
|||||||
value: Any,
|
value: Any,
|
||||||
threshold: ThresholdConfig,
|
threshold: ThresholdConfig,
|
||||||
plugin_data: Optional[Dict[str, Any]] = None,
|
plugin_data: Optional[Dict[str, Any]] = None,
|
||||||
|
check_name: Optional[str] = None,
|
||||||
|
metric_name: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""Trigger a notification for an alert state change.
|
"""Trigger a notification for an alert state change.
|
||||||
|
|
||||||
@@ -925,54 +1175,52 @@ class ThresholdChecker:
|
|||||||
# Format operator symbol
|
# Format operator symbol
|
||||||
op_symbol = threshold.operator.value
|
op_symbol = threshold.operator.value
|
||||||
|
|
||||||
|
# Short metric label: strip the plugin-name prefix and _status_code suffix
|
||||||
|
short_path = (metric_path.partition(".")[2] or metric_path).removesuffix("_status_code")
|
||||||
|
|
||||||
# Use a display-friendly value (inf is the sentinel for "overdue")
|
# Use a display-friendly value (inf is the sentinel for "overdue")
|
||||||
import math
|
import math
|
||||||
display_value = "overdue" if isinstance(value, float) and math.isinf(value) else value
|
display_value = "overdue" if isinstance(value, float) and math.isinf(value) else value
|
||||||
|
|
||||||
# Format message
|
# Format message — for the nagios operator there is no numeric threshold_value;
|
||||||
|
# render the display template whenever one is available.
|
||||||
|
has_display = threshold_value is not None or threshold.operator == ComparisonOperator.NAGIOS
|
||||||
|
|
||||||
|
def _fmt():
|
||||||
|
return self._format_display(
|
||||||
|
threshold.display,
|
||||||
|
value=display_value,
|
||||||
|
threshold_value=threshold_value,
|
||||||
|
op_symbol=op_symbol,
|
||||||
|
plugin_data=plugin_data,
|
||||||
|
check_name=check_name,
|
||||||
|
metric_name=metric_name,
|
||||||
|
)
|
||||||
|
|
||||||
if new_level == AlertLevel.OK:
|
if new_level == AlertLevel.OK:
|
||||||
lvl = "RECOVER"
|
lvl = "RECOVER"
|
||||||
message = f"{metric_path} = {display_value} ({old_level.name} -> OK)"
|
message = f"{short_path} = {display_value} ({old_level.name} -> OK)"
|
||||||
elif new_level == AlertLevel.WARNING:
|
elif new_level == AlertLevel.WARNING:
|
||||||
lvl = "WARNING"
|
lvl = "WARNING"
|
||||||
if threshold_value is not None:
|
if has_display:
|
||||||
threshold_info = self._format_display(
|
message = f"{short_path} = {display_value} {_fmt()}"
|
||||||
threshold.display,
|
|
||||||
value=display_value,
|
|
||||||
threshold_value=threshold_value,
|
|
||||||
op_symbol=op_symbol,
|
|
||||||
plugin_data=plugin_data
|
|
||||||
)
|
|
||||||
message = f"{metric_path} = {display_value} {threshold_info}"
|
|
||||||
else:
|
else:
|
||||||
message = f"{metric_path} = {display_value}"
|
message = f"{short_path} = {display_value}"
|
||||||
elif new_level == AlertLevel.CRITICAL:
|
elif new_level == AlertLevel.CRITICAL:
|
||||||
lvl = "CRITICAL"
|
lvl = "CRITICAL"
|
||||||
if threshold_value is not None:
|
if has_display:
|
||||||
threshold_info = self._format_display(
|
message = f"{short_path} = {display_value} {_fmt()}"
|
||||||
threshold.display,
|
|
||||||
value=display_value,
|
|
||||||
threshold_value=threshold_value,
|
|
||||||
op_symbol=op_symbol,
|
|
||||||
plugin_data=plugin_data
|
|
||||||
)
|
|
||||||
message = f"{metric_path} = {display_value} {threshold_info}"
|
|
||||||
else:
|
else:
|
||||||
message = f"{metric_path} = {display_value}"
|
message = f"{short_path} = {display_value}"
|
||||||
else:
|
else:
|
||||||
lvl = "UNKNOWN"
|
lvl = "UNKNOWN"
|
||||||
message = f"{metric_path} = {display_value}"
|
if has_display:
|
||||||
|
message = f"{short_path} = {display_value} {_fmt()}"
|
||||||
|
else:
|
||||||
|
message = f"{short_path} = {display_value}"
|
||||||
|
|
||||||
# Return the formatted threshold info for storing in AlertState
|
# Formatted threshold info stored on AlertState for the UI
|
||||||
formatted_threshold_msg = None
|
formatted_threshold_msg = _fmt() if has_display and new_level != AlertLevel.OK else None
|
||||||
if threshold_value is not None and new_level != AlertLevel.OK:
|
|
||||||
formatted_threshold_msg = self._format_display(
|
|
||||||
threshold.display,
|
|
||||||
value=display_value,
|
|
||||||
threshold_value=threshold_value,
|
|
||||||
op_symbol=op_symbol,
|
|
||||||
plugin_data=plugin_data
|
|
||||||
)
|
|
||||||
|
|
||||||
return lvl, message, formatted_threshold_msg
|
return lvl, message, formatted_threshold_msg
|
||||||
|
|
||||||
@@ -987,23 +1235,28 @@ class ThresholdChecker:
|
|||||||
value: Any,
|
value: Any,
|
||||||
):
|
):
|
||||||
"""Send notification and log to journal/eventlog."""
|
"""Send notification and log to journal/eventlog."""
|
||||||
try:
|
from . import hbdclass
|
||||||
notify_mod.send_notification(
|
host = hbdclass.Host.hosts.get(host_name)
|
||||||
|
if host is not None and not host.watched:
|
||||||
|
eventlog(host_name, lvl, message, service="threshold")
|
||||||
|
return
|
||||||
|
short_path = (metric_path.partition(".")[2] or metric_path).removesuffix("_status_code")
|
||||||
|
title = f"[{lvl}] {host_name} {short_path}"
|
||||||
|
# Strip the "metric = " prefix from message so body is just the value/detail
|
||||||
|
prefix = short_path + " = "
|
||||||
|
body = message[len(prefix):] if message.startswith(prefix) else message
|
||||||
|
asyncio.get_event_loop().create_task(notify_mod.send_notification(
|
||||||
host_name,
|
host_name,
|
||||||
notify_mod.Notification(
|
notify_mod.Notification(
|
||||||
title=f"[{lvl}] {host_name}",
|
title=title,
|
||||||
body=message,
|
body=body,
|
||||||
level=lvl,
|
level=lvl,
|
||||||
),
|
),
|
||||||
)
|
))
|
||||||
logger.info("Notification sent: %s", message)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Failed to send notification: %s", e)
|
|
||||||
|
|
||||||
# Log to journal
|
# Log to journal
|
||||||
if self.journal is not None:
|
if self.journal is not None:
|
||||||
try:
|
try:
|
||||||
import asyncio
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
loop.create_task(self.journal.log_threshold_event(
|
loop.create_task(self.journal.log_threshold_event(
|
||||||
host_name=host_name,
|
host_name=host_name,
|
||||||
@@ -1021,33 +1274,62 @@ class ThresholdChecker:
|
|||||||
self,
|
self,
|
||||||
display_format: str,
|
display_format: str,
|
||||||
value: Any,
|
value: Any,
|
||||||
threshold_value: float,
|
threshold_value: Optional[float],
|
||||||
op_symbol: str,
|
op_symbol: str,
|
||||||
plugin_data: Optional[Dict[str, Any]] = None,
|
plugin_data: Optional[Dict[str, Any]] = None,
|
||||||
|
check_name: Optional[str] = None,
|
||||||
|
metric_name: Optional[str] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Format the display string using available data.
|
"""Format the display string using available data.
|
||||||
|
|
||||||
Args:
|
Available template variables:
|
||||||
display_format: Format string from threshold config
|
{value} - current metric value
|
||||||
value: Current metric value
|
{threshold_value} - threshold that was exceeded
|
||||||
threshold_value: Threshold value that was exceeded
|
{op_symbol} - comparison operator (>, <, >=, <=, ==, !=)
|
||||||
op_symbol: Comparison operator symbol
|
{check_name} - prefix stripped for generic threshold match
|
||||||
plugin_data: Optional dictionary of plugin data fields
|
(e.g. "check_disk_root" when metric
|
||||||
|
"check_disk_root_status_code" matched generic
|
||||||
|
threshold "status_code")
|
||||||
|
{metric_name} - field name within the plugin data dict
|
||||||
|
Any key from plugin_data is also available.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Formatted display string
|
Formatted display string
|
||||||
"""
|
"""
|
||||||
|
if not display_format:
|
||||||
|
display_format = "(threshold: {op_symbol} {threshold_value})" if threshold_value is not None else ""
|
||||||
|
|
||||||
# Build format context with standard variables
|
# Build format context with standard variables
|
||||||
format_context = {
|
format_context = {
|
||||||
'value': value,
|
'value': value,
|
||||||
'threshold_value': threshold_value,
|
|
||||||
'op_symbol': op_symbol,
|
'op_symbol': op_symbol,
|
||||||
}
|
}
|
||||||
|
if threshold_value is not None:
|
||||||
|
format_context['threshold_value'] = threshold_value
|
||||||
|
|
||||||
|
# Add generic-match context variables when available
|
||||||
|
if check_name is not None:
|
||||||
|
format_context['check_name'] = check_name
|
||||||
|
if metric_name is not None:
|
||||||
|
format_context['metric_name'] = metric_name
|
||||||
|
|
||||||
# Add all plugin data fields if available
|
# Add all plugin data fields if available
|
||||||
if plugin_data:
|
if plugin_data:
|
||||||
format_context.update(plugin_data)
|
format_context.update(plugin_data)
|
||||||
|
|
||||||
|
# For nagios_runner generic matches, expose the matched check's output
|
||||||
|
# and status as short aliases {output} and {status} so display templates
|
||||||
|
# don't need to use the full {check_disk_root_output} form.
|
||||||
|
if check_name and plugin_data:
|
||||||
|
if 'output' not in format_context:
|
||||||
|
output = plugin_data.get(f"{check_name}_output")
|
||||||
|
if output is not None:
|
||||||
|
format_context['output'] = output
|
||||||
|
if 'status' not in format_context:
|
||||||
|
status = plugin_data.get(f"{check_name}_status")
|
||||||
|
if status is not None:
|
||||||
|
format_context['status'] = status
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Format the display string
|
# Format the display string
|
||||||
return display_format.format(**format_context)
|
return display_format.format(**format_context)
|
||||||
@@ -1077,17 +1359,26 @@ class ThresholdChecker:
|
|||||||
value: Any,
|
value: Any,
|
||||||
threshold: ThresholdConfig,
|
threshold: ThresholdConfig,
|
||||||
plugin_data: Optional[Dict[str, Any]],
|
plugin_data: Optional[Dict[str, Any]],
|
||||||
|
check_name: Optional[str] = None,
|
||||||
|
metric_name: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Handle a state-change transition with grace-period logic.
|
"""Handle a state-change transition with grace-period logic.
|
||||||
|
|
||||||
Transitioning INTO alert: defers the notification for grace_seconds.
|
Transitioning INTO alert (worsening): defers the notification for the effective
|
||||||
|
grace period (threshold.grace if set, else self.grace_seconds). Grace of 0 fires
|
||||||
|
the notification immediately with no deferral.
|
||||||
|
De-escalation within alert states (e.g. CRITICAL→WARNING): no new notification;
|
||||||
|
the metric is still alerting so no RECOVER was sent.
|
||||||
Transitioning TO OK:
|
Transitioning TO OK:
|
||||||
- Still in grace window (pending_since set): suppresses both the alert
|
- Still in grace window (pending_since set): suppresses both the alert
|
||||||
and the recovery — the spike never warranted a page.
|
and the recovery — the spike never warranted a page.
|
||||||
- Past grace: fires the RECOVER notification normally.
|
- Past grace: fires the RECOVER notification normally.
|
||||||
"""
|
"""
|
||||||
|
effective_grace = threshold.grace if threshold.grace is not None else self.grace_seconds
|
||||||
|
|
||||||
lvl, message, formatted_msg = self._trigger_notification(
|
lvl, message, formatted_msg = self._trigger_notification(
|
||||||
host_name, metric_path, old_level, new_level, value, threshold, plugin_data
|
host_name, metric_path, old_level, new_level, value, threshold, plugin_data,
|
||||||
|
check_name=check_name, metric_name=metric_name,
|
||||||
)
|
)
|
||||||
alert_state.formatted_message = formatted_msg
|
alert_state.formatted_message = formatted_msg
|
||||||
|
|
||||||
@@ -1095,16 +1386,31 @@ class ThresholdChecker:
|
|||||||
if alert_state.pending_since is not None:
|
if alert_state.pending_since is not None:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Alert suppressed (recovered within %.0fs grace): %s on %s",
|
"Alert suppressed (recovered within %.0fs grace): %s on %s",
|
||||||
self.grace_seconds, metric_path, host_name,
|
effective_grace, metric_path, host_name,
|
||||||
)
|
)
|
||||||
alert_state.pending_since = None
|
alert_state.pending_since = None
|
||||||
else:
|
else:
|
||||||
self._send_notification(host_name, lvl, message, metric_path, old_level, new_level, value)
|
self._send_notification(host_name, lvl, message, metric_path, old_level, new_level, value)
|
||||||
|
elif new_level.value > old_level.value:
|
||||||
|
# Worsening (OK→WARNING, OK→CRITICAL, WARNING→CRITICAL).
|
||||||
|
if effective_grace <= 0:
|
||||||
|
# No grace period — fire immediately.
|
||||||
|
self._send_notification(host_name, lvl, message, metric_path, old_level, new_level, value)
|
||||||
|
now = time.time()
|
||||||
|
alert_state.last_notification = now
|
||||||
|
alert_state.notification_count = 1
|
||||||
else:
|
else:
|
||||||
alert_state.pending_since = time.time()
|
alert_state.pending_since = time.time()
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Alert deferred (%.0fs grace): %s on %s = %s",
|
"Alert deferred (%.0fs grace): %s on %s = %s",
|
||||||
self.grace_seconds, metric_path, host_name, value,
|
effective_grace, metric_path, host_name, value,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# De-escalation within alert states (e.g. CRITICAL→WARNING): metric is still
|
||||||
|
# alerting but did not recover, so no new notification.
|
||||||
|
logger.debug(
|
||||||
|
"De-escalation %s→%s for %s on %s, no notification",
|
||||||
|
old_level.name, new_level.name, metric_path, host_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _check_pending_or_renotify(
|
def _check_pending_or_renotify(
|
||||||
@@ -1115,25 +1421,43 @@ class ThresholdChecker:
|
|||||||
value: Any,
|
value: Any,
|
||||||
threshold: ThresholdConfig,
|
threshold: ThresholdConfig,
|
||||||
plugin_data: Optional[Dict[str, Any]],
|
plugin_data: Optional[Dict[str, Any]],
|
||||||
|
check_name: Optional[str] = None,
|
||||||
|
metric_name: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Called when alert level is unchanged and non-OK.
|
"""Called when alert level is unchanged and non-OK.
|
||||||
|
|
||||||
If a deferred notification is pending and grace_seconds have elapsed,
|
If a deferred notification is pending and grace_seconds have elapsed,
|
||||||
fires it now. Otherwise falls through to normal reminder logic.
|
fires it now. Otherwise falls through to normal reminder logic.
|
||||||
"""
|
"""
|
||||||
|
effective_grace = threshold.grace if threshold.grace is not None else self.grace_seconds
|
||||||
if alert_state.pending_since is not None:
|
if alert_state.pending_since is not None:
|
||||||
if time.time() - alert_state.pending_since >= self.grace_seconds:
|
if time.time() - alert_state.pending_since >= effective_grace:
|
||||||
lvl, message, formatted_msg = self._trigger_notification(
|
lvl, message, formatted_msg = self._trigger_notification(
|
||||||
host_name, metric_path, AlertLevel.OK, alert_state.level, value, threshold, plugin_data
|
host_name, metric_path, AlertLevel.OK, alert_state.level, value, threshold, plugin_data,
|
||||||
|
check_name=check_name, metric_name=metric_name,
|
||||||
)
|
)
|
||||||
alert_state.formatted_message = formatted_msg
|
alert_state.formatted_message = formatted_msg
|
||||||
self._send_notification(
|
self._send_notification(
|
||||||
host_name, lvl, message, metric_path, AlertLevel.OK, alert_state.level, value
|
host_name, lvl, message, metric_path, AlertLevel.OK, alert_state.level, value
|
||||||
)
|
)
|
||||||
alert_state.pending_since = None
|
alert_state.pending_since = None
|
||||||
|
now = time.time()
|
||||||
|
alert_state.last_notification = now
|
||||||
|
alert_state.notification_count = 1
|
||||||
# else: still within grace window, do nothing
|
# else: still within grace window, do nothing
|
||||||
else:
|
else:
|
||||||
self._check_renotify(host_name, alert_state, metric_path, value, threshold, plugin_data)
|
self._check_renotify(host_name, alert_state, metric_path, value, threshold, plugin_data, check_name=check_name, metric_name=metric_name)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _human_duration(seconds: float) -> str:
|
||||||
|
s = int(seconds)
|
||||||
|
if s < 120:
|
||||||
|
return f"{s}s"
|
||||||
|
if s < 3600:
|
||||||
|
return f"{s // 60}m {s % 60}s"
|
||||||
|
h, rem = divmod(s, 3600)
|
||||||
|
m = rem // 60
|
||||||
|
return f"{h}h {m}m" if m else f"{h}h"
|
||||||
|
|
||||||
def _check_renotify(
|
def _check_renotify(
|
||||||
self,
|
self,
|
||||||
@@ -1143,6 +1467,8 @@ class ThresholdChecker:
|
|||||||
value: Any,
|
value: Any,
|
||||||
threshold: ThresholdConfig,
|
threshold: ThresholdConfig,
|
||||||
plugin_data: Optional[Dict[str, Any]] = None,
|
plugin_data: Optional[Dict[str, Any]] = None,
|
||||||
|
check_name: Optional[str] = None,
|
||||||
|
metric_name: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""Check if we should send a repeat notification.
|
"""Check if we should send a repeat notification.
|
||||||
|
|
||||||
@@ -1180,6 +1506,7 @@ class ThresholdChecker:
|
|||||||
|
|
||||||
# Format operator symbol
|
# Format operator symbol
|
||||||
op_symbol = threshold.operator.value
|
op_symbol = threshold.operator.value
|
||||||
|
short_path = (metric_path.partition(".")[2] or metric_path).removesuffix("_status_code")
|
||||||
|
|
||||||
# Time to re-notify
|
# Time to re-notify
|
||||||
if threshold_value is not None:
|
if threshold_value is not None:
|
||||||
@@ -1189,26 +1516,58 @@ class ThresholdChecker:
|
|||||||
value=value,
|
value=value,
|
||||||
threshold_value=threshold_value,
|
threshold_value=threshold_value,
|
||||||
op_symbol=op_symbol,
|
op_symbol=op_symbol,
|
||||||
plugin_data=plugin_data
|
plugin_data=plugin_data,
|
||||||
|
check_name=check_name,
|
||||||
|
metric_name=metric_name,
|
||||||
)
|
)
|
||||||
message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} {threshold_info}, ongoing for {int(now - alert_state.since)}s"
|
body = f"{value} {threshold_info}, ongoing for {self._human_duration(now - alert_state.since)}"
|
||||||
else:
|
else:
|
||||||
message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} (ongoing for {int(now - alert_state.since)}s)"
|
body = f"{value} (ongoing for {self._human_duration(now - alert_state.since)})"
|
||||||
|
message = f"REMINDER ({alert_state.level.name}): {host_name} - {short_path} = {body}"
|
||||||
|
|
||||||
try:
|
from . import hbdclass
|
||||||
notify_mod.send_notification(
|
host = hbdclass.Host.hosts.get(host_name)
|
||||||
|
if host is None or host.watched:
|
||||||
|
asyncio.get_event_loop().create_task(notify_mod.send_notification(
|
||||||
host_name,
|
host_name,
|
||||||
notify_mod.Notification(
|
notify_mod.Notification(
|
||||||
title=f"[REMINDER/{alert_state.level.name}] {host_name}",
|
title=f"[REMINDER/{alert_state.level.name}] {host_name} {short_path}",
|
||||||
body=message,
|
body=body,
|
||||||
level=alert_state.level.name,
|
level=alert_state.level.name,
|
||||||
),
|
),
|
||||||
)
|
))
|
||||||
|
logger.info("Re-notification sent: %s", message)
|
||||||
alert_state.last_notification = now
|
alert_state.last_notification = now
|
||||||
alert_state.notification_count += 1
|
alert_state.notification_count += 1
|
||||||
logger.info("Re-notification sent: %s", message)
|
|
||||||
except Exception as e:
|
def purge_stale_alerts(self, hbdclass) -> None:
|
||||||
logger.error("Failed to send re-notification: %s", e)
|
"""Remove alert states that have no matching threshold configuration.
|
||||||
|
|
||||||
|
Called after startup (pickle restore) and after each config reload so
|
||||||
|
that alerts orphaned by configuration changes do not linger forever.
|
||||||
|
Alerts whose metric_path is not present in the current threshold config
|
||||||
|
for that host are silently dropped.
|
||||||
|
"""
|
||||||
|
for hostname, host in hbdclass.Host.hosts.items():
|
||||||
|
if not host.alert_states:
|
||||||
|
continue
|
||||||
|
configured = self.get_thresholds_for_host(hostname)
|
||||||
|
stale = []
|
||||||
|
for mp in host.alert_states:
|
||||||
|
if self._find_threshold(configured, mp)[0] is not None:
|
||||||
|
continue
|
||||||
|
# Also match wildcard pool/partition thresholds (e.g. "zfs_monitor.*.status"
|
||||||
|
# covers alert state "zfs_monitor.tank.status").
|
||||||
|
parts = mp.split(".")
|
||||||
|
if len(parts) == 3 and f"{parts[0]}.*.{parts[2]}" in configured:
|
||||||
|
continue
|
||||||
|
stale.append(mp)
|
||||||
|
for mp in stale:
|
||||||
|
logger.info(
|
||||||
|
"Purging stale alert state for %s / %s (no threshold configured)",
|
||||||
|
hostname, mp,
|
||||||
|
)
|
||||||
|
del host.alert_states[mp]
|
||||||
|
|
||||||
def get_active_alerts(self, alert_states: Dict[str, AlertState]) -> list:
|
def get_active_alerts(self, alert_states: Dict[str, AlertState]) -> list:
|
||||||
"""
|
"""
|
||||||
|
|||||||
+31
-13
@@ -211,10 +211,11 @@ def _make_timer_callbacks(uname, host, ctx):
|
|||||||
connection.newstate(connection.__class__.OVERDUE, now, cfg.get("grace", 2))
|
connection.newstate(connection.__class__.OVERDUE, now, cfg.get("grace", 2))
|
||||||
msg = f"{connection.afam} overdue"
|
msg = f"{connection.afam} overdue"
|
||||||
eventlog(uname, "CRITICAL", msg)
|
eventlog(uname, "CRITICAL", msg)
|
||||||
notify_mod.send_notification(
|
if host.watched:
|
||||||
|
asyncio.create_task(notify_mod.send_notification(
|
||||||
uname,
|
uname,
|
||||||
notify_mod.Notification(title=f"[CRITICAL] {uname}", body=msg, level="CRITICAL"),
|
notify_mod.Notification(title=f"[CRITICAL] {uname}", body=msg, level="CRITICAL"),
|
||||||
)
|
))
|
||||||
# Track in alert_states so the Alerts Dashboard shows this
|
# Track in alert_states so the Alerts Dashboard shows this
|
||||||
_set_connectivity_alert(host, connection.afam, "CRITICAL")
|
_set_connectivity_alert(host, connection.afam, "CRITICAL")
|
||||||
if threshold_checker:
|
if threshold_checker:
|
||||||
@@ -335,8 +336,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
# Apply user-access settings from config
|
# Apply user-access settings from config
|
||||||
access = config_mod.get_host_access(cfg, uname)
|
access = config_mod.get_host_access(cfg, uname)
|
||||||
host.apply_access(access["owner"], access["managers"], access["monitors"])
|
host.apply_access(access["owner"], access["managers"], access["monitors"])
|
||||||
if verbose:
|
logger.info("New host signed on: %s (dyn=%s, access=%s)", uname, host.dyn, access)
|
||||||
print(("XX: New host, num now %s" % (len(hbdcls.Host.hosts))))
|
|
||||||
newh = True
|
newh = True
|
||||||
else:
|
else:
|
||||||
host = hbdcls.Host.hosts[uname]
|
host = hbdcls.Host.hosts[uname]
|
||||||
@@ -350,8 +350,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
|
|
||||||
if msg.get("ID") == "HTB":
|
if msg.get("ID") == "HTB":
|
||||||
host.doesack = msg.get("acks", -1)
|
host.doesack = msg.get("acks", -1)
|
||||||
# send ACK back
|
# send ACK back; ask client to resend plugin info when we have none yet
|
||||||
rmsg = {"time": time.time()}
|
rmsg = {"time": time.time()}
|
||||||
|
if not host.plugin_data:
|
||||||
|
rmsg["request_update"] = 1
|
||||||
opkt = dicttos("ACK", rmsg)
|
opkt = dicttos("ACK", rmsg)
|
||||||
try:
|
try:
|
||||||
transport.sendto(opkt, addr)
|
transport.sendto(opkt, addr)
|
||||||
@@ -368,6 +370,14 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
if k not in ("ID", "plugin", "id", "name")}
|
if k not in ("ID", "plugin", "id", "name")}
|
||||||
# Store plugin data with timestamp
|
# Store plugin data with timestamp
|
||||||
host.add_plugin_data(plugin_name, plugin_data, timestamp=now)
|
host.add_plugin_data(plugin_name, plugin_data, timestamp=now)
|
||||||
|
|
||||||
|
# If os_info reports an owner and none is configured server-side, apply it
|
||||||
|
if plugin_name == "os_info":
|
||||||
|
config_owner = config_mod.get_host_access(cfg, uname).get("owner")
|
||||||
|
default_owner = config_mod.get_default_owner(cfg)
|
||||||
|
inferred_owner = plugin_data.get("owner", config_owner or default_owner)
|
||||||
|
host.owner = inferred_owner
|
||||||
|
logger.info(f"owner for {uname} is {host.owner}")
|
||||||
if DEBUG > 1:
|
if DEBUG > 1:
|
||||||
print(f"Stored plugin data for {uname}: {plugin_name}")
|
print(f"Stored plugin data for {uname}: {plugin_name}")
|
||||||
|
|
||||||
@@ -407,10 +417,11 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
|
|
||||||
if res:
|
if res:
|
||||||
eventlog(uname, "WARNING", res)
|
eventlog(uname, "WARNING", res)
|
||||||
notify_mod.send_notification(
|
if host.watched:
|
||||||
|
asyncio.create_task(notify_mod.send_notification(
|
||||||
uname,
|
uname,
|
||||||
notify_mod.Notification(title=f"[WARNING] {uname}", body=res, level="WARNING"),
|
notify_mod.Notification(title=f"[WARNING] {uname}", body=res, level="WARNING"),
|
||||||
)
|
))
|
||||||
|
|
||||||
interval = int(msg.get("interval", 0) or 0)
|
interval = int(msg.get("interval", 0) or 0)
|
||||||
shutdown = msg.get("shutdown", 0)
|
shutdown = msg.get("shutdown", 0)
|
||||||
@@ -420,10 +431,11 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
|
|
||||||
if boot:
|
if boot:
|
||||||
eventlog(uname, "INFO", "booted")
|
eventlog(uname, "INFO", "booted")
|
||||||
notify_mod.send_notification(
|
if host.watched:
|
||||||
|
asyncio.create_task(notify_mod.send_notification(
|
||||||
uname,
|
uname,
|
||||||
notify_mod.Notification(title=f"[INFO] {uname}", body=f"{host.name} booted", level="INFO"),
|
notify_mod.Notification(title=f"[INFO] {uname}", body=f"{host.name} booted", level="INFO"),
|
||||||
)
|
))
|
||||||
if message:
|
if message:
|
||||||
eventlog(uname, "INFO", "msg: %s" % message, service=service)
|
eventlog(uname, "INFO", "msg: %s" % message, service=service)
|
||||||
|
|
||||||
@@ -437,13 +449,18 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
if not newh:
|
if not newh:
|
||||||
if d == 0 or lasts == "unknown":
|
if d == 0 or lasts == "unknown":
|
||||||
m = "%s is up" % (conn.afam)
|
m = "%s is up" % (conn.afam)
|
||||||
|
elif d < 4:
|
||||||
|
# Transient blip (likely client restart) — skip log and notification
|
||||||
|
m = None
|
||||||
else:
|
else:
|
||||||
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
|
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
|
||||||
|
if m:
|
||||||
eventlog(uname, "RECOVER", m)
|
eventlog(uname, "RECOVER", m)
|
||||||
notify_mod.send_notification(
|
if host.watched:
|
||||||
|
asyncio.create_task(notify_mod.send_notification(
|
||||||
uname,
|
uname,
|
||||||
notify_mod.Notification(title=f"[RECOVER] {uname}", body=m, level="RECOVER"),
|
notify_mod.Notification(title=f"[RECOVER] {uname}", body=m, level="RECOVER"),
|
||||||
)
|
))
|
||||||
|
|
||||||
if boot or newh:
|
if boot or newh:
|
||||||
host.upcount = host.doesack
|
host.upcount = host.doesack
|
||||||
@@ -453,10 +470,11 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
if shutdown:
|
if shutdown:
|
||||||
m = "%s shutdown" % conn.afam
|
m = "%s shutdown" % conn.afam
|
||||||
eventlog(uname, "INFO", m)
|
eventlog(uname, "INFO", m)
|
||||||
notify_mod.send_notification(
|
if host.watched:
|
||||||
|
asyncio.create_task(notify_mod.send_notification(
|
||||||
uname,
|
uname,
|
||||||
notify_mod.Notification(title=f"[INFO] {uname}", body=m, level="INFO"),
|
notify_mod.Notification(title=f"[INFO] {uname}", body=m, level="INFO"),
|
||||||
)
|
))
|
||||||
conn.newstate(hbdcls.Connection.DOWN, now)
|
conn.newstate(hbdcls.Connection.DOWN, now)
|
||||||
_set_connectivity_alert(host, conn.afam, "CRITICAL")
|
_set_connectivity_alert(host, conn.afam, "CRITICAL")
|
||||||
|
|
||||||
|
|||||||
@@ -146,9 +146,14 @@ def load_users(config: dict) -> dict:
|
|||||||
Returns the new ``users`` dict.
|
Returns the new ``users`` dict.
|
||||||
"""
|
"""
|
||||||
global users
|
global users
|
||||||
|
old_users = dict(users) # snapshot before rebuild
|
||||||
users_cfg = config.get("users", {})
|
users_cfg = config.get("users", {})
|
||||||
if not isinstance(users_cfg, dict):
|
if not isinstance(users_cfg, dict):
|
||||||
users = {}
|
users = {}
|
||||||
|
# Preserve OAuth-provisioned users (password_hash == "") that aren't in config.
|
||||||
|
for username, existing_user in old_users.items():
|
||||||
|
if not existing_user.password_hash and username not in users:
|
||||||
|
users[username] = existing_user
|
||||||
return users
|
return users
|
||||||
|
|
||||||
result: dict = {}
|
result: dict = {}
|
||||||
@@ -166,6 +171,10 @@ def load_users(config: dict) -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
users = result
|
users = result
|
||||||
|
# Preserve OAuth-provisioned users (password_hash == "") that aren't in config.
|
||||||
|
for username, existing_user in old_users.items():
|
||||||
|
if not existing_user.password_hash and username not in users:
|
||||||
|
users[username] = existing_user
|
||||||
logger.info("Loaded %d user(s) from config", len(users))
|
logger.info("Loaded %d user(s) from config", len(users))
|
||||||
return users
|
return users
|
||||||
|
|
||||||
@@ -187,6 +196,26 @@ def authenticate(username: str, password: str) -> "User | None":
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def provision_oauth_user(username: str, full_name: str, avatar: str) -> "User":
|
||||||
|
"""Create or update a user sourced from an OAuth2 provider.
|
||||||
|
|
||||||
|
New users are inserted with no password_hash — they can only authenticate
|
||||||
|
via OAuth. Existing users (e.g. defined in config with a password) have
|
||||||
|
their display name and avatar refreshed; all other attributes are preserved.
|
||||||
|
"""
|
||||||
|
user = users.get(username)
|
||||||
|
if user is None:
|
||||||
|
user = User(username=username, full_name=full_name, avatar=avatar)
|
||||||
|
users[username] = user
|
||||||
|
logger.info("Provisioned OAuth user %r", username)
|
||||||
|
else:
|
||||||
|
if full_name:
|
||||||
|
user.full_name = full_name
|
||||||
|
if avatar:
|
||||||
|
user.avatar = avatar
|
||||||
|
return user
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Session management
|
# Session management
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
+57
-10
@@ -13,7 +13,8 @@ from . import data
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_connections: set = set()
|
# Map of WebSocket → User object (or None when auth is disabled)
|
||||||
|
_connections: dict = {}
|
||||||
_loop: Optional[asyncio.AbstractEventLoop] = None
|
_loop: Optional[asyncio.AbstractEventLoop] = None
|
||||||
_get_hosts: Optional[Callable[[], Iterable]] = None
|
_get_hosts: Optional[Callable[[], Iterable]] = None
|
||||||
_verbose: bool = False
|
_verbose: bool = False
|
||||||
@@ -34,30 +35,62 @@ def setup(
|
|||||||
_verbose = verbose
|
_verbose = verbose
|
||||||
|
|
||||||
|
|
||||||
|
def _user_can_see_host(user, host_name: str) -> bool:
|
||||||
|
"""Return True if *user* may see updates for *host_name* (manager or higher)."""
|
||||||
|
from . import hbdclass, users as users_mod
|
||||||
|
if user is None or not users_mod.users_enabled():
|
||||||
|
return True
|
||||||
|
if user.admin:
|
||||||
|
return True
|
||||||
|
host = hbdclass.Host.hosts.get(host_name)
|
||||||
|
if host is None:
|
||||||
|
return False
|
||||||
|
return host.is_manager(user.username)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_token(request) -> str:
|
||||||
|
"""Extract session token from request (mirrors logic in http.py)."""
|
||||||
|
auth = request.headers.get("Authorization", "")
|
||||||
|
if auth.startswith("Bearer "):
|
||||||
|
return auth[7:].strip()
|
||||||
|
token = request.headers.get("X-Auth-Token", "")
|
||||||
|
if token:
|
||||||
|
return token
|
||||||
|
return request.cookies.get("hbd_session", "")
|
||||||
|
|
||||||
|
|
||||||
async def handler(request):
|
async def handler(request):
|
||||||
"""aiohttp WebSocket upgrade handler — register as GET /ws."""
|
"""aiohttp WebSocket upgrade handler — register as GET /ws."""
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
|
from . import users as users_mod
|
||||||
|
|
||||||
ws = web.WebSocketResponse()
|
ws = web.WebSocketResponse()
|
||||||
await ws.prepare(request)
|
await ws.prepare(request)
|
||||||
|
|
||||||
_connections.add(ws)
|
token = _get_token(request)
|
||||||
|
user = users_mod.get_session_user(token) if token else None
|
||||||
|
|
||||||
|
_connections[ws] = user
|
||||||
remote = request.remote
|
remote = request.remote
|
||||||
logger.info("WebSocket connected from %s", remote)
|
logger.info("WebSocket connected from %s", remote)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Send current host state to the new client
|
# Send current host state, filtered to hosts this user may see
|
||||||
if _get_hosts:
|
if _get_hosts:
|
||||||
try:
|
try:
|
||||||
for h in list(_get_hosts()):
|
for h in list(_get_hosts()):
|
||||||
|
host_name = h.get("raw_name") or h.get("name", "")
|
||||||
|
if _user_can_see_host(user, host_name):
|
||||||
await ws.send_str(json.dumps({"type": "host", "data": h}))
|
await ws.send_str(json.dumps({"type": "host", "data": h}))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error sending initial hosts: %s", e)
|
logger.error("Error sending initial hosts: %s", e)
|
||||||
|
|
||||||
# Send recent messages
|
# Send recent messages, filtered to hosts this user may see
|
||||||
if data.msgs:
|
if data.msgs:
|
||||||
try:
|
try:
|
||||||
for m in data.msgs:
|
for m in data.msgs:
|
||||||
|
host_name = m.get("host") if isinstance(m, dict) else None
|
||||||
|
if not host_name or _user_can_see_host(user, host_name):
|
||||||
await ws.send_str(json.dumps({"type": "message", "data": m}))
|
await ws.send_str(json.dumps({"type": "message", "data": m}))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error sending initial messages: %s", e)
|
logger.error("Error sending initial messages: %s", e)
|
||||||
@@ -74,7 +107,7 @@ async def handler(request):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("WebSocket handler error from %s: %s", remote, e)
|
logger.exception("WebSocket handler error from %s: %s", remote, e)
|
||||||
finally:
|
finally:
|
||||||
_connections.discard(ws)
|
_connections.pop(ws, None)
|
||||||
logger.info("WebSocket disconnected from %s", remote)
|
logger.info("WebSocket disconnected from %s", remote)
|
||||||
|
|
||||||
return ws
|
return ws
|
||||||
@@ -83,25 +116,39 @@ async def handler(request):
|
|||||||
def broadcast(typ: str, payload) -> bool:
|
def broadcast(typ: str, payload) -> bool:
|
||||||
"""Thread-safe broadcast to all connected WebSocket clients.
|
"""Thread-safe broadcast to all connected WebSocket clients.
|
||||||
|
|
||||||
|
For host and plugin updates, only sends to clients whose user has
|
||||||
|
manager-or-higher access to that host. Other message types are
|
||||||
|
broadcast to all clients.
|
||||||
|
|
||||||
Can be called from any thread; schedules sends on the event loop.
|
Can be called from any thread; schedules sends on the event loop.
|
||||||
Returns False if the loop is not running yet.
|
Returns False if the loop is not running yet.
|
||||||
"""
|
"""
|
||||||
if not _loop:
|
if not _loop:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Determine the host name for access-filtered message types
|
||||||
|
host_name: Optional[str] = None
|
||||||
|
if typ in ("host", "plugin"):
|
||||||
|
host_name = payload.get("raw_name") or payload.get("host") or payload.get("name")
|
||||||
|
elif typ == "message" and isinstance(payload, dict):
|
||||||
|
host_name = payload.get("host")
|
||||||
|
|
||||||
jmsg = json.dumps({"type": typ, "data": payload})
|
jmsg = json.dumps({"type": typ, "data": payload})
|
||||||
|
|
||||||
async def _send_all():
|
async def _send_all():
|
||||||
dead = set()
|
dead = set()
|
||||||
for ws in list(_connections):
|
for ws, user in list(_connections.items()):
|
||||||
try:
|
try:
|
||||||
if not ws.closed:
|
if ws.closed:
|
||||||
await ws.send_str(jmsg)
|
|
||||||
else:
|
|
||||||
dead.add(ws)
|
dead.add(ws)
|
||||||
|
continue
|
||||||
|
if host_name is not None and not _user_can_see_host(user, host_name):
|
||||||
|
continue
|
||||||
|
await ws.send_str(jmsg)
|
||||||
except Exception:
|
except Exception:
|
||||||
dead.add(ws)
|
dead.add(ws)
|
||||||
for ws in dead:
|
for ws in dead:
|
||||||
_connections.discard(ws)
|
_connections.pop(ws, None)
|
||||||
|
|
||||||
asyncio.run_coroutine_threadsafe(_send_all(), _loop)
|
asyncio.run_coroutine_threadsafe(_send_all(), _loop)
|
||||||
return True
|
return True
|
||||||
|
|||||||
+24
-8
@@ -4,20 +4,32 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "hbd"
|
name = "hbd"
|
||||||
version = "5.1.7"
|
version = "5.3.6"
|
||||||
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
|
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
license = "MIT"
|
|
||||||
keywords = ["heartbeat", "monitoring", "dns", "websocket", "system-monitoring"]
|
|
||||||
authors = [
|
|
||||||
{ name = "heartbeat contributors" }
|
|
||||||
]
|
|
||||||
|
|
||||||
# Core dependencies (required for both client and server)
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"PyYAML>=6.0",
|
"PyYAML>=6.0",
|
||||||
]
|
]
|
||||||
|
license = "MIT"
|
||||||
|
license-files = ["LICENSE.md"]
|
||||||
|
keywords = ["heartbeat", "monitoring", "dns", "websocket", "system-monitoring"]
|
||||||
|
authors = [
|
||||||
|
{ name = "Andreas Wrede" }
|
||||||
|
]
|
||||||
|
classifiers = [
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Programming Language :: Python :: 3.13",
|
||||||
|
"Operating System :: POSIX :: Linux",
|
||||||
|
"Operating System :: POSIX :: BSD",
|
||||||
|
"Topic :: System :: Monitoring",
|
||||||
|
"Topic :: System :: Networking :: Monitoring",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
Repository = "https://git.wrede.ca/andreas/heartbeat"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
# Client-only dependencies (hbc - system monitoring client)
|
# Client-only dependencies (hbc - system monitoring client)
|
||||||
@@ -32,8 +44,12 @@ server = [
|
|||||||
"aiohttp>=3.11",
|
"aiohttp>=3.11",
|
||||||
"Jinja2>=3.1.6",
|
"Jinja2>=3.1.6",
|
||||||
"matrix-nio>=0.24",
|
"matrix-nio>=0.24",
|
||||||
|
"ruamel.yaml>=0.18",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Minimal client — hbc_mini only, no external dependencies
|
||||||
|
mini = []
|
||||||
|
|
||||||
# Install both client and server
|
# Install both client and server
|
||||||
all = [
|
all = [
|
||||||
"hbd[client,server]",
|
"hbd[client,server]",
|
||||||
|
|||||||
@@ -1,4 +0,0 @@
|
|||||||
key "rndc-key" {
|
|
||||||
algorithm hmac-md5;
|
|
||||||
secret "qlGa+AYKtyOgWNuozqECMw==";
|
|
||||||
};
|
|
||||||
@@ -4,12 +4,14 @@ set -e
|
|||||||
uv version --bump patch
|
uv version --bump patch
|
||||||
VER=$(uv version --short)
|
VER=$(uv version --short)
|
||||||
sed -i".bak" "s/__version__ = \"[0-9.]*\"\(.*\)$/__version__ = \"$VER\"\1/" hbd/__init__.py
|
sed -i".bak" "s/__version__ = \"[0-9.]*\"\(.*\)$/__version__ = \"$VER\"\1/" hbd/__init__.py
|
||||||
|
sed -i".bak" "s/__version__ = \"[0-9.]*\"\(.*\)$/__version__ = \"$VER\"\1/" scripts/hbc_mini.py
|
||||||
|
|
||||||
# commit pyproject.toml
|
# commit pyproject.toml
|
||||||
git commit -m "version $VER" pyproject.toml hbd/__init__.py
|
git commit -m "version $VER" pyproject.toml hbd/__init__.py scripts/hbc_mini.py
|
||||||
git push
|
git push
|
||||||
# tag version
|
# tag version
|
||||||
git tag -a v$VER -m "Version $VER"
|
git tag -a v$VER -m "Version $VER"
|
||||||
git push --tags
|
git push --tags
|
||||||
|
|
||||||
rm hbd/__init__.py.bak
|
rm hbd/__init__.py.bak
|
||||||
|
rm scripts/hbc_mini.py.bak
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
hbc_mini
|
||||||
|
hbc_mini_dbg
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
CC ?= cc
|
||||||
|
CFLAGS = -O2 -Wall -Wextra -std=c11
|
||||||
|
LDFLAGS = -lz -lpthread -lm
|
||||||
|
TARGET = hbc_mini
|
||||||
|
SRC = hbc_mini.c
|
||||||
|
|
||||||
|
# FreeBSD/NetBSD keep zlib in base; no extra flags needed.
|
||||||
|
# On some NetBSD installs pthreads may need -lpthread from pkgsrc.
|
||||||
|
|
||||||
|
.PHONY: all clean debug
|
||||||
|
|
||||||
|
all: $(TARGET)
|
||||||
|
|
||||||
|
$(TARGET): $(SRC)
|
||||||
|
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
|
||||||
|
|
||||||
|
debug: $(SRC)
|
||||||
|
$(CC) -g -fsanitize=address,undefined -o $(TARGET)_dbg $< $(LDFLAGS)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f $(TARGET) $(TARGET)_dbg
|
||||||
File diff suppressed because it is too large
Load Diff
+45
-27
@@ -12,11 +12,14 @@
|
|||||||
set -e
|
set -e
|
||||||
what=$1
|
what=$1
|
||||||
on_ha=0
|
on_ha=0
|
||||||
|
where=""
|
||||||
|
venv=""
|
||||||
|
[ "$2" = "HA" ] && on_ha=1
|
||||||
[ -z "$what" ] && what="client"
|
[ -z "$what" ] && what="client"
|
||||||
|
|
||||||
if [ -d /homeassistant ]; then
|
if [ -d /homeassistant ]; then # if running from HA command line
|
||||||
echo "cannot install in HA, running \"docker exec homeassistant $0 $@\""
|
echo "HA, running \"docker exec homeassistant /config/bin/hb_install.sh $@\""
|
||||||
docker exec homeassistant $0 $@
|
# Pass the already-defaulted $what explicitly: with an unquoted "$@" and no
# arguments, "HA" would land in $1 and be taken as the component to install.
docker exec homeassistant /config/bin/hb_install.sh "$what" HA
|
||||||
rc=$?
|
rc=$?
|
||||||
if [ $rc -ne 0 ]; then
|
if [ $rc -ne 0 ]; then
|
||||||
echo "Failed to install heartbeat in HA, please check the logs for more details"
|
echo "Failed to install heartbeat in HA, please check the logs for more details"
|
||||||
@@ -24,11 +27,12 @@ if [ -d /homeassistant ]; then
|
|||||||
fi
|
fi
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
if [ -d /config ]; then
|
|
||||||
echo "Installing on HA"
|
if [ $on_ha -eq 1 ] || [ -r /.dockerenv ] && [ -d /config/bin ]; then
|
||||||
|
# Installing under docker on Home Assistant OS, using /config/bin for executables and /config/venvs for virtual environments
|
||||||
|
echo "Home Assistant OS detected, installing under docker"
|
||||||
where="/config/bin"
|
where="/config/bin"
|
||||||
venv="/config/venvs"
|
venv="/config/venvs"
|
||||||
on_ha=1
|
|
||||||
else
|
else
|
||||||
if [ ! -d $HOME/.local/bin ] && [ ! -d $HOME/bin ]; then
|
if [ ! -d $HOME/.local/bin ] && [ ! -d $HOME/bin ]; then
|
||||||
echo "No suitable bin directory found in PATH, please add either $HOME/.local/bin or $HOME/bin to your PATH"
|
echo "No suitable bin directory found in PATH, please add either $HOME/.local/bin or $HOME/bin to your PATH"
|
||||||
@@ -43,24 +47,32 @@ else
|
|||||||
echo "No suitable bin directory found in PATH, please add either $HOME/.local/bin or $HOME/bin to your PATH"
|
echo "No suitable bin directory found in PATH, please add either $HOME/.local/bin or $HOME/bin to your PATH"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
if [ "$what" = "mini" ]; then
|
||||||
|
venv=""
|
||||||
|
else
|
||||||
venv="$HOME/venvs"
|
venv="$HOME/venvs"
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
|
echo "Installing $what to $where"
|
||||||
|
if [ ! -z "$venv" ]; then
|
||||||
|
echo "Using virtual environment at $venv/hbd"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "Installing heartbeat $what"
|
if [ "$venv" != "" ] && [ ! -d $venv/hbd ]; then
|
||||||
|
arg=""
|
||||||
if [ ! -d $venv/hbd ]; then
|
# Portable redirection: "&>" is a bashism; a POSIX sh parses it as "&" followed
# by ">", backgrounding python3 so the probe always reports "Installed".
have_pip=$(python3 -c "import pip" >/dev/null 2>&1 && echo "Installed" || echo "Not Installed")
|
||||||
set +e
|
if [ "$have_pip" = "Not Installed" ]; then
|
||||||
python3 -m pip --version > /dev/null 2>&1
|
# some systems do not have pip installed by default, so we need to fetch get-pip.py and install pip
|
||||||
rc=$?
|
|
||||||
set -e
|
|
||||||
if [ $rc -ne 0 ]; then
|
|
||||||
# truenas does not have pip installed by default, so we need to fetch get-pip.py and install pip
|
|
||||||
echo "pip is not installed, fetching get-pip.py and installing pip"
|
echo "pip is not installed, fetching get-pip.py and installing pip"
|
||||||
arg="--without-pip"
|
arg="--without-pip"
|
||||||
fi
|
fi
|
||||||
mkdir -p $venv
|
mkdir -p $venv
|
||||||
have_venv=$(python3 -c "import venv" &> /dev/null && echo "Installed" || echo "Not Installed")
|
# Portable redirection: "&>" is a bashism; a POSIX sh parses it as "&" followed
# by ">", backgrounding python3 so the probe always reports "Installed".
have_venv=$(python3 -c "import venv" >/dev/null 2>&1 && echo "Installed" || echo "Not Installed")
|
||||||
if [ "$have_venv" = "Not Installed" ]; then
|
if [ "$have_venv" = "Not Installed" ]; then
|
||||||
|
if [ "$have_pip" = "Not Installed" ]; then
|
||||||
|
echo "python has no venv, and no pip to install virtualenv, cannot continue"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
echo "python venv module not found, installing virtualenv"
|
echo "python venv module not found, installing virtualenv"
|
||||||
python3 -m pip install --user virtualenv
|
python3 -m pip install --user virtualenv
|
||||||
python3 -m virtualenv $venv/hbd --system-site-packages $arg
|
python3 -m virtualenv $venv/hbd --system-site-packages $arg
|
||||||
@@ -74,24 +86,30 @@ if [ ! -d $venv/hbd ]; then
|
|||||||
deactivate
|
deactivate
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ ! -z "$venv" ]; then
|
||||||
. $venv/hbd/bin/activate
|
. $venv/hbd/bin/activate
|
||||||
|
fi
|
||||||
|
if [ "$what" = "mini" ]; then
|
||||||
|
curl -s -o $where/hbc_mini https://git.wrede.ca/andreas/heartbeat/raw/branch/master/scripts/hbc_mini.py
|
||||||
|
chmod +x $where/hbc_mini
|
||||||
|
else
|
||||||
python3 -mpip install --upgrade --index-url https://git.wrede.ca/api/packages/andreas/pypi/simple/ --extra-index-url https://pypi.org/simple hbd[$what]
|
python3 -mpip install --upgrade --index-url https://git.wrede.ca/api/packages/andreas/pypi/simple/ --extra-index-url https://pypi.org/simple hbd[$what]
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -z "$venv" ]; then
|
||||||
|
echo "linking executables to $where"
|
||||||
if [ "$what" = "server" ]; then
|
if [ "$what" = "server" ]; then
|
||||||
rm -f $where/hbd
|
rm -f $where/hbd
|
||||||
ln -sf $(which hbd) $where/hbd
|
ln -sf $(which hbd) $where/hbd
|
||||||
echo "hbd installed, you can run it with \"$where/hbd\" or \"hbd\" if $where is in your PATH"
|
elif [ "$what" = "client" ]; then
|
||||||
else
|
|
||||||
rm -f $where/hbc
|
rm -f $where/hbc
|
||||||
ln -sf $(which hbc) $where/hbc
|
ln -sf $(which hbc) $where/hbc
|
||||||
# rm -f $where/hb_install.sh
|
|
||||||
cp "$0" $where/hb_install.sh
|
|
||||||
chmod +x $where/hb_install.sh
|
|
||||||
if [ $on_ha -eq 1 ]; then
|
|
||||||
echo "restarting hbc "
|
|
||||||
job=$(grep run_hbc configuration.yaml | sed 's/run_hbc://')
|
|
||||||
$job
|
|
||||||
else
|
|
||||||
echo "hbc installed, you can run it with \"$where/hbc\" or \"hbc\" if $where is in your PATH"
|
|
||||||
fi
|
fi
|
||||||
|
rm -f $where/hb_install.sh
|
||||||
|
ln -sf $(which hb_install.sh) $where/hb_install.sh
|
||||||
fi
|
fi
|
||||||
|
echo "Installation complete. To upgrade, run the following:"
|
||||||
|
echo " $where/hb_install.sh $what"
|
||||||
|
echo "To install on another machine, obtain the install script"
|
||||||
|
echo "from https://git.wrede.ca/andreas/heartbeat/raw/branch/master/scripts/hb_install.sh"
|
||||||
|
echo "and then run sh hb_install.sh [mini|client]"
|
||||||
+122
-47
@@ -40,6 +40,9 @@ from logging.handlers import SysLogHandler
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
# updated by scripts/bumpminor.sh
|
||||||
|
__version__ = "5.3.6"
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Protocol (mirrors hbd/common/proto.py)
|
# Protocol (mirrors hbd/common/proto.py)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -111,6 +114,7 @@ def _stodict(data: bytes) -> Dict[str, Any]:
|
|||||||
_DEFAULTS: Dict[str, Any] = {
|
_DEFAULTS: Dict[str, Any] = {
|
||||||
"hb_port": 50003,
|
"hb_port": 50003,
|
||||||
"interval": 10,
|
"interval": 10,
|
||||||
|
"owner": None,
|
||||||
"plugins": {},
|
"plugins": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -233,7 +237,11 @@ class OSInfoPlugin(InfoPlugin):
|
|||||||
"machine": platform.machine(),
|
"machine": platform.machine(),
|
||||||
"architecture": platform.architecture()[0],
|
"architecture": platform.architecture()[0],
|
||||||
"python_version": platform.python_version(),
|
"python_version": platform.python_version(),
|
||||||
|
"hbc_version": __version__,
|
||||||
|
"hbc_type": "mini",
|
||||||
}
|
}
|
||||||
|
if self.config.get("owner"):
|
||||||
|
data["owner"] = self.config["owner"]
|
||||||
if platform.system() == "Linux":
|
if platform.system() == "Linux":
|
||||||
data.update(_linux_distro())
|
data.update(_linux_distro())
|
||||||
elif platform.system() == "Darwin":
|
elif platform.system() == "Darwin":
|
||||||
@@ -383,7 +391,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
|||||||
|
|
||||||
async def _collect_metrics(self) -> Dict[str, Any]:
|
async def _collect_metrics(self) -> Dict[str, Any]:
|
||||||
results: Dict[str, Any] = {}
|
results: Dict[str, Any] = {}
|
||||||
worst = 0
|
|
||||||
for cmd_cfg in self.commands:
|
for cmd_cfg in self.commands:
|
||||||
name = cmd_cfg.get("name")
|
name = cmd_cfg.get("name")
|
||||||
command = cmd_cfg.get("command")
|
command = cmd_cfg.get("command")
|
||||||
@@ -394,10 +401,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
|||||||
results[f"{name}_status_code"] = rc
|
results[f"{name}_status_code"] = rc
|
||||||
results[f"{name}_output"] = msg
|
results[f"{name}_output"] = msg
|
||||||
results.update({f"{name}_{k}": v for k, v in perf.items()})
|
results.update({f"{name}_{k}": v for k, v in perf.items()})
|
||||||
worst = max(worst, rc)
|
|
||||||
results["overall_status"] = _NAGIOS_STATUS.get(worst, "UNKNOWN")
|
|
||||||
results["overall_status_code"] = worst
|
|
||||||
results["plugin_count"] = len(self.commands)
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
@@ -482,6 +485,12 @@ class CPUMonitorPlugin(MonitorPlugin):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open("/proc/uptime") as fh:
|
||||||
|
data["uptime_seconds"] = int(float(fh.read().split()[0]))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@@ -529,19 +538,41 @@ class MemoryMonitorPlugin(MonitorPlugin):
|
|||||||
return {}
|
return {}
|
||||||
total = mi.get("MemTotal", 0)
|
total = mi.get("MemTotal", 0)
|
||||||
avail = mi.get("MemAvailable", mi.get("MemFree", 0))
|
avail = mi.get("MemAvailable", mi.get("MemFree", 0))
|
||||||
|
free = mi.get("MemFree", 0)
|
||||||
|
|
||||||
|
# ZFS ARC is reclaimable but not included in MemAvailable; add it.
|
||||||
|
arc_kb = 0
|
||||||
|
try:
|
||||||
|
with open("/proc/spl/kstat/zfs/arcstats") as _f:
|
||||||
|
for _line in _f:
|
||||||
|
_p = _line.split()
|
||||||
|
if len(_p) >= 3 and _p[0] == "size":
|
||||||
|
arc_kb = int(_p[2]) // 1024
|
||||||
|
break
|
||||||
|
except (OSError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
avail = min(avail + arc_kb, total)
|
||||||
used = total - avail
|
used = total - avail
|
||||||
data: Dict[str, Any] = {
|
data: Dict[str, Any] = {
|
||||||
"mem_total_kb": total,
|
"memory_total": total * 1024,
|
||||||
"mem_used_kb": used,
|
"memory_used": used * 1024,
|
||||||
"mem_available_kb": avail,
|
"memory_available": avail * 1024,
|
||||||
"mem_percent": round(100.0 * used / total, 1) if total else 0.0,
|
"memory_free": free * 1024,
|
||||||
|
"memory_percent": round(100.0 * used / total, 1) if total else 0.0,
|
||||||
}
|
}
|
||||||
|
for field, key in (("Buffers", "memory_buffers"), ("Cached", "memory_cached"),
|
||||||
|
("Active", "memory_active"), ("Inactive", "memory_inactive")):
|
||||||
|
if field in mi:
|
||||||
|
data[key] = mi[field] * 1024
|
||||||
stotal = mi.get("SwapTotal", 0)
|
stotal = mi.get("SwapTotal", 0)
|
||||||
if stotal:
|
if stotal:
|
||||||
sfree = mi.get("SwapFree", 0)
|
sfree = mi.get("SwapFree", 0)
|
||||||
data["swap_total_kb"] = stotal
|
sused = stotal - sfree
|
||||||
data["swap_used_kb"] = stotal - sfree
|
data["swap_total"] = stotal * 1024
|
||||||
data["swap_percent"] = round(100.0 * (stotal - sfree) / stotal, 1)
|
data["swap_used"] = sused * 1024
|
||||||
|
data["swap_free"] = sfree * 1024
|
||||||
|
data["swap_percent"] = round(100.0 * sused / stotal, 1)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@@ -577,7 +608,7 @@ class DiskMonitorPlugin(MonitorPlugin):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning("df failed: %s", e)
|
self.logger.warning("df failed: %s", e)
|
||||||
return {}
|
return {}
|
||||||
data: Dict[str, Any] = {}
|
partitions: Dict[str, Any] = {}
|
||||||
for line in out.decode(errors="replace").splitlines()[1:]:
|
for line in out.decode(errors="replace").splitlines()[1:]:
|
||||||
parts = line.split()
|
parts = line.split()
|
||||||
if len(parts) < 6:
|
if len(parts) < 6:
|
||||||
@@ -586,14 +617,19 @@ class DiskMonitorPlugin(MonitorPlugin):
|
|||||||
if self.mounts and mount not in self.mounts:
|
if self.mounts and mount not in self.mounts:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
key = re.sub(r"[^a-zA-Z0-9_]", "_", mount).strip("_") or "root"
|
total_kb = int(parts[1])
|
||||||
data[f"{key}_total_kb"] = int(parts[1])
|
used_kb = int(parts[2])
|
||||||
data[f"{key}_used_kb"] = int(parts[2])
|
avail_kb = int(parts[3])
|
||||||
data[f"{key}_avail_kb"] = int(parts[3])
|
pct = int(parts[4].rstrip("%"))
|
||||||
data[f"{key}_percent"] = int(parts[4].rstrip("%"))
|
partitions[mount] = {
|
||||||
|
"total": total_kb * 1024,
|
||||||
|
"used": used_kb * 1024,
|
||||||
|
"free": avail_kb * 1024,
|
||||||
|
"percent": pct,
|
||||||
|
}
|
||||||
except (ValueError, IndexError):
|
except (ValueError, IndexError):
|
||||||
continue
|
continue
|
||||||
return data
|
return {"partitions": partitions} if partitions else {}
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -649,17 +685,18 @@ class NetworkMonitorPlugin(MonitorPlugin):
|
|||||||
self._prev = (now, curr)
|
self._prev = (now, curr)
|
||||||
if dt <= 0:
|
if dt <= 0:
|
||||||
return {}
|
return {}
|
||||||
data: Dict[str, Any] = {}
|
interfaces: Dict[str, Any] = {}
|
||||||
for iface, (rx, tx) in curr.items():
|
for iface, (rx, tx) in curr.items():
|
||||||
if iface in self.skip_ifaces or iface not in prev:
|
if iface in self.skip_ifaces or iface not in prev:
|
||||||
continue
|
continue
|
||||||
prx, ptx = prev[iface]
|
prx, ptx = prev[iface]
|
||||||
key = re.sub(r"[^a-zA-Z0-9_]", "_", iface)
|
interfaces[iface] = {
|
||||||
data[f"{key}_rx_bps"] = round((rx - prx) / dt)
|
"bytes_recv": rx,
|
||||||
data[f"{key}_tx_bps"] = round((tx - ptx) / dt)
|
"bytes_sent": tx,
|
||||||
data[f"{key}_rx_bytes"] = rx
|
"bytes_recv_delta": rx - prx,
|
||||||
data[f"{key}_tx_bytes"] = tx
|
"bytes_sent_delta": tx - ptx,
|
||||||
return data
|
}
|
||||||
|
return {"interfaces": interfaces} if interfaces else {}
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -682,7 +719,9 @@ async def _load_plugins(cfg: Dict[str, Any]) -> List[Plugin]:
|
|||||||
plugins_cfg: Dict[str, Any] = cfg.get("plugins", {})
|
plugins_cfg: Dict[str, Any] = cfg.get("plugins", {})
|
||||||
loaded: List[Plugin] = []
|
loaded: List[Plugin] = []
|
||||||
for cls in _ALL_PLUGIN_CLASSES:
|
for cls in _ALL_PLUGIN_CLASSES:
|
||||||
plugin_cfg = plugins_cfg.get(cls.name) or cfg.get(cls.name, {})
|
plugin_cfg = dict(plugins_cfg.get(cls.name) or cfg.get(cls.name) or {})
|
||||||
|
if "owner" in cfg and "owner" not in plugin_cfg:
|
||||||
|
plugin_cfg["owner"] = cfg["owner"]
|
||||||
plugin: Plugin = cls(config=plugin_cfg)
|
plugin: Plugin = cls(config=plugin_cfg)
|
||||||
try:
|
try:
|
||||||
ok = await plugin.initialize()
|
ok = await plugin.initialize()
|
||||||
@@ -752,7 +791,7 @@ class _HeartbeatProtocol(asyncio.DatagramProtocol):
|
|||||||
msg_id = msg.get("ID")
|
msg_id = msg.get("ID")
|
||||||
now = time.time()
|
now = time.time()
|
||||||
if msg_id == "ACK":
|
if msg_id == "ACK":
|
||||||
self._conn._handle_ack(now)
|
self._conn._handle_ack(msg, now)
|
||||||
elif msg_id == "CMD":
|
elif msg_id == "CMD":
|
||||||
asyncio.create_task(_handle_command(self._conn, msg))
|
asyncio.create_task(_handle_command(self._conn, msg))
|
||||||
elif msg_id == "UPD":
|
elif msg_id == "UPD":
|
||||||
@@ -763,8 +802,7 @@ class _HeartbeatProtocol(asyncio.DatagramProtocol):
|
|||||||
self._log.error("datagram error: %s", e)
|
self._log.error("datagram error: %s", e)
|
||||||
|
|
||||||
def error_received(self, exc):
|
def error_received(self, exc):
|
||||||
self._log.warning("protocol error on %s: %s — dropping connection", self._conn.addr, exc)
|
self._log.warning("protocol error on %s: %s — will retry", self._conn.addr, exc)
|
||||||
self._conn._dead = True
|
|
||||||
self._conn.close()
|
self._conn.close()
|
||||||
|
|
||||||
|
|
||||||
@@ -780,6 +818,7 @@ class AsyncConnection:
|
|||||||
self.rtts: List[float] = [0.0]
|
self.rtts: List[float] = [0.0]
|
||||||
self._transport: Optional[asyncio.DatagramTransport] = None
|
self._transport: Optional[asyncio.DatagramTransport] = None
|
||||||
self._dead = False
|
self._dead = False
|
||||||
|
self._request_info: asyncio.Event = asyncio.Event()
|
||||||
self._log = logging.getLogger(f"hbc.conn.{addr}")
|
self._log = logging.getLogger(f"hbc.conn.{addr}")
|
||||||
|
|
||||||
async def open(self) -> bool:
|
async def open(self) -> bool:
|
||||||
@@ -798,12 +837,14 @@ class AsyncConnection:
|
|||||||
self._transport.close()
|
self._transport.close()
|
||||||
self._transport = None
|
self._transport = None
|
||||||
|
|
||||||
def _handle_ack(self, now: float):
|
def _handle_ack(self, msg: Dict[str, Any], now: float):
|
||||||
rtt = (now - self.lastsend) * 1000.0
|
rtt = (now - self.lastsend) * 1000.0
|
||||||
self.rtts.append(rtt)
|
self.rtts.append(rtt)
|
||||||
if len(self.rtts) > 10:
|
if len(self.rtts) > 10:
|
||||||
self.rtts.pop(0)
|
self.rtts.pop(0)
|
||||||
self.ackcount += 1
|
self.ackcount += 1
|
||||||
|
if msg.get("request_update"):
|
||||||
|
self._request_info.set()
|
||||||
|
|
||||||
async def sendto(self, msg: Dict[str, Any], msg_id: str = "HTB"):
|
async def sendto(self, msg: Dict[str, Any], msg_id: str = "HTB"):
|
||||||
if self._dead:
|
if self._dead:
|
||||||
@@ -859,7 +900,7 @@ async def _handle_update(conn: AsyncConnection):
|
|||||||
log.info("running installer: %s", installer)
|
log.info("running installer: %s", installer)
|
||||||
try:
|
try:
|
||||||
proc = await asyncio.create_subprocess_exec(
|
proc = await asyncio.create_subprocess_exec(
|
||||||
installer, "client",
|
installer, "mini",
|
||||||
stdout=asyncio.subprocess.PIPE,
|
stdout=asyncio.subprocess.PIPE,
|
||||||
stderr=asyncio.subprocess.STDOUT,
|
stderr=asyncio.subprocess.STDOUT,
|
||||||
)
|
)
|
||||||
@@ -936,6 +977,19 @@ async def _run_monitor_group(conn: AsyncConnection, plugins: List[Plugin], inter
|
|||||||
await _sleep(interval)
|
await _sleep(interval)
|
||||||
|
|
||||||
|
|
||||||
|
async def _info_refresh_loop(conn: AsyncConnection, info: List[Plugin]):
    """Re-run the InfoPlugins each time ``conn._request_info`` is set.

    Runs while the module-level ``_running`` flag is true. Each cycle waits
    on the event, clears it, resets every plugin's ``_cache`` to ``None``
    (presumably so the next run collects fresh data) and then awaits
    ``_run_info_plugins(conn, info)``.
    """
    logger = logging.getLogger("hbc.plugins")
    while True:
        if not _running:
            return
        await conn._request_info.wait()
        # _running may have been cleared while this task was blocked above.
        if not _running:
            return
        conn._request_info.clear()
        logger.info("refreshing InfoPlugins on server request")
        for info_plugin in info:
            info_plugin._cache = None
        await _run_info_plugins(conn, info)
|
||||||
|
|
||||||
|
|
||||||
async def _plugin_collector(conn: AsyncConnection, plugins: List[Plugin]):
|
async def _plugin_collector(conn: AsyncConnection, plugins: List[Plugin]):
|
||||||
info = [p for p in plugins if isinstance(p, InfoPlugin)]
|
info = [p for p in plugins if isinstance(p, InfoPlugin)]
|
||||||
monitor = [p for p in plugins if isinstance(p, MonitorPlugin)]
|
monitor = [p for p in plugins if isinstance(p, MonitorPlugin)]
|
||||||
@@ -946,12 +1000,10 @@ async def _plugin_collector(conn: AsyncConnection, plugins: List[Plugin]):
|
|||||||
for p in monitor:
|
for p in monitor:
|
||||||
by_interval[p.interval].append(p)
|
by_interval[p.interval].append(p)
|
||||||
|
|
||||||
if by_interval:
|
tasks = [asyncio.create_task(_info_refresh_loop(conn, info))]
|
||||||
await asyncio.gather(
|
tasks += [asyncio.create_task(_run_monitor_group(conn, grp, iv))
|
||||||
*[asyncio.create_task(_run_monitor_group(conn, grp, iv))
|
for iv, grp in by_interval.items()]
|
||||||
for iv, grp in by_interval.items()],
|
await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
return_exceptions=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -995,7 +1047,7 @@ def _reconfigure_syslog(level: int):
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
async def _async_main(args, cfg: Dict[str, Any]) -> int:
|
async def _async_main(args, cfg: Dict[str, Any]) -> int:
|
||||||
global _running, _shutdown_event, _active_tasks
|
global _running, _shutdown_event, _active_tasks, send_shutdown
|
||||||
_running = True
|
_running = True
|
||||||
_shutdown_event = asyncio.Event()
|
_shutdown_event = asyncio.Event()
|
||||||
_active_tasks = []
|
_active_tasks = []
|
||||||
@@ -1005,36 +1057,47 @@ async def _async_main(args, cfg: Dict[str, Any]) -> int:
|
|||||||
port = cfg.get("hb_port", PORT)
|
port = cfg.get("hb_port", PORT)
|
||||||
interval = cfg.get("interval", INTERVAL)
|
interval = cfg.get("interval", INTERVAL)
|
||||||
|
|
||||||
log.info("starting: %s -> %s port=%d interval=%ds", iam, args.hosts, port, interval)
|
log.info("hbc_mini %s on %s -> %s port=%d interval=%ds",__version__, iam, args.hosts, port, interval)
|
||||||
|
|
||||||
|
af_filter = (socket.AF_INET if getattr(args, "ipv4_only", False)
|
||||||
|
else socket.AF_INET6 if getattr(args, "ipv6_only", False)
|
||||||
|
else 0)
|
||||||
|
|
||||||
connections: List[AsyncConnection] = []
|
connections: List[AsyncConnection] = []
|
||||||
conn_id = 1
|
conn_id = 1
|
||||||
|
_retry_delay = 5
|
||||||
|
while _running and not connections:
|
||||||
for host in args.hosts:
|
for host in args.hosts:
|
||||||
try:
|
try:
|
||||||
addrs = socket.getaddrinfo(host, port, 0, 0, socket.SOL_UDP)
|
addrs = socket.getaddrinfo(host, port, af_filter, 0, socket.SOL_UDP)
|
||||||
except socket.gaierror as e:
|
except socket.gaierror as e:
|
||||||
log.error("cannot resolve %s: %s", host, e)
|
log.warning("cannot resolve %s: %s — retrying in %ds", host, e, _retry_delay)
|
||||||
continue
|
continue
|
||||||
for ai in addrs:
|
for ai in addrs:
|
||||||
conn = AsyncConnection(conn_id, ai[4][0], port, ai[0], iam)
|
conn = AsyncConnection(conn_id, ai[4][0], port, ai[0], iam)
|
||||||
if await conn.open():
|
if await conn.open():
|
||||||
connections.append(conn)
|
connections.append(conn)
|
||||||
conn_id += 1
|
conn_id += 1
|
||||||
|
if not connections:
|
||||||
|
await _sleep(_retry_delay)
|
||||||
|
_retry_delay = min(_retry_delay * 2, 60)
|
||||||
|
|
||||||
if not connections:
|
if not connections:
|
||||||
log.error("no connections established")
|
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# Boot / one-shot message
|
# Boot / one-shot message
|
||||||
|
send_shutdown = False
|
||||||
if args.boot or args.message:
|
if args.boot or args.message:
|
||||||
bmsg: Dict[str, Any] = {"acks": 0}
|
bmsg: Dict[str, Any] = {"acks": 0}
|
||||||
if args.boot:
|
if args.boot:
|
||||||
bmsg["boot"] = 1
|
bmsg["boot"] = 1
|
||||||
|
args.boot = False # don't repeat on restart
|
||||||
|
send_shutdown = True
|
||||||
if args.message:
|
if args.message:
|
||||||
bmsg["service"] = "service"
|
bmsg["service"] = "service"
|
||||||
bmsg["msg"] = args.message
|
bmsg["msg"] = args.message
|
||||||
for c in connections:
|
target = next((c for c in connections if c._transport), connections[0])
|
||||||
await c.sendto(bmsg)
|
await target.sendto(bmsg)
|
||||||
if args.message and not args.daemon:
|
if args.message and not args.daemon:
|
||||||
await asyncio.sleep(0.3)
|
await asyncio.sleep(0.3)
|
||||||
for c in connections:
|
for c in connections:
|
||||||
@@ -1047,6 +1110,13 @@ async def _async_main(args, cfg: Dict[str, Any]) -> int:
|
|||||||
for sig in (signal.SIGTERM, signal.SIGINT):
|
for sig in (signal.SIGTERM, signal.SIGINT):
|
||||||
loop.add_signal_handler(sig, _stop)
|
loop.add_signal_handler(sig, _stop)
|
||||||
|
|
||||||
|
def _sighup():
|
||||||
|
global _dorestart
|
||||||
|
_dorestart = True
|
||||||
|
_stop()
|
||||||
|
|
||||||
|
loop.add_signal_handler(signal.SIGHUP, _sighup)
|
||||||
|
|
||||||
for conn in connections:
|
for conn in connections:
|
||||||
_active_tasks.append(asyncio.create_task(_heartbeat_sender(conn, interval)))
|
_active_tasks.append(asyncio.create_task(_heartbeat_sender(conn, interval)))
|
||||||
|
|
||||||
@@ -1059,11 +1129,13 @@ async def _async_main(args, cfg: Dict[str, Any]) -> int:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
log.info("shutting down")
|
log.info("shutting down")
|
||||||
for conn in connections:
|
target = next((c for c in connections if c._transport), connections[0] if connections else None)
|
||||||
|
if target and send_shutdown:
|
||||||
try:
|
try:
|
||||||
await conn.sendto({"shutdown": 1, "acks": conn.ackcount})
|
await target.sendto({"shutdown": 1, "acks": target.ackcount})
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
for conn in connections:
|
||||||
conn.close()
|
conn.close()
|
||||||
await asyncio.sleep(0.3)
|
await asyncio.sleep(0.3)
|
||||||
for plugin in plugins:
|
for plugin in plugins:
|
||||||
@@ -1089,6 +1161,9 @@ def main(argv=None):
|
|||||||
parser.add_argument("-d", "--daemon", action="store_true", help="Run as daemon")
|
parser.add_argument("-d", "--daemon", action="store_true", help="Run as daemon")
|
||||||
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
|
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
|
||||||
parser.add_argument("-x", "--debug", action="count", default=0, help="Debug level")
|
parser.add_argument("-x", "--debug", action="count", default=0, help="Debug level")
|
||||||
|
af_group = parser.add_mutually_exclusive_group()
|
||||||
|
af_group.add_argument("-4", dest="ipv4_only", action="store_true", help="Use IPv4 only")
|
||||||
|
af_group.add_argument("-6", dest="ipv6_only", action="store_true", help="Use IPv6 only")
|
||||||
parser.add_argument("hosts", nargs="+", help="HBD server(s)")
|
parser.add_argument("hosts", nargs="+", help="HBD server(s)")
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
|
|
||||||
|
|||||||
@@ -68,8 +68,7 @@ async def test_nagios_runner():
|
|||||||
print(f" ✓ Collected {len(data)} data points")
|
print(f" ✓ Collected {len(data)} data points")
|
||||||
|
|
||||||
print(f"\n4. Results:")
|
print(f"\n4. Results:")
|
||||||
print(f" Overall Status: {data.get('overall_status')} (code: {data.get('overall_status_code')})")
|
print(f" Data points collected: {len(data)}")
|
||||||
print(f" Plugins Executed: {data.get('plugin_count')}")
|
|
||||||
|
|
||||||
# Show individual plugin results
|
# Show individual plugin results
|
||||||
print(f"\n5. Individual Plugin Results:")
|
print(f"\n5. Individual Plugin Results:")
|
||||||
@@ -0,0 +1,162 @@
|
|||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
from hbd.server import configio
|
||||||
|
|
||||||
|
# Minimal server config shared by the tests below. The nesting matters: the
# tests index data["users"]["alice"] and data["notification_channels"], and
# the two "#" comments are checked by the comment-preservation test.
SAMPLE_YAML = """\
# Server configuration
hbd_port: 50004 # HTTP API port
interval: 20
users:
  alice:
    full_name: Alice Smith
    admin: true
notification_channels:
  pushover_ops:
    type: pushover
    token: abc123
"""
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_roundtrip_loads_values(tmp_path):
    """Check that read_roundtrip returns the scalar and nested sample values."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)

    loaded = configio.read_roundtrip(str(cfg_file))

    assert loaded["hbd_port"] == 50004
    assert loaded["interval"] == 20
    alice = loaded["users"]["alice"]
    assert alice["full_name"] == "Alice Smith"
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_config_creates_backup(tmp_path):
    """Check that one write_config call leaves exactly one ``.bak.`` backup."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)
    target = str(cfg_file)

    doc = configio.read_roundtrip(target)
    doc["interval"] = 30
    configio.write_config(target, doc)

    found = configio.list_backups(target)
    assert len(found) == 1
    newest = found[0]
    assert ".bak." in newest
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_config_preserves_comments(tmp_path):
    """Check that both YAML comments are still in the file after write_config."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)
    target = str(cfg_file)

    doc = configio.read_roundtrip(target)
    doc["interval"] = 30
    configio.write_config(target, doc)

    rewritten = cfg_file.read_text()
    # One full-line comment and one trailing comment from SAMPLE_YAML.
    for marker in ("# Server configuration", "# HTTP API port"):
        assert marker in rewritten
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_config_atomically_replaces_file(tmp_path):
    """Check that write_config leaves no ``.tmp`` file and the new value is readable."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)
    target = str(cfg_file)

    doc = configio.read_roundtrip(target)
    doc["interval"] = 99
    configio.write_config(target, doc)

    leftover = tmp_path / ".hb.yaml.tmp"
    assert not leftover.exists()
    reread = configio.read_roundtrip(target)
    assert reread["interval"] == 99
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_config_backup_rotation(tmp_path):
    """Check that writing with ten backups present keeps ten and drops the oldest."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)
    target = str(cfg_file)

    # Seed ten pre-existing backups carrying old timestamps.
    for seq in range(10):
        stale = tmp_path / f".hb.yaml.bak.20260101-{seq:06d}"
        stale.write_text("old")

    doc = configio.read_roundtrip(target)
    configio.write_config(target, doc)

    remaining = configio.list_backups(target)
    assert len(remaining) == 10
    oldest = tmp_path / ".hb.yaml.bak.20260101-000000"
    assert not oldest.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_list_backups_newest_first(tmp_path):
    """Check that list_backups returns all backups in descending order."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)

    for seq in range(3):
        backup = tmp_path / f".hb.yaml.bak.20260101-{seq:02d}0000"
        backup.write_text("b")

    listed = configio.list_backups(str(cfg_file))
    assert len(listed) == 3
    expected_order = sorted(listed, reverse=True)
    assert listed == expected_order
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_structured_section_server_updates_keys(tmp_path):
    """Check that the "server" section writes the supplied keys into the config."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(cfg_file))

    changes = {"interval": 60, "hbd_port": 8080}
    configio.apply_structured_section(doc, "server", changes)

    assert doc["interval"] == 60
    assert doc["hbd_port"] == 8080
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_structured_section_server_ignores_unknown_keys(tmp_path):
    """Check that an unrecognised key in a "server" update is not added."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(cfg_file))

    changes = {"interval": 60, "not_a_key": "x"}
    configio.apply_structured_section(doc, "server", changes)

    assert "not_a_key" not in doc
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_structured_section_users_replaces_dict(tmp_path):
    """Check that the "users" section replaces the user mapping rather than merging."""
    cfg_file = tmp_path / ".hb.yaml"
    cfg_file.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(cfg_file))

    replacement = {"bob": {"full_name": "Bob Jones", "admin": False}}
    configio.apply_structured_section(doc, "users", replacement)

    users = doc["users"]
    assert "alice" not in users
    assert doc["users"]["bob"]["full_name"] == "Bob Jones"
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_yaml_section_notification_channels(tmp_path):
    """A YAML payload for notification_channels replaces the existing channel map."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    payload = "email_ops:\n type: email\n recipients: [ops@example.com]\n"
    configio.apply_yaml_section(doc, "notification_channels", payload)

    channels = doc["notification_channels"]
    assert "email_ops" in channels
    assert "pushover_ops" not in channels
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_yaml_section_thresholds_maps_to_threshold_configs(tmp_path):
    """The "thresholds" section name is stored under the threshold_configs key."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    payload = "default:\n cpu: 80\n"
    configio.apply_yaml_section(doc, "thresholds", payload)

    assert "threshold_configs" in doc
    default_thresholds = doc["threshold_configs"]["default"]
    assert default_thresholds["cpu"] == 80
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_yaml_section_dns_replaces_each_key(tmp_path):
    """Each key of a "dns" payload is written as its own top-level config key."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    dns_yaml = "nsupdate_bin: /usr/bin/nsupdate\ndyndomains: [dyn.example.com]\n"
    configio.apply_yaml_section(doc, "dns", dns_yaml)

    assert doc["nsupdate_bin"] == "/usr/bin/nsupdate"
    assert doc["dyndomains"] == ["dyn.example.com"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_yaml_section_unknown_raises(tmp_path):
    """An unrecognised YAML section name is rejected with ValueError."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    expected_error = pytest.raises(ValueError, match="Unknown YAML section")
    with expected_error:
        configio.apply_yaml_section(doc, "nope", "x: 1\n")
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_structured_section_unknown_raises(tmp_path):
    """An unrecognised structured section name is rejected with ValueError."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    expected_error = pytest.raises(ValueError, match="Unknown structured section")
    with expected_error:
        configio.apply_structured_section(doc, "nope", {"x": 1})
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_roundtrip_missing_file_raises(tmp_path):
    """Reading a path that does not exist must raise FileNotFoundError."""
    missing = tmp_path / "nonexistent.yaml"
    with pytest.raises(FileNotFoundError):
        configio.read_roundtrip(str(missing))
|
||||||
@@ -20,7 +20,7 @@ def test_handle_cmd_sends_command():
|
|||||||
import hbdclass
|
import hbdclass
|
||||||
|
|
||||||
ctx = {
|
ctx = {
|
||||||
"config": {"watchhosts": [], "dyndnshosts": []},
|
"config": {"watchhosts": []},
|
||||||
"hbdclass": hbdclass,
|
"hbdclass": hbdclass,
|
||||||
"log": dummy_noop,
|
"log": dummy_noop,
|
||||||
"email": dummy_noop,
|
"email": dummy_noop,
|
||||||
|
|||||||
@@ -0,0 +1,173 @@
|
|||||||
|
"""Tests for the config read/write API helpers in http.py."""
|
||||||
|
import pytest
|
||||||
|
from hbd.server import http
|
||||||
|
|
||||||
|
|
||||||
|
def test_mask_config_for_api_masks_user_passwords():
    """A user's password hash is replaced by the mask; other fields pass through."""
    alice = {"full_name": "Alice", "admin": True, "password": "pbkdf2:sha256:abc"}
    raw = {
        "hbd_port": 50004,
        "interval": 20,
        "users": {"alice": alice},
        "oauth": {},
    }

    masked_alice = http._mask_config_for_api(raw)["users"]["alice"]

    assert masked_alice["password"] == "•••"
    assert masked_alice["full_name"] == "Alice"
|
||||||
|
|
||||||
|
|
||||||
|
def test_mask_config_for_api_masks_oauth_client_secret():
    """An OAuth provider's client_secret is masked while client_id stays readable."""
    gitea = {
        "type": "gitea",
        "url": "https://git.example.com",
        "client_id": "cid",
        "client_secret": "verysecret",
    }
    raw = {
        "hbd_port": 50004,
        "interval": 20,
        "users": {},
        "oauth": {"gitea": gitea},
    }

    masked_gitea = http._mask_config_for_api(raw)["oauth"]["gitea"]

    assert masked_gitea["client_secret"] == "•••"
    assert masked_gitea["client_id"] == "cid"
|
||||||
|
|
||||||
|
|
||||||
|
def test_mask_config_for_api_includes_server_keys():
    """Top-level server settings are exposed under the "server" key of the result."""
    raw = {"hbd_port": 50004, "interval": 20, "users": {}, "oauth": {}}

    server_view = http._mask_config_for_api(raw)["server"]

    assert server_view["hbd_port"] == 50004
    assert server_view["interval"] == 20
|
||||||
|
|
||||||
|
|
||||||
|
def test_mask_config_for_api_no_password_in_users_leaves_no_key():
    """Masking must not invent a password key for a user who has none."""
    bob = {"full_name": "Bob", "admin": False}
    raw = {
        "hbd_port": 50004,
        "users": {"bob": bob},
        "oauth": {},
    }

    masked_bob = http._mask_config_for_api(raw)["users"]["bob"]

    assert "password" not in masked_bob
|
||||||
|
|
||||||
|
|
||||||
|
# ---- configio integration for write path ----
|
||||||
|
|
||||||
|
def test_write_path_applies_server_section(tmp_path):
    """A server-section change survives write_config and a fresh read."""
    from hbd.server import configio

    config_path = tmp_path / ".hb.yaml"
    config_path.write_text("hbd_port: 50004\ninterval: 20\nusers: {}\n")
    path_str = str(config_path)

    doc = configio.read_roundtrip(path_str)
    configio.apply_structured_section(doc, "server", {"interval": 60})
    configio.write_config(path_str, doc)

    reloaded = configio.read_roundtrip(path_str)
    assert reloaded["interval"] == 60
    assert reloaded["hbd_port"] == 50004  # unchanged
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_path_applies_yaml_section(tmp_path):
    """A YAML-section replacement survives write_config and a fresh read."""
    from hbd.server import configio

    cfg = tmp_path / ".hb.yaml"
    # old_ch's attributes are indented beneath it so that "type" belongs to the
    # channel instead of parsing as a second entry of notification_channels.
    cfg.write_text(
        "hbd_port: 50004\n"
        "notification_channels:\n"
        "  old_ch:\n"
        "    type: email\n"
    )

    data = configio.read_roundtrip(str(cfg))
    configio.apply_yaml_section(data, "notification_channels", "new_ch:\n type: pushover\n")
    configio.write_config(str(cfg), data)

    data2 = configio.read_roundtrip(str(cfg))
    assert "new_ch" in data2["notification_channels"]
    assert "old_ch" not in data2["notification_channels"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_path_hashes_plaintext_password(tmp_path):
    """A plaintext password in a users payload reaches disk only as a pbkdf2 hash."""
    from hbd.server import configio
    from hbd.server import users as users_mod

    cfg = tmp_path / ".hb.yaml"
    # alice's attributes are nested under her key; without the indentation they
    # would parse as extra entries of the users mapping.
    cfg.write_text(
        "hbd_port: 50004\n"
        "users:\n"
        "  alice:\n"
        "    full_name: Alice\n"
        "    admin: true\n"
        "    password: pbkdf2:sha256:old\n"
    )
    data = configio.read_roundtrip(str(cfg))

    # Simulate what the POST handler does: hash plaintext password
    new_users = {"alice": {"full_name": "Alice", "admin": True, "password": "newplaintext"}}
    for attrs in new_users.values():
        pw = attrs.get("password", "")
        # Values already carrying the pbkdf2 prefix are treated as hashes and kept.
        if pw and not pw.startswith("pbkdf2:"):
            attrs["password"] = users_mod.hash_password(pw)

    configio.apply_structured_section(data, "users", new_users)
    configio.write_config(str(cfg), data)

    data2 = configio.read_roundtrip(str(cfg))
    assert data2["users"]["alice"]["password"].startswith("pbkdf2:")
    assert data2["users"]["alice"]["password"] != "newplaintext"
|
||||||
|
|
||||||
|
|
||||||
|
def test_rollback_restores_backup(tmp_path):
    """Writing a backup's contents back over the config restores the old value."""
    from hbd.server import configio

    config_path = tmp_path / ".hb.yaml"
    config_path.write_text("hbd_port: 50004\ninterval: 20\n")
    path_str = str(config_path)

    # Make a change to create a backup
    doc = configio.read_roundtrip(path_str)
    doc["interval"] = 99
    configio.write_config(path_str, doc)
    available = configio.list_backups(path_str)
    assert len(available) == 1

    # Read the backup and write it back (simulating rollback)
    previous = configio.read_roundtrip(available[0])
    configio.write_config(path_str, previous)

    after_rollback = configio.read_roundtrip(path_str)
    assert after_rollback["interval"] == 20
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_path_preserves_masked_password(tmp_path):
    """The "•••" sentinel must preserve the existing hash, not write "•••" to disk."""
    from hbd.server import configio
    from hbd.server import users as users_mod

    cfg = tmp_path / ".hb.yaml"
    original_hash = "pbkdf2:sha256:original_hash"
    # alice's attributes must be nested beneath her key; otherwise "alice" parses
    # as null and the lookup of her existing hash below finds nothing.
    cfg.write_text(
        "hbd_port: 50004\n"
        "users:\n"
        "  alice:\n"
        "    full_name: Alice\n"
        "    admin: true\n"
        f"    password: {original_hash}\n"
    )
    data = configio.read_roundtrip(str(cfg))

    # Simulate what api_config_post does when client sends "•••" back
    existing_users = data.get("users") or {}
    users_payload = {"alice": {"full_name": "Alice", "admin": True, "password": "•••"}}
    for username, attrs in users_payload.items():
        pw = attrs.get("password", "")
        if pw and pw != "•••" and not pw.startswith("pbkdf2:"):
            # A genuinely new plaintext password: hash it.
            attrs["password"] = users_mod.hash_password(pw)
        elif not pw or pw == "•••":
            # Empty or masked: carry over the stored hash, or drop the key if
            # the user has no stored hash.
            existing_hash = (existing_users.get(username) or {}).get("password", "")
            if existing_hash:
                attrs["password"] = existing_hash
            else:
                attrs.pop("password", None)

    configio.apply_structured_section(data, "users", users_payload)
    configio.write_config(str(cfg), data)

    data2 = configio.read_roundtrip(str(cfg))
    assert data2["users"]["alice"]["password"] == original_hash, (
        f"Expected original hash preserved, got: {data2['users']['alice']['password']!r}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_path_preserves_oauth_client_secret(tmp_path):
    """The "•••" sentinel for oauth client_secret must preserve the existing secret."""
    from hbd.server import configio

    cfg = tmp_path / ".hb.yaml"
    original_secret = "real_client_secret_value"
    # The provider's attributes must be nested beneath "gitea"; otherwise
    # "gitea" parses as null and its stored secret cannot be looked up.
    cfg.write_text(
        "hbd_port: 50004\n"
        "oauth:\n"
        "  gitea:\n"
        "    type: gitea\n"
        "    url: https://git.example.com\n"
        "    client_id: cid123\n"
        f"    client_secret: {original_secret}\n"
    )
    data = configio.read_roundtrip(str(cfg))

    # Simulate what api_config_post does when client sends "•••" back for client_secret
    existing_oauth = data.get("oauth") or {}
    new_oauth = {"gitea": {"type": "gitea", "url": "https://git.example.com", "client_id": "cid123", "client_secret": "•••"}}
    for name, attrs in new_oauth.items():
        cs = attrs.get("client_secret", "")
        if not cs or cs == "•••":
            # Empty or masked: carry over the stored secret, or drop the key if
            # nothing is stored for this provider.
            existing_cs = (existing_oauth.get(name) or {}).get("client_secret", "")
            if existing_cs:
                attrs["client_secret"] = existing_cs
            else:
                attrs.pop("client_secret", None)

    data["oauth"] = new_oauth
    configio.write_config(str(cfg), data)

    data2 = configio.read_roundtrip(str(cfg))
    assert data2["oauth"]["gitea"]["client_secret"] == original_secret, (
        f"Expected original secret preserved, got: {data2['oauth']['gitea']['client_secret']!r}"
    )
|
||||||
@@ -0,0 +1,174 @@
|
|||||||
|
"""Tests for _build_host_info helper in http.py."""
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
from hbd.server.http import _build_host_info
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeConn:
|
||||||
|
def __init__(self, lastbeat):
|
||||||
|
self.lastbeat = lastbeat
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeHost:
|
||||||
|
def __init__(self, name="myhost", owner=None, managers=None,
|
||||||
|
connections=None, os_data=None, plugin_data=None):
|
||||||
|
self.name = name
|
||||||
|
self.owner = owner
|
||||||
|
self.managers = managers or []
|
||||||
|
self.connections = connections or {}
|
||||||
|
self._os_data = os_data
|
||||||
|
self.plugin_data = plugin_data or {}
|
||||||
|
|
||||||
|
def get_latest_plugin_data(self, plugin_name):
|
||||||
|
if plugin_name == "os_info" and self._os_data is not None:
|
||||||
|
return (1234567890.0, self._os_data)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_basic_fields():
    """Owner and managers are copied through; everything else defaults to None."""
    info = _build_host_info(_FakeHost(owner="alice", managers=["bob", "carol"]))

    assert info["owner"] == "alice"
    assert info["managers"] == ["bob", "carol"]
    # With no os_info, connections, or threshold checker these stay unset.
    assert info["hbc_version"] is None
    assert info["hbc_type"] is None
    assert info["last_packet"] is None
    assert info["thresholds"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_no_owner():
    """A host without owner or managers yields None and an empty list."""
    info = _build_host_info(_FakeHost())

    assert info["owner"] is None
    assert info["managers"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_reads_hbc_from_os_info():
    """hbc_version and hbc_type are taken from the host's os_info plugin data."""
    os_info = {"hbc_version": "5.3.0", "hbc_type": "full"}

    info = _build_host_info(_FakeHost(os_data=os_info))

    assert info["hbc_version"] == "5.3.0"
    assert info["hbc_type"] == "full"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_hbc_none_when_no_os_info():
    """Without os_info plugin data both hbc fields are None."""
    info = _build_host_info(_FakeHost(os_data=None))

    assert info["hbc_version"] is None
    assert info["hbc_type"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_last_packet_is_max_lastbeat():
    """last_packet is the largest lastbeat across the host's connections."""
    conns = {
        "IPv4": _FakeConn(1000.0),
        "IPv6": _FakeConn(2000.0),
    }

    info = _build_host_info(_FakeHost(connections=conns))

    assert info["last_packet"] == 2000.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_last_packet_none_when_no_connections():
    """A host with no connections reports last_packet as None."""
    info = _build_host_info(_FakeHost(connections={}))

    assert info["last_packet"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_thresholds_none_without_checker():
    """Passing threshold_checker=None leaves thresholds as None."""
    info = _build_host_info(_FakeHost(), threshold_checker=None)

    assert info["thresholds"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_thresholds_sorted_by_metric():
    """Thresholds come back ordered by metric name with their fields serialized."""
    from hbd.server.threshold import ThresholdConfig

    cpu_rule = ThresholdConfig("cpu_monitor.cpu_percent", warning=80.0, critical=95.0)
    mem_rule = ThresholdConfig("memory_monitor.memory_percent", warning=85.0, critical=98.0)

    # Deliberately inserted memory-first so the output order cannot be an
    # accident of dict insertion order.
    by_metric = {
        "memory_monitor.memory_percent": mem_rule,
        "cpu_monitor.cpu_percent": cpu_rule,
    }
    checker = MagicMock()
    checker.get_thresholds_for_host.return_value = by_metric

    info = _build_host_info(_FakeHost(), threshold_checker=checker)

    listed = info["thresholds"]
    assert listed is not None
    assert len(info["thresholds"]) == 2
    first = info["thresholds"][0]
    assert first["metric"] == "cpu_monitor.cpu_percent"
    assert first["warning"] == 80.0
    assert first["critical"] == 95.0
    assert first["operator"] == ">"
    assert info["thresholds"][1]["metric"] == "memory_monitor.memory_percent"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_thresholds_empty_list_when_no_thresholds():
    """A checker that returns no thresholds yields an empty list, not None."""
    checker = MagicMock()
    checker.get_thresholds_for_host.return_value = {}

    info = _build_host_info(_FakeHost(), threshold_checker=checker)

    assert info["thresholds"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_threshold_null_warning_critical():
    """A threshold whose warning is None keeps that None next to its critical value."""
    from hbd.server.threshold import ThresholdConfig

    rtt_rule = ThresholdConfig("rtt.myhost", warning=None, critical=500.0)
    checker = MagicMock()
    checker.get_thresholds_for_host.return_value = {"rtt.myhost": rtt_rule}

    info = _build_host_info(_FakeHost(), threshold_checker=checker)

    entry = info["thresholds"][0]
    assert entry["warning"] is None
    assert info["thresholds"][0]["critical"] == 500.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_nagios_operator_serialized():
    """The "nagios" operator is passed through to the serialized threshold."""
    from hbd.server.threshold import ThresholdConfig

    nagios_rule = ThresholdConfig("nagios_runner.check_http", operator="nagios")
    checker = MagicMock()
    checker.get_thresholds_for_host.return_value = {"nagios_runner.check_http": nagios_rule}

    info = _build_host_info(_FakeHost(), threshold_checker=checker)

    assert info["thresholds"][0]["operator"] == "nagios"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_covers_suffix_matched_metrics():
    """memory_monitor.percent threshold covers swap_percent via suffix match."""
    from hbd.server.threshold import ThresholdConfig

    percent_rule = ThresholdConfig("memory_monitor.percent", warning=85.0, critical=95.0)
    checker = MagicMock()
    checker.get_thresholds_for_host.return_value = {"memory_monitor.percent": percent_rule}

    host = _FakeHost(connections={}, os_data=None)
    # Simulate plugin_data with both percent and swap_percent fields
    sample = {
        "percent": 80.0,
        "swap_percent": 25.0,
        "available_mb": 2000,
    }
    host.plugin_data = {"memory_monitor": [(1234567890.0, sample)]}

    info = _build_host_info(host, threshold_checker=checker)

    assert info["thresholds"] is not None
    entry = info["thresholds"][0]
    assert entry["metric"] == "memory_monitor.percent"
    assert entry["covers"] == ["memory_monitor.swap_percent"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_host_info_covers_empty_when_exact_matches_only():
    """No covers when all plugin fields match their threshold exactly."""
    from hbd.server.threshold import ThresholdConfig

    percent_rule = ThresholdConfig("memory_monitor.percent", warning=85.0, critical=95.0)
    checker = MagicMock()
    checker.get_thresholds_for_host.return_value = {"memory_monitor.percent": percent_rule}

    host = _FakeHost()
    # Only the exactly-named field is present in the plugin data.
    host.plugin_data = {"memory_monitor": [(1234567890.0, {"percent": 80.0})]}

    info = _build_host_info(host, threshold_checker=checker)

    entry = info["thresholds"][0]
    assert entry["covers"] == []
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
"""Tests for PUT /api/0/users/me logic."""
|
||||||
|
import pytest
|
||||||
|
from hbd.server import users as users_mod
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_password_roundtrip():
    """A hash produced by hash_password must verify against its own plaintext.

    The hash is loaded into the user table and checked through authenticate,
    the same way the other tests in this module verify credentials.
    """
    h = users_mod.hash_password("mysecret")
    assert h.startswith("pbkdf2:sha256:")
    # The plaintext itself must never be what gets stored.
    assert h != "mysecret"

    users_mod.load_users(
        {"users": {"alice": {"full_name": "Alice", "admin": True, "password": h}}}
    )
    # Round trip: the original plaintext is accepted, anything else is rejected.
    assert users_mod.authenticate("alice", "mysecret") is not None
    assert users_mod.authenticate("alice", "not-mysecret") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_password_change_requires_correct_current(tmp_path):
    """authenticate accepts the stored password and rejects a wrong one."""
    cfg = tmp_path / ".hb.yaml"
    initial_hash = users_mod.hash_password("oldpass")
    # alice's attributes are nested beneath her key so the file describes the
    # same user table that is loaded below.
    cfg.write_text(
        "hbd_port: 50004\n"
        "users:\n"
        "  alice:\n"
        "    full_name: Alice\n"
        "    admin: true\n"
        f"    password: {initial_hash}\n"
    )
    users_mod.load_users({"users": {"alice": {"full_name": "Alice", "admin": True, "password": initial_hash}}})

    # Correct current password authenticates
    assert users_mod.authenticate("alice", "oldpass") is not None
    # Wrong current password does not authenticate
    assert users_mod.authenticate("alice", "wrongpass") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_put_users_me_writes_new_fields(tmp_path):
    """Simulate the write path: read config, update user, write back."""
    from hbd.server import configio

    initial_hash = users_mod.hash_password("secret")
    # alice's attributes must be nested beneath her key; otherwise "alice"
    # parses as null and copying her entry below fails.
    yaml_content = (
        "hbd_port: 50004\n"
        "users:\n"
        "  alice:\n"
        "    full_name: Old Name\n"
        "    admin: true\n"
        f"    password: {initial_hash}\n"
    )
    cfg = tmp_path / ".hb.yaml"
    cfg.write_text(yaml_content)

    data = configio.read_roundtrip(str(cfg))

    # Simulate handler updating full_name and avatar
    user_entry = dict(data["users"]["alice"])
    user_entry["full_name"] = "New Name"
    user_entry["avatar"] = "/img/alice.png"
    data["users"]["alice"] = user_entry

    configio.write_config(str(cfg), data)
    result = configio.read_roundtrip(str(cfg))
    assert result["users"]["alice"]["full_name"] == "New Name"
    assert result["users"]["alice"]["avatar"] == "/img/alice.png"
    assert result["users"]["alice"]["password"] == initial_hash  # unchanged
|
||||||
|
|
||||||
|
|
||||||
|
def test_put_users_me_changes_password(tmp_path):
    """After the stored hash is replaced, only the new password authenticates."""
    from hbd.server import configio

    initial_hash = users_mod.hash_password("oldpass")
    cfg = tmp_path / ".hb.yaml"
    # alice's attributes must be nested beneath her key; otherwise "alice"
    # parses as null and assigning her password below fails.
    cfg.write_text(
        "hbd_port: 50004\n"
        "users:\n"
        "  alice:\n"
        "    full_name: Alice\n"
        f"    password: {initial_hash}\n"
    )
    data = configio.read_roundtrip(str(cfg))

    new_hash = users_mod.hash_password("newpass")
    data["users"]["alice"]["password"] = new_hash
    configio.write_config(str(cfg), data)

    result = configio.read_roundtrip(str(cfg))
    # Load users from new config and authenticate with new password
    new_config = {"users": dict(result["users"])}
    users_mod.load_users(new_config)
    assert users_mod.authenticate("alice", "newpass") is not None
    assert users_mod.authenticate("alice", "oldpass") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_put_users_me_notification_channels(tmp_path):
    """A user's notification_channels list survives write_config and a fresh read."""
    from hbd.server import configio

    cfg = tmp_path / ".hb.yaml"
    # Both the channel and the user are nested mappings; without indentation
    # "alice" parses as null and the assignment below fails.
    cfg.write_text(
        "hbd_port: 50004\n"
        "notification_channels:\n"
        "  pushover_ops:\n"
        "    type: pushover\n"
        "users:\n"
        "  alice:\n"
        "    full_name: Alice\n"
        "    notification_channels: []\n"
    )
    data = configio.read_roundtrip(str(cfg))
    data["users"]["alice"]["notification_channels"] = ["pushover_ops"]
    configio.write_config(str(cfg), data)

    result = configio.read_roundtrip(str(cfg))
    assert result["users"]["alice"]["notification_channels"] == ["pushover_ops"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_visible_channels_excludes_private_from_others():
    """Private channels owned by another user must not appear in the visible set."""
    config = {
        "notification_channels": {
            "public_ch": {"type": "pushover", "token": "t", "user": "u"},
            "alice_priv": {"type": "email", "owner": "alice", "private": True,
                           "recipients": ["a@b.com"], "sender": "s@b.com", "smtp_server": "s"},
            "bob_priv": {"type": "email", "owner": "bob", "private": True,
                         "recipients": ["b@b.com"], "sender": "s@b.com", "smtp_server": "s"},
        }
    }

    class FakeUser:
        """User stand-in with only the two attributes the helper reads."""

        def __init__(self, username, admin=False):
            self.username = username
            self.admin = admin

    alice = FakeUser("alice")
    bob = FakeUser("bob")
    admin = FakeUser("admin", admin=True)

    # Simulate _visible_channels_for_user logic (mirrors http.py implementation)
    def visible(user):
        all_channels = config.get("notification_channels") or {}
        if user.admin:
            return set(all_channels.keys())
        # Non-admins see every non-private channel plus the private ones they own.
        return {
            name for name, cfg in all_channels.items()
            if not cfg.get("private") or cfg.get("owner") == user.username
        }

    assert visible(alice) == {"public_ch", "alice_priv"}
    assert visible(bob) == {"public_ch", "bob_priv"}
    assert visible(admin) == {"public_ch", "alice_priv", "bob_priv"}
|
||||||
@@ -0,0 +1,178 @@
|
|||||||
|
"""Tests for notification channel CRUD via configio helpers and visibility logic."""
|
||||||
|
import pytest
|
||||||
|
from hbd.server import configio, settings as settings_mod
|
||||||
|
|
||||||
|
|
||||||
|
# Minimal config fixture: a port plus one pushover channel. The channel and its
# attributes are indented so they nest under notification_channels.
SAMPLE_YAML = """\
hbd_port: 50004
notification_channels:
  pushover_ops:
    type: pushover
    token: abc123
    user: usr456
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# configio helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_apply_channel_adds_new_entry(tmp_path):
    """apply_channel adds a new channel and leaves the existing one in place."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    email_channel = {"type": "email", "recipients": ["ops@example.com"]}
    configio.apply_channel(doc, "email_ops", email_channel)

    assert "email_ops" in doc["notification_channels"]
    assert doc["notification_channels"]["email_ops"]["type"] == "email"
    # Existing channel preserved
    assert "pushover_ops" in doc["notification_channels"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_channel_updates_existing(tmp_path):
    """apply_channel on an existing name stores the new settings."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    updated = {"type": "pushover", "token": "new_tok", "user": "new_usr"}
    configio.apply_channel(doc, "pushover_ops", updated)

    assert doc["notification_channels"]["pushover_ops"]["token"] == "new_tok"
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_channel_creates_section_if_absent():
    """apply_channel creates notification_channels when the config lacks it."""
    doc = {"hbd_port": 50004}
    channel = {"type": "pushover", "token": "t", "user": "u"}

    configio.apply_channel(doc, "test_ch", channel)

    assert "notification_channels" in doc
    assert "test_ch" in doc["notification_channels"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_channel_removes_entry(tmp_path):
    """delete_channel removes the named channel from the config."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    doc = configio.read_roundtrip(str(config_path))

    configio.delete_channel(doc, "pushover_ops")

    remaining = doc["notification_channels"]
    assert "pushover_ops" not in remaining
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_channel_noop_for_missing():
    """Deleting an unknown channel neither raises nor touches other channels."""
    doc = {"notification_channels": {"ch1": {"type": "pushover"}}}

    configio.delete_channel(doc, "nonexistent")  # must not raise

    assert "ch1" in doc["notification_channels"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_channel_noop_when_no_section():
    """Deleting from a config with no notification_channels key must not raise."""
    empty_doc = {}
    configio.delete_channel(empty_doc, "anything")  # must not raise
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_channel_persisted_after_write(tmp_path):
    """A channel added with apply_channel survives write_config and a fresh read."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    path_str = str(config_path)

    doc = configio.read_roundtrip(path_str)
    signal_channel = {"type": "signal", "user": "+1", "recipient": "+2"}
    configio.apply_channel(doc, "signal_ops", signal_channel)
    configio.write_config(path_str, doc)

    reloaded = configio.read_roundtrip(path_str)
    assert "signal_ops" in reloaded["notification_channels"]
    assert reloaded["notification_channels"]["signal_ops"]["user"] == "+1"
    # Original channel preserved
    assert "pushover_ops" in reloaded["notification_channels"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_channel_persisted_after_write(tmp_path):
    """A channel removed with delete_channel stays gone after write and re-read."""
    config_path = tmp_path / ".hb.yaml"
    config_path.write_text(SAMPLE_YAML)
    path_str = str(config_path)

    doc = configio.read_roundtrip(path_str)
    configio.delete_channel(doc, "pushover_ops")
    configio.write_config(path_str, doc)

    reloaded = configio.read_roundtrip(path_str)
    # The section may be missing or empty after the last channel is removed.
    channels = reloaded.get("notification_channels") or {}
    assert "pushover_ops" not in channels
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Visibility logic (mirrors http.py _visible_channels_for_user)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _visible(config, user):
|
||||||
|
"""Local copy of the visibility helper for unit testing without the HTTP layer."""
|
||||||
|
all_channels = config.get("notification_channels") or {}
|
||||||
|
if user.get("admin"):
|
||||||
|
return set(all_channels.keys())
|
||||||
|
username = user["username"]
|
||||||
|
return {
|
||||||
|
name for name, cfg in all_channels.items()
|
||||||
|
if isinstance(cfg, dict) and (not cfg.get("private") or cfg.get("owner") == username)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Shared fixture for the visibility tests: one public channel, one private
# channel each for alice and bob, and one channel with an owner but no
# "private" flag.
CONFIG_VISIBILITY = {
    "notification_channels": {
        "pub_ch": {
            "type": "pushover",
            "token": "t",
            "user": "u",
        },
        "alice_priv": {
            "type": "email",
            "owner": "alice",
            "private": True,
            "recipients": ["a@a.com"],
            "sender": "s@a.com",
            "smtp_server": "s",
        },
        "bob_priv": {
            "type": "signal",
            "owner": "bob",
            "private": True,
            "user": "+1",
            "recipient": "+2",
        },
        "admin_owned": {
            "type": "pushover",
            "token": "t2",
            "user": "u2",
            "owner": "adminuser",
        },
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_public_channel_visible_to_all():
    """Every non-admin user sees the public channel."""
    for uname in ("alice", "bob", "carol"):
        seen = _visible(CONFIG_VISIBILITY, {"username": uname, "admin": False})
        assert "pub_ch" in seen
|
||||||
|
|
||||||
|
|
||||||
|
def test_private_channel_visible_only_to_owner():
    """A private channel shows up for its owner and for no other non-admin."""
    alice, bob, carol = (
        {"username": uname, "admin": False} for uname in ("alice", "bob", "carol")
    )

    # alice's private channel
    assert "alice_priv" in _visible(CONFIG_VISIBILITY, alice)
    assert "alice_priv" not in _visible(CONFIG_VISIBILITY, bob)
    assert "alice_priv" not in _visible(CONFIG_VISIBILITY, carol)

    # bob's private channel
    assert "bob_priv" in _visible(CONFIG_VISIBILITY, bob)
    assert "bob_priv" not in _visible(CONFIG_VISIBILITY, alice)
|
||||||
|
|
||||||
|
|
||||||
|
def test_admin_sees_all_channels():
    """An admin sees every channel, private ones included."""
    seen = _visible(CONFIG_VISIBILITY, {"username": "adminuser", "admin": True})

    everything = {"pub_ch", "alice_priv", "bob_priv", "admin_owned"}
    assert seen == everything
|
||||||
|
|
||||||
|
|
||||||
|
def test_admin_owned_channel_is_public_by_default():
    """A channel with an owner but no "private" flag is visible to other users."""
    seen_by_alice = _visible(CONFIG_VISIBILITY, {"username": "alice", "admin": False})

    assert "admin_owned" in seen_by_alice
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Channel type schemas
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_all_required_types_in_schema():
    """Each required channel type has an entry in CHANNEL_TYPE_SCHEMAS."""
    required_types = ("pushover", "email", "signal", "matrix", "sms_voipms")
    for type_id in required_types:
        assert type_id in settings_mod.CHANNEL_TYPE_SCHEMAS
|
||||||
|
|
||||||
|
|
||||||
|
def test_schema_fields_have_required_keys():
    """Every schema has a label and fields, and each field has the four standard keys."""
    field_keys = ("key", "label", "type", "required")
    for type_id, schema in settings_mod.CHANNEL_TYPE_SCHEMAS.items():
        assert "label" in schema, f"{type_id} missing label"
        assert "fields" in schema, f"{type_id} missing fields"
        for field in schema["fields"]:
            for k in field_keys:
                assert k in field, f"{type_id} field missing {k!r}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_secret_fields_use_secret_type():
    """Fields with a known secret key must be typed 'secret' so the UI masks them."""
    secret_keys = {
        "token",
        "user_key",
        "api_key",
        "api_password",
        "smtp_password",
        "access_token",
    }
    for type_id, schema in settings_mod.CHANNEL_TYPE_SCHEMAS.items():
        for field in schema["fields"]:
            if field["key"] not in secret_keys:
                continue
            assert field["type"] == "secret", (
                f"{type_id}.{field['key']} should be type 'secret'"
            )
|
||||||
|
|
||||||
|
|
||||||
|
def test_channel_labels_not_empty():
    """No schema label may be empty or whitespace-only."""
    for type_id, schema in settings_mod.CHANNEL_TYPE_SCHEMAS.items():
        trimmed = schema["label"].strip()
        assert trimmed, f"{type_id} has empty label"
|
||||||
@@ -0,0 +1,602 @@
|
|||||||
|
import logging
|
||||||
|
import time as time_mod
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from hbd.server import oauth
|
||||||
|
from hbd.server import users as users_mod
|
||||||
|
from hbd.server.users import User
|
||||||
|
|
||||||
|
|
||||||
|
# No "oauth" section at all.
CFG_OFF = {}

# One complete provider entry named "gitea" (url + client credentials, no "type" key).
CFG_ON = {
    "oauth": {
        "gitea": {"url": "https://git.example.com", "client_id": "cid", "client_secret": "csec"},
    },
}

# Provider entry carrying only a URL; client_id / client_secret are absent.
CFG_PARTIAL = {
    "oauth": {
        "gitea": {"url": "https://git.example.com"},
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def clear_oauth_states():
    """Autouse fixture: empty ``oauth._states`` before and after every test."""
    # Setup: start from an empty state store.
    oauth._states.clear()

    yield

    # Teardown: drop whatever the test left behind.
    oauth._states.clear()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def reset_users_dict():
    """Autouse fixture: snapshot ``users_mod.users`` and rebind it to the snapshot afterwards."""
    # Shallow copy taken before the test body runs.
    snapshot = dict(users_mod.users)

    yield

    users_mod.users = snapshot
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_make_state_returns_unique_tokens():
    """Two consecutive ``make_state()`` results differ, and a token has 64 characters."""
    first = oauth.make_state()
    second = oauth.make_state()
    assert first != second
    assert len(first) == 64  # 32 bytes hex
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_state_valid():
    """A freshly created state validates as ``True``."""
    fresh = oauth.make_state()
    outcome = oauth.validate_state(fresh)
    assert outcome is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_state_consumed_on_use():
    """Validating the same state a second time returns ``False``."""
    token = oauth.make_state()
    oauth.validate_state(token)  # first use; its result is deliberately not checked
    second_attempt = oauth.validate_state(token)
    assert second_attempt is False  # replay rejected
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_state_unknown():
    """A string that was never issued by ``make_state`` does not validate."""
    never_issued = "notastate"
    assert oauth.validate_state(never_issued) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_state_expired(monkeypatch):
    """A state whose stored value is moved 1000 s into the past does not validate."""
    token = oauth.make_state()
    # Wind expiry into the past
    long_ago = time_mod.time() - 1000
    monkeypatch.setitem(oauth._states, token, long_ago)
    assert oauth.validate_state(token) is False
|
||||||
|
|
||||||
|
|
||||||
|
def _reset_users(entries=None):
    """Rebind ``users_mod.users`` to *entries*, or to a new empty dict if *entries* is falsy."""
    users_mod.users = entries if entries else {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_provision_oauth_user_new():
    """Provisioning an unknown login registers a non-admin user with an empty password hash."""
    _reset_users()
    avatar_url = "https://example.com/avatar.png"
    created = users_mod.provision_oauth_user("gituser", "Git User", avatar_url)

    assert created.username == "gituser"
    assert created.full_name == "Git User"
    assert created.avatar == "https://example.com/avatar.png"
    assert created.admin is False
    assert created.password_hash == ""
    assert "gituser" in users_mod.users
|
||||||
|
|
||||||
|
|
||||||
|
def test_provision_oauth_user_no_password_login():
    """``check_password`` returns ``False`` for a freshly provisioned OAuth user."""
    _reset_users()
    provisioned = users_mod.provision_oauth_user("gituser", "Git User", "")
    accepted = provisioned.check_password("anything")
    assert accepted is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_provision_oauth_user_existing_updates_profile():
    """Re-provisioning a known user refreshes name/avatar and keeps the other attributes."""
    seeded = User(
        username="alice",
        full_name="Old Name",
        avatar="old.png",
        password_hash="pbkdf2:sha256:1:salt:abc",
        admin=True,
        notification_channels=["chan1"],
    )
    _reset_users({"alice": seeded})

    refreshed = users_mod.provision_oauth_user("alice", "New Name", "new.png")

    # Updated from the incoming profile
    assert refreshed.full_name == "New Name"
    assert refreshed.avatar == "new.png"
    # Preserved
    assert refreshed.admin is True
    assert refreshed.password_hash == "pbkdf2:sha256:1:salt:abc"
    assert refreshed.notification_channels == ["chan1"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_provision_oauth_user_does_not_overwrite_with_empty():
    """Empty name/avatar arguments leave the stored name and avatar untouched."""
    seeded = User(username="bob", full_name="Bob", avatar="bob.png")
    _reset_users({"bob": seeded})

    result = users_mod.provision_oauth_user("bob", "", "")

    assert result.full_name == "Bob"
    assert result.avatar == "bob.png"
|
||||||
|
|
||||||
|
|
||||||
|
def test_provision_oauth_user_survives_config_reload():
    """A provisioned user is still registered after ``load_users({})``."""
    login = "oauthonly"
    _reset_users()
    users_mod.provision_oauth_user(login, "OAuth Only", "https://example.com/a.png")
    assert login in users_mod.users

    # Reload with empty config — OAuth user should survive
    users_mod.load_users({})
    assert login in users_mod.users
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Integration-style tests: callback logic chain
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_callback_invalid_state_rejects():
    """Verify validate_state returns False for unknown state tokens."""
    # Nothing is awaited here; the check itself is synchronous.
    verdict = oauth.validate_state("this-is-not-a-real-state")
    assert verdict is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_full_oauth_flow_chain():
    """Integration-style test: state → exchange → fetch → provision chain."""
    provider = _gitea_provider()
    callback = "https://hbd.example.com/login/oauth/gitea/callback"

    # Step 1: a fresh state validates.
    fresh_state = oauth.make_state()
    assert oauth.validate_state(fresh_state) is True

    # Step 2: canned HTTP replies for the token and profile endpoints.
    token_reply = AsyncMock()
    token_reply.status = 200
    token_reply.json = AsyncMock(return_value={"access_token": "flow_token"})

    user_reply = AsyncMock()
    user_reply.status = 200
    user_reply.json = AsyncMock(return_value={
        "login": "flowuser",
        "full_name": "Flow User",
        "avatar_url": "https://git.example.com/avatars/flow.png",
    })

    # Mocked so that ``async with session.post(...)`` / ``session.get(...)`` yield the replies.
    post_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=token_reply),
        __aexit__=AsyncMock(return_value=False),
    )
    get_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=user_reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.post = MagicMock(return_value=post_cm)
    session.get = MagicMock(return_value=get_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        token = await oauth.exchange_code(provider, "authcode", callback)
        profile = await oauth.fetch_user(provider, token)

    assert token == "flow_token"
    assert profile["login"] == "flowuser"

    # Step 3: provision a user from the fetched profile.
    _reset_users()
    user = users_mod.provision_oauth_user(
        profile["login"],
        profile["full_name"],
        profile["avatar_url"],
    )
    assert user.username == "flowuser"
    assert user.check_password("anything") is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_providers()
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Single GitHub provider; no "url" key.
CFG_GITHUB = {
    "oauth": {
        "github": {
            "type": "github",
            "client_id": "ghid",
            "client_secret": "ghs",
        },
    },
}

# Single Nextcloud provider with an instance URL.
CFG_NEXTCLOUD = {
    "oauth": {
        "nc": {"type": "nextcloud", "url": "https://nc.example.com",
               "client_id": "ncid", "client_secret": "ncs"},
    },
}

# Three providers at once; only "mygitea" sets a custom label and logo.
CFG_MULTI = {
    "oauth": {
        "mygitea": {
            "type": "gitea", "url": "https://git.example.com",
            "client_id": "cid", "client_secret": "cs",
            "label": "Work Gitea", "logo": "https://example.com/logo.png",
        },
        "github": {
            "type": "github",
            "client_id": "ghid",
            "client_secret": "ghs",
        },
        "nc": {"type": "nextcloud", "url": "https://nc.example.com",
               "client_id": "ncid", "client_secret": "ncs"},
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_backward_compat_no_type_field():
    """Old config without 'type' defaults to gitea."""
    resolved = oauth.get_providers(CFG_ON)
    assert len(resolved) == 1

    provider = resolved[0]
    # Checked in this order; the URLs are derived from the configured base URL.
    expected = {
        "name": "gitea",
        "type": "gitea",
        "label": "Gitea",
        "client_id": "cid",
        "authorize_url": "https://git.example.com/login/oauth/authorize",
        "token_url": "https://git.example.com/login/oauth/access_token",
        "profile_url": "https://git.example.com/api/v1/user",
        "scope": "user:email",
        "profile_data_path": [],
    }
    for attr, value in expected.items():
        assert getattr(provider, attr) == value
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_multiple():
    """All three entries of ``CFG_MULTI`` are resolved, by name."""
    resolved = oauth.get_providers(CFG_MULTI)
    assert len(resolved) == 3

    names = [provider.name for provider in resolved]
    for wanted in ("mygitea", "github", "nc"):
        assert wanted in names
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_custom_label_and_logo():
    """Configured ``label`` and ``logo`` are carried onto the resolved provider."""
    by_name = (p for p in oauth.get_providers(CFG_MULTI) if p.name == "mygitea")
    work_gitea = next(by_name)
    assert work_gitea.label == "Work Gitea"
    assert work_gitea.logo == "https://example.com/logo.png"
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_github_default_label():
    """Without overrides a GitHub provider is labelled "GitHub" and has an empty logo."""
    github = oauth.get_providers(CFG_GITHUB)[0]
    assert github.label == "GitHub"
    assert github.logo == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_github_fixed_urls():
    """A GitHub provider resolves to the github.com endpoints and the ``read:user`` scope."""
    github = oauth.get_providers(CFG_GITHUB)[0]
    expected = {
        "authorize_url": "https://github.com/login/oauth/authorize",
        "token_url": "https://github.com/login/oauth/access_token",
        "profile_url": "https://api.github.com/user",
        "scope": "read:user",
    }
    for attr, value in expected.items():
        assert getattr(github, attr) == value
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_nextcloud_urls_and_path():
    """A Nextcloud provider gets instance-based URLs, an ``ocs.data`` path and empty scope."""
    nextcloud = oauth.get_providers(CFG_NEXTCLOUD)[0]
    base = "https://nc.example.com"
    assert nextcloud.authorize_url == base + "/apps/oauth2/authorize"
    assert nextcloud.token_url == base + "/apps/oauth2/api/v1/token"
    assert nextcloud.profile_url == base + "/ocs/v2.php/cloud/user?format=json"
    assert nextcloud.profile_data_path == ["ocs", "data"]
    assert nextcloud.scope == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_skips_missing_client_id(caplog):
    """An entry without ``client_id`` is dropped; the captured log mentions "missing"."""
    entry = {"url": "https://git.example.com", "client_secret": "cs"}
    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
        resolved = oauth.get_providers({"oauth": {"gitea": entry}})
    assert resolved == []
    assert "missing" in caplog.text.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_skips_missing_client_secret(caplog):
    """An entry without ``client_secret`` is dropped; the captured log mentions "missing"."""
    entry = {"url": "https://git.example.com", "client_id": "cid"}
    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
        resolved = oauth.get_providers({"oauth": {"gitea": entry}})
    assert resolved == []
    assert "missing" in caplog.text.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_skips_missing_url_for_gitea(caplog):
    """A gitea entry without ``url`` is dropped; the captured log mentions "url"."""
    entry = {"type": "gitea", "client_id": "cid", "client_secret": "cs"}
    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
        resolved = oauth.get_providers({"oauth": {"gitea": entry}})
    assert resolved == []
    assert "url" in caplog.text.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_skips_missing_url_for_nextcloud(caplog):
    """A nextcloud entry without ``url`` is dropped; the captured log mentions "url"."""
    entry = {"type": "nextcloud", "client_id": "cid", "client_secret": "cs"}
    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
        resolved = oauth.get_providers({"oauth": {"nc": entry}})
    assert resolved == []
    assert "url" in caplog.text.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_github_no_url_required():
    """``CFG_GITHUB`` has no ``url`` key and still resolves to exactly one provider."""
    resolved = oauth.get_providers(CFG_GITHUB)
    assert len(resolved) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_skips_unknown_type(caplog):
    """An unrecognised provider ``type`` resolves to nothing and is named in the log.

    Relies on the module-level ``logging`` import, like the sibling ``caplog``
    tests, instead of re-importing it inside the function.
    """
    cfg = {"oauth": {"mystery": {"type": "saml", "client_id": "cid", "client_secret": "cs"}}}
    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
        result = oauth.get_providers(cfg)
    assert result == []
    # Case-sensitive on purpose: the configured type string appears verbatim.
    assert "saml" in caplog.text
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_providers_empty_config():
    """An empty config (literal or ``CFG_OFF``) resolves to an empty list."""
    for empty_cfg in ({}, CFG_OFF):
        assert oauth.get_providers(empty_cfg) == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# build_auth_url / exchange_code / fetch_user (generic, ResolvedProvider-based)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _gitea_provider() -> oauth.ResolvedProvider:
    """Return the first provider resolved from ``CFG_ON``."""
    resolved = oauth.get_providers(CFG_ON)
    return resolved[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _github_provider() -> oauth.ResolvedProvider:
    """Return the first provider resolved from ``CFG_GITHUB``."""
    resolved = oauth.get_providers(CFG_GITHUB)
    return resolved[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _nextcloud_provider() -> oauth.ResolvedProvider:
    """Return the first provider resolved from ``CFG_NEXTCLOUD``."""
    resolved = oauth.get_providers(CFG_NEXTCLOUD)
    return resolved[0]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_auth_url_gitea():
    """The Gitea authorize URL targets the configured host and carries all query params."""
    callback = "https://hbd.example.com/login/oauth/gitea/callback"
    url = oauth.build_auth_url(_gitea_provider(), "teststate", callback)

    parts = urlparse(url)
    query = parse_qs(parts.query)

    assert parts.netloc == "git.example.com"
    assert parts.path == "/login/oauth/authorize"
    # parse_qs maps every key to a list of values.
    assert query["client_id"] == ["cid"]
    assert query["state"] == ["teststate"]
    assert query["scope"] == ["user:email"]
    assert query["response_type"] == ["code"]
    assert query["redirect_uri"] == ["https://hbd.example.com/login/oauth/gitea/callback"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_auth_url_github():
    """The GitHub authorize URL points at github.com with scope ``read:user``."""
    callback = "https://hbd.example.com/login/oauth/github/callback"
    parts = urlparse(oauth.build_auth_url(_github_provider(), "st", callback))
    query = parse_qs(parts.query)
    assert parts.netloc == "github.com"
    assert query["scope"] == ["read:user"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_auth_url_nextcloud_no_scope_param():
    """Nextcloud scope is empty — the 'scope' key must be absent from the URL."""
    callback = "https://hbd.example.com/login/oauth/nc/callback"
    url = oauth.build_auth_url(_nextcloud_provider(), "st", callback)
    query = parse_qs(urlparse(url).query)
    assert "scope" not in query
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_exchange_code_generic_returns_token():
    """``exchange_code`` returns the ``access_token`` of a mocked 200 token reply."""
    provider = _gitea_provider()
    callback = "https://hbd.example.com/login/oauth/gitea/callback"

    reply = AsyncMock()
    reply.status = 200
    reply.json = AsyncMock(return_value={"access_token": "tok123"})

    # Mocked so that ``async with session.post(...)`` yields the reply.
    post_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.post = MagicMock(return_value=post_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        token = await oauth.exchange_code(provider, "mycode", callback)
        assert token == "tok123"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_exchange_code_sends_accept_json():
    """Accept: application/json must be present for all providers (required by GitHub)."""
    provider = _github_provider()
    seen_headers = {}

    reply = AsyncMock()
    reply.status = 200
    reply.json = AsyncMock(return_value={"access_token": "ghtoken"})

    def recording_post(url, **kwargs):
        # Remember the headers of the outgoing request, then hand back the canned reply.
        seen_headers.update(kwargs.get("headers", {}))
        return AsyncMock(
            __aenter__=AsyncMock(return_value=reply),
            __aexit__=AsyncMock(return_value=False),
        )

    session = MagicMock()
    session.post = recording_post

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        await oauth.exchange_code(provider, "code", "https://hbd.example.com/login/oauth/github/callback")

    assert seen_headers.get("Accept") == "application/json"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_exchange_code_raises_on_error_status():
    """A 401 reply from the token endpoint makes ``exchange_code`` raise ``OAuthError``."""
    provider = _gitea_provider()
    callback = "https://hbd.example.com/login/oauth/gitea/callback"

    reply = AsyncMock()
    reply.status = 401
    reply.text = AsyncMock(return_value="unauthorized")

    # Mocked so that ``async with session.post(...)`` yields the reply.
    post_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.post = MagicMock(return_value=post_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        with pytest.raises(oauth.OAuthError):
            await oauth.exchange_code(provider, "badcode", callback)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_exchange_code_raises_when_no_access_token():
    """A 200 reply whose JSON lacks ``access_token`` makes ``exchange_code`` raise."""
    provider = _gitea_provider()
    callback = "https://hbd.example.com/login/oauth/gitea/callback"

    reply = AsyncMock()
    reply.status = 200
    reply.json = AsyncMock(return_value={"error": "bad_request"})

    # Mocked so that ``async with session.post(...)`` yields the reply.
    post_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.post = MagicMock(return_value=post_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        with pytest.raises(oauth.OAuthError):
            await oauth.exchange_code(provider, "mycode", callback)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_user_gitea_returns_profile():
    """For Gitea, ``fetch_user`` returns login, full_name and avatar_url as received."""
    provider = _gitea_provider()

    reply = AsyncMock()
    reply.status = 200
    reply.json = AsyncMock(return_value={
        "login": "alice",
        "full_name": "Alice Smith",
        "avatar_url": "https://git.example.com/avatars/alice.png",
    })

    # Mocked so that ``async with session.get(...)`` yields the reply.
    get_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.get = MagicMock(return_value=get_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        profile = await oauth.fetch_user(provider, "tok123")

    assert profile == {
        "login": "alice",
        "full_name": "Alice Smith",
        "avatar_url": "https://git.example.com/avatars/alice.png",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_user_github_maps_name_field():
    """For GitHub, the reply's ``name`` comes back as ``full_name`` in the profile."""
    provider = _github_provider()

    reply = AsyncMock()
    reply.status = 200
    reply.json = AsyncMock(return_value={
        "login": "bobgh",
        "name": "Bob GitHub",
        "avatar_url": "https://avatars.githubusercontent.com/u/1",
    })

    # Mocked so that ``async with session.get(...)`` yields the reply.
    get_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.get = MagicMock(return_value=get_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        profile = await oauth.fetch_user(provider, "ghtoken")

    assert profile["login"] == "bobgh"
    assert profile["full_name"] == "Bob GitHub"
    assert profile["avatar_url"] == "https://avatars.githubusercontent.com/u/1"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_user_nextcloud_nested_extraction():
    """Nextcloud profile is nested under ocs.data; avatar is absent."""
    provider = _nextcloud_provider()

    nested_payload = {
        "ocs": {
            "meta": {"status": "ok", "statuscode": 200},
            "data": {
                "id": "ncuser",
                "display-name": "NC User",
                "email": "nc@example.com",
            },
        }
    }
    reply = AsyncMock()
    reply.status = 200
    reply.json = AsyncMock(return_value=nested_payload)

    # Mocked so that ``async with session.get(...)`` yields the reply.
    get_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.get = MagicMock(return_value=get_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        profile = await oauth.fetch_user(provider, "nctoken")

    assert profile["login"] == "ncuser"
    assert profile["full_name"] == "NC User"
    assert profile["avatar_url"] == ""  # Nextcloud has no avatar field
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_user_raises_on_error_status():
    """A 401 reply from the profile endpoint makes ``fetch_user`` raise ``OAuthError``."""
    provider = _gitea_provider()

    reply = AsyncMock()
    reply.status = 401
    reply.text = AsyncMock(return_value="unauthorized")

    # Mocked so that ``async with session.get(...)`` yields the reply.
    get_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=reply),
        __aexit__=AsyncMock(return_value=False),
    )
    session = MagicMock()
    session.get = MagicMock(return_value=get_cm)

    # Mocked so that ``async with ClientSession()`` yields the fake session.
    session_cm = AsyncMock(
        __aenter__=AsyncMock(return_value=session),
        __aexit__=AsyncMock(return_value=False),
    )
    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=session_cm):
        with pytest.raises(oauth.OAuthError):
            await oauth.fetch_user(provider, "badtoken")
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_enabled_with_valid_provider():
    """``is_enabled`` is ``True`` for the complete ``CFG_ON`` config."""
    enabled = oauth.is_enabled(CFG_ON)
    assert enabled is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_enabled_false_when_no_providers():
    """``is_enabled`` is ``False`` for the empty ``CFG_OFF`` config."""
    enabled = oauth.is_enabled(CFG_OFF)
    assert enabled is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_enabled_false_partial_config():
    """``is_enabled`` is ``False`` for ``CFG_PARTIAL``, which only sets a provider URL."""
    enabled = oauth.is_enabled(CFG_PARTIAL)
    assert enabled is False
|
||||||
@@ -0,0 +1,114 @@
|
|||||||
|
import pytest
|
||||||
|
from hbd.server import settings as settings_mod
|
||||||
|
|
||||||
|
# Shared config fixture for the settings tests: one user, one OAuth provider,
# one notification channel, and no hosts.
CFG = {
    "hbd_port": 50004,
    "interval": 20,
    "grace": 2,
    "users": {
        "alice": {
            "full_name": "Alice Smith",
            "admin": True,
            "password": "pbkdf2:sha256:abc",
            "notification_channels": ["pushover_ops"],
        },
    },
    "oauth": {
        "gitea": {
            "type": "gitea",
            "url": "https://git.example.com",
            "client_id": "cid",
            "client_secret": "csec",
            "label": "Sign in with Gitea",
        },
    },
    "notification_channels": {
        "pushover_ops": {
            "type": "pushover",
            "token": "tok",
            "user": "usr",
        },
    },
    "hosts": {},
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_sections_have_section_mode():
    """Every settings section declares a ``section_mode`` drawn from the known modes."""
    known_modes = ("form", "yaml", "channels", "hosts")
    for section in settings_mod.get_settings_sections(CFG):
        assert "section_mode" in section, f"Section {section['id']} missing section_mode"
        assert section["section_mode"] in known_modes
|
||||||
|
|
||||||
|
|
||||||
|
def test_sections_have_api_section():
    """Every settings section carries an ``api_section`` key."""
    for section in settings_mod.get_settings_sections(CFG):
        assert "api_section" in section, f"Section {section['id']} missing api_section"
|
||||||
|
|
||||||
|
|
||||||
|
def test_network_section_has_editable_fields():
    """The ``network`` section is a form bound to ``server`` with two or more editable fields."""
    all_sections = settings_mod.get_settings_sections(CFG)
    network = next(sec for sec in all_sections if sec["id"] == "network")

    assert network["section_mode"] == "form"
    assert network["api_section"] == "server"

    editable_count = sum(1 for field in network["fields"] if field["editable"])
    assert editable_count >= 2  # hbd_port, ws_port at minimum
|
||||||
|
|
||||||
|
|
||||||
|
def test_yaml_sections_have_correct_mode():
    """``thresholds`` and ``dns`` are yaml-mode sections; ``channels`` and ``hosts`` are not."""
    yaml_by_id = {}
    for section in settings_mod.get_settings_sections(CFG):
        if section["section_mode"] == "yaml":
            yaml_by_id[section["id"]] = section

    assert "channels" not in yaml_by_id  # now uses "channels" mode
    assert "hosts" not in yaml_by_id  # now uses "hosts" mode
    assert "thresholds" in yaml_by_id
    assert "dns" in yaml_by_id
    assert yaml_by_id["thresholds"]["api_section"] == "thresholds"
    assert yaml_by_id["dns"]["api_section"] == "dns"
|
||||||
|
|
||||||
|
|
||||||
|
def test_hosts_section_uses_hosts_mode():
    """The ``hosts`` section uses mode ``hosts`` and API section ``hosts``."""
    all_sections = settings_mod.get_settings_sections(CFG)
    hosts_section = next(sec for sec in all_sections if sec["id"] == "hosts")
    assert hosts_section["section_mode"] == "hosts"
    assert hosts_section["api_section"] == "hosts"
|
||||||
|
|
||||||
|
|
||||||
|
def test_channels_section_uses_channels_mode():
    """The ``channels`` section lists the single configured channel with owner/private keys."""
    all_sections = settings_mod.get_settings_sections(CFG)
    channels_section = next(sec for sec in all_sections if sec["id"] == "channels")

    assert channels_section["section_mode"] == "channels"
    assert channels_section["api_section"] == "notification_channels"
    assert len(channels_section["channels"]) == 1

    only_channel = channels_section["channels"][0]
    assert only_channel["name"] == "pushover_ops"
    assert only_channel["type"] == "pushover"
    for key in ("owner", "private"):
        assert key in only_channel
|
||||||
|
|
||||||
|
|
||||||
|
def test_channel_type_schemas_exported():
    """``CHANNEL_TYPE_SCHEMAS`` exists and each required type has label, fields and field keys."""
    assert hasattr(settings_mod, "CHANNEL_TYPE_SCHEMAS")

    required_types = ("pushover", "email", "signal", "matrix", "sms_voipms")
    for type_id in required_types:
        assert type_id in settings_mod.CHANNEL_TYPE_SCHEMAS
        schema = settings_mod.CHANNEL_TYPE_SCHEMAS[type_id]
        assert "label" in schema
        assert "fields" in schema
        for field in schema["fields"]:
            for key in ("key", "type", "required"):
                assert key in field
|
||||||
|
|
||||||
|
|
||||||
|
def test_oauth_section_exists():
    """The ``oauth`` section is a form listing the gitea provider with a masked secret."""
    oauth_section = None
    for section in settings_mod.get_settings_sections(CFG):
        if section["id"] == "oauth":
            oauth_section = section
            break

    assert oauth_section is not None
    assert oauth_section["section_mode"] == "form"
    assert oauth_section["api_section"] == "oauth"
    assert len(oauth_section["providers"]) == 1

    provider = oauth_section["providers"][0]
    assert provider["name"] == "gitea"
    # The configured secret ("csec") is replaced by a placeholder.
    assert provider["client_secret"] == "•••"
|
||||||
|
|
||||||
|
|
||||||
|
def test_all_channel_names_returned():
    """``get_settings_data`` exposes ``all_channel_names`` including the configured channel."""
    data = settings_mod.get_settings_data(CFG)
    assert "all_channel_names" in data
    assert "pushover_ops" in data["all_channel_names"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_users_section_has_user_list():
    """The ``users`` section is a form listing alice without a ``password`` key."""
    all_sections = settings_mod.get_settings_sections(CFG)
    users_section = next(sec for sec in all_sections if sec["id"] == "users")

    assert users_section["section_mode"] == "form"
    assert users_section["api_section"] == "users"
    assert len(users_section["users"]) == 1

    listed_user = users_section["users"][0]
    assert listed_user["username"] == "alice"
    # Password hash never exposed
    assert "password" not in listed_user
|
||||||
Reference in New Issue
Block a user