fix: correct zero-safe pathconf checks and connectivity prefix match

- Use `is not None` for pathconf values so 0 is not silently dropped
- Broaden connectivity prefix check to catch bare "connectivity" key

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix: address security vulnerabilities from audit
2026-06-08 13:07:54 -04:00 · 2026-06-08 13:06:05 -04:00 · 2026-06-07 07:53:57 -04:00 · 2026-06-06 14:45:47 -04:00 · 2026-06-06 14:40:04 -04:00 · 2026-06-06 14:35:58 -04:00
80 changed files with 10973 additions and 2024 deletions
@@ -10,36 +10,48 @@ jobs:
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
        
-#      - name: Set up Python
-#        uses: actions/setup-python@v5
-#        with:
-#          python-version: '3.11'
      - name: Set up Python
-        # Use a generic run step for FreeBSD if actions/setup-python
-        # fails in restricted environments.
        run: |
          python3 --version
          python3 -m ensurepip --upgrade
          
      - name: Install build tools
        run: |
-          python3 -m pip install --upgrade pip
-          python3 -m pip install build twine
+          python3 -m venv .venv
+          .venv/bin/pip install --upgrade pip
+          .venv/bin/pip install build twine
          
      - name: Build package
-        run: python3 -m build
+        run: .venv/bin/python -m build
        
      - name: Extract version from tag
        id: get_version
        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
+      - name: Generate changelog
+        id: changelog
+        run: |
+          # Newest tag other than the one being released. grep exits 1 when no
+          # such tag exists; "|| true" keeps errexit from aborting the step.
+          PREV_TAG=$(git tag --sort=-version:refname | grep -m 1 -v -Fx "${GITHUB_REF#refs/tags/}" || true)
+          if [ -n "$PREV_TAG" ]; then
+            CHANGELOG=$(git log --pretty=format:"- %s" "${PREV_TAG}..HEAD")
+          else
+            CHANGELOG="Initial release"
+          fi
+          # Multiline step output uses the NAME<<DELIMITER ... DELIMITER form.
+          # printf emits the text verbatim (echo may interpret backslashes in
+          # some shells); the output path is quoted in case it contains spaces.
+          printf 'CHANGELOG<<EOF\n%s\nEOF\n' "$CHANGELOG" >> "$GITHUB_OUTPUT"

      - name: Upload to Gitea PyPI registry
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
-          python3 -m twine upload --repository-url https://git.wrede.ca/api/packages/andreas/pypi dist/*
+          .venv/bin/python3 -m twine upload --repository-url https://git.wrede.ca/api/packages/andreas/pypi dist/*

      - name: Create release
        uses: actions/gitea-release-action@v1
@@ -48,4 +60,4 @@ jobs:
            dist/*.whl
            dist/*.tar.gz
          title: "Release ${{ steps.get_version.outputs.VERSION }}"
-          body: "Release version ${{ steps.get_version.outputs.VERSION }}"
+          body: "${{ steps.changelog.outputs.CHANGELOG }}"
@@ -5,6 +5,7 @@ __pycache__/
 *.pyo
 .flake8
 .venv/
+.continue/
 test/
 build/
 dist/
@@ -12,3 +13,6 @@ dist/
 ssl/
 uv.lock
 .hb.yaml
+.superpowers/
+rndc-key
+docs/superpowers/
@@ -0,0 +1,457 @@
+# Changelog
+
+All notable changes to this project are documented here, organized by release.
+
+## [5.3.10]
+
+### Added
+- Clear stale plugin data and persist OAuth users to config
+- Auto-scale CPU history graph Y axis
+- Add CPU usage history graph to CPU Monitor section
+
+### Fixed
+- Remove bak file in `bumpminor.sh`
+
+---
+
+## [5.3.9]
+
+### Added
+- Auto-update CHANGELOG and README in `bumpminor.sh`
+
+---
+
+## [5.3.8]
+
+### Added
+- Wiki home page with overview and getting started guide
+
+### Fixed
+- Release workflow: use `GITHUB_REF`/`GITHUB_OUTPUT` (Gitea Actions uses GitHub-compatible variable names)
+- Release workflow: replace `head -1` with `grep -m 1` to avoid SIGPIPE (exit 141) in changelog step
+
+---
+
+## [5.3.7]
+
+### Added
+- Dark mode with light/dark/auto theme setting
+- UNKNOWN level filter in Log of Events
+- Per-metric grace period input in threshold settings
+- Replace Dynamic DNS YAML editor with a web form
+- Sort hosts, thresholds, and channels alphabetically on settings page
+- Suppress alerts for unwatched hosts
+
+### Fixed
+- Preserve log message order when replaying history on connect
+
+---
+
+## [5.3.6]
+
+### Added
+- MIT license
+
+### Fixed
+- Correct ZFS pool status threshold operator and add per-metric grace
+- Normalize email and domain fields
+- Move dependencies back under `[project]` in pyproject.toml
+
+---
+
+## [5.3.4]
+
+### Fixed
+- Run full reload after HTTP config publish, not just `config.reload()`
+
+---
+
+## [5.3.3]
+
+### Added
+- Replace YAML threshold editor with a form-based UI
+- Replace multi-select fields with dual-panel picker on settings page
+- Nav bar button to publish pending config changes
+- Host, level, and message filters in Log of Events
+
+### Fixed
+- Remove container max-width; stop stretching inputs on settings page
+
+### Removed
+- Legacy `dyndnshosts`/`drophosts` config keys
+
+---
+
+## [5.3.2]
+
+### Added
+- Retry DNS resolution indefinitely; add `-4`/`-6` address-family flags to `hbc` and `hbc_mini`
+- Replace YAML hosts editor with form-based CRUD table
+- Replace YAML notification channel editor with form-based UI
+
+### Fixed
+- Support list-valued `threshold_config` in hosts table
+- Derive hosts threshold config list from config file keys
+- Replace channel checkboxes in Users table with multi-select
+- Support plugin-level `enabled: false` in threshold config
+- Always populate glance strip for all hosts on page load
+- Fetch host info on initial page load
+
+---
+
+## [5.3.1]
+
+### Added
+- Host info section in Host Overview (fetched and rendered on card expand)
+- `GET /api/0/hosts/{hostname}/info` endpoint
+- Show suffix-matched metric coverage in host info threshold table
+- Move `hbc_version` and `hbc_type` out of `os_info` into the host info section
+
+### Fixed
+- Correct `THRESHOLD_DEFAULTS` metric keys and add missing defaults
+
+---
+
+## [5.3.0]
+
+### Added
+- Profile page self-service: change identity, password, and notification channels
+- Settings page editor with form sections, YAML editors, stage/publish/rollback workflow
+- Config read API: `GET /api/0/config`, `/section/{name}`, `/backups`
+- Config write API: `POST /api/0/config`, `POST /api/0/config/rollback`
+- `configio` module for comment-preserving YAML round-trip writes
+- Multi-provider OAuth2 login page and generic provider routes
+- Log login/logout events to the event log with auth source
+
+### Fixed
+- ZFS monitor alerts dropped on restart with wildcard pool thresholds
+- Preserve OAuth users across config reload
+- Config API error handling, consistent 403 messages, deduplicated key lists
+- Validate password body type; coerce `notification_channels` to strings in profile API
+- Preserve OAuth `client_secret` on roundtrip; harden rollback path validation
+
+---
+
+## [5.2.6]
+
+### Added
+- Alerts host-filter field with URL query parameter and notify URL
+- Optional logo on Gitea OAuth login button
+
+### Fixed
+- Show human-readable duration in re-notification messages
+
+---
+
+## [5.2.5]
+
+### Added
+- Alert CRITICAL on degraded or suspended ZFS pools (ONLINE=OK, DEGRADED=WARNING, all else=CRITICAL)
+- Sign in with Gitea button on login page with OAuth2 redirect/callback routes
+- OAuth2 CSRF state management
+- Host owner shown in glance strip for admin users
+- C port of `hbc_mini` (single-file client in `scripts/c/`)
+
+### Fixed
+- Use `base_url` config for OAuth redirect URI to handle reverse proxy deployments
+- Preserve OAuth users across config reload
+- Escape HTML in login page error display
+
+---
+
+## [5.2.4]
+
+### Added
+- `hbc`/`hbc_mini`: `owner` config field included in `os_info`; server applies to host record
+- Server requests InfoPlugin refresh when a host has no plugin data
+- Event log stores structured dicts; filter by user
+
+### Fixed
+- Strip `_status_code` suffix from displayed metric names in threshold alerts
+- Use plain URL in Mattermost plugin metrics link
+- Fall back to `default_owner` when `os_info` has no owner
+
+---
+
+## [5.2.3]
+
+### Added
+- `hbc`/`hbc_mini`: log name and version at startup
+- Show metric name inline with hostname in alerts and notifications
+
+### Fixed
+- Send shutdown message only if a boot message was previously sent; suppress both on restart
+
+---
+
+## [5.2.2]
+
+### Fixed
+- Retry connection on network error instead of permanently dropping it
+- Silence `aiohttp.access` log; strip plugin prefix in alerts UI
+
+---
+
+## [5.2.1]
+
+### Fixed
+- Threshold and logging improvements
+
+---
+
+## [5.2.0]
+
+### Added
+- `nagios` operator for direct exit-code severity mapping
+
+### Fixed
+- Always show `THRESHOLD_DEFAULTS` in Settings threshold config
+
+---
+
+## [5.1.21]
+
+### Added
+- `nagios_runner` improvements and alerts page fixes
+
+---
+
+## [5.1.20]
+
+### Added
+- Generic threshold matching for `nagios_runner` with `{check_name}` display support
+
+### Fixed
+- Reduce default hysteresis from 10% to 2%
+- Show recovery threshold in alerts UI
+
+---
+
+## [5.1.19]
+
+### Added
+- Exclude ZFS ARC from `memory_percent`
+- Add `uptime_seconds` to `cpu_monitor`
+
+### Fixed
+- Send boot/shutdown message on the first open connection, not blindly on the first in the list
+
+---
+
+## [5.1.18]
+
+### Added
+- Fetch-based Update/Delete buttons with toast notifications on Host Overview
+
+### Fixed
+- Settings thresholds show correct per-config metrics; miscellaneous `hbc` fixes
+
+---
+
+## [5.1.17]
+
+### Added
+- Owner Update/Delete buttons on Host Overview; purge stale alerts on reload
+- Retry `AsyncConnection.open()` indefinitely; drop IPv6 only on early startup failure
+- Alert pie chart in the nav bar
+
+### Fixed
+- Make Alerts page scrollable
+
+---
+
+## [5.1.16]
+
+### Added
+- Generic `ping_monitor` thresholds; round RTT to nearest ms
+
+---
+
+## [5.1.15]
+
+### Added
+- Link hostnames in Live Dashboard to Host Overview
+- Threshold Configurations section on settings page
+
+### Fixed
+- Suppress notifications on alert de-escalation (e.g. CRITICAL→WARNING)
+- Suppress recover messages for down durations under 4 seconds
+
+---
+
+## [5.1.14]
+
+### Added
+- ZFS pool renderer in Host Overview
+
+---
+
+## [5.1.13]
+
+### Added
+- ZFS monitor plugin
+- Host-level watch flag to suppress notifications
+- Filter Live Dashboard and Host Overview by owner/manager
+- Composable `threshold_config` list for per-host threshold layering
+- Restart on SIGHUP in `hbc` and `hbc_mini`
+
+### Fixed
+- Mask `api_password` and `access_token` in settings page
+
+---
+
+## [5.1.12]
+
+Internal release — no user-visible changes.
+
+---
+
+## [5.1.11]
+
+### Fixed
+- Install under Docker
+- Clean up install script
+
+---
+
+## [5.1.10]
+
+### Fixed
+- Synchronize version in `hbc_mini`
+- Install script no longer overwrites itself
+
+---
+
+## [5.1.9]
+
+### Added
+- Install `hbc_mini` via package or install script
+
+---
+
+## [5.1.8]
+
+### Added
+- Track `hbc` type and version
+
+### Fixed
+- Nav bar position
+
+---
+
+## [5.1.7]
+
+### Added
+- `hbc_mini`: single-file heartbeat client
+
+### Fixed
+- Drop dead connections on protocol error
+
+---
+
+## [5.1.6]
+
+### Fixed
+- Simplify event log usage; fix argument handling
+
+---
+
+## [5.1.5]
+
+### Added
+- Update `hbc` via `hb_install.sh` instead of code patching
+
+---
+
+## [5.1.4]
+
+### Added
+- Redesign Plugin Metrics page as Host Overview
+
+---
+
+## [5.1.3]
+
+### Added
+- Validate absolute command paths at `nagios_runner` init
+- Async subprocess in `nagios_runner` with stderr capture and signal handling
+- `skip_reason` field on `Plugin`; surface in `PluginLoader` init messaging
+
+### Fixed
+- Use `shlex.split()` for `nagios_runner` path validation to handle quoted paths
+- Reconfigure logging to syslog after `daemonize()`
+
+---
+
+## [5.1.2]
+
+### Fixed
+- Plugin config lookup shadowed by `CLIENT_DEFAULTS` plugins key
+- Apply grace period to all threshold alerts before logging/notifying
+- RECOVER routing: use consistent level name and route via alerted channel
+- Early reminder notifications and lost recovery notifications
+- Non-alerting of overdue hosts
+
+### Added
+- Swiss clock widget in the UI
+
+---
+
+## [5.1.1]
+
+### Added
+- SMS and Matrix notification channels
+- CLI commands `stop`, `restart`, and `reload` for `hbd`
+- WebSocket endpoint at `http://.../ws`
+- Mobile HTML pages
+
+### Fixed
+- Profile not updating
+- Sortable columns in tables
+
+---
+
+## [5.1.0]
+
+### Added
+- Ping monitor plugin
+- Persist state to pickle file; restart timers on server restart
+- SIGHUP config reload for `hbd`
+- Renotify on CRITICAL only; persistent user sessions
+- RTT count threshold
+
+### Fixed
+- Bogus notification on new clients
+- Show "overdue" in alerts instead of null
+
+---
+
+## [5.0.12]
+
+### Added
+- User management and settings page
+
+---
+
+## [5.0.10]
+
+### Added
+- Publish package to Gitea PyPI registry
+
+---
+
+## [5.0.9]
+
+### Added
+- Use `SO_TIMESTAMP` for RTT measurement (Linux, FreeBSD, macOS)
+- Persist state to pickle file; restart timers on restart
+
+---
+
+## [5.0.6]
+
+### Added
+- Major codebase refactoring: restructured into client/server components
+- Per-client threshold configuration
+- Display and acknowledge alerts in the UI
+- Proper `hbc` termination; `hbd` config reloadable at runtime
@@ -0,0 +1,210 @@
+# Heartbeat
+
+Heartbeat is a lightweight host monitoring system built around a simple idea: each machine you want to monitor runs a small client (`hbc`) that sends a UDP "heartbeat" packet to a central server (`hbd`) on a regular interval. If a heartbeat stops arriving, you get notified. Alongside reachability, clients can ship system metrics — CPU, memory, disk, network — and the server will alert you when any of those cross a threshold.
+
+## How it works
+
+```
+  [ monitored host ]              [ your server ]
+  ┌─────────────┐  UDP 50003     ┌────────────────────────┐
+  │     hbc     │ ────────────>  │  hbd                   │
+  │             │                │  host state tracking   │
+  │  plugins:   │  <──────────── │  threshold alerting    │
+  │  cpu, mem,  │  ACK / CMD     │  notifications         │
+  │  disk, ...  │                │  web dashboard + API   │
+  └─────────────┘                └────────────────────────┘
+```
+
+- **hbd** — the server daemon. Tracks which hosts are alive, evaluates metric thresholds, fires notifications, serves the web dashboard and REST API.
+- **hbc** — the client. Sends heartbeats and plugin data over UDP. Runs on any Linux/BSD/macOS host.
+- **hbc_mini** — a zero-dependency single-file alternative (`hbc_mini.py` or `hbc_mini.c`) for hosts where you can't install Python packages.
+
+Notifications can go to Pushover, email, Mattermost, Matrix, Signal, or VoIP.ms SMS. The dashboard shows host connectivity, RTT graphs, active alerts, and per-host plugin metrics in real time via WebSocket.
+
+---
+
+## Getting started
+
+This tutorial sets up a server on one machine and a client on a second machine. You'll end up with a working dashboard and your first host being monitored.
+
+### 1. Install the server
+
+On the machine that will run `hbd`:
+
+```bash
+git clone https://git.wrede.ca/andreas/heartbeat.git
+cd heartbeat
+python3 -m venv .venv
+source .venv/bin/activate
+pip install .
+```
+
+Verify the install:
+
+```bash
+hbd --help
+```
+
+### 2. Create a server config
+
+Create `~/.hb.yaml`:
+
+```yaml
+hb_port: 50003       # UDP port — clients send heartbeats here
+hbd_port: 50004      # HTTP port — web dashboard and API
+ws_port: 50005       # WebSocket port — live dashboard updates
+
+interval: 20         # Expected heartbeat interval (seconds)
+grace: 2             # Seconds of slack before a host is considered overdue
+
+pickfile: ~/.hb.pick
+pidfile:  ~/.hb.pid
+logfile:  ~/.hb.log
+```
+
+That's enough to get started. No hosts, no users, no notifications needed yet — the server will accept any client that connects.
+
+### 3. Start the server
+
+```bash
+hbd serve -c ~/.hb.yaml -f -v
+```
+
+`-f` keeps it in the foreground so you can watch the log. You should see:
+
+```
+Heartbeat daemon starting on UDP :50003, HTTP :50004, WS :50005
+```
+
+Open `http://your-server:50004/live` in a browser. The dashboard is empty for now.
+
+### 4. Install the client on a host to monitor
+
+On the machine you want to monitor (must be able to reach the server on UDP 50003):
+
+```bash
+pip install hbd      # or: copy scripts/hbc_mini.py if you can't install packages
+```
+
+#### Quick start — no config file
+
+```bash
+hbc your-server.example.com
+```
+
+Within a few seconds the server log will show the host checking in, and it will appear on the dashboard.
+
+#### With a config file
+
+Create `~/.hbc.yaml` on the client host:
+
+```yaml
+hb_port: 50003
+interval: 10         # Send a heartbeat every 10 seconds
+
+plugins:
+  cpu_monitor:
+    interval: 60
+  memory_monitor:
+    interval: 60
+  disk_monitor:
+    interval: 60
+```
+
+Then start the client:
+
+```bash
+hbc -c ~/.hbc.yaml your-server.example.com
+```
+
+Send a boot message at startup so the server logs when the host came up:
+
+```bash
+hbc -b -c ~/.hbc.yaml your-server.example.com
+```
+
+Run as a daemon (logs go to syslog):
+
+```bash
+hbc -d -b -c ~/.hbc.yaml your-server.example.com
+```
+
+### 5. View the dashboard
+
+Open `http://your-server:50004/live`. You'll see the monitored host, its last heartbeat time, and RTT. Click the host name to see plugin metrics.
+
+Navigate to `/plugins/<hostname>` for CPU, memory, and disk graphs.
+
+### 6. Add a notification channel (optional)
+
+Edit `~/.hb.yaml` on the server:
+
+```yaml
+notification_channels:
+  pushover_ops:
+    type: pushover
+    token: YOUR_APP_TOKEN
+    user: YOUR_USER_KEY
+
+users:
+  alice:
+    password: pbkdf2:sha256:...    # generate: hbd passwd alice
+    admin: true
+    notification_channels: [pushover_ops]
+
+default_owner: alice
+```
+
+Generate the password hash:
+
+```bash
+hbd passwd alice
+```
+
+Paste the output into the config, then reload:
+
+```bash
+hbd reload
+```
+
+Test the channel:
+
+```bash
+hbd notify
+```
+
+### 7. Set a threshold alert (optional)
+
+Add to `~/.hb.yaml`:
+
+```yaml
+thresholds:
+  cpu_monitor:
+    cpu_percent:
+      warning: 80.0
+      critical: 90.0
+  disk_monitor:
+    partitions:
+      /:
+        percent:
+          warning: 80.0
+          critical: 90.0
+```
+
+Reload: `hbd reload`. The server will now alert when a monitored host crosses these values.
+
+---
+
+## What's next
+
+| Topic | Where to look |
+|---|---|
+| Full server config reference | [README — Server](https://git.wrede.ca/andreas/heartbeat/src/branch/master/README.md#server-hbd) |
+| Client options and all plugins | [README — Client](https://git.wrede.ca/andreas/heartbeat/src/branch/master/README.md#client-hbc) |
+| Threshold alerting details | [THRESHOLD_ALERTING.md](https://git.wrede.ca/andreas/heartbeat/src/branch/master/docs/THRESHOLD_ALERTING.md) |
+| Notification channels | [NOTIFICATIONS.md](https://git.wrede.ca/andreas/heartbeat/src/branch/master/docs/NOTIFICATIONS.md) |
+| User accounts and roles | [USERS.md](https://git.wrede.ca/andreas/heartbeat/src/branch/master/docs/USERS.md) |
+| Writing a custom plugin | [PLUGIN_DEVELOPMENT.md](https://git.wrede.ca/andreas/heartbeat/src/branch/master/docs/PLUGIN_DEVELOPMENT.md) |
+| Nagios check integration | [NAGIOS_INTEGRATION.md](https://git.wrede.ca/andreas/heartbeat/src/branch/master/docs/NAGIOS_INTEGRATION.md) |
+| REST API | [HTTP_API.md](https://git.wrede.ca/andreas/heartbeat/src/branch/master/docs/HTTP_API.md) |
+| Zero-dependency client | [README — hbc_mini](https://git.wrede.ca/andreas/heartbeat/src/branch/master/README.md#hbc_mini--zero-dependency-client) |
@@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2002 - 2026 Andreas Wrede
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,66 @@
+# Dark Mode
+
+Every page in the Heartbeat web UI supports light mode, dark mode, and automatic (follows the OS/browser setting). Each user picks their preference independently; it is stored in the browser and takes effect immediately without a page reload.
+
+---
+
+## Choosing a theme
+
+Open your profile page (`/profile`) and scroll to the **Appearance** section. Click one of the three buttons:
+
+| Button | Behaviour |
+|--------|-----------|
+| **Auto** | Follows the OS or browser dark-mode preference. Updates live if the system setting changes. |
+| **Light** | Always light, regardless of system setting. |
+| **Dark** | Always dark, regardless of system setting. |
+
+The preference is stored in `localStorage` under the key `hbd_theme` and applies to the current browser only. Clearing browser storage resets it to **Auto**.
+
+---
+
+## Implementation notes
+
+### No flash of unstyled content
+
+A small synchronous `<script>` runs at the very top of `<head>`, before any CSS is parsed, and sets `data-theme="dark"` on `<html>` when the stored preference (or the system setting in auto mode) calls for dark. Because it runs before paint, there is no visible flicker on page load.
+
+### CSS custom properties
+
+All colours are expressed as CSS custom properties defined in `head.html`:
+
+```
+:root                    — light-mode values (default)
+html[data-theme="dark"]  — dark-mode overrides
+```
+
+Key variables:
+
+| Variable | Purpose |
+|----------|---------|
+| `--bg` | Page background |
+| `--surface` | Card / panel background |
+| `--surface-2` / `--surface-3` | Slightly lighter/darker surfaces (table rows, hover states) |
+| `--text` / `--text-sec` / `--text-muted` | Primary, secondary, muted text |
+| `--border` / `--border-2`…`4` | Border shades from prominent to faint |
+| `--link` | Hyperlink and interactive-element colour |
+| `--nav-bg` | Navigation bar background |
+| `--input-bg` / `--input-border` | Form control colours |
+| `--shadow` / `--shadow-sm` | Box-shadow alphas |
+
+A single global rule in `head.html` themes all `<input>`, `<select>`, and `<textarea>` elements across every page at once:
+
+```css
+html[data-theme="dark"] input:not([type=checkbox]):not([type=radio]),
+html[data-theme="dark"] select,
+html[data-theme="dark"] textarea { … }
+```
+
+Each page template adds its own `html[data-theme="dark"]` block for page-specific elements (cards, tables, badges, etc.).
+
+### Auto-mode live updates
+
+A `matchMedia` change listener in `head.html` updates `data-theme` whenever the OS preference changes, so users in **Auto** mode see the theme switch without reloading.
+
+### Semantic colours are unchanged
+
+Alert colours (red for critical, orange for warning, green for ok) and status indicators are intentionally left as fixed values — they are semantic signals, not surface colours, and look correct on both light and dark backgrounds.
@@ -53,6 +53,17 @@ See [User Management](USERS.md) for full authentication documentation.
 |--------|------|-------------|------|
 | `GET` | `/api/0/users` | List all users | Admin |
 | `GET` | `/api/0/users/me` | Own profile | Authenticated |
+| `PUT` | `/api/0/users/me` | Update own profile | Authenticated |
+
+### Notification Channels
+
+| Method | Path | Description | Role |
+|--------|------|-------------|------|
+| `GET` | `/api/0/notification_channel_types` | Channel type schemas | Authenticated |
+| `GET` | `/api/0/notification_channels` | List visible channels | Authenticated |
+| `POST` | `/api/0/notification_channels` | Create a channel | Authenticated |
+| `PUT` | `/api/0/notification_channels/{name}` | Update a channel | Owner or Admin |
+| `DELETE` | `/api/0/notification_channels/{name}` | Delete a channel | Owner or Admin |

 ### Host Management

@@ -203,6 +214,101 @@ Changes take effect immediately but are not written back to the config file. Upd

 ---

+---
+
+### Notification Channel Endpoints
+
+Channels are visible to all users by default. Channels marked `private: true` are only visible to their owner. Admins see all channels.
+
+#### GET /api/0/notification_channel_types
+Return the schema for every supported notifier type. Used by the web UI to dynamically render the channel creation form.
+
+**Response:**
+```json
+{
+  "pushover": {
+    "label": "Pushover",
+    "fields": [
+      {"key": "token",  "label": "App token",  "type": "secret", "required": true},
+      {"key": "user",   "label": "User key",   "type": "secret", "required": true},
+      {"key": "sound",  "label": "Sound",      "type": "text",   "required": false}
+    ]
+  },
+  "email": { "label": "E-mail", "fields": [ ... ] },
+  ...
+}
+```
+
+---
+
+#### GET /api/0/notification_channels
+List channels visible to the current user (public channels + own private channels). Admins receive all channels.
+
+**Response:**
+```json
+[
+  {
+    "name": "pushover_ops",
+    "type": "pushover",
+    "type_label": "Pushover",
+    "owner": null,
+    "private": false,
+    "min_level": "WARNING",
+    "fields": [
+      {"key": "token", "label": "App token", "value": "•••", "sensitive": true},
+      {"key": "user",  "label": "User key",  "value": "•••", "sensitive": true}
+    ]
+  }
+]
+```
+
+Sensitive fields (those declared with `type: "secret"` in the channel type schema, flagged `"sensitive": true` in this response) are always returned as `"•••"`.
+
+---
+
+#### POST /api/0/notification_channels
+Create a new channel. The creating user becomes the channel's `owner`.
+
+**Request body:**
+```json
+{
+  "name": "my_pushover",
+  "type": "pushover",
+  "token": "app-token",
+  "user": "user-key",
+  "min_level": "WARNING",
+  "private": true
+}
+```
+
+**Response:** `{"ok": true, "name": "my_pushover"}`
+
+**Status codes:** `200 OK`, `400` (missing required field or unknown type), `409` (name already exists)
+
+---
+
+#### PUT /api/0/notification_channels/{name}
+Update an existing channel. Only the channel owner or an admin may update it.
+
+Secret fields sent as `"•••"` are preserved from the existing config (same pattern as OAuth secrets in the admin config editor).
+
+**Request body:** same shape as POST, `name` ignored (taken from URL).
+
+**Response:** `{"ok": true}`
+
+**Status codes:** `200 OK`, `403 Forbidden`, `404 Not Found`
+
+---
+
+#### DELETE /api/0/notification_channels/{name}
+Delete a channel. Only the channel owner or an admin may delete it.
+
+**Response:** `{"ok": true}`
+
+**Status codes:** `200 OK`, `403 Forbidden`, `404 Not Found`
+
+---
+
 ### Alert Endpoints

 #### GET /api/0/hosts/{hostname}/alerts
@@ -104,11 +104,6 @@ The `nagios_runner` plugin collects:
 - `{name}_{metric}_min` - Minimum value (if present)
 - `{name}_{metric}_max` - Maximum value (if present)

-**Overall:**
- `overall_status` - Worst status from all commands
- `overall_status_code` - Worst status code
- `plugin_count` - Number of Nagios plugins executed
-
 ## Configuration Options

 ```yaml
@@ -30,9 +30,17 @@ Set `base_url` so notification links point to your hbd instance:
 base_url: https://hbd.example.com
 ```

-### Global channel definitions
+### Channel definitions

-Define channels once; reference them by name from user configs:
+Channels are defined under `notification_channels`. Each entry specifies a delivery type and its credentials. Three optional metadata fields control ownership, visibility, and alert filtering:
+
+| Field | Default | Description |
+|---|---|---|
+| `owner` | *(absent)* | Username who created/owns this channel. Absent = admin-created. |
+| `private` | `false` | When `true`, only the owner can see and select this channel. |
+| `min_level` | `WARNING` | Minimum alert level this channel receives. |
+
+**Admin-created channels** (set in the config file or via the admin settings UI) are public by default — all users can select them:

 ```yaml
 notification_channels:
@@ -41,7 +49,7 @@ notification_channels:
    type: pushover
    token: your-app-token
    user: your-user-key
-    min_level: WARNING        # optional, default: WARNING
+    min_level: WARNING

  email_ops:
    type: email
@@ -58,14 +66,14 @@ notification_channels:
    homeserver: https://matrix.example.org
    access_token: syt_xxx
    room_id: "!abc:matrix.example.org"
-    min_level: CRITICAL       # only send critical alerts to this room
+    min_level: CRITICAL

  sms_oncall:
    type: sms_voipms
    api_user: me@example.com
    api_password: secret
-    did: "5551234567"         # your voip.ms DID number
-    dst: "5559876543"         # destination number
+    did: "5551234567"
+    dst: "5559876543"
    min_level: CRITICAL

  signal_ops:
@@ -82,9 +90,30 @@ notification_channels:
    username: heartbeat-bot
 ```

+**User-created channels** are written by authenticated users through the API or their profile page. They carry an `owner` field and optionally `private: true`:
+
+```yaml
+notification_channels:
+
+  alice_personal:
+    type: pushover
+    token: personal-token
+    user: personal-key
+    owner: alice          # created by alice
+    private: true         # only alice can see this channel
+```
+
+### Channel visibility
+
+| Channel | Who can see / select it |
+|---|---|
+| No `private` field (or `private: false`) | All users |
+| `private: true` | Only the `owner` |
+| Any channel | Admins always see everything |
+
 ### Users with notification channels

-Each user lists which global channels they receive notifications on:
+Each user lists which channels they receive notifications on. Users can manage their own selection from the profile page:

 ```yaml
 users:
@@ -270,6 +299,7 @@ Called once at startup from `main.py`. Pass the running asyncio event loop so Ma
 - Check that the host has an `owner` or `managers` set
 - Check that users have `notification_channels` listed
 - Check that the channel names in user config match keys under `notification_channels:`
+- If a user can't select a channel, check whether it is `private: true` and owned by someone else

 **min_level filtering too aggressive:**
 - Default is `WARNING` — both WARNING and CRITICAL are sent
@@ -8,6 +8,7 @@ This guide explains how to create custom plugins for the Heartbeat monitoring sy
 - [Plugin Types](#plugin-types)
 - [Creating a Plugin](#creating-a-plugin)
 - [Plugin Lifecycle](#plugin-lifecycle)
+- [Server-initiated InfoPlugin refresh](#server-initiated-infoplugin-refresh)
 - [Configuration](#configuration)
 - [Best Practices](#best-practices)
 - [Examples](#examples)
@@ -250,6 +251,28 @@ Understanding the plugin lifecycle helps you implement plugins correctly:
   └─> Plugin releases resources, closes connections
 ```

+## Server-initiated InfoPlugin refresh
+
+When a heartbeat packet arrives from a host the server has no plugin data for (e.g. after a server restart), the server sets `request_update = 1` in the ACK reply. The client detects this flag and immediately re-runs all InfoPlugins — clearing their cached results first — then resends the data as PLG messages.
+
+This means InfoPlugin data will always reach the server as soon as possible without requiring a client restart. No action is needed from plugin authors: the framework handles cache invalidation and re-collection automatically.
+
+The lifecycle for this case looks like:
+
+```
+Server restarts, host reconnects
+   └─> hbd receives HTB with no existing plugin_data for host
+   └─> hbd sets request_update=1 in ACK
+
+Client receives ACK
+   └─> Detects request_update flag
+   └─> Clears _cache on every registered InfoPlugin
+   └─> Calls collect() on each InfoPlugin
+   └─> Sends fresh PLG messages to server
+```
+
+If you write an `InfoPlugin` with side effects in `_collect_info()` (opening connections, writing files, etc.), be aware it may be called more than once per client session when this mechanism triggers.
+
 ## Configuration

 ### Plugin-Specific Configuration
@@ -256,6 +256,56 @@ disk_monitor:
        operator: "<"
 ```

+### ZFS Monitor
+
+ZFS pool health is checked automatically for every pool. A pool in any state
+other than `ONLINE` (e.g. `DEGRADED`, `SUSPENDED`, `FAULTED`, `UNAVAIL`) raises
+a **CRITICAL** alert by default — no configuration required.
+
+The default threshold is equivalent to:
+
+```yaml
+zfs_monitor:
+  pools:
+    '*':
+      status:
+        warning: 1
+        critical: 2
+        operator: ">"
+        hysteresis: 0.0
+        display: "ZFS pool {pool_name} is {health}"
+```
+
+`'*'` matches every pool on the host. The notification message includes the pool
+name and its current health string, e.g. `ZFS pool tank is DEGRADED`.
+
+**Override for specific pools** — named pool entries take priority over `'*'`:
+
+```yaml
+zfs_monitor:
+  pools:
+    # Suppress health alerts for a scratch pool (not mission-critical)
+    scratch:
+      status:
+        enabled: false
+
+    # Capacity threshold for a specific pool
+    tank:
+      capacity:
+        warning: 75.0
+        critical: 90.0
+        operator: ">"
+        hysteresis: 0.05
+```
+
+**Alert state paths** follow the pattern `zfs_monitor.<pool_name>.status`,
+so acknowledgements and silences target individual pools:
+
+```
+zfs_monitor.tank.status
+zfs_monitor.backup.status
+```
+
 ### Network Monitor

 ```yaml
@@ -1110,33 +1160,6 @@ hosts:
  db-02:
    threshold_config: [tight_memory, db_disk]
 ```
-
-### Backward Compatibility
-
-The legacy single threshold configuration is fully supported:
-
-```yaml
-# Old format - still works
-thresholds:
-  cpu_monitor:
-    cpu_percent:
-      warning: 80.0
-      critical: 90.0
-```
-
-This is equivalent to:
-
-```yaml
-# New format
-threshold_configs:
-  default:
-    thresholds:
-      cpu_monitor:
-        cpu_percent:
-          warning: 80.0
-          critical: 90.0
-```
-
 ### Configuration Priority

 1. **Host `threshold_config` (list)**: Layer each named config's overrides left-to-right on top of the defaults
@@ -36,7 +36,7 @@ users:
  bob:
    full_name: Bob Smith
    password: pbkdf2:sha256:...
-    notification_channels: [pushover_standard]
+    notification_channels: [pushover_standard]   # channels bob has selected

  carol:
    full_name: Carol Jones
@@ -46,6 +46,24 @@ default_owner: andreas            # owns hosts with no explicit owner
                                  # falls back to the first admin user if omitted
 ```

+### Client-declared host ownership
+
+A host can declare its own owner directly in the hbc or hbc_mini client configuration. This is useful for hosts that are not listed in the server config, or during initial setup before a server-side config entry has been created.
+
+**`~/.hbc.yaml`** (hbc):
+```yaml
+owner: andreas
+```
+
+**`~/.hbc.json`** (hbc_mini):
+```json
+{ "owner": "andreas" }
+```
+
+When set, the value is included in the `os_info` plugin data sent to the server. The server applies it as `host.owner` the first time `os_info` arrives, provided no owner has been configured server-side for that host. Server-configured ownership always takes precedence.
+
+---
+
 ### Assigning roles to hosts

 ```yaml
@@ -170,6 +188,32 @@ Return the currently authenticated user's profile.

 ---

+#### PUT /api/0/users/me
+Update the current user's profile. All fields are optional — send only what you want to change.
+
+**Update display name and avatar:**
+```json
+{ "full_name": "Carol Jones", "avatar": "/avatars/carol.png" }
+```
+
+**Change notification channel selection:**
+```json
+{ "notification_channels": ["pushover_ops", "email_ops"] }
+```
+Only channels visible to the user (public + own private) are accepted; others are silently dropped.
+
+**Change password:**
+```json
+{ "password": { "current": "oldpass", "new": "newpass" } }
+```
+Requires the correct current password. New password is hashed before storage.
+
+**Response:** `{"ok": true}`
+
+**Status codes:** `200 OK`, `400` (missing/invalid field), `401` (unauthenticated), `403` (wrong current password)
+
+---
+
 ### Host Access

 #### GET /api/0/hosts/{hostname}/access
@@ -1,602 +0,0 @@
-# Plugin Error Checking Implementation Plan
-
-> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
-
-**Goal:** Improve plugin error checking in hbc, especially for nagios_runner, and fix logger messages silently discarded in daemon mode.
-
-**Architecture:** Three focused changes across three files: (1) `hbd/client/plugin.py` gains a `skip_reason` attribute on Plugin and updated PluginLoader messaging; (2) `hbd/client/plugins/nagios_runner.py` gains async subprocess execution, stderr capture, signal-killed process handling, and init-time command path validation; (3) `hbd/client/main.py` gains proper post-fork logging reconfiguration to syslog.
-
-**Tech Stack:** Python 3.11+, asyncio, `logging.handlers.SysLogHandler`, pytest
-
---
-
-## File Map
-
-| Action | Path | What changes |
-|---|---|---|
-| Modify | `hbd/client/plugin.py` | `Plugin.__init__` gains `skip_reason`; `PluginLoader` checks it |
-| Modify | `hbd/client/plugins/nagios_runner.py` | async subprocess, stderr, signal codes, init validation, `skip_reason` |
-| Modify | `hbd/client/main.py` | `_reconfigure_logging_for_daemon()` helper; remove redundant syslog calls |
-| Create | `tests/test_plugin.py` | PluginLoader messaging tests |
-| Create | `tests/test_nagios_runner.py` | NagiosRunnerPlugin behaviour tests |
-
-Run tests throughout with:
-```bash
-python -m pytest tests/test_plugin.py tests/test_nagios_runner.py -v
-```
-
---
-
-## Task 1: Plugin.skip_reason + PluginLoader messaging
-
-**Files:**
- Modify: `hbd/client/plugin.py:40-48` (Plugin.__init__)
- Modify: `hbd/client/plugin.py:369-381` (PluginLoader.load_from_directory)
- Create: `tests/test_plugin.py`
-
- [ ] **Step 1: Write failing tests**
-
-Create `tests/test_plugin.py`:
-
-```python
-import asyncio
-import logging
-import textwrap
-
-from hbd.client.plugin import Plugin, PluginLoader, PluginRegistry
-
-
-def test_plugin_skip_reason_defaults_none(tmp_path):
-    plugin_code = textwrap.dedent("""
-        from hbd.client.plugin import MonitorPlugin
-
-        class MinimalPlugin(MonitorPlugin):
-            name = "minimal"
-            version = "1.0.0"
-            interval = 60
-
-            async def initialize(self):
-                return True
-
-            async def _collect_metrics(self):
-                return {}
-    """)
-    (tmp_path / "minimal.py").write_text(plugin_code)
-    registry = PluginRegistry()
-    loader = PluginLoader(registry)
-    asyncio.run(loader.load_from_directory(tmp_path))
-    plugin = registry.get("minimal")
-    assert plugin is not None
-    assert plugin.skip_reason is None
-
-
-def test_loader_logs_info_when_skip_reason_set(tmp_path, caplog):
-    plugin_code = textwrap.dedent("""
-        from hbd.client.plugin import MonitorPlugin
-
-        class SkippablePlugin(MonitorPlugin):
-            name = "skippable"
-            version = "1.0.0"
-            interval = 60
-
-            async def initialize(self):
-                self.skip_reason = "not configured in yaml"
-                return False
-
-            async def _collect_metrics(self):
-                return {}
-    """)
-    (tmp_path / "skippable.py").write_text(plugin_code)
-    registry = PluginRegistry()
-    loader = PluginLoader(registry)
-
-    with caplog.at_level(logging.INFO, logger="plugin.loader"):
-        count = asyncio.run(loader.load_from_directory(tmp_path))
-
-    assert count == 0
-    assert any("skipped: not configured in yaml" in r.message for r in caplog.records)
-    assert not any("failed initialization" in r.message for r in caplog.records)
-
-
-def test_loader_logs_warning_when_no_skip_reason(tmp_path, caplog):
-    plugin_code = textwrap.dedent("""
-        from hbd.client.plugin import MonitorPlugin
-
-        class FailPlugin(MonitorPlugin):
-            name = "fail"
-            version = "1.0.0"
-            interval = 60
-
-            async def initialize(self):
-                return False
-
-            async def _collect_metrics(self):
-                return {}
-    """)
-    (tmp_path / "fail_plugin.py").write_text(plugin_code)
-    registry = PluginRegistry()
-    loader = PluginLoader(registry)
-
-    with caplog.at_level(logging.WARNING, logger="plugin.loader"):
-        count = asyncio.run(loader.load_from_directory(tmp_path))
-
-    assert count == 0
-    assert any("failed initialization" in r.message for r in caplog.records)
-```
-
- [ ] **Step 2: Run tests to verify they fail**
-
-```bash
-python -m pytest tests/test_plugin.py -v
-```
-Expected: `test_plugin_skip_reason_defaults_none` FAILS (attribute missing), others may error.
-
- [ ] **Step 3: Add `skip_reason` to `Plugin.__init__`**
-
-In `hbd/client/plugin.py`, in `Plugin.__init__` (around line 46), add one line:
-
-```python
-def __init__(self, config: Optional[Dict[str, Any]] = None):
-    self.config = config or {}
-    self.logger = logging.getLogger(f"plugin.{self.name}")
-    self._initialized = False
-    self.skip_reason: Optional[str] = None
-```
-
- [ ] **Step 4: Update PluginLoader messaging**
-
-In `hbd/client/plugin.py`, replace the `if not initialized:` block (around line 372):
-
-```python
-                    if not initialized:
-                        if plugin.skip_reason:
-                            self.logger.info(
-                                f"Plugin {plugin.name} skipped: {plugin.skip_reason}"
-                            )
-                        else:
-                            self.logger.warning(
-                                f"Plugin {plugin.name} failed initialization, skipping"
-                            )
-                        continue
-```
-
- [ ] **Step 5: Run tests to verify they pass**
-
-```bash
-python -m pytest tests/test_plugin.py -v
-```
-Expected: all 3 tests PASS.
-
- [ ] **Step 6: Commit**
-
-```bash
-git add hbd/client/plugin.py tests/test_plugin.py
-git commit -m "feat: add skip_reason to Plugin; improve PluginLoader init messaging"
-```
-
---
-
-## Task 2: NagiosRunnerPlugin — skip_reason when no commands
-
-**Files:**
- Modify: `hbd/client/plugins/nagios_runner.py:88-105` (initialize)
- Create: `tests/test_nagios_runner.py`
-
- [ ] **Step 1: Write failing test**
-
-Create `tests/test_nagios_runner.py`:
-
-```python
-import asyncio
-import logging
-import os
-import stat
-
-import pytest
-
-from hbd.client.plugins.nagios_runner import (
-    NagiosRunnerPlugin,
-    NAGIOS_OK,
-    NAGIOS_WARNING,
-    NAGIOS_CRITICAL,
-    NAGIOS_UNKNOWN,
-)
-
-
-def test_no_commands_sets_skip_reason():
-    plugin = NagiosRunnerPlugin(config={"commands": []})
-    result = asyncio.run(plugin.initialize())
-    assert result is False
-    assert plugin.skip_reason is not None
-    assert "nagios_runner.commands" in plugin.skip_reason
-```
-
- [ ] **Step 2: Run test to verify it fails**
-
-```bash
-python -m pytest tests/test_nagios_runner.py::test_no_commands_sets_skip_reason -v
-```
-Expected: FAIL — `plugin.skip_reason` is `None`.
-
- [ ] **Step 3: Set skip_reason in NagiosRunnerPlugin.initialize()**
-
-In `hbd/client/plugins/nagios_runner.py`, replace the early-return block in `initialize()` (around line 96):
-
-```python
-        if not self.commands:
-            self.skip_reason = "no commands configured (add nagios_runner.commands to config)"
-            self.logger.info("No Nagios commands configured")
-            return False
-```
-
- [ ] **Step 4: Run test to verify it passes**
-
-```bash
-python -m pytest tests/test_nagios_runner.py::test_no_commands_sets_skip_reason -v
-```
-Expected: PASS.
-
- [ ] **Step 5: Commit**
-
-```bash
-git add hbd/client/plugins/nagios_runner.py tests/test_nagios_runner.py
-git commit -m "feat: set skip_reason on nagios_runner when no commands configured"
-```
-
---
-
-## Task 3: NagiosRunnerPlugin — async subprocess, stderr capture, negative return codes
-
-**Files:**
- Modify: `hbd/client/plugins/nagios_runner.py` (imports + `_run_nagios_plugin`)
- Modify: `tests/test_nagios_runner.py`
-
- [ ] **Step 1: Write failing tests**
-
-Append to `tests/test_nagios_runner.py`:
-
-```python
-def test_stderr_used_when_stdout_empty(tmp_path):
-    script = tmp_path / "check_err.sh"
-    script.write_text("#!/bin/sh\necho 'error from stderr' >&2\nexit 2\n")
-    script.chmod(script.stat().st_mode | stat.S_IEXEC)
-
-    config = {"commands": [{"name": "t", "command": str(script)}], "timeout": 5}
-    plugin = NagiosRunnerPlugin(config=config)
-    asyncio.run(plugin.initialize())
-    data = asyncio.run(plugin._collect_metrics())
-
-    assert "error from stderr" in data["t_output"]
-    assert data["t_status_code"] == NAGIOS_CRITICAL
-
-
-def test_stderr_appended_when_both_present(tmp_path):
-    script = tmp_path / "check_both.sh"
-    script.write_text("#!/bin/sh\necho 'OK - all good'\necho 'extra detail' >&2\nexit 0\n")
-    script.chmod(script.stat().st_mode | stat.S_IEXEC)
-
-    config = {"commands": [{"name": "t", "command": str(script)}], "timeout": 5}
-    plugin = NagiosRunnerPlugin(config=config)
-    asyncio.run(plugin.initialize())
-    data = asyncio.run(plugin._collect_metrics())
-
-    assert "OK - all good" in data["t_output"]
-    assert "extra detail" in data["t_output"]
-    assert data["t_status_code"] == NAGIOS_OK
-
-
-def test_negative_returncode_maps_to_unknown():
-    # kill -9 $$ kills the shell itself; asyncio sees returncode -9
-    config = {"commands": [{"name": "t", "command": "kill -9 $$"}], "timeout": 5}
-    plugin = NagiosRunnerPlugin(config=config)
-    asyncio.run(plugin.initialize())
-    data = asyncio.run(plugin._collect_metrics())
-
-    assert data["t_status_code"] == NAGIOS_UNKNOWN
-    assert "signal" in data["t_output"].lower()
-```
-
- [ ] **Step 2: Run tests to verify they fail**
-
-```bash
-python -m pytest tests/test_nagios_runner.py::test_stderr_used_when_stdout_empty \
-    tests/test_nagios_runner.py::test_stderr_appended_when_both_present \
-    tests/test_nagios_runner.py::test_negative_returncode_maps_to_unknown -v
-```
-Expected: all FAIL — current implementation ignores stderr and doesn't handle negative codes.
-
- [ ] **Step 3: Update imports in nagios_runner.py**
-
-Replace the import block at the top of `hbd/client/plugins/nagios_runner.py`:
-
-```python
-import asyncio
-import os
-import re
-from typing import Any, Dict, List, Optional, Tuple
-
-from hbd.client.plugin import MonitorPlugin
-```
-
-(Remove `import subprocess`; add `import asyncio` and `import os`.)
-
- [ ] **Step 4: Upgrade collection log level from DEBUG to INFO**
-
-In `hbd/client/plugins/nagios_runner.py`, in `_collect_metrics()`, change the debug log (around line 144) so results are visible at INFO level:
-
-```python
-                self.logger.info(
-                    f"Executed {name}: {STATUS_NAMES.get(status_code, 'UNKNOWN')} - {output[:50]}"
-                )
-```
-
- [ ] **Step 5: Replace `_run_nagios_plugin` with async implementation**
-
-Replace the entire `_run_nagios_plugin` method in `hbd/client/plugins/nagios_runner.py`:
-
-```python
-    async def _run_nagios_plugin(
-        self,
-        command: str
-    ) -> Tuple[int, str, Dict[str, Any]]:
-        """Execute a Nagios plugin and parse its output."""
-        try:
-            proc = await asyncio.create_subprocess_shell(
-                command,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-            )
-            try:
-                stdout_bytes, stderr_bytes = await asyncio.wait_for(
-                    proc.communicate(), timeout=self.timeout
-                )
-            except asyncio.TimeoutError:
-                proc.kill()
-                await proc.communicate()
-                self.logger.error(f"Command timed out: {command}")
-                return NAGIOS_UNKNOWN, f"Command timed out after {self.timeout}s", {}
-
-            status_code = proc.returncode
-
-            if status_code < 0:
-                return NAGIOS_UNKNOWN, f"Process killed by signal {-status_code}", {}
-
-            if status_code > 3:
-                status_code = NAGIOS_UNKNOWN
-
-            stdout = stdout_bytes.decode(errors="replace").strip()
-            stderr = stderr_bytes.decode(errors="replace").strip()
-
-            # Parse perfdata from stdout before mixing in stderr
-            perfdata = self._parse_perfdata(stdout)
-
-            # Build status message
-            status_part = stdout.split('|')[0].strip() if '|' in stdout else stdout
-
-            if not stdout and stderr:
-                output_msg = stderr
-            elif stdout and stderr:
-                output_msg = f"{status_part} [stderr: {stderr}]"
-            else:
-                output_msg = status_part
-
-            return status_code, output_msg, perfdata
-
-        except Exception as e:
-            self.logger.error(f"Error executing command: {e}")
-            return NAGIOS_UNKNOWN, f"Execution error: {str(e)}", {}
-```
-
-Also remove the now-unused `self.shell` line from `__init__` (the `shell` config key is no longer used since `create_subprocess_shell` always uses a shell):
-
-In `NagiosRunnerPlugin.__init__`, remove:
-```python
-        self.shell: bool = config.get("shell", True) if config else True
-```
-
- [ ] **Step 6: Run tests to verify they pass**
-
-```bash
-python -m pytest tests/test_nagios_runner.py -v
-```
-Expected: all tests PASS including the 3 new ones.
-
- [ ] **Step 7: Commit**
-
-```bash
-git add hbd/client/plugins/nagios_runner.py tests/test_nagios_runner.py
-git commit -m "feat: async subprocess in nagios_runner with stderr capture and signal handling"
-```
-
---
-
-## Task 4: NagiosRunnerPlugin — command path validation at init
-
-**Files:**
- Modify: `hbd/client/plugins/nagios_runner.py` (initialize)
- Modify: `tests/test_nagios_runner.py`
-
- [ ] **Step 1: Write failing tests**
-
-Append to `tests/test_nagios_runner.py`:
-
-```python
-def test_absolute_path_not_found_warns(caplog):
-    fake_cmd = "/nonexistent_hbc_test_path/check_something"
-    config = {"commands": [{"name": "t", "command": fake_cmd}]}
-    plugin = NagiosRunnerPlugin(config=config)
-
-    with caplog.at_level(logging.WARNING, logger="plugin.nagios_runner"):
-        asyncio.run(plugin.initialize())
-
-    assert any("not found" in r.message for r in caplog.records)
-
-
-def test_absolute_path_not_executable_warns(caplog, tmp_path):
-    non_exec = tmp_path / "check_test"
-    non_exec.write_text("#!/bin/sh\necho OK\n")
-    non_exec.chmod(0o644)  # readable but not executable
-
-    config = {"commands": [{"name": "t", "command": str(non_exec)}]}
-    plugin = NagiosRunnerPlugin(config=config)
-
-    with caplog.at_level(logging.WARNING, logger="plugin.nagios_runner"):
-        asyncio.run(plugin.initialize())
-
-    assert any("not executable" in r.message for r in caplog.records)
-
-
-def test_relative_path_not_checked(caplog):
-    # Relative paths (resolved via PATH) must not generate warnings
-    config = {"commands": [{"name": "t", "command": "echo OK"}]}
-    plugin = NagiosRunnerPlugin(config=config)
-
-    with caplog.at_level(logging.WARNING, logger="plugin.nagios_runner"):
-        asyncio.run(plugin.initialize())
-
-    assert not any(
-        "not found" in r.message or "not executable" in r.message
-        for r in caplog.records
-    )
-```
-
- [ ] **Step 2: Run tests to verify they fail**
-
-```bash
-python -m pytest tests/test_nagios_runner.py::test_absolute_path_not_found_warns \
-    tests/test_nagios_runner.py::test_absolute_path_not_executable_warns \
-    tests/test_nagios_runner.py::test_relative_path_not_checked -v
-```
-Expected: `test_absolute_path_not_found_warns` and `test_absolute_path_not_executable_warns` FAIL (no warnings logged); `test_relative_path_not_checked` may pass.
-
- [ ] **Step 3: Add command path validation to `initialize()`**
-
-In `hbd/client/plugins/nagios_runner.py`, extend `initialize()` by adding validation after the existing "log each command" loop (after line 103, before `return True`):
-
-```python
-        # Validate absolute command paths early
-        for cmd_config in self.commands:
-            name = cmd_config.get("name", "unnamed")
-            command = cmd_config.get("command", "")
-            if not command:
-                continue
-            exe = command.split()[0]
-            if os.path.isabs(exe):
-                if not os.path.isfile(exe):
-                    self.logger.warning(
-                        f"Command '{name}': executable not found: {exe}"
-                    )
-                elif not os.access(exe, os.X_OK):
-                    self.logger.warning(
-                        f"Command '{name}': executable not executable: {exe}"
-                    )
-```
-
- [ ] **Step 4: Run full test suite to verify all pass**
-
-```bash
-python -m pytest tests/test_plugin.py tests/test_nagios_runner.py -v
-```
-Expected: all tests PASS.
-
- [ ] **Step 5: Commit**
-
-```bash
-git add hbd/client/plugins/nagios_runner.py tests/test_nagios_runner.py
-git commit -m "feat: validate absolute command paths at nagios_runner init"
-```
-
---
-
-## Task 5: Daemon mode logging — route to syslog after fork
-
-**Files:**
- Modify: `hbd/client/main.py` (new helper + updated daemon block)
-
-No automated test for daemonization itself (fork behaviour is hard to unit-test). Manual verification steps are provided below.
-
- [ ] **Step 1: Add `_reconfigure_logging_for_daemon` helper**
-
-In `hbd/client/main.py`, add this function just before `def build_parser()` (around line 589):
-
-```python
-def _reconfigure_logging_for_daemon(log_level: int) -> None:
-    """Replace StreamHandlers (now writing to /dev/null) with a SysLogHandler."""
-    from logging.handlers import SysLogHandler
-
-    root = logging.getLogger()
-    for handler in root.handlers[:]:
-        root.removeHandler(handler)
-        handler.close()
-
-    try:
-        syslog_handler = SysLogHandler(
-            address="/dev/log",
-            facility=SysLogHandler.LOG_DAEMON,
-        )
-    except OSError:
-        syslog_handler = SysLogHandler(
-            address=("localhost", 514),
-            facility=SysLogHandler.LOG_DAEMON,
-        )
-        # Attach the fallback first so the warning reaches syslog
-        syslog_handler.setFormatter(
-            logging.Formatter("hbc[%(process)d]: %(name)s %(levelname)s: %(message)s")
-        )
-        root.addHandler(syslog_handler)
-        root.setLevel(log_level)
-        logging.warning("/dev/log not found, using syslog UDP localhost:514")
-        return
-
-    syslog_handler.setFormatter(
-        logging.Formatter("hbc[%(process)d]: %(name)s %(levelname)s: %(message)s")
-    )
-    root.addHandler(syslog_handler)
-    root.setLevel(log_level)
-```
-
- [ ] **Step 2: Update the daemon block in `main()`**
-
-In `hbd/client/main.py`, replace the entire `if args.daemon:` block (lines 664–675):
-
-```python
-    if args.daemon:
-        print("Daemonizing...")
-        daemonize()
-        _reconfigure_logging_for_daemon(log_level)
-        logging.info(f"hbc starting, sending heartbeat to {', '.join(args.hosts)}")
-```
-
-This removes the `import syslog`, `syslog.openlog()`, and `syslog.syslog()` calls (now handled by the logging system) and removes the no-op second `logging.basicConfig()` call.
-
- [ ] **Step 3: Run existing test suite to confirm no regressions**
-
-```bash
-python -m pytest tests/test_plugin.py tests/test_nagios_runner.py -v
-```
-Expected: all tests still PASS.
-
- [ ] **Step 4: Manual smoke test — verify syslog output in daemon mode**
-
-```bash
-# In one terminal, tail syslog
-sudo journalctl -f -t hbc
-
-# In another terminal, start hbc in daemon mode (replace HOST with a real or dummy host)
-python -m hbd.client.main -d -v localhost
-
-# Expected in journalctl output:
-#   hbc[<pid>]: hbc.main INFO: Starting hbc for <hostname> -> ['localhost']
-#   hbc[<pid>]: hbc.main INFO: hbc starting, sending heartbeat to localhost
-#   hbc[<pid>]: plugin.loader INFO: ...
-
-# Stop the daemon
-pkill -f "hbd.client.main"
-```
-
- [ ] **Step 5: Commit**
-
-```bash
-git add hbd/client/main.py
-git commit -m "fix: reconfigure logging to syslog after daemonize() instead of no-op basicConfig"
-```
@@ -1,92 +0,0 @@
-# Plugin Error Checking & Daemon Logging — Design Spec
-
-**Date:** 2026-04-25  
-**Scope:** hbc client — daemon mode logging, nagios_runner plugin robustness, PluginLoader messaging  
-**Files affected:** `hbd/client/main.py`, `hbd/client/plugins/nagios_runner.py`, `hbd/client/plugin.py`
-
---
-
-## 1. Daemon Mode Logging
-
-### Problem
-In `main()`, `logging.basicConfig()` is called before `daemonize()` (establishing a StreamHandler to stderr), then called again after `daemonize()`. The second call is a no-op — Python ignores `basicConfig()` when handlers are already configured. After daemonization, stderr is redirected to `/dev/null`, so all subsequent log output is silently discarded.
-
-The existing `syslog.openlog()` / `syslog.syslog()` calls (lines 666–668) write a single startup message but do not integrate with the `logging` system, so plugin and connection log messages never reach syslog.
-
-### Fix
-After `daemonize()`, explicitly reconfigure the root logger:
-
-1. Remove all existing handlers (they now write to `/dev/null`).
-2. Add `logging.handlers.SysLogHandler(address='/dev/log', facility=LOG_DAEMON)`.
-3. Set formatter: `hbc[%(process)d]: %(name)s %(levelname)s: %(message)s`
-4. Preserve the `log_level` already determined from `-v`/`-x` CLI flags.
-
-Remove the redundant `syslog.openlog()` / `syslog.syslog()` calls — the logging system handles routing.
-
-**Fallback:** If `/dev/log` does not exist (containers, some BSDs), fall back to `SysLogHandler(address=('localhost', 514))`. Log one warning through the fallback handler once it is attached (stderr already points to `/dev/null` after daemonization) so the operator knows.
-
---
-
-## 2. Nagios Runner Improvements
-
-### 2a — Async Subprocess
-`_run_nagios_plugin()` is declared `async def` but calls `subprocess.run()` synchronously, blocking the event loop for the full command duration.
-
-**Fix:** Replace with `asyncio.create_subprocess_shell()` + `await proc.communicate()`. Enforce timeout with `asyncio.wait_for(..., timeout=self.timeout)` and catch `asyncio.TimeoutError`.
-
-### 2b — Stderr Capture
-Subprocess stderr is currently lost: `capture_output=True` captures both stdout and stderr in the sync call, but only stdout is used, so stderr content never reaches the output message.
-
-**Fix:** Pass `stderr=asyncio.subprocess.PIPE` to `create_subprocess_shell`. After `communicate()`, if stdout is empty but stderr has content, use stderr as the output message. If both have content, append stderr to the output for visibility.
-
-### 2c — Negative Return Codes
-A negative `returncode` means the process was killed by a signal (SIGKILL, OOM, etc.). The current code treats these as-is, which may produce unexpected status values.
-
-**Fix:** If `returncode < 0`, map to `NAGIOS_UNKNOWN` with message `"Process killed by signal {-returncode}"`.
-
-### 2d — Command Path Validation at Init
-`initialize()` currently only checks that the commands list is non-empty.
-
-**Fix:** For each command entry during `initialize()`:
- Warn and skip the entry if `name` or `command` is missing.
- Extract the executable (first whitespace-delimited token of the command string).
- If the executable is an absolute path, check `os.path.isfile()` and `os.access(..., os.X_OK)`. Log a `WARNING` if either check fails.
- Commands with relative paths or shell builtins are not checked (they may be on PATH) — just noted.
- Validation warns only; all original entries in `self.commands` are retained and still attempted at collection time (where the existing missing-name/command guard already skips them). The plugin initializes successfully as long as the commands list is non-empty.
-
---
-
-## 3. PluginLoader Messaging
-
-### Problem
-When `initialize()` returns `False`, the loader always logs:
-> `WARNING: Plugin X failed initialization, skipping`
-
-This is alarming when the real reason is simply "no commands configured". There is no API to distinguish "not configured" from "genuinely broken".
-
-### Fix
-Add an optional `skip_reason` attribute to `Plugin.__init__()` (defaults to `None`).
-
-In `PluginLoader.load_from_directory()`, after `initialize()` returns `False`:
- If `plugin.skip_reason` is set → `logger.info(f"Plugin {plugin.name} skipped: {plugin.skip_reason}")`
- If `plugin.skip_reason` is `None` → `logger.warning(f"Plugin {plugin.name} failed initialization, skipping")` (existing behaviour)
-
-In `NagiosRunnerPlugin.initialize()`, when no commands are configured:
-```python
-self.skip_reason = "no commands configured (add nagios_runner.commands to config)"
-return False
-```
-
-Genuine failures (exceptions) continue to go through the existing `except` block in the loader, logging at `ERROR` with traceback — unchanged.
-
---
-
-## Decisions
-
-| Topic | Decision |
-|---|---|
-| Daemon log destination | syslog only (LOG_DAEMON facility) |
-| Syslog fallback | localhost:514 UDP if `/dev/log` absent |
-| Nagios result log level | INFO for all statuses (OK/WARNING/CRITICAL/UNKNOWN) |
-| Invalid command handling at init | Warn and continue; still attempt at collection time |
-| PluginLoader API change | `skip_reason` attribute on Plugin base class, checked by loader |
@@ -14,4 +14,4 @@ Install options:
 """

 __all__ = ["__version__"]
-__version__ = "5.1.17"
+__version__ = "5.3.10"
@@ -16,6 +16,9 @@ CLIENT_DEFAULTS = {
    "hb_port": 50003,          # Port where hbd servers listen
    "interval": 10,             # Heartbeat interval in seconds

+    # Host identity
+    "owner": None,             # Optional username to set as this host's owner on the server
+
    # Runtime flags
    "foreground": False,
    "verbose": False,
@@ -21,6 +21,7 @@ from typing import Dict, List, Optional
 # Import protocol and config
 from .config import load_config
 from ..common.proto import dicttos, stodict
+from .. import __version__

 # Import plugin system
 from .plugin import PluginRegistry, PluginLoader, InfoPlugin, MonitorPlugin
@@ -58,6 +59,7 @@ class AsyncConnection:
        self._dead = False
        self._ever_opened = False
        self._open_fail_count = 0   # consecutive failures before first success
+        self.request_info_event: asyncio.Event = asyncio.Event()

        self.logger = logging.getLogger(f"hbc.conn.{addr}")

@@ -137,6 +139,9 @@ class AsyncConnection:
        
        self.ackcount += 1
        self.logger.debug(f"ACK received, RTT: {rtt:.1f}ms")
+        if msg.get("request_update"):
+            self.logger.info("server requested plugin info refresh")
+            self.request_info_event.set()


 class HeartbeatProtocol(asyncio.DatagramProtocol):
@@ -172,9 +177,8 @@ class HeartbeatProtocol(asyncio.DatagramProtocol):
            self.logger.error(f"Error processing datagram: {e}", exc_info=True)
    
    def error_received(self, exc):
-        """Handle protocol errors."""
-        self.logger.warning(f"Protocol error on {self.connection.addr}: {exc} — dropping connection")
-        self.connection._dead = True
+        """Handle protocol errors — close transport so the heartbeat sender retries."""
+        self.logger.warning(f"Protocol error on {self.connection.addr}: {exc} — will retry")
        self.connection.close()


@@ -338,6 +342,26 @@ async def heartbeat_sender(conn: AsyncConnection, interval: int):
            raise


+async def _info_plugin_refresh_loop(conn: AsyncConnection, info_plugins: List):
+    """Wait for server requests to re-send InfoPlugin data."""
+    logger = logging.getLogger("hbc.plugins")
+    while running:
+        await conn.request_info_event.wait()
+        if not running:
+            break
+        conn.request_info_event.clear()
+        logger.info("refreshing InfoPlugins on server request")
+        for plugin in info_plugins:
+            plugin._cache = None
+            try:
+                data = await plugin.collect()
+                if data:
+                    await conn.sendto({"plugin": plugin.name, **data}, "PLG")
+                    logger.info(f"Resent {plugin.name} data")
+            except Exception as e:
+                logger.error(f"Error re-collecting {plugin.name}: {e}", exc_info=True)
+
+
 async def plugin_collector(conn: AsyncConnection, registry: PluginRegistry):
    """Collect and send plugin data.

@@ -369,16 +393,13 @@ async def plugin_collector(conn: AsyncConnection, registry: PluginRegistry):
    for plugin in monitor_plugins:
        by_interval[plugin.interval].append(plugin)

-    # Create tasks for each interval
-    tasks = []
+    # Create tasks for each interval; always include the info-refresh watcher
+    tasks = [asyncio.create_task(_info_plugin_refresh_loop(conn, info_plugins))]
    for interval, plugins in by_interval.items():
-        task = asyncio.create_task(
+        tasks.append(asyncio.create_task(
            plugin_collector_interval(conn, plugins, interval)
-        )
-        tasks.append(task)
+        ))

-    # Wait for all tasks
-    if tasks:
    try:
        await asyncio.gather(*tasks, return_exceptions=True)
    except asyncio.CancelledError:
@@ -463,16 +484,13 @@ async def cleanup(connections: List[AsyncConnection]):
    logger = logging.getLogger("hbc.cleanup")
    logger.info("Cleaning up connections")
    
-    for conn in connections:
+    target = next((c for c in connections if c.transport), connections[0] if connections else None)
+    if target and send_shutdown:
        try:
-            msg = {
-                "shutdown": 1,
-                "acks": conn.ackcount
-            }
-            await conn.sendto(msg)
+            await target.sendto({"shutdown": 1, "acks": target.ackcount})
        except Exception as e:
            logger.error(f"Error sending shutdown: {e}")
-        
+    for conn in connections:
        conn.close()
    
    # Give messages time to send
@@ -481,7 +499,7 @@ async def cleanup(connections: List[AsyncConnection]):

 async def async_main(args, config):
    """Async main function."""
-    global running, shutdown_event, active_tasks
+    global running, shutdown_event, active_tasks, send_shutdown 
    
    # Create shutdown event
    shutdown_event = asyncio.Event()
@@ -498,48 +516,62 @@ async def async_main(args, config):
    hb_port = config.get("hb_port", PORT)
    interval = config.get("interval", INTERVAL)
    
-    logger.info(f"Starting hbc for {iam} -> {hb_hosts}")
-    logger.info(f"Port: {hb_port}, Interval: {interval}s")
+    logger.info(f"hbc {__version__} on {iam} -> {hb_hosts} port={hb_port}, interval={interval}s")
+    
+    af_filter = (socket.AF_INET if getattr(args, "ipv4_only", False)
+                 else socket.AF_INET6 if getattr(args, "ipv6_only", False)
+                 else 0)

    # Create connections
    connections = []
    conn_id = 1
+    _retry_delay = 5

+    while running and not connections:
        for host in hb_hosts:
            try:
-            addrs = socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
+                addrs = socket.getaddrinfo(host, hb_port, af_filter, 0, socket.SOL_UDP)
            except socket.gaierror as e:
-            logger.error(f"Cannot resolve {host}: {e}")
+                logger.warning(f"Cannot resolve {host}: {e} — retrying in {_retry_delay}s")
                continue
-        
            for addr_info in addrs:
                af = addr_info[0]
                addr = addr_info[4][0]
-
                conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
                if not await conn.open():
                    logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
                connections.append(conn)
                conn_id += 1
+        if not connections:
+            try:
+                if shutdown_event:
+                    await asyncio.wait_for(shutdown_event.wait(), timeout=_retry_delay)
+                else:
+                    await asyncio.sleep(_retry_delay)
+            except asyncio.TimeoutError:
+                pass
+            _retry_delay = min(_retry_delay * 2, 60)

    if not connections:
-        logger.error("No connections established (DNS resolution failed for all hosts)")
        return 1

    logger.info(f"Created {len(connections)} connections")
    
    # Send boot/message if requested
+    send_shutdown = False
    if args.boot or args.message:
        boot_msg = {}
        if args.boot:
            boot_msg["boot"] = 1
+            args.boot = False  # Clear boot flag so we don't send it again in main loop
+            send_shutdown = True
        if args.message:
            boot_msg["service"] = "service"
            boot_msg["msg"] = args.message
        
        boot_msg["acks"] = 0
-        for conn in connections:
-            await conn.sendto(boot_msg)
+        target = next((c for c in connections if c.transport), connections[0])
+        await target.sendto(boot_msg)
        
        if args.message and not args.daemon:
            # Message-only mode
@@ -706,6 +738,9 @@ def build_parser():
        default=0,
        help="Increase debug level"
    )
+    af_group = parser.add_mutually_exclusive_group()
+    af_group.add_argument("-4", dest="ipv4_only", action="store_true", help="Use IPv4 only")
+    af_group.add_argument("-6", dest="ipv6_only", action="store_true", help="Use IPv6 only")
    parser.add_argument(
        "hosts",
        nargs="+",
@@ -739,7 +774,7 @@ def main(argv=None):
    
    # Daemonize if requested
    if args.daemon:
-        print("Daemonizing...")
+        logging.info("Daemonizing...")
        daemonize()
        _reconfigure_logging_for_daemon(log_level)
        logging.info(f"hbc starting, sending heartbeat to {', '.join(args.hosts)}")
@@ -364,7 +364,10 @@ class PluginLoader:
                    
                    # Instantiate plugin with config — check plugins subdict first,
                    # then top-level keys (e.g. nagios_runner: ... at root of config).
-                    plugin_instance_config = plugins_subconfig.get(obj.name) or raw_config.get(obj.name, {})
+                    plugin_instance_config = dict(plugins_subconfig.get(obj.name) or raw_config.get(obj.name) or {})
+                    # Propagate top-level owner so os_info (and any future plugin) can report it.
+                    if "owner" in raw_config and "owner" not in plugin_instance_config:
+                        plugin_instance_config["owner"] = raw_config["owner"]
                    plugin = obj(config=plugin_instance_config)
                    
                    # Initialize plugin
@@ -119,6 +119,13 @@ class CPUMonitorPlugin(MonitorPlugin):
            except Exception as e:
                self.logger.debug(f"Could not get CPU times: {e}")

+            # Uptime in seconds
+            try:
+                import time
+                data["uptime_seconds"] = int(time.time() - self.psutil.boot_time())
+            except Exception as e:
+                self.logger.debug(f"Could not get uptime: {e}")
+            
            self.logger.debug(
                f"Collected CPU metrics: {data.get('cpu_percent', 'N/A')}% usage"
            )
@@ -127,7 +127,7 @@ class FilesystemInfoPlugin(InfoPlugin):
                    try:
                        # Maximum filename length
                        max_name = os.pathconf(partition.mountpoint, 'PC_NAME_MAX')
-                        if max_name:
+                        if max_name is not None:
                            fs_info['maxfile'] = max_name
                    except (OSError, ValueError):
                        pass
@@ -135,7 +135,7 @@ class FilesystemInfoPlugin(InfoPlugin):
                    try:
                        # Maximum path length
                        max_path = os.pathconf(partition.mountpoint, 'PC_PATH_MAX')
-                        if max_path:
+                        if max_path is not None:
                            fs_info['maxpath'] = max_path
                    except (OSError, ValueError):
                        pass
@@ -14,6 +14,24 @@ except ImportError:

 from hbd.client.plugin import MonitorPlugin

+
+def _zfs_arc_bytes() -> int:
+    """Return current ZFS ARC size in bytes, or 0 if ZFS is not present.
+
+    ZFS ARC is reclaimable but is not included in MemAvailable by the Linux
+    kernel (it is not in SReclaimable), so it would otherwise be counted as
+    used memory.
+    """
+    try:
+        with open("/proc/spl/kstat/zfs/arcstats") as fh:
+            for line in fh:
+                parts = line.split()
+                if len(parts) >= 3 and parts[0] == "size":
+                    return int(parts[2])
+    except (OSError, ValueError):
+        pass
+    return 0
+
 logger = logging.getLogger(__name__)


@@ -101,11 +119,21 @@ class MemoryMonitorPlugin(MonitorPlugin):
        
        # Virtual (physical) memory statistics
        vmem = psutil.virtual_memory()
+
+        # psutil's available already excludes page cache / file buffers
+        # (uses MemAvailable on Linux). Add ZFS ARC on top because the kernel
+        # does not include it in SReclaimable / MemAvailable even though it is
+        # reclaimable.
+        arc_bytes = _zfs_arc_bytes()
+        available = min(vmem.available + arc_bytes, vmem.total)
+        used = vmem.total - available
+        percent = round(used / vmem.total * 100, 1) if vmem.total else 0.0
+
        metrics['memory_total'] = vmem.total
-        metrics['memory_available'] = vmem.available
-        metrics['memory_used'] = vmem.used
+        metrics['memory_available'] = available
+        metrics['memory_used'] = used
        metrics['memory_free'] = vmem.free
-        metrics['memory_percent'] = vmem.percent
+        metrics['memory_percent'] = percent
        
        # Platform-specific memory details
        if hasattr(vmem, 'active'):
@@ -31,16 +31,13 @@ from hbd.client.plugin import MonitorPlugin


 # Nagios exit codes
-NAGIOS_OK = 0
-NAGIOS_WARNING = 1
-NAGIOS_CRITICAL = 2
 NAGIOS_UNKNOWN = 3

 STATUS_NAMES = {
-    NAGIOS_OK: "OK",
-    NAGIOS_WARNING: "WARNING",
-    NAGIOS_CRITICAL: "CRITICAL",
-    NAGIOS_UNKNOWN: "UNKNOWN"
+    0: "OK",
+    1: "WARNING",
+    2: "CRITICAL",
+    3: "UNKNOWN",
 }


@@ -129,9 +126,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
        """
        results = {}

-        # Track overall status (worst status wins)
-        worst_status = NAGIOS_OK
-        
        for cmd_config in self.commands:
            name = cmd_config.get("name")
            command = cmd_config.get("command")
@@ -149,10 +143,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
                results[f"{name}_status_code"] = status_code
                results[f"{name}_output"] = output

-                # Track worst status
-                if status_code > worst_status:
-                    worst_status = status_code
-                
                # Parse and add performance data
                if perfdata:
                    for metric_name, metric_value in perfdata.items():
@@ -167,12 +157,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
                results[f"{name}_status"] = "ERROR"
                results[f"{name}_status_code"] = NAGIOS_UNKNOWN
                results[f"{name}_output"] = str(e)
-                worst_status = NAGIOS_UNKNOWN
-        
-        # Add overall status
-        results["overall_status"] = STATUS_NAMES.get(worst_status, "UNKNOWN")
-        results["overall_status_code"] = worst_status
-        results["plugin_count"] = len(self.commands)

        return results
    
@@ -62,6 +62,9 @@ class OSInfoPlugin(InfoPlugin):
                "hbc_version": hbc_version,
                "hbc_type": "full",
            }
+            if self.config.get("owner"):
+                self.logger.debug(f"Adding owner from config: {self.config['owner']}")
+                data["owner"] = self.config["owner"]
            
            # Add Linux-specific distribution info
            if platform.system() == "Linux":
@@ -13,12 +13,8 @@ plugins:
    count: 3              # ICMP packets per ping run (default 3)
    timeout: 5            # seconds before a host is considered unreachable (default 5)
    hosts:
-      8.8.8.8:
-        warning: 20.0     # ms
-        critical: 100.0   # ms
-      192.168.1.1:
-        warning: 5.0
-        critical: 20.0
+      - 8.8.8.8
+      - 192.168.1.1
 ```

 Reported metrics per host (metric key uses the hostname with dots/colons replaced
@@ -89,8 +89,18 @@ class ZFSMonitorPlugin(MonitorPlugin):
            name = parts[0].strip()
            if self._pools_filter and name not in self._pools_filter:
                continue
+            health = parts[1].strip()
+            if health == "ONLINE":
+                status = 0
+            elif health in ("DEGRADED", "ONLINE with errors"):
+                status = 1
+            elif health in ("FAULTED", "OFFLINE", "UNAVAIL"):
+                status = 2
+            else:
+                status = 3  # unknown status
            pools[name] = {
-                "health":   parts[1].strip(),
+                "health":    health,
+                "status": status,
                "size":      _int(parts[2]),
                "alloc":     _int(parts[3]),
                "free":      _int(parts[4]),
@@ -134,6 +134,31 @@ thresholds:
          hysteresis: 0.1
          enabled: true
  
+  # ----------------------------------------------------------------------------
+  # ZFS Monitor Thresholds
+  # ----------------------------------------------------------------------------
+  zfs_monitor:
+    # Pool health check — built-in default; shown here for reference/override.
+    # status is 0 (ONLINE), 1 (DEGRADED), 2 (FAULTED, OFFLINE, UNAVAIL) or 3 (any other state, e.g. SUSPENDED).
+    # Use '*' to apply the same rule to every pool, or name a specific pool.
+    pools:
+      '*':
+        status:
+          warning: 1           # Alert WARNING when pool is DEGRADED
+          critical: 2           # Alert CRITICAL when pool is SUSPENDED/FAULTED/UNAVAIL
+          operator: ">="
+          hysteresis: 0.0       # No hysteresis — a degraded pool is always alerting
+          grace: 0              # Fire immediately — don't wait for a second collection
+          display: "ZFS pool {pool_name} is {health}"
+
+      # Per-pool capacity thresholds (optional; add pools you care about)
+      # tank:
+      #   capacity:
+      #     warning: 75.0       # Warn at 75% used
+      #     critical: 90.0      # Critical at 90% used
+      #     operator: ">"
+      #     hysteresis: 0.05
+
  # ----------------------------------------------------------------------------
  # Network Monitor Thresholds
  # ----------------------------------------------------------------------------
@@ -27,21 +27,22 @@ SERVER_DEFAULTS = {

    # Monitoring settings
    "interval": 20,             # Expected heartbeat interval (for server checks)
-    "grace": 2,                 # Grace multiplier (interval * grace = timeout)
+    "grace": 2,                 # Grace period (extra seconds before notifying after a missed heartbeat)
    "threshold_renotify_interval": 3600,  # Seconds between threshold re-notifications

    # User management
    "users": {},                # username -> {full_name, avatar, password, admin, notification_channels}
    "default_owner": None,      # Username that owns hosts with no explicit owner

+    # OAuth2 providers
+    "oauth": {},                 # oauth.gitea.{url,client_id,client_secret}
+
    # Host management
    "hosts": {},                # Unified host definitions
-    "dyndnshosts": [],          # Hosts with dynamic DNS (legacy)
-    "drophosts": [],            # Hosts to ignore
-    "dyndomains": ["wrede.org"],
+    "dyndomains": ["example.org"], # Domains to update via nsupdate when a host with dyndns: true is updated

    # DNS updates
-    "nsupdate_bin": "/usr/bin/nsupdate",
+    "nsupdate_bin": "/usr/bin/nsupdate", # Path to nsupdate binary
    
    # WebSocket settings
    "ws_port": 50005, 
@@ -76,9 +77,13 @@ THRESHOLD_DEFAULTS = {
                }
            },
            'memory_monitor': {
-                'percent': {
+                'memory_percent': {
                    'warning': 85.0,
                    'critical': 95.0
+                },
+                'swap_percent': {
+                    'warning': 40.0,
+                    'critical': 75.0
                }
            },
            'disk_monitor': {
@@ -95,7 +100,31 @@ THRESHOLD_DEFAULTS = {
                'warning': 200,
                'critical': 250.0,
                'count': 3  # Optional: number of consecutive breaches before alerting
+            },
+            'nagios_runner': {
+                'status_code': {
+                    'display': '{check_name} {output}',
+                    'operator': "nagios"
                }
+            },
+            'zfs_monitor': {
+                'pools': {
+                    '*': {
+                        'status': {
+                            'warning': 1,
+                            'critical': 2,
+                            'operator': '>=',
+                            'hysteresis': 0.0,
+                            'grace': 0,
+                            'display': 'ZFS pool {pool_name} is {health}'
+                        },
+                        'capacity': {
+                            'warning': 80.0,
+                            'critical': 90.0,
+                        }
+                    }
+                }
+            },
        }
    }

@@ -219,7 +248,7 @@ def get_watchhosts(config):
    """Extract watched hostnames from config (hosts with watch: true).

    Returns:
-        List of hostnames to watch
+        List of hostnames to watch
    """
    watchhosts = []
    hosts_config = config.get("hosts", {})
@@ -231,31 +260,14 @@ def get_watchhosts(config):


 def get_dyndnshosts(config):
-    """Extract dyndnshosts from config, supporting both new and legacy formats.
-    
-    Args:
-        config: Configuration dictionary
-        
-    Returns:
-        List of hostnames with dynamic DNS
-    """
-    dyndnshosts = []
-    
-    # New format: hosts section with dyndns attribute
-    if "hosts" in config:
-        hosts_config = config["hosts"]
-        if isinstance(hosts_config, dict):
-            for host_name, host_attrs in hosts_config.items():
-                if isinstance(host_attrs, dict) and host_attrs.get("dyndns", False):
-                    dyndnshosts.append(host_name)
-    
-    # Legacy format: dyndnshosts list/set
-    if "dyndnshosts" in config:
-        legacy_dyndnshosts = config.get("dyndnshosts", [])
-        if isinstance(legacy_dyndnshosts, (list, set)):
-            dyndnshosts.extend(legacy_dyndnshosts)
-    
-    return list(set(dyndnshosts))  # Remove duplicates
+    """Return hostnames that have a dyndns setting in the hosts section."""
+    hosts_config = config.get("hosts", {})
+    if not isinstance(hosts_config, dict):
+        return []
+    return [
+        name for name, attrs in hosts_config.items()
+        if isinstance(attrs, dict) and attrs.get("dyndns")
+    ]


 def get_host_config(config, hostname):
@@ -303,7 +315,7 @@ def get_host_access(config, hostname) -> dict:
    """
    host_cfg = get_host_config(config, hostname)

-    owner = host_cfg.get("owner") or get_default_owner(config)
+    owner = host_cfg.get("owner") # or get_default_owner(config)

    managers = host_cfg.get("managers", [])
    if isinstance(managers, str):
@@ -0,0 +1,136 @@
+"""YAML round-trip read/write for .hb.yaml, with backup and atomic writes."""
+
+import glob
+import os
+import threading
+from datetime import datetime
+
+from ruamel.yaml import YAML
+
+_write_lock = threading.Lock()
+
+
+def _make_yaml() -> YAML:
+    y = YAML()
+    y.preserve_quotes = True
+    return y
+
+# Top-level keys managed by the 'server' logical section
+_SERVER_KEYS = [
+    "hbd_port", "hbd_host", "ws_port", "wss_port", "hb_port",
+    "interval", "grace", "base_url", "threshold_renotify_interval",
+    "logfile", "pidfile", "pickfile", "journal_enabled", "journal_dir",
+    "journal_max_size", "journal_max_backups", "default_owner",
+    "default_threshold_config",
+]
+
+# Top-level keys managed by the 'dns' logical section
+_DNS_KEYS = ["nsupdate_bin", "rndc_key", "dyndomains"]
+
+
+def read_roundtrip(path: str):
+    """Load .hb.yaml with ruamel.yaml, preserving comments and ordering."""
+    with open(path, "r", encoding="utf-8") as f:
+        return _make_yaml().load(f)
+
+
+def write_config(path: str, data) -> None:
+    """Backup current file then atomically write data.
+
+    Backup naming: {path}.bak.YYYYMMDD-HHMMSS
+    Rotation: keep the 10 most recent backups, delete older ones.
+    Atomic write: write to {path}.tmp then os.replace({path}.tmp, path).
+    Acquires _write_lock for the full backup+write sequence.
+    """
+    with _write_lock:
+        ts = datetime.now().strftime("%Y%m%d-%H%M%S")
+        backup_path = f"{path}.bak.{ts}"
+        n = 0
+        while os.path.exists(backup_path):
+            n += 1
+            backup_path = f"{path}.bak.{ts}-{n}"
+        orig_mode = None
+        if os.path.exists(path):
+            orig_mode = os.stat(path).st_mode
+            with open(path, "rb") as src, open(backup_path, "wb") as dst:
+                dst.write(src.read())
+            os.chmod(backup_path, orig_mode)
+        backups = sorted(glob.glob(f"{path}.bak.*"), reverse=True)
+        for old in backups[10:]:
+            os.unlink(old)
+        tmp = f"{path}.tmp"
+        try:
+            with open(tmp, "w", encoding="utf-8") as f:
+                _make_yaml().dump(data, f)
+            if orig_mode is not None:
+                os.chmod(tmp, orig_mode)
+            os.replace(tmp, path)
+        except Exception:
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+
+
+def list_backups(path: str) -> list:
+    """Return backup paths sorted newest-first."""
+    return sorted(glob.glob(f"{path}.bak.*"), reverse=True)
+
+
+def apply_structured_section(data, section: str, values: dict) -> None:
+    """Merge a dict of scalar/list values into data for the named logical section.
+
+    For 'server': updates each known key present in values, preserving comments on unchanged
+    keys. For 'dns': sets known keys and removes absent ones. 'users'/'hosts': replaced whole.
+    """
+    if section == "server":
+        for key in _SERVER_KEYS:
+            if key in values:
+                data[key] = values[key]
+    elif section == "dns":
+        for key in _DNS_KEYS:
+            if key in values:
+                data[key] = values[key]
+            else:
+                data.pop(key, None)
+    elif section == "users":
+        data["users"] = values
+    elif section == "hosts":
+        data["hosts"] = values
+    else:
+        raise ValueError(f"Unknown structured section: {section!r}")
+
+
+def apply_channel(data, name: str, channel_cfg: dict) -> None:
+    """Insert or replace a single notification channel entry, preserving others."""
+    if not data.get("notification_channels"):
+        data["notification_channels"] = {}
+    data["notification_channels"][name] = channel_cfg
+
+
+def delete_channel(data, name: str) -> None:
+    """Remove a notification channel by name. No-op if not found."""
+    nc = data.get("notification_channels") or {}
+    nc.pop(name, None)
+
+
+def apply_yaml_section(data, section: str, yaml_text: str) -> None:
+    """Replace the named logical section by parsing yaml_text."""
+    parsed = _make_yaml().load(yaml_text)
+    if section == "notification_channels":
+        data["notification_channels"] = parsed
+    elif section == "thresholds":
+        data["threshold_configs"] = parsed
+    elif section == "hosts":
+        data["hosts"] = parsed
+    elif section == "dns":
+        if parsed:
+            for key in _DNS_KEYS:
+                if key in parsed:
+                    data[key] = parsed[key]
+        else:
+            for key in _DNS_KEYS:
+                data.pop(key, None)
+    else:
+        raise ValueError(f"Unknown YAML section: {section!r}")
@@ -4,6 +4,9 @@ from __future__ import annotations
 from subprocess import Popen, PIPE, STDOUT
 from typing import Optional
 import asyncio
+import logging
+
+logger = logging.getLogger(__name__)


 def create_nsupdate_payload(
@@ -123,7 +126,6 @@ async def dns_update_worker(
                pass
            continue

-        m = f"changed address to {addr}"
        for dyndomain in cfg.get("dyndomains", []):
            err = await loop.run_in_executor(
                None,
@@ -135,28 +137,29 @@ async def dns_update_worker(
                cfg.get("rndc_key", "/etc/dhcpc/rndc-key"),
            )
            if err:
-                m += f", DNS update failed: {err}"
+                m = f"DNS update failed for {addr} ({dyndomain}): {err}"
                logger.error("DNS update failed for %s: %s", name, err)
+                if log:
+                    try:
+                        await loop.run_in_executor(None, log, name, "ERROR", m)
+                    except Exception:
+                        pass
            else:
-                m += ", DNS updated."
+                m = f"DNS updated {name}.dy.{dyndomain} → {addr}"
+                if log:
+                    try:
+                        await loop.run_in_executor(None, log, name, "INFO", m)
+                    except Exception:
+                        pass
+
+        if not cfg.get("dyndomains"):
+            logger.warning("DNS update triggered for %s but no dyndomains configured", name)

        try:
            dnsq.task_done()
        except Exception:
            pass

-        if log:
-            try:
-                await loop.run_in_executor(None, log, name, m)
-            except Exception:
-                pass
-
-    if log:
-        try:
-            await loop.run_in_executor(None, log, None, "dns_update_worker exiting")
-        except Exception:
-            pass
-

 def start_dns_worker(
    hbdclass,
@@ -286,7 +286,7 @@ class Host:
            Host.hosts[name] = self
        self.num = num
        self.dyn = False
-        self.watched = True
+        self.watched = False
        self.upcount = 0
        self.interval = 0
        self.doesack = -1
@@ -297,6 +297,8 @@ class Host:
        self.plugin_retention = 100  # Keep last N samples per plugin
        # Alert state tracking: {metric_path: AlertState}
        self.alert_states = {}
+        # Stale-data timers: {plugin_name: asyncio.TimerHandle}
+        self.plugin_timers = {}
        # User access control
        self.owner: str | None = None       # username of owner
        self.managers: list = []            # usernames with manager role
@@ -365,7 +367,7 @@ class Host:
    def stateinfo(self):
        ddict = {}
        for d in self.__dict__:
-            if d in ["alert_states", "plugin_data"]:
+            if d in ["alert_states", "plugin_data", "plugin_timers"]:
                continue
            if d == "connections":
                cl = []
@@ -483,6 +485,8 @@ class Host:
            self.managers = []
        if not hasattr(self, "monitors"):
            self.monitors = []
+        if not hasattr(self, "plugin_timers"):
+            self.plugin_timers = {}

        pass

@@ -542,6 +546,34 @@ class Host:
        """
        return self.plugin_data

+    def reset_plugin_timer(self, plugin_name, timeout_seconds, callback):
+        """Reset the stale-data timer for a plugin.
+
+        If no new PLG data arrives within timeout_seconds, callback(host, plugin_name)
+        is called so the caller can clear history and alerts.
+        """
+        import asyncio
+        existing = self.plugin_timers.get(plugin_name)
+        if existing and not existing.cancelled():
+            existing.cancel()
+
+        async def _fire():
+            await callback(self, plugin_name)
+
+        try:
+            loop = asyncio.get_event_loop()
+            self.plugin_timers[plugin_name] = loop.call_later(
+                timeout_seconds, lambda: asyncio.create_task(_fire())
+            )
+        except RuntimeError:
+            pass
+
+    def cancel_plugin_timer(self, plugin_name):
+        """Cancel the stale timer for a plugin, if any."""
+        handle = self.plugin_timers.pop(plugin_name, None)
+        if handle and not handle.cancelled():
+            handle.cancel()
+
    # ------------------------------------------------------------------
    # User-role helpers
    # ------------------------------------------------------------------
@@ -78,9 +78,7 @@ async def reload_configuration(config_obj, config_path, components):
        True if reload succeeded, False otherwise
    """
    try:
-        logger.info("=" * 60)
        logger.info("Starting configuration reload...")
-        logger.info("=" * 60)
        
        # Reload config file
        new_config = await config_obj.reload(config_path)
@@ -115,13 +113,11 @@ async def reload_configuration(config_obj, config_path, components):
        # These are reloadable and effective immediately:
        # - notification_channels
        # - threshold_configs
-        # - hosts (watchhosts, dyndnshosts, notification_channels)
+        # - hosts (watchhosts, dyndns, notification_channels)
        # - grace period (used on next heartbeat)
        # - debug/verbose flags (used on next message)
        
-        logger.info("=" * 60)
        logger.info("Configuration reload completed successfully")
-        logger.info("=" * 60)
        return True
        
    except Exception as e:
@@ -246,6 +242,9 @@ async def _run_async(config, config_path=None):
    # upgrade or config change between runs).
    threshold_checker.purge_stale_alerts(hbdclass)

+    async def _http_reload_callback():
+        await reload_configuration(config, config_path, components)
+
    # HTTP server (asyncio-based via aiohttp)
    try:
        http_task = asyncio.create_task(
@@ -255,9 +254,11 @@ async def _run_async(config, config_path=None):
                config=config,
                hbdclass=hbdclass,
                tcss=None,
+                threshold_checker=threshold_checker,
                verbose=config.get("verbose", False),
                get_now=lambda: time.time(),
                VER="",
+                reload_callback=_http_reload_callback,
            )
        )
        logger.info(
@@ -421,7 +422,6 @@ def load_pickled_hosts(config, hbdclass):
    pickfile = config.get("pickfile", "hbd.pickle")
    dyndnshosts = config_mod.get_dyndnshosts(config)
    watchhosts = config_mod.get_watchhosts(config)
-    drophosts = config.get("drophosts", [])
    if 1 and os.path.exists(pickfile):
        if config.get("verbose", False):
            logger.info("opening pickls %s", pickfile)
@@ -447,9 +447,6 @@ def load_pickled_hosts(config, hbdclass):
            hbdclass.Host.hosts[h].apply_access(
                access["owner"], access["managers"], access["monitors"]
            )
-        for h in drophosts:
-            if h in hbdclass.Host.hosts:
-                del hbdclass.Host.hosts[h]
        if config.get("verbose", False):
            logger.info("%s pickled hosts loaded", len(hbdclass.Host.hosts))
    else:
@@ -474,6 +471,8 @@ def run(config, config_path=None):
    if config.get("debug", 0) > 0:
        log_level = logging.DEBUG
    logging.basicConfig(level=log_level)
+    if not config.get("debug", 0):
+        logging.getLogger("aiohttp.access").propagate = False
    load_pickled_hosts(config, hbdclass)

    notify_mod.initlog(logfile=config.get("logfile", "messages.log"))
@@ -106,11 +106,18 @@ def closelog():

 def eventlog(host, lvl, m, service=None):
    ts = time.time()
+    msg = {
+        "ts": ts,
+        "host": host or None,
+        "level": lvl,
+        "service": service,
+        "message": m,
+    }
+    data.msgs.append(msg)
    s = f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))} {lvl} "
    if host:
        s += f"{host} "
    s += m
-    data.msgs.append(s)
    logger.info(s)
    if logf:
        try:
@@ -118,7 +125,7 @@ def eventlog(host, lvl, m, service=None):
            logf.flush()
        except Exception as e:
            logger.warning("failed to write to logfile: %s", e)
-    msg_to_websockets("message", s)
+    msg_to_websockets("message", msg)


 # ---------------------------------------------------------------------------
@@ -133,10 +140,14 @@ def _send_pushover(channel_cfg: dict, notif: Notification) -> bool:
    if not token or not user:
        logger.warning("pushover: missing token or user")
        return False
-    params: dict = {"token": token, "user": user, "title": notif.title, "message": notif.body}
+    body = "%s: %s" % (notif.title, notif.body)
+    title = ""
+    params: dict = {"token": token, "user": user, "title": title, "message": body}
+    if channel_cfg.get("sound"):
+        params["sound"] = channel_cfg["sound"]
    if notif.url:
        params["url"] = notif.url
-        params["url_title"] = "Plugin metrics"
+        params["url_title"] = "Heartbeat"
    conn = http.client.HTTPSConnection("api.pushover.net:443")
    try:
        conn.request(
@@ -209,7 +220,7 @@ def _send_mattermost(channel_cfg: dict, notif: Notification) -> bool:
        return False
    text = f"**{notif.title}**\n{notif.body}"
    if notif.url:
-        text += f"\n[Plugin metrics]({notif.url})"
+        text += f"\n[Plugin metrics] {notif.url}"
    ses = {"url": host, "scheme": "http", "basepath": "/api/v4", "port": 8065}
    mm = Driver(ses)
    payload: dict = {"text": text, "channel": channel, "username": channel_cfg.get("username", "hbd")}
@@ -357,6 +368,9 @@ _TIMEOUT = 15  # seconds per channel send

 async def _dispatch_to_channel(channel_name: str, channel_cfg: dict, notif: Notification) -> bool:
    """Send *notif* to a single named channel, honouring min_level."""
+    # Strip ownership metadata — notifier drivers only need delivery credentials.
+    channel_cfg = {k: v for k, v in channel_cfg.items() if k not in ("owner", "private")}
+
    level = notif.level.upper()
    if level != "RECOVER":
        min_level = channel_cfg.get("min_level", "WARNING").upper()
@@ -392,7 +406,7 @@ def _build_url(host_name: str) -> str:
    base_url = _config.get("base_url", "").rstrip("/")
    if not base_url:
        return ""
-    return f"{base_url}/plugins#{host_name}"
+    return f"{base_url}/alerts?filter={host_name}"


 async def send_notification(host_name: str, notif: Notification) -> dict:
@@ -0,0 +1,254 @@
+"""OAuth2 provider support.
+
+Config shape (in ~/.hb.yaml):
+
+    oauth:
+      my-gitea:                          # route slug → /login/oauth/my-gitea
+        type: gitea                      # "gitea" | "github" | "nextcloud"
+                                         # omit type to default to "gitea"
+        url: https://git.example.com     # required for gitea and nextcloud
+        client_id: <client-id>
+        client_secret: <client-secret>
+        label: "Work Gitea"              # optional display name on login button
+        logo: https://example.com/logo.png  # optional logo URL
+
+      github:
+        type: github
+        client_id: <client-id>
+        client_secret: <client-secret>
+
+      nextcloud:
+        type: nextcloud
+        url: https://cloud.example.com
+        client_id: <client-id>
+        client_secret: <client-secret>
+
+Register the OAuth app with each provider and set the redirect URI to:
+  https://<hbd-host>/login/oauth/<name>/callback
+"""
+
+import logging
+import secrets
+import time
+import urllib.parse
+from dataclasses import dataclass
+
+import aiohttp
+
+logger = logging.getLogger(__name__)
+
+STATE_TTL = 600  # 10 minutes
+
+# state_token -> expiry timestamp
+_states: dict[str, float] = {}
+
+
+def make_state() -> str:
+    """Create, register and return a fresh CSRF state token.
+
+    Expired tokens are swept first.  The new token is 32 random bytes in hex
+    and is stored in ``_states`` with an expiry of now + STATE_TTL seconds.
+    """
+    _purge_states()
+    state = secrets.token_hex(32)
+    expires_at = time.time() + STATE_TTL
+    _states[state] = expires_at
+    return state
+
+
+def validate_state(state: str) -> bool:
+    """Consume *state* and report whether it was a live token.
+
+    The token is removed from ``_states`` whether or not it is still valid,
+    so each token can be checked successfully at most once.  Returns False
+    for unknown tokens and for tokens whose expiry has passed.
+    """
+    try:
+        expiry = _states.pop(state)
+    except KeyError:
+        return False
+    return expiry > time.time()
+
+
+def _purge_states() -> None:
+    """Drop every CSRF state token whose expiry time has already passed."""
+    cutoff = time.time()
+    # Iterate over a snapshot so entries can be deleted while looping.
+    for token, expiry in list(_states.items()):
+        if expiry < cutoff:
+            del _states[token]
+
+
+class OAuthError(Exception):
+    """Raised when the OAuth2 flow fails for any reason.
+
+    exchange_code() and fetch_user() raise it for non-200 responses,
+    network errors, and responses that lack the expected content.
+    """
+
+
+# Built-in provider templates, keyed by the ``type`` value of an oauth entry.
+#
+#   *_url_tmpl         endpoint templates; get_providers() fills ``{url}``
+#                      with the entry's configured base URL
+#   scope              OAuth scope string; build_auth_url() omits the
+#                      parameter when it is empty
+#   field_map          maps "username" / "full_name" / "avatar" to the key
+#                      holding that value in the provider's profile response
+#                      (None means the provider has no such field)
+#   profile_data_path  keys fetch_user() walks through the profile response
+#                      before applying field_map
+#   requires_url       whether the entry must configure ``url``
+#   default_label      label used when the entry does not set one
+PROVIDER_DEFS: dict = {
+    "gitea": {
+        "authorize_url_tmpl": "{url}/login/oauth/authorize",
+        "token_url_tmpl":     "{url}/login/oauth/access_token",
+        "profile_url_tmpl":   "{url}/api/v1/user",
+        "scope":              "user:email",
+        "field_map":          {"username": "login", "full_name": "full_name", "avatar": "avatar_url"},
+        "profile_data_path":  [],
+        "requires_url":       True,
+        "default_label":      "Gitea",
+    },
+    "github": {
+        "authorize_url_tmpl": "https://github.com/login/oauth/authorize",
+        "token_url_tmpl":     "https://github.com/login/oauth/access_token",
+        "profile_url_tmpl":   "https://api.github.com/user",
+        "scope":              "read:user",
+        "field_map":          {"username": "login", "full_name": "name", "avatar": "avatar_url"},
+        "profile_data_path":  [],
+        "requires_url":       False,
+        "default_label":      "GitHub",
+    },
+    "nextcloud": {
+        "authorize_url_tmpl": "{url}/apps/oauth2/authorize",
+        "token_url_tmpl":     "{url}/apps/oauth2/api/v1/token",
+        "profile_url_tmpl":   "{url}/ocs/v2.php/cloud/user?format=json",
+        "scope":              "",
+        "field_map":          {"username": "id", "full_name": "display-name", "avatar": None},
+        "profile_data_path":  ["ocs", "data"],
+        "requires_url":       True,
+        "default_label":      "Nextcloud",
+    },
+}
+
+
+@dataclass
+class ResolvedProvider:
+    """A fully resolved OAuth2 provider instance, ready to use.
+
+    Built by get_providers() from one config['oauth'] entry combined with
+    the PROVIDER_DEFS template for its type.
+    """
+    name: str  # key of the entry under config['oauth']
+    type: str  # PROVIDER_DEFS key the entry resolved to
+    label: str  # entry's label, or the type's default_label
+    logo: str  # entry's logo value; empty string when not configured
+    authorize_url: str  # authorize endpoint with {url} filled in
+    token_url: str  # token endpoint with {url} filled in
+    profile_url: str  # user-profile endpoint with {url} filled in
+    scope: str  # scope string from the type definition; may be empty
+    client_id: str
+    client_secret: str
+    field_map: dict  # per-instance copy of the type's field_map
+    profile_data_path: list  # per-instance copy of the type's profile_data_path
+
+
+def get_providers(config: dict) -> list[ResolvedProvider]:
+    """Return a ResolvedProvider for every valid entry in config['oauth'].
+
+    Entries that are not mappings, have an unknown type, or lack a required
+    field (client_id, client_secret, and url where the provider type needs
+    one) are skipped with a warning log.  A url, label or logo key that is
+    present with an empty value (None) is treated like a missing key.
+    Order follows config declaration order.
+
+    Returns an empty list when config['oauth'] is absent or not a mapping.
+    """
+    result = []
+    oauth_cfg = config.get("oauth", {})
+    if not isinstance(oauth_cfg, dict):
+        return result
+    for name, entry in oauth_cfg.items():
+        if not isinstance(entry, dict):
+            logger.warning("OAuth: %r is not a mapping, skipping", name)
+            continue
+        provider_type = entry.get("type", "gitea")
+        defn = PROVIDER_DEFS.get(provider_type)
+        if defn is None:
+            logger.warning("OAuth: unknown provider type %r for %r, skipping", provider_type, name)
+            continue
+        client_id = entry.get("client_id", "")
+        client_secret = entry.get("client_secret", "")
+        if not client_id or not client_secret:
+            logger.warning("OAuth: %r missing client_id or client_secret, skipping", name)
+            continue
+        # A key present with a null value returns None rather than the .get()
+        # default, so coalesce before calling string methods on it; otherwise
+        # one bare `url:` line would raise and take down every provider.
+        url = str(entry.get("url") or "").rstrip("/")
+        if defn["requires_url"] and not url:
+            logger.warning("OAuth: %r requires url but none configured, skipping", name)
+            continue
+        label = entry.get("label") or defn["default_label"]
+        logo = entry.get("logo") or ""
+        result.append(ResolvedProvider(
+            name=name,
+            type=provider_type,
+            label=label,
+            logo=logo,
+            authorize_url=defn["authorize_url_tmpl"].format(url=url),
+            token_url=defn["token_url_tmpl"].format(url=url),
+            profile_url=defn["profile_url_tmpl"].format(url=url),
+            scope=defn["scope"],
+            client_id=client_id,
+            client_secret=client_secret,
+            # Copy so callers cannot mutate the shared PROVIDER_DEFS tables.
+            field_map=dict(defn["field_map"]),
+            profile_data_path=list(defn["profile_data_path"]),
+        ))
+    return result
+
+
+def is_enabled(config: dict) -> bool:
+    """Report whether config defines at least one usable OAuth provider."""
+    providers = get_providers(config)
+    return len(providers) > 0
+
+
+def build_auth_url(provider: ResolvedProvider, state: str, redirect_uri: str) -> str:
+    """Build the authorization URL the browser is redirected to.
+
+    The query string carries client_id, redirect_uri, response_type=code and
+    *state*; ``scope`` is appended only when the provider defines one.
+    """
+    query = {
+        "client_id": provider.client_id,
+        "redirect_uri": redirect_uri,
+        "response_type": "code",
+        "state": state,
+    }
+    if provider.scope:
+        query["scope"] = provider.scope
+    encoded = urllib.parse.urlencode(query)
+    return provider.authorize_url + "?" + encoded
+
+
+async def exchange_code(provider: ResolvedProvider, code: str, redirect_uri: str) -> str:
+    """Exchange an authorization *code* for an access token.
+
+    POSTs the client credentials, *code* and *redirect_uri* as JSON to
+    ``provider.token_url`` with a 10-second total timeout.
+
+    Returns the access token string.  Raises OAuthError on any failure:
+    a non-200 status, a network error, a timeout, a body that cannot be
+    decoded as JSON, or a response without ``access_token``.
+    """
+    # Local import: only the timeout exception type is needed here.
+    import asyncio
+
+    payload = {
+        "client_id": provider.client_id,
+        "client_secret": provider.client_secret,
+        "code": code,
+        "grant_type": "authorization_code",
+        "redirect_uri": redirect_uri,
+    }
+    timeout = aiohttp.ClientTimeout(total=10)
+    try:
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.post(
+                provider.token_url,
+                json=payload,
+                headers={"Accept": "application/json"},
+            ) as resp:
+                if resp.status != 200:
+                    text = await resp.text()
+                    raise OAuthError(f"Token exchange failed ({resp.status}): {text}")
+                data = await resp.json()
+    except asyncio.TimeoutError as exc:
+        # The total timeout surfaces as asyncio.TimeoutError, which is not an
+        # aiohttp.ClientError and would otherwise escape as a non-OAuthError.
+        raise OAuthError("Token exchange timed out") from exc
+    except aiohttp.ClientError as exc:
+        raise OAuthError(f"Token exchange network error: {exc}") from exc
+    except ValueError as exc:
+        # Body was served as JSON but could not be decoded.
+        raise OAuthError(f"Token exchange returned invalid JSON: {exc}") from exc
+    # A JSON array or scalar has no .get(); treat it like a missing token.
+    token = data.get("access_token") if isinstance(data, dict) else None
+    if not token:
+        raise OAuthError(f"No access_token in response: {data}")
+    return token
+
+
+async def fetch_user(provider: ResolvedProvider, token: str) -> dict:
+    """Fetch the authenticated user's profile from the provider.
+
+    GETs ``provider.profile_url`` with *token* as a Bearer credential and a
+    10-second total timeout, walks ``provider.profile_data_path`` into the
+    response, then maps fields through ``provider.field_map``.
+
+    Returns a dict with keys: login, full_name, avatar_url.  A field that is
+    missing, null, or not mapped for this provider comes back as "".
+    Raises OAuthError on any failure: a non-200 status, a network error, a
+    timeout, undecodable JSON, or an unexpected response structure.
+    """
+    # Local import: only the timeout exception type is needed here.
+    import asyncio
+
+    timeout = aiohttp.ClientTimeout(total=10)
+    try:
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.get(
+                provider.profile_url,
+                headers={
+                    "Authorization": f"Bearer {token}",
+                    "Accept": "application/json",
+                },
+            ) as resp:
+                if resp.status != 200:
+                    text = await resp.text()
+                    raise OAuthError(f"User fetch failed ({resp.status}): {text}")
+                data = await resp.json()
+    except asyncio.TimeoutError as exc:
+        # The total timeout surfaces as asyncio.TimeoutError, which is not an
+        # aiohttp.ClientError and would otherwise escape as a non-OAuthError.
+        raise OAuthError("User fetch timed out") from exc
+    except aiohttp.ClientError as exc:
+        raise OAuthError(f"User fetch network error: {exc}") from exc
+    except ValueError as exc:
+        # Body was served as JSON but could not be decoded.
+        raise OAuthError(f"User fetch returned invalid JSON: {exc}") from exc
+
+    try:
+        for key in provider.profile_data_path:
+            data = data.get(key, {})
+        avatar_field = provider.field_map.get("avatar")
+        profile = {
+            "login":      data.get(provider.field_map["username"]),
+            "full_name":  data.get(provider.field_map["full_name"]),
+            "avatar_url": data.get(avatar_field) if avatar_field else None,
+        }
+    except AttributeError as exc:
+        # Some level of the response was not a JSON object.
+        raise OAuthError(f"Unexpected profile response structure from {provider.type}") from exc
+    # .get(key, "") only defaults when the key is absent; a key present with
+    # JSON null would come back as None, so normalise both cases to "".
+    return {name: ("" if value is None else value) for name, value in profile.items()}
@@ -27,13 +27,65 @@ _SECRET_KEYS = frozenset({
    "smtp_password", "smtp_user", "api_password", "access_token",
 })

-_CHANNEL_TYPE_LABELS = {
-    "pushover":   "Pushover",
-    "email":      "E-mail",
-    "signal":     "Signal",
-    "mattermost": "Mattermost",
+CHANNEL_TYPE_SCHEMAS = {
+    "pushover": {
+        "label": "Pushover",
+        "fields": [
+            {"key": "token",  "label": "App token",  "type": "secret", "required": True},
+            {"key": "user",   "label": "User key",   "type": "secret", "required": True},
+            {"key": "sound",  "label": "Sound",      "type": "text",   "required": False},
+        ],
+    },
+    "email": {
+        "label": "E-mail",
+        "fields": [
+            {"key": "recipients",    "label": "Recipients (comma-separated)", "type": "list",   "required": True},
+            {"key": "sender",        "label": "From address",                 "type": "text",   "required": True},
+            {"key": "smtp_server",   "label": "SMTP server",                  "type": "text",   "required": True},
+            {"key": "smtp_port",     "label": "SMTP port",                    "type": "port",   "required": False},
+            {"key": "smtp_user",     "label": "SMTP username",                "type": "text",   "required": False},
+            {"key": "smtp_password", "label": "SMTP password",                "type": "secret", "required": False},
+        ],
+    },
+    "signal": {
+        "label": "Signal",
+        "fields": [
+            {"key": "user",      "label": "Sender number",    "type": "text", "required": True},
+            {"key": "recipient", "label": "Recipient number", "type": "text", "required": True},
+            {"key": "cli_path",  "label": "signal-cli path",  "type": "text", "required": False},
+        ],
+    },
+    "matrix": {
+        "label": "Matrix",
+        "fields": [
+            {"key": "homeserver",   "label": "Homeserver URL", "type": "text",   "required": True},
+            {"key": "access_token", "label": "Access token",   "type": "secret", "required": True},
+            {"key": "room_id",      "label": "Room ID",        "type": "text",   "required": True},
+        ],
+    },
+    "sms_voipms": {
+        "label": "SMS (voip.ms)",
+        "fields": [
+            {"key": "api_user",     "label": "API username", "type": "text",   "required": True},
+            {"key": "api_password", "label": "API password", "type": "secret", "required": True},
+            {"key": "did",          "label": "DID (from)",   "type": "text",   "required": True},
+            {"key": "dst",          "label": "Destination",  "type": "text",   "required": True},
+        ],
+    },
+    "mattermost": {
+        "label": "Mattermost",
+        "fields": [
+            {"key": "host",     "label": "Host",          "type": "text",   "required": True},
+            {"key": "token",    "label": "Webhook token", "type": "secret", "required": True},
+            {"key": "channel",  "label": "Channel",       "type": "text",   "required": True},
+            {"key": "username", "label": "Bot username",  "type": "text",   "required": False},
+            {"key": "icon",     "label": "Icon URL",      "type": "text",   "required": False},
+        ],
+    },
 }

+_CHANNEL_TYPE_LABELS = {k: v["label"] for k, v in CHANNEL_TYPE_SCHEMAS.items()}
+

 def _mask(value):
    """Return a masked placeholder for sensitive values."""
@@ -88,7 +140,7 @@ def _sanitize_channel(name, cfg):
 # Public API
 # ---------------------------------------------------------------------------

-def get_settings_sections(config: dict) -> list:
+def get_settings_sections(config: dict, threshold_checker=None) -> list:
    """Return ordered list of setting sections for the settings page.

    Each section:
@@ -143,14 +195,15 @@ def get_settings_sections(config: dict) -> list:
        }

    # ---- Notification channels (complex, built separately) ----------------
+    _METADATA_KEYS = {"type", "owner", "private", "min_level"}
    notif_channels = []
-    for ch_name, ch_cfg in (config.get("notification_channels") or {}).items():
+    for ch_name, ch_cfg in sorted((config.get("notification_channels") or {}).items()):
        if not isinstance(ch_cfg, dict):
            continue
        ch_type = ch_cfg.get("type", "")
        fields = []
        for k, v in ch_cfg.items():
-            if k == "type":
+            if k in _METADATA_KEYS:
                continue
            sensitive = k in _SECRET_KEYS
            fields.append({
@@ -165,6 +218,9 @@ def get_settings_sections(config: dict) -> list:
            "name": ch_name,
            "type": ch_type,
            "type_label": _CHANNEL_TYPE_LABELS.get(ch_type, ch_type.title()),
+            "owner": ch_cfg.get("owner"),
+            "private": bool(ch_cfg.get("private", False)),
+            "min_level": ch_cfg.get("min_level", "WARNING"),
            "fields": fields,
        })

@@ -182,50 +238,45 @@ def get_settings_sections(config: dict) -> list:
        })

    # ---- Threshold configurations -----------------------------------------
-    def _parse_metric_row(metric_path, metric_cfg):
-        if not isinstance(metric_cfg, dict):
-            return None
+    def _tc_to_row(tc):
        return {
-            "metric": metric_path,
-            "operator": metric_cfg.get("operator", ">"),
-            "warning": metric_cfg.get("warning"),
-            "critical": metric_cfg.get("critical"),
-            "hysteresis": metric_cfg.get("hysteresis"),
-            "count": metric_cfg.get("count", 1),
-            "enabled": metric_cfg.get("enabled", True),
+            "metric": tc.metric_path,
+            "operator": tc.operator.value,
+            "warning": tc.warning,
+            "critical": tc.critical,
+            "hysteresis": tc.hysteresis,
+            "count": tc.count,
+            "enabled": tc.enabled,
+            "display": tc.display or "",
+            "grace": tc.grace,
        }

    threshold_config_list = []
-    raw_tconfigs = config.get("threshold_configs") or {}
-    if raw_tconfigs:
-        for cfg_name, cfg_data in sorted(raw_tconfigs.items()):
-            if not isinstance(cfg_data, dict):
-                continue
-            metrics = [
-                r for r in (
-                    _parse_metric_row(mp, mc)
-                    for mp, mc in (cfg_data.get("thresholds") or {}).items()
-                ) if r
-            ]
-            threshold_config_list.append({
-                "name": cfg_name,
-                "metrics": sorted(metrics, key=lambda m: m["metric"]),
-            })
-    elif config.get("thresholds"):
-        metrics = [
-            r for r in (
-                _parse_metric_row(mp, mc)
-                for mp, mc in config["thresholds"].items()
-            ) if r
-        ]
-        threshold_config_list.append({
-            "name": "default",
-            "metrics": sorted(metrics, key=lambda m: m["metric"]),
-        })
+    if threshold_checker is not None:
+        if threshold_checker.threshold_configs:
+            for cfg_name, cfg_metrics in sorted(threshold_checker.threshold_configs.items()):
+                # For the default config use the merged effective set;
+                # for named overrides use only the explicitly defined metrics
+                # (threshold_raw_configs) so inherited defaults are not repeated.
+                if cfg_name == "default":
+                    display_metrics = cfg_metrics
+                else:
+                    display_metrics = threshold_checker.threshold_raw_configs.get(cfg_name, cfg_metrics)
+                metrics = sorted(
+                    [_tc_to_row(tc) for tc in display_metrics.values()],
+                    key=lambda m: m["metric"],
+                )
+                threshold_config_list.append({"name": cfg_name, "metrics": metrics})
+        elif threshold_checker.thresholds:
+            metrics = sorted(
+                [_tc_to_row(tc) for tc in threshold_checker.thresholds.values()],
+                key=lambda m: m["metric"],
+            )
+            threshold_config_list.append({"name": "default", "metrics": metrics})

    # ---- Hosts summary ----------------------------------------------------
    hosts_list = []
-    for hname, hcfg in (config.get("hosts") or {}).items():
+    for hname, hcfg in sorted((config.get("hosts") or {}).items()):
        if not isinstance(hcfg, dict):
            continue
        hosts_list.append({
@@ -235,32 +286,55 @@ def get_settings_sections(config: dict) -> list:
            "owner": hcfg.get("owner", ""),
            "managers": hcfg.get("managers", []),
            "monitors": hcfg.get("monitors", []),
-            "threshold_config": hcfg.get("threshold_config", ""),
+            "threshold_configs": (
+                list(v) if isinstance(v := hcfg.get("threshold_config"), list)
+                else ([v] if v else [])
+            ),
            "notification_channels": hcfg.get("notification_channels", []),
        })

+    # ---- OAuth providers -------------------------------------------------------
+    oauth_providers = []
+    for pname, pattrs in (config.get("oauth") or {}).items():
+        if not isinstance(pattrs, dict):
+            continue
+        cs = pattrs.get("client_secret", "")
+        oauth_providers.append({
+            "name": pname,
+            "type": pattrs.get("type", "gitea"),
+            "url": pattrs.get("url", ""),
+            "client_id": pattrs.get("client_id", ""),
+            "client_secret": "•••" if cs else "",
+            "label": pattrs.get("label", ""),
+            "logo": pattrs.get("logo", ""),
+        })
+
    return [
        {
            "id": "network",
            "title": "Network",
            "description": "Ports and bind addresses for all server sockets.",
+            "section_mode": "form",
+            "api_section": "server",
            "fields": [
                field("hb_port",  "Heartbeat UDP port",  "port",
-                      "UDP port the server listens on for heartbeat datagrams."),
+                      "UDP port the server listens on for heartbeat datagrams.", editable=True),
                field("hbd_host", "HTTP bind address",   "text",
-                      "Interface to bind the HTTP server to. Empty = all interfaces."),
+                      "Interface to bind the HTTP server to. Empty = all interfaces.", editable=True),
                field("hbd_port", "HTTP API port",       "port",
-                      "TCP port for the HTTP API and web UI."),
+                      "TCP port for the HTTP API and web UI.", editable=True),
                field("ws_port",  "WebSocket port",      "port",
-                      "TCP port for the plain WebSocket server."),
+                      "TCP port for the plain WebSocket server.", editable=True),
                field("wss_port", "Secure WebSocket port", "port",
-                      "TCP port for WSS (TLS WebSocket). Leave empty to disable."),
+                      "TCP port for WSS (TLS WebSocket). Leave empty to disable.", editable=True),
            ],
        },
        {
            "id": "tls",
            "title": "TLS / WebSocket Security",
            "description": "Certificate paths used when wss_port is set.",
+            "section_mode": "form",
+            "api_section": None,
            "fields": [
                field("cert_path", "Certificate directory", "path",
                      "Directory containing the TLS certificate and key files."),
@@ -274,73 +348,97 @@ def get_settings_sections(config: dict) -> list:
            "id": "monitoring",
            "title": "Monitoring",
            "description": "Heartbeat timing and alert re-notification behaviour.",
+            "section_mode": "form",
+            "api_section": "server",
            "fields": [
                field("interval",  "Heartbeat interval", "duration",
-                      "Expected time between heartbeat messages from each client."),
-                field("grace",     "Grace multiplier",   "number",
-                      "A host is marked overdue after interval × grace seconds of silence."),
+                      "Expected time between heartbeat messages from each client.", editable=True),
+                field("grace",     "Grace period",        "number",
+                      "Extra seconds to wait after a missed heartbeat before sending notifications.", editable=True),
                field("threshold_renotify_interval", "Re-notify interval", "duration",
-                      "How often to re-send notifications for ongoing threshold alerts."),
+                      "How often to re-send notifications for ongoing threshold alerts.", editable=True),
                field("autosave_interval", "Autosave interval", "duration",
                      "How often the server saves its state to disk."),
+                field("base_url", "Base URL", "text",
+                      "Base URL for notification links.", editable=True),
            ],
        },
        {
            "id": "persistence",
            "title": "Persistence & Logging",
            "description": "State file and event log settings.",
+            "section_mode": "form",
+            "api_section": "server",
            "fields": [
                field("pickfile", "State file",   "path",
-                      "Path to the pickle file used to persist host state across restarts."),
+                      "Path to the pickle file used to persist host state across restarts.", editable=True),
                field("logfile",  "Event log",    "path",
-                      "Path to the event log file."),
+                      "Path to the event log file.", editable=True),
            ],
        },
        {
            "id": "journal",
            "title": "Message Journal",
            "description": "All received heartbeat and plugin messages are journalled here.",
+            "section_mode": "form",
+            "api_section": "server",
            "fields": [
                field("journal_enabled",     "Enabled",          "boolean",
-                      "Turn journalling on or off."),
+                      "Turn journalling on or off.", editable=True),
                field("journal_dir",         "Journal directory","path",
-                      "Directory where journal files are written."),
+                      "Directory where journal files are written.", editable=True),
                field("journal_file",        "Journal filename", "text",
                      "Base filename for the journal (rotated copies get a numeric suffix)."),
                field("journal_max_size",    "Max file size",    "size",
-                      "Rotate the journal when it exceeds this size."),
+                      "Rotate the journal when it exceeds this size.", editable=True),
                field("journal_max_backups", "Backup count",     "number",
-                      "Number of rotated journal files to keep."),
+                      "Number of rotated journal files to keep.", editable=True),
            ],
        },
        {
            "id": "dns",
            "title": "Dynamic DNS",
-            "description": "nsupdate-based DNS registration for dynamic hosts.",
+            "description": "nsupdate-based DNS registration via nsupdate(8).",
+            "section_mode": "form",
+            "api_section": "dns",
            "fields": [
                field("nsupdate_bin", "nsupdate binary", "path",
-                      "Full path to the nsupdate executable."),
+                      "Path to the nsupdate binary.", editable=True),
+                field("rndc_key", "RNDC key file", "path",
+                      "Path to the rndc key file used to authenticate DNS updates.", editable=True),
                field("dyndomains", "Dynamic domains", "list",
-                      "DNS zones managed by nsupdate for dynamic hosts."),
-                field("drophosts",    "Drop hosts",      "list",
-                      "Hostnames to silently ignore — no state, no alerts."),
+                      "Domains updated via nsupdate when a host with dyndns: true reports in.",
+                      editable=True),
            ],
        },
        {
            "id": "users",
            "title": "Users",
            "description": "Accounts defined in the config file. Password hashes are never shown.",
+            "section_mode": "form",
+            "api_section": "users",
            "users": users_list,
            "fields": [
                field("default_owner", "Default owner", "text",
                      "Username that owns hosts with no explicit owner. "
-                      "Falls back to the first admin user."),
+                      "Falls back to the first admin user.", editable=True),
            ],
        },
+        {
+            "id": "oauth",
+            "title": "OAuth Providers",
+            "description": "OAuth2 login providers. Client secrets are masked.",
+            "section_mode": "form",
+            "api_section": "oauth",
+            "providers": oauth_providers,
+            "fields": [],
+        },
        {
            "id": "channels",
            "title": "Notification Channels",
            "description": "Named notification providers. Credentials are masked.",
+            "section_mode": "channels",
+            "api_section": "notification_channels",
            "channels": notif_channels,
            "fields": [
                field("default_notification_channels", "Default channels", "list",
@@ -351,6 +449,8 @@ def get_settings_sections(config: dict) -> list:
            "id": "hosts",
            "title": "Hosts",
            "description": "Host definitions loaded from the config file.",
+            "section_mode": "hosts",
+            "api_section": "hosts",
            "hosts": hosts_list,
            "fields": [],
        },
@@ -358,16 +458,20 @@ def get_settings_sections(config: dict) -> list:
            "id": "thresholds",
            "title": "Threshold Configurations",
            "description": "Named alert threshold sets. Each defines warning/critical levels per metric.",
+            "section_mode": "thresholds",
+            "api_section": "thresholds",
            "threshold_configs": threshold_config_list,
            "fields": [
                field("default_threshold_config", "Default config", "text",
-                      "Threshold config used for hosts with no explicit mapping."),
+                      "Threshold config used for hosts with no explicit mapping.", editable=True),
            ],
        },
        {
            "id": "runtime",
            "title": "Runtime",
            "description": "Flags set at startup (require restart to change).",
+            "section_mode": "form",
+            "api_section": None,
            "fields": [
                field("foreground", "Foreground mode", "boolean",
                      "Run in the foreground instead of daemonising."),
@@ -378,3 +482,17 @@ def get_settings_sections(config: dict) -> list:
            ],
        },
    ]
+
+
+def get_settings_data(config: dict, threshold_checker=None) -> dict:
+    """Bundle the settings sections with the name lists the template uses.
+
+    ``sections`` is whatever ``get_settings_sections`` returns for *config*.
+    Each ``all_*`` entry is the sorted list of keys of the matching top-level
+    config mapping; a missing or falsy mapping yields an empty list.
+    """
+    def _sorted_keys(key):
+        return sorted((config.get(key) or {}).keys())
+
+    return {
+        "sections": get_settings_sections(config, threshold_checker=threshold_checker),
+        "all_channel_names": _sorted_keys("notification_channels"),
+        "all_usernames": _sorted_keys("users"),
+        "all_threshold_configs": _sorted_keys("threshold_configs"),
+    }
@@ -185,7 +185,7 @@
  /* Slightly larger tap targets in tables */
  #ntable td, #ntable th {
    padding: 4px 6px !important;
-    font-size: 0.82em !important;
+    font-size: 1.00em !important;
  }

  /* Cards on plugin/alerts pages */
@@ -74,7 +74,7 @@
      background: #e8f0fe;
      color: #1a73e8;
      border-radius: 12px;
-      font-size: 0.85em;
+      font-size: 1.00em;
      font-weight: 600;
      font-family: monospace;
    }
@@ -100,6 +100,19 @@
    }

    .logo-text { flex: 1; }
+
+    /* ── Dark mode ── */
+    html[data-theme="dark"] h1 { color: var(--text); }
+    html[data-theme="dark"] .subtitle { color: var(--text-sec); }
+    html[data-theme="dark"] .section { background: var(--surface); box-shadow: 0 1px 6px var(--shadow); }
+    html[data-theme="dark"] .section h2 { color: var(--text); border-bottom-color: var(--border); }
+    html[data-theme="dark"] .info-row { border-bottom-color: var(--border-4); }
+    html[data-theme="dark"] .info-label { color: var(--text-sec); }
+    html[data-theme="dark"] .info-value { color: var(--text); }
+    html[data-theme="dark"] .info-value a { color: var(--link); }
+    html[data-theme="dark"] .hb-logo { color: var(--link); }
+    html[data-theme="dark"] .hb-tagline { color: var(--text-sec); }
+    html[data-theme="dark"] .version-badge { background: #1a3255; color: #60a5fa; }
  </style>

  <body>
@@ -163,7 +176,7 @@
        </div>
        <div class="info-row">
          <span class="info-label">Email</span>
-          <span class="info-value"><a href="mailto:aew@wrede.ca">aew@wrede.ca</a></span>
+          <span class="info-value"><a href="mailto:aew.hbd@wrede.ca">aew.hbd@wrede.ca</a></span>
        </div>
        <div class="info-row">
          <span class="info-label">Repository</span>
@@ -4,7 +4,7 @@

  <style>

-    body {
+    html, body {
      height: auto;
      overflow-y: auto;
    }
@@ -55,7 +55,7 @@

    .summary-label {
      color: #666;
-      font-size: 0.85em;
+      font-size: 1.00em;
    }

    .filters {
@@ -94,6 +94,24 @@
      border-color: #2196f3;
    }

+    .filter-input {
+      padding: 7px 12px;
+      border: 2px solid #ddd;
+      border-radius: 20px;
+      font-size: 0.9em;
+      outline: none;
+      width: 200px;
+      transition: border-color 0.2s;
+    }
+
+    .filter-input:focus {
+      border-color: #2196f3;
+    }
+
+    .filter-input.invalid {
+      border-color: #f44336;
+    }
+
    .alerts-container {
      background: white;
      border-radius: 8px;
@@ -175,14 +193,18 @@

    .alert-hostname {
      font-weight: bold;
-      color: #333;
+      color: #0066cc;
      font-size: 1.1em;
+      text-decoration: none;
+    }
+    .alert-hostname:hover {
+      text-decoration: underline;
    }

    .alert-metric {
-      color: #666;
-      font-family: 'Courier New', monospace;
-      font-size: 0.9em;
+      color: #0066cc;
+      font-size: 1.1em;
+      font-weight: normal;
    }

    .alert-details {
@@ -199,7 +221,7 @@

    .alert-duration {
      color: #999;
-      font-size: 0.85em;
+      font-size: 1.00em;
    }
    
    .alert-actions {
@@ -216,7 +238,7 @@
      border: none;
      border-radius: 4px;
      cursor: pointer;
-      font-size: 0.85em;
+      font-size: 1.00em;
      transition: all 0.2s;
      white-space: nowrap;
    }
@@ -271,7 +293,7 @@
    .refresh-info {
      text-align: center;
      color: #999;
-      font-size: 0.85em;
+      font-size: 1.00em;
      margin-top: 20px;
      padding-top: 20px;
      border-top: 1px solid #e0e0e0;
@@ -283,6 +305,31 @@
      text-align: right;
      margin-bottom: 15px;
    }
+
+    /* ── Dark mode ── */
+    html[data-theme="dark"] h1 { color: var(--text); }
+    html[data-theme="dark"] .subtitle { color: var(--text-sec); }
+    html[data-theme="dark"] .summary-card { background: var(--surface); }
+    html[data-theme="dark"] .summary-label { color: var(--text-sec); }
+    html[data-theme="dark"] .filters { background: var(--surface); }
+    html[data-theme="dark"] .filter-label { color: var(--text-sec); }
+    html[data-theme="dark"] .filter-button { background: var(--surface-2); border-color: var(--border); color: var(--text); }
+    html[data-theme="dark"] .filter-button.active { background: #2196f3; color: #fff; border-color: #2196f3; }
+    html[data-theme="dark"] .filter-input { background: var(--input-bg); border-color: var(--input-border); color: var(--text); }
+    html[data-theme="dark"] .alerts-container { background: var(--surface); }
+    html[data-theme="dark"] .alert-item { background: var(--surface-2); }
+    html[data-theme="dark"] .alert-item.acknowledged { background: var(--surface-3); }
+    html[data-theme="dark"] .alert-item.critical { background: #2e0a0a; border-left-color: #f44336; }
+    html[data-theme="dark"] .alert-item.warning  { background: #2e1a00; border-left-color: #ff9800; }
+    html[data-theme="dark"] .alert-item.unknown  { background: var(--surface-2); }
+    html[data-theme="dark"] .alert-hostname { color: var(--link); }
+    html[data-theme="dark"] .alert-details { color: var(--text-sec); }
+    html[data-theme="dark"] .alert-value { color: var(--text); }
+    html[data-theme="dark"] .alert-duration { color: var(--text-muted); }
+    html[data-theme="dark"] .last-update { color: var(--text-sec); }
+    html[data-theme="dark"] .refresh-info { color: var(--text-muted); border-top-color: var(--border); }
+    html[data-theme="dark"] .no-alerts,
+    html[data-theme="dark"] .loading { color: var(--text-muted); }
  </style>

  <body>
@@ -312,6 +359,7 @@
        <button class="filter-button active" onclick="filterAlerts('all')">All</button>
        <button class="filter-button" onclick="filterAlerts('critical')">Critical Only</button>
        <button class="filter-button" onclick="filterAlerts('warning')">Warning Only</button>
+        <input id="host-filter" class="filter-input" type="text" placeholder="host filter (regex)" oninput="onHostFilterInput(this)">
      </div>

      <div class="alerts-container">
@@ -328,6 +376,7 @@
    <script>
      let currentFilter = 'all';
      let allAlerts = [];
+      let hostFilterRe = null;

      async function loadAlerts() {
        try {
@@ -362,10 +411,13 @@
        // Filter alerts based on current filter
        let filteredAlerts = alerts;
        if (currentFilter !== 'all') {
-          filteredAlerts = alerts.filter(alert => 
+          filteredAlerts = filteredAlerts.filter(alert =>
            alert.level.toLowerCase() === currentFilter
          );
        }
+        if (hostFilterRe) {
+          filteredAlerts = filteredAlerts.filter(alert => hostFilterRe.test(alert.hostname));
+        }
        
        if (filteredAlerts.length === 0) {
          if (currentFilter === 'all' && alerts.length === 0) {
@@ -405,6 +457,10 @@
        } else if (alert.threshold_value !== undefined && alert.threshold_value !== null && alert.operator) {
          valueText += ` <span class="threshold-info">(threshold: ${alert.operator} ${formatValue(alert.threshold_value)})</span>`;
        }
+        if (alert.recovery_threshold !== undefined && alert.recovery_threshold !== null) {
+          const recOp = (alert.operator === '>' || alert.operator === '>=') ? '<' : '>';
+          valueText += ` <span class="threshold-info" style="color:#888">(recovers ${recOp} ${formatValue(alert.recovery_threshold)})</span>`;
+        }
        
        // Build actions section
        let actionsHtml = '';
@@ -429,9 +485,9 @@
            <div class="alert-main">
              <div class="alert-header">
                <span class="alert-level ${level}">${alert.level}</span>
-                <span class="alert-hostname">${alert.hostname}</span>
+                <a class="alert-hostname" href="/plugins#${alert.hostname}">${alert.hostname}</a>
+                <span class="alert-metric">${(alert.metric_path.includes('.') ? alert.metric_path.slice(alert.metric_path.indexOf('.') + 1) : alert.metric_path).replace(/_status_code$/, '')}</span>
              </div>
-              <div class="alert-metric">${alert.metric_path}</div>
              <div class="alert-details">
                <span>${valueText}</span>
                <span class="alert-duration">Active for ${duration}</span>
@@ -530,9 +586,36 @@
        }
      }

+      /* Recompile the host filter from the text box and re-render the list.
+         An empty box clears the filter; a pattern that fails to compile also
+         clears it and marks the box with the 'invalid' class. */
+      function onHostFilterInput(input) {
+        const pattern = input.value.trim();
+        let compiled = null;
+        let bad = false;
+        if (pattern) {
+          try {
+            compiled = new RegExp(pattern, 'i');
+          } catch (_) {
+            bad = true;
+          }
+        }
+        hostFilterRe = compiled;
+        input.classList.toggle('invalid', bad);
+        renderAlerts(allAlerts);
+      }
+
      // Auto-refresh every 15 seconds
      setInterval(loadAlerts, 15000);

+      // Initialise filter from URL query string (?filter=...)
+      (function () {
+        const initial = new URLSearchParams(window.location.search).get('filter');
+        if (!initial) return;
+        // Seed the text box, then run the normal input handler on it.
+        const box = document.getElementById('host-filter');
+        box.value = initial;
+        onHostFilterInput(box);
+      })();
+
      // Initial load
      loadAlerts();
    </script>
@@ -1,5 +1,5 @@
 <footer>
 <div id="copyright">
-    &copy;2002-2026 <A HREF="mailto:andreas@wrede.ca">Andreas Wrede</A> All Rights Reserved.</p>
+    &copy;2002-2026 <A HREF="mailto:aew.hbd@wrede.ca">Andreas Wrede</A> All Rights Reserved.</p>
    </div>
 </footer>
@@ -5,7 +5,68 @@
    <link rel="icon" href="/static/images/favicon.ico" sizes="32x32" />
    <title>{{ title }}</title>
    {% if extra_scripts %}<script src="{{ extra_scripts }}"></script>{% endif %}
+    <script>
+    /* Apply saved theme before first paint to avoid flash */
+    (function() {
+      try {
+        var pref = localStorage.getItem('hbd_theme') || 'auto';
+        var wantsDark = pref === 'dark';
+        /* 'auto' defers to the system colour-scheme preference */
+        if (pref === 'auto') {
+          wantsDark = window.matchMedia('(prefers-color-scheme: dark)').matches;
+        }
+        if (wantsDark) document.documentElement.setAttribute('data-theme', 'dark');
+      } catch(e) {}
+    })();
+    </script>
    <style>
+      /* ── Theme variables ── */
+      :root {
+        --bg:           #f5f5f5;
+        --surface:      #ffffff;
+        --surface-2:    #f8f8f8;
+        --surface-3:    #f5f5f5;
+        --text:         #222222;
+        --text-2:       #333333;
+        --text-3:       #555555;
+        --text-sec:     #666666;
+        --text-muted:   #888888;
+        --text-dim:     #aaaaaa;
+        --text-ghost:   #cccccc;
+        --border:       #e0e0e0;
+        --border-2:     #eeeeee;
+        --border-3:     #f0f0f0;
+        --border-4:     #f5f5f5;
+        --link:         #0066cc;
+        --nav-bg:       #ffffff;
+        --input-bg:     #ffffff;
+        --input-border: #cccccc;
+        --shadow-sm:    rgba(0,0,0,.08);
+        --shadow:       rgba(0,0,0,.10);
+        --shadow-nav:   rgba(0,0,0,.10);
+      }
+      html[data-theme="dark"] {
+        color-scheme: dark;
+        --bg:           #111827;
+        --surface:      #1f2937;
+        --surface-2:    #283447;
+        --surface-3:    #374151;
+        --text:         #e5e7eb;
+        --text-2:       #d1d5db;
+        --text-3:       #9ca3af;
+        --text-sec:     #9ca3af;
+        --text-muted:   #6b7280;
+        --text-dim:     #4b5563;
+        --text-ghost:   #374151;
+        --border:       #374151;
+        --border-2:     #2d3748;
+        --border-3:     #253040;
+        --border-4:     #1e2a38;
+        --link:         #60a5fa;
+        --nav-bg:       #1f2937;
+        --input-bg:     #283447;
+        --input-border: #4b5563;
+        --shadow-sm:    rgba(0,0,0,.30);
+        --shadow:       rgba(0,0,0,.40);
+        --shadow-nav:   rgba(0,0,0,.40);
+      }
+
      /* ── Reset / shared baseline ── */
      *, *::before, *::after { box-sizing: border-box; }
      html {
@@ -16,10 +77,11 @@
        margin: 0;
        padding: 10px;
        padding-top: 60px;
-        background: #f5f5f5;
+        background: var(--bg);
+        color: var(--text);
      }
-      h1 { font-size: 1.5em; color: #333; margin: 0 0 5px; }
-      h2 { font-size: 1.1em; color: #333; margin: 0 0 8px; }
+      h1 { font-size: 1.5em; color: var(--text-2); margin: 0 0 5px; }
+      h2 { font-size: 1.1em; color: var(--text-2); margin: 0 0 8px; }
      p  { margin: 0; }

      /* Navigation bar — shared across all pages */
@@ -29,9 +91,9 @@
        left: 0;
        right: 0;
        z-index: 200;
-        background: #fff;
+        background: var(--nav-bg);
        padding: 6px 12px;
-        box-shadow: 0 2px 4px rgba(0,0,0,.1);
+        box-shadow: 0 2px 4px var(--shadow-nav);
        display: flex;
        align-items: center;
        justify-content: space-between;
@@ -42,25 +104,25 @@
      .nav a {
        margin-right: 20px;
        text-decoration: none;
-        color: #0066cc;
+        color: var(--link);
        font-weight: 500;
        font-size: 0.9em;
      }
      .nav a:hover { text-decoration: underline; }
-      .nav a.active { color: #333; font-weight: bold; }
+      .nav a.active { color: var(--text-2); font-weight: bold; }
      .nav-user {
        display: flex;
        align-items: center;
        gap: 8px;
        text-decoration: none;
-        color: #333;
+        color: var(--text-2);
        font-size: 0.9em;
        font-weight: 500;
        padding: 4px 8px;
        border-radius: 20px;
        transition: background 0.15s;
      }
-      .nav-user:hover { background: #f0f4ff; text-decoration: none; }
+      .nav-user:hover { background: var(--surface-2); text-decoration: none; }
      .nav-username {
        max-width: 0;
        overflow: hidden;
@@ -81,7 +143,7 @@
      .nav-initials {
        width: 28px; height: 28px;
        border-radius: 50%;
-        background: #0066cc;
+        background: var(--link);
        color: #fff;
        display: flex;
        align-items: center;
@@ -106,7 +168,7 @@
      .nav-hamburger span {
        display: block;
        height: 3px;
-        background: #555;
+        background: var(--text-muted);
        border-radius: 2px;
      }

@@ -118,13 +180,39 @@
          flex-direction: column;
          align-items: flex-start;
          padding-top: 8px;
-          border-top: 1px solid #eee;
+          border-top: 1px solid var(--border-2);
          order: 3;
        }
        .nav-links.nav-open { display: flex; }
        .nav-links a { margin-right: 0; padding: 6px 0; font-size: 1em; }
      }

+      /* ── Global dark-mode: inputs ── */
+      html[data-theme="dark"] input:not([type=checkbox]):not([type=radio]),
+      html[data-theme="dark"] select,
+      html[data-theme="dark"] textarea {
+        background-color: var(--input-bg);
+        border-color: var(--input-border);
+        color: var(--text);
+      }
+
+      /* Pending config publish button */
+      .nav-publish-btn {
+        background: #e65100;
+        color: #fff;
+        border: none;
+        border-radius: 4px;
+        padding: 4px 10px;
+        font-size: 0.82em;
+        font-weight: 600;
+        cursor: pointer;
+        flex-shrink: 0;
+        white-space: nowrap;
+        margin-left: auto;
+      }
+      .nav-publish-btn:hover { background: #bf360c; }
+      .nav-publish-btn:disabled { opacity: 0.7; cursor: default; }
+
      /* Swiss railway clock — nav */
      .nav-pie {
        flex-shrink: 0;
@@ -214,7 +302,7 @@
        ctx.restore();
      }

-      hand((m + s / 60) / 60 * Math.PI * 2 - Math.PI / 2,
+      hand((sFrac >= 58.5 ? m + 1 : m) / 60 * Math.PI * 2 - Math.PI / 2,
           R * 0.88, -R * 0.12, SIZE * 0.027, '#222');           /* minute */
      hand((h + m / 60) / 12 * Math.PI * 2 - Math.PI / 2,
           R * 0.58, -R * 0.12, SIZE * 0.039, '#222');           /* hour   */
@@ -262,6 +350,17 @@
      setTimeout(clockTick, delay);
    }

+    /* Keep auto-theme in sync with system setting changes */
+    try {
+      window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', function(evt) {
+        var stored = localStorage.getItem('hbd_theme') || 'auto';
+        /* An explicit stored choice is left alone */
+        if (stored !== 'auto') return;
+        var root = document.documentElement;
+        if (evt.matches) root.setAttribute('data-theme', 'dark');
+        else root.removeAttribute('data-theme');
+      });
+    } catch(e) {}
+
    document.addEventListener('DOMContentLoaded', function() {
      /* Start the shared tick loop */
      clockTick();
@@ -179,15 +179,65 @@

    /* Message styling */
    #messages {
-      font-size: 0.85em;
+      font-size: 1.00em;
      line-height: 1.0;
    }

-    #messages div {
+    #messages .log-entry {
      padding: 5px 0;
      border-bottom: 1px solid #f0f0f0;
+      display: flex;
+      gap: 0.5em;
+      align-items: baseline;
    }

+    .log-ts { color: #888; white-space: nowrap; }
+    .log-level { font-weight: bold; min-width: 6em; }
+    .log-host { font-weight: 600; }
+    .log-service { color: #888; }
+
+    .log-warning .log-level  { color: #b8860b; }
+    .log-critical .log-level { color: #c00; }
+    .log-recover .log-level  { color: #2a7a2a; }
+    .log-info .log-level     { color: #555; }
+
+    .log-section-header {
+      display: flex;
+      align-items: center;
+      gap: 12px;
+      flex-wrap: wrap;
+      margin-bottom: 10px;
+      background: white;
+      border-radius: 6px;
+      box-shadow: 0 1px 4px rgba(0,0,0,0.1);
+      padding: 8px 15px;
+    }
+
+    .log-section-title {
+      font-size: 1.2em;
+      font-weight: bold;
+      color: #333;
+      white-space: nowrap;
+    }
+
+    .log-filter-bar {
+      display: flex;
+      gap: 6px;
+      align-items: center;
+      flex-wrap: wrap;
+    }
+
+    .log-filter-bar input[type="text"],
+    .log-filter-bar select {
+      padding: 3px 7px;
+      border: 1px solid #ccc;
+      border-radius: 4px;
+      font-size: 1.00em;
+      color: #333;
+    }
+
+    .log-filter-bar input[type="text"] { width: 110px; }
+
    /* Modal for connection status messages */
    .connection-modal {
      display: none;
@@ -238,6 +288,31 @@
    }
    #ntable a.host-link { color: inherit; text-decoration: none; }
    #ntable a.host-link:hover { text-decoration: underline; }
+
+    /* ── Dark mode ── */
+    html[data-theme="dark"] h1,
+    html[data-theme="dark"] h2 { color: var(--text); }
+    html[data-theme="dark"] .subtitle { color: var(--text-sec); }
+    html[data-theme="dark"] h2,
+    html[data-theme="dark"] .table-section,
+    html[data-theme="dark"] .log-section,
+    html[data-theme="dark"] .log-section-header { background: var(--surface); }
+    html[data-theme="dark"] .log-section-title { color: var(--text); }
+    html[data-theme="dark"] #ntable td,
+    html[data-theme="dark"] #ntable th { border-color: var(--border); }
+    html[data-theme="dark"] #ntable tr:nth-child(even) { background: var(--surface-2); }
+    html[data-theme="dark"] #ntable tr:hover { background: #1e3a5f; }
+    html[data-theme="dark"] #ntable tbody tr.row-warning { background: #3a2800; }
+    html[data-theme="dark"] #ntable tbody tr.row-critical { background: #3a0a0a; }
+    html[data-theme="dark"] #ntable tbody tr.row-warning:hover { background: #4a3200; }
+    html[data-theme="dark"] #ntable tbody tr.row-critical:hover { background: #4a1010; }
+    html[data-theme="dark"] #messages .log-entry { border-bottom-color: var(--border-3); }
+    html[data-theme="dark"] .log-ts,
+    html[data-theme="dark"] .log-service { color: var(--text-muted); }
+    html[data-theme="dark"] .log-info .log-level { color: var(--text-sec); }
+    html[data-theme="dark"] .log-filter-bar input,
+    html[data-theme="dark"] .log-filter-bar select { color: var(--text); }
+    html[data-theme="dark"] .connection-modal-content { background: var(--surface); color: var(--text); }
  </style>
  <script type="text/javascript">
    var cnt = 0;
@@ -246,9 +321,15 @@
    var c = 0;
    var HBD_VERSION = "{{ hbd_version }}";

+    /**
+     * Escape a value for insertion into HTML markup.
+     *
+     * The value is coerced with String() and passed through a detached
+     * <div> (textContent in, innerHTML out), which encodes &, < and >.
+     * That serialization leaves quote characters untouched, so " and '
+     * are replaced explicitly: the log-entry markup in this script also
+     * places the result inside double-quoted attributes (data-level,
+     * data-host), where a raw quote would end the attribute early.
+     */
+    function escHtml(s) {
+      var d = document.createElement('div');
+      d.textContent = String(s);
+      return d.innerHTML.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
+    }
+
    function hostNameHtml(data) {
      var rawName = data.raw_name || data.name.replace(/<[^>]+>/g, '').replace('*', '').trim();
-      var nameHtml = data.name;
+      var nameHtml = escHtml(data.name);
      if (!data.hbc_version || data.hbc_version !== HBD_VERSION) {
        nameHtml += ' 🥀';
      }
@@ -335,11 +416,11 @@
        c_critical.innerHTML = "";
      }
      
-      c_ipv4addr.innerHTML = data.connections[0].addr;
-      c_ipv4state.innerHTML = data.connections[0].state;
+      c_ipv4addr.innerHTML = escHtml(data.connections[0].addr);
+      c_ipv4state.innerHTML = escHtml(data.connections[0].state);
      if (data.connections.length > 1) {
-        c_ipv6addr.innerHTML = data.connections[1].addr;
-        c_ipv6state.innerHTML = data.connections[1].state;
+        c_ipv6addr.innerHTML = escHtml(data.connections[1].addr);
+        c_ipv6state.innerHTML = escHtml(data.connections[1].state);
      }
      var table = document.getElementById("ntablebody"); // find table to append to
      table.appendChild(row); // append row to table
@@ -402,7 +483,7 @@

      for  (var i = 0; i < data.connections.length; i++) {
        // Offset by 2 for the warning/critical count columns
-        name_idx[data.name].cells[3 + i * 4].innerHTML = data.connections[i].addr;
+        name_idx[data.name].cells[3 + i * 4].innerHTML = escHtml(data.connections[i].addr);
        name_idx[data.name].cells[6 + i * 4].innerHTML = formatTS(
          data.connections[i].statetime
        );
@@ -422,7 +503,7 @@
            state = '<span class="state-overdue">overdue</span>';
            latency = "-";
          } else {
-            state = "<b>" + data.connections[i].state + "</b>";
+            state = "<b>" + escHtml(data.connections[i].state) + "</b>";
            latency = "-";
          }
        }
@@ -432,6 +513,22 @@
      updateRowAlert(name_idx[data.name], data);
    }

+    /* Show or hide every log entry according to the three filter controls.
+       Host and message filters are case-insensitive substring matches; the
+       level filter must equal the entry's data-level exactly. */
+    function applyLogFilters() {
+      var wantHost = document.getElementById('filter-host').value.toLowerCase().trim();
+      var wantLevel = document.getElementById('filter-level').value;
+      var wantMsg = document.getElementById('filter-msg').value.toLowerCase().trim();
+
+      function matches(entry) {
+        if (wantHost && !(entry.dataset.host || '').toLowerCase().includes(wantHost)) return false;
+        if (wantLevel && entry.dataset.level !== wantLevel) return false;
+        if (!wantMsg) return true;
+        /* An entry without a .log-msg element never matches a message filter */
+        var body = entry.querySelector('.log-msg');
+        return !!body && body.textContent.toLowerCase().includes(wantMsg);
+      }
+
+      document.querySelectorAll('#messages .log-entry').forEach(function(entry) {
+        entry.style.display = matches(entry) ? '' : 'none';
+      });
+    }
+
    function WS_Connect() {
      if ("WebSocket" in window) {
        //N.B: subprotocol field causes chrome to error 1006
@@ -460,7 +557,22 @@
            update_table(state.data);
          } else if (state.type == "message") {
            var msgs = document.getElementById("messages");
-            msgs.insertAdjacentHTML("afterbegin", "<div>" + state.data + "</div>");
+            var msg = state.data;
+            var _d = new Date(msg.ts * 1000);
+            function _p(n) { return n < 10 ? '0' + n : '' + n; }
+            var ts_str = _d.getFullYear() + '-' + _p(_d.getMonth()+1) + '-' + _p(_d.getDate())
+                       + ' ' + _p(_d.getHours()) + ':' + _p(_d.getMinutes()) + ':' + _p(_d.getSeconds());
+            var lvl = (msg.level || "INFO").toLowerCase();
+            var hostVal = msg.host || '';
+            var html = '<div class="log-entry log-' + escHtml(lvl) + '" data-level="' + escHtml(lvl) + '" data-host="' + escHtml(hostVal) + '">';
+            html += '<span class="log-ts">' + ts_str + '</span>';
+            html += '<span class="log-level">' + escHtml(msg.level || "") + '</span>';
+            if (msg.host) html += '<span class="log-host">' + escHtml(msg.host) + '</span>';
+            if (msg.service) html += '<span class="log-service">' + escHtml(msg.service) + '</span>';
+            html += '<span class="log-msg">' + escHtml(msg.message) + '</span>';
+            html += '</div>';
+            msgs.insertAdjacentHTML(state.history ? "beforeend" : "afterbegin", html);
+            applyLogFilters();
          }
          cnt++;
        };
@@ -515,7 +627,7 @@
          <tbody id="ntablebody">
            {% for host in hosts %}
            <tr class="{% if host.alert_critical_unacked > 0 or host.alert_critical_acked > 0 %}row-critical{% elif host.alert_warning_unacked > 0 or host.alert_warning_acked > 0 %}row-warning{% endif %}">
-              <td data-name="{{ host.name }}"><a class="host-link" href="/plugins#{{ host.raw_name | urlencode }}">{{ host.name }}{% if not host.hbc_version or host.hbc_version != hbd_version %} 🥀{% endif %}</a></td>
+              <td data-name="{{ host.name }}"><a class="host-link" href="/plugins#{{ host.name | urlencode }}">{{ host.name }}{% if not host.hbc_version or host.hbc_version != hbd_version %} 🥀{% endif %}</a></td>
              <td style="text-align: center; color: #ff9800; font-weight: bold;">
                {%- set warning_unacked = host.alert_warning_unacked -%}
                {%- set warning_acked = host.alert_warning_acked -%}
@@ -549,7 +661,21 @@
      </div>
      
      <div class="log-section">
-        <h2>Log of Events</h2>
+        <div class="log-section-header">
+          <span class="log-section-title">Log of Events</span>
+          <div class="log-filter-bar">
+            <input type="text" id="filter-host" placeholder="Host…" title="Filter by host" />
+            <select id="filter-level" title="Filter by level">
+              <option value="">All levels</option>
+              <option value="info">INFO</option>
+              <option value="warning">WARNING</option>
+              <option value="critical">CRITICAL</option>
+              <option value="recover">RECOVER</option>
+              <option value="unknown">UNKNOWN</option>
+            </select>
+            <input type="text" id="filter-msg" placeholder="Message…" title="Filter by message text" />
+          </div>
+        </div>
        <div id="messages"></div>
      </div>
    </div>
@@ -565,6 +691,9 @@
    
    <script>
      setup();
+      document.getElementById('filter-host').addEventListener('input', applyLogFilters);
+      document.getElementById('filter-level').addEventListener('change', applyLogFilters);
+      document.getElementById('filter-msg').addEventListener('input', applyLogFilters);
    </script>
  </body>
 </html>
@@ -11,6 +11,9 @@
    {% endif %}
    <a href="/about"{% if active_page == "about" %} class="active"{% endif %}>About</a>
  </div>
+  {% if current_user and current_user.admin %}
+  <button id="nav-publish-btn" class="nav-publish-btn" onclick="navPublishConfig()" style="display:none" title="Publish pending config changes to .hb.yaml">&#9888; Publish Config</button>
+  {% endif %}
  <div class="nav-pie" title="Host alert status">
    <canvas id="alert-pie" width="44" height="44"></canvas>
  </div>
@@ -92,5 +95,40 @@
  document.addEventListener('DOMContentLoaded', function() {
    updateAlertPie();
    setInterval(updateAlertPie, 30000);
+    navCheckPendingConfig();
+    window.addEventListener('storage', navCheckPendingConfig);
  });
+
+  /* Show the publish button only while a staged config sits in localStorage.
+     Does nothing when the button is absent from the page. */
+  function navCheckPendingConfig() {
+    var publishBtn = document.getElementById('nav-publish-btn');
+    if (publishBtn) {
+      var hasPending = Boolean(localStorage.getItem('hbd_pending_config'));
+      publishBtn.style.display = hasPending ? '' : 'none';
+    }
+  }
+
+  /* POST the staged config from localStorage to /api/0/config.
+     On success the staged copy is removed and the page reloads; on an HTTP
+     or network failure the error is shown and the button is re-enabled. */
+  async function navPublishConfig() {
+    var publishBtn = document.getElementById('nav-publish-btn');
+    var payload = localStorage.getItem('hbd_pending_config');
+    if (!payload) return;
+    /* Parsed only as a validity check: unparseable data is never sent */
+    try { JSON.parse(payload); } catch(e) { return; }
+
+    function setBusy(busy) {
+      if (!publishBtn) return;
+      publishBtn.disabled = busy;
+      publishBtn.textContent = busy ? 'Saving…' : '⚠ Publish Config';
+    }
+
+    setBusy(true);
+    try {
+      var resp = await fetch('/api/0/config', {
+        method: 'POST',
+        headers: {'Content-Type': 'application/json'},
+        body: payload
+      });
+      if (!resp.ok) {
+        /* A non-JSON error body falls back to the HTTP status text */
+        var err = await resp.json().catch(function() { return {}; });
+        alert('Error: ' + (err.error || resp.statusText));
+        setBusy(false);
+        return;
+      }
+      localStorage.removeItem('hbd_pending_config');
+      window.location.reload();
+    } catch(e) {
+      alert('Network error: ' + e.message);
+      setBusy(false);
+    }
+  }
 </script>
@@ -152,6 +152,31 @@
    }
    .host-action-btn.delete-btn:hover { background: #ffcdd2; }

+    /* ── Action result toast ───────────────────────────────────── */
+    #action-toast {
+      position: fixed;
+      bottom: 24px;
+      left: 50%;
+      transform: translateX(-50%) translateY(20px);
+      background: #323232;
+      color: #fff;
+      padding: 12px 22px;
+      border-radius: 6px;
+      font-size: 0.9em;
+      max-width: 480px;
+      text-align: center;
+      opacity: 0;
+      pointer-events: none;
+      transition: opacity 0.25s, transform 0.25s;
+      z-index: 9000;
+      white-space: pre-wrap;
+    }
+    #action-toast.show {
+      opacity: 1;
+      transform: translateX(-50%) translateY(0);
+    }
+    #action-toast.error { background: #c62828; }
+
    /* ── Host body ──────────────────────────────────────────────── */

    .host-body {
@@ -193,7 +218,7 @@

    .plugin-label {
      font-weight: 600;
-      font-size: 0.85em;
+      font-size: 1.00em;
      color: #444;
      min-width: 140px;
    }
@@ -213,7 +238,7 @@
    .data-table {
      width: 100%;
      border-collapse: collapse;
-      font-size: 0.85em;
+      font-size: 1.00em;
      background: #fff;
      box-shadow: 0 1px 3px rgba(0,0,0,0.08);
      border-radius: 4px;
@@ -236,7 +261,7 @@
    .data-table th.center { text-align: center; }

    .data-table td {
-      padding: 6px 10px;
+    /*  padding: 6px 10px; */
      border-top: 1px solid #e8e8e8;
      color: #333;
    }
@@ -344,7 +369,7 @@
      text-align: center;
      padding: 12px;
      color: #aaa;
-      font-size: 0.85em;
+      font-size: 1.00em;
    }

    .error {
@@ -354,7 +379,7 @@
      margin: 8px 0;
      border-radius: 3px;
      color: #c62828;
-      font-size: 0.85em;
+      font-size: 1.00em;
    }

    /* ── Scrollbar ──────────────────────────────────────────────── */
@@ -363,6 +388,71 @@
    .container::-webkit-scrollbar-track { background: #f1f1f1; border-radius: 4px; }
    .container::-webkit-scrollbar-thumb { background: #ccc; border-radius: 4px; }
    .container::-webkit-scrollbar-thumb:hover { background: #999; }
+
+    /* ── Host info section ──────────────────────────────────────────────────── */
+    .host-info-section {
+      padding: 12px 16px;
+      background: #fafafa;
+      border-bottom: 1px solid #e0e0e0;
+      font-size: 1.00em;
+    }
+    .info-meta {
+      display: grid;
+      grid-template-columns: max-content 1fr;
+      gap: 3px 14px;
+      margin-bottom: 10px;
+    }
+    .info-label { font-weight: 600; color: #555; white-space: nowrap; }
+    .info-value { color: #222; }
+    .info-thresholds-title {
+      font-weight: 600;
+      color: #555;
+      margin-bottom: 6px;
+    }
+    .info-note { color: #888; font-style: italic; }
+    .info-loading { color: #bbb; font-style: italic; }
+    .threshold-covers { font-size: 1.00em; color: #777; font-style: italic; }
+
+    /* ── Dark mode ── */
+    html[data-theme="dark"] h1 { color: var(--text); }
+    html[data-theme="dark"] .subtitle { color: var(--text-sec); }
+    html[data-theme="dark"] .host-card { background: var(--surface); }
+    html[data-theme="dark"] .host-header:hover { background: var(--surface-2); }
+    html[data-theme="dark"] .host-name { color: var(--text); }
+    html[data-theme="dark"] .collapse-icon,
+    html[data-theme="dark"] .acc-icon { color: var(--text-muted); }
+    html[data-theme="dark"] .host-body { border-top-color: var(--border-3); }
+    html[data-theme="dark"] .plugin-accordion { border-color: var(--border); }
+    html[data-theme="dark"] .plugin-acc-header { background: var(--surface-2); }
+    html[data-theme="dark"] .plugin-acc-header:hover { background: var(--surface-3); }
+    html[data-theme="dark"] .plugin-label { color: var(--text-2); }
+    html[data-theme="dark"] .plugin-summary { color: var(--text-muted); }
+    html[data-theme="dark"] .data-table { background: var(--surface); }
+    html[data-theme="dark"] .data-table td { border-top-color: var(--border); color: var(--text); }
+    html[data-theme="dark"] .data-table td.key { color: var(--text-sec); }
+    html[data-theme="dark"] .data-table tbody tr:nth-child(even) { background: var(--surface-2); }
+    html[data-theme="dark"] .data-table tbody tr:hover { background: #1e3a5f; }
+    html[data-theme="dark"] .bar-track { background: var(--border); }
+    html[data-theme="dark"] .table-section-label { color: var(--text-muted); }
+    html[data-theme="dark"] .no-data,
+    html[data-theme="dark"] .loading { color: var(--text-dim); }
+    html[data-theme="dark"] .timestamp { color: var(--text-dim); border-top-color: var(--border-3); }
+    html[data-theme="dark"] .glance-chip.neutral { background: var(--surface-3); color: var(--text-sec); }
+    html[data-theme="dark"] .os-label { color: var(--text-muted); }
+    html[data-theme="dark"] .host-info-section { background: var(--surface-2); border-bottom-color: var(--border); }
+    html[data-theme="dark"] .info-label { color: var(--text-3); }
+    html[data-theme="dark"] .info-value { color: var(--text); }
+    html[data-theme="dark"] .info-thresholds-title { color: var(--text-3); }
+    html[data-theme="dark"] .info-note,
+    html[data-theme="dark"] .info-loading,
+    html[data-theme="dark"] .threshold-covers { color: var(--text-muted); }
+    html[data-theme="dark"] .check-ok      { background: #0d2e17; }
+    html[data-theme="dark"] .check-warning  { background: #2e1a00; }
+    html[data-theme="dark"] .check-critical { background: #2e0a0a; }
+    html[data-theme="dark"] .check-unknown  { background: var(--surface-2); }
+    html[data-theme="dark"] .check-output { color: var(--text-sec); }
+    html[data-theme="dark"] .container::-webkit-scrollbar-track { background: var(--surface-2); }
+    html[data-theme="dark"] .container::-webkit-scrollbar-thumb { background: var(--border); }
  </style>

  <body>
@@ -391,7 +481,8 @@
              <span class="host-name">{{ host.name }}</span>
            </div>

-            <div class="glance-strip" id="glance-{{ host.name }}">
+            <div class="glance-strip" id="glance-{{ host.name }}" data-owner="{{ host.owner or '' }}">
+              {% if current_user and current_user.admin and host.owner %}<span class="glance-chip neutral">{{ host.owner }}</span>{% endif %}
              <span class="glance-loading">—</span>
            </div>

@@ -401,17 +492,18 @@
              {% endif %}
              <span class="os-label" id="os-label-{{ host.name }}"></span>
              {% if host.is_owner %}
-              <a class="host-action-btn update-btn"
-                 href="/u?h={{ host.name }}"
-                 onclick="event.stopPropagation()">Update</a>
-              <a class="host-action-btn delete-btn"
-                 href="/d?h={{ host.name }}"
-                 onclick="event.stopPropagation(); return confirm('Delete host {{ host.name }}?')">Delete</a>
+              {# The host name is read from the enclosing card's data-hostname at click time, so it is never spliced into inline JS and is URL-encoded for the query string. #}
+              <button class="host-action-btn update-btn"
+                      onclick="event.stopPropagation(); hostAction(this, '/u?h=' + encodeURIComponent(this.closest('.host-card').dataset.hostname))">Update</button>
+              <button class="host-action-btn delete-btn"
+                      onclick="event.stopPropagation(); hostDelete(this, this.closest('.host-card').dataset.hostname)">Delete</button>
              {% endif %}
            </div>
          </div>

          <div class="host-body">
+            <div class="host-info-section" id="info-{{ host.name }}">
+              <div class="info-loading">Loading…</div>
+            </div>
            {% set plugin_order = ['os_info','cpu_monitor','memory_monitor','disk_monitor','network_monitor','zfs_monitor','nagios_runner','filesystem_info'] %}
            {% for plugin in plugin_order if plugin in host.plugins %}
            <div class="plugin-accordion collapsed"
@@ -457,12 +549,16 @@
      const GLANCE_PLUGINS = ['cpu_monitor','memory_monitor','disk_monitor',
                              'network_monitor','nagios_runner','os_info'];
      const SKIP_FIELDS = new Set(['id','name']);
+      const CURRENT_USER_ADMIN = {{ 'true' if current_user and current_user.admin else 'false' }};

      // ── Cache ───────────────────────────────────────────────────────────────

      // pluginCache[hostname][pluginName] = { data, timestamp, fetchedAt }
      const pluginCache = {};

+      // infoCache[hostname] = info data object from /api/0/hosts/{hostname}/info
+      const infoCache = {};
+
      function setCache(hostname, pluginName, sample) {
        if (!pluginCache[hostname]) pluginCache[hostname] = {};
        pluginCache[hostname][pluginName] = {
@@ -476,6 +572,17 @@
        return pluginCache[hostname]?.[pluginName] ?? null;
      }

+      // Return worst nagios exit code (0-3) found in a nagios_runner data object.
+      function nagiosWorstStatus(data) {
+        let worst = 0;
+        for (const [k, v] of Object.entries(data || {})) {
+          if (k.endsWith('_status_code') && typeof v === 'number' && v > worst) {
+            worst = v;
+          }
+        }
+        return worst;
+      }
+
      // ── Fetch helpers ───────────────────────────────────────────────────────

      async function fetchPlugin(hostname, pluginName) {
@@ -485,6 +592,61 @@
        return json.samples?.[0] ?? null;
      }

+      async function fetchHostInfo(hostname) {
+        const r = await fetch(`/api/0/hosts/${encodeURIComponent(hostname)}/info`);
+        if (!r.ok) throw new Error(`HTTP ${r.status}`);
+        return await r.json();
+      }
+
+      function renderInfoSection(hostname, data) {
+        const el = document.getElementById(`info-${hostname}`);
+        if (!el) return;
+
+        const owner    = data.owner ? escHtml(data.owner) : '—';
+        const managers = data.managers && data.managers.length
+          ? data.managers.map(escHtml).join(', ') : '—';
+        const hbcVer  = data.hbc_version ? escHtml(String(data.hbc_version)) : '—';
+        const hbcType = data.hbc_type    ? escHtml(String(data.hbc_type))    : '—';
+        const lastPkt = data.last_packet != null
+          ? new Date(data.last_packet * 1000).toLocaleString() : '—';
+
+        let html = `<div class="info-meta">
+          <span class="info-label">Owner</span><span class="info-value">${owner}</span>
+          <span class="info-label">Managers</span><span class="info-value">${managers}</span>
+          <span class="info-label">Agent Version</span><span class="info-value">${hbcVer}</span>
+          <span class="info-label">Agent Type</span><span class="info-value">${hbcType}</span>
+          <span class="info-label">Last Packet</span><span class="info-value">${lastPkt}</span>
+        </div>`;
+
+        if (data.thresholds === null) {
+          html += `<div class="info-note">Threshold alerting not configured.</div>`;
+        } else if (data.thresholds.length === 0) {
+          html += `<div class="info-note">No thresholds defined.</div>`;
+        } else {
+          html += `<div class="info-thresholds-title">Effective Thresholds</div>
+            <table class="data-table"><thead><tr>
+              <th>Metric</th><th>Op</th><th>Warning</th><th>Critical</th>
+            </tr></thead><tbody>`;
+          for (const t of data.thresholds) {
+            const w = t.warning  != null ? escHtml(String(t.warning))  : '—';
+            const c = t.critical != null ? escHtml(String(t.critical)) : '—';
+            let metricCell = escHtml(t.metric);
+            if (t.covers && t.covers.length > 0) {
+              metricCell += `<br><span class="threshold-covers">↳ ${t.covers.map(escHtml).join(', ')}</span>`;
+            }
+            html += `<tr>
+              <td class="key">${metricCell}</td>
+              <td>${escHtml(t.operator)}</td>
+              <td>${w}</td>
+              <td>${c}</td>
+            </tr>`;
+          }
+          html += `</tbody></table>`;
+        }
+
+        el.innerHTML = html;
+      }
+
      async function fetchHostGlance(hostname) {
        const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
        const availablePlugins = (card?.dataset.plugins || '').split(',').filter(Boolean);
@@ -524,6 +686,12 @@

        const chips = [];

+        // Owner (admin only, static from server)
+        const owner = strip.dataset.owner;
+        if (CURRENT_USER_ADMIN && owner) {
+          chips.push(`<span class="glance-chip neutral">${owner}</span>`);
+        }
+
        // CPU
        const cpu = getCache(hostname, 'cpu_monitor');
        if (cpu) {
@@ -577,13 +745,13 @@
          ? chips.join('')
          : '<span class="glance-loading">—</span>';

-        // Nagios badge
+        // Nagios badge — derive worst status from individual check codes
        const nagios = getCache(hostname, 'nagios_runner');
        if (nagosBadge && nagios) {
-          const status = (nagios.data.overall_status || '—').toUpperCase();
-          const cls = status === 'OK' ? 'ok'
-            : status === 'WARNING' ? 'warning'
-            : status === 'CRITICAL' ? 'critical' : '';
+          const worst = nagiosWorstStatus(nagios.data);
+          const names = {0:'OK', 1:'WARNING', 2:'CRITICAL', 3:'UNKNOWN'};
+          const status = names[worst] || '—';
+          const cls = worst === 0 ? 'ok' : worst === 1 ? 'warning' : worst >= 2 ? 'critical' : '';
          nagosBadge.className = `nagios-badge ${cls}`;
          nagosBadge.textContent = status;
        }
@@ -602,9 +770,22 @@
        const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
        const wasCollapsed = card.classList.contains('collapsed');
        card.classList.toggle('collapsed');
-        if (wasCollapsed && !pluginCache[hostname]) {
+        if (wasCollapsed) {
+          if (!pluginCache[hostname]) {
            fetchHostGlance(hostname);
          }
+          if (!infoCache[hostname]) {
+            const infoEl = document.getElementById(`info-${hostname}`);
+            if (infoEl) infoEl.innerHTML = '<div class="info-loading">Loading…</div>';
+            fetchHostInfo(hostname).then(data => {
+              infoCache[hostname] = data;
+              renderInfoSection(hostname, data);
+            }).catch(() => {
+              const el = document.getElementById(`info-${hostname}`);
+              if (el) el.innerHTML = '<div class="info-loading">Could not load host info.</div>';
+            });
+          }
+        }
      }

      // ── Toggle plugin accordion ─────────────────────────────────────────────
@@ -692,9 +873,10 @@
            break;
          }
          case 'nagios_runner': {
-            const status = (d.overall_status || '?').toUpperCase();
-            const count = d.plugin_count;
-            text = status + (count != null ? ` — ${count} checks` : '');
+            const worst = nagiosWorstStatus(d);
+            const names = {0:'OK', 1:'WARNING', 2:'CRITICAL', 3:'UNKNOWN'};
+            const codes = Object.keys(d).filter(k => k.endsWith('_status_code'));
+            text = (names[worst] || '?') + (codes.length ? ` — ${codes.length} checks` : '');
            break;
          }
          case 'filesystem_info': {
@@ -732,7 +914,7 @@
        let html = '';
        switch (pluginName) {
          case 'os_info':        html = renderOsInfoTable(cached.data); break;
-          case 'cpu_monitor':    html = renderCpuTable(cached.data); break;
+          case 'cpu_monitor':    html = renderCpuTable(hostname, cached.data); break;
          case 'memory_monitor': html = renderMemoryTable(cached.data); break;
          case 'disk_monitor':   html = renderDiskTables(cached.data); break;
          case 'network_monitor':html = renderNetworkTables(cached.data); break;
@@ -744,6 +926,10 @@

        html += `<div class="timestamp">Last updated: ${new Date(cached.timestamp * 1000).toLocaleString()}</div>`;
        body.innerHTML = html;
+
+        if (pluginName === 'cpu_monitor') {
+          fetchCpuHistory(hostname).then(samples => renderCpuChart(hostname, samples)).catch(() => {});
+        }
      }

      // ── Per-plugin renderers ────────────────────────────────────────────────
@@ -751,10 +937,11 @@
      function renderOsInfoTable(d) {
        const ORDER = ['distro_pretty_name','system','release','version','machine',
                       'processor','architecture','node','python_version',
-                       'python_implementation','hbc_version',
+                       'python_implementation',
                       'distro_name','distro_version','distro_id','distro_version_id'];
+        const INFO_FIELDS = new Set(['hbc_version', 'hbc_type']);
        const shown = new Set(ORDER);
-        const keys = [...ORDER, ...Object.keys(d).filter(k => !shown.has(k) && !SKIP_FIELDS.has(k))];
+        const keys = [...ORDER, ...Object.keys(d).filter(k => !shown.has(k) && !SKIP_FIELDS.has(k) && !INFO_FIELDS.has(k))];

        let html = '<table class="data-table"><thead><tr><th>Field</th><th>Value</th></tr></thead><tbody>';
        for (const k of keys) {
@@ -765,7 +952,92 @@
        return html;
      }

-      function renderCpuTable(d) {
+      async function fetchCpuHistory(hostname) {
+        const r = await fetch(`/api/0/hosts/${encodeURIComponent(hostname)}/plugins/cpu_monitor?limit=100`);
+        if (!r.ok) return [];
+        const json = await r.json();
+        return json.samples || [];
+      }
+
+      function renderCpuChart(hostname, samples) {
+        const el = document.getElementById(`cpu-chart-${hostname}`);
+        if (!el || !samples.length) return;
+
+        const pts = samples
+          .filter(s => s.data.cpu_percent != null)
+          .map(s => ({ t: s.timestamp, v: s.data.cpu_percent }));
+        if (pts.length < 2) { el.style.display = 'none'; return; }
+
+        const W = 600, H = 80, PAD = { top: 6, right: 8, bottom: 18, left: 28 };
+        const cW = W - PAD.left - PAD.right;
+        const cH = H - PAD.top - PAD.bottom;
+
+        const tMin = pts[0].t, tMax = pts[pts.length - 1].t;
+        const tRange = tMax - tMin || 1;
+        const x = t => PAD.left + ((t - tMin) / tRange) * cW;
+
+        // Auto-scale Y axis with 10% padding, clamped to [0, 100]
+        const vMin = Math.min(...pts.map(p => p.v));
+        const vMax = Math.max(...pts.map(p => p.v));
+        const vRange = vMax - vMin || 1;
+        const vPad = Math.max(vRange * 0.1, 1);
+        const yLow  = Math.max(0,   vMin - vPad);
+        const yHigh = Math.min(100, vMax + vPad);
+        const yRange = yHigh - yLow || 1;
+        const y = v => PAD.top + cH - ((v - yLow) / yRange) * cH;
+
+        // Build polyline points and filled area path
+        const linePoints = pts.map(p => `${x(p.t).toFixed(1)},${y(p.v).toFixed(1)}`).join(' ');
+        const areaPath = `M${x(pts[0].t).toFixed(1)},${(PAD.top + cH).toFixed(1)} ` +
+          pts.map(p => `L${x(p.t).toFixed(1)},${y(p.v).toFixed(1)}`).join(' ') +
+          ` L${x(pts[pts.length-1].t).toFixed(1)},${(PAD.top + cH).toFixed(1)} Z`;
+
+        // Color based on latest absolute CPU %
+        const latest = pts[pts.length - 1].v;
+        const strokeColor = latest > 90 ? '#e53935' : latest > 70 ? '#fb8c00' : '#43a047';
+        const fillColor   = latest > 90 ? '#ffcdd2' : latest > 70 ? '#ffe0b2' : '#c8e6c9';
+
+        // Compute nice tick step for ~3-5 grid lines
+        const rawStep = yRange / 4;
+        const mag = Math.pow(10, Math.floor(Math.log10(rawStep || 1)));
+        const niceStep = [1, 2, 5, 10].map(f => f * mag).find(s => yRange / s <= 5) || mag * 10;
+        const tickStart = Math.ceil(yLow / niceStep) * niceStep;
+        let gridLines = '';
+        for (let v = tickStart; v <= yHigh + 0.001; v += niceStep) {
+          const yy = y(v).toFixed(1);
+          const label = Number.isInteger(v) ? v : v.toFixed(1);
+          gridLines += `<line x1="${PAD.left}" y1="${yy}" x2="${PAD.left + cW}" y2="${yy}" stroke="#e0e0e0" stroke-width="1"/>`;
+          gridLines += `<text x="${(PAD.left - 3).toFixed(1)}" y="${yy}" text-anchor="end" dominant-baseline="middle" font-size="8" fill="#999">${label}</text>`;
+        }
+
+        // X-axis time labels
+        const fmt = ts => {
+          const d = new Date(ts * 1000);
+          return d.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
+        };
+        const xLabels = `
+          <text x="${PAD.left}" y="${H - 2}" text-anchor="start" font-size="8" fill="#999">${fmt(pts[0].t)}</text>
+          <text x="${PAD.left + cW}" y="${H - 2}" text-anchor="end" font-size="8" fill="#999">${fmt(pts[pts.length-1].t)}</text>`;
+
+        el.innerHTML = `<svg viewBox="0 0 ${W} ${H}" preserveAspectRatio="none"
+          style="width:100%;height:${H}px;display:block;">
+          <defs>
+            <clipPath id="cpu-clip-${hostname}">
+              <rect x="${PAD.left}" y="${PAD.top}" width="${cW}" height="${cH}"/>
+            </clipPath>
+          </defs>
+          ${gridLines}
+          <line x1="${PAD.left}" y1="${PAD.top}" x2="${PAD.left}" y2="${PAD.top + cH}" stroke="#ccc" stroke-width="1"/>
+          <line x1="${PAD.left}" y1="${PAD.top + cH}" x2="${PAD.left + cW}" y2="${PAD.top + cH}" stroke="#ccc" stroke-width="1"/>
+          <g clip-path="url(#cpu-clip-${hostname})">
+            <path d="${areaPath}" fill="${fillColor}" opacity="0.6"/>
+            <polyline points="${linePoints}" fill="none" stroke="${strokeColor}" stroke-width="1.5" stroke-linejoin="round"/>
+          </g>
+          ${xLabels}
+        </svg>`;
+      }
+
+      function renderCpuTable(hostname, d) {
        const KEYS = [
          ['cpu_percent',      'CPU Usage',         'bar'],
          ['load_1min',        'Load (1 min)',       'num'],
@@ -783,7 +1055,8 @@
        ];

        const handled = new Set(KEYS.map(r => r[0]));
-        let html = '<table class="data-table"><thead><tr><th>Metric</th><th>Value</th></tr></thead><tbody>';
+        let html = `<div id="cpu-chart-${hostname}" style="margin-bottom:8px;"></div>`;
+        html += '<table class="data-table"><thead><tr><th>Metric</th><th>Value</th></tr></thead><tbody>';
        for (const [k, label, fmt] of KEYS) {
          if (!(k in d)) continue;
          const v = d[k];
@@ -1163,9 +1436,12 @@
      // ── Auto-refresh (30 s) ─────────────────────────────────────────────────

      setInterval(() => {
+        document.querySelectorAll('.host-card').forEach(card => {
+          fetchHostGlance(card.dataset.hostname);
+        });
+
        document.querySelectorAll('.host-card:not(.collapsed)').forEach(card => {
          const hostname = card.dataset.hostname;
-          fetchHostGlance(hostname);

          card.querySelectorAll('.plugin-accordion:not(.collapsed)').forEach(acc => {
            const pname = acc.dataset.plugin;
@@ -1185,25 +1461,83 @@
      // ── Init ────────────────────────────────────────────────────────────────

      document.addEventListener('DOMContentLoaded', () => {
-        // If a host fragment is in the URL, expand and scroll to that host;
-        // otherwise expand the first host as before.
+        // Fetch glance data for every host immediately so the strip is always populated.
+        document.querySelectorAll('.host-card').forEach(card => {
+          fetchHostGlance(card.dataset.hostname);
+        });
+
+        // Expand and load info for the target host (URL hash or first host).
+        function expandHost(hostname) {
+          const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
+          if (!card) return false;
+          card.classList.remove('collapsed');
+          fetchHostInfo(hostname).then(data => {
+            infoCache[hostname] = data;
+            renderInfoSection(hostname, data);
+          }).catch(() => {
+            const el = document.getElementById(`info-${hostname}`);
+            if (el) el.innerHTML = '<div class="info-loading">Could not load host info.</div>';
+          });
+          return true;
+        }
+
        const hash = window.location.hash;
        if (hash) {
          const hostname = decodeURIComponent(hash.slice(1));
+          if (expandHost(hostname)) {
+            setTimeout(() => {
              const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
-          if (card) {
-            card.classList.remove('collapsed');
-            fetchHostGlance(hostname);
-            setTimeout(() => card.scrollIntoView({ behavior: 'smooth', block: 'start' }), 150);
+              if (card) card.scrollIntoView({ behavior: 'smooth', block: 'start' });
+            }, 150);
            return;
          }
        }
        const first = document.querySelector('.host-card');
-        if (first) {
-          first.classList.remove('collapsed');
-          fetchHostGlance(first.dataset.hostname);
-        }
+        if (first) expandHost(first.dataset.hostname);
      });
+      // ── Host action helpers ──────────────────────────────────────
+
+      let _toastTimer = null;
+      function showToast(msg, isError) {
+        const t = document.getElementById('action-toast');
+        t.textContent = msg;
+        t.classList.toggle('error', !!isError);
+        t.classList.add('show');
+        clearTimeout(_toastTimer);
+        _toastTimer = setTimeout(() => t.classList.remove('show'), 4000);
+      }
+
+      async function hostAction(btn, url) {
+        btn.disabled = true;
+        try {
+          const res = await fetch(url);
+          const text = await res.text();
+          showToast(text, !res.ok);
+        } catch (e) {
+          showToast('Request failed: ' + e.message, true);
+        } finally {
+          btn.disabled = false;
+        }
+      }
+
+      async function hostDelete(btn, hostname) {
+        if (!confirm('Delete host ' + hostname + '?')) return;
+        btn.disabled = true;
+        try {
+          const res = await fetch('/d?h=' + encodeURIComponent(hostname));
+          const text = await res.text();
+          showToast(text, !res.ok);
+          if (res.ok) {
+            const card = document.querySelector(`.host-card[data-hostname="${hostname}"]`);
+            if (card) card.remove();
+          }
+        } catch (e) {
+          showToast('Request failed: ' + e.message, true);
+          btn.disabled = false;
+        }
+      }
    </script>
+
+    <div id="action-toast"></div>
  </body>
 </html>
@@ -96,7 +96,7 @@
      border-radius: 4px;
      background: #f44336;
      color: #fff;
-      font-size: 0.85em;
+      font-size: 1.00em;
      font-weight: 500;
      text-decoration: none;
      transition: background 0.15s;
@@ -157,7 +157,7 @@
      gap: 6px;
      padding: 4px 12px;
      border-radius: 16px;
-      font-size: 0.85em;
+      font-size: 1.00em;
      font-weight: 500;
      text-decoration: none;
    }
@@ -204,6 +204,120 @@
    }

    .channel-name { color: #333; }
+
+    .edit-section { margin-top: 20px; }
+    .edit-section h4 { font-size: .88em; font-weight: 600; color: #333; margin: 0 0 10px; text-transform: uppercase; letter-spacing: .04em; border-bottom: 1px solid #eee; padding-bottom: 6px; }
+    .edit-field { margin-bottom: 10px; }
+    .edit-field label { display: block; font-size: .82em; color: #666; margin-bottom: 3px; }
+    .edit-input { width: 100%; border: 1px solid #ccc; border-radius: 4px; padding: 5px 8px; font-size: .88em; box-sizing: border-box; }
+    .edit-input:focus { border-color: #0066cc; outline: none; }
+    .status-msg { font-size: .82em; margin-left: 8px; }
+    .save-row { display: flex; align-items: center; margin-top: 8px; }
+    .btn-save { background: #0066cc; color: #fff; border: none; border-radius: 4px; padding: 5px 14px; font-size: .85em; cursor: pointer; }
+    .btn-save:hover { background: #0055aa; }
+    /* ---- Channel chip picker ---- */
+    .ch-picker { }
+    .ch-picker-label { font-size: .8em; font-weight: 600; color: #888; text-transform: uppercase; letter-spacing: .04em; margin-bottom: 6px; }
+    .ch-chips { display: flex; flex-wrap: wrap; gap: 6px; min-height: 32px; margin-bottom: 10px; }
+    .ch-chip {
+      display: inline-flex; align-items: center; gap: 5px;
+      padding: 4px 10px; border-radius: 14px; font-size: .85em; font-weight: 500; cursor: pointer;
+      border: none; font-family: inherit;
+    }
+    .ch-chip.selected { background: #e3f2fd; color: #1565c0; }
+    .ch-chip.selected:hover { background: #bbdefb; }
+    .ch-chip.available { background: #f1f3f4; color: #555; }
+    .ch-chip.available:hover { background: #e8eaf6; color: #283593; }
+    .ch-chip-x { font-size: .9em; line-height: 1; color: inherit; opacity: .7; }
+
+    /* ---- My Channels card list ---- */
+    .my-ch-card {
+      border: 1px solid #e8eaf6; border-radius: 6px; margin-bottom: 8px; overflow: hidden;
+    }
+    .my-ch-header {
+      display: flex; align-items: center; gap: 8px; padding: 8px 12px;
+      background: #f8f9ff; border-bottom: 1px solid #e8eaf6;
+    }
+    .my-ch-name { font-weight: 600; font-size: .9em; color: #222; }
+    .my-ch-type { padding: 2px 7px; border-radius: 8px; font-size: .72em; font-weight: 600; background: #e8eaf6; color: #3949ab; }
+    .my-ch-private { padding: 2px 7px; border-radius: 8px; font-size: .72em; font-weight: 600; background: #fce4ec; color: #c62828; }
+    .my-ch-actions { margin-left: auto; display: flex; gap: 5px; }
+    .btn-sm-edit { background: #888; color: #fff; border: none; border-radius: 4px; padding: 2px 8px; font-size: .78em; cursor: pointer; }
+    .btn-sm-edit:hover { background: #666; }
+    .btn-sm-del { background: transparent; color: #c62828; border: 1px solid #e0e0e0; border-radius: 4px; padding: 2px 7px; font-size: .78em; cursor: pointer; }
+    .btn-sm-del:hover { background: #fce4ec; }
+
+    /* ---- Theme picker ---- */
+    .theme-btns { display: flex; gap: 6px; }
+    .theme-btn {
+      padding: 5px 14px;
+      border: 1px solid var(--border, #e0e0e0);
+      border-radius: 4px;
+      background: var(--surface-3, #f5f5f5);
+      color: var(--text-sec, #666);
+      cursor: pointer;
+      font-size: .88em;
+      font-family: inherit;
+    }
+    .theme-btn:hover { border-color: var(--link, #0066cc); color: var(--link, #0066cc); }
+    .theme-btn.active { background: var(--link, #0066cc); color: #fff; border-color: var(--link, #0066cc); }
+
+    /* ── Dark mode ── */
+    html[data-theme="dark"] h1 { color: var(--text); }
+    html[data-theme="dark"] .subtitle { color: var(--text-sec); }
+    html[data-theme="dark"] .profile-card { background: var(--surface); box-shadow: 0 1px 6px var(--shadow); }
+    html[data-theme="dark"] .profile-name { color: var(--text); }
+    html[data-theme="dark"] .profile-username { color: var(--text-sec); }
+    html[data-theme="dark"] .badge-admin { background: #1a3255; color: #7aa8f0; }
+    html[data-theme="dark"] .badge-user  { background: var(--surface-3); color: var(--text-sec); }
+    html[data-theme="dark"] .section { background: var(--surface); box-shadow: 0 1px 6px var(--shadow); }
+    html[data-theme="dark"] .section h2 { color: var(--text); border-bottom-color: var(--border); }
+    html[data-theme="dark"] .settings-row { border-bottom-color: var(--border-4); }
+    html[data-theme="dark"] .settings-label { color: var(--text-sec); }
+    html[data-theme="dark"] .settings-value { color: var(--text); }
+    html[data-theme="dark"] .settings-empty { color: var(--text-dim); }
+    html[data-theme="dark"] .edit-section h4 { color: var(--text); border-bottom-color: var(--border); }
+    html[data-theme="dark"] .edit-field label { color: var(--text-sec); }
+    html[data-theme="dark"] .edit-input { background: var(--input-bg); border-color: var(--input-border); color: var(--text); }
+    html[data-theme="dark"] .channel-row { border-bottom-color: var(--border-4); }
+    html[data-theme="dark"] .channel-name { color: var(--text); }
+    html[data-theme="dark"] .ch-picker-label { color: var(--text-sec); }
+    html[data-theme="dark"] .ch-chip.selected { background: #1a3255; color: #60a5fa; }
+    html[data-theme="dark"] .ch-chip.available { background: var(--surface-3); color: var(--text-sec); }
+    html[data-theme="dark"] .ch-chip.available:hover { background: var(--border); color: var(--link); }
+    html[data-theme="dark"] .my-ch-card { border-color: var(--border); }
+    html[data-theme="dark"] .my-ch-header { background: var(--surface-2); border-bottom-color: var(--border); }
+    html[data-theme="dark"] .my-ch-name { color: var(--text); }
+    html[data-theme="dark"] .host-chip.owner   { background: #0d2e17; color: #66bb6a; }
+    html[data-theme="dark"] .host-chip.manager { background: #0d1f40; color: #64b5f6; }
+    html[data-theme="dark"] .host-chip.monitor { background: #1e0d30; color: #ba68c8; }
+    html[data-theme="dark"] .no-hosts { color: var(--text-dim); }
+    html[data-theme="dark"] .ch-modal-box { background: var(--surface); color: var(--text); }
+    html[data-theme="dark"] .ch-modal-box h3 { color: var(--text); }
+    html[data-theme="dark"] .ch-form-row label { color: var(--text-sec); }
+    html[data-theme="dark"] .ch-form-divider { color: var(--text-muted); border-top-color: var(--border); }
+
+    /* ---- Channel modal (for My Channels CRUD) ---- */
+    .ch-modal-overlay {
+      position: fixed; inset: 0; background: rgba(0,0,0,.4);
+      display: flex; align-items: center; justify-content: center; z-index: 1001;
+    }
+    .ch-modal-box {
+      background: #fff; border-radius: 8px; padding: 24px;
+      min-width: 360px; max-width: 520px; width: 95%;
+      box-shadow: 0 8px 32px rgba(0,0,0,.2);
+    }
+    .ch-modal-box h3 { margin: 0 0 16px; font-size: 1em; }
+    .ch-form-row { margin-bottom: 12px; }
+    .ch-form-row label { display: block; font-size: .83em; font-weight: 600; color: #555; margin-bottom: 3px; }
+    .ch-form-row input[type=text], .ch-form-row input[type=password], .ch-form-row select {
+      width: 100%; border: 1px solid #ccc; border-radius: 4px; padding: 5px 8px;
+      font-size: .88em; box-sizing: border-box; font-family: inherit;
+    }
+    .ch-form-row input:focus, .ch-form-row select:focus { border-color: #0066cc; outline: none; }
+    .ch-form-divider { font-size: .78em; font-weight: 700; text-transform: uppercase; letter-spacing: .05em; color: #888; margin: 14px 0 8px; border-top: 1px solid #eee; padding-top: 10px; }
+    .ch-modal-footer { display: flex; justify-content: flex-end; gap: 8px; margin-top: 18px; }
+    .ch-modal-status { font-size: .83em; margin-top: 8px; }
  </style>

  <body>
@@ -266,20 +380,165 @@
        </div>
      </div>

-      <!-- Notification channels -->
+      {% if current_user %}
+      <!-- ---- Editable identity ---- -->
+      <div class="section edit-section">
+        <h4>Identity</h4>
+        <div class="edit-field">
+          <label for="profile-fullname">Display name</label>
+          <input id="profile-fullname" class="edit-input" type="text" value="{{ current_user.full_name | e }}" placeholder="Full name">
+        </div>
+        <div class="edit-field">
+          <label for="profile-avatar">Avatar URL or path</label>
+          <input id="profile-avatar" class="edit-input" type="text" value="{{ current_user.avatar | e }}" placeholder="/path/to/avatar.png or https://…">
+        </div>
+        <div class="save-row">
+          <button class="btn-save" onclick="saveIdentity()">Save</button>
+          <span id="identity-status" class="status-msg"></span>
+        </div>
+      </div>
+
+      <!-- ---- Change password ---- -->
+      <div class="section edit-section">
+        <h4>Change password</h4>
+        <div class="edit-field">
+          <label for="profile-current-pw">Current password</label>
+          <input id="profile-current-pw" class="edit-input" type="password" autocomplete="current-password">
+        </div>
+        <div class="edit-field">
+          <label for="profile-new-pw">New password</label>
+          <input id="profile-new-pw" class="edit-input" type="password" autocomplete="new-password">
+        </div>
+        <div class="save-row">
+          <button class="btn-save" onclick="changePassword()">Change password</button>
+          <span id="password-status" class="status-msg"></span>
+        </div>
+      </div>
+      {% endif %}
+
+      <!-- Notification channels — chip picker -->
      <div class="section">
        <h2>Notification Channels</h2>
-        {% if notification_channels %}
-        {% for ch in notification_channels %}
-        <div class="channel-row">
-          <span class="channel-type">{{ ch.type }}</span>
-          <span class="channel-name">{{ ch.name }}</span>
+        {% if current_user %}
+        <p style="font-size:.82em;color:#888;margin:0 0 12px">Click a channel to add or remove it from your alert list.</p>
+        {% if all_channels %}
+        <div class="ch-picker">
+          <div class="ch-picker-label">Selected</div>
+          <div id="selected-chips" class="ch-chips">
+            {% for ch in all_channels %}
+            {% if ch.name in (current_user.notification_channels or []) %}
+            <button class="ch-chip selected" data-ch="{{ ch.name | e }}" onclick="toggleChip(this)">
+              {{ ch.name | e }} <span class="ch-chip-x">×</span>
+            </button>
+            {% endif %}
+            {% endfor %}
+            {% set selected_set = current_user.notification_channels or [] %}
+            {% set has_selected = selected_set | length > 0 %}
+            {% if not has_selected %}
+            <span style="font-size:.83em;color:#bbb;font-style:italic;align-self:center">None selected</span>
+            {% endif %}
+          </div>
+          <div class="ch-picker-label">Available</div>
+          <div id="available-chips" class="ch-chips">
+            {% for ch in all_channels %}
+            {% if ch.name not in (current_user.notification_channels or []) %}
+            <button class="ch-chip available" data-ch="{{ ch.name | e }}" onclick="toggleChip(this)">
+              + {{ ch.name | e }}
+            </button>
+            {% endif %}
+            {% endfor %}
+          </div>
+        </div>
+        {% else %}
+        <p style="font-size:.83em;color:#bbb;font-style:italic">No notification channels available. You can create your own below.</p>
+        {% endif %}
+        <div class="save-row">
+          <button class="btn-save" onclick="saveChannels()">Save channels</button>
+          <span id="channels-status" class="status-msg"></span>
+        </div>
+        {% else %}
+        <span class="no-hosts">Log in to manage notification channels.</span>
+        {% endif %}
+      </div>
+
+      <!-- My Channels — create/edit/delete own channels -->
+      {% if current_user %}
+      <div class="section">
+        <h2>My Channels</h2>
+        <p style="font-size:.82em;color:#888;margin:0 0 12px">Channels you own. Public channels are available to all users; private channels are visible only to you.</p>
+        <div id="my-channels-list">
+          {% set my_channels = all_channels | selectattr('owner', 'equalto', current_user.username) | list %}
+          {% for ch in my_channels %}
+          <div class="my-ch-card" id="mychcard-{{ ch.name | e }}">
+            <div class="my-ch-header">
+              <span class="my-ch-name">{{ ch.name | e }}</span>
+              <span class="my-ch-type">{{ ch.type | e }}</span>
+              {% if ch.private %}<span class="my-ch-private">private</span>{% endif %}
+              {# Pass the channel name through data-ch rather than an inline JS string literal:
+                 HTML-escaping is undone by the HTML parser before the handler runs, so a name
+                 containing a quote or backslash would break out of the literal. #}
+              <span class="my-ch-actions">
+                <button class="btn-sm-edit" data-ch="{{ ch.name | e }}" onclick="openMyChModal(this.dataset.ch)">Edit</button>
+                <button class="btn-sm-del" data-ch="{{ ch.name | e }}" onclick="deleteMyChannel(this.dataset.ch)">✕</button>
+              </span>
+            </div>
          </div>
          {% endfor %}
-        {% else %}
-        <span class="no-hosts">No personal notification channels configured.</span>
+          {% if not my_channels %}
+          <p id="my-channels-empty" style="font-size:.83em;color:#bbb;font-style:italic">No channels yet.</p>
          {% endif %}
        </div>
+        <div class="save-row" style="margin-top:8px">
+          <button class="btn-save" onclick="openMyChModal()">+ New channel</button>
+        </div>
+      </div>
+
+      <!-- My Channels modal -->
+      <div id="my-ch-modal" class="ch-modal-overlay" style="display:none" onclick="if(event.target===this)closeMyChModal()">
+        <div class="ch-modal-box">
+          <h3 id="my-ch-modal-title">New Channel</h3>
+          <div class="ch-form-row">
+            <label>Channel name</label>
+            <input type="text" id="my-ch-name" placeholder="e.g. my_pushover" autocomplete="off">
+          </div>
+          <div class="ch-form-row">
+            <label>Type</label>
+            <select id="my-ch-type" onchange="onMyChTypeChange()">
+              <option value="">— select —</option>
+            </select>
+          </div>
+          <div id="my-ch-type-fields"></div>
+          <div class="ch-form-divider">Options</div>
+          <div class="ch-form-row">
+            <label>Minimum alert level</label>
+            <select id="my-ch-min-level">
+              <option value="WARNING">WARNING (and above)</option>
+              <option value="CRITICAL">CRITICAL only</option>
+            </select>
+          </div>
+          <div class="ch-form-row">
+            <label style="display:flex;align-items:center;gap:6px;cursor:pointer">
+              <input type="checkbox" id="my-ch-private"> Private — visible only to you
+            </label>
+          </div>
+          <div id="my-ch-modal-status" class="ch-modal-status"></div>
+          <div class="ch-modal-footer">
+            <button class="btn-save" style="background:#888" onclick="closeMyChModal()">Cancel</button>
+            <button class="btn-save" onclick="saveMyChannel()">Save</button>
+          </div>
+        </div>
+      </div>
+      {% endif %}
+
+      <!-- Appearance -->
+      <div class="section">
+        <h2>Appearance</h2>
+        <div class="settings-row">
+          <span class="settings-label">Theme</span>
+          <div class="theme-btns">
+            <button class="theme-btn" data-theme-val="auto"  onclick="setTheme('auto')">Auto</button>
+            <button class="theme-btn" data-theme-val="light" onclick="setTheme('light')">Light</button>
+            <button class="theme-btn" data-theme-val="dark"  onclick="setTheme('dark')">Dark</button>
+          </div>
+        </div>
+      </div>

      <!-- Host access -->
      <div class="section">
@@ -326,5 +585,258 @@
      </div>

    </div>
+    <script>
+      // ---- Theme ----
+      function applyTheme(pref) {
+        var dark = pref === 'dark' ||
+          (pref === 'auto' && window.matchMedia('(prefers-color-scheme: dark)').matches);
+        if (dark) { document.documentElement.setAttribute('data-theme', 'dark'); }
+        else { document.documentElement.removeAttribute('data-theme'); }
+      }
+      function setTheme(pref) {
+        try { localStorage.setItem('hbd_theme', pref); } catch(e) {}
+        applyTheme(pref);
+        document.querySelectorAll('.theme-btn').forEach(function(b) {
+          b.classList.toggle('active', b.dataset.themeVal === pref);
+        });
+      }
+      (function() {
+        var pref = 'auto';
+        try { pref = localStorage.getItem('hbd_theme') || 'auto'; } catch(e) {}
+        document.querySelectorAll('.theme-btn').forEach(function(b) {
+          b.classList.toggle('active', b.dataset.themeVal === pref);
+        });
+      })();
+
+      // ---- Identity ----
+      async function saveIdentity() {
+        const full_name = document.getElementById('profile-fullname').value;
+        const avatar = document.getElementById('profile-avatar').value;
+        const resp = await fetch('/api/0/users/me', {
+          method: 'PUT',
+          headers: {'Content-Type': 'application/json'},
+          body: JSON.stringify({full_name, avatar}),
+        });
+        if (resp.ok) {
+          showStatus('identity-status', 'Saved', '#2e7d32');
+        } else {
+          const err = await resp.json().catch(() => ({}));
+          showStatus('identity-status', err.error || 'Error saving', '#c62828');
+        }
+      }
+
+      // ---- Password ----
+      async function changePassword() {
+        const current = document.getElementById('profile-current-pw').value;
+        const newpw = document.getElementById('profile-new-pw').value;
+        if (!current || !newpw) {
+          showStatus('password-status', 'Both fields are required', '#c62828');
+          return;
+        }
+        const resp = await fetch('/api/0/users/me', {
+          method: 'PUT',
+          headers: {'Content-Type': 'application/json'},
+          body: JSON.stringify({password: {current, new: newpw}}),
+        });
+        if (resp.ok) {
+          document.getElementById('profile-current-pw').value = '';
+          document.getElementById('profile-new-pw').value = '';
+          showStatus('password-status', 'Password changed', '#2e7d32');
+        } else {
+          const err = await resp.json().catch(() => ({}));
+          showStatus('password-status', err.error || 'Error', '#c62828');
+        }
+      }
+
+      // ---- Channel chip picker ----
+      function toggleChip(btn) {
+        const name = btn.dataset.ch;
+        const isSelected = btn.classList.contains('selected');
+        if (isSelected) {
+          // Move to available
+          btn.classList.remove('selected');
+          btn.classList.add('available');
+          btn.innerHTML = '+ ' + escHtml(name);
+          btn.onclick = function() { toggleChip(this); };
+          document.getElementById('available-chips').appendChild(btn);
+          // Remove "None selected" placeholder if it exists
+        } else {
+          // Move to selected
+          btn.classList.remove('available');
+          btn.classList.add('selected');
+          btn.innerHTML = escHtml(name) + ' <span class="ch-chip-x">×</span>';
+          btn.onclick = function() { toggleChip(this); };
+          document.getElementById('selected-chips').appendChild(btn);
+        }
+        // Update placeholder visibility
+        const sel = document.getElementById('selected-chips');
+        const placeholder = sel.querySelector('span[style]');
+        const hasChips = sel.querySelectorAll('.ch-chip.selected').length > 0;
+        if (placeholder) placeholder.style.display = hasChips ? 'none' : '';
+      }
+
+      async function saveChannels() {
+        const notification_channels = [
+          ...document.querySelectorAll('#selected-chips .ch-chip.selected')
+        ].map(b => b.dataset.ch);
+        const resp = await fetch('/api/0/users/me', {
+          method: 'PUT',
+          headers: {'Content-Type': 'application/json'},
+          body: JSON.stringify({notification_channels}),
+        });
+        if (resp.ok) {
+          showStatus('channels-status', 'Saved', '#2e7d32');
+        } else {
+          const err = await resp.json().catch(() => ({}));
+          showStatus('channels-status', err.error || 'Error saving', '#c62828');
+        }
+      }
+
+      // ---- My Channels CRUD ----
+      let _myChSchemas = {};
+      let _myChEditName = null;
+
+      async function _loadMyChSchemas() {
+        try {
+          const r = await fetch('/api/0/notification_channel_types');
+          _myChSchemas = await r.json();
+          const sel = document.getElementById('my-ch-type');
+          if (!sel) return;
+          Object.entries(_myChSchemas).forEach(([k, v]) => {
+            const opt = document.createElement('option');
+            opt.value = k; opt.textContent = v.label;
+            sel.appendChild(opt);
+          });
+        } catch(e) { console.warn('Could not load channel schemas', e); }
+      }
+
+      function onMyChTypeChange() {
+        const type = document.getElementById('my-ch-type').value;
+        const container = document.getElementById('my-ch-type-fields');
+        container.innerHTML = '';
+        if (!type || !_myChSchemas[type]) return;
+        const divider = document.createElement('div');
+        divider.className = 'ch-form-divider';
+        divider.textContent = _myChSchemas[type].label + ' settings';
+        container.appendChild(divider);
+        (_myChSchemas[type].fields || []).forEach(sf => {
+          const row = document.createElement('div');
+          row.className = 'ch-form-row';
+          const lbl = document.createElement('label');
+          lbl.textContent = sf.label + (sf.required ? ' *' : '');
+          const inp = document.createElement('input');
+          inp.type = sf.type === 'secret' ? 'password' : 'text';
+          inp.id = 'mychf-' + sf.key;
+          inp.placeholder = sf.required ? '(required)' : '(optional)';
+          inp.autocomplete = 'off';
+          row.appendChild(lbl);
+          row.appendChild(inp);
+          container.appendChild(row);
+        });
+      }
+
+      async function openMyChModal(name) {
+        _myChEditName = name || null;
+        document.getElementById('my-ch-modal-status').textContent = '';
+        document.getElementById('my-ch-modal-title').textContent = name ? 'Edit Channel' : 'New Channel';
+        document.getElementById('my-ch-name').value = name || '';
+        document.getElementById('my-ch-name').disabled = !!name;
+        document.getElementById('my-ch-type').value = '';
+        document.getElementById('my-ch-type-fields').innerHTML = '';
+        document.getElementById('my-ch-min-level').value = 'WARNING';
+        document.getElementById('my-ch-private').checked = false;
+
+        if (name) {
+          try {
+            const r = await fetch('/api/0/notification_channels');
+            const channels = await r.json();
+            const ch = channels.find(c => c.name === name);
+            if (ch) {
+              document.getElementById('my-ch-type').value = ch.type;
+              onMyChTypeChange();
+              document.getElementById('my-ch-min-level').value = ch.min_level || 'WARNING';
+              document.getElementById('my-ch-private').checked = ch.private || false;
+              (ch.fields || []).forEach(f => {
+                const inp = document.getElementById('mychf-' + f.key);
+                if (inp) inp.value = f.value || '';
+              });
+            }
+          } catch(e) { console.warn('Failed to load channel', e); }
+        }
+        document.getElementById('my-ch-modal').style.display = 'flex';
+      }
+
+      function closeMyChModal() {
+        document.getElementById('my-ch-modal').style.display = 'none';
+      }
+
+      async function saveMyChannel() {
+        const name = document.getElementById('my-ch-name').value.trim();
+        const type = document.getElementById('my-ch-type').value;
+        const minLevel = document.getElementById('my-ch-min-level').value;
+        const isPrivate = document.getElementById('my-ch-private').checked;
+        const statusEl = document.getElementById('my-ch-modal-status');
+        statusEl.textContent = '';
+
+        if (!name) { statusEl.textContent = 'Name is required.'; statusEl.style.color = '#c62828'; return; }
+        if (!type) { statusEl.textContent = 'Please select a type.'; statusEl.style.color = '#c62828'; return; }
+
+        const body = { name, type, min_level: minLevel, private: isPrivate };
+        if (_myChSchemas[type]) {
+          (_myChSchemas[type].fields || []).forEach(sf => {
+            const inp = document.getElementById('mychf-' + sf.key);
+            if (inp) body[sf.key] = inp.value;
+          });
+        }
+
+        const isEdit = !!_myChEditName;
+        const url = isEdit
+          ? '/api/0/notification_channels/' + encodeURIComponent(_myChEditName)
+          : '/api/0/notification_channels';
+        const method = isEdit ? 'PUT' : 'POST';
+        try {
+          const r = await fetch(url, { method, headers: {'Content-Type': 'application/json'}, body: JSON.stringify(body) });
+          if (r.ok) {
+            closeMyChModal();
+            window.location.reload();
+          } else {
+            const err = await r.json().catch(() => ({}));
+            statusEl.textContent = err.error || 'Error saving.';
+            statusEl.style.color = '#c62828';
+          }
+        } catch(e) {
+          statusEl.textContent = 'Network error: ' + e.message;
+          statusEl.style.color = '#c62828';
+        }
+      }
+
+      async function deleteMyChannel(name) {
+        if (!confirm('Delete channel "' + name + '"?')) return;
+        try {
+          const r = await fetch('/api/0/notification_channels/' + encodeURIComponent(name), { method: 'DELETE' });
+          if (r.ok) {
+            window.location.reload();
+          } else {
+            const err = await r.json().catch(() => ({}));
+            alert('Error: ' + (err.error || 'Could not delete.'));
+          }
+        } catch(e) { alert('Network error: ' + e.message); }
+      }
+
+      // ---- Utilities ----
+      function showStatus(id, msg, color) {
+        const el = document.getElementById(id);
+        if (!el) return;
+        el.textContent = msg;
+        el.style.color = color;
+        setTimeout(() => { el.textContent = ''; }, 3000);
+      }
+
+      function escHtml(s) {
+        return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
+      }
+
+      document.addEventListener('DOMContentLoaded', _loadMyChSchemas);
+    </script>
  </body>
 </html>
@@ -36,6 +36,7 @@ class ComparisonOperator(Enum):
    LTE = "<="      # Less than or equal
    EQ = "=="       # Equal to
    NEQ = "!="      # Not equal to
+    NAGIOS = "nagios"  # Nagios exit-code semantics: 0=OK 1=WARNING 2=CRITICAL 3=UNKNOWN


 class AlertState:
@@ -57,6 +58,7 @@ class AlertState:
        self.last_notification = None
        self.threshold_value = None  # The threshold value that triggered alert
        self.operator = None  # The comparison operator (>, <, >=, etc.)
+        self.hysteresis: Optional[float] = None  # Hysteresis fraction used for recovery
        self.formatted_message = None  # Formatted display message for UI
        self.acknowledged = False  # Whether alert has been acknowledged
        self.acknowledged_at = None  # Timestamp when acknowledged
@@ -152,6 +154,15 @@ class AlertState:
        if self.formatted_message is not None:
            result["formatted_message"] = self.formatted_message

+        # Compute and expose the recovery threshold so the UI can display it
+        if (self.hysteresis and self.threshold_value is not None
+                and self.operator is not None):
+            ha = abs(self.threshold_value * self.hysteresis)
+            if self.operator in ('>', '>='):
+                result["recovery_threshold"] = round(self.threshold_value - ha, 4)
+            elif self.operator in ('<', '<='):
+                result["recovery_threshold"] = round(self.threshold_value + ha, 4)
+
        return result
    
    def __setstate__(self, state):
@@ -159,6 +170,8 @@ class AlertState:
        self.__dict__.update(state)
        if not hasattr(self, 'consecutive_count'):
            self.consecutive_count = 0
+        if not hasattr(self, 'hysteresis'):
+            self.hysteresis = None

    def acknowledge(self):
        """Acknowledge this alert to stop reminder notifications."""
@@ -182,6 +195,7 @@ class ThresholdConfig:
        hysteresis: float = 0.0,
        enabled: bool = True,
        count: int = 1,
+        grace: Optional[float] = None,
    ):
        """
        Initialize threshold configuration.
@@ -194,6 +208,7 @@ class ThresholdConfig:
            hysteresis: Hysteresis percentage to prevent flapping (0.0-1.0)
            enabled: Whether this threshold is enabled
            count: Number of consecutive exceedances required before alerting (default 1)
+            grace: Per-metric grace period in seconds; overrides global grace when set
        """
        self.metric_path = metric_path
        self.warning = warning
@@ -202,6 +217,7 @@ class ThresholdConfig:
        self.hysteresis = hysteresis
        self.display = display
        self.count = max(1, int(count))
+        self.grace = float(grace) if grace is not None else None
        
        # Parse operator
        try:
@@ -227,6 +243,16 @@ class ThresholdConfig:
        if not self.enabled:
            return AlertLevel.OK

+        # Nagios exit-code semantics: value IS the severity
+        if self.operator == ComparisonOperator.NAGIOS:
+            try:
+                code = int(value)
+            except (TypeError, ValueError):
+                return AlertLevel.UNKNOWN
+            return {0: AlertLevel.OK, 1: AlertLevel.WARNING, 2: AlertLevel.CRITICAL}.get(
+                code, AlertLevel.UNKNOWN
+            )
+
        try:
            # Convert value to float for comparison
            value = float(value)
@@ -263,6 +289,10 @@ class ThresholdConfig:
        """
        new_level = self.evaluate(value)

+        # Nagios exit codes are discrete integers — hysteresis doesn't apply
+        if self.operator == ComparisonOperator.NAGIOS:
+            return new_level
+
        # If no hysteresis, return new level
        if self.hysteresis == 0.0:
            return new_level
@@ -396,10 +426,24 @@ class ThresholdChecker:
        Supports two formats:
        1. Legacy format with direct 'thresholds' section
        2. New format with 'threshold_configs' and 'host_threshold_mapping'
+
+        In all cases, THRESHOLD_DEFAULTS are seeded into threshold_configs["default"]
+        so the Settings page always shows the built-in defaults.
+        _parse_multi_config() overwrites this with the fully-merged effective defaults.
        """
+        # Always expose built-in defaults through threshold_configs["default"] so
+        # the Settings page has something to display even in legacy/no-config mode.
+        seed: Dict[str, ThresholdConfig] = {}
+        for plugin_name, plugin_thresholds in THRESHOLD_DEFAULTS.get("thresholds", {}).items():
+            if isinstance(plugin_thresholds, dict):
+                self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=seed)
+        if seed:
+            self.threshold_configs["default"] = seed
+            self.threshold_raw_configs["default"] = {}
+
        # Check for new multi-config format
        if "threshold_configs" in config:
-            self._parse_multi_config(config)
+            self._parse_multi_config(config)  # overwrites threshold_configs["default"]
        elif "thresholds" in config:
            # Legacy single threshold configuration
            self._parse_legacy_config(config)
@@ -451,7 +495,27 @@ class ThresholdChecker:
            raw_overrides: Dict[str, ThresholdConfig] = {}
            thresholds_config = config_data["thresholds"]
            for plugin_name, plugin_thresholds in thresholds_config.items():
-                if isinstance(plugin_thresholds, dict):
+                if not isinstance(plugin_thresholds, dict):
+                    continue
+                plugin_enabled = plugin_thresholds.get('enabled', plugin_thresholds.get('enable', True))
+                if not plugin_enabled:
+                    # raw_overrides is empty at this point so there's nothing to delete.
+                    # Instead, inject disabled stubs for every matching effective_default so
+                    # the merge step overwrites the inherited defaults.
+                    for key, tc in effective_defaults.items():
+                        if key.startswith(f"{plugin_name}."):
+                            raw_overrides[key] = ThresholdConfig(
+                                metric_path=key,
+                                warning=tc.warning,
+                                critical=tc.critical,
+                                operator=tc.operator.value,
+                                enabled=False,
+                            )
+                    logger.info(
+                        "Plugin-level disable in config '%s': disabled all thresholds for %s",
+                        config_name, plugin_name,
+                    )
+                else:
                    self._parse_plugin_thresholds(plugin_name, plugin_thresholds, target_dict=raw_overrides)
            self.threshold_raw_configs[config_name] = raw_overrides

@@ -530,14 +594,26 @@ class ThresholdChecker:
            self._parse_rtt_thresholds(thresholds, target_dict)
            return

+        # Plugin-level enabled: false (also accept 'enable' as a common typo) removes all
+        # thresholds for this plugin — e.g. memory_monitor: {enabled: false}.
+        plugin_enabled = thresholds.get('enabled', thresholds.get('enable', True))
+        if not plugin_enabled:
+            for key in [k for k in target_dict if k.startswith(f"{plugin_name}.")]:
+                del target_dict[key]
+            logger.info("Plugin-level disable: removed all thresholds for %s", plugin_name)
+            return
+
        for metric_name, threshold_config in thresholds.items():
            if not isinstance(threshold_config, dict):
                continue
            
-            # Handle nested metrics (e.g., partitions./.percent)
+            # Handle nested metrics (e.g., partitions./.percent or pools.*.status)
            if metric_name == "partitions":
                self._parse_partition_thresholds(plugin_name, threshold_config, target_dict)
                continue
+            if metric_name == "pools":
+                self._parse_pool_thresholds(plugin_name, threshold_config, target_dict)
+                continue
            
            metric_path = f"{plugin_name}.{metric_name}"
            
@@ -545,11 +621,15 @@ class ThresholdChecker:
            warning = threshold_config.get("warning")
            critical = threshold_config.get("critical")
            operator = threshold_config.get("operator", ">")
-            display = threshold_config.get("display", "(threshold: {op_symbol} {threshold_value})")
-            hysteresis = threshold_config.get("hysteresis", 0.1)  # 10% default
+            # Nagios operator maps exit codes directly; no numeric thresholds needed
+            is_nagios_op = (operator == "nagios")
+            default_display = "{check_name}: {output}" if is_nagios_op else "(threshold: {op_symbol} {threshold_value})"
+            display = threshold_config.get("display", default_display)
+            hysteresis = threshold_config.get("hysteresis", 0.0 if is_nagios_op else 0.02)
            enabled = threshold_config.get("enabled", True)
+            grace = threshold_config.get("grace", None)

-            if warning is None and critical is None:
+            if warning is None and critical is None and not is_nagios_op:
                logger.warning("No thresholds defined for %s, skipping", metric_path)
                continue

@@ -560,7 +640,8 @@ class ThresholdChecker:
                operator=operator,
                hysteresis=hysteresis,
                enabled=enabled,
-                display=display
+                display=display,
+                grace=grace,
            )
            
            target_dict[metric_path] = threshold
@@ -605,6 +686,7 @@ class ThresholdChecker:
                hysteresis = threshold_config.get("hysteresis", 0.1)
                enabled = threshold_config.get("enabled", True)
                display = threshold_config.get("display")
+                grace = threshold_config.get("grace", None)
                if warning is None and critical is None:
                    continue

@@ -615,11 +697,64 @@ class ThresholdChecker:
                    operator=operator,
                    hysteresis=hysteresis,
                    enabled=enabled,
-                    display=display 
+                    display=display,
+                    grace=grace,
                )
                
                target_dict[metric_path] = threshold

+    def _parse_pool_thresholds(
+        self,
+        plugin_name: str,
+        pools: Dict[str, Any],
+        target_dict: Optional[Dict[str, ThresholdConfig]] = None,
+    ):
+        """Parse ZFS pool thresholds.  Pool names may be literal or '*' (all pools).
+
+        Config shape::
+
+            zfs_monitor:
+              pools:
+                '*':
+                  status:
+                    warning: 1
+                    critical: 2
+                    operator: '>'
+                tank:
+                  capacity:
+                    warning: 80
+                    critical: 90
+        """
+        if target_dict is None:
+            target_dict = self.thresholds
+
+        for pool_name, metrics in pools.items():
+            if not isinstance(metrics, dict):
+                continue
+            for metric_name, threshold_config in metrics.items():
+                if not isinstance(threshold_config, dict):
+                    continue
+                metric_path = f"{plugin_name}.{pool_name}.{metric_name}"
+                warning = threshold_config.get("warning")
+                critical = threshold_config.get("critical")
+                operator = threshold_config.get("operator", ">")
+                hysteresis = threshold_config.get("hysteresis", 0.02)
+                enabled = threshold_config.get("enabled", True)
+                display = threshold_config.get("display")
+                grace = threshold_config.get("grace", None)
+                if warning is None and critical is None:
+                    continue
+                target_dict[metric_path] = ThresholdConfig(
+                    metric_path=metric_path,
+                    warning=warning,
+                    critical=critical,
+                    operator=operator,
+                    hysteresis=hysteresis,
+                    enabled=enabled,
+                    display=display,
+                    grace=grace,
+                )
+
    def _parse_rtt_thresholds(
        self,
        rtt_thresholds: Dict[str, Any],
@@ -649,10 +784,11 @@ class ThresholdChecker:
        warning = rtt_thresholds.get("warning")
        critical = rtt_thresholds.get("critical")
        operator = rtt_thresholds.get("operator", ">")
-        hysteresis = rtt_thresholds.get("hysteresis", 0.1)  # 10% default
+        hysteresis = rtt_thresholds.get("hysteresis", 0.02)  # 2% default
        enabled = rtt_thresholds.get("enabled", True)
        display = rtt_thresholds.get("display")
        count = rtt_thresholds.get("count", 1)
+        grace = rtt_thresholds.get("grace", None)

        if warning is None and critical is None:
            logger.warning("No RTT thresholds defined, skipping")
@@ -667,6 +803,7 @@ class ThresholdChecker:
            enabled=enabled,
            display=display,
            count=count,
+            grace=grace,
        )

        target_dict[metric_path] = threshold
@@ -794,6 +931,12 @@ class ThresholdChecker:
        elif new_level == AlertLevel.WARNING and threshold.warning is not None:
            threshold_value = threshold.warning

+        # Keep hysteresis on the state so the UI can show the recovery threshold
+        if new_level != AlertLevel.OK:
+            alert_state.hysteresis = threshold.hysteresis
+        else:
+            alert_state.hysteresis = None
+
        # Update state and check for changes
        old_level = alert_state.level
        if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
@@ -805,26 +948,33 @@ class ThresholdChecker:
        return None
    def _find_threshold(
        self, thresholds: Dict[str, "ThresholdConfig"], metric_path: str
-    ) -> Optional["ThresholdConfig"]:
-        """Return the threshold for *metric_path*, falling back to suffix matches.
+    ) -> Tuple[Optional["ThresholdConfig"], Optional[str]]:
+        """Return (threshold, check_name) for *metric_path*, falling back to suffix matches.

-        Allows generic thresholds like ``ping_monitor.rtt_avg`` to match
-        fully-qualified paths like ``ping_monitor.8_8_8_8_rtt_avg``.
+        Allows generic thresholds like ``nagios_runner.status_code`` to match
+        fully-qualified paths like ``nagios_runner.check_disk_root_status_code``.
        The exact match is always tried first; then successive leading
        underscore-delimited segments are stripped from the field name until
        a match is found or no segments remain.
+
+        Returns:
+            (ThresholdConfig, None) for an exact match.
+            (ThresholdConfig, "check_disk_root") for a suffix match — the second
+            element is the stripped prefix, available as ``{check_name}`` in
+            display format templates.
+            (None, None) when no threshold is found.
        """
        if metric_path in thresholds:
-            return thresholds[metric_path]
+            return thresholds[metric_path], None  # exact hit: nothing was stripped
        plugin, sep, field = metric_path.partition(".")
        if not sep:
-            return None
+            return None, None  # no "." in the path, so there is no field part to shorten
        parts = field.split("_")
        for i in range(1, len(parts)):
            candidate = plugin + "." + "_".join(parts[i:])
            if candidate in thresholds:
-                return thresholds[candidate]
-        return None
+                return thresholds[candidate], "_".join(parts[:i])  # leading segments that were dropped
+        return None, None  # neither the exact path nor any shortened candidate is configured

    def check_plugin_data(
        self,
@@ -854,7 +1004,7 @@ class ThresholdChecker:
        for metric_name, value in data.items():
            metric_path = f"{plugin_name}.{metric_name}"

-            threshold = self._find_threshold(thresholds, metric_path)
+            threshold, check_name = self._find_threshold(thresholds, metric_path)
            if threshold is None:
                continue

@@ -877,13 +1027,15 @@ class ThresholdChecker:
            elif new_level == AlertLevel.WARNING and threshold.warning is not None:
                threshold_value = threshold.warning

+            alert_state.hysteresis = threshold.hysteresis if new_level != AlertLevel.OK else None
+
            # Update state and check for changes
            old_level = alert_state.level
            if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
                state_changes.append((metric_path, old_level, new_level, value))
-                self._apply_grace(host_name, alert_state, metric_path, old_level, new_level, value, threshold, data)
+                self._apply_grace(host_name, alert_state, metric_path, old_level, new_level, value, threshold, data, check_name=check_name, metric_name=metric_name)
            elif new_level != AlertLevel.OK:
-                self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, data)
+                self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, data, check_name=check_name, metric_name=metric_name)

        # Check nested metrics (e.g., partition data in disk_monitor)
        self._check_nested_metrics(
@@ -908,6 +1060,44 @@ class ThresholdChecker:
        # Get host-specific thresholds
        thresholds = self.get_thresholds_for_host(host_name)
        
+        # ZFS pool health checks
+        if plugin_name == "zfs_monitor" and "pools" in data:
+            pools = data["pools"]
+            if isinstance(pools, dict):
+                for pool_name, pool_metrics in pools.items():
+                    if not isinstance(pool_metrics, dict):
+                        continue
+                    # Synthesize status from health string for older clients
+                    # that predate the status field.
+                    pool_metrics_effective = dict(pool_metrics)
+                    if "health" in pool_metrics and "status" not in pool_metrics:
+                        pool_metrics_effective["status"] = 0 if pool_metrics["health"] == "ONLINE" else 1
+                    for metric_name, value in pool_metrics_effective.items():
+                        # Try specific pool name first, then wildcard '*'
+                        metric_path = f"{plugin_name}.{pool_name}.{metric_name}"
+                        wildcard_path = f"{plugin_name}.*.{metric_name}"
+                        threshold = thresholds.get(metric_path) or thresholds.get(wildcard_path)
+                        if threshold is None:
+                            continue
+                        if metric_path not in alert_states:
+                            alert_states[metric_path] = AlertState(metric_path)
+                        alert_state = alert_states[metric_path]
+                        new_level = threshold.evaluate_with_hysteresis(value, alert_state.level)
+                        threshold_value = None
+                        if new_level == AlertLevel.CRITICAL and threshold.critical is not None:
+                            threshold_value = threshold.critical
+                        elif new_level == AlertLevel.WARNING and threshold.warning is not None:
+                            threshold_value = threshold.warning
+                        alert_state.hysteresis = threshold.hysteresis if new_level != AlertLevel.OK else None
+                        pool_context = dict(pool_metrics_effective)
+                        pool_context["pool_name"] = pool_name
+                        old_level = alert_state.level
+                        if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
+                            state_changes.append((metric_path, old_level, new_level, value))
+                            self._apply_grace(host_name, alert_state, metric_path, old_level, new_level, value, threshold, pool_context, metric_name=pool_name)
+                        elif new_level != AlertLevel.OK:
+                            self._check_pending_or_renotify(host_name, alert_state, metric_path, value, threshold, pool_context, metric_name=pool_name)
+
        # Look for partition data in disk_monitor
        if plugin_name == "disk_monitor" and "partitions" in data:
            partitions = data["partitions"]
@@ -943,6 +1133,8 @@ class ThresholdChecker:
                    elif new_level == AlertLevel.WARNING and threshold.warning is not None:
                        threshold_value = threshold.warning

+                    alert_state.hysteresis = threshold.hysteresis if new_level != AlertLevel.OK else None
+
                    old_level = alert_state.level
                    if alert_state.update(new_level, value, threshold_value, threshold.operator.value):
                        state_changes.append((metric_path, old_level, new_level, value))
@@ -959,6 +1151,8 @@ class ThresholdChecker:
        value: Any,
        threshold: ThresholdConfig,
        plugin_data: Optional[Dict[str, Any]] = None,
+        check_name: Optional[str] = None,
+        metric_name: Optional[str] = None,
    ):
        """Trigger a notification for an alert state change.
        
@@ -981,54 +1175,52 @@ class ThresholdChecker:
        # Format operator symbol
        op_symbol = threshold.operator.value

+        # Short metric label: strip the plugin-name prefix and _status_code suffix
+        short_path = (metric_path.partition(".")[2] or metric_path).removesuffix("_status_code")
+
        # Use a display-friendly value (inf is the sentinel for "overdue")
        import math
        display_value = "overdue" if isinstance(value, float) and math.isinf(value) else value

-        # Format message
+        # Format message — for the nagios operator there is no numeric threshold_value;
+        # render the display template whenever one is available.
+        has_display = threshold_value is not None or threshold.operator == ComparisonOperator.NAGIOS
+
+        def _fmt():
+            return self._format_display(
+                threshold.display,
+                value=display_value,
+                threshold_value=threshold_value,
+                op_symbol=op_symbol,
+                plugin_data=plugin_data,
+                check_name=check_name,
+                metric_name=metric_name,
+            )
+
        if new_level == AlertLevel.OK:
            lvl = "RECOVER"
-            message = f"{metric_path} = {display_value} ({old_level.name} -> OK)"
+            message = f"{short_path} = {display_value} ({old_level.name} -> OK)"
        elif new_level == AlertLevel.WARNING:
            lvl = "WARNING"
-            if threshold_value is not None:
-                threshold_info = self._format_display(
-                    threshold.display,
-                    value=display_value,
-                    threshold_value=threshold_value,
-                    op_symbol=op_symbol,
-                    plugin_data=plugin_data
-                )
-                message = f"{metric_path} = {display_value} {threshold_info}"
+            if has_display:
+                message = f"{short_path} = {display_value} {_fmt()}"
            else:
-                message = f"{metric_path} = {display_value}"
+                message = f"{short_path} = {display_value}"
        elif new_level == AlertLevel.CRITICAL:
            lvl = "CRITICAL"
-            if threshold_value is not None:
-                threshold_info = self._format_display(
-                    threshold.display,
-                    value=display_value,
-                    threshold_value=threshold_value,
-                    op_symbol=op_symbol,
-                    plugin_data=plugin_data
-                )
-                message = f"{metric_path} = {display_value} {threshold_info}"
+            if has_display:
+                message = f"{short_path} = {display_value} {_fmt()}"
            else:
-                message = f"{metric_path} = {display_value}"
+                message = f"{short_path} = {display_value}"
        else:
            lvl = "UNKNOWN"
-            message = f"{metric_path} = {display_value}"
+            if has_display:
+                message = f"{short_path} = {display_value} {_fmt()}"
+            else:
+                message = f"{short_path} = {display_value}"

-        # Return the formatted threshold info for storing in AlertState
-        formatted_threshold_msg = None
-        if threshold_value is not None and new_level != AlertLevel.OK:
-            formatted_threshold_msg = self._format_display(
-                threshold.display,
-                value=display_value,
-                threshold_value=threshold_value,
-                op_symbol=op_symbol,
-                plugin_data=plugin_data
-            )
+        # Formatted threshold info stored on AlertState for the UI
+        formatted_threshold_msg = _fmt() if has_display and new_level != AlertLevel.OK else None

        return lvl, message, formatted_threshold_msg
    
@@ -1048,11 +1240,16 @@ class ThresholdChecker:
        if host is not None and not host.watched:
            eventlog(host_name, lvl, message, service="threshold")
            return
+        short_path = (metric_path.partition(".")[2] or metric_path).removesuffix("_status_code")
+        title = f"[{lvl}] {host_name}  {short_path}"
+        # Strip the "metric = " prefix from message so body is just the value/detail
+        prefix = short_path + " = "
+        body = message[len(prefix):] if message.startswith(prefix) else message
        asyncio.get_event_loop().create_task(notify_mod.send_notification(
            host_name,
            notify_mod.Notification(
-                title=f"[{lvl}] {host_name}",
-                body=message,
+                title=title,
+                body=body,
                level=lvl,
            ),
        ))
@@ -1077,33 +1274,62 @@ class ThresholdChecker:
        self,
        display_format: str,
        value: Any,
-        threshold_value: float,
+        threshold_value: Optional[float],
        op_symbol: str,
        plugin_data: Optional[Dict[str, Any]] = None,
+        check_name: Optional[str] = None,
+        metric_name: Optional[str] = None,
    ) -> str:
        """Format the display string using available data.

-        Args:
-            display_format: Format string from threshold config
-            value: Current metric value
-            threshold_value: Threshold value that was exceeded
-            op_symbol: Comparison operator symbol
-            plugin_data: Optional dictionary of plugin data fields
+        Available template variables:
+            {value}           - current metric value
+            {threshold_value} - threshold that was exceeded
+            {op_symbol}       - comparison operator (>, <, >=, <=, ==, !=)
+            {check_name}      - prefix stripped for generic threshold match
+                                (e.g. "check_disk_root" when metric
+                                "check_disk_root_status_code" matched generic
+                                threshold "status_code")
+            {metric_name}     - field name within the plugin data dict
+            Any key from plugin_data is also available.

        Returns:
            Formatted display string
        """
+        if not display_format:
+            display_format = "(threshold: {op_symbol} {threshold_value})" if threshold_value is not None else ""
+
        # Build format context with standard variables
        format_context = {
            'value': value,
-            'threshold_value': threshold_value,
            'op_symbol': op_symbol,
        }
+        if threshold_value is not None:
+            format_context['threshold_value'] = threshold_value
+
+        # Add generic-match context variables when available
+        if check_name is not None:
+            format_context['check_name'] = check_name
+        if metric_name is not None:
+            format_context['metric_name'] = metric_name

        # Add all plugin data fields if available
        if plugin_data:
            format_context.update(plugin_data)

+        # For nagios_runner generic matches, expose the matched check's output
+        # and status as short aliases {output} and {status} so display templates
+        # don't need to use the full {check_disk_root_output} form.
+        if check_name and plugin_data:
+            if 'output' not in format_context:
+                output = plugin_data.get(f"{check_name}_output")
+                if output is not None:
+                    format_context['output'] = output
+            if 'status' not in format_context:
+                status = plugin_data.get(f"{check_name}_status")
+                if status is not None:
+                    format_context['status'] = status
+        
        try:
            # Format the display string
            return display_format.format(**format_context)
@@ -1133,10 +1359,14 @@ class ThresholdChecker:
        value: Any,
        threshold: ThresholdConfig,
        plugin_data: Optional[Dict[str, Any]],
+        check_name: Optional[str] = None,
+        metric_name: Optional[str] = None,
    ) -> None:
        """Handle a state-change transition with grace-period logic.

-        Transitioning INTO alert (worsening): defers the notification for grace_seconds.
+        Transitioning INTO alert (worsening): defers the notification for the effective
+        grace period (threshold.grace if set, else self.grace_seconds). Grace of 0 fires
+        the notification immediately with no deferral.
        De-escalation within alert states (e.g. CRITICAL→WARNING): no new notification;
          the metric is still alerting so no RECOVER was sent.
        Transitioning TO OK:
@@ -1144,8 +1374,11 @@ class ThresholdChecker:
            and the recovery — the spike never warranted a page.
          - Past grace: fires the RECOVER notification normally.
        """
+        effective_grace = threshold.grace if threshold.grace is not None else self.grace_seconds
+
        lvl, message, formatted_msg = self._trigger_notification(
-            host_name, metric_path, old_level, new_level, value, threshold, plugin_data
+            host_name, metric_path, old_level, new_level, value, threshold, plugin_data,
+            check_name=check_name, metric_name=metric_name,
        )
        alert_state.formatted_message = formatted_msg

@@ -1153,17 +1386,24 @@ class ThresholdChecker:
            if alert_state.pending_since is not None:
                logger.info(
                    "Alert suppressed (recovered within %.0fs grace): %s on %s",
-                    self.grace_seconds, metric_path, host_name,
+                    effective_grace, metric_path, host_name,
                )
                alert_state.pending_since = None
            else:
                self._send_notification(host_name, lvl, message, metric_path, old_level, new_level, value)
        elif new_level.value > old_level.value:
-            # Worsening (OK→WARNING, OK→CRITICAL, WARNING→CRITICAL): schedule notification.
+            # Worsening (OK→WARNING, OK→CRITICAL, WARNING→CRITICAL).
+            if effective_grace <= 0:
+                # No grace period — fire immediately.
+                self._send_notification(host_name, lvl, message, metric_path, old_level, new_level, value)
+                now = time.time()
+                alert_state.last_notification = now
+                alert_state.notification_count = 1
+            else:
                alert_state.pending_since = time.time()
                logger.debug(
                    "Alert deferred (%.0fs grace): %s on %s = %s",
-                self.grace_seconds, metric_path, host_name, value,
+                    effective_grace, metric_path, host_name, value,
                )
        else:
            # De-escalation within alert states (e.g. CRITICAL→WARNING): metric is still
@@ -1181,25 +1421,43 @@ class ThresholdChecker:
        value: Any,
        threshold: ThresholdConfig,
        plugin_data: Optional[Dict[str, Any]],
+        check_name: Optional[str] = None,
+        metric_name: Optional[str] = None,
    ) -> None:
        """Called when alert level is unchanged and non-OK.

        If a deferred notification is pending and grace_seconds have elapsed,
        fires it now. Otherwise falls through to normal reminder logic.
        """
+        effective_grace = threshold.grace if threshold.grace is not None else self.grace_seconds
        if alert_state.pending_since is not None:
-            if time.time() - alert_state.pending_since >= self.grace_seconds:
+            if time.time() - alert_state.pending_since >= effective_grace:
                lvl, message, formatted_msg = self._trigger_notification(
-                    host_name, metric_path, AlertLevel.OK, alert_state.level, value, threshold, plugin_data
+                    host_name, metric_path, AlertLevel.OK, alert_state.level, value, threshold, plugin_data,
+                    check_name=check_name, metric_name=metric_name,
                )
                alert_state.formatted_message = formatted_msg
                self._send_notification(
                    host_name, lvl, message, metric_path, AlertLevel.OK, alert_state.level, value
                )
                alert_state.pending_since = None
+                now = time.time()
+                alert_state.last_notification = now
+                alert_state.notification_count = 1
            # else: still within grace window, do nothing
        else:
-            self._check_renotify(host_name, alert_state, metric_path, value, threshold, plugin_data)
+            self._check_renotify(host_name, alert_state, metric_path, value, threshold, plugin_data, check_name=check_name, metric_name=metric_name)
+
+    @staticmethod
+    def _human_duration(seconds: float) -> str:
+        """Render a duration in seconds as a short human-readable string.
+
+        The input is truncated to whole seconds with ``int()``.  Values below
+        120 are shown as seconds (``"90s"``), values below 3600 as minutes and
+        seconds (``"5m 30s"``), and anything longer as hours and minutes
+        (``"2h 15m"``), dropping the minutes part when it is zero (``"2h"``).
+        Leftover seconds are not shown once the hours form is used.
+        """
+        s = int(seconds)
+        if s < 120:
+            return f"{s}s"
+        if s < 3600:
+            return f"{s // 60}m {s % 60}s"
+        h, rem = divmod(s, 3600)
+        m = rem // 60
+        return f"{h}h {m}m" if m else f"{h}h"

    def _check_renotify(
        self,
@@ -1209,6 +1467,8 @@ class ThresholdChecker:
        value: Any,
        threshold: ThresholdConfig,
        plugin_data: Optional[Dict[str, Any]] = None,
+        check_name: Optional[str] = None,
+        metric_name: Optional[str] = None,
    ):
        """Check if we should send a repeat notification.
        
@@ -1246,6 +1506,7 @@ class ThresholdChecker:
            
            # Format operator symbol
            op_symbol = threshold.operator.value
+            short_path = (metric_path.partition(".")[2] or metric_path).removesuffix("_status_code")

            # Time to re-notify
            if threshold_value is not None:
@@ -1255,11 +1516,14 @@ class ThresholdChecker:
                    value=value,
                    threshold_value=threshold_value,
                    op_symbol=op_symbol,
-                    plugin_data=plugin_data
+                    plugin_data=plugin_data,
+                    check_name=check_name,
+                    metric_name=metric_name,
                )
-                message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} {threshold_info}, ongoing for {int(now - alert_state.since)}s"
+                body = f"{value} {threshold_info}, ongoing for {self._human_duration(now - alert_state.since)}"
            else:
-                message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} (ongoing for {int(now - alert_state.since)}s)"
+                body = f"{value} (ongoing for {self._human_duration(now - alert_state.since)})"
+            message = f"REMINDER ({alert_state.level.name}): {host_name} - {short_path} = {body}"

            from . import hbdclass
            host = hbdclass.Host.hosts.get(host_name)
@@ -1267,8 +1531,8 @@ class ThresholdChecker:
                asyncio.get_event_loop().create_task(notify_mod.send_notification(
                    host_name,
                    notify_mod.Notification(
-                        title=f"[REMINDER/{alert_state.level.name}] {host_name}",
-                        body=message,
+                        title=f"[REMINDER/{alert_state.level.name}] {host_name}  {short_path}",
+                        body=body,
                        level=alert_state.level.name,
                    ),
                ))
@@ -1288,7 +1552,20 @@ class ThresholdChecker:
            if not host.alert_states:
                continue
            configured = self.get_thresholds_for_host(hostname)
-            stale = [mp for mp in host.alert_states if mp not in configured]
+            stale = []
+            for mp in host.alert_states:
+                # connectivity.* and rtt are managed by the connection state
+                # machine, not by threshold config — never purge them.
+                if mp == "rtt" or mp.startswith("connectivity"):
+                    continue
+                if self._find_threshold(configured, mp)[0] is not None:
+                    continue
+                # Also match wildcard pool/partition thresholds (e.g. "zfs_monitor.*.status"
+                # covers alert state "zfs_monitor.tank.status").
+                parts = mp.split(".")
+                if len(parts) == 3 and f"{parts[0]}.*.{parts[2]}" in configured:
+                    continue
+                stale.append(mp)
            for mp in stale:
                logger.info(
                    "Purging stale alert state for %s / %s (no threshold configured)",
@@ -232,6 +232,23 @@ def _make_timer_callbacks(uname, host, ctx):
    return on_overdue, on_unknown


+def _make_plugin_stale_callback(uname, ctx):
+    """Return an async callback that clears stale plugin data and its alerts.
+
+    Args:
+        uname: Host name used for the event-log entry and for the ``host``
+            field of the ``plugin_stale`` websocket payload.
+        ctx: Mapping read for an optional ``"msg_to_websockets"`` callable;
+            when it is missing or falsy, no websocket messages are sent.
+
+    Returns:
+        The coroutine function ``on_plugin_stale(host, plugin_name)``.
+    """
+    msg_to_websockets = ctx.get("msg_to_websockets")
+
+    async def on_plugin_stale(host, plugin_name):
+        # Drop the stored data for this plugin; the default avoids a KeyError
+        # when nothing is stored under that name.
+        host.plugin_data.pop(plugin_name, None)
+        # Collect matching keys first so the dict is not modified while it is
+        # being iterated.  The trailing "." limits the match to this plugin's
+        # own "<plugin_name>.<...>" alert paths.
+        stale_keys = [k for k in host.alert_states if k.startswith(f"{plugin_name}.")]
+        for k in stale_keys:
+            del host.alert_states[k]
+        eventlog(uname, "INFO", f"plugin data stale: {plugin_name}")
+        if msg_to_websockets:
+            # Announce which plugin went stale, then push the host's state info.
+            msg_to_websockets("plugin_stale", {"host": uname, "plugin": plugin_name})
+            msg_to_websockets("host", host.stateinfo())
+
+    return on_plugin_stale
+
+
 def restore_connection_timers(hbdclass, ctx):
    """Restore overdue timers for all loaded connections after a pickle restore.

@@ -249,10 +266,15 @@ def restore_connection_timers(hbdclass, ctx):
        for afam, conn in list(host.connections.items()):
            state = conn.getstate()
            if state == hbdclass.Connection.DOWN:
+                _set_connectivity_alert(host, afam, "CRITICAL")
                continue

            on_overdue, on_unknown = _make_timer_callbacks(uname, host, ctx)

+            if state == hbdclass.Connection.UNKNOWN:
+                _set_connectivity_alert(host, afam, "CRITICAL")
+                continue
+
            if state == hbdclass.Connection.UP and interval > 0:
                elapsed = now - conn.lastbeat
                # Give hosts one full (interval + grace) of extra time on startup
@@ -283,6 +305,10 @@ def restore_connection_timers(hbdclass, ctx):
                        "Restored OVERDUE timer %s/%s: %.0fs remaining",
                        uname, afam, remaining,
                    )
+                # Ensure the connectivity alert is set — it may be missing if
+                # hbd was shut down before the on_overdue callback had a chance
+                # to record it.
+                _set_connectivity_alert(host, afam, "CRITICAL")
                restored += 1

    logger.info("Restored timers for %d connection(s)", restored)
@@ -333,11 +359,12 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
        # Use new config function to check dyndns
        dyndnshosts = config_mod.get_dyndnshosts(cfg)
        host.dyn = uname in dyndnshosts
+        watchhosts = config_mod.get_watchhosts(cfg)
+        host.watched = uname in watchhosts
        # Apply user-access settings from config
        access = config_mod.get_host_access(cfg, uname)
        host.apply_access(access["owner"], access["managers"], access["monitors"])
-        if verbose:
-            print(("XX: New host, num now %s" % (len(hbdcls.Host.hosts))))
+        logger.info("New host signed on: %s (dyn=%s, access=%s)", uname, host.dyn, access)
        newh = True
    else:
        host = hbdcls.Host.hosts[uname]
@@ -351,8 +378,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):

    if msg.get("ID") == "HTB":
        host.doesack = msg.get("acks", -1)
-        # send ACK back
+        # send ACK back; ask client to resend plugin info when we have none yet
        rmsg = {"time": time.time()}
+        if not host.plugin_data:
+            rmsg["request_update"] = 1
        opkt = dicttos("ACK", rmsg)
        try:
            transport.sendto(opkt, addr)
@@ -369,6 +398,35 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
                           if k not in ("ID", "plugin", "id", "name")}
            # Store plugin data with timestamp
            host.add_plugin_data(plugin_name, plugin_data, timestamp=now)
+            # Reset stale timer using the observed send interval for this plugin.
+            # We need two samples to know the real interval; on the first sample
+            # we cancel any leftover timer but don't set a new one, to avoid
+            # false-stale firing for slow plugins (e.g. nagios_runner at 300 s).
+            history = host.plugin_data.get(plugin_name, [])
+            if len(history) >= 2:
+                plugin_interval = max(history[-1][0] - history[-2][0], 1)
+                host.reset_plugin_timer(plugin_name, plugin_interval * 3,
+                                        _make_plugin_stale_callback(uname, ctx))
+                # Remove alert states for metrics present in the previous sample
+                # but absent now (e.g. a nagios check removed from configuration).
+                prev_keys = set(history[-2][1].keys())
+                curr_keys = set(plugin_data.keys())
+                for metric_name in prev_keys - curr_keys:
+                    metric_path = f"{plugin_name}.{metric_name}"
+                    if host.alert_states.pop(metric_path, None) is not None:
+                        eventlog(uname, "INFO", f"stale check removed: {metric_path}")
+                if (prev_keys - curr_keys) and msg_to_websockets:
+                    msg_to_websockets("host", host.stateinfo())
+            else:
+                host.cancel_plugin_timer(plugin_name)
+
+            # If os_info reports an owner and none is configured server-side, apply it
+            if plugin_name == "os_info":
+                config_owner =  config_mod.get_host_access(cfg, uname).get("owner")
+                default_owner = config_mod.get_default_owner(cfg)
+                inferred_owner = config_owner or plugin_data.get("owner") or default_owner
+                host.owner = inferred_owner
+                logger.info(f"owner for {uname} is {host.owner}")
            if DEBUG > 1:
                print(f"Stored plugin data for {uname}: {plugin_name}")
            
@@ -421,6 +479,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
    boot = msg.get("boot", 0)

    if boot:
+        # hbc was started with the -b flag
        eventlog(uname, "INFO", "booted")
        if host.watched:
            asyncio.create_task(notify_mod.send_notification(
@@ -428,11 +487,24 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
                notify_mod.Notification(title=f"[INFO] {uname}", body=f"{host.name} booted", level="INFO"),
            ))
    if message:
-        eventlog(uname, "INFO", "msg: %s" % message, service=service)
+        eventlog(uname, "INFO", message, service=service)

    if conn.getstate() != hbdcls.Connection.UP:
+        # Transition to UP and log/notify if appropriate
        lasts = conn.state
        d = conn.newstate(hbdcls.Connection.UP, now)
+        # On reboot, pre-boot plugin data and derived alerts are stale.
+        # Cancel all plugin timers and wipe plugin state so timers restart
+        # cleanly from the first two post-boot samples.
+        for pname in list(host.plugin_timers):
+            host.cancel_plugin_timer(pname)
+        host.plugin_data.clear()
+        stale_plugin_keys = [
+            k for k in host.alert_states
+            if k not in ("rtt",) and not k.startswith("connectivity.")
+        ]
+        for k in stale_plugin_keys:
+            del host.alert_states[k]
        # Clear connectivity alert now that the host is back up
        _set_connectivity_alert(host, conn.afam, "OK")
        # Don't log/notify RECOVER for a brand-new host seen for the first time —
@@ -146,9 +146,14 @@ def load_users(config: dict) -> dict:
    Returns the new ``users`` dict.
    """
    global users
+    old_users = dict(users)  # snapshot before rebuild
    users_cfg = config.get("users", {})
    if not isinstance(users_cfg, dict):
        users = {}
+        # Preserve OAuth-provisioned users (password_hash == "") that aren't in config.
+        for username, existing_user in old_users.items():
+            if not existing_user.password_hash and username not in users:
+                users[username] = existing_user
        return users

    result: dict = {}
@@ -166,6 +171,10 @@ def load_users(config: dict) -> dict:
        )

    users = result
+    # Preserve OAuth-provisioned users (password_hash == "") that aren't in config.
+    for username, existing_user in old_users.items():
+        if not existing_user.password_hash and username not in users:
+            users[username] = existing_user
    logger.info("Loaded %d user(s) from config", len(users))
    return users

@@ -187,6 +196,26 @@ def authenticate(username: str, password: str) -> "User | None":
    return None


+def provision_oauth_user(username: str, full_name: str, avatar: str) -> "User":
+    """Create or update a user sourced from an OAuth2 provider.
+
+    New users are inserted with no password_hash — they can only authenticate
+    via OAuth.  Existing users (e.g. defined in config with a password) have
+    their display name and avatar refreshed; all other attributes are preserved.
+    """
+    user = users.get(username)
+    if user is None:
+        user = User(username=username, full_name=full_name, avatar=avatar)
+        users[username] = user
+        logger.info("Provisioned OAuth user %r", username)
+    else:
+        if full_name:
+            user.full_name = full_name
+        if avatar:
+            user.avatar = avatar
+    return user
+
+
 # ---------------------------------------------------------------------------
 # Session management
 # ---------------------------------------------------------------------------
@@ -85,11 +85,15 @@ async def handler(request):
            except Exception as e:
                logger.error("Error sending initial hosts: %s", e)

-        # Send recent messages
+        # Send recent messages newest-first so the client can append them in
+        # display order without reordering on arrival (tagged history=True so
+        # the client knows to append rather than prepend).
        if data.msgs:
            try:
-                for m in data.msgs:
-                    await ws.send_str(json.dumps({"type": "message", "data": m}))
+                for m in reversed(data.msgs):
+                    host_name = m.get("host") if isinstance(m, dict) else None
+                    if not host_name or _user_can_see_host(user, host_name):
+                        await ws.send_str(json.dumps({"type": "message", "data": m, "history": True}))
            except Exception as e:
                logger.error("Error sending initial messages: %s", e)

@@ -128,6 +132,8 @@ def broadcast(typ: str, payload) -> bool:
    host_name: Optional[str] = None
    if typ in ("host", "plugin"):
        host_name = payload.get("raw_name") or payload.get("host") or payload.get("name")
+    elif typ == "message" and isinstance(payload, dict):
+        host_name = payload.get("host")

    jmsg = json.dumps({"type": typ, "data": payload})

@@ -4,20 +4,32 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hbd"
-version = "5.1.17"
+version = "5.3.10"
 description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
 readme = "README.md"
 requires-python = ">=3.11"
-license = "MIT" 
-keywords = ["heartbeat", "monitoring", "dns", "websocket", "system-monitoring"]
-authors = [
-  { name = "heartbeat contributors" }
-]
-
-# Core dependencies (required for both client and server)
 dependencies = [
  "PyYAML>=6.0",
 ]
+license = "MIT"
+license-files = ["LICENSE.md"]
+keywords = ["heartbeat", "monitoring", "dns", "websocket", "system-monitoring"]
+authors = [
+  { name = "Andreas Wrede" }
+]
+classifiers = [
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+  "Operating System :: POSIX :: Linux",
+  "Operating System :: POSIX :: BSD",
+  "Topic :: System :: Monitoring",
+  "Topic :: System :: Networking :: Monitoring",
+]
+
+[project.urls]
+Repository = "https://git.wrede.ca/andreas/heartbeat"

 [project.optional-dependencies]
 # Client-only dependencies (hbc - system monitoring client)
@@ -32,6 +44,7 @@ server = [
  "aiohttp>=3.11",
  "Jinja2>=3.1.6",
  "matrix-nio>=0.24",
+  "ruamel.yaml>=0.18",
 ]

 # Minimal client — hbc_mini only, no external dependencies
@@ -1,4 +0,0 @@
-key "rndc-key" {
-      algorithm hmac-md5;
-      secret "qlGa+AYKtyOgWNuozqECMw==";
-};
@@ -5,9 +5,23 @@ uv version --bump patch
 VER=$(uv  version  --short)
 sed -i".bak"  "s/__version__ = \"[0-9.]*\"\(.*\)$/__version__ = \"$VER\"\1/" hbd/__init__.py
 sed -i".bak"  "s/__version__ = \"[0-9.]*\"\(.*\)$/__version__ = \"$VER\"\1/" scripts/hbc_mini.py
+sed -i".bak"  "s/\*\*Package:\*\* \`hbd\` v[0-9.]*/\*\*Package:\*\* \`hbd\` v$VER/" README.md
+
+# Update CHANGELOG.md with commits since last tag.
+# LASTTAG is empty when no tag exists; the log range then collapses to HEAD.
+LASTTAG=$(git describe --tags --abbrev=0 2>/dev/null || true)
+# Subjects starting with "feat:" / "fix:" become "- ..." bullet lines.
+ADDED=$(git log "${LASTTAG:+$LASTTAG..}HEAD" --pretty="%s" | grep "^feat:" | sed 's/^feat: /- /')
+FIXED=$(git log "${LASTTAG:+$LASTTAG..}HEAD" --pretty="%s" | grep "^fix:" | sed 's/^fix: /- /')
+# Stage the entry in a private temp file instead of a fixed, predictable
+# /tmp path that another user or a concurrent run could pre-create.
+ENTRY=$(mktemp) || exit 1
+{
+    printf "## [%s]\n" "$VER"
+    [ -n "$ADDED" ] && printf "\n### Added\n%s\n" "$ADDED"
+    [ -n "$FIXED" ] && printf "\n### Fixed\n%s\n" "$FIXED"
+    printf "\n---\n\n"
+} > "$ENTRY"
+# Insert the staged entry after line 4 of CHANGELOG.md.
+sed -i".bak" "4r $ENTRY" CHANGELOG.md
+rm -f "$ENTRY" CHANGELOG.md.bak

 # commit the version bump (pyproject.toml, version strings, README, CHANGELOG)
-git commit -m "version $VER" pyproject.toml hbd/__init__.py scripts/hbc_mini.py
+git commit -m "version $VER" pyproject.toml hbd/__init__.py scripts/hbc_mini.py README.md CHANGELOG.md
 git push 
 # tag version
 git tag -a v$VER -m "Version $VER"
@@ -15,3 +29,4 @@ git push --tags

 rm hbd/__init__.py.bak
 rm scripts/hbc_mini.py.bak
+rm README.md.bak
@@ -0,0 +1,2 @@
+hbc_mini
+hbc_mini_dbg
@@ -0,0 +1,21 @@
+# Build rules for hbc_mini.
+#   all   (default)  optimised build                 -> $(TARGET)
+#   debug            -g build with ASan/UBSan enabled -> $(TARGET)_dbg
+#   clean            remove both binaries
+CC      ?= cc
+CFLAGS  = -O2 -Wall -Wextra -std=c11
+# Debug builds keep the same warnings and language standard as the release
+# build so both compile the source under identical rules.
+DBGFLAGS = -g -fsanitize=address,undefined -Wall -Wextra -std=c11
+# Libraries; placed after the source file on the compile/link line below.
+LDFLAGS = -lz -lpthread -lm
+TARGET  = hbc_mini
+SRC     = hbc_mini.c
+
+# FreeBSD/NetBSD keep zlib in base; no extra flags needed.
+# On some NetBSD installs pthreads may need -lpthread from pkgsrc.
+
+.PHONY: all clean debug
+
+all: $(TARGET)
+
+$(TARGET): $(SRC)
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+debug: $(SRC)
+	$(CC) $(DBGFLAGS) -o $(TARGET)_dbg $< $(LDFLAGS)
+
+clean:
+	rm -f $(TARGET) $(TARGET)_dbg
@@ -41,7 +41,7 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

 # updated by scripts/bumpminor.sh
-__version__ = "5.1.17"
+__version__ = "5.3.10"

 # ---------------------------------------------------------------------------
 # Protocol  (mirrors hbd/common/proto.py)
@@ -114,6 +114,7 @@ def _stodict(data: bytes) -> Dict[str, Any]:
 _DEFAULTS: Dict[str, Any] = {
    "hb_port": 50003,
    "interval": 10,
+    "owner": None,
    "plugins": {},
 }

@@ -239,6 +240,8 @@ class OSInfoPlugin(InfoPlugin):
            "hbc_version": __version__,
            "hbc_type": "mini",
        }
+        if self.config.get("owner"):
+            data["owner"] = self.config["owner"]
        if platform.system() == "Linux":
            data.update(_linux_distro())
        elif platform.system() == "Darwin":
@@ -388,7 +391,6 @@ class NagiosRunnerPlugin(MonitorPlugin):

    async def _collect_metrics(self) -> Dict[str, Any]:
        results: Dict[str, Any] = {}
-        worst = 0
        for cmd_cfg in self.commands:
            name = cmd_cfg.get("name")
            command = cmd_cfg.get("command")
@@ -399,10 +401,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
            results[f"{name}_status_code"] = rc
            results[f"{name}_output"] = msg
            results.update({f"{name}_{k}": v for k, v in perf.items()})
-            worst = max(worst, rc)
-        results["overall_status"] = _NAGIOS_STATUS.get(worst, "UNKNOWN")
-        results["overall_status_code"] = worst
-        results["plugin_count"] = len(self.commands)
        return results


@@ -487,6 +485,12 @@ class CPUMonitorPlugin(MonitorPlugin):
        except Exception:
            pass

+        try:
+            with open("/proc/uptime") as fh:
+                data["uptime_seconds"] = int(float(fh.read().split()[0]))
+        except Exception:
+            pass
+
        return data


@@ -535,6 +539,20 @@ class MemoryMonitorPlugin(MonitorPlugin):
        total = mi.get("MemTotal", 0)
        avail = mi.get("MemAvailable", mi.get("MemFree", 0))
        free = mi.get("MemFree", 0)
+
+        # ZFS ARC is reclaimable but not included in MemAvailable; add it.
+        arc_kb = 0
+        try:
+            with open("/proc/spl/kstat/zfs/arcstats") as _f:
+                for _line in _f:
+                    _p = _line.split()
+                    if len(_p) >= 3 and _p[0] == "size":
+                        arc_kb = int(_p[2]) // 1024
+                        break
+        except (OSError, ValueError):
+            pass
+
+        avail = min(avail + arc_kb, total)
        used = total - avail
        data: Dict[str, Any] = {
            "memory_total": total * 1024,
@@ -701,7 +719,9 @@ async def _load_plugins(cfg: Dict[str, Any]) -> List[Plugin]:
    plugins_cfg: Dict[str, Any] = cfg.get("plugins", {})
    loaded: List[Plugin] = []
    for cls in _ALL_PLUGIN_CLASSES:
-        plugin_cfg = plugins_cfg.get(cls.name) or cfg.get(cls.name, {})
+        plugin_cfg = dict(plugins_cfg.get(cls.name) or cfg.get(cls.name) or {})
+        if "owner" in cfg and "owner" not in plugin_cfg:
+            plugin_cfg["owner"] = cfg["owner"]
        plugin: Plugin = cls(config=plugin_cfg)
        try:
            ok = await plugin.initialize()
@@ -771,7 +791,7 @@ class _HeartbeatProtocol(asyncio.DatagramProtocol):
            msg_id = msg.get("ID")
            now = time.time()
            if msg_id == "ACK":
-                self._conn._handle_ack(now)
+                self._conn._handle_ack(msg, now)
            elif msg_id == "CMD":
                asyncio.create_task(_handle_command(self._conn, msg))
            elif msg_id == "UPD":
@@ -782,8 +802,7 @@ class _HeartbeatProtocol(asyncio.DatagramProtocol):
            self._log.error("datagram error: %s", e)

    def error_received(self, exc):
-        self._log.warning("protocol error on %s: %s — dropping connection", self._conn.addr, exc)
-        self._conn._dead = True
+        self._log.warning("protocol error on %s: %s — will retry", self._conn.addr, exc)
        self._conn.close()


@@ -799,6 +818,7 @@ class AsyncConnection:
        self.rtts: List[float] = [0.0]
        self._transport: Optional[asyncio.DatagramTransport] = None
        self._dead = False
+        self._request_info: asyncio.Event = asyncio.Event()
        self._log = logging.getLogger(f"hbc.conn.{addr}")

    async def open(self) -> bool:
@@ -817,12 +837,14 @@ class AsyncConnection:
            self._transport.close()
            self._transport = None

-    def _handle_ack(self, now: float):
+    def _handle_ack(self, msg: Dict[str, Any], now: float):
        rtt = (now - self.lastsend) * 1000.0
        self.rtts.append(rtt)
        if len(self.rtts) > 10:
            self.rtts.pop(0)
        self.ackcount += 1
+        if msg.get("request_update"):
+            self._request_info.set()

    async def sendto(self, msg: Dict[str, Any], msg_id: str = "HTB"):
        if self._dead:
@@ -955,6 +977,19 @@ async def _run_monitor_group(conn: AsyncConnection, plugins: List[Plugin], inter
        await _sleep(interval)


+async def _info_refresh_loop(conn: AsyncConnection, info: List[Plugin]):
+    log = logging.getLogger("hbc.plugins")
+    while _running:
+        await conn._request_info.wait()
+        if not _running:
+            break
+        conn._request_info.clear()
+        log.info("refreshing InfoPlugins on server request")
+        for plugin in info:
+            plugin._cache = None
+        await _run_info_plugins(conn, info)
+
+
 async def _plugin_collector(conn: AsyncConnection, plugins: List[Plugin]):
    info = [p for p in plugins if isinstance(p, InfoPlugin)]
    monitor = [p for p in plugins if isinstance(p, MonitorPlugin)]
@@ -965,12 +1000,10 @@ async def _plugin_collector(conn: AsyncConnection, plugins: List[Plugin]):
    for p in monitor:
        by_interval[p.interval].append(p)

-    if by_interval:
-        await asyncio.gather(
-            *[asyncio.create_task(_run_monitor_group(conn, grp, iv))
-              for iv, grp in by_interval.items()],
-            return_exceptions=True,
-        )
+    tasks = [asyncio.create_task(_info_refresh_loop(conn, info))]
+    tasks += [asyncio.create_task(_run_monitor_group(conn, grp, iv))
+              for iv, grp in by_interval.items()]
+    await asyncio.gather(*tasks, return_exceptions=True)


 # ---------------------------------------------------------------------------
@@ -1014,7 +1047,7 @@ def _reconfigure_syslog(level: int):
 # ---------------------------------------------------------------------------

 async def _async_main(args, cfg: Dict[str, Any]) -> int:
-    global _running, _shutdown_event, _active_tasks
+    global _running, _shutdown_event, _active_tasks, send_shutdown
    _running = True
    _shutdown_event = asyncio.Event()
    _active_tasks = []
@@ -1024,36 +1057,47 @@ async def _async_main(args, cfg: Dict[str, Any]) -> int:
    port = cfg.get("hb_port", PORT)
    interval = cfg.get("interval", INTERVAL)

-    log.info("starting: %s -> %s port=%d interval=%ds", iam, args.hosts, port, interval)
+    log.info("hbc_mini %s on %s -> %s port=%d interval=%ds",__version__, iam, args.hosts, port, interval)
+
+    af_filter = (socket.AF_INET if getattr(args, "ipv4_only", False)
+                 else socket.AF_INET6 if getattr(args, "ipv6_only", False)
+                 else 0)

    connections: List[AsyncConnection] = []
    conn_id = 1
+    _retry_delay = 5
+    while _running and not connections:
        for host in args.hosts:
            try:
-            addrs = socket.getaddrinfo(host, port, 0, 0, socket.SOL_UDP)
+                addrs = socket.getaddrinfo(host, port, af_filter, 0, socket.SOL_UDP)
            except socket.gaierror as e:
-            log.error("cannot resolve %s: %s", host, e)
+                log.warning("cannot resolve %s: %s — retrying in %ds", host, e, _retry_delay)
                continue
            for ai in addrs:
                conn = AsyncConnection(conn_id, ai[4][0], port, ai[0], iam)
                if await conn.open():
                    connections.append(conn)
                    conn_id += 1
+        if not connections:
+            await _sleep(_retry_delay)
+            _retry_delay = min(_retry_delay * 2, 60)

    if not connections:
-        log.error("no connections established")
        return 1

    # Boot / one-shot message
+    send_shutdown = False
    if args.boot or args.message:
        bmsg: Dict[str, Any] = {"acks": 0}
        if args.boot:
            bmsg["boot"] = 1
+            args.boot = False  # don't repeat on restart
+            send_shutdown = True
        if args.message:
            bmsg["service"] = "service"
            bmsg["msg"] = args.message
-        for c in connections:
-            await c.sendto(bmsg)
+        target = next((c for c in connections if c._transport), connections[0])
+        await target.sendto(bmsg)
        if args.message and not args.daemon:
            await asyncio.sleep(0.3)
            for c in connections:
@@ -1085,11 +1129,13 @@ async def _async_main(args, cfg: Dict[str, Any]) -> int:
        pass

    log.info("shutting down")
-    for conn in connections:
+    target = next((c for c in connections if c._transport), connections[0] if connections else None)
+    if target and send_shutdown:
        try:
-            await conn.sendto({"shutdown": 1, "acks": conn.ackcount})
+            await target.sendto({"shutdown": 1, "acks": target.ackcount})
        except Exception:
            pass
+    for conn in connections:
        conn.close()
    await asyncio.sleep(0.3)
    for plugin in plugins:
@@ -1115,6 +1161,9 @@ def main(argv=None):
    parser.add_argument("-d", "--daemon", action="store_true", help="Run as daemon")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("-x", "--debug", action="count", default=0, help="Debug level")
+    af_group = parser.add_mutually_exclusive_group()
+    af_group.add_argument("-4", dest="ipv4_only", action="store_true", help="Use IPv4 only")
+    af_group.add_argument("-6", dest="ipv6_only", action="store_true", help="Use IPv6 only")
    parser.add_argument("hosts", nargs="+", help="HBD server(s)")
    args = parser.parse_args(argv)

@@ -0,0 +1,49 @@
+# PyInstaller spec for hbc_windows.exe
+# Build with: pyinstaller hbc_windows.spec
+#
+# Requirements (on Windows):
+#   pip install pyinstaller
+
+# Passed as cipher= to both Analysis and PYZ below; None in this spec.
+block_cipher = None
+
+# Analyse the single entry script hbc_windows.py.  No extra search paths,
+# binaries, data files, hidden imports or hooks are supplied.
+a = Analysis(
+    ['hbc_windows.py'],
+    pathex=[],
+    binaries=[],
+    datas=[],
+    hiddenimports=[],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    # Standard-library packages left out of the bundle.
+    # NOTE(review): confirm nothing hbc_windows.py imports (directly or via
+    # other stdlib modules) needs these at runtime — e.g. pathlib imports
+    # urllib.parse at module level on some Python versions; verify.
+    excludes=['tkinter', 'unittest', 'email', 'html', 'http', 'urllib', 'xml'],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+
+# Archive of the pure-Python modules collected by the analysis.
+pyz = PYZ(a.pure, a.zlib_archive, cipher=block_cipher)
+
+# The executable receives scripts, binaries, zipfiles and datas directly;
+# this spec defines no separate collection step.
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    [],
+    # Output name; install_hbc_windows.ps1 looks for hbc_windows.exe.
+    name='hbc_windows',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=False,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+    icon=None,
+    version=None,
+)
@@ -0,0 +1,126 @@
+#Requires -RunAsAdministrator
+<#
+.SYNOPSIS
+    Install hbc_windows.exe as a Windows Service using NSSM.
+
+.DESCRIPTION
+    Installs the HeartBeat Client as a Windows Service that starts automatically.
+    Requires NSSM (Non-Sucking Service Manager) in PATH or alongside this script.
+    Requires hbc_windows.exe built via: pyinstaller hbc_windows.spec
+
+    An existing service with the same name is stopped and removed first.
+    NSSM's own stdout/stderr capture is written to C:\ProgramData\heartbeat.
+
+.PARAMETER Server
+    HBD server hostname or IP address (required).
+
+.PARAMETER ExePath
+    Path to hbc_windows.exe. Defaults to the directory containing this script.
+
+.PARAMETER ServiceName
+    Windows service name. Default: heartbeat-client
+
+.PARAMETER ConfigFile
+    Path to hbc.json config file. Optional.
+
+.PARAMETER LogFile
+    Path to log file. Default: C:\ProgramData\heartbeat\hbc.log
+    The containing directory is created if it does not exist.
+
+.PARAMETER Interval
+    Heartbeat interval in seconds. Default: 10
+    NOTE: accepted for compatibility but not currently forwarded to
+    hbc_windows.exe by this script.
+
+.EXAMPLE
+    .\install_hbc_windows.ps1 -Server hbd.example.com
+    .\install_hbc_windows.ps1 -Server hbd.example.com -ConfigFile C:\ProgramData\heartbeat\hbc.json
+#>
+
+param(
+    [Parameter(Mandatory = $true)]
+    [string]$Server,
+
+    [string]$ExePath = "",
+    [string]$ServiceName = "heartbeat-client",
+    [string]$ConfigFile = "",
+    [string]$LogFile = "C:\ProgramData\heartbeat\hbc.log",
+    [int]$Interval = 10
+)
+
+Set-StrictMode -Version Latest
+$ErrorActionPreference = "Stop"
+
+# Locate hbc_windows.exe
+if ($ExePath -eq "") {
+    $ExePath = Join-Path $PSScriptRoot "hbc_windows.exe"
+}
+if (-not (Test-Path $ExePath)) {
+    Write-Error "hbc_windows.exe not found at: $ExePath`nBuild it first with: pyinstaller hbc_windows.spec"
+    exit 1
+}
+
+# Locate NSSM: prefer PATH, fall back to a copy next to this script
+$nssm = Get-Command nssm -ErrorAction SilentlyContinue
+if (-not $nssm) {
+    $nssmLocal = Join-Path $PSScriptRoot "nssm.exe"
+    if (Test-Path $nssmLocal) {
+        $nssm = $nssmLocal
+    } else {
+        Write-Error "nssm.exe not found in PATH or alongside this script.`nDownload from https://nssm.cc/download"
+        exit 1
+    }
+} else {
+    $nssm = $nssm.Source
+}
+
+# Build argument list: --daemon [-c "<config>"] <server> [--log-file "<log>"]
+$args_list = "--daemon"
+if ($ConfigFile -ne "") {
+    $args_list = "$args_list -c `"$ConfigFile`""
+}
+$args_list = "$args_list $Server"
+if ($LogFile -ne "") {
+    $args_list = "$args_list --log-file `"$LogFile`""
+}
+
+# Create data directory (also used for NSSM's stdout/stderr capture)
+$dataDir = "C:\ProgramData\heartbeat"
+if (-not (Test-Path $dataDir)) {
+    New-Item -ItemType Directory -Path $dataDir | Out-Null
+    Write-Host "Created $dataDir"
+}
+
+# A custom -LogFile may live outside $dataDir; make sure its directory exists
+if ($LogFile -ne "") {
+    $logDir = Split-Path -Parent $LogFile
+    if ($logDir -and -not (Test-Path $logDir)) {
+        New-Item -ItemType Directory -Path $logDir -Force | Out-Null
+        Write-Host "Created $logDir"
+    }
+}
+
+# Remove existing service if present
+$existing = Get-Service -Name $ServiceName -ErrorAction SilentlyContinue
+if ($existing) {
+    Write-Host "Removing existing service '$ServiceName'..."
+    # Windows PowerShell turns redirected native stderr into error records,
+    # which would abort the script under ErrorActionPreference=Stop if nssm
+    # writes anything to stderr; relax the preference just for this call.
+    $savedPreference = $ErrorActionPreference
+    $ErrorActionPreference = "Continue"
+    try {
+        & $nssm stop $ServiceName 2>$null
+    } finally {
+        $ErrorActionPreference = $savedPreference
+    }
+    & $nssm remove $ServiceName confirm
+}
+
+# Install service
+Write-Host "Installing service '$ServiceName'..."
+& $nssm install $ServiceName $ExePath $args_list
+if ($LASTEXITCODE -ne 0) {
+    Write-Error "nssm install failed (exit $LASTEXITCODE)"
+    exit 1
+}
+
+# Configure service: display metadata, auto-start, and rotated capture of
+# the process's stdout/stderr (rotation threshold 5242880 bytes)
+& $nssm set $ServiceName DisplayName "HeartBeat Client"
+& $nssm set $ServiceName Description "Sends heartbeat and plugin metrics to the HBD monitoring server."
+& $nssm set $ServiceName Start SERVICE_AUTO_START
+& $nssm set $ServiceName AppStdout (Join-Path $dataDir "nssm_stdout.log")
+& $nssm set $ServiceName AppStderr (Join-Path $dataDir "nssm_stderr.log")
+& $nssm set $ServiceName AppRotateFiles 1
+& $nssm set $ServiceName AppRotateBytes 5242880
+
+# Start service; a start failure is reported as a warning, not an error,
+# because the service itself has been installed at this point
+Write-Host "Starting service '$ServiceName'..."
+& $nssm start $ServiceName
+if ($LASTEXITCODE -ne 0) {
+    Write-Warning "Service installed but failed to start — check logs in $dataDir"
+} else {
+    Write-Host "Service '$ServiceName' started successfully."
+    Write-Host "Log file: $LogFile"
+    Write-Host ""
+    Write-Host "Useful commands:"
+    Write-Host "  nssm status $ServiceName"
+    Write-Host "  nssm stop $ServiceName"
+    Write-Host "  nssm restart $ServiceName"
+    Write-Host "  nssm remove $ServiceName confirm"
+}
@@ -68,8 +68,7 @@ async def test_nagios_runner():
    print(f"   ✓ Collected {len(data)} data points")
    
    print(f"\n4. Results:")
-    print(f"   Overall Status: {data.get('overall_status')} (code: {data.get('overall_status_code')})")
-    print(f"   Plugins Executed: {data.get('plugin_count')}")
+    print(f"   Data points collected: {len(data)}")
    
    # Show individual plugin results
    print(f"\n5. Individual Plugin Results:")
@@ -0,0 +1,162 @@
+import glob
+import os
+import pytest
+from hbd.server import configio
+
+SAMPLE_YAML = """\
+# Server configuration
+hbd_port: 50004  # HTTP API port
+interval: 20
+users:
+  alice:
+    full_name: Alice Smith
+    admin: true
+notification_channels:
+  pushover_ops:
+    type: pushover
+    token: abc123
+"""
+
+
+def test_read_roundtrip_loads_values(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    assert data["hbd_port"] == 50004
+    assert data["interval"] == 20
+    assert data["users"]["alice"]["full_name"] == "Alice Smith"
+
+
+def test_write_config_creates_backup(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    data["interval"] = 30
+    configio.write_config(str(f), data)
+    backups = configio.list_backups(str(f))
+    assert len(backups) == 1
+    assert ".bak." in backups[0]
+
+
+def test_write_config_preserves_comments(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    data["interval"] = 30
+    configio.write_config(str(f), data)
+    content = f.read_text()
+    assert "# Server configuration" in content
+    assert "# HTTP API port" in content
+
+
+def test_write_config_atomically_replaces_file(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    data["interval"] = 99
+    configio.write_config(str(f), data)
+    assert not (tmp_path / ".hb.yaml.tmp").exists()
+    data2 = configio.read_roundtrip(str(f))
+    assert data2["interval"] == 99
+
+
+def test_write_config_backup_rotation(tmp_path):
+    cfg = tmp_path / ".hb.yaml"
+    cfg.write_text(SAMPLE_YAML)
+    # Pre-create 10 existing backups with old timestamps
+    for i in range(10):
+        (tmp_path / f".hb.yaml.bak.20260101-{i:06d}").write_text("old")
+    data = configio.read_roundtrip(str(cfg))
+    configio.write_config(str(cfg), data)
+    backups = configio.list_backups(str(cfg))
+    assert len(backups) == 10
+    assert not (tmp_path / ".hb.yaml.bak.20260101-000000").exists()
+
+
+def test_list_backups_newest_first(tmp_path):
+    cfg = tmp_path / ".hb.yaml"
+    cfg.write_text(SAMPLE_YAML)
+    for i in range(3):
+        (tmp_path / f".hb.yaml.bak.20260101-{i:02d}0000").write_text("b")
+    backups = configio.list_backups(str(cfg))
+    assert len(backups) == 3
+    assert backups == sorted(backups, reverse=True)
+
+
+def test_apply_structured_section_server_updates_keys(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    configio.apply_structured_section(data, "server", {"interval": 60, "hbd_port": 8080})
+    assert data["interval"] == 60
+    assert data["hbd_port"] == 8080
+
+
+def test_apply_structured_section_server_ignores_unknown_keys(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    configio.apply_structured_section(data, "server", {"interval": 60, "not_a_key": "x"})
+    assert "not_a_key" not in data
+
+
+def test_apply_structured_section_users_replaces_dict(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    new_users = {"bob": {"full_name": "Bob Jones", "admin": False}}
+    configio.apply_structured_section(data, "users", new_users)
+    assert "alice" not in data["users"]
+    assert data["users"]["bob"]["full_name"] == "Bob Jones"
+
+
+def test_apply_yaml_section_notification_channels(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    new_yaml = "email_ops:\n  type: email\n  recipients: [ops@example.com]\n"
+    configio.apply_yaml_section(data, "notification_channels", new_yaml)
+    assert "email_ops" in data["notification_channels"]
+    assert "pushover_ops" not in data["notification_channels"]
+
+
+def test_apply_yaml_section_thresholds_maps_to_threshold_configs(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    configio.apply_yaml_section(data, "thresholds", "default:\n  cpu: 80\n")
+    assert "threshold_configs" in data
+    assert data["threshold_configs"]["default"]["cpu"] == 80
+
+
+def test_apply_yaml_section_dns_replaces_each_key(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    configio.apply_yaml_section(
+        data, "dns",
+        "nsupdate_bin: /usr/bin/nsupdate\ndyndomains: [dyn.example.com]\n"
+    )
+    assert data["nsupdate_bin"] == "/usr/bin/nsupdate"
+    assert data["dyndomains"] == ["dyn.example.com"]
+
+
+def test_apply_yaml_section_unknown_raises(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    with pytest.raises(ValueError, match="Unknown YAML section"):
+        configio.apply_yaml_section(data, "nope", "x: 1\n")
+
+
+def test_apply_structured_section_unknown_raises(tmp_path):
+    f = tmp_path / ".hb.yaml"
+    f.write_text(SAMPLE_YAML)
+    data = configio.read_roundtrip(str(f))
+    with pytest.raises(ValueError, match="Unknown structured section"):
+        configio.apply_structured_section(data, "nope", {"x": 1})
+
+
+def test_read_roundtrip_missing_file_raises(tmp_path):
+    with pytest.raises(FileNotFoundError):
+        configio.read_roundtrip(str(tmp_path / "nonexistent.yaml"))
@@ -20,7 +20,7 @@ def test_handle_cmd_sends_command():
    import hbdclass

    ctx = {
-        "config": {"watchhosts": [], "dyndnshosts": []},
+        "config": {"watchhosts": []},
        "hbdclass": hbdclass,
        "log": dummy_noop,
        "email": dummy_noop,
@@ -0,0 +1,173 @@
+"""Tests for the config read/write API helpers in http.py."""
+import pytest
+from hbd.server import http
+
+
+def test_mask_config_for_api_masks_user_passwords():
+    """A user's password is replaced by the "•••" placeholder; full_name passes through."""
+    alice_entry = {"full_name": "Alice", "admin": True, "password": "pbkdf2:sha256:abc"}
+    config = {
+        "hbd_port": 50004,
+        "interval": 20,
+        "users": {"alice": alice_entry},
+        "oauth": {},
+    }
+    masked_alice = http._mask_config_for_api(config)["users"]["alice"]
+    assert masked_alice["password"] == "•••"
+    assert masked_alice["full_name"] == "Alice"
+
+
+def test_mask_config_for_api_masks_oauth_client_secret():
+    """An OAuth entry's client_secret is masked with "•••" while client_id stays readable."""
+    gitea_entry = {
+        "type": "gitea",
+        "url": "https://git.example.com",
+        "client_id": "cid",
+        "client_secret": "verysecret",
+    }
+    config = {
+        "hbd_port": 50004,
+        "interval": 20,
+        "users": {},
+        "oauth": {"gitea": gitea_entry},
+    }
+    masked_gitea = http._mask_config_for_api(config)["oauth"]["gitea"]
+    assert masked_gitea["client_secret"] == "•••"
+    assert masked_gitea["client_id"] == "cid"
+
+
+def test_mask_config_for_api_includes_server_keys():
+    """hbd_port and interval are exposed under the "server" key of the masked result."""
+    config = {"hbd_port": 50004, "interval": 20, "users": {}, "oauth": {}}
+    server_view = http._mask_config_for_api(config)["server"]
+    assert server_view["hbd_port"] == 50004
+    assert server_view["interval"] == 20
+
+
+def test_mask_config_for_api_no_password_in_users_leaves_no_key():
+    """Masking does not add a "password" key for a user entry that has none."""
+    bob_entry = {"full_name": "Bob", "admin": False}
+    config = {"hbd_port": 50004, "users": {"bob": bob_entry}, "oauth": {}}
+    masked_bob = http._mask_config_for_api(config)["users"]["bob"]
+    assert "password" not in masked_bob
+
+
+# ---- configio integration for write path ----
+
+def test_write_path_applies_server_section(tmp_path):
+    """A structured "server" update changes the given key on disk and leaves hbd_port alone."""
+    from hbd.server import configio
+
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text("hbd_port: 50004\ninterval: 20\nusers: {}\n")
+    path = str(config_file)
+
+    doc = configio.read_roundtrip(path)
+    configio.apply_structured_section(doc, "server", {"interval": 60})
+    configio.write_config(path, doc)
+
+    reloaded = configio.read_roundtrip(path)
+    assert reloaded["interval"] == 60
+    assert reloaded["hbd_port"] == 50004  # unchanged
+
+
+def test_write_path_applies_yaml_section(tmp_path):
+    """After applying a YAML section and writing, only the new channel is present on disk."""
+    from hbd.server import configio
+
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(
+        "hbd_port: 50004\nnotification_channels:\n  old_ch:\n    type: email\n"
+    )
+    path = str(config_file)
+
+    doc = configio.read_roundtrip(path)
+    configio.apply_yaml_section(doc, "notification_channels", "new_ch:\n  type: pushover\n")
+    configio.write_config(path, doc)
+
+    channels = configio.read_roundtrip(path)["notification_channels"]
+    assert "new_ch" in channels
+    assert "old_ch" not in channels
+
+
+def test_write_path_hashes_plaintext_password(tmp_path):
+    """A plaintext password in the users payload ends up on disk as a pbkdf2 hash."""
+    from hbd.server import configio
+    from hbd.server import users as users_mod
+
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text("hbd_port: 50004\nusers:\n  alice:\n    full_name: Alice\n    admin: true\n    password: pbkdf2:sha256:old\n")
+    path = str(config_file)
+    doc = configio.read_roundtrip(path)
+
+    # Simulate what the POST handler does: hash plaintext password
+    users_payload = {"alice": {"full_name": "Alice", "admin": True, "password": "newplaintext"}}
+    for attrs in users_payload.values():
+        submitted = attrs.get("password", "")
+        needs_hashing = bool(submitted) and not submitted.startswith("pbkdf2:")
+        if needs_hashing:
+            attrs["password"] = users_mod.hash_password(submitted)
+    configio.apply_structured_section(doc, "users", users_payload)
+    configio.write_config(path, doc)
+
+    stored_password = configio.read_roundtrip(path)["users"]["alice"]["password"]
+    assert stored_password.startswith("pbkdf2:")
+    assert stored_password != "newplaintext"
+
+
+def test_rollback_restores_backup(tmp_path):
+    """Writing the single listed backup over the live config restores the earlier interval."""
+    from hbd.server import configio
+
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text("hbd_port: 50004\ninterval: 20\n")
+    path = str(config_file)
+
+    # Make a change to create a backup
+    modified = configio.read_roundtrip(path)
+    modified["interval"] = 99
+    configio.write_config(path, modified)
+    backup_paths = configio.list_backups(path)
+    assert len(backup_paths) == 1
+
+    # Read the backup and write it back (simulating rollback)
+    from_backup = configio.read_roundtrip(backup_paths[0])
+    configio.write_config(path, from_backup)
+
+    live_config = configio.read_roundtrip(path)
+    assert live_config["interval"] == 20
+
+
+def test_write_path_preserves_masked_password(tmp_path):
+    """Sending the "•••" placeholder back keeps the stored hash instead of writing "•••"."""
+    from hbd.server import configio
+    from hbd.server import users as users_mod
+
+    original_hash = "pbkdf2:sha256:original_hash"
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(
+        f"hbd_port: 50004\nusers:\n  alice:\n    full_name: Alice\n    admin: true\n    password: {original_hash}\n"
+    )
+    path = str(config_file)
+    doc = configio.read_roundtrip(path)
+
+    # Simulate what api_config_post does when client sends "•••" back
+    stored_users = doc.get("users") or {}
+    users_payload = {"alice": {"full_name": "Alice", "admin": True, "password": "•••"}}
+    for username, attrs in users_payload.items():
+        submitted = attrs.get("password", "")
+        if not submitted or submitted == "•••":
+            # Blank or placeholder: fall back to whatever hash is already stored.
+            stored_hash = (stored_users.get(username) or {}).get("password", "")
+            if stored_hash:
+                attrs["password"] = stored_hash
+            else:
+                attrs.pop("password", None)
+        elif not submitted.startswith("pbkdf2:"):
+            # A real plaintext password: hash it before it is written.
+            attrs["password"] = users_mod.hash_password(submitted)
+    configio.apply_structured_section(doc, "users", users_payload)
+    configio.write_config(path, doc)
+
+    written_password = configio.read_roundtrip(path)["users"]["alice"]["password"]
+    assert written_password == original_hash, (
+        f"Expected original hash preserved, got: {written_password!r}"
+    )
+
+
+def test_write_path_preserves_oauth_client_secret(tmp_path):
+    """Sending "•••" back as an OAuth client_secret keeps the secret already on disk."""
+    from hbd.server import configio
+
+    original_secret = "real_client_secret_value"
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(
+        f"hbd_port: 50004\noauth:\n  gitea:\n    type: gitea\n    url: https://git.example.com\n"
+        f"    client_id: cid123\n    client_secret: {original_secret}\n"
+    )
+    path = str(config_file)
+    doc = configio.read_roundtrip(path)
+
+    # Simulate what api_config_post does when client sends "•••" back for client_secret
+    stored_oauth = doc.get("oauth") or {}
+    oauth_payload = {"gitea": {"type": "gitea", "url": "https://git.example.com", "client_id": "cid123", "client_secret": "•••"}}
+    for provider_name, attrs in oauth_payload.items():
+        submitted = attrs.get("client_secret", "")
+        if submitted and submitted != "•••":
+            # A real secret was supplied; leave it untouched.
+            continue
+        stored_secret = (stored_oauth.get(provider_name) or {}).get("client_secret", "")
+        if stored_secret:
+            attrs["client_secret"] = stored_secret
+        else:
+            attrs.pop("client_secret", None)
+    doc["oauth"] = oauth_payload
+    configio.write_config(path, doc)
+
+    written_secret = configio.read_roundtrip(path)["oauth"]["gitea"]["client_secret"]
+    assert written_secret == original_secret, (
+        f"Expected original secret preserved, got: {written_secret!r}"
+    )
@@ -0,0 +1,174 @@
+"""Tests for _build_host_info helper in http.py."""
+import pytest
+from unittest.mock import MagicMock
+from hbd.server.http import _build_host_info
+
+
+class _FakeConn:
+    """Minimal connection stand-in that only carries a ``lastbeat`` value."""
+
+    def __init__(self, lastbeat):
+        self.lastbeat = lastbeat
+
+
+class _FakeHost:
+    """Host stand-in with just the attributes and lookup method these tests use."""
+
+    def __init__(self, name="myhost", owner=None, managers=None,
+                 connections=None, os_data=None, plugin_data=None):
+        self.name = name
+        self.owner = owner
+        # Falsy collection arguments are replaced by fresh empty containers.
+        self.managers = managers if managers else []
+        self.connections = connections if connections else {}
+        self.plugin_data = plugin_data if plugin_data else {}
+        self._os_data = os_data
+
+    def get_latest_plugin_data(self, plugin_name):
+        """Return ``(timestamp, os_data)`` for "os_info" when os_data was given, else None."""
+        if plugin_name != "os_info" or self._os_data is None:
+            return None
+        return (1234567890.0, self._os_data)
+
+
+def test_build_host_info_basic_fields():
+    """Owner and managers are echoed; the remaining fields are None for a bare host."""
+    info = _build_host_info(_FakeHost(owner="alice", managers=["bob", "carol"]))
+    assert info["owner"] == "alice"
+    assert info["managers"] == ["bob", "carol"]
+    for absent_key in ("hbc_version", "hbc_type", "last_packet", "thresholds"):
+        assert info[absent_key] is None
+
+
+def test_build_host_info_no_owner():
+    """A host created with defaults reports owner None and an empty managers list."""
+    info = _build_host_info(_FakeHost())
+    assert info["owner"] is None
+    assert info["managers"] == []
+
+
+def test_build_host_info_reads_hbc_from_os_info():
+    """hbc_version and hbc_type are taken from the host's os_info plugin data."""
+    os_info = {"hbc_version": "5.3.0", "hbc_type": "full"}
+    info = _build_host_info(_FakeHost(os_data=os_info))
+    assert info["hbc_version"] == "5.3.0"
+    assert info["hbc_type"] == "full"
+
+
+def test_build_host_info_hbc_none_when_no_os_info():
+    """Without os_info data both hbc fields are None."""
+    info = _build_host_info(_FakeHost(os_data=None))
+    for hbc_key in ("hbc_version", "hbc_type"):
+        assert info[hbc_key] is None
+
+
+def test_build_host_info_last_packet_is_max_lastbeat():
+    """last_packet is the largest lastbeat among the host's connections."""
+    connections = {
+        "IPv4": _FakeConn(1000.0),
+        "IPv6": _FakeConn(2000.0),
+    }
+    info = _build_host_info(_FakeHost(connections=connections))
+    assert info["last_packet"] == 2000.0
+
+
+def test_build_host_info_last_packet_none_when_no_connections():
+    """A host with no connections reports last_packet as None."""
+    info = _build_host_info(_FakeHost(connections={}))
+    assert info["last_packet"] is None
+
+
+def test_build_host_info_thresholds_none_without_checker():
+    """Passing threshold_checker=None yields thresholds of None."""
+    info = _build_host_info(_FakeHost(), threshold_checker=None)
+    assert info["thresholds"] is None
+
+
+def test_build_host_info_thresholds_sorted_by_metric():
+    """The cpu threshold is listed before the memory one even though the checker returns memory first."""
+    from hbd.server.threshold import ThresholdConfig
+
+    cpu_cfg = ThresholdConfig("cpu_monitor.cpu_percent", warning=80.0, critical=95.0)
+    mem_cfg = ThresholdConfig("memory_monitor.memory_percent", warning=85.0, critical=98.0)
+
+    # Memory is inserted ahead of cpu on purpose so the ordering is observable.
+    checker = MagicMock()
+    checker.get_thresholds_for_host.return_value = {
+        "memory_monitor.memory_percent": mem_cfg,
+        "cpu_monitor.cpu_percent": cpu_cfg,
+    }
+
+    thresholds = _build_host_info(_FakeHost(), threshold_checker=checker)["thresholds"]
+
+    assert thresholds is not None
+    assert len(thresholds) == 2
+    cpu_entry = thresholds[0]
+    assert cpu_entry["metric"] == "cpu_monitor.cpu_percent"
+    assert cpu_entry["warning"] == 80.0
+    assert cpu_entry["critical"] == 95.0
+    assert cpu_entry["operator"] == ">"
+    assert thresholds[1]["metric"] == "memory_monitor.memory_percent"
+
+
+def test_build_host_info_thresholds_empty_list_when_no_thresholds():
+    """A checker that returns no thresholds yields an empty list, not None."""
+    empty_checker = MagicMock()
+    empty_checker.get_thresholds_for_host.return_value = {}
+    info = _build_host_info(_FakeHost(), threshold_checker=empty_checker)
+    assert info["thresholds"] == []
+
+
+def test_build_host_info_threshold_null_warning_critical():
+    """A threshold with warning=None is reported with warning None and its critical value intact."""
+    from hbd.server.threshold import ThresholdConfig
+
+    rtt_cfg = ThresholdConfig("rtt.myhost", warning=None, critical=500.0)
+    checker = MagicMock()
+    checker.get_thresholds_for_host.return_value = {"rtt.myhost": rtt_cfg}
+
+    info = _build_host_info(_FakeHost(), threshold_checker=checker)
+    rtt_entry = info["thresholds"][0]
+    assert rtt_entry["warning"] is None
+    assert rtt_entry["critical"] == 500.0
+
+
+def test_build_host_info_nagios_operator_serialized():
+    """A threshold built with operator="nagios" is reported with that operator string."""
+    from hbd.server.threshold import ThresholdConfig
+
+    nagios_cfg = ThresholdConfig("nagios_runner.check_http", operator="nagios")
+    checker = MagicMock()
+    checker.get_thresholds_for_host.return_value = {"nagios_runner.check_http": nagios_cfg}
+
+    info = _build_host_info(_FakeHost(), threshold_checker=checker)
+    assert info["thresholds"][0]["operator"] == "nagios"
+
+
+def test_build_host_info_covers_suffix_matched_metrics():
+    """The memory_monitor.percent threshold lists memory_monitor.swap_percent under "covers"."""
+    from hbd.server.threshold import ThresholdConfig
+
+    percent_cfg = ThresholdConfig("memory_monitor.percent", warning=85.0, critical=95.0)
+    checker = MagicMock()
+    checker.get_thresholds_for_host.return_value = {"memory_monitor.percent": percent_cfg}
+
+    # Simulate plugin_data with both percent and swap_percent fields
+    memory_sample = {
+        "percent": 80.0,
+        "swap_percent": 25.0,
+        "available_mb": 2000,
+    }
+    host = _FakeHost(
+        connections={},
+        os_data=None,
+        plugin_data={"memory_monitor": [(1234567890.0, memory_sample)]},
+    )
+
+    thresholds = _build_host_info(host, threshold_checker=checker)["thresholds"]
+    assert thresholds is not None
+    first = thresholds[0]
+    assert first["metric"] == "memory_monitor.percent"
+    assert first["covers"] == ["memory_monitor.swap_percent"]
+
+
+def test_build_host_info_covers_empty_when_exact_matches_only():
+    """"covers" is empty when the only plugin field is the threshold's own metric."""
+    from hbd.server.threshold import ThresholdConfig
+
+    percent_cfg = ThresholdConfig("memory_monitor.percent", warning=85.0, critical=95.0)
+    checker = MagicMock()
+    checker.get_thresholds_for_host.return_value = {"memory_monitor.percent": percent_cfg}
+
+    host = _FakeHost(
+        plugin_data={"memory_monitor": [(1234567890.0, {"percent": 80.0})]},
+    )
+
+    first = _build_host_info(host, threshold_checker=checker)["thresholds"][0]
+    assert first["covers"] == []
@@ -0,0 +1,123 @@
+"""Tests for PUT /api/0/users/me logic."""
+import pytest
+from hbd.server import users as users_mod
+
+
+def test_hash_password_roundtrip():
+    """A hash from hash_password verifies its own password and rejects a different one."""
+    h = users_mod.hash_password("mysecret")
+    assert h.startswith("pbkdf2:sha256:")
+    # Feed the hash back through verification so this is a real round trip,
+    # rather than only checking that the users module imported.
+    user = users_mod.User(
+        username="roundtrip",
+        full_name="Round Trip",
+        avatar="",
+        password_hash=h,
+    )
+    assert user.check_password("mysecret")
+    assert not user.check_password("not-mysecret")
+
+
+def test_password_change_requires_correct_current(tmp_path):
+    """authenticate succeeds with the loaded user's password and returns None for a wrong one."""
+    initial_hash = users_mod.hash_password("oldpass")
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(
+        f"hbd_port: 50004\nusers:\n  alice:\n    full_name: Alice\n    admin: true\n    password: {initial_hash}\n"
+    )
+    alice_entry = {"full_name": "Alice", "admin": True, "password": initial_hash}
+    users_mod.load_users({"users": {"alice": alice_entry}})
+
+    # Correct current password authenticates
+    good_login = users_mod.authenticate("alice", "oldpass")
+    assert good_login is not None
+    # Wrong current password does not authenticate
+    bad_login = users_mod.authenticate("alice", "wrongpass")
+    assert bad_login is None
+
+
+def test_put_users_me_writes_new_fields(tmp_path):
+    """Updating full_name and avatar persists to disk and leaves the password hash unchanged."""
+    from hbd.server import configio
+
+    initial_hash = users_mod.hash_password("secret")
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(
+        "hbd_port: 50004\n"
+        f"users:\n  alice:\n    full_name: Old Name\n    admin: true\n    password: {initial_hash}\n"
+    )
+    path = str(config_file)
+    doc = configio.read_roundtrip(path)
+
+    # Simulate handler updating full_name and avatar
+    updated_alice = dict(doc["users"]["alice"])
+    updated_alice.update(full_name="New Name", avatar="/img/alice.png")
+    doc["users"]["alice"] = updated_alice
+
+    configio.write_config(path, doc)
+    alice_on_disk = configio.read_roundtrip(path)["users"]["alice"]
+    assert alice_on_disk["full_name"] == "New Name"
+    assert alice_on_disk["avatar"] == "/img/alice.png"
+    assert alice_on_disk["password"] == initial_hash  # unchanged
+
+
+def test_put_users_me_changes_password(tmp_path):
+    """After a new hash is written, only the new password authenticates."""
+    from hbd.server import configio
+
+    old_hash = users_mod.hash_password("oldpass")
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(
+        f"hbd_port: 50004\nusers:\n  alice:\n    full_name: Alice\n    password: {old_hash}\n"
+    )
+    path = str(config_file)
+
+    doc = configio.read_roundtrip(path)
+    doc["users"]["alice"]["password"] = users_mod.hash_password("newpass")
+    configio.write_config(path, doc)
+
+    # Load users from new config and authenticate with new password
+    reloaded = configio.read_roundtrip(path)
+    users_mod.load_users({"users": dict(reloaded["users"])})
+    assert users_mod.authenticate("alice", "newpass") is not None
+    assert users_mod.authenticate("alice", "oldpass") is None
+
+
+def test_put_users_me_notification_channels(tmp_path):
+    """A user's notification_channels list survives a write and re-read."""
+    from hbd.server import configio
+
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(
+        "hbd_port: 50004\n"
+        "notification_channels:\n  pushover_ops:\n    type: pushover\n"
+        "users:\n  alice:\n    full_name: Alice\n    notification_channels: []\n"
+    )
+    path = str(config_file)
+
+    doc = configio.read_roundtrip(path)
+    doc["users"]["alice"]["notification_channels"] = ["pushover_ops"]
+    configio.write_config(path, doc)
+
+    alice_on_disk = configio.read_roundtrip(path)["users"]["alice"]
+    assert alice_on_disk["notification_channels"] == ["pushover_ops"]
+
+
+def test_visible_channels_excludes_private_from_others():
+    """Private channels owned by another user must not appear in the visible set.
+
+    NOTE(review): this checks a local copy of the visibility rule, said to mirror
+    http.py's _visible_channels_for_user; it does not call the real helper.
+    """
+    config = {
+        "notification_channels": {
+            "public_ch":  {"type": "pushover", "token": "t", "user": "u"},
+            "alice_priv": {"type": "email", "owner": "alice", "private": True,
+                           "recipients": ["a@b.com"], "sender": "s@b.com", "smtp_server": "s"},
+            "bob_priv":   {"type": "email", "owner": "bob", "private": True,
+                           "recipients": ["b@b.com"], "sender": "s@b.com", "smtp_server": "s"},
+        }
+    }
+
+    class FakeUser:
+        """Carries only the two attributes the visibility rule reads."""
+
+        def __init__(self, username, admin=False):
+            self.username = username
+            self.admin = admin
+
+    alice = FakeUser("alice")
+    bob = FakeUser("bob")
+    admin = FakeUser("admin", admin=True)
+
+    # Simulate _visible_channels_for_user logic (mirrors http.py implementation)
+    def visible(user):
+        all_channels = config.get("notification_channels") or {}
+        # Admins see every channel regardless of the private flag.
+        if user.admin:
+            return set(all_channels.keys())
+        # Everyone else sees non-private channels plus the private ones they own.
+        return {
+            name for name, cfg in all_channels.items()
+            if not cfg.get("private") or cfg.get("owner") == user.username
+        }
+
+    assert visible(alice) == {"public_ch", "alice_priv"}
+    assert visible(bob) == {"public_ch", "bob_priv"}
+    assert visible(admin) == {"public_ch", "alice_priv", "bob_priv"}
@@ -0,0 +1,178 @@
+"""Tests for notification channel CRUD via configio helpers and visibility logic."""
+import pytest
+from hbd.server import configio, settings as settings_mod
+
+
+# Baseline config text written to a temp file by the configio tests in this
+# module: an hbd_port plus a single "pushover_ops" notification channel.
+SAMPLE_YAML = """\
+hbd_port: 50004
+notification_channels:
+  pushover_ops:
+    type: pushover
+    token: abc123
+    user: usr456
+"""
+
+
+# ---------------------------------------------------------------------------
+# configio helpers
+# ---------------------------------------------------------------------------
+
+def test_apply_channel_adds_new_entry(tmp_path):
+    """apply_channel adds a new named channel and keeps the existing one."""
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(SAMPLE_YAML)
+    doc = configio.read_roundtrip(str(config_file))
+    new_channel = {"type": "email", "recipients": ["ops@example.com"]}
+    configio.apply_channel(doc, "email_ops", new_channel)
+    channels = doc["notification_channels"]
+    assert "email_ops" in channels
+    assert channels["email_ops"]["type"] == "email"
+    # Existing channel preserved
+    assert "pushover_ops" in channels
+
+
+def test_apply_channel_updates_existing(tmp_path):
+    """apply_channel on an existing name replaces that channel's values."""
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(SAMPLE_YAML)
+    doc = configio.read_roundtrip(str(config_file))
+    replacement = {"type": "pushover", "token": "new_tok", "user": "new_usr"}
+    configio.apply_channel(doc, "pushover_ops", replacement)
+    assert doc["notification_channels"]["pushover_ops"]["token"] == "new_tok"
+
+
+def test_apply_channel_creates_section_if_absent():
+    """apply_channel creates "notification_channels" when the config lacks it."""
+    doc = {"hbd_port": 50004}
+    channel = {"type": "pushover", "token": "t", "user": "u"}
+    configio.apply_channel(doc, "test_ch", channel)
+    assert "notification_channels" in doc
+    assert "test_ch" in doc["notification_channels"]
+
+
+def test_delete_channel_removes_entry(tmp_path):
+    """delete_channel removes the named channel from the loaded config."""
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(SAMPLE_YAML)
+    doc = configio.read_roundtrip(str(config_file))
+    configio.delete_channel(doc, "pushover_ops")
+    remaining = doc["notification_channels"]
+    assert "pushover_ops" not in remaining
+
+
+def test_delete_channel_noop_for_missing():
+    """Deleting a channel name that is not present leaves the other channels alone."""
+    doc = {"notification_channels": {"ch1": {"type": "pushover"}}}
+    configio.delete_channel(doc, "nonexistent")  # must not raise
+    channels = doc["notification_channels"]
+    assert "ch1" in channels
+
+
+def test_delete_channel_noop_when_no_section():
+    """delete_channel on a config with no channels section completes without raising."""
+    empty_config = {}
+    configio.delete_channel(empty_config, "anything")  # must not raise
+
+
+def test_apply_channel_persisted_after_write(tmp_path):
+    """A channel added with apply_channel is present after write_config and a fresh read."""
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(SAMPLE_YAML)
+    path = str(config_file)
+
+    doc = configio.read_roundtrip(path)
+    signal_channel = {"type": "signal", "user": "+1", "recipient": "+2"}
+    configio.apply_channel(doc, "signal_ops", signal_channel)
+    configio.write_config(path, doc)
+
+    channels = configio.read_roundtrip(path)["notification_channels"]
+    assert "signal_ops" in channels
+    assert channels["signal_ops"]["user"] == "+1"
+    # Original channel preserved
+    assert "pushover_ops" in channels
+
+
+def test_delete_channel_persisted_after_write(tmp_path):
+    """A deleted channel stays gone after write_config and a fresh read."""
+    config_file = tmp_path / ".hb.yaml"
+    config_file.write_text(SAMPLE_YAML)
+    path = str(config_file)
+
+    doc = configio.read_roundtrip(path)
+    configio.delete_channel(doc, "pushover_ops")
+    configio.write_config(path, doc)
+
+    reloaded = configio.read_roundtrip(path)
+    # The section itself may be absent or empty after the delete.
+    channels = reloaded.get("notification_channels") or {}
+    assert "pushover_ops" not in channels
+
+
+# ---------------------------------------------------------------------------
+# Visibility logic (mirrors http.py _visible_channels_for_user)
+# ---------------------------------------------------------------------------
+
+def _visible(config, user):
+    """Return the set of channel names ``user`` may see (test-local copy of the rule).
+
+    Admins get every channel name. Other users get dict-valued channels that
+    are either not private or owned by them.
+    """
+    channels = config.get("notification_channels") or {}
+    if user.get("admin"):
+        return set(channels.keys())
+    viewer = user["username"]
+    names = set()
+    for name, cfg in channels.items():
+        if not isinstance(cfg, dict):
+            continue
+        if cfg.get("private") and cfg.get("owner") != viewer:
+            continue
+        names.add(name)
+    return names
+
+
+# Channel set used by the visibility tests:
+#   pub_ch      - no owner and no private flag
+#   alice_priv  - private, owned by "alice"
+#   bob_priv    - private, owned by "bob"
+#   admin_owned - has an owner ("adminuser") but no private flag
+CONFIG_VISIBILITY = {
+    "notification_channels": {
+        "pub_ch":      {"type": "pushover", "token": "t", "user": "u"},
+        "alice_priv":  {"type": "email", "owner": "alice", "private": True,
+                        "recipients": ["a@a.com"], "sender": "s@a.com", "smtp_server": "s"},
+        "bob_priv":    {"type": "signal", "owner": "bob", "private": True,
+                        "user": "+1", "recipient": "+2"},
+        "admin_owned": {"type": "pushover", "token": "t2", "user": "u2", "owner": "adminuser"},
+    }
+}
+
+
+def test_public_channel_visible_to_all():
+    """The channel without a private flag is visible to every non-admin user."""
+    non_admins = [{"username": uname, "admin": False} for uname in ("alice", "bob", "carol")]
+    for viewer in non_admins:
+        assert "pub_ch" in _visible(CONFIG_VISIBILITY, viewer)
+
+
+def test_private_channel_visible_only_to_owner():
+    """Each private channel is visible to its owner and hidden from other non-admins."""
+    def seen_by(username):
+        return _visible(CONFIG_VISIBILITY, {"username": username, "admin": False})
+
+    alice_sees = seen_by("alice")
+    bob_sees = seen_by("bob")
+    carol_sees = seen_by("carol")
+
+    assert "alice_priv" in alice_sees
+    assert "alice_priv" not in bob_sees
+    assert "alice_priv" not in carol_sees
+
+    assert "bob_priv" in bob_sees
+    assert "bob_priv" not in alice_sees
+
+
+def test_admin_sees_all_channels():
+    """An admin user sees every configured channel, private or not."""
+    admin_user = {"username": "adminuser", "admin": True}
+    everything = {"pub_ch", "alice_priv", "bob_priv", "admin_owned"}
+    assert _visible(CONFIG_VISIBILITY, admin_user) == everything
+
+
+def test_admin_owned_channel_is_public_by_default():
+    """A channel with an owner but no private flag is visible to other users."""
+    viewer = {"username": "alice", "admin": False}
+    alice_sees = _visible(CONFIG_VISIBILITY, viewer)
+    assert "admin_owned" in alice_sees
+
+
+# ---------------------------------------------------------------------------
+# Channel type schemas
+# ---------------------------------------------------------------------------
+
+def test_all_required_types_in_schema():
+    """Every expected channel type has an entry in CHANNEL_TYPE_SCHEMAS."""
+    required_types = ("pushover", "email", "signal", "matrix", "sms_voipms")
+    known_types = settings_mod.CHANNEL_TYPE_SCHEMAS
+    for type_id in required_types:
+        assert type_id in known_types
+
+
+def test_schema_fields_have_required_keys():
+    """Each schema has "label" and "fields", and each field has key, label, type and required."""
+    required_field_keys = ("key", "label", "type", "required")
+    for type_id, schema in settings_mod.CHANNEL_TYPE_SCHEMAS.items():
+        assert "label" in schema, f"{type_id} missing label"
+        assert "fields" in schema, f"{type_id} missing fields"
+        for field in schema["fields"]:
+            for required_key in required_field_keys:
+                assert required_key in field, f"{type_id} field missing {required_key!r}"
+
+
+def test_secret_fields_use_secret_type():
+    """Fields whose key is a known secret name must declare type 'secret'."""
+    secret_keys = {"token", "user_key", "api_key", "api_password",
+                   "smtp_password", "access_token"}
+    for type_id, schema in settings_mod.CHANNEL_TYPE_SCHEMAS.items():
+        # Lazily narrow to the fields that carry one of the known secret keys.
+        secret_fields = (fld for fld in schema["fields"] if fld["key"] in secret_keys)
+        for fld in secret_fields:
+            assert fld["type"] == "secret", (
+                f"{type_id}.{fld['key']} should be type 'secret'"
+            )
+
+
+def test_channel_labels_not_empty():
+    """Every channel type schema has a label that is not blank."""
+    for type_id, schema in settings_mod.CHANNEL_TYPE_SCHEMAS.items():
+        stripped_label = schema["label"].strip()
+        assert stripped_label, f"{type_id} has empty label"
@@ -0,0 +1,602 @@
+import logging
+import time as time_mod
+from unittest.mock import AsyncMock, MagicMock, patch
+from urllib.parse import urlparse, parse_qs
+
+import pytest
+
+from hbd.server import oauth
+from hbd.server import users as users_mod
+from hbd.server.users import User
+
+
+# Config fixtures for the OAuth tests:
+#   CFG_OFF     - no "oauth" key at all
+#   CFG_ON      - one "gitea" entry with url, client_id and client_secret (no "type")
+#   CFG_PARTIAL - a "gitea" entry with only a url, i.e. no client credentials
+CFG_OFF = {}
+CFG_ON = {
+    "oauth": {
+        "gitea": {
+            "url": "https://git.example.com",
+            "client_id": "cid",
+            "client_secret": "csec",
+        }
+    }
+}
+CFG_PARTIAL = {"oauth": {"gitea": {"url": "https://git.example.com"}}}
+
+
+@pytest.fixture(autouse=True)
+def clear_oauth_states():
+    """Empty oauth._states before and after every test so tokens do not carry over."""
+    oauth._states.clear()
+    yield
+    oauth._states.clear()
+
+
+@pytest.fixture(autouse=True)
+def reset_users_dict():
+    """Snapshot users_mod.users before each test and rebind it to that snapshot afterwards."""
+    snapshot = dict(users_mod.users)  # shallow copy taken before the test body runs
+    yield
+    users_mod.users = snapshot
+
+
+
+def test_make_state_returns_unique_tokens():
+    """Two consecutive make_state calls return different 64-character tokens."""
+    first, second = oauth.make_state(), oauth.make_state()
+    assert first != second
+    assert len(first) == 64  # 32 bytes hex
+
+
+def test_validate_state_valid():
+    """A freshly issued state token validates as True."""
+    fresh_state = oauth.make_state()
+    outcome = oauth.validate_state(fresh_state)
+    assert outcome is True
+
+
+def test_validate_state_consumed_on_use():
+    """A state token is rejected the second time it is validated."""
+    token = oauth.make_state()
+    oauth.validate_state(token)  # first use consumes the token
+    second_attempt = oauth.validate_state(token)
+    assert second_attempt is False  # replay rejected
+
+
+def test_validate_state_unknown():
+    """A token that was never issued does not validate."""
+    never_issued = "notastate"
+    assert oauth.validate_state(never_issued) is False
+
+
+def test_validate_state_expired(monkeypatch):
+    """A state whose stored timestamp lies 1000 seconds in the past is rejected."""
+    token = oauth.make_state()
+    # Wind expiry into the past
+    long_ago = time_mod.time() - 1000
+    monkeypatch.setitem(oauth._states, token, long_ago)
+    assert oauth.validate_state(token) is False
+
+
+def _reset_users(entries=None):
+    """Rebind users_mod.users to ``entries``, or to a fresh empty dict when it is falsy."""
+    users_mod.users = entries if entries else {}
+
+
+def test_provision_oauth_user_new():
+    """Provisioning an unknown login registers a non-admin user with an empty password hash."""
+    _reset_users()
+    avatar_url = "https://example.com/avatar.png"
+    created = users_mod.provision_oauth_user("gituser", "Git User", avatar_url)
+    assert created.username == "gituser"
+    assert created.full_name == "Git User"
+    assert created.avatar == "https://example.com/avatar.png"
+    assert created.admin is False
+    assert created.password_hash == ""
+    assert "gituser" in users_mod.users
+
+
+def test_provision_oauth_user_no_password_login():
+    """A freshly provisioned OAuth user rejects a password check."""
+    _reset_users()
+    created = users_mod.provision_oauth_user("gituser", "Git User", "")
+    password_ok = created.check_password("anything")
+    assert password_ok is False
+
+
+def test_provision_oauth_user_existing_updates_profile():
+    """Re-provisioning an existing user updates name and avatar but keeps admin, hash and channels."""
+    alice = User(
+        username="alice",
+        full_name="Old Name",
+        avatar="old.png",
+        password_hash="pbkdf2:sha256:1:salt:abc",
+        admin=True,
+        notification_channels=["chan1"],
+    )
+    _reset_users({"alice": alice})
+
+    updated = users_mod.provision_oauth_user("alice", "New Name", "new.png")
+
+    assert updated.full_name == "New Name"
+    assert updated.avatar == "new.png"
+    # Preserved
+    assert updated.admin is True
+    assert updated.password_hash == "pbkdf2:sha256:1:salt:abc"
+    assert updated.notification_channels == ["chan1"]
+
+
+def test_provision_oauth_user_does_not_overwrite_with_empty():
+    """Empty name and avatar values leave an existing user's profile unchanged."""
+    bob = User(username="bob", full_name="Bob", avatar="bob.png")
+    _reset_users({"bob": bob})
+    after = users_mod.provision_oauth_user("bob", "", "")
+    assert after.full_name == "Bob"
+    assert after.avatar == "bob.png"
+
+
+def test_provision_oauth_user_survives_config_reload():
+    """An OAuth-provisioned user is still registered after load_users runs with an empty config."""
+    _reset_users()
+    users_mod.provision_oauth_user("oauthonly", "OAuth Only", "https://example.com/a.png")
+    assert "oauthonly" in users_mod.users
+    # Reload with empty config — OAuth user should survive
+    empty_config = {}
+    users_mod.load_users(empty_config)
+    assert "oauthonly" in users_mod.users
+
+
+
+# ---------------------------------------------------------------------------
+# Integration-style tests: callback logic chain
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_callback_invalid_state_rejects():
+    """A state token that was never issued is rejected by validate_state."""
+    rejected = oauth.validate_state("this-is-not-a-real-state")
+    assert rejected is False
+
+
+@pytest.mark.asyncio
+async def test_full_oauth_flow_chain():
+    """Integration-style test: state → exchange → fetch → provision chain."""
+    p = _gitea_provider()
+    redirect_uri = "https://hbd.example.com/login/oauth/gitea/callback"
+
+    state = oauth.make_state()
+    assert oauth.validate_state(state) is True
+
+    mock_token_response = AsyncMock()
+    mock_token_response.status = 200
+    mock_token_response.json = AsyncMock(return_value={"access_token": "flow_token"})
+
+    mock_user_response = AsyncMock()
+    mock_user_response.status = 200
+    mock_user_response.json = AsyncMock(return_value={
+        "login": "flowuser",
+        "full_name": "Flow User",
+        "avatar_url": "https://git.example.com/avatars/flow.png",
+    })
+
+    mock_session = MagicMock()
+    mock_session.post = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=mock_token_response),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+    mock_session.get = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=mock_user_response),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=mock_session),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        token = await oauth.exchange_code(p, "authcode", redirect_uri)
+        profile = await oauth.fetch_user(p, token)
+
+    assert token == "flow_token"
+    assert profile["login"] == "flowuser"
+
+    _reset_users()
+    user = users_mod.provision_oauth_user(
+        profile["login"], profile["full_name"], profile["avatar_url"]
+    )
+    assert user.username == "flowuser"
+    assert user.check_password("anything") is False
+
+
+# ---------------------------------------------------------------------------
+# get_providers()
+# ---------------------------------------------------------------------------
+
+# A single GitHub entry: typed "github", client credentials only, no url.
+CFG_GITHUB = {
+    "oauth": {
+        "github": {"type": "github", "client_id": "ghid", "client_secret": "ghs"},
+    }
+}
+
+# A single Nextcloud entry named "nc" with an explicit url.
+CFG_NEXTCLOUD = {
+    "oauth": {
+        "nc": {
+            "type": "nextcloud",
+            "url": "https://nc.example.com",
+            "client_id": "ncid",
+            "client_secret": "ncs",
+        }
+    }
+}
+
+# Three entries at once; "mygitea" also sets a custom label and logo.
+CFG_MULTI = {
+    "oauth": {
+        "mygitea": {
+            "type": "gitea",
+            "url": "https://git.example.com",
+            "client_id": "cid",
+            "client_secret": "cs",
+            "label": "Work Gitea",
+            "logo": "https://example.com/logo.png",
+        },
+        "github": {"type": "github", "client_id": "ghid", "client_secret": "ghs"},
+        "nc": {
+            "type": "nextcloud",
+            "url": "https://nc.example.com",
+            "client_id": "ncid",
+            "client_secret": "ncs",
+        },
+    }
+}
+
+
+def test_get_providers_backward_compat_no_type_field():
+    """A legacy entry without a 'type' key is resolved as a Gitea provider."""
+    resolved = oauth.get_providers(CFG_ON)
+    assert len(resolved) == 1
+    legacy = resolved[0]
+    assert legacy.name == "gitea"
+    assert legacy.type == "gitea"
+    assert legacy.label == "Gitea"
+    assert legacy.client_id == "cid"
+    assert legacy.authorize_url == "https://git.example.com/login/oauth/authorize"
+    assert legacy.token_url == "https://git.example.com/login/oauth/access_token"
+    assert legacy.profile_url == "https://git.example.com/api/v1/user"
+    assert legacy.scope == "user:email"
+    assert legacy.profile_data_path == []
+
+
+def test_get_providers_multiple():
+    """All three entries of CFG_MULTI are resolved, each under its config key."""
+    resolved = oauth.get_providers(CFG_MULTI)
+    assert len(resolved) == 3
+    resolved_names = [entry.name for entry in resolved]
+    for expected in ("mygitea", "github", "nc"):
+        assert expected in resolved_names
+
+
+def test_get_providers_custom_label_and_logo():
+    # "mygitea" is the only CFG_MULTI entry that sets both optional keys.
+    work_gitea = next(x for x in oauth.get_providers(CFG_MULTI) if x.name == "mygitea")
+    assert work_gitea.label == "Work Gitea"
+    assert work_gitea.logo == "https://example.com/logo.png"
+
+
+def test_get_providers_github_default_label():
+    github = oauth.get_providers(CFG_GITHUB)[0]  # config sets neither label nor logo
+    assert github.label == "GitHub"
+    assert github.logo == ""
+
+
+def test_get_providers_github_fixed_urls():
+    """GitHub endpoints and scope are resolved although the config has no 'url'."""
+    github = oauth.get_providers(CFG_GITHUB)[0]
+    assert github.authorize_url == "https://github.com/login/oauth/authorize"
+    assert github.token_url == "https://github.com/login/oauth/access_token"
+    assert github.profile_url == "https://api.github.com/user"
+    assert github.scope == "read:user"
+
+
+def test_get_providers_nextcloud_urls_and_path():
+    """Nextcloud URLs start with the configured base; profile data sits at ocs/data."""
+    nextcloud = oauth.get_providers(CFG_NEXTCLOUD)[0]
+    assert nextcloud.authorize_url == "https://nc.example.com/apps/oauth2/authorize"
+    assert nextcloud.token_url == "https://nc.example.com/apps/oauth2/api/v1/token"
+    assert nextcloud.profile_url == "https://nc.example.com/ocs/v2.php/cloud/user?format=json"
+    assert nextcloud.profile_data_path == ["ocs", "data"]
+    assert nextcloud.scope == ""
+
+
+def test_get_providers_skips_missing_client_id(caplog):
+    entry = {"url": "https://git.example.com", "client_secret": "cs"}  # no client_id
+    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
+        resolved = oauth.get_providers({"oauth": {"gitea": entry}})
+    assert resolved == []
+    assert "missing" in caplog.text.lower()
+
+
+def test_get_providers_skips_missing_client_secret(caplog):
+    entry = {"url": "https://git.example.com", "client_id": "cid"}  # no client_secret
+    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
+        resolved = oauth.get_providers({"oauth": {"gitea": entry}})
+    assert resolved == []
+    assert "missing" in caplog.text.lower()
+
+
+def test_get_providers_skips_missing_url_for_gitea(caplog):
+    entry = {"type": "gitea", "client_id": "cid", "client_secret": "cs"}  # no url
+    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
+        resolved = oauth.get_providers({"oauth": {"gitea": entry}})
+    assert resolved == []
+    assert "url" in caplog.text.lower()
+
+
+def test_get_providers_skips_missing_url_for_nextcloud(caplog):
+    entry = {"type": "nextcloud", "client_id": "cid", "client_secret": "cs"}  # no url
+    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
+        resolved = oauth.get_providers({"oauth": {"nc": entry}})
+    assert resolved == []
+    assert "url" in caplog.text.lower()
+
+
+def test_get_providers_github_no_url_required():
+    # CFG_GITHUB has no "url" key, yet its single entry must still resolve.
+    assert len(oauth.get_providers(CFG_GITHUB)) == 1
+
+
+def test_get_providers_skips_unknown_type(caplog):
+    """An entry with an unsupported 'type' is skipped and that type is logged."""
+    cfg = {"oauth": {"mystery": {"type": "saml", "client_id": "cid", "client_secret": "cs"}}}
+    with caplog.at_level(logging.WARNING, logger="hbd.server.oauth"):
+        result = oauth.get_providers(cfg)
+    assert result == []
+    assert "saml" in caplog.text
+
+
+def test_get_providers_empty_config():
+    for cfg in ({}, CFG_OFF):  # an empty dict, then the CFG_OFF fixture
+        assert oauth.get_providers(cfg) == []
+
+
+# ---------------------------------------------------------------------------
+# build_auth_url / exchange_code / fetch_user (generic, ResolvedProvider-based)
+# ---------------------------------------------------------------------------
+
+def _gitea_provider() -> oauth.ResolvedProvider:
+    return oauth.get_providers(CFG_ON)[0]  # first provider resolved from CFG_ON
+
+
+def _github_provider() -> oauth.ResolvedProvider:
+    return oauth.get_providers(CFG_GITHUB)[0]  # first provider resolved from CFG_GITHUB
+
+
+def _nextcloud_provider() -> oauth.ResolvedProvider:
+    return oauth.get_providers(CFG_NEXTCLOUD)[0]  # first provider resolved from CFG_NEXTCLOUD
+
+
+def test_build_auth_url_gitea():
+    callback = "https://hbd.example.com/login/oauth/gitea/callback"
+    target = urlparse(oauth.build_auth_url(_gitea_provider(), "teststate", callback))
+    params = parse_qs(target.query)
+    # Endpoint first, then each query parameter (parse_qs yields lists of values).
+    assert target.netloc == "git.example.com"
+    assert target.path == "/login/oauth/authorize"
+    assert params["client_id"] == ["cid"]
+    assert params["state"] == ["teststate"]
+    assert params["scope"] == ["user:email"]
+    assert params["response_type"] == ["code"]
+    assert params["redirect_uri"] == [callback]
+
+
+def test_build_auth_url_github():
+    """The GitHub authorize URL targets github.com and requests read:user."""
+    callback = "https://hbd.example.com/login/oauth/github/callback"
+    target = urlparse(oauth.build_auth_url(_github_provider(), "st", callback))
+    params = parse_qs(target.query)
+    assert target.netloc == "github.com"
+    assert params["scope"] == ["read:user"]
+
+
+def test_build_auth_url_nextcloud_no_scope_param():
+    """Nextcloud's scope is empty, so the parsed query must contain no 'scope'."""
+    callback = "https://hbd.example.com/login/oauth/nc/callback"
+    auth_url = oauth.build_auth_url(_nextcloud_provider(), "st", callback)
+    params = parse_qs(urlparse(auth_url).query)
+    assert "scope" not in params
+
+
+@pytest.mark.asyncio
+async def test_exchange_code_generic_returns_token():
+    """A 200 token reply carrying access_token yields exactly that token."""
+    provider = _gitea_provider()
+    callback = "https://hbd.example.com/login/oauth/gitea/callback"
+    token_reply = AsyncMock()
+    token_reply.status = 200
+    token_reply.json = AsyncMock(return_value={"access_token": "tok123"})
+
+    post_ctx = AsyncMock(
+        __aenter__=AsyncMock(return_value=token_reply),
+        __aexit__=AsyncMock(return_value=False),
+    )
+    http = MagicMock()
+    http.post = MagicMock(return_value=post_ctx)
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        assert await oauth.exchange_code(provider, "mycode", callback) == "tok123"
+
+
+@pytest.mark.asyncio
+async def test_exchange_code_sends_accept_json():
+    """Accept: application/json must be sent with the token request (required by GitHub)."""
+    provider = _github_provider()
+    callback = "https://hbd.example.com/login/oauth/github/callback"
+    seen_headers = {}
+
+    token_reply = AsyncMock()
+    token_reply.status = 200
+    token_reply.json = AsyncMock(return_value={"access_token": "ghtoken"})
+
+    def recording_post(url, **kwargs):
+        # Remember the headers of the outgoing request, then hand back the reply.
+        seen_headers.update(kwargs.get("headers", {}))
+        return AsyncMock(
+            __aenter__=AsyncMock(return_value=token_reply),
+            __aexit__=AsyncMock(return_value=False),
+        )
+
+    http = MagicMock()
+    http.post = recording_post
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        await oauth.exchange_code(provider, "code", callback)
+
+    assert seen_headers.get("Accept") == "application/json"
+
+
+@pytest.mark.asyncio
+async def test_exchange_code_raises_on_error_status():
+    """A 401 reply from the token endpoint must surface as OAuthError."""
+    provider = _gitea_provider()
+    denied = AsyncMock()
+    denied.status = 401
+    denied.text = AsyncMock(return_value="unauthorized")
+
+    http = MagicMock()
+    http.post = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=denied),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        with pytest.raises(oauth.OAuthError):
+            await oauth.exchange_code(provider, "badcode", "https://hbd.example.com/login/oauth/gitea/callback")
+
+
+@pytest.mark.asyncio
+async def test_exchange_code_raises_when_no_access_token():
+    """A 200 reply whose JSON lacks access_token must raise OAuthError."""
+    provider = _gitea_provider()
+    tokenless = AsyncMock()
+    tokenless.status = 200
+    tokenless.json = AsyncMock(return_value={"error": "bad_request"})
+
+    http = MagicMock()
+    http.post = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=tokenless),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        with pytest.raises(oauth.OAuthError):
+            await oauth.exchange_code(provider, "mycode", "https://hbd.example.com/login/oauth/gitea/callback")
+
+
+@pytest.mark.asyncio
+async def test_fetch_user_gitea_returns_profile():
+    """A Gitea profile payload comes back with the same login/full_name/avatar_url."""
+    provider = _gitea_provider()
+    expected = {
+        "login": "alice",
+        "full_name": "Alice Smith",
+        "avatar_url": "https://git.example.com/avatars/alice.png",
+    }
+    reply = AsyncMock()
+    reply.status = 200
+    reply.json = AsyncMock(return_value=dict(expected))
+
+    # session.get(...) is mocked as an async context manager yielding the reply.
+    http = MagicMock()
+    http.get = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=reply),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        profile = await oauth.fetch_user(provider, "tok123")
+
+    # The mock served a copy, so `expected` cannot have been mutated by fetch_user.
+    assert profile == expected
+
+
+@pytest.mark.asyncio
+async def test_fetch_user_github_maps_name_field():
+    """GitHub's 'name' field must be exposed as 'full_name' in the profile."""
+    provider = _github_provider()
+    reply = AsyncMock()
+    reply.status = 200
+    reply.json = AsyncMock(return_value={
+        "login": "bobgh",
+        "name": "Bob GitHub",
+        "avatar_url": "https://avatars.githubusercontent.com/u/1",
+    })
+
+    http = MagicMock()
+    http.get = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=reply),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        profile = await oauth.fetch_user(provider, "ghtoken")
+
+    assert profile["login"] == "bobgh"
+    assert profile["full_name"] == "Bob GitHub"
+    assert profile["avatar_url"] == "https://avatars.githubusercontent.com/u/1"
+
+
+@pytest.mark.asyncio
+async def test_fetch_user_nextcloud_nested_extraction():
+    """The Nextcloud profile lives under ocs.data and carries no avatar."""
+    provider = _nextcloud_provider()
+    payload = {
+        "ocs": {
+            "meta": {"status": "ok", "statuscode": 200},
+            "data": {
+                "id": "ncuser",
+                "display-name": "NC User",
+                "email": "nc@example.com",
+            },
+        }
+    }
+    reply = AsyncMock()
+    reply.status = 200
+    reply.json = AsyncMock(return_value=payload)
+
+    http = MagicMock()
+    http.get = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=reply),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        profile = await oauth.fetch_user(provider, "nctoken")
+
+    assert profile["login"] == "ncuser"
+    assert profile["full_name"] == "NC User"
+    assert profile["avatar_url"] == ""  # Nextcloud has no avatar field
+
+
+@pytest.mark.asyncio
+async def test_fetch_user_raises_on_error_status():
+    """A 401 reply from the profile endpoint must surface as OAuthError."""
+    provider = _gitea_provider()
+    denied = AsyncMock()
+    denied.status = 401
+    denied.text = AsyncMock(return_value="unauthorized")
+
+    http = MagicMock()
+    http.get = MagicMock(return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=denied),
+        __aexit__=AsyncMock(return_value=False),
+    ))
+    with patch("hbd.server.oauth.aiohttp.ClientSession", return_value=AsyncMock(
+        __aenter__=AsyncMock(return_value=http),
+        __aexit__=AsyncMock(return_value=False),
+    )):
+        with pytest.raises(oauth.OAuthError):
+            await oauth.fetch_user(provider, "badtoken")
+
+
+def test_is_enabled_with_valid_provider():
+    assert oauth.is_enabled(CFG_ON) is True  # CFG_ON resolves to exactly one provider
+
+
+def test_is_enabled_false_when_no_providers():
+    assert oauth.is_enabled(CFG_OFF) is False  # CFG_OFF resolves to no providers
+
+
+def test_is_enabled_false_partial_config():
+    assert oauth.is_enabled(CFG_PARTIAL) is False  # NOTE(review): CFG_PARTIAL presumably omits a required key — confirm
@@ -0,0 +1,114 @@
+import pytest
+from hbd.server import settings as settings_mod
+
+CFG = {  # shared config fixture for the settings-section tests in this module
+    "hbd_port": 50004,
+    "interval": 20,
+    "grace": 2,
+    "users": {
+        "alice": {"full_name": "Alice Smith", "admin": True, "password": "pbkdf2:sha256:abc",
+                  "notification_channels": ["pushover_ops"]},
+    },
+    "oauth": {
+        "gitea": {"type": "gitea", "url": "https://git.example.com",
+                  "client_id": "cid", "client_secret": "csec", "label": "Sign in with Gitea"},
+    },
+    "notification_channels": {
+        "pushover_ops": {"type": "pushover", "token": "tok", "user": "usr"},
+    },
+    "hosts": {},
+}
+
+
+def test_sections_have_section_mode():
+    known_modes = ("form", "yaml", "channels", "hosts")
+    for section in settings_mod.get_settings_sections(CFG):
+        assert "section_mode" in section, f"Section {section['id']} missing section_mode"
+        assert section["section_mode"] in known_modes
+
+
+def test_sections_have_api_section():
+    # Every section dict must expose the "api_section" key.
+    for section in settings_mod.get_settings_sections(CFG):
+        assert "api_section" in section, f"Section {section['id']} missing api_section"
+
+
+def test_network_section_has_editable_fields():
+    all_sections = settings_mod.get_settings_sections(CFG)
+    net = next(sec for sec in all_sections if sec["id"] == "network")
+    assert net["section_mode"] == "form"
+    assert net["api_section"] == "server"
+    editable_count = sum(1 for field in net["fields"] if field["editable"])
+    assert editable_count >= 2  # hbd_port, ws_port at minimum
+
+
+def test_yaml_sections_have_correct_mode():
+    all_sections = settings_mod.get_settings_sections(CFG)
+    yaml_by_id = {sec["id"]: sec for sec in all_sections if sec["section_mode"] == "yaml"}
+    assert "channels" not in yaml_by_id  # now uses "channels" mode
+    assert "hosts" not in yaml_by_id     # now uses "hosts" mode
+    assert "thresholds" in yaml_by_id
+    assert "dns" in yaml_by_id
+    assert yaml_by_id["thresholds"]["api_section"] == "thresholds"
+    assert yaml_by_id["dns"]["api_section"] == "dns"
+
+
+def test_hosts_section_uses_hosts_mode():
+    # Locate the section by id, then check its mode and API section name.
+    hosts = next(s for s in settings_mod.get_settings_sections(CFG) if s["id"] == "hosts")
+    assert hosts["section_mode"] == "hosts"
+    assert hosts["api_section"] == "hosts"
+
+
+def test_channels_section_uses_channels_mode():
+    all_sections = settings_mod.get_settings_sections(CFG)
+    chan_section = next(sec for sec in all_sections if sec["id"] == "channels")
+    assert chan_section["section_mode"] == "channels"
+    assert chan_section["api_section"] == "notification_channels"
+    assert len(chan_section["channels"]) == 1
+    only_channel = chan_section["channels"][0]
+    assert only_channel["name"] == "pushover_ops"
+    assert only_channel["type"] == "pushover"
+    for key in ("owner", "private"):
+        assert key in only_channel
+
+
+def test_channel_type_schemas_exported():
+    assert hasattr(settings_mod, "CHANNEL_TYPE_SCHEMAS")
+    schemas = settings_mod.CHANNEL_TYPE_SCHEMAS
+    for channel_type in ("pushover", "email", "signal", "matrix", "sms_voipms"):
+        assert channel_type in schemas
+        schema = schemas[channel_type]
+        assert "label" in schema
+        assert "fields" in schema
+        for field in schema["fields"]:
+            for key in ("key", "type", "required"):
+                assert key in field
+
+
+def test_oauth_section_exists():
+    all_sections = settings_mod.get_settings_sections(CFG)
+    oauth_section = next((sec for sec in all_sections if sec["id"] == "oauth"), None)
+    assert oauth_section is not None
+    assert oauth_section["section_mode"] == "form"
+    assert oauth_section["api_section"] == "oauth"
+    assert len(oauth_section["providers"]) == 1
+    assert oauth_section["providers"][0]["name"] == "gitea"
+    assert oauth_section["providers"][0]["client_secret"] == "•••"  # secret is masked
+
+
+def test_all_channel_names_returned():
+    settings_data = settings_mod.get_settings_data(CFG)
+    assert "all_channel_names" in settings_data
+    assert "pushover_ops" in settings_data["all_channel_names"]
+
+
+def test_users_section_has_user_list():
+    all_sections = settings_mod.get_settings_sections(CFG)
+    users_section = next(sec for sec in all_sections if sec["id"] == "users")
+    assert users_section["section_mode"] == "form"
+    assert users_section["api_section"] == "users"
+    assert len(users_section["users"]) == 1
+    listed = users_section["users"][0]
+    assert listed["username"] == "alice"
+    assert "password" not in listed  # the password hash is never exposed
@@ -1,9 +1,8 @@
 [tox]
 envlist = py, lint, mypy
-skipsdist = True

 [testenv]
-deps = -rrequirements-dev.txt
+extras = dev
 commands =
    pytest -q