From 018409e71dae3a373f8395c442961a2b923d6461 Mon Sep 17 00:00:00 2001
From: Andreas Wrede <andreas@wrede.ca>
Date: Tue, 5 May 2026 13:45:43 -0400
Subject: [PATCH] docs: correct README inaccuracies found during code audit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add ping_monitor to built-in plugins list
- Update cpu_monitor (uptime) and memory_monitor (ZFS ARC) descriptions
- Replace "aggregated status" bullet with accurate per-check reporting note
- Fix RTT hysteresis default: 0.1 → 0.02
- Fix client YAML config: remove non-existent server:/port: keys, use hb_port:
- Fix nagios_runner commands format: plain strings → {name:, command:} dicts
- Fix Supported Metrics: exit_code → actual <name>_status_code/<name>_status/<name>_output fields

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/README.md b/README.md
index 5308a96..9258bbc 100644
--- a/README.md
+++ b/README.md
@@ -58,10 +58,11 @@ Heartbeat includes a comprehensive plugin architecture that extends monitoring b
 ### Built-in Plugins
 
 - `os_info`: Collects OS, kernel, distribution, and architecture information
-- `cpu_monitor`: Monitors CPU usage, load average, frequency, and process counts
-- `memory_monitor`: Monitors RAM and swap usage, available memory
+- `cpu_monitor`: Monitors CPU usage, load average, frequency, process counts, and uptime
+- `memory_monitor`: Monitors RAM usage, swap usage, and available memory (ZFS ARC-aware)
 - `disk_monitor`: Monitors disk usage, I/O statistics, and filesystem metrics
 - `network_monitor`: Monitors network interface statistics, bandwidth, and connections
+- `ping_monitor`: Measures round-trip latency to configured hosts
 - `filesystem_info`: Collects mounted filesystem information (physical filesystems only by default)
 - `nagios_runner`: Executes Nagios monitoring plugins (check_disk, check_load, check_http, etc.)
 - `zfs_monitor`: Monitors ZFS pool health, capacity, fragmentation, dedup ratio, and cumulative I/O via `zpool(8)`
@@ -76,7 +77,7 @@ The `nagios_runner` plugin provides seamless integration with the vast Nagios pl
 - Validates absolute command paths at startup and warns on missing or non-executable files
 - Parses exit codes (OK/WARNING/CRITICAL/UNKNOWN)
 - Extracts performance data with thresholds
-- Reports aggregated status across all configured checks
+- Reports per-check status, status code, and output (`<name>_status`, `<name>_status_code`, `<name>_output`); there is no aggregate rollup field
 
 See [docs/NAGIOS_INTEGRATION.md](docs/NAGIOS_INTEGRATION.md) for complete integration guide including configuration examples and custom plugin development.
 
@@ -224,7 +225,7 @@ thresholds:
     <hostname>:
       warning: <milliseconds>   # Warn when RTT > this value
       critical: <milliseconds>  # Critical when RTT > this value
-      hysteresis: 0.1           # Optional: 10% hysteresis (default)
+      hysteresis: 0.02          # Optional: 2% hysteresis (default)
 ```
 
 **Example alerts:**
@@ -275,7 +276,7 @@ All plugin metrics can be thresholded:
 - **Memory**: percent, available_mb, swap_percent
 - **Disk**: Per-partition percent, free_gb, free_mb
 - **Network**: errors_total, dropped packets, connection counts
-- **Nagios**: Any field emitted by `nagios_runner` (status_code, exit_code, performance data, …)
+- **Nagios**: Any field emitted by `nagios_runner` (`<name>_status_code`, `<name>_status`, `<name>_output`, performance data fields)
 
 ### Display Format Templates
 
@@ -514,12 +515,11 @@ You can also run it via the module entrypoint:
 python -m hbd.client.main your-server.example.com
 ```
 
-Client configuration can also be specified in YAML:
+Client configuration can also be specified in YAML (`~/.hbc.yaml`):
 
 ```yaml
-server: hbd.example.com
-port: 50003
-interval: 30
+hb_port: 50003        # Server port (default: 50003)
+interval: 30          # Heartbeat interval in seconds
 plugins:
   cpu_monitor:
     interval: 300      # Check every 5 minutes (default)
@@ -533,10 +533,14 @@ plugins:
   nagios_runner:
     interval: 300      # Check every 5 minutes (default)
     commands:
-      - /usr/lib/nagios/plugins/check_load -w 5,4,3 -c 10,8,6
-      - /usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /
+      - name: check_load
+        command: /usr/lib/nagios/plugins/check_load -w 5,4,3 -c 10,8,6
+      - name: check_disk
+        command: /usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /
 ```
 
+The server hostname is always passed as a positional command-line argument; there is no `server:` config key, and the port is set with `hb_port:` (there is no `port:` key).
+
 All monitoring plugins default to 5-minute (300 second) intervals, but can be customized as needed.
 
 **Connection retry:** If a server is temporarily unreachable, `hbc` retries `open()` indefinitely on every heartbeat interval. IPv6 connections that never succeeded during early startup are dropped after 3 consecutive failures (to handle hosts without IPv6 routing), while IPv4 connections always retry.