Compare commits

..

12 Commits

Author SHA1 Message Date
Andreas Wrede e0443293e9 Merge branch 'master' of git.wrede.ca:andreas/heartbeat
Release / release (push) Successful in 44s
2026-06-06 08:31:26 -04:00
Andreas Wrede 39670f4e63 version 5.3.10 2026-06-06 08:28:43 -04:00
Andreas Wrede 2e88ee2269 feat: clear stale plugin data and persist OAuth users to config
- hbdclass: add per-plugin stale timers; clear history and alerts after
  3× heartbeat interval with no PLG data received
- udp: wire stale timer on every PLG message via _make_plugin_stale_callback
- http: persist new OAuth users to config file on first login

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 08:27:20 -04:00
andreas 2ef7d473c3 Merge pull request 'hbc_mini.c: make it compile on NetBSD' (#1) from woods/heartbeat:master into master
Merge pull request: hbc_mini.c: make it compile on NetBSD
2026-06-03 12:05:29 -04:00
woods 862a9cdea0 hbc_mini.c: make it work on NetBSD
This fixes the numbers by using the correct MIB to match the struct.
2026-06-02 13:42:11 -07:00
woods 9351938b15 hbc_mini.c: make it compile on NetBSD
Use the public "struct uvmexp_sysctl" instead of "struct uvmexp".

The numbers from the memory_monitor are wonky, but it builds and runs.
2026-06-02 12:05:42 -07:00
andreas b6ef2fe065 Merge branch 'master' of git.wrede.ca:andreas/heartbeat
sequencing
2026-06-02 08:01:47 -04:00
andreas d5d2f066b3 fix: don't use pusbover title 2026-06-02 08:01:32 -04:00
Andreas Wrede d9563392c3 fix: remove bak file in bumpminor.sh 2026-06-01 08:34:07 -04:00
andreas 5f090b9d96 feat: auto-scale CPU history graph Y axis
Y axis now fits the actual data range with 10% padding rather than
fixed 0-100%. Grid lines use nice tick steps (1/2/5/10 × magnitude).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-01 07:59:54 -04:00
andreas 3cc1d92eb4 Merge branch 'master' of git.wrede.ca:andreas/heartbeat 2026-06-01 07:56:02 -04:00
andreas 2ddba203df feat: add CPU usage history graph to CPU Monitor section
Renders an SVG line chart above the CPU Usage row using all available
history samples (up to 100). Color adapts green/orange/red by load level.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-01 07:55:55 -04:00
12 changed files with 186 additions and 11 deletions
+12
View File
@@ -2,6 +2,18 @@
All notable changes to this project are documented here, organized by release.
## [5.3.10]
### Added
- clear stale plugin data and persist OAuth users to config
- auto-scale CPU history graph Y axis
- add CPU usage history graph to CPU Monitor section
### Fixed
- remove bak file in bumpminor.sh
---
## [5.3.9]
### Added
+1 -1
View File
@@ -20,7 +20,7 @@ A lightweight UDP-based host monitoring system. Monitored hosts run a client (`h
└────────────────────┘ └────────────────────────────┘
```
**Package:** `hbd` v5.3.9
**Package:** `hbd` v5.3.10
**Python:** 3.11+
### Subpackages
+1 -1
View File
@@ -14,4 +14,4 @@ Install options:
"""
__all__ = ["__version__"]
__version__ = "5.3.9"
__version__ = "5.3.10"
+32
View File
@@ -297,6 +297,8 @@ class Host:
self.plugin_retention = 100 # Keep last N samples per plugin
# Alert state tracking: {metric_path: AlertState}
self.alert_states = {}
# Stale-data timers: {plugin_name: asyncio.TimerHandle}
self.plugin_timers = {}
# User access control
self.owner: str | None = None # username of owner
self.managers: list = [] # usernames with manager role
@@ -483,6 +485,8 @@ class Host:
self.managers = []
if not hasattr(self, "monitors"):
self.monitors = []
if not hasattr(self, "plugin_timers"):
self.plugin_timers = {}
pass
@@ -542,6 +546,34 @@ class Host:
"""
return self.plugin_data
def reset_plugin_timer(self, plugin_name, timeout_seconds, callback):
"""Reset the stale-data timer for a plugin.
If no new PLG data arrives within timeout_seconds, callback(host, plugin_name)
is called so the caller can clear history and alerts.
"""
import asyncio
existing = self.plugin_timers.get(plugin_name)
if existing and not existing.cancelled():
existing.cancel()
async def _fire():
await callback(self, plugin_name)
try:
loop = asyncio.get_event_loop()
self.plugin_timers[plugin_name] = loop.call_later(
timeout_seconds, lambda: asyncio.create_task(_fire())
)
except RuntimeError:
pass
def cancel_plugin_timer(self, plugin_name):
"""Cancel the stale timer for a plugin, if any."""
handle = self.plugin_timers.pop(plugin_name, None)
if handle and not handle.cancelled():
handle.cancel()
# ------------------------------------------------------------------
# User-role helpers
# ------------------------------------------------------------------
+17
View File
@@ -1182,6 +1182,23 @@ async def start(
profile["full_name"],
profile["avatar_url"],
)
# Persist new OAuth users to the config file so they survive restarts.
# Only write when the user isn't already in the config's users section.
if _config_path and not (config.get("users") or {}).get(user.username):
try:
disk_data = configio_mod.read_roundtrip(_config_path)
if not disk_data.get("users"):
disk_data["users"] = {}
disk_data["users"][user.username] = {
k: v for k, v in [
("full_name", user.full_name),
("avatar", user.avatar),
] if v
}
configio_mod.write_config(_config_path, disk_data)
logger.info("Persisted OAuth user %r to config", user.username)
except Exception as exc:
logger.warning("Failed to persist OAuth user %r to config: %s", user.username, exc)
session_token = users_mod.create_session(user.username)
eventlog("hbd", "INFO", f"Login: {user.username} via {provider.type}")
resp = web.HTTPFound("/")
+3 -1
View File
@@ -140,7 +140,9 @@ def _send_pushover(channel_cfg: dict, notif: Notification) -> bool:
if not token or not user:
logger.warning("pushover: missing token or user")
return False
params: dict = {"token": token, "user": user, "title": notif.title, "message": notif.body}
body = "%s: %s" % (notif.title, notif.body)
title = ""
params: dict = {"token": token, "user": user, "title": title, "message": body}
if channel_cfg.get("sound"):
params["sound"] = channel_cfg["sound"]
if notif.url:
+93 -3
View File
@@ -914,7 +914,7 @@
let html = '';
switch (pluginName) {
case 'os_info': html = renderOsInfoTable(cached.data); break;
case 'cpu_monitor': html = renderCpuTable(cached.data); break;
case 'cpu_monitor': html = renderCpuTable(hostname, cached.data); break;
case 'memory_monitor': html = renderMemoryTable(cached.data); break;
case 'disk_monitor': html = renderDiskTables(cached.data); break;
case 'network_monitor':html = renderNetworkTables(cached.data); break;
@@ -926,6 +926,10 @@
html += `<div class="timestamp">Last updated: ${new Date(cached.timestamp * 1000).toLocaleString()}</div>`;
body.innerHTML = html;
if (pluginName === 'cpu_monitor') {
fetchCpuHistory(hostname).then(samples => renderCpuChart(hostname, samples)).catch(() => {});
}
}
// ── Per-plugin renderers ────────────────────────────────────────────────
@@ -948,7 +952,92 @@
return html;
}
function renderCpuTable(d) {
async function fetchCpuHistory(hostname) {
const r = await fetch(`/api/0/hosts/${encodeURIComponent(hostname)}/plugins/cpu_monitor?limit=100`);
if (!r.ok) return [];
const json = await r.json();
return json.samples || [];
}
function renderCpuChart(hostname, samples) {
const el = document.getElementById(`cpu-chart-${hostname}`);
if (!el || !samples.length) return;
const pts = samples
.filter(s => s.data.cpu_percent != null)
.map(s => ({ t: s.timestamp, v: s.data.cpu_percent }));
if (pts.length < 2) { el.style.display = 'none'; return; }
const W = 600, H = 80, PAD = { top: 6, right: 8, bottom: 18, left: 28 };
const cW = W - PAD.left - PAD.right;
const cH = H - PAD.top - PAD.bottom;
const tMin = pts[0].t, tMax = pts[pts.length - 1].t;
const tRange = tMax - tMin || 1;
const x = t => PAD.left + ((t - tMin) / tRange) * cW;
// Auto-scale Y axis with 10% padding, clamped to [0, 100]
const vMin = Math.min(...pts.map(p => p.v));
const vMax = Math.max(...pts.map(p => p.v));
const vRange = vMax - vMin || 1;
const vPad = Math.max(vRange * 0.1, 1);
const yLow = Math.max(0, vMin - vPad);
const yHigh = Math.min(100, vMax + vPad);
const yRange = yHigh - yLow || 1;
const y = v => PAD.top + cH - ((v - yLow) / yRange) * cH;
// Build polyline points and filled area path
const linePoints = pts.map(p => `${x(p.t).toFixed(1)},${y(p.v).toFixed(1)}`).join(' ');
const areaPath = `M${x(pts[0].t).toFixed(1)},${(PAD.top + cH).toFixed(1)} ` +
pts.map(p => `L${x(p.t).toFixed(1)},${y(p.v).toFixed(1)}`).join(' ') +
` L${x(pts[pts.length-1].t).toFixed(1)},${(PAD.top + cH).toFixed(1)} Z`;
// Color based on latest absolute CPU %
const latest = pts[pts.length - 1].v;
const strokeColor = latest > 90 ? '#e53935' : latest > 70 ? '#fb8c00' : '#43a047';
const fillColor = latest > 90 ? '#ffcdd2' : latest > 70 ? '#ffe0b2' : '#c8e6c9';
// Compute nice tick step for ~3-5 grid lines
const rawStep = yRange / 4;
const mag = Math.pow(10, Math.floor(Math.log10(rawStep || 1)));
const niceStep = [1, 2, 5, 10].map(f => f * mag).find(s => yRange / s <= 5) || mag * 10;
const tickStart = Math.ceil(yLow / niceStep) * niceStep;
let gridLines = '';
for (let v = tickStart; v <= yHigh + 0.001; v += niceStep) {
const yy = y(v).toFixed(1);
const label = Number.isInteger(v) ? v : v.toFixed(1);
gridLines += `<line x1="${PAD.left}" y1="${yy}" x2="${PAD.left + cW}" y2="${yy}" stroke="#e0e0e0" stroke-width="1"/>`;
gridLines += `<text x="${(PAD.left - 3).toFixed(1)}" y="${yy}" text-anchor="end" dominant-baseline="middle" font-size="8" fill="#999">${label}</text>`;
}
// X-axis time labels
const fmt = ts => {
const d = new Date(ts * 1000);
return d.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
};
const xLabels = `
<text x="${PAD.left}" y="${H - 2}" text-anchor="start" font-size="8" fill="#999">${fmt(pts[0].t)}</text>
<text x="${PAD.left + cW}" y="${H - 2}" text-anchor="end" font-size="8" fill="#999">${fmt(pts[pts.length-1].t)}</text>`;
el.innerHTML = `<svg viewBox="0 0 ${W} ${H}" preserveAspectRatio="none"
style="width:100%;height:${H}px;display:block;">
<defs>
<clipPath id="cpu-clip-${hostname}">
<rect x="${PAD.left}" y="${PAD.top}" width="${cW}" height="${cH}"/>
</clipPath>
</defs>
${gridLines}
<line x1="${PAD.left}" y1="${PAD.top}" x2="${PAD.left}" y2="${PAD.top + cH}" stroke="#ccc" stroke-width="1"/>
<line x1="${PAD.left}" y1="${PAD.top + cH}" x2="${PAD.left + cW}" y2="${PAD.top + cH}" stroke="#ccc" stroke-width="1"/>
<g clip-path="url(#cpu-clip-${hostname})">
<path d="${areaPath}" fill="${fillColor}" opacity="0.6"/>
<polyline points="${linePoints}" fill="none" stroke="${strokeColor}" stroke-width="1.5" stroke-linejoin="round"/>
</g>
${xLabels}
</svg>`;
}
function renderCpuTable(hostname, d) {
const KEYS = [
['cpu_percent', 'CPU Usage', 'bar'],
['load_1min', 'Load (1 min)', 'num'],
@@ -966,7 +1055,8 @@
];
const handled = new Set(KEYS.map(r => r[0]));
let html = '<table class="data-table"><thead><tr><th>Metric</th><th>Value</th></tr></thead><tbody>';
let html = `<div id="cpu-chart-${hostname}" style="margin-bottom:8px;"></div>`;
html += '<table class="data-table"><thead><tr><th>Metric</th><th>Value</th></tr></thead><tbody>';
for (const [k, label, fmt] of KEYS) {
if (!(k in d)) continue;
const v = d[k];
+21
View File
@@ -232,6 +232,23 @@ def _make_timer_callbacks(uname, host, ctx):
return on_overdue, on_unknown
def _make_plugin_stale_callback(uname, ctx):
"""Return an async callback that clears stale plugin data and its alerts."""
msg_to_websockets = ctx.get("msg_to_websockets")
async def on_plugin_stale(host, plugin_name):
host.plugin_data.pop(plugin_name, None)
stale_keys = [k for k in host.alert_states if k.startswith(f"{plugin_name}.")]
for k in stale_keys:
del host.alert_states[k]
eventlog(uname, "INFO", f"plugin data stale: {plugin_name}")
if msg_to_websockets:
msg_to_websockets("plugin_stale", {"host": uname, "plugin": plugin_name})
msg_to_websockets("host", host.stateinfo())
return on_plugin_stale
def restore_connection_timers(hbdclass, ctx):
"""Restore overdue timers for all loaded connections after a pickle restore.
@@ -372,6 +389,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
if k not in ("ID", "plugin", "id", "name")}
# Store plugin data with timestamp
host.add_plugin_data(plugin_name, plugin_data, timestamp=now)
# Reset stale timer — 3× the heartbeat interval (min 60 s)
stale_timeout = max(host.interval * 3, 60)
host.reset_plugin_timer(plugin_name, stale_timeout,
_make_plugin_stale_callback(uname, ctx))
# If os_info reports an owner and none is configured server-side, apply it
if plugin_name == "os_info":
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "hbd"
version = "5.3.9"
version = "5.3.10"
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
readme = "README.md"
requires-python = ">=3.11"
+1
View File
@@ -29,3 +29,4 @@ git push --tags
rm hbd/__init__.py.bak
rm scripts/hbc_mini.py.bak
rm README.md.bak
+3 -3
View File
@@ -789,7 +789,7 @@ static void plugin_cpu_monitor(conn_t *c, const config_t *cfg) {
* Plugin: memory_monitor
* Linux: /proc/meminfo
* FreeBSD: sysctl vm.stats.vm.*
* NetBSD: sysctl vm.uvmexp (struct uvmexp)
* NetBSD: sysctl vm.uvmexp (struct uvmexp_sysctl)
* ============================================================ */
/* emit the common kvdict fields and send */
@@ -896,9 +896,9 @@ static void plugin_memory_monitor(conn_t *c, const config_t *cfg) {
static void plugin_memory_monitor(conn_t *c, const config_t *cfg) {
(void)cfg;
struct uvmexp uvm;
struct uvmexp_sysctl uvm;
size_t len = sizeof(uvm);
int mib[2] = {CTL_VM, VM_UVMEXP};
int mib[2] = {CTL_VM, VM_UVMEXP2};
if (sysctl(mib, 2, &uvm, &len, NULL, 0) != 0) return;
long long ps = uvm.pagesize;
+1 -1
View File
@@ -41,7 +41,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# updated by scripts/bumpminor.sh
__version__ = "5.3.9"
__version__ = "5.3.10"
# ---------------------------------------------------------------------------
# Protocol (mirrors hbd/common/proto.py)