Compare commits

...

4 Commits

Author SHA1 Message Date
Andreas Wrede c4f09e9ced version 5.1.8
Release / release (push) Successful in 5s
- fix: matrix/sms_voipms notifications blocked the event loop on timeout;
  make send_notification async, dispatch all channel drivers as non-blocking
  tasks (asyncio.to_thread for sync drivers, asyncio.wait_for for async);
  update all call sites to fire-and-forget via create_task
- feat: add /about page with version, runtime, uptime counter, and repo link
- fix: hbc_mini plugin data format now matches full hbc client so Host
  Overview displays memory, disk, and network metrics correctly

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-05-01 05:33:27 -04:00
Andreas Wrede 64710fd4cd tweak h1 margins 2026-05-01 04:51:11 -04:00
Andreas Wrede 1f5e7465a3 fix nav bar position 2026-05-01 04:32:04 -04:00
Andreas Wrede b290b21e23 track hbc type and version 2026-04-30 18:22:35 -04:00
16 changed files with 362 additions and 127 deletions
+1 -1
View File
@@ -14,4 +14,4 @@ Install options:
""" """
__all__ = ["__version__"] __all__ = ["__version__"]
__version__ = "5.1.7" __version__ = "5.1.8"
+1
View File
@@ -60,6 +60,7 @@ class OSInfoPlugin(InfoPlugin):
"python_version": platform.python_version(), "python_version": platform.python_version(),
"python_implementation": platform.python_implementation(), "python_implementation": platform.python_implementation(),
"hbc_version": hbc_version, "hbc_version": hbc_version,
"hbc_type": "full",
} }
# Add Linux-specific distribution info # Add Linux-specific distribution info
+5 -6
View File
@@ -144,17 +144,16 @@ def cmd_notify(args):
url=f"{base_url}/plugins" if base_url else "", url=f"{base_url}/plugins" if base_url else "",
) )
# Bypass min_level for explicit test sends; run async channels directly
import asyncio import asyncio
from .notify import _send_matrix_async, _send_sms_voipms_async, _DRIVERS
ch_type = channel_cfg.get("type", "") ch_type = channel_cfg.get("type", "")
print(f"Sending via {args.channel} ({ch_type}): {title}{args.message}") print(f"Sending via {args.channel} ({ch_type}): {title}{args.message}")
if ch_type in ("matrix", "sms_voipms"): if ch_type == "matrix":
from .notify import _send_matrix_async, _send_sms_voipms_async ok = asyncio.run(_send_matrix_async(channel_cfg, notif))
driver_async = _send_matrix_async if ch_type == "matrix" else _send_sms_voipms_async elif ch_type == "sms_voipms":
ok = asyncio.run(driver_async(channel_cfg, notif)) ok = asyncio.run(_send_sms_voipms_async(channel_cfg, notif))
else: else:
from .notify import _DRIVERS
driver = _DRIVERS.get(ch_type) driver = _DRIVERS.get(ch_type)
if driver is None: if driver is None:
print(f"Error: unknown channel type '{ch_type}'", file=sys.stderr) print(f"Error: unknown channel type '{ch_type}'", file=sys.stderr)
+48
View File
@@ -1,7 +1,11 @@
"""HTTP server implementation using aiohttp and jinja2.""" """HTTP server implementation using aiohttp and jinja2."""
import asyncio import asyncio
import datetime
import json import json
import platform
import socket
import sys
import time import time
import urllib.parse import urllib.parse
import os import os
@@ -111,6 +115,7 @@ async def start(
This function is intended to be awaited inside the main asyncio event loop. This function is intended to be awaited inside the main asyncio event loop.
""" """
get_now = get_now or (lambda: time.time()) get_now = get_now or (lambda: time.time())
_start_epoch = time.time()
async def old_index(request): async def old_index(request):
_require_auth_redirect(request) _require_auth_redirect(request)
@@ -806,6 +811,48 @@ async def start(
) )
return web.Response(text=body, content_type="text/html") return web.Response(text=body, content_type="text/html")
# -------------------------------------------------------------------------
# About page
# -------------------------------------------------------------------------
async def about_page(request):
    """GET /about — version, runtime, and project information.

    Renders ``about.html`` with the server version, Python runtime,
    hostname, start time, uptime and the number of known hosts.

    Args:
        request: Incoming aiohttp request, handed to
            ``_require_auth_redirect`` to resolve the current user.

    Returns:
        A ``web.Response`` carrying the rendered HTML page.
    """
    current_user, _ = _require_auth_redirect(request)

    pkg_dir = os.path.dirname(__file__)
    templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
    # NOTE(review): jinja2.Environment defaults to autoescape=False; every
    # value rendered below is server-generated, but confirm nav.html does not
    # echo user-controlled fields unescaped.
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))

    from hbd import __version__ as hbd_version

    # _start_epoch is a wall-clock timestamp, so a backwards clock step
    # (e.g. an NTP correction) would make the difference negative and divmod
    # would then produce nonsense such as "-1d 23h 59m". Clamp at zero.
    uptime_secs = max(0, int(time.time() - _start_epoch))
    days, rem = divmod(uptime_secs, 86400)
    hours, rem = divmod(rem, 3600)
    mins, secs = divmod(rem, 60)
    # Show the three most significant units only.
    if days:
        uptime_str = f"{days}d {hours}h {mins}m"
    elif hours:
        uptime_str = f"{hours}h {mins}m {secs}s"
    else:
        uptime_str = f"{mins}m {secs}s"

    # Naive datetime: formatted in the server's local time zone.
    start_dt = datetime.datetime.fromtimestamp(_start_epoch)
    start_time_str = start_dt.strftime("%Y-%m-%d %H:%M:%S")

    tmpl = env.get_template("about.html")
    body = tmpl.render(
        title="About - Heartbeat",
        header="About",
        hbd_version=hbd_version,
        python_version=f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} ({platform.python_implementation()})",
        server_hostname=socket.gethostname(),
        # Integer epoch consumed by the page's JavaScript live uptime counter.
        start_epoch=int(_start_epoch),
        start_time_str=start_time_str,
        uptime_str=uptime_str,
        host_count=len(hbdclass.Host.hosts),
        current_user=current_user.to_dict() if current_user else None,
        active_page="about",
    )
    return web.Response(text=body, content_type="text/html")
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# Settings page (admin only) # Settings page (admin only)
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
@@ -859,6 +906,7 @@ async def start(
web.get("/live", live), web.get("/live", live),
web.get("/plugins", plugins_page), web.get("/plugins", plugins_page),
web.get("/alerts", alerts_page), web.get("/alerts", alerts_page),
web.get("/about", about_page),
web.get("/profile", profile_page), web.get("/profile", profile_page),
web.get("/settings", settings_page), web.get("/settings", settings_page),
web.get("/static/{path:.*}", static), web.get("/static/{path:.*}", static),
+29 -57
View File
@@ -15,7 +15,6 @@ their own ``notification_channels`` list. When no users are configured the
server runs silently (no notifications sent). server runs silently (no notifications sent).
""" """
import asyncio
import asyncio import asyncio
import logging import logging
import smtplib import smtplib
@@ -30,13 +29,10 @@ from . import ws as ws_mod
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
msg_to_websockets = ws_mod.broadcast msg_to_websockets = ws_mod.broadcast
# Module-level state set via setup() # Module-level state set via setup()
_config: dict = {} _config: dict = {}
_loop: Optional[asyncio.AbstractEventLoop] = None
# Tracks which channels fired a WARNING/CRITICAL per host. # Tracks which channels fired a WARNING/CRITICAL per host.
# {host_name: set of channel_names} — used to route RECOVER to the same channels. # {host_name: set of channel_names} — used to route RECOVER to the same channels.
@@ -73,11 +69,9 @@ class Notification:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def setup(cfg: dict, loop: Optional[asyncio.AbstractEventLoop] = None): def setup(cfg: dict, loop: Optional[asyncio.AbstractEventLoop] = None):
"""Initialize notifier from configuration dict and event loop.""" """Initialize notifier from configuration dict."""
global _config, _loop global _config
_config = dict(cfg) _config = dict(cfg)
if loop is not None:
_loop = loop
def reload_config(cfg: dict): def reload_config(cfg: dict):
@@ -299,17 +293,6 @@ async def _send_sms_voipms_async(channel_cfg: dict, notif: Notification) -> bool
return False return False
def _send_sms_voipms(channel_cfg: dict, notif: Notification) -> bool:
"""Dispatch voip.ms SMS send onto the shared event loop."""
if _loop is None:
logger.warning("sms_voipms: event loop not available")
return False
future = asyncio.run_coroutine_threadsafe(_send_sms_voipms_async(channel_cfg, notif), _loop)
try:
return future.result(timeout=15)
except Exception as e:
logger.error("sms_voipms send timed out or failed: %s", e)
return False
async def _send_matrix_async(channel_cfg: dict, notif: Notification) -> bool: async def _send_matrix_async(channel_cfg: dict, notif: Notification) -> bool:
@@ -357,40 +340,23 @@ async def _send_matrix_async(channel_cfg: dict, notif: Notification) -> bool:
await client.close() await client.close()
def _send_matrix(channel_cfg: dict, notif: Notification) -> bool:
"""Dispatch matrix send onto the shared event loop."""
if _loop is None:
logger.warning("matrix: event loop not available")
return False
future = asyncio.run_coroutine_threadsafe(_send_matrix_async(channel_cfg, notif), _loop)
try:
return future.result(timeout=15)
except Exception as e:
logger.error("matrix send timed out or failed: %s", e)
return False
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Channel dispatcher # Channel dispatcher (all async — sync drivers run in a thread executor)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Sync drivers kept for `hbd notify` CLI usage (asyncio.run wraps them there).
_DRIVERS = { _DRIVERS = {
"pushover": _send_pushover, "pushover": _send_pushover,
"email": _send_email, "email": _send_email,
"mattermost": _send_mattermost, "mattermost": _send_mattermost,
"signal": _send_signal, "signal": _send_signal,
"sms_voipms": _send_sms_voipms,
"matrix": _send_matrix,
} }
_TIMEOUT = 15 # seconds per channel send
def _dispatch_to_channel(channel_name: str, channel_cfg: dict, notif: Notification) -> bool:
"""Send *notif* to a single named channel, honouring min_level.
RECOVER always bypasses min_level — a recovery is always relevant if the async def _dispatch_to_channel(channel_name: str, channel_cfg: dict, notif: Notification) -> bool:
channel was configured for any alerting (handles the restart-then-recover case """Send *notif* to a single named channel, honouring min_level."""
where _alerted_channels is empty and we fall through to the normal loop).
"""
level = notif.level.upper() level = notif.level.upper()
if level != "RECOVER": if level != "RECOVER":
min_level = channel_cfg.get("min_level", "WARNING").upper() min_level = channel_cfg.get("min_level", "WARNING").upper()
@@ -398,14 +364,24 @@ def _dispatch_to_channel(channel_name: str, channel_cfg: dict, notif: Notificati
logger.debug( logger.debug(
"channel '%s': skipping level %s (min_level=%s)", channel_name, level, min_level "channel '%s': skipping level %s (min_level=%s)", channel_name, level, min_level
) )
return True # not an error — filtered intentionally return True # filtered intentionally
ch_type = channel_cfg.get("type", "") ch_type = channel_cfg.get("type", "")
driver = _DRIVERS.get(ch_type) try:
if driver is None: if ch_type == "matrix":
logger.warning("unknown channel type '%s' for channel '%s'", ch_type, channel_name) return await asyncio.wait_for(_send_matrix_async(channel_cfg, notif), timeout=_TIMEOUT)
if ch_type == "sms_voipms":
return await asyncio.wait_for(_send_sms_voipms_async(channel_cfg, notif), timeout=_TIMEOUT)
sync_driver = _DRIVERS.get(ch_type)
if sync_driver is None:
logger.warning("unknown channel type '%s' for channel '%s'", ch_type, channel_name)
return False
return await asyncio.wait_for(
asyncio.to_thread(sync_driver, channel_cfg, notif), timeout=_TIMEOUT
)
except asyncio.TimeoutError:
logger.error("channel '%s' timed out after %ds", channel_name, _TIMEOUT)
return False return False
return driver(channel_cfg, notif)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -419,7 +395,7 @@ def _build_url(host_name: str) -> str:
return f"{base_url}/plugins#{host_name}" return f"{base_url}/plugins#{host_name}"
def send_notification(host_name: str, notif: Notification) -> dict: async def send_notification(host_name: str, notif: Notification) -> dict:
"""Dispatch *notif* to all managers/owner of *host_name*. """Dispatch *notif* to all managers/owner of *host_name*.
Looks up the host's owner + managers, resolves each user's Looks up the host's owner + managers, resolves each user's
@@ -469,16 +445,12 @@ def send_notification(host_name: str, notif: Notification) -> dict:
if not channel_cfg: if not channel_cfg:
continue continue
try: try:
ch_type = channel_cfg.get("type", "") ok = await _dispatch_to_channel(channel_name, channel_cfg, notif)
driver = _DRIVERS.get(ch_type) results[channel_name] = ok
if driver: if ok:
ok = driver(channel_cfg, notif) logger.info("recover sent to channel '%s': %s", channel_name, notif.title)
results[channel_name] = ok
if ok:
logger.info("recover sent to channel '%s': %s", channel_name, notif.title)
except Exception as e: except Exception as e:
logger.error("error sending recover to channel '%s': %s", channel_name, e) logger.error("error sending recover to channel '%s': %s", channel_name, e)
# Clear the alerted set once recovery is delivered
del _alerted_channels[host_name] del _alerted_channels[host_name]
return results return results
@@ -489,14 +461,14 @@ def send_notification(host_name: str, notif: Notification) -> dict:
continue continue
for channel_name in user.notification_channels: for channel_name in user.notification_channels:
if channel_name in results: if channel_name in results:
continue # already dispatched to this channel this notification continue
channel_cfg = global_channels.get(channel_name) channel_cfg = global_channels.get(channel_name)
if not channel_cfg: if not channel_cfg:
logger.warning("channel '%s' not defined in notification_channels", channel_name) logger.warning("channel '%s' not defined in notification_channels", channel_name)
results[channel_name] = False results[channel_name] = False
continue continue
try: try:
ok = _dispatch_to_channel(channel_name, channel_cfg, notif) ok = await _dispatch_to_channel(channel_name, channel_cfg, notif)
results[channel_name] = ok results[channel_name] = ok
if ok: if ok:
logger.info("notification sent to channel '%s': %s", channel_name, notif.title) logger.info("notification sent to channel '%s': %s", channel_name, notif.title)
+199
View File
@@ -0,0 +1,199 @@
<!DOCTYPE html>
<html>
{% include 'head.html' %}
<style>
html, body { overflow: visible; }
.container {
max-width: 700px;
margin: 0 auto;
}
h1 {
color: #333;
margin-bottom: 4px;
font-size: 1.5em;
}
.subtitle {
color: #666;
margin-bottom: 24px;
font-size: 0.9em;
}
.section {
background: #fff;
border-radius: 8px;
box-shadow: 0 1px 6px rgba(0,0,0,0.1);
padding: 20px 24px;
margin-bottom: 20px;
}
.section h2 {
font-size: 1em;
font-weight: 700;
color: #333;
margin: 0 0 16px;
padding-bottom: 10px;
border-bottom: 1px solid #eee;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.info-row {
display: flex;
align-items: baseline;
padding: 8px 0;
border-bottom: 1px solid #f5f5f5;
font-size: 0.9em;
}
.info-row:last-child { border-bottom: none; }
.info-label {
width: 160px;
flex-shrink: 0;
color: #666;
font-size: 0.88em;
}
.info-value {
color: #222;
word-break: break-all;
}
.info-value a {
color: #0066cc;
text-decoration: none;
}
.info-value a:hover { text-decoration: underline; }
.version-badge {
display: inline-block;
padding: 3px 12px;
background: #e8f0fe;
color: #1a73e8;
border-radius: 12px;
font-size: 0.85em;
font-weight: 600;
font-family: monospace;
}
.hb-logo {
font-size: 2.5em;
font-weight: 700;
color: #0066cc;
letter-spacing: -1px;
margin-bottom: 6px;
}
.hb-tagline {
color: #555;
font-size: 0.95em;
}
.logo-section {
display: flex;
align-items: center;
gap: 20px;
padding: 8px 0 4px;
}
.logo-text { flex: 1; }
</style>
<body>
{% include 'nav.html' %}
<div class="container">
<h1>{{ header }}</h1>
<p class="subtitle">Heartbeat monitoring system</p>
<div class="section">
<div class="logo-section">
<div class="logo-text">
<div class="hb-logo">Heartbeat</div>
<div class="hb-tagline">Lightweight host monitoring over UDP</div>
</div>
<span class="version-badge">v{{ hbd_version }}</span>
</div>
</div>
<div class="section">
<h2>Version</h2>
<div class="info-row">
<span class="info-label">Server version</span>
<span class="info-value">{{ hbd_version }}</span>
</div>
<div class="info-row">
<span class="info-label">Python</span>
<span class="info-value">{{ python_version }}</span>
</div>
<div class="info-row">
<span class="info-label">License</span>
<span class="info-value">MIT</span>
</div>
</div>
<div class="section">
<h2>Runtime</h2>
<div class="info-row">
<span class="info-label">Host</span>
<span class="info-value">{{ server_hostname }}</span>
</div>
<div class="info-row">
<span class="info-label">Started</span>
<span class="info-value">{{ start_time_str }}</span>
</div>
<div class="info-row">
<span class="info-label">Uptime</span>
<span class="info-value" id="uptime-value">{{ uptime_str }}</span>
</div>
<div class="info-row">
<span class="info-label">Hosts monitored</span>
<span class="info-value">{{ host_count }}</span>
</div>
</div>
<div class="section">
<h2>Contact &amp; Source</h2>
<div class="info-row">
<span class="info-label">Author</span>
<span class="info-value">Andreas Wrede</span>
</div>
<div class="info-row">
<span class="info-label">Email</span>
<span class="info-value"><a href="mailto:aew@wrede.ca">aew@wrede.ca</a></span>
</div>
<div class="info-row">
<span class="info-label">Repository</span>
<span class="info-value"><a href="https://git.wrede.ca/andreas/heartbeat" target="_blank" rel="noopener">git.wrede.ca/andreas/heartbeat</a></span>
</div>
</div>
</div>
<script>
(function() {
var startEpoch = {{ start_epoch }};
var el = document.getElementById('uptime-value');
if (!el) return;
function fmt(s) {
var d = Math.floor(s / 86400);
var h = Math.floor((s % 86400) / 3600);
var m = Math.floor((s % 3600) / 60);
var sec = s % 60;
if (d > 0) return d + 'd ' + h + 'h ' + m + 'm';
if (h > 0) return h + 'h ' + m + 'm ' + sec + 's';
return m + 'm ' + sec + 's';
}
function tick() {
var up = Math.floor(Date.now() / 1000 - startEpoch);
el.textContent = fmt(up);
}
tick();
setInterval(tick, 1000);
})();
</script>
</body>
</html>
+1 -1
View File
@@ -9,7 +9,7 @@
margin: 0 auto; margin: 0 auto;
} }
h1 { color: #333; margin-bottom: 10px; font-size: 1.5em; } h1 { color: #333; margin-bottom: 5px; margin-top: 15px; font-size: 1.5em; }
.subtitle { .subtitle {
color: #666; color: #666;
+6 -2
View File
@@ -15,6 +15,7 @@
body { body {
margin: 0; margin: 0;
padding: 10px; padding: 10px;
padding-top: 60px;
background: #f5f5f5; background: #f5f5f5;
} }
h1 { font-size: 1.5em; color: #333; margin: 0 0 5px; } h1 { font-size: 1.5em; color: #333; margin: 0 0 5px; }
@@ -23,11 +24,14 @@
/* Navigation bar — shared across all pages */ /* Navigation bar — shared across all pages */
.nav { .nav {
position: fixed;
top: 0;
left: 0;
right: 0;
z-index: 200;
background: #fff; background: #fff;
padding: 6px 12px; padding: 6px 12px;
margin-bottom: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,.1); box-shadow: 0 2px 4px rgba(0,0,0,.1);
border-radius: 4px;
display: flex; display: flex;
align-items: center; align-items: center;
justify-content: space-between; justify-content: space-between;
+1
View File
@@ -45,6 +45,7 @@
h1 { h1 {
color: #333; color: #333;
margin-bottom: 5px; margin-bottom: 5px;
margin-top: 15px;
font-size: 1.5em; font-size: 1.5em;
} }
+1
View File
@@ -9,6 +9,7 @@
{% if current_user and current_user.admin %} {% if current_user and current_user.admin %}
<a href="/settings"{% if active_page == "settings" %} class="active"{% endif %}>Settings</a> <a href="/settings"{% if active_page == "settings" %} class="active"{% endif %}>Settings</a>
{% endif %} {% endif %}
<a href="/about"{% if active_page == "about" %} class="active"{% endif %}>About</a>
</div> </div>
<div class="nav-clock" title="Click for full-screen clock"> <div class="nav-clock" title="Click for full-screen clock">
<canvas id="swiss-clock" width="44" height="44"></canvas> <canvas id="swiss-clock" width="44" height="44"></canvas>
+1
View File
@@ -16,6 +16,7 @@
h1 { h1 {
color: #333; color: #333;
margin-bottom: 5px; margin-bottom: 5px;
margin-top: 15px;
font-size: 1.5em; font-size: 1.5em;
} }
+2 -2
View File
@@ -9,7 +9,7 @@
max-width: 960px; max-width: 960px;
} }
h1 { color: #333; margin-bottom: 4px; font-size: 1.5em; } h1 { color: #333; margin-bottom: 5px; margin-top: 15px; font-size: 1.5em; }
.subtitle { color: #666; margin-bottom: 24px; font-size: 0.9em; } .subtitle { color: #666; margin-bottom: 24px; font-size: 0.9em; }
/* ---- Sidebar + content layout ---- */ /* ---- Sidebar + content layout ---- */
@@ -23,7 +23,7 @@
width: 180px; width: 180px;
flex-shrink: 0; flex-shrink: 0;
position: sticky; position: sticky;
top: 20px; top: 60px;
} }
.sidebar-nav a { .sidebar-nav a {
+19 -26
View File
@@ -987,18 +987,14 @@ class ThresholdChecker:
value: Any, value: Any,
): ):
"""Send notification and log to journal/eventlog.""" """Send notification and log to journal/eventlog."""
try: asyncio.get_event_loop().create_task(notify_mod.send_notification(
notify_mod.send_notification( host_name,
host_name, notify_mod.Notification(
notify_mod.Notification( title=f"[{lvl}] {host_name}",
title=f"[{lvl}] {host_name}", body=message,
body=message, level=lvl,
level=lvl, ),
), ))
)
logger.info("Notification sent: %s", message)
except Exception as e:
logger.error("Failed to send notification: %s", e)
# Log to journal # Log to journal
if self.journal is not None: if self.journal is not None:
@@ -1195,20 +1191,17 @@ class ThresholdChecker:
else: else:
message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} (ongoing for {int(now - alert_state.since)}s)" message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} (ongoing for {int(now - alert_state.since)}s)"
try: asyncio.get_event_loop().create_task(notify_mod.send_notification(
notify_mod.send_notification( host_name,
host_name, notify_mod.Notification(
notify_mod.Notification( title=f"[REMINDER/{alert_state.level.name}] {host_name}",
title=f"[REMINDER/{alert_state.level.name}] {host_name}", body=message,
body=message, level=alert_state.level.name,
level=alert_state.level.name, ),
), ))
) alert_state.last_notification = now
alert_state.last_notification = now alert_state.notification_count += 1
alert_state.notification_count += 1 logger.info("Re-notification sent: %s", message)
logger.info("Re-notification sent: %s", message)
except Exception as e:
logger.error("Failed to send re-notification: %s", e)
def get_active_alerts(self, alert_states: Dict[str, AlertState]) -> list: def get_active_alerts(self, alert_states: Dict[str, AlertState]) -> list:
""" """
+10 -10
View File
@@ -211,10 +211,10 @@ def _make_timer_callbacks(uname, host, ctx):
connection.newstate(connection.__class__.OVERDUE, now, cfg.get("grace", 2)) connection.newstate(connection.__class__.OVERDUE, now, cfg.get("grace", 2))
msg = f"{connection.afam} overdue" msg = f"{connection.afam} overdue"
eventlog(uname, "CRITICAL", msg) eventlog(uname, "CRITICAL", msg)
notify_mod.send_notification( asyncio.create_task(notify_mod.send_notification(
uname, uname,
notify_mod.Notification(title=f"[CRITICAL] {uname}", body=msg, level="CRITICAL"), notify_mod.Notification(title=f"[CRITICAL] {uname}", body=msg, level="CRITICAL"),
) ))
# Track in alert_states so the Alerts Dashboard shows this # Track in alert_states so the Alerts Dashboard shows this
_set_connectivity_alert(host, connection.afam, "CRITICAL") _set_connectivity_alert(host, connection.afam, "CRITICAL")
if threshold_checker: if threshold_checker:
@@ -407,10 +407,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
if res: if res:
eventlog(uname, "WARNING", res) eventlog(uname, "WARNING", res)
notify_mod.send_notification( asyncio.create_task(notify_mod.send_notification(
uname, uname,
notify_mod.Notification(title=f"[WARNING] {uname}", body=res, level="WARNING"), notify_mod.Notification(title=f"[WARNING] {uname}", body=res, level="WARNING"),
) ))
interval = int(msg.get("interval", 0) or 0) interval = int(msg.get("interval", 0) or 0)
shutdown = msg.get("shutdown", 0) shutdown = msg.get("shutdown", 0)
@@ -420,10 +420,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
if boot: if boot:
eventlog(uname, "INFO", "booted") eventlog(uname, "INFO", "booted")
notify_mod.send_notification( asyncio.create_task(notify_mod.send_notification(
uname, uname,
notify_mod.Notification(title=f"[INFO] {uname}", body=f"{host.name} booted", level="INFO"), notify_mod.Notification(title=f"[INFO] {uname}", body=f"{host.name} booted", level="INFO"),
) ))
if message: if message:
eventlog(uname, "INFO", "msg: %s" % message, service=service) eventlog(uname, "INFO", "msg: %s" % message, service=service)
@@ -440,10 +440,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
else: else:
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d)) m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
eventlog(uname, "RECOVER", m) eventlog(uname, "RECOVER", m)
notify_mod.send_notification( asyncio.create_task(notify_mod.send_notification(
uname, uname,
notify_mod.Notification(title=f"[RECOVER] {uname}", body=m, level="RECOVER"), notify_mod.Notification(title=f"[RECOVER] {uname}", body=m, level="RECOVER"),
) ))
if boot or newh: if boot or newh:
host.upcount = host.doesack host.upcount = host.doesack
@@ -453,10 +453,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
if shutdown: if shutdown:
m = "%s shutdown" % conn.afam m = "%s shutdown" % conn.afam
eventlog(uname, "INFO", m) eventlog(uname, "INFO", m)
notify_mod.send_notification( asyncio.create_task(notify_mod.send_notification(
uname, uname,
notify_mod.Notification(title=f"[INFO] {uname}", body=m, level="INFO"), notify_mod.Notification(title=f"[INFO] {uname}", body=m, level="INFO"),
) ))
conn.newstate(hbdcls.Connection.DOWN, now) conn.newstate(hbdcls.Connection.DOWN, now)
_set_connectivity_alert(host, conn.afam, "CRITICAL") _set_connectivity_alert(host, conn.afam, "CRITICAL")
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "hbd" name = "hbd"
version = "5.1.7" version = "5.1.8"
description = "Heartbeat monitoring system — client (hbc) and server (hbd)" description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
+37 -21
View File
@@ -233,6 +233,8 @@ class OSInfoPlugin(InfoPlugin):
"machine": platform.machine(), "machine": platform.machine(),
"architecture": platform.architecture()[0], "architecture": platform.architecture()[0],
"python_version": platform.python_version(), "python_version": platform.python_version(),
"hbc_version": "5.1.8",
"hbc_type": "mini",
} }
if platform.system() == "Linux": if platform.system() == "Linux":
data.update(_linux_distro()) data.update(_linux_distro())
@@ -529,19 +531,27 @@ class MemoryMonitorPlugin(MonitorPlugin):
return {} return {}
total = mi.get("MemTotal", 0) total = mi.get("MemTotal", 0)
avail = mi.get("MemAvailable", mi.get("MemFree", 0)) avail = mi.get("MemAvailable", mi.get("MemFree", 0))
free = mi.get("MemFree", 0)
used = total - avail used = total - avail
data: Dict[str, Any] = { data: Dict[str, Any] = {
"mem_total_kb": total, "memory_total": total * 1024,
"mem_used_kb": used, "memory_used": used * 1024,
"mem_available_kb": avail, "memory_available": avail * 1024,
"mem_percent": round(100.0 * used / total, 1) if total else 0.0, "memory_free": free * 1024,
"memory_percent": round(100.0 * used / total, 1) if total else 0.0,
} }
for field, key in (("Buffers", "memory_buffers"), ("Cached", "memory_cached"),
("Active", "memory_active"), ("Inactive", "memory_inactive")):
if field in mi:
data[key] = mi[field] * 1024
stotal = mi.get("SwapTotal", 0) stotal = mi.get("SwapTotal", 0)
if stotal: if stotal:
sfree = mi.get("SwapFree", 0) sfree = mi.get("SwapFree", 0)
data["swap_total_kb"] = stotal sused = stotal - sfree
data["swap_used_kb"] = stotal - sfree data["swap_total"] = stotal * 1024
data["swap_percent"] = round(100.0 * (stotal - sfree) / stotal, 1) data["swap_used"] = sused * 1024
data["swap_free"] = sfree * 1024
data["swap_percent"] = round(100.0 * sused / stotal, 1)
return data return data
@@ -577,7 +587,7 @@ class DiskMonitorPlugin(MonitorPlugin):
except Exception as e: except Exception as e:
self.logger.warning("df failed: %s", e) self.logger.warning("df failed: %s", e)
return {} return {}
data: Dict[str, Any] = {} partitions: Dict[str, Any] = {}
for line in out.decode(errors="replace").splitlines()[1:]: for line in out.decode(errors="replace").splitlines()[1:]:
parts = line.split() parts = line.split()
if len(parts) < 6: if len(parts) < 6:
@@ -586,14 +596,19 @@ class DiskMonitorPlugin(MonitorPlugin):
if self.mounts and mount not in self.mounts: if self.mounts and mount not in self.mounts:
continue continue
try: try:
key = re.sub(r"[^a-zA-Z0-9_]", "_", mount).strip("_") or "root" total_kb = int(parts[1])
data[f"{key}_total_kb"] = int(parts[1]) used_kb = int(parts[2])
data[f"{key}_used_kb"] = int(parts[2]) avail_kb = int(parts[3])
data[f"{key}_avail_kb"] = int(parts[3]) pct = int(parts[4].rstrip("%"))
data[f"{key}_percent"] = int(parts[4].rstrip("%")) partitions[mount] = {
"total": total_kb * 1024,
"used": used_kb * 1024,
"free": avail_kb * 1024,
"percent": pct,
}
except (ValueError, IndexError): except (ValueError, IndexError):
continue continue
return data return {"partitions": partitions} if partitions else {}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -649,17 +664,18 @@ class NetworkMonitorPlugin(MonitorPlugin):
self._prev = (now, curr) self._prev = (now, curr)
if dt <= 0: if dt <= 0:
return {} return {}
data: Dict[str, Any] = {} interfaces: Dict[str, Any] = {}
for iface, (rx, tx) in curr.items(): for iface, (rx, tx) in curr.items():
if iface in self.skip_ifaces or iface not in prev: if iface in self.skip_ifaces or iface not in prev:
continue continue
prx, ptx = prev[iface] prx, ptx = prev[iface]
key = re.sub(r"[^a-zA-Z0-9_]", "_", iface) interfaces[iface] = {
data[f"{key}_rx_bps"] = round((rx - prx) / dt) "bytes_recv": rx,
data[f"{key}_tx_bps"] = round((tx - ptx) / dt) "bytes_sent": tx,
data[f"{key}_rx_bytes"] = rx "bytes_recv_delta": rx - prx,
data[f"{key}_tx_bytes"] = tx "bytes_sent_delta": tx - ptx,
return data }
return {"interfaces": interfaces} if interfaces else {}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------