Major refactoring of the codebase, including restructuring of files and directories, renaming of modules and classes, and improvements to the overall organization and readability of the code. This refactoring aims to enhance maintainability, scalability, and clarity of the codebase while preserving existing functionality. The changes include:

- Restructuring of the project directory into client and server components
- Renaming of modules and classes to better reflect their purpose and functionality
- Moving common utilities and configurations to a shared location
- Updating import statements to reflect the new structure
- Adding new documentation files for better clarity on various aspects of the project
- Removing deprecated or unused code to streamline the codebase
- Ensuring that all existing functionality is preserved and that the codebase remains functional after the refactoring.
This commit is contained in:
Andreas Wrede
2026-03-29 11:13:40 -04:00
parent 7e2038ecac
commit 0543266c92
65 changed files with 11371 additions and 140 deletions
+3
View File
@@ -0,0 +1,3 @@
"""HeartBeat Daemon (hbd) - Server/daemon component."""
__version__ = "5.0.5"
+54
View File
@@ -0,0 +1,54 @@
"""Command line interface for hbd package."""
import argparse
from .config import load_config
from .main import run as run_server
PUSHSRVS = ["all", "pushover", "mattermost"]
def build_parser():
parser = argparse.ArgumentParser(
prog="hbd",
description="HeartBeatDaemon - Wait for heartbeat messages and act on them (or their absence)",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
"-c", "--config", dest="configfile", help="Config file path (YAML)"
)
parser.add_argument(
"-f", "--foreground", action="store_true", help="Run in foreground"
)
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument(
"-p", "--pushsrv", dest="pushsrv", choices=PUSHSRVS, help="Push service to use"
)
parser.add_argument(
"-x", "--debug", action="count", default=0, help="Increase debug level"
)
return parser
def main(argv=None):
parser = build_parser()
args = parser.parse_args(argv)
config = load_config(args.configfile)
# Apply CLI overrides
if args.foreground:
config["foreground"] = True
if args.verbose:
config["verbose"] = True
if args.pushsrv:
config["pushsrv"] = args.pushsrv
if args.debug:
config.setdefault("debug", 0)
config["debug"] += args.debug
run_server(config)
if __name__ == "__main__":
main()
+103
View File
@@ -0,0 +1,103 @@
"""Configuration loader and defaults for hbd (HeartBeat Daemon/Server)."""
import logging
import os
try:
import yaml
except Exception:
yaml = None
SERVER_DEFAULTS = {
# Network settings
"hb_port": 50003, # Port to listen for heartbeats
"hbd_port": 50004, # HTTP API port
"hbd_host": "", # Bind address (empty = all interfaces)
# Persistence
"pickfile": "/tmp/hb.pick",
# Logging
"logfile": "/var/log/heartbeat.log",
"logfmt": "text", # text or msg or json
# Notification settings
"pushsrv": "pushover", # pushover, mattermost, or all
"pushover_token": "",
"pushover_user": "",
# Monitoring settings
"interval": 20, # Expected heartbeat interval (for server checks)
"grace": 2, # Grace multiplier (interval * grace = timeout)
"threshold_renotify_interval": 3600, # Seconds between threshold re-notifications
# Host management
"watchhosts": [], # Hosts to monitor and notify about
"dyndnshosts": [], # Hosts with dynamic DNS
"drophosts": [], # Hosts to ignore
"dyndomains": ["wrede.org"],
# DNS updates
"nsupdate_bin": "/usr/bin/nsupdate",
# Email settings
"smtpserver": "smtp.fastmail.com",
"smtpuser": "andreas@wrede.ca",
"smtppassword": "pvtvefyp5gbhnch2",
"smtpport": 587,
"toemail": ["aew.hbd.notify@wrede.ca"],
"fromemail": "aew.hbd@wrede.ca",
# WebSocket settings
"ws_port": 50005,
"wss_port": None,
"cert_path": "/usr/local/etc/ssl/",
"wss_pem": "fullchain.pem",
"wss_key": "privkey.pem",
# Message journal configuration
"journal_enabled": True,
"journal_dir": "/var/log/heartbeat",
"journal_file": "messages.journal",
"journal_max_size": 100 * 1024 * 1024, # 100MB
"journal_max_backups": 10,
# Runtime flags
"foreground": False,
"verbose": False,
"debug": 0,
# Plugin/threshold configs (for clients reporting to this server)
"plugins": {},
"thresholds": {},
}
def load_config(path=None):
"""Load configuration from a YAML file and merge with server defaults.
If YAML is not available or the file does not exist, defaults are returned.
Args:
path: Path to YAML config file (default: ~/.hb.yaml)
Returns:
Dictionary with configuration
"""
cfg = SERVER_DEFAULTS.copy()
if not path:
# default path (~/.hb.yaml)
path = os.path.join(os.path.expanduser("~"), ".hb.yaml")
if os.path.exists(path):
if yaml:
with open(path) as fh:
data = yaml.safe_load(fh)
# Merge YAML data with defaults
# Keep all keys from YAML to support plugin configs and future extensions
for k, v in data.items():
cfg[k] = v
else:
# yaml not installed: do not attempt to parse; user must ensure defaults
pass
return cfg
+224
View File
@@ -0,0 +1,224 @@
"""DNS update helper and pure asyncio worker for heartbeat daemon."""
from __future__ import annotations
from subprocess import Popen, PIPE, STDOUT
from typing import Optional
import asyncio
def create_nsupdate_payload(
hostname: str, newip: str, dyndomain: str, dnsttl: str = "5"
) -> str:
D = {
"domain": dyndomain,
"fqdn": f"{hostname}.dy.{dyndomain}",
"dnsttl": dnsttl,
"newip": newip,
"ts": __import__("time").strftime(
"%Y-%m-%d.%H:%M:%S", __import__("time").gmtime()
),
}
if ":" in newip:
nsup = (
"""update delete %(fqdn)s AAAA
update add %(fqdn)s %(dnsttl)s AAAA %(newip)s
update delete %(fqdn)s TXT
update add %(fqdn)s %(dnsttl)s TXT "Created: %(ts)s"
send
answer
"""
% D
)
else:
nsup = (
"""update delete %(fqdn)s A
update add %(fqdn)s %(dnsttl)s A %(newip)s
update delete %(fqdn)s TXT
update add %(fqdn)s %(dnsttl)s TXT "Created: %(ts)s"
send
answer
"""
% D
)
return nsup
def nsupdate(
hostname: str,
newip: str,
dyndomain: str,
nsupdate_bin: str = "/usr/local/bin/nsupdate",
rndc_key: str = "/etc/dhcpc/rndc-key",
) -> Optional[str]:
"""Perform DNS update via nsupdate command.
Returns None on success, else returns combined stdout/stderr as a string.
"""
nsup = create_nsupdate_payload(hostname, newip, dyndomain)
cmd = [nsupdate_bin, "-k", rndc_key, "-v"]
try:
p = Popen(cmd, shell=False, bufsize=0, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
except OSError as e:
return f"nsupdate: execution failed: {e}"
except Exception as e:
return f"nsupdate: some error occured: {e}"
(output, err) = p.communicate(nsup.encode())
out = output.decode() if output else ""
if out.find("status: NOERROR") >= 0:
return None
return out
async def dns_update_worker(
hbdclass,
cfg: dict,
async_queue=None,
log: Optional[callable] = None,
pushmsg: Optional[callable] = None,
loop: Optional[asyncio.AbstractEventLoop] = None,
):
"""Pure async DNS worker that processes updates from asyncio.Queue.
Exits when it receives a None sentinel.
"""
if loop is None:
loop = asyncio.get_running_loop()
dnsq = async_queue
if not dnsq:
if log:
try:
await loop.run_in_executor(
None, log, None, "dns_update_worker: no queue available"
)
except Exception:
pass
return
while True:
try:
item = await dnsq.get()
except Exception as e:
if log:
try:
await loop.run_in_executor(
None, log, None, f"dns_update_worker: error getting item: {e}"
)
except Exception:
pass
break
if item is None:
break
try:
name, addr = item
except Exception:
try:
dnsq.task_done()
except Exception:
pass
continue
m = f"changed address to {addr}"
for dyndomain in cfg.get("dyndomains", []):
err = await loop.run_in_executor(
None,
nsupdate,
name,
addr,
dyndomain,
cfg.get("nsupdate_bin", "/usr/local/bin/nsupdate"),
cfg.get("rndc_key", "/etc/dhcpc/rndc-key"),
)
if err:
m += f", DNS update failed: {err}"
if pushmsg:
try:
await loop.run_in_executor(
None,
pushmsg,
"error: nsupdate failed",
f"{name}.dy.{dyndomain}: {m}",
)
except Exception:
pass
else:
m += ", DNS updated."
try:
dnsq.task_done()
except Exception:
pass
if log:
try:
await loop.run_in_executor(None, log, name, m)
except Exception:
pass
if log:
try:
await loop.run_in_executor(None, log, None, "dns_update_worker exiting")
except Exception:
pass
def start_dns_worker(
hbdclass,
cfg: dict,
log: Optional[callable] = None,
pushmsg: Optional[callable] = None,
loop: Optional[asyncio.AbstractEventLoop] = None,
):
"""Start the async DNS worker and return the Task.
Replaces Host.dnsQ with an asyncio.Queue wrapped in a thread-safe bridge
so legacy synchronous put() calls from UDP handlers still work.
"""
if loop is None:
loop = asyncio.get_event_loop()
# Create asyncio.Queue and wrap in a bridge for thread-safe puts
async_q = asyncio.Queue()
class _QueueBridge:
"""Thread-safe wrapper around asyncio.Queue for synchronous callers."""
def __init__(self, loop, aq):
self._loop = loop
self._aq = aq
def put(self, item):
"""Thread-safe put that schedules onto event loop."""
try:
# Try to detect if we're in the event loop thread
asyncio.get_running_loop()
# We're in event loop context, use put_nowait directly
self._aq.put_nowait(item)
except RuntimeError:
# We're in a different thread, schedule safely
try:
self._loop.call_soon_threadsafe(self._aq.put_nowait, item)
except Exception:
pass
def task_done(self):
"""Delegate task_done to asyncio.Queue."""
try:
self._aq.task_done()
except Exception:
pass
bridge = _QueueBridge(loop, async_q)
hbdclass.Host.dnsQ = bridge
task = loop.create_task(
dns_update_worker(
hbdclass, cfg, async_queue=async_q, log=log, pushmsg=pushmsg, loop=loop
)
)
return task
+450
View File
@@ -0,0 +1,450 @@
"""
host and connection class shared between hbd and
the websit's heartbeat.py
"""
import time
import json
import copy
import queue
num = 0
MAXRTTS = 10
DEBUG = 2
def log(host, m):
if DEBUG:
print("class log: %s %s" % (host, m))
class Connection:
# map of addrs to names
htab = {}
UNKNOWN = "unknown"
UP = "up"
DOWN = "down"
OVERDUE = "overdue"
def __init__(self, host, cid, addr, afam):
self.host = host
self.cid = cid
if addr[0:7] == "::ffff:":
addr = addr[7:]
self.addr = addr
self.afam = afam
self.rtts = [0]
self.lastbeat = time.time()
self.statetime = self.lastbeat
self.deltastatetime = "computed"
self.state = Connection.UNKNOWN
if host:
Connection.htab[addr] = self.host.name
if self.host.isDynDns():
log(self.host.name, "dns update %s" % self.addr)
Host.dnsQ.put((self.host.name, self.addr))
def registerDns(self):
Host.dnsQ.put((self.host.name, self.addr))
def clearstate(self):
d = {}
d["addr"] = ""
d["rtt"] = ""
d["lastbeat"] = ""
d["state"] = ""
d["statetime"] = ""
d["deltastatetime"] = ""
d["rttstate"] = ""
return d
def statedict(self, Null=False):
d = self.clearstate()
now = time.time()
if not Null:
d["addr"] = self.addr
if self.rtts[-1]:
d["rtt"] = "%0.1f" % self.rtts[-1]
elif self.state == Connection.UNKNOWN:
d["rtt"] = ""
else:
d["rtt"] = "?"
d["lastbeat"] = self.lastbeat
if self.state == Connection.OVERDUE:
d["state"] = "<b>%s</b>" % self.state
else:
d["state"] = self.state
if self.state == Connection.UP:
d["rttstate"] = d["rtt"]
elif self.state == Connection.OVERDUE:
d["rttstate"] = ""
else:
d["rttstate"] = d["state"]
d["statetime"] = time.strftime(
"%Y-%m-%d %H:%M:%S", time.localtime(self.statetime)
)
delta = now - self.statetime
if self.state == Connection.UNKNOWN:
d["deltastatetime"] = ""
elif delta > 86400:
# d['deltastatetime'] = time.strftime("%d %H:%M:%S", time.gmtime(delta))
d["deltastatetime"] = "%0.1f days" % (delta / 86400.0)
elif delta > 3600:
# d['deltastatetime'] = time.strftime("%H:%M:%S", time.gmtime(delta))
d["deltastatetime"] = time.strftime("%k:%M hrs", time.gmtime(delta))
# d['deltastatetime'] = "%0.1f hrs" % (delta / 3600.)
elif delta > 60:
# d['deltastatetime'] = time.strftime("%M:%S", time.gmtime(delta))
d["deltastatetime"] = time.strftime("%M:%S mins", time.gmtime(delta))
# d['deltastatetime'] = "%0.1f mins" % (delta / 60.)
else:
# d['deltastatetime'] = time.strftime("%S", time.gmtime(delta))
d["deltastatetime"] = "%i secs" % (delta)
if self.state == Connection.UNKNOWN and now - self.lastbeat > 86400 * 10:
d = self.clearstate()
return d
def headerdict(self, afam):
d = {}
d["addr"] = "%s Addr" % afam
d["rtt"] = "Latencey"
d["lastbeat"] = "Last Contact"
d["state"] = "State"
d["statetime"] = "Last State"
d["rttstate"] = "Reach"
d["deltastatetime"] = "Last State"
return d
def jsons(self):
return json.dumps(self.__dict__)
# set new state, return number of secs in previous state
def newstate(self, state, now, when=0):
self.state = state
delta = now - when
s = delta - self.statetime
self.statetime = delta
return s
def getstate(self):
return self.state
def newaddr(self, addr, rtt, now):
self.lastbeat = now
self.rtts.append(rtt)
if len(self.rtts) > MAXRTTS:
del self.rtts[0]
if self.addr == addr:
r = None
else:
r = "changed from %s to %s" % (self.addr, addr)
try:
del Connection.htab[self.addr]
except Exception:
pass
self.addr = addr
Connection.htab[addr] = self.host.name
if self.host.isDynDns():
Host.dnsQ.put((self.host.name, self.addr))
return r
#
class Host:
# Table of Hosts
hosts = {}
dnsQ = queue.Queue()
def __init__(self, name):
global num
self.name = name
if name:
num += 1
Host.hosts[name] = self
self.num = num
self.dyn = False
self.watched = False
self.upcount = 0
self.interval = 0
self.doesack = -1
self.cmds = []
self.cver = 0
self.connections = {}
self.hdwcounts = [[0, 0], [0, 0], [0, 0]]
# Plugin data storage: {plugin_name: [(timestamp, data), ...]}
self.plugin_data = {}
self.plugin_retention = 100 # Keep last N samples per plugin
# Alert state tracking: {metric_path: AlertState}
self.alert_states = {}
def statedict(self):
d = {}
d["name"] = self.name
if self.dyn:
d["name"] += "*"
if self.watched:
d["name"] = "<b>%s</b>" % d["name"]
d["dyn"] = str(self.dyn)
d["ver"] = str(self.cver)
d["num"] = self.num
for c in ["IPv4", "IPv6"]:
if c in self.connections:
cs = self.connections[c].statedict()
else:
cs = ubConnection.statedict(True)
for csv in cs:
d["%s.%s" % (c, csv)] = cs[csv]
return d
def headerdict(self):
d = {}
d["name"] = "Name"
d["dyn"] = "Dyn"
d["ver"] = "Ver"
d["num"] = "??"
for c in ["IPv4", "IPv6"]:
cs = ubConnection.headerdict(c)
for csv in cs:
d["%s.%s" % (c, csv)] = cs[csv]
return d
def registerDns(self):
for af in self.connections:
self.connections[af].registerDns()
def stateinfo(self):
ddict = {}
for d in self.__dict__:
if d == "connections":
cl = []
for c in ["IPv4", "IPv6"]:
if c not in self.connections:
continue
# dirty ugly hack: fix conn to host backpointer
cld = copy.deepcopy(self.connections[c].__dict__)
cld["host"] = cld["host"].name
cl.append(cld)
ddict[d] = cl
else:
ddict[d] = self.__dict__[d]
return ddict
def jsons(self):
return json.dumps(self.stateinfo())
def setcver(self, cver):
self.cver = cver
def isDynDns(self):
return self.dyn
def isIPv4(self, addr):
if isinstance(addr, tuple):
return addr[0].find(".") > 0
else:
return addr.find(".") > 0
def conndata(self, cid, addr, rtt, now):
if addr[0:7] == "::ffff:":
addr = addr[7:]
if self.isIPv4(addr):
afam = "IPv4"
else:
afam = "IPv6"
if afam not in self.connections:
self.connections[afam] = Connection(self, cid, addr, afam)
conn = self.connections[afam]
res = conn.newaddr(addr, rtt, now)
return conn, res
# called when reloading class from pickle, add new fields here
def fixup(self):
for c in ["IPv4", "IPv6"]:
if c in self.connections:
addr = self.connections[c].addr
if addr[0:7] == "::ffff:":
addr = addr[7:]
self.connections[c].addr = addr
# Add plugin_data if missing (for backward compatibility)
if not hasattr(self, "plugin_data"):
self.plugin_data = {}
if not hasattr(self, "plugin_retention"):
self.plugin_retention = 100
if not hasattr(self, "alert_states"):
self.alert_states = {}
pass
def add_plugin_data(self, plugin_name, data, timestamp=None):
"""Store plugin data with timestamp.
Args:
plugin_name: Name of the plugin (e.g., "cpu_monitor")
data: Dict of plugin data
timestamp: Optional timestamp (default: current time)
"""
if timestamp is None:
timestamp = time.time()
if plugin_name not in self.plugin_data:
self.plugin_data[plugin_name] = []
# Add new data
self.plugin_data[plugin_name].append((timestamp, data))
# Enforce retention limit (keep last N samples)
if len(self.plugin_data[plugin_name]) > self.plugin_retention:
self.plugin_data[plugin_name] = self.plugin_data[plugin_name][-self.plugin_retention:]
def get_plugin_data(self, plugin_name, limit=None):
"""Retrieve plugin data for a specific plugin.
Args:
plugin_name: Name of the plugin
limit: Optional limit on number of recent samples to return
Returns:
List of (timestamp, data) tuples, most recent last
"""
data = self.plugin_data.get(plugin_name, [])
if limit and len(data) > limit:
return data[-limit:]
return data
def get_latest_plugin_data(self, plugin_name):
"""Get the most recent plugin data for a plugin.
Args:
plugin_name: Name of the plugin
Returns:
(timestamp, data) tuple or None if no data
"""
data = self.plugin_data.get(plugin_name, [])
return data[-1] if data else None
def get_all_plugin_data(self):
"""Get all plugin data for this host.
Returns:
Dict of {plugin_name: [(timestamp, data), ...]}
"""
return self.plugin_data
# def dispstate(self):
# if self.state in ["down", "overdue"]:
# state = "<b>%s</b>" % self.state
# elif self.state in ["up", "UP"]:
# state = ""
# for x in list(self.connections.keys()):
# try:
# state += " %5.1f" % (self.connections[x].rtts[-1])
# except:
# state += " %5s" % (self.connections[x].rtts[-1])
# elif self.state in ["unknown", "UNKNOWN"]:
# state = ""
# else:
# state = "%s" % self.state
# return state
def dispstats(self):
if self.doesack != -1:
if self.upcount > 0:
r = ""
for v in range(3):
a, u = self.hdwcounts[v]
if (self.upcount - u) != 0:
vs = "%0.0f" % (
100.0 - (((self.doesack - a) * 100.0) / (self.upcount - u))
)
if vs == "0":
vs = ""
else:
vs = "-"
r += '<td align="right">%s</td>' % vs
return r
else:
return "<td>(%s)</td><td></td><td></td>" % (self.doesack)
return '<td align="right">N/A</td><td></td<td></td>>'
hostfields_long = [
"name",
"IPv4.addr",
"IPv4.state",
("IPv4.rtt", 'style="text-align: right;"'),
("IPv4.statetime", 'style="text-align: right;"'),
"IPv6.addr",
"IPv6.state",
("IPv6.rtt", 'style="text-align: right;"'),
("IPv6.statetime", 'style="text-align: right;"'),
"ver",
]
hostfields_short = [
"name",
("IPv4.rttstate", 'style="text-align: right;"'),
("IPv4.deltastatetime", 'style="text-align: right;"'),
("IPv6.rttstate", 'style="text-align: right;"'),
("IPv6.deltastatetime", 'style="text-align: right;"'),
]
def gene(self, tag, v, attrib=None):
if attrib:
a = " %s" % attrib
else:
a = ""
return "<%s%s>%s</%s>" % (tag, a, v, tag)
def htmltable(self, tag, hd, short):
if short:
hostfields = Host.hostfields_short
else:
hostfields = Host.hostfields_long
h = []
for f in hostfields:
if isinstance(f, tuple):
h.append(self.gene(tag, hd[f[0]], f[1]))
else:
h.append(self.gene(tag, hd[f]))
return self.gene("tr", "\n".join(h))
def buildhosttable(self, short=False):
if DEBUG > 1:
print("DBG buildhosttable: start")
res = []
res.append('<table id="ntable" class="sortable">')
res.append(ubHost.htmltable("th", ubHost.headerdict(), short))
hosts_sorted = list(Host.hosts.keys())
if len(hosts_sorted):
hosts_sorted.sort()
for h in hosts_sorted:
res.append(ubHost.htmltable("td", Host.hosts[h].statedict(), short))
res.append("</table>")
if DEBUG > 1:
print("DBG buildhosttable: %s" % res)
return res
def buildmsgtable(self, msgs):
res = []
le = max(40 - len(Host.hosts), 3)
res.append("<h4>Log of Events</h4>")
for m in msgs[len(msgs) - le :]:
res.append("%s<BR>" % m)
return res
# create fake "unbound objects", remove in Python 3.0
ubHost = Host(None)
ubConnection = Connection(None, "", "", "")
+406
View File
@@ -0,0 +1,406 @@
"""HTTP server implementation using aiohttp and jinja2."""
import asyncio
import json
import time
import urllib.parse
import os
import logging
from aiohttp import web
import jinja2
logger = logging.getLogger(__name__)
def _render_template(html_str: str, **context) -> str:
tmpl = jinja2.Template(html_str)
return tmpl.render(**context)
async def start(
host: str,
port: int,
config,
hbdclass,
msgs_getter,
log=None,
email=None,
pushmsg=None,
msg_to_websockets=None,
tcss=None,
DEBUG=0,
verbose=False,
get_now=None,
VER="",
threshold_checker=None,
):
"""Start an aiohttp web server and block until cancelled.
This function is intended to be awaited inside the main asyncio event loop.
"""
get_now = get_now or (lambda: time.time())
async def index(request):
res = []
res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
res.append("<html>")
res.append("<head>")
res.append("<title>Heartbeat</title>")
if tcss:
res.append(tcss)
res.append("</head>")
res.append('<body BGCOLOR = "#FFFFFF" LINK = "#008000" VLINK = "#008000">')
res.append(f"<H2>Heartbeat status {VER}</h2>")
res += hbdclass.ubHost.buildhosttable()
res += hbdclass.ubHost.buildmsgtable(msgs_getter())
res.append(
"<p> %s (%s)</p>"
% (
time.strftime("%H:%M:%S", time.localtime(get_now())),
config.get("tz", "CET-1CDT"),
)
)
res.append("</body></html>")
body = "\n".join(res)
return web.Response(text=body, content_type="text/html")
async def api_hosts(request):
lst = [hbdclass.Host.hosts[h].jsons() for h in hbdclass.Host.hosts]
return web.json_response(json.loads("[" + ",".join(lst) + "]"))
async def api_messages(request):
lst = msgs_getter()[-30:]
return web.json_response(lst)
async def cmd(request):
qa = request.rel_url.query
uname = qa.get("h")
ucmd = qa.get("c")
if not ucmd or not uname:
return web.Response(status=400, text="need h= and c= arguments")
if uname not in hbdclass.Host.hosts:
return web.Response(status=400, text=f"h={uname} not found")
hbdclass.Host.hosts[uname].cmds.append(
("CMD", {"cmd": urllib.parse.unquote(ucmd)})
)
return web.Response(text=f"cmd {uname} queued")
async def drop(request):
qa = request.rel_url.query
uname = qa.get("h")
if not uname:
return web.Response(status=400, text="need h= argument")
if uname not in hbdclass.Host.hosts:
return web.Response(status=400, text=f"h={uname} not found")
if log:
log(uname, "dropped")
del hbdclass.Host.hosts[uname]
return web.Response(text="Done")
async def register(request):
qa = request.rel_url.query
uname = qa.get("h")
if not uname:
return web.Response(status=400, text="need h= argument")
if uname not in hbdclass.Host.hosts:
return web.Response(status=400, text=f"h={uname} not found")
ll = hbdclass.Host.hosts[uname].registerDns()
if log:
log(uname, ll)
return web.Response(text=str(ll))
async def update(request):
qa = request.rel_url.query
uname = urllib.parse.unquote(qa.get("h", ""))
ucode = qa.get("c")
if not ucode or not uname:
return web.Response(status=400, text="need h= and c= arguments")
if uname != "All" and uname not in hbdclass.Host.hosts:
return web.Response(status=400, text=f"h={uname} not found")
if uname != "All":
names = [uname]
else:
names = [n for n in hbdclass.Host.hosts if hbdclass.Host.hosts[n].cver >= 2]
out = []
for n in names:
err = None
try:
r = {"csum": None, "code": ucode}
hbdclass.Host.hosts[n].cmds.append(("UPD", r))
except Exception as e:
err = str(e)
out.append(f"update started for {n}: {err if err else 'OK'}")
return web.Response(text="\n".join(out))
async def live(request):
# render template from hbd/templates/live.html using Jinja2
# Resolve templates directory relative to the hbd package
pkg_dir = os.path.dirname(__file__)
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
host = config.get("hb_host", "localhost")
extra_scripts = config.get("http_extra_scripts", "")
host = request.host.split(":")[0]
if config.get("wss_port"):
heartbeat_ws_url = f"wss://{host}:{config['wss_port']}/hbd"
else:
heartbeat_ws_url = f"ws://{host}:{config.get('ws_port', 50005)}/hbd"
tmpl = env.get_template("live.html")
body = tmpl.render(
title="Heartbeat",
header="Heartbeat",
request=request,
heartbeat_ws_url=heartbeat_ws_url,
extra_scripts=extra_scripts,
hosts=[
hbdclass.Host.hosts[h].stateinfo() for h in sorted(hbdclass.Host.hosts)
],
messages=msgs_getter()[-30:],
)
return web.Response(text=body, content_type="text/html")
async def static(request):
"""Serve files from the package static directory.
URL form: /static/<path>
"""
p = request.match_info.get("path", "")
logger.debug("static file requested: %s", p)
base = os.path.abspath(os.path.join(os.path.dirname(__file__), "static"))
# normalize and prevent directory traversal
target = os.path.abspath(os.path.normpath(os.path.join(base, p)))
if not target.startswith(base + os.sep) and target != base:
return web.Response(status=403, text="Forbidden")
if not os.path.exists(target) or not os.path.isfile(target):
return web.Response(status=404, text="Not Found")
logger.info("serving static file: %s", target)
return web.FileResponse(path=target)
async def favicon(request):
"""Serve favicon.ico from the package static directory."""
base = os.path.abspath(os.path.join(os.path.dirname(__file__), "static/images"))
target = os.path.join(base, "favicon.ico")
if not os.path.exists(target) or not os.path.isfile(target):
return web.Response(status=404, text="Not Found")
return web.FileResponse(path=target)
# -------------------------------------------------------------------------
# Plugin Data API Endpoints
# -------------------------------------------------------------------------
async def api_host_plugins(request):
"""Get all plugin data for a specific host."""
hostname = request.match_info.get("hostname")
if hostname not in hbdclass.Host.hosts:
return web.json_response(
{"error": f"Host '{hostname}' not found"},
status=404
)
host = hbdclass.Host.hosts[hostname]
# Get plugin data with most recent sample for each plugin
plugins_summary = {}
for plugin_name, samples in host.plugin_data.items():
if samples:
# Get most recent sample
timestamp, data = samples[-1]
plugins_summary[plugin_name] = {
"timestamp": timestamp,
"data": data,
"sample_count": len(samples),
}
return web.json_response({
"hostname": hostname,
"plugins": plugins_summary,
})
async def api_host_plugin_detail(request):
"""Get detailed data for a specific plugin on a host."""
hostname = request.match_info.get("hostname")
plugin_name = request.match_info.get("plugin_name")
if hostname not in hbdclass.Host.hosts:
return web.json_response(
{"error": f"Host '{hostname}' not found"},
status=404
)
host = hbdclass.Host.hosts[hostname]
# Get limit from query parameter
limit = request.rel_url.query.get("limit", "10")
try:
limit = int(limit)
except ValueError:
limit = 10
# Get plugin data
samples = host.get_plugin_data(plugin_name, limit=limit)
if not samples:
return web.json_response(
{"error": f"No data for plugin '{plugin_name}' on host '{hostname}'"},
status=404
)
# Format samples
formatted_samples = [
{
"timestamp": ts,
"data": data,
}
for ts, data in samples
]
return web.json_response({
"hostname": hostname,
"plugin": plugin_name,
"samples": formatted_samples,
"sample_count": len(formatted_samples),
})
async def api_host_alerts(request):
"""Get alert states for a specific host."""
hostname = request.match_info.get("hostname")
if hostname not in hbdclass.Host.hosts:
return web.json_response(
{"error": f"Host '{hostname}' not found"},
status=404
)
host = hbdclass.Host.hosts[hostname]
# Get alert states
alerts = []
for metric_path, alert_state in host.alert_states.items():
alerts.append(alert_state.to_dict())
# Get summary if threshold_checker available
summary = {"ok": 0, "warning": 0, "critical": 0, "unknown": 0}
if threshold_checker:
summary = threshold_checker.get_alert_summary(host.alert_states)
return web.json_response({
"hostname": hostname,
"alerts": alerts,
"summary": summary,
})
async def api_all_alerts(request):
"""Get all active alerts across all hosts."""
all_alerts = []
for hostname, host in hbdclass.Host.hosts.items():
if threshold_checker:
active_alerts = threshold_checker.get_active_alerts(host.alert_states)
else:
# Fallback if no threshold checker
from hbd.client.threshold import AlertLevel
active_alerts = [
state for state in host.alert_states.values()
if state.level != AlertLevel.OK
]
for alert in active_alerts:
alert_dict = alert.to_dict()
alert_dict["hostname"] = hostname
all_alerts.append(alert_dict)
# Sort by level (critical first) then by hostname
level_order = {"CRITICAL": 0, "WARNING": 1, "UNKNOWN": 2, "OK": 3}
all_alerts.sort(
key=lambda a: (level_order.get(a["level"], 99), a["hostname"], a["metric_path"])
)
# Get summary counts
summary = {"critical": 0, "warning": 0, "unknown": 0, "total": len(all_alerts)}
for alert in all_alerts:
level = alert["level"].lower()
if level in summary:
summary[level] += 1
return web.json_response({
"alerts": all_alerts,
"summary": summary,
"host_count": len(hbdclass.Host.hosts),
})
# -------------------------------------------------------------------------
# UI Pages
# -------------------------------------------------------------------------
async def plugins_page(request):
"""Render the plugin metrics visualization page."""
pkg_dir = os.path.dirname(__file__)
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
# Collect all hosts with plugin data
hosts_with_plugins = []
for hostname in sorted(hbdclass.Host.hosts.keys()):
host = hbdclass.Host.hosts[hostname]
if host.plugin_data:
hosts_with_plugins.append({
"name": hostname,
"plugins": list(host.plugin_data.keys()),
})
tmpl = env.get_template("plugins.html")
body = tmpl.render(
title="Plugin Metrics - Heartbeat",
header="Plugin Metrics",
hosts=hosts_with_plugins,
)
return web.Response(text=body, content_type="text/html")
async def alerts_page(request):
"""Render the alerts dashboard page."""
pkg_dir = os.path.dirname(__file__)
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
tmpl = env.get_template("alerts.html")
body = tmpl.render(
title="Alerts Dashboard - Heartbeat",
header="Alerts Dashboard",
)
return web.Response(text=body, content_type="text/html")
app = web.Application()
app.add_routes(
[
web.get("/", index),
web.get("/api/0/hosts", api_hosts),
web.get("/api/0/messages", api_messages),
web.get("/api/0/hosts/{hostname}/plugins", api_host_plugins),
web.get("/api/0/hosts/{hostname}/plugins/{plugin_name}", api_host_plugin_detail),
web.get("/api/0/hosts/{hostname}/alerts", api_host_alerts),
web.get("/api/0/alerts", api_all_alerts),
web.get("/c", cmd),
web.get("/d", drop),
web.get("/n", register),
web.get("/u", update),
web.get("/live", live),
web.get("/plugins", plugins_page),
web.get("/alerts", alerts_page),
web.get("/static/{path:.*}", static),
web.get("/favicon.ico", favicon),
]
)
runner = web.AppRunner(app)
await runner.setup()
site = web.TCPSite(runner, host, port)
await site.start()
if verbose:
print(f"HTTP server started on {host}:{port}")
try:
await asyncio.Future()
finally:
await runner.cleanup()
+342
View File
@@ -0,0 +1,342 @@
"""
Journal logging for heartbeat messages.
Provides size-based rotating log files for all received heartbeat messages.
Messages are logged in JSON format for easy parsing and analysis.
"""
import json
import logging
import os
import asyncio
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, Optional
logger = logging.getLogger(__name__)
class MessageJournal:
"""
Journal logger for heartbeat messages with size-based rotation.
Features:
- Logs all received messages in JSON format
- Automatic rotation when file size exceeds threshold
- Keeps configurable number of rotated logs
- Thread-safe and async-safe operation
- Configurable log directory and file naming
Configuration:
journal_dir: Directory for journal files (default: /var/log/heartbeat/)
journal_file: Base filename (default: messages.journal)
max_size: Maximum file size in bytes before rotation (default: 100MB)
max_backups: Number of backup files to keep (default: 10)
enabled: Enable/disable journaling (default: True)
"""
def __init__(self, config: Optional[Dict[str, Any]] = None):
"""
Initialize the message journal.
Args:
config: Configuration dictionary with journal settings
"""
self.config = config or {}
# Configuration options
self.journal_dir = Path(self.config.get('journal_dir', '/var/log/heartbeat'))
self.journal_file = self.config.get('journal_file', 'messages.journal')
self.max_size = self.config.get('journal_max_size', 100 * 1024 * 1024) # 100MB default
self.max_backups = self.config.get('journal_max_backups', 10)
self.enabled = self.config.get('journal_enabled', True)
# Runtime state
self._file_handle = None
self._current_size = 0
self._lock = asyncio.Lock()
self._initialized = False
# Full path to current journal file
self.journal_path = self.journal_dir / self.journal_file
async def initialize(self) -> bool:
"""
Initialize the journal.
Creates journal directory if needed and opens the journal file.
Returns:
True if initialization successful, False otherwise
"""
if not self.enabled:
logger.info("Message journal disabled in configuration")
return True
try:
# Create journal directory if it doesn't exist
self.journal_dir.mkdir(parents=True, exist_ok=True)
# Open journal file in append mode
self._file_handle = open(self.journal_path, 'a', encoding='utf-8')
# Get current file size
try:
self._current_size = os.path.getsize(self.journal_path)
except OSError:
self._current_size = 0
self._initialized = True
logger.info(f"Message journal initialized: {self.journal_path} "
f"(current size: {self._current_size:,} bytes, "
f"max: {self.max_size:,} bytes)")
return True
except Exception as e:
logger.error(f"Failed to initialize message journal: {e}")
self.enabled = False
return False
async def log_message(
self,
msg: Dict[str, Any],
addr: tuple,
timestamp: Optional[float] = None
):
"""
Log a received message to the journal.
Args:
msg: Parsed message dictionary
addr: Source address (ip, port) tuple
timestamp: Message timestamp (defaults to current time)
"""
if not self.enabled or not self._initialized:
return
# Skip HTB (heartbeat) messages - too verbose
msg_id = msg.get('ID', '')
if msg_id == 'HTB':
return
async with self._lock:
try:
# Prepare journal entry
if timestamp is None:
import time
timestamp = time.time()
entry = {
'timestamp': timestamp,
'datetime': datetime.fromtimestamp(timestamp).isoformat(),
'source_ip': addr[0] if isinstance(addr, (tuple, list)) else str(addr),
'source_port': addr[1] if isinstance(addr, (tuple, list)) and len(addr) > 1 else None,
'message': msg
}
# Serialize to JSON (one line per entry)
json_line = json.dumps(entry, separators=(',', ':')) + '\n'
json_bytes = json_line.encode('utf-8')
# Check if rotation is needed
if self._current_size + len(json_bytes) > self.max_size:
await self._rotate()
# Write to journal
if self._file_handle:
self._file_handle.write(json_line)
self._file_handle.flush() # Ensure data is written
self._current_size += len(json_bytes)
logger.debug(f"Logged message from {addr[0]}: {msg.get('ID', 'UNKNOWN')}")
except Exception as e:
logger.error(f"Error writing to journal: {e}")
async def _rotate(self):
"""
Rotate the journal file.
Renames current file with timestamp, opens new file, and removes
old backups exceeding max_backups limit.
"""
try:
# Close current file
if self._file_handle:
self._file_handle.close()
self._file_handle = None
# Generate backup filename with timestamp
timestamp_str = datetime.now().strftime('%Y%m%d-%H%M%S')
backup_name = f"{self.journal_file}.{timestamp_str}"
backup_path = self.journal_dir / backup_name
# Rename current file to backup
if self.journal_path.exists():
self.journal_path.rename(backup_path)
logger.info(f"Rotated journal: {backup_path} "
f"(size: {self._current_size:,} bytes)")
# Open new journal file
self._file_handle = open(self.journal_path, 'a', encoding='utf-8')
self._current_size = 0
# Clean up old backups
await self._cleanup_old_backups()
except Exception as e:
logger.error(f"Error rotating journal: {e}")
# Try to reopen the file even if rotation failed
try:
self._file_handle = open(self.journal_path, 'a', encoding='utf-8')
except Exception as e2:
logger.error(f"Failed to reopen journal after rotation error: {e2}")
self.enabled = False
async def _cleanup_old_backups(self):
"""
Remove old backup files exceeding max_backups limit.
Keeps only the most recent backups based on filename (which includes timestamp).
"""
try:
# Find all backup files
backup_pattern = f"{self.journal_file}.*"
backup_files = sorted(self.journal_dir.glob(backup_pattern))
# Remove oldest backups if we have too many
if len(backup_files) > self.max_backups:
files_to_remove = backup_files[:len(backup_files) - self.max_backups]
for backup_file in files_to_remove:
try:
backup_file.unlink()
logger.info(f"Removed old backup: {backup_file.name}")
except Exception as e:
logger.warning(f"Failed to remove old backup {backup_file}: {e}")
except Exception as e:
logger.error(f"Error cleaning up old backups: {e}")
async def log_threshold_event(
self,
host_name: str,
metric_path: str,
old_level: str,
new_level: str,
value: Any,
timestamp: Optional[float] = None
):
"""
Log a threshold state change event.
Args:
host_name: Name of the host
metric_path: Full metric path (e.g., "cpu_monitor.cpu_percent")
old_level: Previous alert level
new_level: New alert level
value: Current metric value
timestamp: Event timestamp (default: current time)
"""
if not self.enabled or not self._initialized:
return
try:
if timestamp is None:
timestamp = __import__('time').time()
event = {
'timestamp': timestamp,
'iso_time': datetime.fromtimestamp(timestamp).isoformat(),
'event_type': 'threshold',
'host': host_name,
'metric': metric_path,
'old_level': old_level,
'new_level': new_level,
'value': value,
}
async with self._lock:
if not self._file_handle:
return
# Check if rotation is needed
if self._current_size >= self.max_size:
await self._rotate()
# Write event
line = json.dumps(event) + '\n'
self._file_handle.write(line)
self._file_handle.flush()
# Update size
self._current_size += len(line.encode('utf-8'))
except Exception as e:
logger.error(f"Error logging threshold event: {e}")
async def close(self):
"""
Close the journal and release resources.
Should be called during shutdown.
"""
async with self._lock:
if self._file_handle:
try:
self._file_handle.close()
logger.info("Message journal closed")
except Exception as e:
logger.error(f"Error closing journal: {e}")
finally:
self._file_handle = None
self._initialized = False
def get_stats(self) -> Dict[str, Any]:
"""
Get journal statistics.
Returns:
Dictionary with journal stats
"""
return {
'enabled': self.enabled,
'initialized': self._initialized,
'current_file': str(self.journal_path),
'current_size': self._current_size,
'max_size': self.max_size,
'max_backups': self.max_backups,
'rotation_threshold': f"{(self._current_size / self.max_size * 100):.1f}%"
}
# Global journal instance
_journal_instance: Optional[MessageJournal] = None
def get_journal(config: Optional[Dict[str, Any]] = None) -> MessageJournal:
"""
Get or create the global journal instance.
Args:
config: Configuration dictionary (only used on first call)
Returns:
MessageJournal instance
"""
global _journal_instance
if _journal_instance is None:
_journal_instance = MessageJournal(config)
return _journal_instance
async def log_message(msg: Dict[str, Any], addr: tuple, timestamp: Optional[float] = None):
"""
Convenience function to log a message using the global journal.
Args:
msg: Parsed message dictionary
addr: Source address (ip, port) tuple
timestamp: Message timestamp (defaults to current time)
"""
journal = get_journal()
await journal.log_message(msg, addr, timestamp)
+396
View File
@@ -0,0 +1,396 @@
"""Server runtime: starts UDP listener, HTTP server and websocket stubs."""
import asyncio
import logging
import socket
import time
import signal
import sys
import ssl
from . import __version__
from . import udp
from . import hbdclass
from . import ws as ws_mod
logger = logging.getLogger(__name__)
msg_to_websockets = ws_mod.broadcast
logf = None
lastfm = ["", "", ""]
# shared runtime collections and helpers
msgs = []
def initlog(logfile):
try:
return open(logfile, "a+")
except Exception as e:
import sys
print("cannot open loffile %s, using STDERR: %s" % (logfile, e))
return sys.stderr
def log(host, m, service=None):
ts = time.time()
s = f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))} {host or ''} {m}"
msgs.append(s)
logger.info(s)
if logf:
try:
logf.write(s + "\n")
logf.flush()
except Exception as e:
logger.warning("failed to write to logfile: %s", e)
msg_to_websockets("message", s)
def cleanup_function(config):
"""This function will be executed upon program exit."""
logger.info("Running cleanup function...")
import pickle
pickfile = config.get("pickfile", "hbd.pickle")
pickf = open(pickfile, "wb")
pick = pickle.Pickler(pickf)
pick.dump(hbdclass.Host.hosts)
pick.dump(msgs)
pick.dump(lastfm)
pickf.close()
logger.info("Cleanup complete.")
async def _run_async(config):
loop = asyncio.get_running_loop()
shutdown_event = asyncio.Event()
# Signal handlers for graceful shutdown
def signal_handler(signum, frame):
sig_name = signal.Signals(signum).name if hasattr(signal, "Signals") else signum
logger.info(f"Received {sig_name}, initiating shutdown...")
loop.call_soon_threadsafe(shutdown_event.set)
# Register signal handlers
loop.add_signal_handler(signal.SIGINT, signal_handler, signal.SIGINT, None)
loop.add_signal_handler(signal.SIGTERM, signal_handler, signal.SIGTERM, None)
from . import http as http_mod
from . import dns as dns_mod
from . import notify as notify_mod
from . import monitor as monitor_mod
from . import journal as journal_mod
from ..client import threshold as threshold_mod
notify_mod.setup(config)
# Initialize message journal
msg_journal = journal_mod.get_journal(config)
await msg_journal.initialize()
# Initialize threshold checker
threshold_checker = threshold_mod.ThresholdChecker(
config=config,
notification_callback=notify_mod.pushmsg_from_config,
renotify_interval=config.get("threshold_renotify_interval", 3600),
journal=msg_journal,
)
logger.info("Threshold checker initialized")
pushmsg = notify_mod.pushmsg_from_config
sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
# Disable IPV6_V6ONLY option to enable dual-stack (listen on IPv4 as well)
# This option is system-dependent; on many systems, setting it to False enables
# the socket to handle both IPv4 and IPv6 traffic.
try:
sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, False)
except OSError as e:
logger.warning(
f"Warning: Could not reset IPV6_V6ONLY not supported or dual-stack is unavailable. Error: {e}"
)
# 3. Bind to all interfaces (::) on a specific port
# UDP server endpoint (handler wired to handle_datagram with context)
bind_addr = ("::", config.get("hb_port", 50003))
sock.bind(bind_addr)
logger.info("Starting UDP server on %s:%s", *bind_addr)
def udp_handler(msg, addr, transport):
ctx = dict(
config=config,
hbdclass=hbdclass,
log=log,
pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets,
msg_journal=msg_journal,
threshold_checker=threshold_checker,
DEBUG=config.get("debug", 0),
verbose=config.get("verbose", False),
)
udp.handle_datagram(msg, addr, transport, ctx)
transport, protocol = await loop.create_datagram_endpoint(
lambda: udp.EchoServerProtocol(config=config, handler=udp_handler),
sock=sock,
)
# HTTP server (asyncio-based via aiohttp)
try:
http_task = asyncio.create_task(
http_mod.start(
host=config.get("hbd_host", ""),
port=config.get("hbd_port", 50004),
config=config,
hbdclass=hbdclass,
msgs_getter=lambda: msgs,
log=log,
pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets,
threshold_checker=threshold_checker,
tcss=None,
DEBUG=config.get("debug", 0),
verbose=config.get("verbose", False),
get_now=lambda: time.time(),
VER="",
)
)
logger.info(
"HTTP server started on %s:%s",
config.get("hbd_host", ""),
config.get("hbd_port", 50004),
)
except Exception as e:
logger.exception("failed to start HTTP server: %s", e)
# start dns update worker (async)
dns_task = None
try:
dns_task = dns_mod.start_dns_worker(
hbdclass, config, log=log, pushmsg=pushmsg, loop=loop
)
logger.info("dns update worker started")
except Exception as e:
logger.exception("dns worker failed to start: %s", e)
# Start the websocket servers as a background task
if config.get("wss_port", None):
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
ssl_path = config.get("cert_path", "")
wss_pem = ssl_path + config.get("wss_pem", "")
wss_key = ssl_path + config.get("wss_key", "")
try:
ssl_context.load_cert_chain(wss_pem, keyfile=wss_key)
except FileNotFoundError:
logger.error("error: missing SSL keys %s or %s", wss_pem, wss_key)
sys.exit(1)
logger.info(
"Starting secure WebSocket server on port %s with cert %s",
config.get("wss_port", None),
wss_pem,
)
else:
ssl_context = None
try:
ws_port = config.get("ws_port", 50005)
logger.info("Starting WebSocket server on port %s", ws_port)
ws_task = asyncio.create_task(
ws_mod.start(
host=config.get("hbd_host", ""),
ws_port=ws_port,
wss_port=config.get("wss_port", None),
ssl_context=ssl_context,
get_hosts=lambda: [
hbdclass.Host.hosts[h].stateinfo()
for h in sorted(hbdclass.Host.hosts)
],
get_msgs=lambda: msgs,
verbose=config.get("verbose", False),
)
)
logger.info("WebSocket task started")
except Exception as e:
logger.exception("websocket server failed to start: %s", e)
# Start the monitor thread as a background task
try:
monitor_task = asyncio.create_task(
monitor_mod.start(
config=config,
hbdclass=hbdclass,
log=log,
pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets,
)
)
logger.info("Monitor task started")
except Exception as e:
logger.exception("monitor task failed to start: %s", e)
try:
# run forever until shutdown event is set
await shutdown_event.wait()
logger.info("Shutdown signal received, stopping services...")
except Exception as e:
logger.exception("Error in main loop: %s", e)
finally:
# Cancel all running tasks
logger.info("Cancelling tasks...")
try:
transport.close()
except Exception as e:
logger.warning("Error closing UDP transport: %s", e)
tasks_to_cancel = [http_task, ws_task, monitor_task]
for task in tasks_to_cancel:
if task:
try:
task.cancel()
logger.debug("Cancelled task: %s", task)
except Exception as e:
logger.warning("Error cancelling task: %s", e)
# Wait for tasks to finish cancellation with timeout
remaining_tasks = [t for t in tasks_to_cancel if t]
if remaining_tasks:
try:
await asyncio.wait_for(
asyncio.gather(*remaining_tasks, return_exceptions=True),
timeout=2.0,
)
except asyncio.TimeoutError:
logger.warning("Timeout waiting for tasks to cancel")
except Exception as e:
logger.debug("Exception during task cancellation: %s", e)
# Close message journal
try:
await msg_journal.close()
except Exception as e:
logger.warning("Error closing message journal: %s", e)
# Signal DNS worker to exit and await it
try:
if "dns_task" in locals() and dns_task:
try:
hbdclass.Host.dnsQ.put(None)
except Exception:
pass
try:
await asyncio.wait_for(dns_task, timeout=2.0)
logger.info("DNS worker finished")
except asyncio.TimeoutError:
logger.warning("Timeout waiting for DNS worker to finish")
dns_task.cancel()
except asyncio.CancelledError:
logger.info("DNS worker was cancelled")
except Exception as e:
logger.warning("Error awaiting DNS worker: %s", e)
finally:
# Clear queue bridge to release any held references
hbdclass.Host.dnsQ = None
except Exception as e:
logger.warning("Error stopping DNS worker: %s", e)
logger.info("All tasks cancelled")
def load_pickled_hosts(config, hbdclass):
"""Load pickled hosts from file, if available."""
global lastfm, msgs
import os
import pickle
pickfile = config.get("pickfile", "hbd.pickle")
dyndnshosts = config.get("dyndnshosts", [])
watchhosts = config.get("watchhosts", [])
drophosts = config.get("drophosts", [])
if 1 and os.path.exists(pickfile):
if config.get("verbose", False):
logger.info("opening pickls %s", pickfile)
pickf = open(pickfile, "rb")
pick = pickle.Unpickler(pickf)
try:
hbdclass.Host.hosts = pick.load()
msgs = pick.load()
try:
lastfm = pick.load()
except Exception:
lastfm = ["", "", ""]
pickf.close()
except Exception as e:
logger.exception("load pickled failed: %s", e)
os.unlink(pickfile)
hbdclass.Connection.htab = {}
for h in list(hbdclass.Host.hosts.keys()):
hbdclass.Host.hosts[h].dyn = h in dyndnshosts
hbdclass.Host.hosts[h].watched = h in watchhosts
hbdclass.Host.hosts[h].fixup()
for h in drophosts:
if h in hbdclass.Host.hosts:
del hbdclass.Host.hosts[h]
if config.get("verbose", False):
logger.info("%s pickled hosts loaded", len(hbdclass.Host.hosts))
else:
if config.get("verbose", False):
logger.info("no pickled data")
def run(config):
"""Start the hbd service (blocking).
Manually manages the event loop to ensure clean shutdown.
"""
global logf
import os
logging.basicConfig(
level=logging.DEBUG if config.get("debug", 0) > 0 else logging.INFO
)
load_pickled_hosts(config, hbdclass)
logf = initlog(logfile=config.get("logfile", "messages.log"))
log(None, f"hbd version {__version__} starting up")
# Create and set the event loop manually
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop.run_until_complete(_run_async(config))
except KeyboardInterrupt:
logger.info("Received KeyboardInterrupt, shutting down...")
except Exception as e:
logger.exception("Unhandled exception in main: %s", e)
finally:
cleanup_function(config)
logger.info("hbd shutdown complete")
if logf and logf != sys.stderr:
try:
logf.close()
except Exception:
pass
# Explicitly close the loop
try:
# Cancel all remaining tasks
pending = asyncio.all_tasks(loop)
for task in pending:
task.cancel()
# Run one more cycle to process cancellations
if pending:
loop.run_until_complete(
asyncio.gather(*pending, return_exceptions=True)
)
except Exception:
pass
finally:
loop.close()
# Exit
os._exit(0)
+50
View File
@@ -0,0 +1,50 @@
"""monitor helper and thread for heartbeat daemon."""
from __future__ import annotations
import asyncio
import time
DROPOVERDUE = 7 * 24 * 3600
def checkoverdue(
config: dict,
hbdclass,
log: callable,
pushmsg: callable,
msg_to_websockets: callable,
):
now = time.time()
for h in list(hbdclass.Host.hosts.keys()):
pmsg = []
for c in hbdclass.Host.hosts[h].connections:
conn = hbdclass.Host.hosts[h].connections[c]
if conn.state == hbdclass.Connection.DOWN:
continue
timeout = hbdclass.Host.hosts[h].interval + config.get("grace", 10)
if conn.state == hbdclass.Connection.UP and (now - conn.lastbeat) > timeout:
conn.newstate(hbdclass.Connection.OVERDUE, now, config.get("grace", 10))
pmsg.append(conn.afam)
if (
conn.state == hbdclass.Connection.OVERDUE
and (now - conn.lastbeat) > DROPOVERDUE
):
conn.newstate(hbdclass.Connection.UNKNOWN, conn.lastbeat)
if pmsg != []:
if h in config.get("watchhosts", []):
pushmsg("%s %s overdue" % (h, " and ".join(pmsg)))
log(h, "%s overdue" % " and ".join(pmsg))
msg_to_websockets("host", hbdclass.Host.hosts[h].stateinfo())
async def start(
config: dict,
hbdclass: callable,
log=None,
pushmsg=None,
msg_to_websockets=None,
):
"""start a monitor loop that checks for overdue hosts every minute"""
while True:
await asyncio.sleep(15) # 15 seconds between checks
checkoverdue(config, hbdclass, log, pushmsg, msg_to_websockets)
+202
View File
@@ -0,0 +1,202 @@
"""Notification helpers: email, pushover, mattermost, signal and dispatcher."""
import logging
from typing import Optional
import http.client
import urllib.parse
import subprocess
import smtplib
import time
DEFAULT_PUSHPROVIDERS = ["all", "pushover", "mattermost", "signal"]
# module-level configuration set via setup()
_config = {}
logger = logging.getLogger(__name__)
def setup(cfg: dict):
"""Initialize notifier defaults from a configuration dict."""
global _config
_config = dict(cfg)
def send_email(toaddrs, smtpserver, sender, subject, body, debug=0):
"""Send a plain email via SMTP. Returns True on success."""
try:
smtpport = _config.get("smtpport", 587)
server = smtplib.SMTP(smtpserver, smtpport)
if debug > 0:
server.set_debuglevel(1)
if smtpport == 587:
server.starttls()
server.ehlo()
smtpuser = _config.get("smtpuser", None)
smtppassword = _config.get("smtppassword", None)
if smtpuser and smtppassword:
server.login(smtpuser, smtppassword)
server.sendmail(sender, toaddrs, body)
except Exception as e:
logger.warning("email send failed: %s", e)
try:
server.quit()
except Exception:
pass
return False
try:
server.quit()
except Exception:
pass
return True
def email(subject: str, msg: str, debug: int = 0) -> bool:
"""Convenience wrapper exposed to the rest of the application.
Uses module-level configuration to supply recipient list, smtp server
and sender address.
"""
toaddrs = _config.get("toemail")
fromemail = _config.get("fromemail")
smtpserver = _config.get("smtpserver")
if not toaddrs or not fromemail or not smtpserver:
logger.warning(
"email config incomplete: toemail=%s, fromemail=%s, smtpserver=%s",
toaddrs,
fromemail,
smtpserver,
)
return False
date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime())
body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (
toaddrs[0] if toaddrs else "",
fromemail,
subject,
date,
msg,
)
return send_email(toaddrs, smtpserver, fromemail, subject, body, debug=debug)
def pushover(token: str, user: str, msg: str, debug: int = 0) -> bool:
"""Send message via Pushover API."""
conn = http.client.HTTPSConnection("api.pushover.net:443")
try:
conn.request(
"POST",
"/1/messages.json",
urllib.parse.urlencode({"token": token, "user": user, "message": msg}),
{"Content-type": "application/x-www-form-urlencoded"},
)
r = conn.getresponse()
logger.debug("pushover response: %s %s", r.status, r.reason)
return r.status == 200
except Exception as e:
logger.error("pushover error: %s", e)
return False
def pushmattermost(
host: str,
token: str,
channel: str,
msg: str,
username: str = "hbd",
icon: Optional[str] = None,
debug: int = 0,
) -> bool:
"""Send a message to Mattermost via simple webhook driver if available.
This helper tries to import mattermostdriver.Driver and uses webhooks if present.
If the import fails it returns False.
"""
try:
from mattermostdriver import Driver
except Exception:
return False
ses = {"url": host, "scheme": "http", "basepath": "/api/v4", "port": 8065}
mm = Driver(ses)
payload = {"text": msg, "channel": channel, "username": username}
if icon:
payload["icon_url"] = icon
try:
rc = mm.webhooks.call_webhook(token, payload)
logger.debug("mattermost rc: %s", rc)
return bool(rc is None or rc == "")
except Exception as e:
logger.error("mattermost error: %s", e)
return False
def pushsignal(
signal_cli_bin: str, user: str, recipient: str, msg: str, debug: int = 0
) -> bool:
"""Send a message via signal-cli (requires local installation).
Uses subprocess to call signal-cli. Returns True if the command succeeded.
"""
CLI = [signal_cli_bin, "-u", user, "send", "-m", msg, recipient]
logger.debug("signal cli: %s", CLI)
try:
res = subprocess.run(CLI, capture_output=True)
if res.returncode != 0:
logger.error("signal failed: %s".res.stderr.decode())
return False
logger.debug("signal sent: %s", res.stdout.decode())
return True
except Exception as e:
logger.exception("signal exception: %s", e)
return False
def pushmsg(cfg: dict, msg: str, debug: int = 0):
"""Dispatch push notifications according to `cfg['pushsrv']`.
cfg is expected to contain keys for different services when needed, e.g.
- cfg['pushsrv'] : one of 'all', 'pushover', 'mattermost', 'signal'
- cfg['pushover_token'], cfg['pushover_user']
- cfg['matter_host'], cfg['matter_token'], cfg['matter_channel']
- cfg['signal_cli'], cfg['signal_user'], cfg['signal_recipient']
Returns a dict of results per provider.
"""
results = {}
p = cfg.get("pushsrv", "pushover")
if p in ("all", "pushover"):
ok = pushover(
cfg.get("pushover_token", ""),
cfg.get("pushover_user", ""),
msg,
debug=debug,
)
results["pushover"] = ok
if p in ("all", "mattermost"):
ok = pushmattermost(
cfg.get("matter_host", ""),
cfg.get("matter_token", ""),
cfg.get("matter_channel", ""),
msg,
username=cfg.get("matter_username", "hbd"),
icon=cfg.get("matter_icon"),
debug=debug,
)
results["mattermost"] = ok
if p in ("all", "signal"):
ok = pushsignal(
cfg.get("signal_cli", "/usr/local/bin/signal-cli"),
cfg.get("signal_user", ""),
cfg.get("signal_recipient", ""),
msg,
debug=debug,
)
results["signal"] = ok
if p in ("all", "email"):
ok = email("Heartbeat notification", msg, debug=debug)
results["email"] = ok
logger.debug("push results: %s", results)
return results
def pushmsg_from_config(msg: str, debug: int = 0) -> dict:
"""Use the module-level configuration dict to dispatch a push message."""
return pushmsg(_config, msg, debug=debug)
Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

+142
View File
@@ -0,0 +1,142 @@
/* http://www.designcouch.com/home/why/2014/04/23/pure-css-drawer-menu/ */
* {
box-sizing: border-box;
/* adds animation for all transitions */
transition: .25s ease-in-out;
/* margin: 0;
padding: 0; */
/* text-size-adjust: none; */
}
/* Makes sure that everything is 100% height */
html,
body {
height: 100%;
overflow: hidden;
color:#303030;
background:#fafafa top left repeat-y;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", "Helvetica", "Arial", sans-serif;
font-size:100%;
margin: 0;
}
#drawer-toggle {
position: absolute;
opacity: 0;
}
#drawer ul a {
display: block;
padding: 10px;
color: #c7c7c7;
text-decoration: none;
}
#drawer-toggle-label {
user-select: none;
left: 0px;
height: 50px;
width: 50px;
display: block;
position: fixed;
color: rgb(242, 242, 242);
background: rgba(255, 255, 255, .0);
z-index: 1;
}
/* adds our "hamburger" menu icon */
#drawer-toggle-label:before {
content: '';
display: block;
position: absolute;
height: 2px;
width: 24px;
background: #8d8d8d;
left: 13px;
top: 18px;
box-shadow: 0 6px 0 #8d8d8d, 0 12px 0 #8d8d8d;
}
header {
width: 100%;
position: fixed;
left: 0px;
background: #efefef;
padding: 10px 10px 10px 50px;
font-size: 30px;
line-height: 30px;
z-index: 0;
}
/* drawer menu pane - note the 0px width */
#drawer {
position: fixed;
top: 0;
width: 150px;
left: -150px;
height: 100%;
background: #2f2f2f;
overflow-x: hidden;
overflow-y: scroll;
padding: 0px;
}
@media all and (min-resolution: 150dpi) {
header {
font-size: 30px;
/* line-height: 45px; */
}
#drawer {
font-size: 120%;
}
/* body {
background-color: lightyellow;
} */
}
/* actual page content pane */
#content {
margin-left: 0px;
margin-top: 30px;
/* width: 100%; */
height: calc(100% - 50px);
overflow-x: hidden;
overflow-y: scroll;
padding: 20px;
flex: auto;
}
/* checked styles (menu open state) */
#drawer-toggle:checked ~ #drawer-toggle-label {
height: 100%;
width: calc(100% - 150px);
color: rgb(242, 242, 242);
background: rgba(255, 255, 255, .8);
}
#drawer-toggle:checked ~ #drawer-toggle-label,
#drawer-toggle:checked ~ header {
left: 150px;
}
#drawer-toggle:checked ~ #drawer {
left: 0px;
}
#drawer-toggle:checked ~ #content {
margin-left: 150px;
}
#copyright {
font-size: 9px;
float: left;
}
+466
View File
@@ -0,0 +1,466 @@
<!DOCTYPE html>
<html>
{% include 'head.html' %}
<style>
body {
margin: 20px;
background: #f5f5f5;
}
.nav {
background: #fff;
padding: 15px;
margin-bottom: 20px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
border-radius: 4px;
}
.nav a {
margin-right: 20px;
text-decoration: none;
color: #0066cc;
font-weight: 500;
}
.nav a:hover {
text-decoration: underline;
}
.nav a.active {
color: #333;
font-weight: bold;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
h1 {
color: #333;
margin-bottom: 10px;
}
.subtitle {
color: #666;
margin-bottom: 30px;
}
.summary-cards {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-bottom: 30px;
}
.summary-card {
background: white;
border-radius: 8px;
padding: 20px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
text-align: center;
}
.summary-card.critical {
border-left: 5px solid #f44336;
}
.summary-card.warning {
border-left: 5px solid #ff9800;
}
.summary-card.ok {
border-left: 5px solid #4caf50;
}
.summary-number {
font-size: 3em;
font-weight: bold;
margin: 10px 0;
}
.summary-number.critical {
color: #f44336;
}
.summary-number.warning {
color: #ff9800;
}
.summary-number.ok {
color: #4caf50;
}
.summary-label {
color: #666;
text-transform: uppercase;
font-size: 0.9em;
letter-spacing: 1px;
}
.filters {
background: white;
border-radius: 8px;
padding: 15px;
margin-bottom: 20px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
display: flex;
gap: 15px;
align-items: center;
}
.filter-label {
font-weight: bold;
color: #555;
}
.filter-button {
padding: 8px 16px;
border: 2px solid #ddd;
background: white;
border-radius: 20px;
cursor: pointer;
transition: all 0.2s;
font-size: 0.9em;
}
.filter-button:hover {
border-color: #2196f3;
}
.filter-button.active {
background: #2196f3;
color: white;
border-color: #2196f3;
}
.alerts-container {
background: white;
border-radius: 8px;
padding: 20px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
.alert-item {
border-left: 5px solid #ddd;
padding: 15px;
margin-bottom: 15px;
background: #fafafa;
border-radius: 4px;
display: flex;
justify-content: space-between;
align-items: center;
transition: all 0.2s;
}
.alert-item:hover {
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
transform: translateX(5px);
}
.alert-item.critical {
border-left-color: #f44336;
background: #ffebee;
}
.alert-item.warning {
border-left-color: #ff9800;
background: #fff3e0;
}
.alert-item.unknown {
border-left-color: #9e9e9e;
background: #f5f5f5;
}
.alert-main {
flex: 1;
}
.alert-header {
display: flex;
align-items: center;
gap: 15px;
margin-bottom: 8px;
}
.alert-level {
padding: 4px 12px;
border-radius: 12px;
font-size: 0.75em;
font-weight: bold;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.alert-level.critical {
background: #f44336;
color: white;
}
.alert-level.warning {
background: #ff9800;
color: white;
}
.alert-level.unknown {
background: #9e9e9e;
color: white;
}
.alert-hostname {
font-weight: bold;
color: #333;
font-size: 1.1em;
}
.alert-metric {
color: #666;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
.alert-details {
display: flex;
gap: 20px;
color: #666;
font-size: 0.9em;
}
.alert-value {
font-weight: bold;
color: #333;
}
.alert-duration {
color: #999;
font-size: 0.85em;
}
.no-alerts {
text-align: center;
padding: 60px 20px;
color: #999;
}
.no-alerts-icon {
font-size: 4em;
margin-bottom: 20px;
}
.loading {
text-align: center;
padding: 40px;
color: #666;
}
.error {
background: #ffebee;
border-left: 4px solid #f44336;
padding: 20px;
margin: 20px 0;
border-radius: 4px;
color: #c62828;
}
.refresh-info {
text-align: center;
color: #999;
font-size: 0.85em;
margin-top: 20px;
padding-top: 20px;
border-top: 1px solid #e0e0e0;
}
.last-update {
color: #666;
font-size: 0.9em;
text-align: right;
margin-bottom: 15px;
}
</style>
<body>
<div class="nav">
<a href="/live">Live Dashboard</a>
<a href="/plugins">Plugin Metrics</a>
<a href="/alerts" class="active">Alerts</a>
</div>
<div class="container">
<h1>{{ header }}</h1>
<p class="subtitle">Real-time monitoring alerts and threshold violations</p>
<div class="summary-cards" id="summary-cards">
<div class="summary-card critical">
<div class="summary-label">Critical</div>
<div class="summary-number critical" id="critical-count">-</div>
</div>
<div class="summary-card warning">
<div class="summary-label">Warning</div>
<div class="summary-number warning" id="warning-count">-</div>
</div>
<div class="summary-card ok">
<div class="summary-label">Total Hosts</div>
<div class="summary-number ok" id="host-count">-</div>
</div>
</div>
<div class="filters">
<span class="filter-label">Show:</span>
<button class="filter-button active" onclick="filterAlerts('all')">All</button>
<button class="filter-button" onclick="filterAlerts('critical')">Critical Only</button>
<button class="filter-button" onclick="filterAlerts('warning')">Warning Only</button>
</div>
<div class="alerts-container">
<div class="last-update">Last updated: <span id="last-update-time">Never</span></div>
<div id="alerts-list">
<div class="loading">Loading alerts...</div>
</div>
<div class="refresh-info">
Auto-refreshing every 15 seconds
</div>
</div>
</div>
<script>
let currentFilter = 'all';
let allAlerts = [];
async function loadAlerts() {
try {
const response = await fetch('/api/0/alerts');
if (!response.ok) {
throw new Error(`HTTP ${response.status}`);
}
const data = await response.json();
allAlerts = data.alerts;
// Update summary cards
document.getElementById('critical-count').textContent = data.summary.critical || 0;
document.getElementById('warning-count').textContent = data.summary.warning || 0;
document.getElementById('host-count').textContent = data.host_count || 0;
// Update last update time
document.getElementById('last-update-time').textContent = new Date().toLocaleTimeString();
// Render alerts
renderAlerts(allAlerts);
} catch (error) {
document.getElementById('alerts-list').innerHTML =
`<div class="error">Failed to load alerts: ${error.message}</div>`;
}
}
function renderAlerts(alerts) {
const container = document.getElementById('alerts-list');
// Filter alerts based on current filter
let filteredAlerts = alerts;
if (currentFilter !== 'all') {
filteredAlerts = alerts.filter(alert =>
alert.level.toLowerCase() === currentFilter
);
}
if (filteredAlerts.length === 0) {
if (currentFilter === 'all' && alerts.length === 0) {
container.innerHTML = `
<div class="no-alerts">
<div class="no-alerts-icon">✓</div>
<h2>All Systems Normal</h2>
<p>No active alerts at this time</p>
</div>
`;
} else {
container.innerHTML = `
<div class="no-alerts">
<p>No ${currentFilter} alerts</p>
</div>
`;
}
return;
}
let html = '';
for (const alert of filteredAlerts) {
html += renderAlert(alert);
}
container.innerHTML = html;
}
function renderAlert(alert) {
const level = alert.level.toLowerCase();
const duration = getDuration(alert.since);
return `
<div class="alert-item ${level}">
<div class="alert-main">
<div class="alert-header">
<span class="alert-level ${level}">${alert.level}</span>
<span class="alert-hostname">${alert.hostname}</span>
</div>
<div class="alert-metric">${alert.metric_path}</div>
<div class="alert-details">
<span>Value: <span class="alert-value">${formatValue(alert.last_value)}</span></span>
<span class="alert-duration">Active for ${duration}</span>
</div>
</div>
</div>
`;
}
function formatValue(value) {
if (typeof value === 'number') {
if (value > 1000) {
return value.toLocaleString();
}
return value.toFixed(2);
}
return value;
}
function getDuration(timestamp) {
const now = Date.now() / 1000;
const seconds = Math.floor(now - timestamp);
if (seconds < 60) {
return `${seconds}s`;
} else if (seconds < 3600) {
return `${Math.floor(seconds / 60)}m`;
} else if (seconds < 86400) {
const hours = Math.floor(seconds / 3600);
const minutes = Math.floor((seconds % 3600) / 60);
return `${hours}h ${minutes}m`;
} else {
const days = Math.floor(seconds / 86400);
const hours = Math.floor((seconds % 86400) / 3600);
return `${days}d ${hours}h`;
}
}
function filterAlerts(filter) {
currentFilter = filter;
// Update active button
document.querySelectorAll('.filter-button').forEach(btn => {
btn.classList.remove('active');
});
event.target.classList.add('active');
// Re-render with new filter
renderAlerts(allAlerts);
}
// Auto-refresh every 15 seconds
setInterval(loadAlerts, 15000);
// Initial load
loadAlerts();
</script>
</body>
</html>
+5
View File
@@ -0,0 +1,5 @@
<footer>
<div id="copyright">
&copy;2002-2026 <A HREF="mailto:andreas@wrede.ca">Andreas Wrede</A> All Rights Reserved.</p>
</div>
</footer>
+7
View File
@@ -0,0 +1,7 @@
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<link rel="stylesheet" href="/static/style.css" type="text/css" />
<link rel="icon" href="/static/images/favicon.ico" sizes="32x32" />
<title>{{ title }}</title>
<script src="{{ extra_scripts }}"></script>
</head>
+330
View File
@@ -0,0 +1,330 @@
<!DOCTYPE html>
<html>
{% include 'head.html' %}
<style>
.nav {
background: #fff;
padding: 15px;
margin-bottom: 20px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
border-radius: 4px;
}
.nav a {
margin-right: 20px;
text-decoration: none;
color: #0066cc;
font-weight: 500;
}
.nav a:hover {
text-decoration: underline;
}
.nav a.active {
color: #333;
font-weight: bold;
}
.content {
display: flex;
flex-direction: column;
}
.table {
/* flex: 1; */
flex-grow: none;
}
.log {
flex: 2;
flex-grow: 1;
}
#ntable {
border-collapse: collapse;
font-size: 95%;
/* width: 100%; */
}
#ntable td,
#ntable th {
border: 1px solid #ddd;
text-align: left;
padding: 0px;
}
#ntable tr:nth-child(even) {
background-color: #f2f2f2;
}
#ntable tr:hover {
background-color: #ddd;
}
#ntable th {
padding-top: 12px;
padding-bottom: 12px;
background-color: #9d9d9d;
color: white;
}
#ntable
th:not(.sorttable_sorted):not(.sorttable_sorted_reverse):not(.sorttable_nosort):after {
content: " \2195";
}
/* Modal for connection status messages */
.connection-modal {
display: none;
position: fixed;
z-index: 1000;
left: 0;
top: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.4);
}
.connection-modal.show {
display: flex;
justify-content: center;
align-items: center;
}
.connection-modal-content {
background-color: #f9f9f9;
padding: 20px;
border: 1px solid #888;
border-radius: 5px;
text-align: center;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
min-width: 300px;
}
.connection-modal-content p {
margin: 10px 0;
font-size: 16px;
color: #333;
}
</style>
<script type="text/javascript">
var cnt = 0;
var nTable = document;
var name_idx = {};
var c = 0;
function setup() {
name_idx = {};
nTable = document.getElementById("ntable");
for (var i = 0, row; (row = nTable.rows[i]); i++) {
if (i == 0) continue;
name = nTable.rows[i].cells[0].innerText;
name_idx[name] = nTable.rows[i];
/* console.log("name_Id[" + name + "]: " + name_idx[name].innerText); */
}
}
function createRow(data) {
var row = document.createElement("tr");
var c_name = document.createElement("td");
var c_ver = document.createElement("td");
var c_ipv4addr = document.createElement("td");
var c_ipv4state = document.createElement("td");
var c_ipv4latency = document.createElement("td");
c_ipv4latency.style.textAlign = "right";
var c_ipv4statets = document.createElement("td");
c_ipv4statets.style.textAlign = "right";
var c_ipv6addr = document.createElement("td");
var c_ipv6state = document.createElement("td");
var c_ipv6latency = document.createElement("td");
c_ipv6latency.style.textAlign = "right";
var c_ipv6statets = document.createElement("td");
c_ipv6statets.style.textAlign = "right";
row.appendChild(c_name);
row.appendChild(c_ver);
row.appendChild(c_ipv4addr);
row.appendChild(c_ipv4state);
row.appendChild(c_ipv4latency);
row.appendChild(c_ipv4statets);
row.appendChild(c_ipv6addr);
row.appendChild(c_ipv6state);
row.appendChild(c_ipv6latency);
row.appendChild(c_ipv6statets);
if (data.dyn) {
c_name.innerHTML = "<b>" + data.name + "</b>";
} else {
c_name.innerHTML = data.name;
}
c_ver.innerHTML = data.cver;
c_ipv4addr.innerHTML = data.connections[0].addr;
c_ipv4state.innerHTML = data.connections[0].state;
if (data.connections.length > 1) {
c_ipv6addr.innerHTML = data.connections[1].addr;
c_ipv6state.innerHTML = data.connections[1].state;
}
var table = document.getElementById("ntablebody"); // find table to append to
table.appendChild(row); // append row to table
name_idx[c_name] = row;
}
function formatTS(ts) {
const milliseconds = ts * 1000;
const dateObject = new Date(milliseconds);
return dateObject.toLocaleString("de-DE");
}
function update_table(data) {
if (!(data.name in name_idx)) {
createRow(data);
setup();
}
for (var i = 0; i < data.connections.length; i++) {
name_idx[data.name].cells[2 + i * 4].innerHTML = data.connections[i].addr;
name_idx[data.name].cells[5 + i * 4].innerHTML = formatTS(
data.connections[i].statetime
);
if (data.connections[i].state == "up") {
state = "up";
latency = Number.parseFloat(data.connections[i].rtts[0]).toFixed(2);
} else {
if (data.connections[i].state == "unknown") {
state = "";
latency = "";
name_idx[data.name].cells[2 + i * 4].innerHTML = "";
name_idx[data.name].cells[5 + i * 4].innerHTML = "";
} else {
state = "<b>" + data.connections[i].state + "</b>";
latency = "-";
}
}
name_idx[data.name].cells[3 + i * 4].innerHTML = state;
name_idx[data.name].cells[4 + i * 4].innerHTML = latency;
}
}
function WS_Connect() {
if ("WebSocket" in window) {
//N.B: subprotocol field causes chrome to error 1006
var ws_hbd = new WebSocket("{{heartbeat_ws_url}}", /* "hdb" */ );
ws_hbd.onopen = function () {
// Web Socket is connected, send data using send()
console.log("ws connect {{heartbeat_ws_url}}");
// Hide modal window if visible
var modal = document.getElementById("connectionModal");
if (modal) {
modal.classList.remove("show");
}
ws_hbd.send("heartbeat_web");
};
ws_hbd.onerror = function (event) {
console.log(event);
};
ws_hbd.onmessage = function (event) {
/* console.log(event.data); */
var state = JSON.parse(event.data);
/* console.log("State: " + state.type); */
if (state.type == "host") {
update_table(state.data);
} else if (state.type == "message") {
var msgs = document.getElementById("messages");
msgs.insertAdjacentHTML("afterbegin", state.data + "<br>");
}
cnt++;
};
ws_hbd.onclose = function (event) {
/* console.log(event); */
console.log("Connection is closed, reopening");
// Show modal window
var modal = document.getElementById("connectionModal");
if (modal) {
modal.classList.add("show");
}
setTimeout(function () {
WS_Connect();
}, 3000);
};
} else {
// The browser doesn't support WebSocket
console.log("WebSocket NOT supported by your Browser!");
}
}
WS_Connect();
</script>
<body>
<div class="nav">
<a href="/live" class="active">Live Dashboard</a>
<a href="/plugins">Plugin Metrics</a>
<a href="/alerts">Alerts</a>
</div>
{% include 'menu.html' %}
<div id="content" class="content" style="overflow: hidden">
<div id="table" class="table" style="overflow: hidden">
<!-- <h2>{{title}}</h2> -->
<table id="ntable" class="sortable">
<thead>
<tr>
<th>Name</th>
<th>Ver</th>
<th>IPv4 Addr</th>
<th>State</th>
<th style="text-align: right">Latencey</th>
<th style="text-align: right">Last State</th>
<th>IPv6 Addr</th>
<th>State</th>
<th style="text-align: right">Latencey</th>
<th style="text-align: right">Last State</th>
</tr>
</thead>
<tbody id="ntablebody">
{% for host in hosts %}
<tr>
<td>{{ host.name }}</td>
<td>{{ host.ver if host.ver else '' }}</td>
{% for conn in host.connections %}
<td>{{ conn.addr if conn.addr else '' }}</td>
<td>{{ conn.state if conn.state else '' }}</td>
<td style="text-align: right">{{ conn.latency if conn.latency else '' }}</td>
<td style="text-align: right">{{ conn.last_state_ts if conn.last_state_ts else '' }}</td>
{% endfor %}
{% if host.connections|length == 0 %}
<td></td><td></td><td></td><td></td>
<td></td><td></td><td></td><td></td>
{% elif host.connections|length == 1 %}
<td></td><td></td><td></td><td></td>
{% endif %}
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div id="log" class="log" style="overflow: auto;">
<h2>Log of Events</h2>
<div id="messages">
</div>
</div>
</div>
{% include 'foot.html' %}
<!-- Connection status modal -->
<div id="connectionModal" class="connection-modal">
<div class="connection-modal-content">
<p>⚠️ Connection is closed, reopening...</p>
</div>
</div>
<script>
setup();
</script>
</body>
</html>
+3
View File
@@ -0,0 +1,3 @@
<!-- <label for="drawer-toggle" id="drawer-toggle-label"></label>
s<header>{{ header }}</header> -->
+974
View File
@@ -0,0 +1,974 @@
<!DOCTYPE html>
<html>
{% include 'head.html' %}
<style>
body {
margin: 10px;
background: #f5f5f5;
overflow: hidden;
}
.nav {
background: #fff;
padding: 10px 15px;
margin-bottom: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
border-radius: 4px;
}
.nav a {
margin-right: 20px;
text-decoration: none;
color: #0066cc;
font-weight: 500;
font-size: 0.9em;
}
.nav a:hover {
text-decoration: underline;
}
.nav a.active {
color: #333;
font-weight: bold;
}
.container {
max-width: 1400px;
margin: 0 auto;
max-height: calc(100vh - 120px);
overflow-y: auto;
padding-right: 10px;
}
h1 {
color: #333;
margin-bottom: 5px;
font-size: 1.5em;
}
.subtitle {
color: #666;
margin-bottom: 15px;
font-size: 0.9em;
}
.host-card {
background: white;
border-radius: 6px;
padding: 10px 15px;
margin-bottom: 10px;
box-shadow: 0 1px 4px rgba(0,0,0,0.1);
transition: all 0.2s;
}
.host-card.collapsed .host-body {
display: none;
}
.host-header {
display: flex;
justify-content: space-between;
align-items: center;
cursor: pointer;
user-select: none;
padding: 5px 0;
}
.host-header:hover {
background: #f9f9f9;
}
.host-title {
display: flex;
align-items: center;
gap: 10px;
}
.collapse-icon {
font-size: 1.2em;
color: #666;
transition: transform 0.2s;
min-width: 20px;
}
.host-card.collapsed .collapse-icon {
transform: rotate(-90deg);
}
.host-name {
font-size: 1.1em;
font-weight: bold;
color: #333;
}
.host-body {
padding-top: 10px;
}
.plugin-pills {
display: flex;
gap: 6px;
flex-wrap: wrap;
margin-bottom: 10px;
}
.plugin-pill {
padding: 4px 12px;
background: #e3f2fd;
border: 1px solid #90caf9;
border-radius: 15px;
cursor: pointer;
transition: all 0.2s;
font-size: 0.85em;
}
.plugin-pill:hover {
background: #90caf9;
color: white;
}
.plugin-pill.active {
background: #2196f3;
color: white;
border-color: #2196f3;
}
.plugin-content {
margin-top: 10px;
display: none;
}
.plugin-content.active {
display: block;
}
.metric-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 10px;
margin-top: 10px;
}
.metric-card {
background: #fafafa;
border-left: 3px solid #2196f3;
padding: 8px 12px;
border-radius: 3px;
}
.metric-label {
font-size: 0.75em;
color: #666;
text-transform: uppercase;
letter-spacing: 0.3px;
margin-bottom: 3px;
}
.metric-value {
font-size: 1.4em;
font-weight: bold;
color: #333;
line-height: 1.2;
}
.metric-unit {
font-size: 0.6em;
color: #888;
font-weight: normal;
}
.timestamp {
color: #999;
font-size: 0.75em;
margin-top: 10px;
padding-top: 8px;
border-top: 1px solid #e0e0e0;
}
.no-data {
text-align: center;
padding: 20px;
color: #999;
font-style: italic;
font-size: 0.9em;
}
.loading {
text-align: center;
padding: 15px;
color: #666;
font-size: 0.9em;
}
.error {
background: #ffebee;
border-left: 3px solid #f44336;
padding: 10px;
margin: 10px 0;
border-radius: 3px;
color: #c62828;
font-size: 0.9em;
}
.nested-metrics {
margin-top: 8px;
padding-left: 12px;
border-left: 2px solid #ddd;
}
.nested-header {
font-weight: bold;
color: #555;
margin: 8px 0 5px 0;
font-size: 0.85em;
}
/* Scrollbar styling */
.container::-webkit-scrollbar {
width: 8px;
}
.container::-webkit-scrollbar-track {
background: #f1f1f1;
border-radius: 4px;
}
.container::-webkit-scrollbar-thumb {
background: #888;
border-radius: 4px;
}
.container::-webkit-scrollbar-thumb:hover {
background: #555;
}
/* Table styling for interface data */
.interface-table {
width: 100%;
border-collapse: collapse;
margin-top: 10px;
font-size: 0.85em;
background: #fff;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
border-radius: 4px;
overflow: hidden;
}
.interface-table thead {
background: #2196f3;
color: white;
}
.interface-table th {
padding: 8px 10px;
text-align: left;
font-weight: 600;
text-transform: uppercase;
font-size: 0.75em;
letter-spacing: 0.5px;
}
.interface-table th.number {
text-align: right;
}
.interface-table td {
padding: 6px 10px;
border-top: 1px solid #e0e0e0;
}
.interface-table td.number {
text-align: right;
font-family: 'Courier New', monospace;
}
.interface-table tbody tr:hover {
background: #f5f5f5;
}
.interface-table tbody tr:nth-child(even) {
background: #fafafa;
}
.interface-table tbody tr:nth-child(even):hover {
background: #f0f0f0;
}
.interface-name {
font-weight: bold;
color: #2196f3;
}
</style>
<body>
<div class="nav">
<a href="/live">Live Dashboard</a>
<a href="/plugins" class="active">Plugin Metrics</a>
<a href="/alerts">Alerts</a>
</div>
<div class="container">
<h1>{{ header }}</h1>
<p class="subtitle">Real-time system metrics from monitoring plugins</p>
{% if not hosts %}
<div class="no-data">
<p>No hosts with plugin data available</p>
<p style="font-size: 0.9em; margin-top: 10px;">Hosts will appear here once they start sending plugin metrics</p>
</div>
{% else %}
<div id="hosts-container">
{% for host in hosts %}
<div class="host-card" data-hostname="{{ host.name }}">
<div class="host-header" onclick="toggleHost('{{ host.name }}')">
<div class="host-title">
<span class="collapse-icon"></span>
<span class="host-name">{{ host.name }}</span>
</div>
</div>
<div class="host-body">
<div class="plugin-pills">
{% for plugin in host.plugins %}
<div class="plugin-pill" data-plugin="{{ plugin }}" onclick="event.stopPropagation(); showPlugin('{{ host.name }}', '{{ plugin }}')">
{{ plugin }}
</div>
{% endfor %}
</div>
{% for plugin in host.plugins %}
<div class="plugin-content" id="{{ host.name }}-{{ plugin }}" data-hostname="{{ host.name }}" data-plugin="{{ plugin }}">
<div class="loading">Loading {{ plugin }} data...</div>
</div>
{% endfor %}
</div>
</div>
{% endfor %}
</div>
{% endif %}
</div>
<script>
// Track selected plugins per host
const selectedPlugins = {};
function toggleHost(hostname) {
const card = document.querySelector(`[data-hostname="${hostname}"]`);
card.classList.toggle('collapsed');
}
function showPlugin(hostname, pluginName) {
// Update selectedPlugins tracker
selectedPlugins[hostname] = pluginName;
// Update active pill
const hostCard = document.querySelector(`[data-hostname="${hostname}"]`);
hostCard.querySelectorAll('.plugin-pill').forEach(pill => {
pill.classList.remove('active');
});
hostCard.querySelector(`[data-plugin="${pluginName}"]`).classList.add('active');
// Show plugin content
hostCard.querySelectorAll('.plugin-content').forEach(content => {
content.classList.remove('active');
});
const contentDiv = document.getElementById(`${hostname}-${pluginName}`);
contentDiv.classList.add('active');
// Load data if not already loaded
if (contentDiv.querySelector('.loading')) {
loadPluginData(hostname, pluginName);
}
}
async function loadPluginData(hostname, pluginName) {
const contentDiv = document.getElementById(`${hostname}-${pluginName}`);
try {
const response = await fetch(`/api/0/hosts/${hostname}/plugins/${pluginName}?limit=1`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}`);
}
const data = await response.json();
if (data.samples && data.samples.length > 0) {
const sample = data.samples[0];
contentDiv.innerHTML = renderPluginData(sample.data, sample.timestamp);
} else {
contentDiv.innerHTML = '<div class="no-data">No data available for this plugin</div>';
}
} catch (error) {
contentDiv.innerHTML = `<div class="error">Failed to load plugin data: ${error.message}</div>`;
}
}
function renderPluginData(data, timestamp) {
let html = '<div class="metric-grid">';
for (const [key, value] of Object.entries(data)) {
// Skip nested objects for now, handle them separately
if (typeof value === 'object' && value !== null) {
continue;
}
html += renderMetric(key, value);
}
html += '</div>';
// Handle nested objects (like partitions in disk_monitor)
for (const [key, value] of Object.entries(data)) {
if (typeof value === 'object' && value !== null) {
// Check if this is interface data - render as table
if (isInterfaceData(key, value)) {
html += renderInterfaceTable(key, value);
} else if (isInterfaceStatsData(key, value)) {
html += renderInterfaceStatsTable(key, value);
} else if (isDiskPartitionData(key, value)) {
html += renderPartitionTable(key, value);
} else if (isDiskIOData(key, value)) {
html += renderDiskIOTable(key, value);
} else if (isFilesystemData(key, value)) {
html += renderFilesystemTable(key, value);
} else {
// Regular nested metrics display
html += `<div class="nested-metrics">`;
html += `<div class="nested-header">📊 ${formatLabel(key)}</div>`;
html += '<div class="metric-grid">';
if (Array.isArray(value)) {
// Handle arrays - more compact display
value.forEach((item, idx) => {
if (typeof item === 'object') {
// Add a compact separator for array items
if (idx > 0) {
html += `<div style="grid-column: 1/-1; border-top: 1px dashed #ddd; margin: 5px 0;"></div>`;
}
for (const [subKey, subValue] of Object.entries(item)) {
html += renderMetric(`${subKey}`, subValue);
}
}
});
} else {
// Handle nested objects
for (const [subKey, subValue] of Object.entries(value)) {
if (typeof subValue === 'object') {
// Another level of nesting - keep compact
html += `<div style="grid-column: 1/-1; margin-top: 5px; font-size: 0.85em; color: #666;"><strong>${subKey}:</strong></div>`;
for (const [deepKey, deepValue] of Object.entries(subValue)) {
html += renderMetric(deepKey, deepValue);
}
} else {
html += renderMetric(subKey, subValue);
}
}
}
html += '</div></div>';
}
}
}
const date = new Date(timestamp * 1000);
html += `<div class="timestamp">Last updated: ${date.toLocaleString()}</div>`;
return html;
}
function isInterfaceData(key, value) {
// Check if this is interface/network stats data (I/O counters)
if (key.toLowerCase().includes('interface') && !key.toLowerCase().includes('interface_stats')) {
// Verify it's an object with interface-like structure
if (typeof value === 'object' && !Array.isArray(value)) {
// Check if values are objects with byte/packet counters
const firstKey = Object.keys(value)[0];
if (firstKey && typeof value[firstKey] === 'object') {
const sample = value[firstKey];
return sample.hasOwnProperty('bytes_sent') ||
sample.hasOwnProperty('bytes_recv') ||
sample.hasOwnProperty('packets_sent') ||
sample.hasOwnProperty('tx_bytes') ||
sample.hasOwnProperty('rx_bytes');
}
}
}
return false;
}
function isInterfaceStatsData(key, value) {
// Check if this is interface stats data (status, speed, mtu, duplex)
if (key.toLowerCase() === 'interface_stats' || key.toLowerCase().includes('if_stats')) {
if (typeof value === 'object' && !Array.isArray(value)) {
const firstKey = Object.keys(value)[0];
if (firstKey && typeof value[firstKey] === 'object') {
const sample = value[firstKey];
return sample.hasOwnProperty('isup') ||
sample.hasOwnProperty('speed') ||
sample.hasOwnProperty('mtu') ||
sample.hasOwnProperty('duplex');
}
}
}
return false;
}
function isDiskPartitionData(key, value) {
// Check if this is disk partition data
if (key.toLowerCase() === 'partitions' || key.toLowerCase().includes('partition')) {
if (typeof value === 'object' && !Array.isArray(value)) {
const firstKey = Object.keys(value)[0];
if (firstKey && typeof value[firstKey] === 'object') {
const sample = value[firstKey];
return sample.hasOwnProperty('total') &&
sample.hasOwnProperty('used') &&
sample.hasOwnProperty('free') &&
sample.hasOwnProperty('percent');
}
}
}
return false;
}
function isDiskIOData(key, value) {
// Check if this is disk I/O counter data
if (key.toLowerCase().includes('io_counter') || key.toLowerCase().includes('disk_io')) {
if (typeof value === 'object' && !Array.isArray(value)) {
const firstKey = Object.keys(value)[0];
if (firstKey && typeof value[firstKey] === 'object') {
const sample = value[firstKey];
return sample.hasOwnProperty('read_bytes') ||
sample.hasOwnProperty('write_bytes') ||
sample.hasOwnProperty('read_count') ||
sample.hasOwnProperty('write_count');
}
}
}
return false;
}
function isFilesystemData(key, value) {
// Check if this is filesystem info data (from filesystem_info plugin)
if (key.toLowerCase() === 'filesystems' && Array.isArray(value)) {
if (value.length > 0 && typeof value[0] === 'object') {
const sample = value[0];
return sample.hasOwnProperty('device') &&
sample.hasOwnProperty('mountpoint') &&
sample.hasOwnProperty('fstype');
}
}
return false;
}
function renderInterfaceTable(key, interfaces) {
let html = `<div class="nested-metrics">`;
html += `<div class="nested-header">🌐 ${formatLabel(key)}</div>`;
html += '<table class="interface-table">';
// Determine columns based on available data
const sampleInterface = Object.values(interfaces)[0];
const hasBytes = sampleInterface.hasOwnProperty('bytes_sent') || sampleInterface.hasOwnProperty('tx_bytes');
const hasPackets = sampleInterface.hasOwnProperty('packets_sent') || sampleInterface.hasOwnProperty('tx_packets');
const hasErrors = sampleInterface.hasOwnProperty('errin') || sampleInterface.hasOwnProperty('rx_errors');
const hasDrops = sampleInterface.hasOwnProperty('dropin') || sampleInterface.hasOwnProperty('rx_dropped');
const hasDelta = sampleInterface.hasOwnProperty('bytes_sent_delta');
// Build table header
html += '<thead><tr>';
html += '<th>Interface</th>';
if (hasBytes) {
html += '<th class="number">Bytes Sent</th>';
html += '<th class="number">Bytes Recv</th>';
if (hasDelta) {
html += '<th class="number">Δ Sent</th>';
html += '<th class="number">Δ Recv</th>';
}
}
if (hasPackets) {
html += '<th class="number">Pkts Sent</th>';
html += '<th class="number">Pkts Recv</th>';
if (hasDelta) {
html += '<th class="number">Δ Pkts Sent</th>';
html += '<th class="number">Δ Pkts Recv</th>';
}
}
if (hasErrors) {
html += '<th class="number">Errors In</th>';
html += '<th class="number">Errors Out</th>';
}
if (hasDrops) {
html += '<th class="number">Drops In</th>';
html += '<th class="number">Drops Out</th>';
}
html += '</tr></thead>';
// Build table body
html += '<tbody>';
for (const [ifName, ifData] of Object.entries(interfaces)) {
html += '<tr>';
html += `<td class="interface-name">${ifName}</td>`;
if (hasBytes) {
html += `<td class="number">${formatBytes(ifData.bytes_sent || ifData.tx_bytes || 0)}</td>`;
html += `<td class="number">${formatBytes(ifData.bytes_recv || ifData.rx_bytes || 0)}</td>`;
if (hasDelta) {
html += `<td class="number">${formatBytes(ifData.bytes_sent_delta || 0)}</td>`;
html += `<td class="number">${formatBytes(ifData.bytes_recv_delta || 0)}</td>`;
}
}
if (hasPackets) {
html += `<td class="number">${(ifData.packets_sent || ifData.tx_packets || 0).toLocaleString()}</td>`;
html += `<td class="number">${(ifData.packets_recv || ifData.rx_packets || 0).toLocaleString()}</td>`;
if (hasDelta) {
html += `<td class="number">${(ifData.packets_sent_delta || 0).toLocaleString()}</td>`;
html += `<td class="number">${(ifData.packets_recv_delta || 0).toLocaleString()}</td>`;
}
}
if (hasErrors) {
html += `<td class="number">${ifData.errin || ifData.rx_errors || 0}</td>`;
html += `<td class="number">${ifData.errout || ifData.tx_errors || 0}</td>`;
}
if (hasDrops) {
html += `<td class="number">${ifData.dropin || ifData.rx_dropped || 0}</td>`;
html += `<td class="number">${ifData.dropout || ifData.tx_dropped || 0}</td>`;
}
html += '</tr>';
}
html += '</tbody>';
html += '</table>';
html += '</div>';
return html;
}
function renderInterfaceStatsTable(key, interfaces) {
let html = `<div class="nested-metrics">`;
html += `<div class="nested-header">🔌 ${formatLabel(key)}</div>`;
html += '<table class="interface-table">';
// Table header
html += '<thead><tr>';
html += '<th>Interface</th>';
html += '<th>Status</th>';
html += '<th class="number">Speed</th>';
html += '<th>Duplex</th>';
html += '<th class="number">MTU</th>';
html += '</tr></thead>';
// Table body
html += '<tbody>';
for (const [ifName, ifData] of Object.entries(interfaces)) {
html += '<tr>';
html += `<td class="interface-name">${ifName}</td>`;
// Status with color coding
const isUp = ifData.isup;
const statusColor = isUp ? '#4caf50' : '#f44336';
const statusIcon = isUp ? '✓' : '✗';
const statusText = isUp ? 'UP' : 'DOWN';
html += `<td style="color: ${statusColor}; font-weight: bold;">${statusIcon} ${statusText}</td>`;
// Speed
const speed = ifData.speed || 0;
let speedText = '-';
if (speed > 0) {
if (speed >= 1000) {
speedText = (speed / 1000).toFixed(1) + ' Gbps';
} else {
speedText = speed + ' Mbps';
}
}
html += `<td class="number">${speedText}</td>`;
// Duplex
let duplexText = ifData.duplex || '-';
if (duplexText.includes('NicDuplex.')) {
duplexText = duplexText.replace('NicDuplex.', '');
}
if (duplexText === '2') duplexText = 'FULL';
if (duplexText === '1') duplexText = 'HALF';
if (duplexText === '0') duplexText = 'UNKNOWN';
html += `<td>${duplexText}</td>`;
// MTU
html += `<td class="number">${ifData.mtu || '-'}</td>`;
html += '</tr>';
}
html += '</tbody>';
html += '</table>';
html += '</div>';
return html;
}
function renderPartitionTable(key, partitions) {
let html = `<div class="nested-metrics">`;
html += `<div class="nested-header">💾 ${formatLabel(key)}</div>`;
html += '<table class="interface-table">';
// Table header
html += '<thead><tr>';
html += '<th>Mount Point</th>';
html += '<th>Device</th>';
html += '<th>Type</th>';
html += '<th class="number">Total</th>';
html += '<th class="number">Used</th>';
html += '<th class="number">Free</th>';
html += '<th class="number">Use %</th>';
html += '</tr></thead>';
// Table body
html += '<tbody>';
for (const [mountPoint, partData] of Object.entries(partitions)) {
html += '<tr>';
html += `<td class="interface-name">${mountPoint}</td>`;
html += `<td>${partData.device || '-'}</td>`;
html += `<td>${partData.fstype || '-'}</td>`;
html += `<td class="number">${formatBytes(partData.total || 0)}</td>`;
html += `<td class="number">${formatBytes(partData.used || 0)}</td>`;
html += `<td class="number">${formatBytes(partData.free || 0)}</td>`;
// Color code the percentage
const percent = partData.percent || 0;
let percentColor = '#4caf50'; // green
if (percent > 90) percentColor = '#f44336'; // red
else if (percent > 75) percentColor = '#ff9800'; // orange
else if (percent > 50) percentColor = '#ffc107'; // yellow
html += `<td class="number" style="color: ${percentColor}; font-weight: bold;">${percent.toFixed(1)}%</td>`;
html += '</tr>';
}
html += '</tbody>';
html += '</table>';
html += '</div>';
return html;
}
function renderDiskIOTable(key, disks) {
let html = `<div class="nested-metrics">`;
html += `<div class="nested-header">📈 ${formatLabel(key)}</div>`;
html += '<table class="interface-table">';
// Determine columns based on available data
const sampleDisk = Object.values(disks)[0];
const hasDeltas = sampleDisk.hasOwnProperty('read_bytes_delta');
const hasTime = sampleDisk.hasOwnProperty('read_time');
// Table header
html += '<thead><tr>';
html += '<th>Disk</th>';
html += '<th class="number">Read Bytes</th>';
html += '<th class="number">Write Bytes</th>';
if (hasDeltas) {
html += '<th class="number">Δ Read</th>';
html += '<th class="number">Δ Write</th>';
}
html += '<th class="number">Read Count</th>';
html += '<th class="number">Write Count</th>';
if (hasDeltas) {
html += '<th class="number">Δ Reads</th>';
html += '<th class="number">Δ Writes</th>';
}
if (hasTime) {
html += '<th class="number">Read Time (ms)</th>';
html += '<th class="number">Write Time (ms)</th>';
}
html += '</tr></thead>';
// Table body
html += '<tbody>';
for (const [diskName, diskData] of Object.entries(disks)) {
html += '<tr>';
html += `<td class="interface-name">${diskName}</td>`;
html += `<td class="number">${formatBytes(diskData.read_bytes || 0)}</td>`;
html += `<td class="number">${formatBytes(diskData.write_bytes || 0)}</td>`;
if (hasDeltas) {
html += `<td class="number">${formatBytes(diskData.read_bytes_delta || 0)}</td>`;
html += `<td class="number">${formatBytes(diskData.write_bytes_delta || 0)}</td>`;
}
html += `<td class="number">${(diskData.read_count || 0).toLocaleString()}</td>`;
html += `<td class="number">${(diskData.write_count || 0).toLocaleString()}</td>`;
if (hasDeltas) {
html += `<td class="number">${(diskData.read_count_delta || 0).toLocaleString()}</td>`;
html += `<td class="number">${(diskData.write_count_delta || 0).toLocaleString()}</td>`;
}
if (hasTime) {
html += `<td class="number">${(diskData.read_time || 0).toLocaleString()}</td>`;
html += `<td class="number">${(diskData.write_time || 0).toLocaleString()}</td>`;
}
html += '</tr>';
}
html += '</tbody>';
html += '</table>';
html += '</div>';
return html;
}
function renderFilesystemTable(key, filesystems) {
let html = `<div class="nested-metrics">`;
html += `<div class="nested-header">🗄️ ${formatLabel(key)}</div>`;
html += '<table class="interface-table">';
// Table header
html += '<thead><tr>';
html += '<th>Device</th>';
html += '<th>Mount Point</th>';
html += '<th>Type</th>';
html += '<th>Options</th>';
html += '<th class="number">Max File</th>';
html += '<th class="number">Max Path</th>';
html += '</tr></thead>';
// Table body
html += '<tbody>';
for (const fs of filesystems) {
html += '<tr>';
html += `<td class="interface-name">${fs.device || '-'}</td>`;
html += `<td>${fs.mountpoint || '-'}</td>`;
html += `<td>${fs.fstype || '-'}</td>`;
// Format mount options - truncate if too long
let opts = fs.opts || '-';
if (opts.length > 40) {
opts = opts.substring(0, 37) + '...';
}
html += `<td style="font-size: 0.85em;">${opts}</td>`;
html += `<td class="number">${fs.maxfile || '-'}</td>`;
html += `<td class="number">${fs.maxpath || '-'}</td>`;
html += '</tr>';
}
html += '</tbody>';
html += '</table>';
html += '</div>';
return html;
}
function formatBytes(bytes) {
if (bytes === 0) return '0 B';
if (bytes < 1024) return bytes + ' B';
if (bytes < 1048576) return (bytes / 1024).toFixed(1) + ' KB';
if (bytes < 1073741824) return (bytes / 1048576).toFixed(1) + ' MB';
return (bytes / 1073741824).toFixed(2) + ' GB';
}
function renderMetric(key, value) {
const label = formatLabel(key);
const formattedValue = formatValue(key, value);
const unit = getUnit(key);
return `
<div class="metric-card">
<div class="metric-label">${label}</div>
<div class="metric-value">
${formattedValue}
${unit ? `<span class="metric-unit">${unit}</span>` : ''}
</div>
</div>
`;
}
function formatLabel(key) {
return key
.replace(/_/g, ' ')
.replace(/\b\w/g, l => l.toUpperCase());
}
function formatValue(key, value) {
if (typeof value === 'number') {
// Format percentages
if (key.includes('percent') || key.includes('usage')) {
return value.toFixed(1);
}
// Format bytes to MB/GB
if (key.includes('bytes') || key.includes('_mb') || key.includes('_gb')) {
if (value > 1073741824) {
return (value / 1073741824).toFixed(2);
} else if (value > 1048576) {
return (value / 1048576).toFixed(2);
}
}
// Default number formatting
if (value > 1000) {
return value.toLocaleString();
}
return value.toFixed(2);
}
return value;
}
function getUnit(key) {
if (key.includes('percent') || key.includes('usage')) return '%';
if (key.includes('_gb')) return 'GB';
if (key.includes('_mb')) return 'MB';
if (key.includes('bytes') && !key.includes('_mb') && !key.includes('_gb')) {
// Determine unit based on typical size
return 'bytes';
}
if (key.includes('count')) return '';
if (key.includes('mhz')) return 'MHz';
return '';
}
// Auto-refresh data every 30 seconds
setInterval(() => {
for (const [hostname, pluginName] of Object.entries(selectedPlugins)) {
const contentDiv = document.getElementById(`${hostname}-${pluginName}`);
if (contentDiv && contentDiv.classList.contains('active')) {
loadPluginData(hostname, pluginName);
}
}
}, 30000);
// Initialize by selecting first plugin for each host
document.addEventListener('DOMContentLoaded', () => {
const hostCards = document.querySelectorAll('.host-card');
hostCards.forEach((card, index) => {
const hostname = card.dataset.hostname;
const firstPlugin = card.querySelector('.plugin-pill');
if (firstPlugin) {
const pluginName = firstPlugin.dataset.plugin;
showPlugin(hostname, pluginName);
}
// Collapse all hosts except the first one
if (index > 0) {
card.classList.add('collapsed');
}
});
});
</script>
</body>
</html>
+270
View File
@@ -0,0 +1,270 @@
"""UDP listener and datagram processing."""
import asyncio
import zlib
import logging
from ..common.proto import stodict, oldmtodict
from ..common.utils import dur
logger = logging.getLogger(__name__)
class EchoServerProtocol(asyncio.DatagramProtocol):
def __init__(self, config=None, handler=None):
super().__init__()
self.config = config or {}
self.handler = handler
def connection_made(self, transport):
self.transport = transport
logger.info("UDP Server listening...")
def datagram_received(self, data, addr):
logger.debug("Received from %s", addr)
try:
msg = parse_message(data)
if self.handler:
# handler can be a callable provided by the application
# pass the transport so handlers can send replies (ACKs/commands)
self.handler(msg, addr, self.transport)
except Exception:
logger.exception("Error while processing datagram from %s", addr)
def parse_message(data: bytes):
"""Parse a raw datagram into a message dict.
Uses the protocol decoding helpers and falls back to old format when
decoding returns an empty dict (compat with older clients).
"""
msg = stodict(data)
if not msg:
# fallback to old format
msg = oldmtodict(data)
return msg
def dicttos(ID, d, compress=False):
s = []
for k in d:
if isinstance(d[k], float):
s.append("%s=%0.5f" % (k, d[k]))
else:
s.append("%s=%s" % (k, d[k]))
pk = ";".join(s)
if compress:
zpk = zlib.compress(pk.encode(), 6)
ID = "!" + ID + ":"
opk = ID.encode() + zpk
else:
zpk = pk
opk = ID + ":" + zpk
return opk
def handle_datagram(msg: dict, addr, transport, ctx: dict):
"""Handle a parsed datagram message.
ctx is a dictionary with runtime dependencies:
- config: dict of configuration
- hbdclass: module providing Host/Connection classes
- log: callable(loghost, message)
- pushmsg: callable(message)
- msg_to_websockets: callable(typ, data)
- msg_journal: MessageJournal instance for logging all messages
- DEBUG, verbose
"""
if not msg:
return
now = __import__("time").time()
# Log message to journal
msg_journal = ctx.get("msg_journal")
if msg_journal:
# Create async task to log message (non-blocking)
import asyncio
try:
loop = asyncio.get_event_loop()
loop.create_task(msg_journal.log_message(msg, addr, now))
except Exception as e:
logger.debug(f"Failed to log message to journal: {e}")
cfg = ctx.get("config", {})
hbdcls = ctx.get("hbdclass")
log = ctx.get("log")
pushmsg = ctx.get("pushmsg")
msg_to_websockets = ctx.get("msg_to_websockets")
DEBUG = ctx.get("DEBUG", 0)
verbose = ctx.get("verbose", False)
# normalize addr (ip, port)
ip = addr[0] if isinstance(addr, (list, tuple)) else addr
name = msg.get("name", "unknown")
from ..common.utils import shortname
uname = shortname(name)
if uname not in hbdcls.Host.hosts:
host = hbdcls.Host(uname)
host.dyn = uname in cfg.get("dyndnshosts", [])
if verbose:
print(("XX: New host, num now %s" % (len(hbdcls.Host.hosts))))
newh = True
else:
host = hbdcls.Host.hosts[uname]
newh = False
cid = msg.get("id", 0)
try:
rtt = float(msg.get("rtt", None))
except Exception:
rtt = None
if msg.get("ID") == "HTB":
host.doesack = msg.get("acks", -1)
elif msg.get("ID") == "PLG":
# Handle plugin data message
plugin_name = msg.get("plugin")
if plugin_name:
# Extract all fields except ID and plugin name
plugin_data = {k: v for k, v in msg.items() if k not in ["ID", "plugin"]}
# Store plugin data with timestamp
host.add_plugin_data(plugin_name, plugin_data, timestamp=now)
if DEBUG > 1:
print(f"Stored plugin data for {uname}: {plugin_name}")
# Check thresholds if checker is available
threshold_checker = ctx.get("threshold_checker")
if threshold_checker:
try:
state_changes = threshold_checker.check_plugin_data(
host_name=uname,
plugin_name=plugin_name,
data=plugin_data,
alert_states=host.alert_states,
)
if DEBUG > 1 and state_changes:
print(f"Threshold state changes for {uname}: {state_changes}")
except Exception as e:
logger.error(f"Error checking thresholds for {uname}.{plugin_name}: {e}")
# Notify websockets of plugin update
if msg_to_websockets:
try:
msg_to_websockets("plugin", {
"host": uname,
"plugin": plugin_name,
"data": plugin_data,
"timestamp": now
})
except Exception:
pass
host.setcver(msg.get("ver", 0))
try:
conn, res = host.conndata(cid, ip, rtt, now)
except Exception as e:
if DEBUG > 0:
print("conndata failed: %s" % e)
return
if res:
if log:
log(uname, res)
if uname in cfg.get("watchhosts", []):
if pushmsg:
pushmsg("%s %s" % (host.name, res))
interval = int(msg.get("interval", 0) or 0)
shutdown = msg.get("shutdown", 0)
service = msg.get("service", "unknown")
message = msg.get("msg", None)
boot = msg.get("boot", 0)
if boot:
if log:
log(uname, "booted")
if uname in cfg.get("watchhosts", []):
m = "%s booted" % (host.name)
if pushmsg:
pushmsg(m)
if message:
if log:
log(uname, "msg: %s" % message, service=service)
if uname in cfg.get("watchhosts", []):
if pushmsg:
pushmsg(message)
if conn.getstate() != hbdcls.Connection.UP:
lasts = conn.state
d = conn.newstate(hbdcls.Connection.UP, now)
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
if log:
log(uname, m)
if uname in cfg.get("watchhosts", []):
if pushmsg:
pushmsg("%s %s is back" % (uname, conn.afam))
if boot or newh:
host.upcount = host.doesack
else:
host.upcount += 1
if shutdown:
if log:
log(uname, "%s shutdown" % conn.afam)
if uname in cfg.get("watchhosts", []):
if pushmsg:
pushmsg("%s %s shutdown" % (uname, conn.afam))
conn.newstate(hbdcls.Connection.DOWN, now)
if interval > 0:
host.interval = interval
# send ACK back
rmsg = {"time": __import__("time").time()}
if host.cver < 1:
opkt = b"ACK"
else:
opkt = dicttos("ACK", rmsg, host.cver > 1)
try:
transport.sendto(opkt, addr)
except Exception as e:
if DEBUG > 0:
print(("cannot send ack: %s" % e))
# send any commands we have queued
while len(host.cmds):
op, rmsg = host.cmds[0]
if op == "CMD":
del host.cmds[0]
if log:
log(uname, "command sent")
if host.cver < 1:
rmsg = rmsg["cmd"]
elif op == "UPD":
del host.cmds[0]
if log:
log(uname, "update initiated")
if host.cver < 1:
if log:
log(uname, " ver 0 does not support UPD")
continue
if host.cver < 1:
opkt = rmsg if isinstance(rmsg, (bytes, str)) else str(rmsg)
if isinstance(opkt, str):
opkt = opkt.encode()
else:
opkt = dicttos(op, rmsg, True)
try:
transport.sendto(opkt, addr)
except Exception as e:
if DEBUG > 0:
print(("cannot send cmd/update: %s" % e))
if msg_to_websockets:
try:
msg_to_websockets("host", host.stateinfo())
except Exception:
pass
+151
View File
@@ -0,0 +1,151 @@
"""WebSocket server and broadcast helpers for hbd.
Provides an asyncio-based WebSocket server and a thread-safe broadcast
function that other threads or synchronous code can call.
"""
import asyncio
import json
import logging
from typing import Callable, Iterable, Optional
import websockets
logger = logging.getLogger(__name__)
_connections = set()
_loop: Optional[asyncio.AbstractEventLoop] = None
_get_hosts: Optional[Callable[[], Iterable]] = None
_get_msgs: Optional[Callable[[], Iterable]] = None
_verbose = False
async def _handler(websocket, path=None):
_connections.add(websocket)
remote_address = websocket.remote_address
if path is None:
path = getattr(websocket, "path", None)
logger.info("WebSocket connection from %s: %s", remote_address, path)
try:
# send initial hosts
if _get_hosts:
try:
hosts = list(_get_hosts())
logger.debug("Sending %d hosts to new WebSocket client", len(hosts))
for h in hosts:
jmsg = json.dumps({"type": "host", "data": h})
await websocket.send(jmsg)
except Exception as e:
logger.error("Error sending initial hosts: %s", e, exc_info=True)
# send recent messages
if _get_msgs:
try:
msgs = list(_get_msgs())[-100:]
logger.debug("Sending %d recent messages to new WebSocket client", len(msgs))
for m in msgs:
jmsg = json.dumps({"type": "message", "data": m})
await websocket.send(jmsg)
except Exception as e:
logger.error("Error sending initial messages: %s", e, exc_info=True)
# keep connection open until client disconnects
async for _ in websocket:
# we don't expect meaningful incoming messages besides the initial
# client 'hello' that some clients send; ignore for now
if _verbose:
logger.debug("received ws data: %s", _)
except (
websockets.exceptions.ConnectionClosedOK,
websockets.exceptions.ConnectionClosedError,
) as e:
logger.info("WebSocket closed from %s: %r", remote_address, e)
except Exception as e:
logger.exception("WebSocket handler exception from %s: %s", remote_address, e)
finally:
logger.debug("Removing WebSocket connection from %s", remote_address)
try:
_connections.remove(websocket)
except KeyError:
pass
await websocket.wait_closed()
async def start(
host: str,
ws_port: int,
wss_port: Optional[int] = None,
ssl_context=None,
get_hosts: Optional[Callable] = None,
get_msgs: Optional[Callable] = None,
verbose: bool = False,
):
"""Start WebSocket servers and block until cancelled.
This is intended to be awaited inside the main asyncio event loop.
If `wss_port` and `ssl_context` are provided, a WSS server will also be
started.
"""
global _loop, _get_hosts, _get_msgs, _verbose
_loop = asyncio.get_running_loop()
_get_hosts = get_hosts
_get_msgs = get_msgs
_verbose = verbose
servers = []
# plain WebSocket
websockets_logger = logging.getLogger("websockets.server")
websockets_logger.setLevel(logging.DEBUG if verbose else logging.INFO)
# regular WebSocket
ws_server = websockets.serve(_handler, host, ws_port) # , subprotocols=["hbd"])
servers.append(ws_server)
# secure WebSocket (optional)
if wss_port and ssl_context:
wss_server = websockets.serve(
_handler, host, wss_port, ssl=ssl_context
) # , subprotocols=["hbd"])
servers.append(wss_server)
# await starting of all servers
for srv in servers:
await srv
logger.info(
"WebSocket server(s) started on port %s (wss %s)", ws_port, wss_port
)
# block forever (until loop is stopped or cancelled)
await asyncio.Future()
def broadcast(typ: str, data) -> bool:
"""Thread-safe broadcast helper.
Schedules coroutine(s) on the running loop to send message to all
connected websockets. Returns False if server was not running.
"""
if not _loop:
return False
jmsg = json.dumps({"type": typ, "data": data})
to_close = []
for ws in list(_connections):
if ws.state != websockets.protocol.State.OPEN:
to_close.append(ws)
continue
try:
asyncio.run_coroutine_threadsafe(ws.send(jmsg), _loop)
except Exception:
to_close.append(ws)
logger.debug("ws.send exception: closed")
for ws in to_close:
try:
asyncio.run_coroutine_threadsafe(ws.wait_closed(), _loop)
except Exception:
pass
if ws in _connections:
_connections.remove(ws)
return True
def connection_count() -> int:
return len(_connections)