diff --git a/.hb.yaml b/.hb.yaml index 0e7b974..4551661 100644 --- a/.hb.yaml +++ b/.hb.yaml @@ -248,7 +248,7 @@ threshold_configs: critical: 2 operator: ">=" rtt: - warning: 30 + warning: 50 critical: 250.0 diff --git a/.hb.yaml.swp b/.hb.yaml.swp deleted file mode 100644 index 1b2ceb3..0000000 Binary files a/.hb.yaml.swp and /dev/null differ diff --git a/hbd/config.py.old b/hbd/config.py.old deleted file mode 100644 index f9ecd01..0000000 --- a/hbd/config.py.old +++ /dev/null @@ -1,75 +0,0 @@ -"""Configuration loader and defaults for hbd.""" - -import logging -import os - -try: - import yaml -except Exception: - yaml = None - -DEFAULTS = { - "hb_port": 50003, - "hbd_port": 50004, - "hbd_host": "", - "pickfile": "/tmp/hb.pick", - "logfile": "/var/log/heartbeat.log", - "logfmt": "text", - "pushsrv": "pushover", - "pushover_token": "", - "pushover_user": "", - "interval": 20, - "grace": 2, - "dyndomains": ["wrede.org"], - "watchhosts": [], - "dyndnshosts": [], - "drophosts": [], - "nsupdate_bin": "/usr/bin/nsupdate", - "foreground": False, - "verbose": False, - "debug": 0, - "smtpserver": "smtp.fastmail.com", - "smtpuser": "andreas@wrede.ca", - "smtppassword": "pvtvefyp5gbhnch2", - "smtpport": 587, - "toemail": ["aew.hbd.notify@wrede.ca"], - "fromemail": "aew.hbd@wrede.ca", - "ws_port": 50005, - "wss_port": None, - "cert_path": "/usr/local/etc/ssl/", - "wss_pem": "fullchain.pem", - "wss_key": "privkey.pem", - # Message journal configuration - "journal_enabled": True, - "journal_dir": "/var/log/heartbeat", - "journal_file": "messages.journal", - "journal_max_size": 100 * 1024 * 1024, # 100MB - "journal_max_backups": 10, - "plugins": {}, - "thresholds": {}, - "threshold_renotify_interval": 3600, -} - - -def load_config(path=None): - """Load configuration from a YAML file and merge with defaults. - - If YAML is not available or the file does not exist, defaults are returned. - """ - cfg = DEFAULTS.copy() - if not path: - # default path (~/.hb.yaml) - path = os.path.join(os.path.expanduser("~"), ".hb.yaml") - - if os.path.exists(path): - if yaml: - with open(path) as fh: - data = yaml.safe_load(fh) - # Merge YAML data with defaults - # Keep all keys from YAML to support plugin configs and future extensions - for k, v in data.items(): - cfg[k] = v - else: - # yaml not installed: do not attempt to parse; user must ensure defaults - pass - return cfg diff --git a/hbd/hbc_old.py.bak b/hbd/hbc_old.py.bak deleted file mode 100755 index b144063..0000000 --- a/hbd/hbc_old.py.bak +++ /dev/null @@ -1,602 +0,0 @@ -#!/usr/bin/env python3 -# $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $ -# NEW -import argparse -import sys -import time -import socket -import os -import signal -import select -import traceback -from hashlib import md5 -import shutil -import zlib -import subprocess -import syslog -import codecs - -from .config import load_config - -PORT = 50003 -INTERVAL = 10 -REOPENC = 6 -PIDFILE = "/tmp/hbc.pid" -VER = 6 -MAXRECV = 32767 - -running = True -dorestart = False -warned1 = False - -msgonly = False -helpflag = False -verbose = False -fdaemon = False -daemonized = False -msgboot = {} -home = os.environ["HOME"] -configfile = "%s/.hbrc" % home -cmdargs = [] -iam = socket.gethostname() - - -def log(msg): - if fdaemon: - syslog.syslog(syslog.LOG_ERR, msg) - else: - print(msg) - - -def handler(signum, frame): - if signum == signal.SIGTERM: - cleanup() - - -class NullDevice: - def write(self, s): - pass - - -class Conn: - def __init__(self, conId, addr, port, af): - self.conId = conId - self.addr = addr - self.port = port - self.af = af - - self.ackcount = 0 # num of accks received - self.lastack = 0 # time() last ACK was received - self.send = 0 - self.lastsend = 0 # time() last msg was sent - self.rtts = [0] - self.sock = None - - def __str__(self): - return "Con(%s, %s %s)" % (self.addr, self.port, self.af) - - def open(self): - self.sock = socket.socket(self.af, socket.SOCK_DGRAM) - self.sock.setsockopt( - socket.SOL_SOCKET, - socket.SO_REUSEADDR, - self.sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR) | 1, - ) - - def sendto(self, msg, ID="HTB"): # default ID is HearTBeat - global warned1 - - if self.send % REOPENC == 0: - self.close() - if not self.sock: - self.open() - msg["name"] = shortname(iam) - msg["id"] = self.conId - msg["ver"] = VER - msg["time"] = time.time() - m = dicttos(ID, msg) # always compress - if verbose: - log("conn.send('%s', (%s:%s) %s)" % (msg, self.addr, self.port, len(m))) - try: - self.sock.sendto(m, (self.addr, self.port)) - except socket.error as e: - if not warned1: - log("socket error: %s %s:%s" % (e, self.addr, self.port)) - warned1 = True - self.close() - return - self.send += 1 - self.lastsend = time.time() - - def ack(self, msgDict, now): - try: - self.lastack = msgDict["time"] - mul = 2 - except Exception: - self.lastack = now - mul = 1 - rtt = (self.lastack - self.lastsend) * mul - if verbose: - log("ack RTT: %0.1f ms (now %s)" % (rtt * 1000.0, now)) - self.rtts.append(rtt * 1000.0) - if len(self.rtts) > 10: - del self.rtts[0] - self.ackcount += 1 - - def close(self): - if self.sock: - self.sock.close() - self.sock = None - - -def shortname(name): - r = name.split(".") - return r[0] - - -def dicttos(ID, d): - s = [] - for k in d: - if isinstance(d[k], float): - s.append("%s=%0.5f" % (k, d[k])) - else: - s.append("%s=%s" % (k, d[k])) - pk = ";".join(s) - zpk = zlib.compress(pk.encode(), 6) - ID = "!" + ID + ":" - return ID.encode() + zpk - - -def stodict(msg): - d = {} - if len(msg) > 0 and chr(msg[0]) == "!": - pk = zlib.decompress(msg[5:]).decode() - d["ID"] = msg[1:4].decode() - else: - r0 = msg.split(":", 1) - pk = r0[1] - d["ID"] = r0[0] - r = pk.split(";") - for v in r: - vr = v.split("=", 1) - k = vr[0].strip() - if len(vr) == 1: - d[k] = None - else: - v = vr[1].strip() - try: - v = eval(v) - except Exception: - pass - d[k] = v - if verbose: - print("msg is %s" % d) - return d - - -def XXstodict(msg): - d = {} - r0 = msg.split(":", 1) - if len(r0) == 1: - return None - if r0[0][0] == "!": # compressed - pk = zlib.decompress(msg[len(r0[0]) + 1 :]) - d["ID"] = r0[0][1:] - else: - pk = r0[1] - d["ID"] = r0[0] - r = pk.split(";") - for v in r: - vr = v.split("=", 1) - k = vr[0].strip() - if len(vr) == 1: - d[k] = None - else: - v = vr[1].strip() - try: - if v[0].isdigit(): - v = eval(v) - except Exception: - pass - d[k] = v - return d - - -def syslogtrace(note): - logm = "%s hbc died: \n%s" % (note, traceback.format_exc()) - log(logm) - for line in logm.split("\n"): - syslog.syslog(syslog.LOG_ERR, " tb: %s" % line) - if verbose: - print(logm) - - -conId = 1 - - -def createConnections(hosts): - global conId - for host in hosts: - if verbose: - log("createConnections for %s" % host) - try: - rs = socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP) - except socket.gaierror: - logm = "%s hbc died: \n%s" % ("createConnections", traceback.format_exc()) - if verbose: - log(logm) - return None - for r in rs: - if verbose: - log("address %s" % str(r)) - if r[0] in [10, 24, 28, 30]: # for Linux, NetBSD, FreeBSD - af = socket.AF_INET6 - elif r[0] == 2: - af = socket.AF_INET - else: - print("dont know this net type: %s" % r[0][0]) - sys.exit(1) - - addr = r[4][0] - conns[conId] = Conn(conId, addr, hb_port, af) - if verbose: - print("cons[%s] = %s" % (conId, str(conns[conId]))) - conId += 1 - - -def doexec(conn, data): - try: - ro = subprocess.check_output( - data, stderr=subprocess.STDOUT, shell=True - ).decode() - fail = "OK" - except subprocess.CalledProcessError as e: - ro = str(e) - fail = "CalledProcessError" - except Exception as e: - syslogtrace("System") - ro = "N/A" - fail = "cmd failed: %s" % e - msg = {"service": "command", "msg": fail + " " + ro} - conns[conn].sendto(msg) - - -def doupdate(conn, msgDict): - fail = None - try: - code = codecs.decode(msgDict["code"], "base64").decode() - csum = msgDict["csum"] - except Exception as e: - fail = "csum/code missing: %s" % e - if not fail: - fail = doupdateone(code, csum) - - msg = {"service": "update", "msg": fail if fail else "OK"} - conns[conn].sendto(msg) - if not fail: - log("hc updates, fs = %s" % (len(code))) - - return fail - - -def doupdateone(code, csum): - - m = md5() - m.update(code.encode()) - icsum = m.hexdigest() - if icsum != csum: - return "checksum error" - - fn = sys.argv[0] - ofn = "%s.sav" % fn - try: - shutil.copy2(fn, ofn) - except Exception as e: - return "cannot make backup copy: %s" % e - - try: - fh = open(fn, "w") - fh.write(code) - fh.close() - except Exception as e: - return "cannot write new code: %s" % e - - return None - - -def restart(): - if verbose: - print("restart: execv %s %s" % (sys.argv[0], [sys.argv[0]] + cmdargs)) - syslog.syslog(syslog.LOG_ERR, "restart %s" % (sys.argv[0])) - e = "fallthrough" - try: - os.execv(sys.argv[0], [sys.argv[0]] + cmdargs) - except Exception: - pass - print("should not be here:", str(e)) - log("restart failed: %s" % e) - - -def process(): - global running, dorestart - - nextReport = time.time() - - while running: - while time.time() < nextReport: - ifiles = {} - conIds = {} - for conn in conns: - if conns[conn].sock: - ifiles[conns[conn].sock.fileno()] = conns[conn].sock - conIds[conns[conn].sock.fileno()] = conn - - sleep = nextReport - time.time() - if sleep <= 0: - break - try: - r = select.select(list(ifiles.keys()), [], [], sleep) - now = ( - time.time() - ) # nb: delay from actual packet arrival to select is ca. 105ms! - except KeyboardInterrupt: - running = False - break - except SystemExit: - log("daemon exit, running was %s" % running) - if running: - running = False - break - except Exception: - if running: - syslogtrace("select") - running = False - break - for rfh in r[0]: - conn = conIds[rfh] - data, addr = ifiles[rfh].recvfrom(MAXRECV) - if verbose: - print("sock.recvfrom: %s (%s) %s" % (addr, len(data), data[:4])) - try: - msgDict = stodict(data) - except Exception as e: - print( - "failed to parse incoming data from %s: %s (%s)" - % (addr, data, e) - ) - continue - - if verbose: - print( - "sock.recvfrom: %s (%s) %s" - % (addr, len(data), str(msgDict)[:80]) - ) - if msgDict is None: - print("bad backet from %s (%s) %s" % (addr, len(data), data)) - elif msgDict["ID"] == "ACK": - conns[conn].ack(msgDict, now) - elif msgDict["ID"] == "UPD": - if doupdate(conn, msgDict) is None: - if verbose: - print("process: restart after update") - dorestart = True - break - elif msgDict["ID"] == "CMD": - doexec(conn, msgDict["cmd"]) - else: - doexec(conn, data) # deprecated until no more VER - hbc - if dorestart: - running = False - break - if not running: - break - for conn in conns: - msg = {"acks": conns[conn].ackcount, "rtt": conns[conn].rtts[-1]} - conns[conn].sendto(msg) - time.sleep( - 0.1 - ) # N.B. Linux (i.e. Rasperry Pi 3 drops the second pkg unless delayed - if nextReport + interval >= time.time(): - nextReport += interval - else: - nextReport = time.time() + interval - - if verbose: - log("process: done running") - - -def cleanup(): - global running - if not running: - return - if verbose: - log("cleanup") - running = False - for conn in conns: - msg = {"shutdown": 1, "acks": conns[conn].ackcount} - conns[conn].sendto(msg) - conns[conn].close() - time.sleep(1) - closeall() - - -def closeall(): - if verbose: - syslog.syslog(syslog.LOG_ERR, "closecall") - for conn in conns: - conns[conn].close() - - -def daemonize( - working_dir="/", stdin="/dev/zero", stdout="/dev/null", stderr="/dev/null" -): - """ - Does the UNIX double-fork magic, see Stevens' "Advanced Programming in the - UNIX Environment" for details (ISBN 0201563177) - http://www.yendor.com/programming/unix/apue/proc/fork2.c - """ - - try: - # first fork - pid = os.fork() - if pid > 0: - # exit from first parent - os._exit(0) - except OSError as e: - sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror)) - os._exit(1) - - # decouple from parent environment - os.chdir(working_dir) - os.setsid() - os.umask(0) - # second fork - try: - pid = os.fork() - if pid > 0: - # exit from second parent - os._exit(0) - except OSError as e: - sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror)) - sys.exit(1) - - # redirects standard file descriptors - sys.stdout.flush() - sys.stderr.flush() - si = open(stdin, "r") - so = open(stdout, "a+") - se = open(stderr, "a+") - os.dup2(si.fileno(), sys.stdin.fileno()) - os.dup2(so.fileno(), sys.stdout.fileno()) - os.dup2(se.fileno(), sys.stderr.fileno()) - - -# -# Main program -# -def build_parser(): - parser = argparse.ArgumentParser( - prog="hbc", - description="HeartBeatClient - send a heatbeat message to a HeartBeatDaemon", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("-b", "--boot", action="store_true", help="Send a boot message") - parser.add_argument( - "-c", "--config", dest="configfile", help="Config file path (YAML)" - ) - parser.add_argument("-m", "--message", dest="message", help="Send a message") - parser.add_argument( - "-n", "--name", dest="name", help="Name to use in heartbeat message" - ) - parser.add_argument( - "-d", "--daemon", action="store_true", help="Run in daemon mode" - ) - parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") - parser.add_argument( - "-x", "--debug", action="count", default=0, help="Increase debug level" - ) - parser.add_argument("hosts", nargs="+", help="Heartbeat daemon hosts to send to") - return parser - - -def main(argv=None): - global msgonly, verbose, fdaemon, daemonized, cmdargs, iam, hb_port, conns, interval, hb_hosts - parser = build_parser() - args = parser.parse_args(argv) - - config = load_config(args.configfile) - - # Apply CLI overrides - if args.boot: - msgboot["boot"] = 1 - if args.message: - msgboot["service"] = "service" - msgboot["msg"] = args.message - msgonly = True - if args.name: - iam = args.name - cmdargs += ["-n", iam] - if args.daemon: - fdaemon = True - if args.verbose: - verbose = True - cmdargs.append("--verbose") - if args.debug: - config.setdefault("debug", 0) - config["debug"] += args.debug - cmdargs.append("-" + "x" * args.debug) - - if verbose: - print("cmdargs for restart are %s" % cmdargs) - - # - # set defaults - - hb_hosts = args.hosts - hb_port = config.get("hb_port", PORT) - interval = config.get("interval", INTERVAL) - - # - if verbose: - print("notice: hb_hosts: %s" % str(hb_hosts)) - print("notice: hb_port: %s" % hb_port) - print("notice: interval: %s" % interval) - print("notice: iam: %s" % iam) - print("notice: msgonly: %s" % msgonly) - print("notice: msgboot: %s" % msgboot) - - if not msgonly: - msgboot["interval"] = interval - - conns = {} - while True: - if verbose: - log("create connections") - createConnections(hb_hosts) - if len(conns) != 0: - break - if verbose: - log("no connections yet, sleep a bit") - time.sleep(2) - - if verbose: - log("%s connections created" % (len(conns))) - - if len(msgboot) > 0: - if verbose: - print("on boot") - msgboot["acks"] = 0 - for conn in conns: - conns[conn].sendto(msgboot) - - if msgonly: - if verbose: - print("msgboot done msgonly=%s" % msgonly) - closeall() - sys.exit(0) - - # - syslog.openlog("hbc", syslog.LOG_PID, syslog.LOG_DAEMON) - if fdaemon: - print("daemoinizing.") - daemonize() - daemonized = True - syslog.syslog(syslog.LOG_ERR, "starting heartbeat to %s" % ",".join(hb_hosts)) - - signal.signal(signal.SIGTERM, handler) - try: - process() - except Exception as e: - syslogtrace("process") - if verbose: - print("err: process exit: %s" % e) - - if verbose: - log("main: cleanup") - cleanup() - if dorestart: - restart() - - -if __name__ == "__main__": - main() diff --git a/hbd/server/templates/live.html b/hbd/server/templates/live.html index be97718..634140e 100644 --- a/hbd/server/templates/live.html +++ b/hbd/server/templates/live.html @@ -130,7 +130,6 @@ function createRow(data) { var row = document.createElement("tr"); var c_name = document.createElement("td"); - var c_ver = document.createElement("td"); var c_ipv4addr = document.createElement("td"); var c_ipv4state = document.createElement("td"); var c_ipv4latency = document.createElement("td"); @@ -144,7 +143,6 @@ var c_ipv6statets = document.createElement("td"); c_ipv6statets.style.textAlign = "right"; row.appendChild(c_name); - row.appendChild(c_ver); row.appendChild(c_ipv4addr); row.appendChild(c_ipv4state); row.appendChild(c_ipv4latency); @@ -158,7 +156,6 @@ } else { c_name.innerHTML = data.name; } - c_ver.innerHTML = data.cver; c_ipv4addr.innerHTML = data.connections[0].addr; c_ipv4state.innerHTML = data.connections[0].state; if (data.connections.length > 1) { @@ -274,7 +271,6 @@ Name - Ver IPv4 Addr State Latencey @@ -289,7 +285,6 @@ {% for host in hosts %} {{ host.name }} - {{ host.ver if host.ver else '' }} {% for conn in host.connections %} {{ conn.addr if conn.addr else '' }} {{ conn.state if conn.state else '' }} diff --git a/tests/test_handle_datagram.py b/tests/test_handle_datagram.py index 1e6ea0a..f904558 100644 --- a/tests/test_handle_datagram.py +++ b/tests/test_handle_datagram.py @@ -44,6 +44,6 @@ def test_handle_cmd_sends_command(): handle_datagram(msg2, ("127.0.0.1", 50000), ftr, ctx) # should have sent ACK and the command; last send should be non-empty assert len(ftr.sent) >= 1 - # the command for cver 0 will be sent as raw cmd string + # the command for ??0 will be sent as raw cmd string # so at least one send contains b'doit' or similar assert any(b"doit" in s[0] for s in ftr.sent)