Move threshold to server, move eventlog to notify

This commit is contained in:
Andreas Wrede
2026-03-29 20:29:33 -04:00
parent 0543266c92
commit ad7178ebcb
7 changed files with 61 additions and 48 deletions
+1 -1
View File
@@ -2,7 +2,7 @@
hb_port: 50003
hbd_host: ''
#logfile: "/home/andreas/public_html/messages/andreas"
logfile: "/home/andreas/logs/heartbeat/andreas"
logfile: "/home/andreas/logs/heartbeat/heartbeat.log"
#logfile: "/Users/andreas/public_html/messages/andreas"
logfmt: "msg"
grace: 40
BIN
View File
Binary file not shown.
+1 -1
View File
@@ -299,7 +299,7 @@ async def start(
active_alerts = threshold_checker.get_active_alerts(host.alert_states)
else:
# Fallback if no threshold checker
from hbd.client.threshold import AlertLevel
from hbd.server.threshold import AlertLevel
active_alerts = [
state for state in host.alert_states.values()
if state.level != AlertLevel.OK
+12 -41
View File
@@ -13,40 +13,16 @@ from . import udp
from . import hbdclass
from . import ws as ws_mod
from . import notify as notify_mod
# Module logger for diagnostic output (separate from the event log file).
logger = logging.getLogger(__name__)
# Short aliases so the rest of this module can call the websocket broadcaster
# and the notify-module event logger without the module prefix.
msg_to_websockets = ws_mod.broadcast
eventlog = notify_mod.log
# Event log file handle; run() assigns the result of initlog() to it and
# log() writes to it only while it is truthy.
logf = None
# NOTE(review): purpose of lastfm is not visible in this view -- confirm usage.
lastfm = ["", "", ""]
# shared runtime collections and helpers
# Every formatted event line produced by log() is appended here.
msgs = []
def initlog(logfile):
    """Open *logfile* for appending and return the file object.

    If the file cannot be opened, a notice is printed and ``sys.stderr`` is
    returned instead, so the caller always receives a writable stream.

    Args:
        logfile: Path of the event log file.

    Returns:
        The opened file object, or ``sys.stderr`` when opening failed.
    """
    try:
        return open(logfile, "a+")
    except Exception as e:
        # Deliberately broad: a bad log path must never stop the daemon.
        import sys

        # Message previously read "loffile"; corrected to "logfile".
        print("cannot open logfile %s, using STDERR: %s" % (logfile, e))
        return sys.stderr
def log(host, m, service=None):
    """Record one timestamped event line and fan it out to every sink.

    The line is appended to ``msgs``, sent to the module logger at INFO
    level, written to the event log file when one is open, and broadcast
    to websocket clients as a "message" event.

    Args:
        host: Host name the event refers to; falsy values render as "".
        m: Event text.
        service: Accepted for interface compatibility; not used here.
    """
    now = time.localtime(time.time())
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', now)
    entry = f"{stamp} {host or ''} {m}"

    msgs.append(entry)
    logger.info(entry)

    if logf:
        try:
            logf.write(entry + "\n")
            logf.flush()
        except Exception as err:
            # A broken log file must not stop event delivery.
            logger.warning("failed to write to logfile: %s", err)

    msg_to_websockets("message", entry)
msgs = notify_mod.msgs
def cleanup_function(config):
"""This function will be executed upon program exit."""
@@ -84,7 +60,7 @@ async def _run_async(config):
from . import notify as notify_mod
from . import monitor as monitor_mod
from . import journal as journal_mod
from ..client import threshold as threshold_mod
from . import threshold as threshold_mod
notify_mod.setup(config)
@@ -125,7 +101,7 @@ async def _run_async(config):
ctx = dict(
config=config,
hbdclass=hbdclass,
log=log,
log=eventlog,
pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets,
msg_journal=msg_journal,
@@ -149,7 +125,7 @@ async def _run_async(config):
config=config,
hbdclass=hbdclass,
msgs_getter=lambda: msgs,
log=log,
log=eventlog,
pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets,
threshold_checker=threshold_checker,
@@ -172,7 +148,7 @@ async def _run_async(config):
dns_task = None
try:
dns_task = dns_mod.start_dns_worker(
hbdclass, config, log=log, pushmsg=pushmsg, loop=loop
hbdclass, config, log=eventlog, pushmsg=pushmsg, loop=loop
)
logger.info("dns update worker started")
except Exception as e:
@@ -211,7 +187,7 @@ async def _run_async(config):
for h in sorted(hbdclass.Host.hosts)
],
get_msgs=lambda: msgs,
verbose=config.get("verbose", False),
config=config,
)
)
logger.info("WebSocket task started")
@@ -224,7 +200,7 @@ async def _run_async(config):
monitor_mod.start(
config=config,
hbdclass=hbdclass,
log=log,
log=eventlog,
pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets,
)
@@ -347,7 +323,6 @@ def run(config):
Manually manages the event loop to ensure clean shutdown.
"""
global logf
import os
logging.basicConfig(
@@ -355,8 +330,8 @@ def run(config):
)
load_pickled_hosts(config, hbdclass)
logf = initlog(logfile=config.get("logfile", "messages.log"))
log(None, f"hbd version {__version__} starting up")
notify_mod.initlog(logfile=config.get("logfile", "messages.log"))
eventlog(None, f"hbd version {__version__} starting up")
# Create and set the event loop manually
loop = asyncio.new_event_loop()
@@ -371,11 +346,7 @@ def run(config):
finally:
cleanup_function(config)
logger.info("hbd shutdown complete")
if logf and logf != sys.stderr:
try:
logf.close()
except Exception:
pass
notify_mod.closelog()
# Explicitly close the loop
try:
# Cancel all remaining tasks
+38
View File
@@ -7,13 +7,50 @@ import urllib.parse
import subprocess
import smtplib
import time
import sys
from . import ws as ws_mod
# Names accepted as push provider selectors.
# NOTE(review): presumably matched against the "pushsrv" config value -- confirm.
DEFAULT_PUSHPROVIDERS = ["all", "pushover", "mattermost", "signal"]
# Alias for the websocket broadcaster used by log().
msg_to_websockets = ws_mod.broadcast
# module-level configuration set via setup()
_config = {}
# Module logger for diagnostic output (separate from the event log file).
logger = logging.getLogger(__name__)
# Every formatted event line produced by log() is appended here.
msgs = []
# Event log file handle: set by initlog(), written by log(), closed by closelog().
logf = None
def initlog(logfile):
    """Open *logfile* for appending and install it as the module event log.

    The opened stream is stored in the module-level ``logf`` so that
    ``log()`` writes to it. If the file cannot be opened, ``sys.stderr`` is
    installed instead. The fallback is now stored in ``logf`` as well;
    previously it was only returned, so a caller that ignored the return
    value ended up with no event log at all.

    Args:
        logfile: Path of the event log file.

    Returns:
        The stream now installed in ``logf``: the opened file, or
        ``sys.stderr`` when opening failed.
    """
    global logf
    try:
        logf = open(logfile, "a+")
    except Exception as e:
        # Deliberately broad: a bad log path must never stop the daemon.
        import sys

        print("cannot open logfile %s, using STDERR: %s" % (logfile, e))
        logf = sys.stderr
    return logf
def closelog():
    """Close the event log file and detach it from the module.

    ``logf`` is reset to ``None`` before closing so that a later ``log()``
    call skips the file sink instead of writing to a closed handle.
    ``sys.stderr`` is never closed. Calling this with no log open is a no-op.
    """
    global logf
    stream, logf = logf, None
    if not stream or stream is sys.stderr:
        return
    try:
        stream.close()
    except Exception as e:
        # Best effort at shutdown: report the failure, never raise.
        logger.debug("failed to close logfile: %s", e)
def log(host, m, service=None):
    """Emit one timestamped event line to all event sinks.

    Sinks, in order: the in-memory ``msgs`` list, the module logger (INFO),
    the event log file if ``logf`` is set, and the websocket broadcast
    channel under the "message" event name.

    Args:
        host: Host name the event refers to; falsy values render as "".
        m: Event text.
        service: Accepted for interface compatibility; not used here.
    """
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    line = f"{stamp} {host or ''} {m}"

    msgs.append(line)
    logger.info(line)

    if logf:
        try:
            logf.write(line + "\n")
            logf.flush()
        except Exception as exc:
            # Keep going: the websocket broadcast below must still happen.
            logger.warning("failed to write to logfile: %s", exc)

    msg_to_websockets("message", line)
def setup(cfg: dict):
"""Initialize notifier defaults from a configuration dict."""
@@ -160,6 +197,7 @@ def pushmsg(cfg: dict, msg: str, debug: int = 0):
Returns a dict of results per provider.
"""
results = {}
p = cfg.get("pushsrv", "pushover")
if p in ("all", "pushover"):
@@ -13,9 +13,10 @@ import logging
import time
from enum import Enum
from typing import Dict, Any, Optional, Tuple, Callable
from . import notify as notify_mod
logger = logging.getLogger(__name__)
eventlog = notify_mod.log
class AlertLevel(Enum):
"""Alert severity levels."""
@@ -505,6 +506,8 @@ class ThresholdChecker:
))
except Exception as e:
logger.debug(f"Failed to log threshold event to journal: {e}")
# Log to eventlog as well
eventlog(host_name, message, service="threshold")
def _check_renotify(
self,
+5 -4
View File
@@ -12,7 +12,7 @@ from typing import Callable, Iterable, Optional
import websockets
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
_connections = set()
_loop: Optional[asyncio.AbstractEventLoop] = None
_get_hosts: Optional[Callable[[], Iterable]] = None
@@ -78,7 +78,7 @@ async def start(
ssl_context=None,
get_hosts: Optional[Callable] = None,
get_msgs: Optional[Callable] = None,
verbose: bool = False,
config: dict = {},
):
"""Start WebSocket servers and block until cancelled.
@@ -90,12 +90,13 @@ async def start(
_loop = asyncio.get_running_loop()
_get_hosts = get_hosts
_get_msgs = get_msgs
_verbose = verbose
# No trailing commas here: a trailing comma would turn each value into a
# one-element tuple, which is always truthy and makes the later
# `_debug > 2` comparison raise TypeError (tuple vs int).
_verbose = config.get("verbose", False)
_debug = config.get("debug", False)
servers = []
# plain WebSocket
websockets_logger = logging.getLogger("websockets.server")
websockets_logger.setLevel(logging.DEBUG if verbose else logging.INFO)
websockets_logger.setLevel(logging.DEBUG if _debug > 2 else logging.INFO)
# regular WebSocket
ws_server = websockets.serve(_handler, host, ws_port) # , subprotocols=["hbd"])
servers.append(ws_server)