Move threshold to server, move eventlog to notify

This commit is contained in:
Andreas Wrede
2026-03-29 20:29:33 -04:00
parent 0543266c92
commit ad7178ebcb
7 changed files with 61 additions and 48 deletions
+1 -1
View File
@@ -2,7 +2,7 @@
hb_port: 50003 hb_port: 50003
hbd_host: '' hbd_host: ''
#logfile: "/home/andreas/public_html/messages/andreas" #logfile: "/home/andreas/public_html/messages/andreas"
logfile: "/home/andreas/logs/heartbeat/andreas" logfile: "/home/andreas/logs/heartbeat/heartbeat.log"
#logfile: "/Users/andreas/public_html/messages/andreas" #logfile: "/Users/andreas/public_html/messages/andreas"
logfmt: "msg" logfmt: "msg"
grace: 40 grace: 40
BIN
View File
Binary file not shown.
+1 -1
View File
@@ -299,7 +299,7 @@ async def start(
active_alerts = threshold_checker.get_active_alerts(host.alert_states) active_alerts = threshold_checker.get_active_alerts(host.alert_states)
else: else:
# Fallback if no threshold checker # Fallback if no threshold checker
from hbd.client.threshold import AlertLevel from hbd.server.threshold import AlertLevel
active_alerts = [ active_alerts = [
state for state in host.alert_states.values() state for state in host.alert_states.values()
if state.level != AlertLevel.OK if state.level != AlertLevel.OK
+12 -41
View File
@@ -13,40 +13,16 @@ from . import udp
from . import hbdclass from . import hbdclass
from . import ws as ws_mod from . import ws as ws_mod
from . import notify as notify_mod
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
msg_to_websockets = ws_mod.broadcast msg_to_websockets = ws_mod.broadcast
eventlog = notify_mod.log
logf = None
lastfm = ["", "", ""] lastfm = ["", "", ""]
# shared runtime collections and helpers # shared runtime collections and helpers
msgs = [] msgs = notify_mod.msgs
def initlog(logfile):
try:
return open(logfile, "a+")
except Exception as e:
import sys
print("cannot open loffile %s, using STDERR: %s" % (logfile, e))
return sys.stderr
def log(host, m, service=None):
ts = time.time()
s = f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))} {host or ''} {m}"
msgs.append(s)
logger.info(s)
if logf:
try:
logf.write(s + "\n")
logf.flush()
except Exception as e:
logger.warning("failed to write to logfile: %s", e)
msg_to_websockets("message", s)
def cleanup_function(config): def cleanup_function(config):
"""This function will be executed upon program exit.""" """This function will be executed upon program exit."""
@@ -84,7 +60,7 @@ async def _run_async(config):
from . import notify as notify_mod from . import notify as notify_mod
from . import monitor as monitor_mod from . import monitor as monitor_mod
from . import journal as journal_mod from . import journal as journal_mod
from ..client import threshold as threshold_mod from . import threshold as threshold_mod
notify_mod.setup(config) notify_mod.setup(config)
@@ -125,7 +101,7 @@ async def _run_async(config):
ctx = dict( ctx = dict(
config=config, config=config,
hbdclass=hbdclass, hbdclass=hbdclass,
log=log, log=eventlog,
pushmsg=pushmsg, pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets, msg_to_websockets=msg_to_websockets,
msg_journal=msg_journal, msg_journal=msg_journal,
@@ -149,7 +125,7 @@ async def _run_async(config):
config=config, config=config,
hbdclass=hbdclass, hbdclass=hbdclass,
msgs_getter=lambda: msgs, msgs_getter=lambda: msgs,
log=log, log=eventlog,
pushmsg=pushmsg, pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets, msg_to_websockets=msg_to_websockets,
threshold_checker=threshold_checker, threshold_checker=threshold_checker,
@@ -172,7 +148,7 @@ async def _run_async(config):
dns_task = None dns_task = None
try: try:
dns_task = dns_mod.start_dns_worker( dns_task = dns_mod.start_dns_worker(
hbdclass, config, log=log, pushmsg=pushmsg, loop=loop hbdclass, config, log=eventlog, pushmsg=pushmsg, loop=loop
) )
logger.info("dns update worker started") logger.info("dns update worker started")
except Exception as e: except Exception as e:
@@ -211,7 +187,7 @@ async def _run_async(config):
for h in sorted(hbdclass.Host.hosts) for h in sorted(hbdclass.Host.hosts)
], ],
get_msgs=lambda: msgs, get_msgs=lambda: msgs,
verbose=config.get("verbose", False), config=config,
) )
) )
logger.info("WebSocket task started") logger.info("WebSocket task started")
@@ -224,7 +200,7 @@ async def _run_async(config):
monitor_mod.start( monitor_mod.start(
config=config, config=config,
hbdclass=hbdclass, hbdclass=hbdclass,
log=log, log=eventlog,
pushmsg=pushmsg, pushmsg=pushmsg,
msg_to_websockets=msg_to_websockets, msg_to_websockets=msg_to_websockets,
) )
@@ -347,7 +323,6 @@ def run(config):
Manually manages the event loop to ensure clean shutdown. Manually manages the event loop to ensure clean shutdown.
""" """
global logf
import os import os
logging.basicConfig( logging.basicConfig(
@@ -355,8 +330,8 @@ def run(config):
) )
load_pickled_hosts(config, hbdclass) load_pickled_hosts(config, hbdclass)
logf = initlog(logfile=config.get("logfile", "messages.log")) notify_mod.initlog(logfile=config.get("logfile", "messages.log"))
log(None, f"hbd version {__version__} starting up") eventlog(None, f"hbd version {__version__} starting up")
# Create and set the event loop manually # Create and set the event loop manually
loop = asyncio.new_event_loop() loop = asyncio.new_event_loop()
@@ -371,11 +346,7 @@ def run(config):
finally: finally:
cleanup_function(config) cleanup_function(config)
logger.info("hbd shutdown complete") logger.info("hbd shutdown complete")
if logf and logf != sys.stderr: notify_mod.closelog()
try:
logf.close()
except Exception:
pass
# Explicitly close the loop # Explicitly close the loop
try: try:
# Cancel all remaining tasks # Cancel all remaining tasks
+38
View File
@@ -7,13 +7,50 @@ import urllib.parse
import subprocess import subprocess
import smtplib import smtplib
import time import time
import sys
from . import ws as ws_mod
DEFAULT_PUSHPROVIDERS = ["all", "pushover", "mattermost", "signal"] DEFAULT_PUSHPROVIDERS = ["all", "pushover", "mattermost", "signal"]
msg_to_websockets = ws_mod.broadcast
# module-level configuration set via setup() # module-level configuration set via setup()
_config = {} _config = {}
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
msgs = []
logf = None
def initlog(logfile):
    """Open the event log file for appending and store it in ``logf``.

    The handle is kept in the module-level ``logf`` so that later calls
    that read ``logf`` find it.  If the file cannot be opened, a notice
    is printed and ``sys.stderr`` is installed as the log target instead,
    so the fallback announced in the message actually takes effect even
    when the caller ignores the return value.

    Args:
        logfile: Path of the log file to open in ``"a+"`` mode.

    Returns:
        The opened file object, or ``sys.stderr`` if opening failed.
    """
    global logf
    try:
        logf = open(logfile, "a+")
    except Exception as e:
        import sys

        print("cannot open logfile %s, using STDERR: %s" % (logfile, e))
        # Record the fallback in the global as well; previously only the
        # return value carried it and ``logf`` stayed unset.
        logf = sys.stderr
    return logf
def closelog():
    """Close the event log file, if one is open, and clear ``logf``.

    ``logf`` is reset to ``None`` before closing so that code checking
    ``logf`` afterwards does not try to write to a closed handle.
    ``sys.stderr`` is never closed.  Closing is best-effort: a failure is
    reported through ``logger`` and does not propagate.
    """
    global logf
    handle, logf = logf, None
    if handle is None or handle is sys.stderr:
        return
    try:
        handle.close()
    except Exception as e:
        # Shutdown path: report the problem but never raise from here.
        logger.warning("failed to close logfile: %s", e)
def log(host, m, service=None):
    """Record one event line and fan it out to every sink.

    The line is the local timestamp, the host (empty when falsy) and the
    message, separated by single spaces.  It is appended to ``msgs``,
    emitted through ``logger`` at INFO level, written to ``logf`` when
    one is set, and passed to ``msg_to_websockets`` as a ``"message"``.

    Args:
        host: Host the event refers to; a falsy value yields an empty field.
        m: Message text.
        service: Accepted but not used by this function.
    """
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    line = "{} {} {}".format(stamp, host or '', m)

    msgs.append(line)
    logger.info(line)

    if logf:
        try:
            logf.write(f"{line}\n")
            logf.flush()
        except Exception as e:
            # A broken log file must not stop the event from being delivered.
            logger.warning("failed to write to logfile: %s", e)

    msg_to_websockets("message", line)
def setup(cfg: dict): def setup(cfg: dict):
"""Initialize notifier defaults from a configuration dict.""" """Initialize notifier defaults from a configuration dict."""
@@ -160,6 +197,7 @@ def pushmsg(cfg: dict, msg: str, debug: int = 0):
Returns a dict of results per provider. Returns a dict of results per provider.
""" """
results = {} results = {}
p = cfg.get("pushsrv", "pushover") p = cfg.get("pushsrv", "pushover")
if p in ("all", "pushover"): if p in ("all", "pushover"):
@@ -13,9 +13,10 @@ import logging
import time import time
from enum import Enum from enum import Enum
from typing import Dict, Any, Optional, Tuple, Callable from typing import Dict, Any, Optional, Tuple, Callable
from . import notify as notify_mod
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
eventlog = notify_mod.log
class AlertLevel(Enum): class AlertLevel(Enum):
"""Alert severity levels.""" """Alert severity levels."""
@@ -505,6 +506,8 @@ class ThresholdChecker:
)) ))
except Exception as e: except Exception as e:
logger.debug(f"Failed to log threshold event to journal: {e}") logger.debug(f"Failed to log threshold event to journal: {e}")
# Log to eventlog as well
eventlog(host_name, message, service="threshold")
def _check_renotify( def _check_renotify(
self, self,
+5 -4
View File
@@ -12,7 +12,7 @@ from typing import Callable, Iterable, Optional
import websockets import websockets
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
_connections = set() _connections = set()
_loop: Optional[asyncio.AbstractEventLoop] = None _loop: Optional[asyncio.AbstractEventLoop] = None
_get_hosts: Optional[Callable[[], Iterable]] = None _get_hosts: Optional[Callable[[], Iterable]] = None
@@ -78,7 +78,7 @@ async def start(
ssl_context=None, ssl_context=None,
get_hosts: Optional[Callable] = None, get_hosts: Optional[Callable] = None,
get_msgs: Optional[Callable] = None, get_msgs: Optional[Callable] = None,
verbose: bool = False, config: dict = {},
): ):
"""Start WebSocket servers and block until cancelled. """Start WebSocket servers and block until cancelled.
@@ -90,12 +90,13 @@ async def start(
_loop = asyncio.get_running_loop() _loop = asyncio.get_running_loop()
_get_hosts = get_hosts _get_hosts = get_hosts
_get_msgs = get_msgs _get_msgs = get_msgs
_verbose = verbose _verbose = config.get("verbose", False),
_debug = config.get("debug", False),
servers = [] servers = []
# plain WebSocket # plain WebSocket
websockets_logger = logging.getLogger("websockets.server") websockets_logger = logging.getLogger("websockets.server")
websockets_logger.setLevel(logging.DEBUG if verbose else logging.INFO) websockets_logger.setLevel(logging.DEBUG if _debug > 2 else logging.INFO)
# regular WebSocket # regular WebSocket
ws_server = websockets.serve(_handler, host, ws_port) # , subprotocols=["hbd"]) ws_server = websockets.serve(_handler, host, ws_port) # , subprotocols=["hbd"])
servers.append(ws_server) servers.append(ws_server)