adjust default log, pick and config locations. renotify on critical only, make user sessions persistent

This commit is contained in:
Andreas Wrede
2026-04-10 13:24:57 -04:00
parent 2015195112
commit 2468386f24
8 changed files with 26 additions and 11 deletions
-1
View File
@@ -470,7 +470,6 @@ Set breakpoints in modules such as `hbd/server/udp.py`, `hbd/server/dns.py`, or
- `hbd_host`: bind address for HTTP/WSS - `hbd_host`: bind address for HTTP/WSS
- `pickfile`: path for persisted state - `pickfile`: path for persisted state
- `logfile`: path to log file - `logfile`: path to log file
- `logfmt`: `text` or `msg`
- `pushsrv`: push service (`pushover`|`mattermost`|`all`) - `pushsrv`: push service (`pushover`|`mattermost`|`all`)
- `interval` / `grace`: heartbeat timing configuration - `interval` / `grace`: heartbeat timing configuration
- `dyndomains`: list of dyndomains to update via `nsupdate` - `dyndomains`: list of dyndomains to update via `nsupdate`
+1 -1
View File
@@ -59,7 +59,7 @@ Server-specific defaults:
- `hb_port`: Port to listen for heartbeats (default: 50003) - `hb_port`: Port to listen for heartbeats (default: 50003)
- `hbd_port`: HTTP API port (default: 50004) - `hbd_port`: HTTP API port (default: 50004)
- `ws_port`: WebSocket port (default: 50005) - `ws_port`: WebSocket port (default: 50005)
- `logfile`, `logfmt`: Logging configuration - `logfile`: Log file path
- `pushsrv`, `pushover_token`, etc.: Notification settings - `pushsrv`, `pushover_token`, etc.: Notification settings
- `watchhosts`, `dyndnshosts`: Host monitoring - `watchhosts`, `dyndnshosts`: Host monitoring
- `smtpserver`, etc.: Email settings - `smtpserver`, etc.: Email settings
-1
View File
@@ -81,7 +81,6 @@ The following settings **cannot** be reloaded and require a service restart:
- **Logging** - **Logging**
- `logfile` - Log file path - `logfile` - Log file path
- `logfmt` - Log format
- **Journal Settings** - **Journal Settings**
- `journal_enabled` - Enable/disable journaling - `journal_enabled` - Enable/disable journaling
+2 -4
View File
@@ -16,12 +16,10 @@ SERVER_DEFAULTS = {
"hbd_host": "", # Bind address (empty = all interfaces) "hbd_host": "", # Bind address (empty = all interfaces)
# Persistence # Persistence
"pickfile": "/tmp/hb.pick", "pickfile": os.path.join(os.path.expanduser("~"), ".hb.pick"), # File to store host state between restarts
# Logging # Logging
"logfile": "/var/log/heartbeat.log", "logfile": os.path.join(os.path.expanduser("~"), ".hb.log"),
"logfmt": "text", # text or msg or json
# Notification channels # Notification channels
"notification_channels": {}, # Named channels with type and credentials "notification_channels": {}, # Named channels with type and credentials
"default_notification_channels": [], # Default channels if host doesn't specify "default_notification_channels": [], # Default channels if host doesn't specify
+7
View File
@@ -27,6 +27,7 @@ def save_state(config, hbdclass):
"""Save current state to pickle file. Safe to call at any time.""" """Save current state to pickle file. Safe to call at any time."""
import pickle import pickle
import os import os
from . import users as users_mod
# Clear timer references before pickling (they can't be serialized) # Clear timer references before pickling (they can't be serialized)
for hostname, host in list(hbdclass.Host.hosts.items()): for hostname, host in list(hbdclass.Host.hosts.items()):
@@ -48,6 +49,7 @@ def save_state(config, hbdclass):
pick = pickle.Pickler(pickf) pick = pickle.Pickler(pickf)
pick.dump(hbdclass.Host.hosts) pick.dump(hbdclass.Host.hosts)
pick.dump(data.msgs) pick.dump(data.msgs)
pick.dump(users_mod.save_sessions())
os.replace(tmpfile, pickfile) os.replace(tmpfile, pickfile)
except Exception as e: except Exception as e:
logger.error("Failed to save state: %s", e) logger.error("Failed to save state: %s", e)
@@ -432,6 +434,7 @@ def load_pickled_hosts(config, hbdclass):
import os import os
import pickle import pickle
from . import config as config_mod from . import config as config_mod
from . import users as users_mod
pickfile = config.get("pickfile", "hbd.pickle") pickfile = config.get("pickfile", "hbd.pickle")
dyndnshosts = config_mod.get_dyndnshosts(config) dyndnshosts = config_mod.get_dyndnshosts(config)
@@ -445,6 +448,10 @@ def load_pickled_hosts(config, hbdclass):
try: try:
hbdclass.Host.hosts = pick.load() hbdclass.Host.hosts = pick.load()
data.msgs = pick.load() data.msgs = pick.load()
try:
users_mod.load_sessions(pick.load())
except Exception:
pass # older pickle without sessions — fine
pickf.close() pickf.close()
except Exception as e: except Exception as e:
logger.exception("load pickled failed: %s", e) logger.exception("load pickled failed: %s", e)
-2
View File
@@ -252,8 +252,6 @@ def get_settings_sections(config: dict) -> list:
"Path to the pickle file used to persist host state across restarts."), "Path to the pickle file used to persist host state across restarts."),
field("logfile", "Event log", "path", field("logfile", "Event log", "path",
"Path to the event log file."), "Path to the event log file."),
field("logfmt", "Log format", "select",
"Format for event log entries: text, msg, or json."),
], ],
}, },
{ {
+1 -1
View File
@@ -1096,7 +1096,7 @@ class ThresholdChecker:
threshold: Threshold configuration threshold: Threshold configuration
plugin_data: Optional dictionary of all plugin data fields plugin_data: Optional dictionary of all plugin data fields
""" """
if alert_state.level == AlertLevel.OK: if alert_state.level != AlertLevel.CRITICAL:
return return
# Skip reminders if alert has been acknowledged # Skip reminders if alert has been acknowledged
+14
View File
@@ -226,3 +226,17 @@ def _purge_expired_sessions() -> None:
expired = [t for t, s in list(_sessions.items()) if s["expires"] < now] expired = [t for t, s in list(_sessions.items()) if s["expires"] < now]
for t in expired: for t in expired:
del _sessions[t] del _sessions[t]
def save_sessions() -> dict:
    """Build a picklable copy of the current session table.

    Expired sessions are purged first, so the returned dict holds only
    sessions that were still valid at call time.

    Returns:
        A new dict mapping each session token to its session record. The
        copy is shallow: the records themselves are shared with the live
        table.
    """
    # Prune stale entries before copying so they never reach the pickle file.
    _purge_expired_sessions()
    return {token: session for token, session in _sessions.items()}
def load_sessions(snapshot: dict) -> None:
    """Replace the session table with the still-valid entries of *snapshot*.

    Args:
        snapshot: Mapping of session token to session record, as produced
            when the sessions were pickled. A record without an "expires"
            key is treated as already expired and dropped.
    """
    global _sessions
    cutoff = time.time()
    restored = {}
    for token, session in snapshot.items():
        # Keep only sessions whose expiry lies strictly in the future.
        if session.get("expires", 0) > cutoff:
            restored[token] = session
    # Rebind only after the whole snapshot was processed, so a malformed
    # snapshot leaves the existing table untouched.
    _sessions = restored
    logger.debug("Restored %d session(s) from pickle", len(_sessions))