adjust default log, pick and config locations. renotify on critical only, make user sessions persistent

This commit is contained in:
Andreas Wrede
2026-04-10 13:24:57 -04:00
parent 2015195112
commit 2468386f24
8 changed files with 26 additions and 11 deletions
-1
View File
@@ -470,7 +470,6 @@ Set breakpoints in modules such as `hbd/server/udp.py`, `hbd/server/dns.py`, or
- `hbd_host`: bind address for HTTP/WSS
- `pickfile`: path for persisted state
- `logfile`: path to log file
- `logfmt`: `text` or `msg`
- `pushsrv`: push service (`pushover`|`mattermost`|`all`)
- `interval` / `grace`: heartbeat timing configuration
- `dyndomains`: list of dyndomains to update via `nsupdate`
+1 -1
View File
@@ -59,7 +59,7 @@ Server-specific defaults:
- `hb_port`: Port to listen for heartbeats (default: 50003)
- `hbd_port`: HTTP API port (default: 50004)
- `ws_port`: WebSocket port (default: 50005)
- `logfile`, `logfmt`: Logging configuration
- `logfile`: Log file path
- `pushsrv`, `pushover_token`, etc.: Notification settings
- `watchhosts`, `dyndnshosts`: Host monitoring
- `smtpserver`, etc.: Email settings
-1
View File
@@ -81,7 +81,6 @@ The following settings **cannot** be reloaded and require a service restart:
- **Logging**
- `logfile` - Log file path
- `logfmt` - Log format
- **Journal Settings**
- `journal_enabled` - Enable/disable journaling
+2 -4
View File
@@ -16,12 +16,10 @@ SERVER_DEFAULTS = {
"hbd_host": "", # Bind address (empty = all interfaces)
# Persistence
"pickfile": "/tmp/hb.pick",
"pickfile": os.path.join(os.path.expanduser("~"), ".hb.pick"), # File to store host state between restarts
# Logging
"logfile": "/var/log/heartbeat.log",
"logfmt": "text", # text or msg or json
"logfile": os.path.join(os.path.expanduser("~"), ".hb.log"),
# Notification channels
"notification_channels": {}, # Named channels with type and credentials
"default_notification_channels": [], # Default channels if host doesn't specify
+7
View File
@@ -27,6 +27,7 @@ def save_state(config, hbdclass):
"""Save current state to pickle file. Safe to call at any time."""
import pickle
import os
from . import users as users_mod
# Clear timer references before pickling (they can't be serialized)
for hostname, host in list(hbdclass.Host.hosts.items()):
@@ -48,6 +49,7 @@ def save_state(config, hbdclass):
pick = pickle.Pickler(pickf)
pick.dump(hbdclass.Host.hosts)
pick.dump(data.msgs)
pick.dump(users_mod.save_sessions())
os.replace(tmpfile, pickfile)
except Exception as e:
logger.error("Failed to save state: %s", e)
@@ -432,6 +434,7 @@ def load_pickled_hosts(config, hbdclass):
import os
import pickle
from . import config as config_mod
from . import users as users_mod
pickfile = config.get("pickfile", "hbd.pickle")
dyndnshosts = config_mod.get_dyndnshosts(config)
@@ -445,6 +448,10 @@ def load_pickled_hosts(config, hbdclass):
try:
hbdclass.Host.hosts = pick.load()
data.msgs = pick.load()
try:
users_mod.load_sessions(pick.load())
except Exception:
pass # older pickle without sessions — fine
pickf.close()
except Exception as e:
logger.exception("load pickled failed: %s", e)
-2
View File
@@ -252,8 +252,6 @@ def get_settings_sections(config: dict) -> list:
"Path to the pickle file used to persist host state across restarts."),
field("logfile", "Event log", "path",
"Path to the event log file."),
field("logfmt", "Log format", "select",
"Format for event log entries: text, msg, or json."),
],
},
{
+1 -1
View File
@@ -1096,7 +1096,7 @@ class ThresholdChecker:
threshold: Threshold configuration
plugin_data: Optional dictionary of all plugin data fields
"""
if alert_state.level == AlertLevel.OK:
if alert_state.level != AlertLevel.CRITICAL:
return
# Skip reminders if alert has been acknowledged
+14
View File
@@ -226,3 +226,17 @@ def _purge_expired_sessions() -> None:
expired = [t for t, s in list(_sessions.items()) if s["expires"] < now]
for t in expired:
del _sessions[t]
def save_sessions() -> dict:
    """Build a picklable snapshot of the sessions that are still valid.

    Expired entries are purged from the module-level session table first,
    so the snapshot only contains sessions that survived the purge.

    Returns:
        A new dict mapping each session token to its session record. The
        copy is shallow: the records themselves are shared with the live
        table, only the outer mapping is new.
    """
    # Drop stale sessions before copying so they are never persisted.
    _purge_expired_sessions()
    snapshot = {token: session for token, session in _sessions.items()}
    return snapshot
def load_sessions(snapshot: dict) -> None:
    """Replace the live session table with the still-valid part of *snapshot*.

    Args:
        snapshot: Mapping of session token to session record, as previously
            produced for pickling. Each record is queried with
            ``.get("expires", 0)``.

    A record is kept only when its ``"expires"`` value is strictly greater
    than the current time; records without an ``"expires"`` key default to 0
    and are therefore dropped as well. The module-level ``_sessions`` is
    rebound to the filtered dict only after the whole snapshot has been
    processed.
    """
    global _sessions
    cutoff = time.time()

    # Build the replacement table locally so a failure part-way through
    # leaves the current sessions untouched.
    restored = {}
    for token, session in snapshot.items():
        if session.get("expires", 0) > cutoff:
            restored[token] = session

    _sessions = restored
    logger.debug("Restored %d session(s) from pickle", len(_sessions))