save state to pickle file, restart timers on restart

This commit is contained in:
2026-04-06 17:24:59 -04:00
parent 57c4b86430
commit 832a8b0bda
6 changed files with 195 additions and 91 deletions
+46 -10
View File
@@ -22,12 +22,12 @@ eventlog = notify_mod.eventlog
# shared runtime collections and helpers
def cleanup_function(config, hbdclass):
"""This function will be executed upon program exit."""
logger.info("Running cleanup function...")
def save_state(config, hbdclass):
"""Save current state to pickle file. Safe to call at any time."""
import pickle
import os
# Ensure all timer references are cleared before pickling
# Clear timer references before pickling (they can't be serialized)
for hostname, host in list(hbdclass.Host.hosts.items()):
for conn_type, conn in host.connections.items():
if hasattr(conn, 'cancel_overdue_timer'):
@@ -40,13 +40,26 @@ def cleanup_function(config, hbdclass):
conn.timeout_duration = None
pickfile = config.get("pickfile", "hbd.pickle")
tmpfile = pickfile + ".tmp"
pickf = open(pickfile, "wb")
pick = pickle.Pickler(pickf)
pick.dump(hbdclass.Host.hosts)
pick.dump(data.msgs)
pickf.close()
try:
with open(tmpfile, "wb") as pickf:
pick = pickle.Pickler(pickf)
pick.dump(hbdclass.Host.hosts)
pick.dump(data.msgs)
os.replace(tmpfile, pickfile)
except Exception as e:
logger.error("Failed to save state: %s", e)
try:
os.unlink(tmpfile)
except Exception:
pass
def cleanup_function(config, hbdclass):
    """Persist runtime state one last time when the program exits.

    Logs a start marker, hands ``config`` and ``hbdclass`` unchanged to
    ``save_state``, and logs a completion marker afterwards.
    """
    logger.info("Running cleanup function...")
    # All persistence work is delegated; this wrapper only brackets it
    # with log lines.
    save_state(config, hbdclass)
    logger.info("Cleanup complete.")
@@ -185,6 +198,16 @@ async def _run_async(config, config_path=None):
sock=sock,
)
# Restore connection timers for hosts loaded from pickle
restore_ctx = dict(
config=config,
hbdclass=hbdclass,
log=eventlog,
msg_to_websockets=msg_to_websockets,
threshold_checker=threshold_checker,
)
udp.restore_connection_timers(hbdclass, restore_ctx)
# HTTP server (asyncio-based via aiohttp)
try:
http_task = asyncio.create_task(
@@ -257,6 +280,19 @@ async def _run_async(config, config_path=None):
except Exception as e:
logger.exception("websocket server failed to start: %s", e)
# Periodic autosave task
autosave_interval = config.get("autosave_interval", 300) # default: 5 minutes
async def autosave_task():
    """Periodically persist state until this task is cancelled.

    Each cycle sleeps ``autosave_interval`` seconds and then calls
    ``save_state(config, hbdclass)``. A cycle that raises is logged with
    its traceback and the loop carries on, so a single failed save cannot
    silently end autosaving for the remainder of the process.
    """
    while True:
        # The only suspension point; cancellation at shutdown is delivered
        # here and propagates out of the loop untouched.
        await asyncio.sleep(autosave_interval)
        logger.debug("Autosaving state...")
        try:
            save_state(config, hbdclass)
        except Exception:
            # save_state is a plain synchronous call, so this handler can
            # never swallow a task cancellation.
            logger.exception("Autosave failed; retrying after next interval")
            continue
        logger.debug("Autosave complete (%d hosts)", len(hbdclass.Host.hosts))
autosave = asyncio.create_task(autosave_task())
logger.info("Autosave task started (interval: %ds)", autosave_interval)
# Main event loop - monitor shutdown and reload events
try:
while True:
@@ -304,7 +340,7 @@ async def _run_async(config, config_path=None):
except Exception as e:
logger.warning("Error closing UDP transport: %s", e)
tasks_to_cancel = [http_task, ws_task]
tasks_to_cancel = [http_task, ws_task, autosave]
for task in tasks_to_cancel:
if task:
try: