Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d7b368c7c6 | |||
| e790663f9f | |||
| 475319e248 |
+1
-1
@@ -14,4 +14,4 @@ Install options:
|
||||
"""
|
||||
|
||||
__all__ = ["__version__"]
|
||||
__version__ = "5.1.18"
|
||||
__version__ = "5.1.19"
|
||||
|
||||
+6
-11
@@ -463,18 +463,14 @@ async def cleanup(connections: List[AsyncConnection]):
|
||||
logger = logging.getLogger("hbc.cleanup")
|
||||
logger.info("Cleaning up connections")
|
||||
|
||||
for conn in connections:
|
||||
target = next((c for c in connections if c.transport), connections[0] if connections else None)
|
||||
if target:
|
||||
try:
|
||||
msg = {
|
||||
"shutdown": 1,
|
||||
"acks": conn.ackcount
|
||||
}
|
||||
await conn.sendto(msg)
|
||||
await target.sendto({"shutdown": 1, "acks": target.ackcount})
|
||||
except Exception as e:
|
||||
logger.error(f"Error sending shutdown: {e}")
|
||||
|
||||
for conn in connections:
|
||||
conn.close()
|
||||
break # Only send shutdown on first connection to avoid duplicates
|
||||
|
||||
# Give messages time to send
|
||||
await asyncio.sleep(0.5)
|
||||
@@ -539,9 +535,8 @@ async def async_main(args, config):
|
||||
boot_msg["msg"] = args.message
|
||||
|
||||
boot_msg["acks"] = 0
|
||||
for conn in connections:
|
||||
await conn.sendto(boot_msg)
|
||||
break # Only send message on first connection to avoid duplicates
|
||||
target = next((c for c in connections if c.transport), connections[0])
|
||||
await target.sendto(boot_msg)
|
||||
|
||||
if args.message and not args.daemon:
|
||||
# Message-only mode
|
||||
|
||||
@@ -118,6 +118,13 @@ class CPUMonitorPlugin(MonitorPlugin):
|
||||
data["cpu_iowait"] = round(cpu_times.iowait, 1)
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Could not get CPU times: {e}")
|
||||
|
||||
# Uptime in seconds
|
||||
try:
|
||||
import time
|
||||
data["uptime_seconds"] = int(time.time() - self.psutil.boot_time())
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Could not get uptime: {e}")
|
||||
|
||||
self.logger.debug(
|
||||
f"Collected CPU metrics: {data.get('cpu_percent', 'N/A')}% usage"
|
||||
|
||||
@@ -14,6 +14,24 @@ except ImportError:
|
||||
|
||||
from hbd.client.plugin import MonitorPlugin
|
||||
|
||||
|
||||
def _zfs_arc_bytes() -> int:
|
||||
"""Return current ZFS ARC size in bytes, or 0 if ZFS is not present.
|
||||
|
||||
ZFS ARC is reclaimable but is not included in MemAvailable by the Linux
|
||||
kernel (it is not in SReclaimable), so it would otherwise be counted as
|
||||
used memory.
|
||||
"""
|
||||
try:
|
||||
with open("/proc/spl/kstat/zfs/arcstats") as fh:
|
||||
for line in fh:
|
||||
parts = line.split()
|
||||
if len(parts) >= 3 and parts[0] == "size":
|
||||
return int(parts[2])
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
return 0
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -101,11 +119,21 @@ class MemoryMonitorPlugin(MonitorPlugin):
|
||||
|
||||
# Virtual (physical) memory statistics
|
||||
vmem = psutil.virtual_memory()
|
||||
|
||||
# psutil's available already excludes page cache / file buffers
|
||||
# (uses MemAvailable on Linux). Add ZFS ARC on top because the kernel
|
||||
# does not include it in SReclaimable / MemAvailable even though it is
|
||||
# reclaimable.
|
||||
arc_bytes = _zfs_arc_bytes()
|
||||
available = min(vmem.available + arc_bytes, vmem.total)
|
||||
used = vmem.total - available
|
||||
percent = round(used / vmem.total * 100, 1) if vmem.total else 0.0
|
||||
|
||||
metrics['memory_total'] = vmem.total
|
||||
metrics['memory_available'] = vmem.available
|
||||
metrics['memory_used'] = vmem.used
|
||||
metrics['memory_available'] = available
|
||||
metrics['memory_used'] = used
|
||||
metrics['memory_free'] = vmem.free
|
||||
metrics['memory_percent'] = vmem.percent
|
||||
metrics['memory_percent'] = percent
|
||||
|
||||
# Platform-specific memory details
|
||||
if hasattr(vmem, 'active'):
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "hbd"
|
||||
version = "5.1.18"
|
||||
version = "5.1.19"
|
||||
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
+26
-7
@@ -41,7 +41,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
# updated by scripts/bumpminor.sh
|
||||
__version__ = "5.1.18"
|
||||
__version__ = "5.1.19"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Protocol (mirrors hbd/common/proto.py)
|
||||
@@ -487,6 +487,12 @@ class CPUMonitorPlugin(MonitorPlugin):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
with open("/proc/uptime") as fh:
|
||||
data["uptime_seconds"] = int(float(fh.read().split()[0]))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return data
|
||||
|
||||
|
||||
@@ -535,6 +541,20 @@ class MemoryMonitorPlugin(MonitorPlugin):
|
||||
total = mi.get("MemTotal", 0)
|
||||
avail = mi.get("MemAvailable", mi.get("MemFree", 0))
|
||||
free = mi.get("MemFree", 0)
|
||||
|
||||
# ZFS ARC is reclaimable but not included in MemAvailable; add it.
|
||||
arc_kb = 0
|
||||
try:
|
||||
with open("/proc/spl/kstat/zfs/arcstats") as _f:
|
||||
for _line in _f:
|
||||
_p = _line.split()
|
||||
if len(_p) >= 3 and _p[0] == "size":
|
||||
arc_kb = int(_p[2]) // 1024
|
||||
break
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
avail = min(avail + arc_kb, total)
|
||||
used = total - avail
|
||||
data: Dict[str, Any] = {
|
||||
"memory_total": total * 1024,
|
||||
@@ -1052,9 +1072,8 @@ async def _async_main(args, cfg: Dict[str, Any]) -> int:
|
||||
if args.message:
|
||||
bmsg["service"] = "service"
|
||||
bmsg["msg"] = args.message
|
||||
for c in connections:
|
||||
await c.sendto(bmsg)
|
||||
break
|
||||
target = next((c for c in connections if c._transport), connections[0])
|
||||
await target.sendto(bmsg)
|
||||
if args.message and not args.daemon:
|
||||
await asyncio.sleep(0.3)
|
||||
for c in connections:
|
||||
@@ -1086,12 +1105,12 @@ async def _async_main(args, cfg: Dict[str, Any]) -> int:
|
||||
pass
|
||||
|
||||
log.info("shutting down")
|
||||
for conn in connections:
|
||||
target = next((c for c in connections if c._transport), connections[0] if connections else None)
|
||||
if target:
|
||||
try:
|
||||
await conn.sendto({"shutdown": 1, "acks": conn.ackcount})
|
||||
await target.sendto({"shutdown": 1, "acks": target.ackcount})
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
for conn in connections:
|
||||
conn.close()
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
Reference in New Issue
Block a user