feat: exclude ZFS ARC from memory_percent; add uptime_seconds to cpu_monitor
memory_monitor / hbc_mini: ZFS ARC is reclaimable, but the Linux kernel does not reflect it in MemAvailable (it is not part of SReclaimable). Read the ARC size from /proc/spl/kstat/zfs/arcstats and add it to available memory before computing memory_percent and memory_used. This is a no-op on systems without ZFS.

cpu_monitor: report uptime_seconds via psutil.boot_time() (full client) and /proc/uptime (hbc_mini).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,6 +118,13 @@ class CPUMonitorPlugin(MonitorPlugin):
|
|||||||
data["cpu_iowait"] = round(cpu_times.iowait, 1)
|
data["cpu_iowait"] = round(cpu_times.iowait, 1)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.debug(f"Could not get CPU times: {e}")
|
self.logger.debug(f"Could not get CPU times: {e}")
|
||||||
|
|
||||||
|
# Uptime in seconds
|
||||||
|
try:
|
||||||
|
import time
|
||||||
|
data["uptime_seconds"] = int(time.time() - self.psutil.boot_time())
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.debug(f"Could not get uptime: {e}")
|
||||||
|
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
f"Collected CPU metrics: {data.get('cpu_percent', 'N/A')}% usage"
|
f"Collected CPU metrics: {data.get('cpu_percent', 'N/A')}% usage"
|
||||||
|
|||||||
@@ -14,6 +14,24 @@ except ImportError:
|
|||||||
|
|
||||||
from hbd.client.plugin import MonitorPlugin
|
from hbd.client.plugin import MonitorPlugin
|
||||||
|
|
||||||
|
|
||||||
|
def _zfs_arc_bytes() -> int:
|
||||||
|
"""Return current ZFS ARC size in bytes, or 0 if ZFS is not present.
|
||||||
|
|
||||||
|
ZFS ARC is reclaimable but is not included in MemAvailable by the Linux
|
||||||
|
kernel (it is not in SReclaimable), so it would otherwise be counted as
|
||||||
|
used memory.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open("/proc/spl/kstat/zfs/arcstats") as fh:
|
||||||
|
for line in fh:
|
||||||
|
parts = line.split()
|
||||||
|
if len(parts) >= 3 and parts[0] == "size":
|
||||||
|
return int(parts[2])
|
||||||
|
except (OSError, ValueError):
|
||||||
|
pass
|
||||||
|
return 0
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -101,11 +119,21 @@ class MemoryMonitorPlugin(MonitorPlugin):
|
|||||||
|
|
||||||
# Virtual (physical) memory statistics
|
# Virtual (physical) memory statistics
|
||||||
vmem = psutil.virtual_memory()
|
vmem = psutil.virtual_memory()
|
||||||
|
|
||||||
|
# psutil's available already counts reclaimable page cache / file buffers
|
||||||
|
# (uses MemAvailable on Linux). Add ZFS ARC on top because the kernel
|
||||||
|
# does not include it in SReclaimable / MemAvailable even though it is
|
||||||
|
# reclaimable.
|
||||||
|
arc_bytes = _zfs_arc_bytes()
|
||||||
|
available = min(vmem.available + arc_bytes, vmem.total)
|
||||||
|
used = vmem.total - available
|
||||||
|
percent = round(used / vmem.total * 100, 1) if vmem.total else 0.0
|
||||||
|
|
||||||
metrics['memory_total'] = vmem.total
|
metrics['memory_total'] = vmem.total
|
||||||
metrics['memory_available'] = vmem.available
|
metrics['memory_available'] = available
|
||||||
metrics['memory_used'] = vmem.used
|
metrics['memory_used'] = used
|
||||||
metrics['memory_free'] = vmem.free
|
metrics['memory_free'] = vmem.free
|
||||||
metrics['memory_percent'] = vmem.percent
|
metrics['memory_percent'] = percent
|
||||||
|
|
||||||
# Platform-specific memory details
|
# Platform-specific memory details
|
||||||
if hasattr(vmem, 'active'):
|
if hasattr(vmem, 'active'):
|
||||||
|
|||||||
@@ -487,6 +487,12 @@ class CPUMonitorPlugin(MonitorPlugin):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open("/proc/uptime") as fh:
|
||||||
|
data["uptime_seconds"] = int(float(fh.read().split()[0]))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@@ -535,6 +541,20 @@ class MemoryMonitorPlugin(MonitorPlugin):
|
|||||||
total = mi.get("MemTotal", 0)
|
total = mi.get("MemTotal", 0)
|
||||||
avail = mi.get("MemAvailable", mi.get("MemFree", 0))
|
avail = mi.get("MemAvailable", mi.get("MemFree", 0))
|
||||||
free = mi.get("MemFree", 0)
|
free = mi.get("MemFree", 0)
|
||||||
|
|
||||||
|
# ZFS ARC is reclaimable but not included in MemAvailable; add it.
|
||||||
|
arc_kb = 0
|
||||||
|
try:
|
||||||
|
with open("/proc/spl/kstat/zfs/arcstats") as _f:
|
||||||
|
for _line in _f:
|
||||||
|
_p = _line.split()
|
||||||
|
if len(_p) >= 3 and _p[0] == "size":
|
||||||
|
arc_kb = int(_p[2]) // 1024
|
||||||
|
break
|
||||||
|
except (OSError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
avail = min(avail + arc_kb, total)
|
||||||
used = total - avail
|
used = total - avail
|
||||||
data: Dict[str, Any] = {
|
data: Dict[str, Any] = {
|
||||||
"memory_total": total * 1024,
|
"memory_total": total * 1024,
|
||||||
|
|||||||
Reference in New Issue
Block a user