Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 65c4267847 | |||
| 462a445235 | |||
| 368e178f93 | |||
| 6905bf266a | |||
| b6dcce4f35 | |||
| e6436fc236 | |||
| c5ce41762e | |||
| 26ca0c095f | |||
| 1eecd67594 | |||
| caf3c2c0ac |
@@ -377,7 +377,7 @@ This project now declares its dependencies in `pyproject.toml`. Instead
|
|||||||
of the old `requirements.txt` flow, install the package into a virtualenv
|
of the old `requirements.txt` flow, install the package into a virtualenv
|
||||||
using `pip`:
|
using `pip`:
|
||||||
|
|
||||||
See `scripts/install.sh` for a way to install.
|
See `scripts/hb_install.sh` for a way to install.
|
||||||
|
|
||||||
Run the daemon (example):
|
Run the daemon (example):
|
||||||
|
|
||||||
@@ -441,6 +441,68 @@ plugins:
|
|||||||
|
|
||||||
All monitoring plugins default to 5-minute (300 second) intervals, but can be customized as needed.
|
All monitoring plugins default to 5-minute (300 second) intervals, but can be customized as needed.
|
||||||
|
|
||||||
|
### hbc_mini — single-file client (no external dependencies)
|
||||||
|
|
||||||
|
`scripts/hbc_mini.py` is a self-contained version of the heartbeat client that requires only Python 3.8+ and no external packages. Copy it to any host and run it directly — no virtualenv, no `pip install`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Basic usage
|
||||||
|
python3 hbc_mini.py your-server.example.com
|
||||||
|
|
||||||
|
# Run as daemon
|
||||||
|
python3 hbc_mini.py -d your-server.example.com
|
||||||
|
|
||||||
|
# Send a boot message
|
||||||
|
python3 hbc_mini.py -b your-server.example.com
|
||||||
|
|
||||||
|
# Send a one-off message
|
||||||
|
python3 hbc_mini.py -m "maintenance starting" your-server.example.com
|
||||||
|
```
|
||||||
|
|
||||||
|
**Config:** `~/.hbc.json` (same keys as `~/.hbc.yaml`, JSON format). Example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"hb_port": 50003,
|
||||||
|
"interval": 30,
|
||||||
|
"plugins": {
|
||||||
|
"ping_monitor": {
|
||||||
|
"interval": 60,
|
||||||
|
"hosts": ["8.8.8.8", "192.168.1.1"]
|
||||||
|
},
|
||||||
|
"nagios_runner": {
|
||||||
|
"interval": 300,
|
||||||
|
"commands": [
|
||||||
|
{"name": "check_load", "command": "/usr/lib/nagios/plugins/check_load -w 5,4,3 -c 10,8,6"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Plugin availability:**
|
||||||
|
|
||||||
|
| Plugin | Platform | Data source |
|
||||||
|
|---|---|---|
|
||||||
|
| `os_info` | all | `platform` stdlib |
|
||||||
|
| `ping_monitor` | all | `ping` subprocess |
|
||||||
|
| `nagios_runner` | all (not Windows) | subprocess |
|
||||||
|
| `cpu_monitor` | Linux | `/proc/stat` |
|
||||||
|
| `memory_monitor` | Linux | `/proc/meminfo` |
|
||||||
|
| `disk_monitor` | Linux, macOS, BSD | `df -P` subprocess |
|
||||||
|
| `network_monitor` | Linux | `/proc/net/dev` |
|
||||||
|
|
||||||
|
**What is not available compared to the full `hbc`:**
|
||||||
|
|
||||||
|
- No YAML config (use JSON instead)
|
||||||
|
- No `filesystem_info` plugin
|
||||||
|
- `cpu_monitor` does not report per-core usage or CPU frequency (no psutil)
|
||||||
|
- Plugins cannot be loaded from external `.py` files — all plugins are compiled in
|
||||||
|
|
||||||
|
Everything else — heartbeat protocol, ACK/CMD/UPD handling, `hb_install.sh`-based self-update, daemonize, syslog — is identical to the full client.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## 🐞 Debugging in VS Code
|
## 🐞 Debugging in VS Code
|
||||||
|
|
||||||
This repository includes a ready-to-use `.vscode/launch.json` with configurations to run or attach the VS Code debugger to `hbd`.
|
This repository includes a ready-to-use `.vscode/launch.json` with configurations to run or attach the VS Code debugger to `hbd`.
|
||||||
|
|||||||
+1
-1
@@ -14,4 +14,4 @@ Install options:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
__all__ = ["__version__"]
|
__all__ = ["__version__"]
|
||||||
__version__ = "5.1.4"
|
__version__ = "5.1.7"
|
||||||
|
|||||||
+43
-41
@@ -14,7 +14,6 @@ import signal
|
|||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from hashlib import md5
|
|
||||||
from logging.handlers import SysLogHandler
|
from logging.handlers import SysLogHandler
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
@@ -56,7 +55,8 @@ class AsyncConnection:
|
|||||||
|
|
||||||
self.transport: Optional[asyncio.DatagramTransport] = None
|
self.transport: Optional[asyncio.DatagramTransport] = None
|
||||||
self.protocol: Optional[asyncio.DatagramProtocol] = None
|
self.protocol: Optional[asyncio.DatagramProtocol] = None
|
||||||
|
self._dead = False
|
||||||
|
|
||||||
self.logger = logging.getLogger(f"hbc.conn.{addr}")
|
self.logger = logging.getLogger(f"hbc.conn.{addr}")
|
||||||
|
|
||||||
async def open(self) -> bool:
|
async def open(self) -> bool:
|
||||||
@@ -93,9 +93,12 @@ class AsyncConnection:
|
|||||||
msg: Message dictionary
|
msg: Message dictionary
|
||||||
msg_id: Message ID (HTB, PLG, etc.)
|
msg_id: Message ID (HTB, PLG, etc.)
|
||||||
"""
|
"""
|
||||||
|
if self._dead:
|
||||||
|
return
|
||||||
|
|
||||||
if not self.transport:
|
if not self.transport:
|
||||||
await self.open()
|
await self.open()
|
||||||
|
|
||||||
if not self.transport:
|
if not self.transport:
|
||||||
self.logger.error("Cannot send - no transport")
|
self.logger.error("Cannot send - no transport")
|
||||||
return
|
return
|
||||||
@@ -167,7 +170,9 @@ class HeartbeatProtocol(asyncio.DatagramProtocol):
|
|||||||
|
|
||||||
def error_received(self, exc):
|
def error_received(self, exc):
|
||||||
"""Handle protocol errors."""
|
"""Handle protocol errors."""
|
||||||
self.logger.error(f"Protocol error: {exc}")
|
self.logger.warning(f"Protocol error on {self.connection.addr}: {exc} — dropping connection")
|
||||||
|
self.connection._dead = True
|
||||||
|
self.connection.close()
|
||||||
|
|
||||||
|
|
||||||
async def handle_command(conn: AsyncConnection, msg: dict):
|
async def handle_command(conn: AsyncConnection, msg: dict):
|
||||||
@@ -204,55 +209,52 @@ async def handle_command(conn: AsyncConnection, msg: dict):
|
|||||||
await conn.sendto(response)
|
await conn.sendto(response)
|
||||||
|
|
||||||
|
|
||||||
async def handle_update(conn: AsyncConnection, msg: dict):
|
async def handle_update(conn: AsyncConnection, _msg: dict): # pyright: ignore[reportUnusedParameter]
|
||||||
"""Handle self-update from server."""
|
"""Handle self-update by running hb_install.sh."""
|
||||||
import codecs
|
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
logger = logging.getLogger("hbc.update")
|
logger = logging.getLogger("hbc.update")
|
||||||
|
|
||||||
|
installer = shutil.which("hb_install.sh")
|
||||||
|
if installer is None:
|
||||||
|
candidate = Path(sys.argv[0]).parent / "hb_install.sh"
|
||||||
|
if candidate.exists():
|
||||||
|
installer = str(candidate)
|
||||||
|
|
||||||
|
if installer is None:
|
||||||
|
error = "hb_install.sh not found in PATH or alongside hbc"
|
||||||
|
logger.error(error)
|
||||||
|
await conn.sendto({"service": "update", "msg": error})
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(f"Running installer: {installer}")
|
||||||
try:
|
try:
|
||||||
code = codecs.decode(msg["code"], "base64").decode()
|
proc = await asyncio.create_subprocess_exec(
|
||||||
csum = msg["csum"]
|
installer, "client",
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.STDOUT,
|
||||||
|
)
|
||||||
|
out, _ = await asyncio.wait_for(proc.communicate(), timeout=120)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
error = "Installer timed out"
|
||||||
|
logger.error(error)
|
||||||
|
await conn.sendto({"service": "update", "msg": error})
|
||||||
|
return
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error = f"Missing code/csum: {e}"
|
error = f"Installer failed: {e}"
|
||||||
logger.error(error)
|
logger.error(error)
|
||||||
await conn.sendto({"service": "update", "msg": error})
|
await conn.sendto({"service": "update", "msg": error})
|
||||||
return
|
return
|
||||||
|
|
||||||
# Verify checksum
|
if proc.returncode != 0:
|
||||||
m = md5()
|
error = f"Installer exited {proc.returncode}: {out.decode().strip()}"
|
||||||
m.update(code.encode())
|
|
||||||
if m.hexdigest() != csum:
|
|
||||||
error = "Checksum mismatch"
|
|
||||||
logger.error(error)
|
logger.error(error)
|
||||||
await conn.sendto({"service": "update", "msg": error})
|
await conn.sendto({"service": "update", "msg": error})
|
||||||
return
|
return
|
||||||
|
|
||||||
# Backup current file
|
|
||||||
fn = sys.argv[0]
|
|
||||||
ofn = f"{fn}.sav"
|
|
||||||
try:
|
|
||||||
shutil.copy2(fn, ofn)
|
|
||||||
except Exception as e:
|
|
||||||
error = f"Backup failed: {e}"
|
|
||||||
logger.error(error)
|
|
||||||
await conn.sendto({"service": "update", "msg": error})
|
|
||||||
return
|
|
||||||
|
|
||||||
# Write new code
|
|
||||||
try:
|
|
||||||
with open(fn, "w") as fh:
|
|
||||||
fh.write(code)
|
|
||||||
except Exception as e:
|
|
||||||
error = f"Write failed: {e}"
|
|
||||||
logger.error(error)
|
|
||||||
await conn.sendto({"service": "update", "msg": error})
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.info("Update successful, restart required")
|
logger.info("Update successful, restart required")
|
||||||
await conn.sendto({"service": "update", "msg": "OK"})
|
await conn.sendto({"service": "update", "msg": "OK"})
|
||||||
|
|
||||||
# Trigger restart
|
# Trigger restart
|
||||||
global dorestart
|
global dorestart
|
||||||
dorestart = True
|
dorestart = True
|
||||||
|
|||||||
+4
-9
@@ -210,15 +210,11 @@ async def start(
|
|||||||
return err
|
return err
|
||||||
qa = request.rel_url.query
|
qa = request.rel_url.query
|
||||||
uname = urllib.parse.unquote(qa.get("h", ""))
|
uname = urllib.parse.unquote(qa.get("h", ""))
|
||||||
ucode = qa.get("c")
|
if not uname:
|
||||||
if not ucode or not uname:
|
return web.Response(status=400, text="need h= argument")
|
||||||
return web.Response(status=400, text="need h= and c= arguments")
|
|
||||||
if uname != "All" and uname not in hbdclass.Host.hosts:
|
if uname != "All" and uname not in hbdclass.Host.hosts:
|
||||||
return web.Response(status=400, text=f"h={uname} not found")
|
return web.Response(status=400, text=f"h={uname} not found")
|
||||||
if uname != "All":
|
names = [uname] if uname != "All" else list(hbdclass.Host.hosts)
|
||||||
names = [uname]
|
|
||||||
else:
|
|
||||||
names = [n for n in hbdclass.Host.hosts]
|
|
||||||
out = []
|
out = []
|
||||||
for n in names:
|
for n in names:
|
||||||
host = hbdclass.Host.hosts[n]
|
host = hbdclass.Host.hosts[n]
|
||||||
@@ -227,8 +223,7 @@ async def start(
|
|||||||
continue
|
continue
|
||||||
op_err = None
|
op_err = None
|
||||||
try:
|
try:
|
||||||
r = {"csum": None, "code": ucode}
|
host.cmds.append(("UPD", {}))
|
||||||
host.cmds.append(("UPD", r))
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
op_err = str(e)
|
op_err = str(e)
|
||||||
out.append(f"update started for {n}: {op_err if op_err else 'OK'}")
|
out.append(f"update started for {n}: {op_err if op_err else 'OK'}")
|
||||||
|
|||||||
@@ -210,7 +210,6 @@ async def _run_async(config, config_path=None):
|
|||||||
ctx = dict(
|
ctx = dict(
|
||||||
config=config,
|
config=config,
|
||||||
hbdclass=hbdclass,
|
hbdclass=hbdclass,
|
||||||
log=eventlog,
|
|
||||||
msg_to_websockets=msg_to_websockets,
|
msg_to_websockets=msg_to_websockets,
|
||||||
msg_journal=msg_journal,
|
msg_journal=msg_journal,
|
||||||
threshold_checker=threshold_checker,
|
threshold_checker=threshold_checker,
|
||||||
@@ -237,7 +236,6 @@ async def _run_async(config, config_path=None):
|
|||||||
restore_ctx = dict(
|
restore_ctx = dict(
|
||||||
config=config,
|
config=config,
|
||||||
hbdclass=hbdclass,
|
hbdclass=hbdclass,
|
||||||
log=eventlog,
|
|
||||||
msg_to_websockets=msg_to_websockets,
|
msg_to_websockets=msg_to_websockets,
|
||||||
threshold_checker=threshold_checker,
|
threshold_checker=threshold_checker,
|
||||||
)
|
)
|
||||||
|
|||||||
+2
-5
@@ -315,7 +315,6 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
|
|
||||||
cfg = ctx.get("config", {})
|
cfg = ctx.get("config", {})
|
||||||
hbdcls = ctx.get("hbdclass")
|
hbdcls = ctx.get("hbdclass")
|
||||||
log = ctx.get("log")
|
|
||||||
msg_to_websockets = ctx.get("msg_to_websockets")
|
msg_to_websockets = ctx.get("msg_to_websockets")
|
||||||
DEBUG = ctx.get("DEBUG", 0)
|
DEBUG = ctx.get("DEBUG", 0)
|
||||||
verbose = ctx.get("verbose", False)
|
verbose = ctx.get("verbose", False)
|
||||||
@@ -491,12 +490,10 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
op, rmsg = host.cmds[0]
|
op, rmsg = host.cmds[0]
|
||||||
if op == "CMD":
|
if op == "CMD":
|
||||||
del host.cmds[0]
|
del host.cmds[0]
|
||||||
if log:
|
eventlog(uname, "INFO", "command sent")
|
||||||
log(uname, "command sent")
|
|
||||||
elif op == "UPD":
|
elif op == "UPD":
|
||||||
del host.cmds[0]
|
del host.cmds[0]
|
||||||
if log:
|
eventlog(uname, "INFO", "update initiated")
|
||||||
log(uname, "update initiated")
|
|
||||||
opkt = dicttos(op, rmsg)
|
opkt = dicttos(op, rmsg)
|
||||||
try:
|
try:
|
||||||
transport.sendto(opkt, addr)
|
transport.sendto(opkt, addr)
|
||||||
|
|||||||
+4
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "hbd"
|
name = "hbd"
|
||||||
version = "5.1.4"
|
version = "5.1.7"
|
||||||
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
|
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
@@ -54,6 +54,9 @@ dev = [
|
|||||||
hbd = "hbd.server.cli:main"
|
hbd = "hbd.server.cli:main"
|
||||||
hbc = "hbd.client.main:main"
|
hbc = "hbd.client.main:main"
|
||||||
|
|
||||||
|
[tool.setuptools]
|
||||||
|
script-files = ["scripts/hb_install.sh", "scripts/hbc_mini.py"]
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
where = ["."]
|
where = ["."]
|
||||||
include = ["hbd*"]
|
include = ["hbd*"]
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
# install the heartbeat client, hbc. The server is installed when the arg 'server' is passed
|
# Helper script to install the heartbeat tools. By default, it will only
|
||||||
# install the heartbeat client, hbc. The server is installed when the arg 'server' is passed
|
# install the heartbeat client, hbc. The server is installed when the arg 'server' is passed
|
||||||
# to the script. The script will install the heartbeat tools in a python
|
# to the script. The script will install the heartbeat tools in a python
|
||||||
# virtual environment in ~/venvs/hbd. The hbd and hbc commands will be
|
# virtual environment in ~/venvs/hbd. The hbd and hbc commands will be
|
||||||
@@ -9,17 +9,20 @@
|
|||||||
# reused. The script will also remove any existing symlinks for hbd and hbc
|
# reused. The script will also remove any existing symlinks for hbd and hbc
|
||||||
# in ~/bin before creating new ones.
|
# in ~/bin before creating new ones.
|
||||||
|
|
||||||
|
|
||||||
# hbd/hbc from wheel and create symlinks for hbd and hbc in ~/bin
|
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
what=$1
|
what=$1
|
||||||
on_ha=0
|
on_ha=0
|
||||||
[ -z "$what" ] && what="client"
|
[ -z "$what" ] && what="client"
|
||||||
|
|
||||||
if [ -d /homeassistant ]; then
|
if [ -d /homeassistant ]; then
|
||||||
echo "cannot install in HA, run \"docker exec -it homeassistant $0 $@\""
|
echo "cannot install in HA, running \"docker exec homeassistant $0 $@\""
|
||||||
exit 1
|
docker exec homeassistant $0 $@
|
||||||
|
rc=$?
|
||||||
|
if [ $rc -ne 0 ]; then
|
||||||
|
echo "Failed to install heartbeat in HA, please check the logs for more details"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
fi
|
fi
|
||||||
if [ -d /config ]; then
|
if [ -d /config ]; then
|
||||||
echo "Installing on HA"
|
echo "Installing on HA"
|
||||||
@@ -46,8 +49,11 @@ fi
|
|||||||
echo "Installing heartbeat $what"
|
echo "Installing heartbeat $what"
|
||||||
|
|
||||||
if [ ! -d $venv/hbd ]; then
|
if [ ! -d $venv/hbd ]; then
|
||||||
|
set +e
|
||||||
python3 -m pip --version > /dev/null 2>&1
|
python3 -m pip --version > /dev/null 2>&1
|
||||||
if [ $? -ne 0 ]; then
|
rc=$?
|
||||||
|
set -e
|
||||||
|
if [ $rc -ne 0 ]; then
|
||||||
# truenas does not have pip installed by default, so we need to fetch get-pip.py and install pip
|
# truenas does not have pip installed by default, so we need to fetch get-pip.py and install pip
|
||||||
echo "pip is not installed, fetching get-pip.py and installing pip"
|
echo "pip is not installed, fetching get-pip.py and installing pip"
|
||||||
arg="--without-pip"
|
arg="--without-pip"
|
||||||
@@ -78,6 +84,9 @@ if [ "$what" = "server" ]; then
|
|||||||
else
|
else
|
||||||
rm -f $where/hbc
|
rm -f $where/hbc
|
||||||
ln -sf $(which hbc) $where/hbc
|
ln -sf $(which hbc) $where/hbc
|
||||||
|
# rm -f $where/hb_install.sh
|
||||||
|
cp "$0" $where/hb_install.sh
|
||||||
|
chmod +x $where/hb_install.sh
|
||||||
if [ $on_ha -eq 1 ]; then
|
if [ $on_ha -eq 1 ]; then
|
||||||
echo "restarting hbc "
|
echo "restarting hbc "
|
||||||
job=$(grep run_hbc configuration.yaml | sed 's/run_hbc://')
|
job=$(grep run_hbc configuration.yaml | sed 's/run_hbc://')
|
||||||
Executable
+1128
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user