Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6282077fe0 | |||
| ddd857173b | |||
| f46f725d12 | |||
| 3da6976b53 | |||
| 3a0c48e32b | |||
| cf6e19704f | |||
| b0addd7c67 | |||
| 32680d34a4 | |||
| a7abdcb5c5 | |||
| 7bab15ae52 |
@@ -127,15 +127,15 @@ class FilesystemInfoPlugin(InfoPlugin):
|
|||||||
try:
|
try:
|
||||||
# Maximum filename length
|
# Maximum filename length
|
||||||
max_name = os.pathconf(partition.mountpoint, 'PC_NAME_MAX')
|
max_name = os.pathconf(partition.mountpoint, 'PC_NAME_MAX')
|
||||||
if max_name:
|
if max_name is not None:
|
||||||
fs_info['maxfile'] = max_name
|
fs_info['maxfile'] = max_name
|
||||||
except (OSError, ValueError):
|
except (OSError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Maximum path length
|
# Maximum path length
|
||||||
max_path = os.pathconf(partition.mountpoint, 'PC_PATH_MAX')
|
max_path = os.pathconf(partition.mountpoint, 'PC_PATH_MAX')
|
||||||
if max_path:
|
if max_path is not None:
|
||||||
fs_info['maxpath'] = max_path
|
fs_info['maxpath'] = max_path
|
||||||
except (OSError, ValueError):
|
except (OSError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -367,7 +367,7 @@ class Host:
|
|||||||
def stateinfo(self):
|
def stateinfo(self):
|
||||||
ddict = {}
|
ddict = {}
|
||||||
for d in self.__dict__:
|
for d in self.__dict__:
|
||||||
if d in ["alert_states", "plugin_data"]:
|
if d in ["alert_states", "plugin_data", "plugin_timers"]:
|
||||||
continue
|
continue
|
||||||
if d == "connections":
|
if d == "connections":
|
||||||
cl = []
|
cl = []
|
||||||
|
|||||||
+25
-9
@@ -424,7 +424,7 @@ async def start(
|
|||||||
# Resolve templates directory relative to the hbd package
|
# Resolve templates directory relative to the hbd package
|
||||||
pkg_dir = os.path.dirname(__file__)
|
pkg_dir = os.path.dirname(__file__)
|
||||||
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
||||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
|
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir), autoescape=True)
|
||||||
host = config.get("hb_host", "localhost")
|
host = config.get("hb_host", "localhost")
|
||||||
extra_scripts = config.get("http_extra_scripts", "")
|
extra_scripts = config.get("http_extra_scripts", "")
|
||||||
host = request.host # includes port if non-standard
|
host = request.host # includes port if non-standard
|
||||||
@@ -597,8 +597,6 @@ async def start(
|
|||||||
all_alerts = []
|
all_alerts = []
|
||||||
|
|
||||||
for hostname, host in hbdclass.Host.hosts.items():
|
for hostname, host in hbdclass.Host.hosts.items():
|
||||||
if not host.watched:
|
|
||||||
continue
|
|
||||||
if not _can_view_host(user, host):
|
if not _can_view_host(user, host):
|
||||||
continue
|
continue
|
||||||
if threshold_checker:
|
if threshold_checker:
|
||||||
@@ -692,7 +690,7 @@ async def start(
|
|||||||
current_user, _ = _require_auth_redirect(request)
|
current_user, _ = _require_auth_redirect(request)
|
||||||
pkg_dir = os.path.dirname(__file__)
|
pkg_dir = os.path.dirname(__file__)
|
||||||
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
||||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
|
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir), autoescape=True)
|
||||||
|
|
||||||
# Collect all hosts with plugin data (filtered by visibility)
|
# Collect all hosts with plugin data (filtered by visibility)
|
||||||
hosts_with_plugins = []
|
hosts_with_plugins = []
|
||||||
@@ -723,7 +721,7 @@ async def start(
|
|||||||
current_user, _ = _require_auth_redirect(request)
|
current_user, _ = _require_auth_redirect(request)
|
||||||
pkg_dir = os.path.dirname(__file__)
|
pkg_dir = os.path.dirname(__file__)
|
||||||
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
||||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
|
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir), autoescape=True)
|
||||||
|
|
||||||
tmpl = env.get_template("alerts.html")
|
tmpl = env.get_template("alerts.html")
|
||||||
body = tmpl.render(
|
body = tmpl.render(
|
||||||
@@ -780,6 +778,8 @@ async def start(
|
|||||||
token = users_mod.create_session(username)
|
token = users_mod.create_session(username)
|
||||||
eventlog("hbd", "INFO", f"Login: {username} via password")
|
eventlog("hbd", "INFO", f"Login: {username} via password")
|
||||||
redirect_to = request.rel_url.query.get("next", "/")
|
redirect_to = request.rel_url.query.get("next", "/")
|
||||||
|
if not redirect_to.startswith("/"):
|
||||||
|
redirect_to = "/"
|
||||||
resp = web.HTTPFound(redirect_to)
|
resp = web.HTTPFound(redirect_to)
|
||||||
resp.set_cookie(
|
resp.set_cookie(
|
||||||
SESSION_COOKIE,
|
SESSION_COOKIE,
|
||||||
@@ -891,6 +891,13 @@ async def start(
|
|||||||
if not target_user.avatar_is_local():
|
if not target_user.avatar_is_local():
|
||||||
return web.Response(status=404, text="No local avatar configured")
|
return web.Response(status=404, text="No local avatar configured")
|
||||||
path = target_user.avatar
|
path = target_user.avatar
|
||||||
|
avatar_dir = config.get("avatar_dir") or (
|
||||||
|
os.path.dirname(os.path.realpath(_config_path)) if _config_path else None
|
||||||
|
)
|
||||||
|
if not avatar_dir:
|
||||||
|
return web.Response(status=403, text="Local avatars not configured")
|
||||||
|
if not os.path.realpath(path).startswith(os.path.realpath(avatar_dir) + os.sep):
|
||||||
|
return web.Response(status=403, text="Forbidden")
|
||||||
if not os.path.isfile(path):
|
if not os.path.isfile(path):
|
||||||
return web.Response(status=404, text="Avatar file not found")
|
return web.Response(status=404, text="Avatar file not found")
|
||||||
# Infer content-type from extension
|
# Infer content-type from extension
|
||||||
@@ -994,7 +1001,7 @@ async def start(
|
|||||||
current_user, _ = _require_auth_redirect(request)
|
current_user, _ = _require_auth_redirect(request)
|
||||||
pkg_dir = os.path.dirname(__file__)
|
pkg_dir = os.path.dirname(__file__)
|
||||||
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
||||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
|
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir), autoescape=True)
|
||||||
|
|
||||||
# Build host access summary for this user.
|
# Build host access summary for this user.
|
||||||
# Merge live hosts with config-only hosts (not yet seen) so the profile
|
# Merge live hosts with config-only hosts (not yet seen) so the profile
|
||||||
@@ -1078,7 +1085,7 @@ async def start(
|
|||||||
current_user, _ = _require_auth_redirect(request)
|
current_user, _ = _require_auth_redirect(request)
|
||||||
pkg_dir = os.path.dirname(__file__)
|
pkg_dir = os.path.dirname(__file__)
|
||||||
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
||||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
|
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir), autoescape=True)
|
||||||
from hbd import __version__ as hbd_version
|
from hbd import __version__ as hbd_version
|
||||||
|
|
||||||
uptime_secs = int(time.time() - _start_epoch)
|
uptime_secs = int(time.time() - _start_epoch)
|
||||||
@@ -1122,7 +1129,7 @@ async def start(
|
|||||||
raise web.HTTPForbidden(reason="Admin access required")
|
raise web.HTTPForbidden(reason="Admin access required")
|
||||||
pkg_dir = os.path.dirname(__file__)
|
pkg_dir = os.path.dirname(__file__)
|
||||||
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
templates_dir = config.get("templates_dir", os.path.join(pkg_dir, "templates"))
|
||||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
|
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir), autoescape=True)
|
||||||
tmpl = env.get_template("settings.html")
|
tmpl = env.get_template("settings.html")
|
||||||
settings_data = settings_mod.get_settings_data(config, threshold_checker=threshold_checker)
|
settings_data = settings_mod.get_settings_data(config, threshold_checker=threshold_checker)
|
||||||
body = tmpl.render(
|
body = tmpl.render(
|
||||||
@@ -1661,7 +1668,16 @@ async def start(
|
|||||||
if "full_name" in body:
|
if "full_name" in body:
|
||||||
user_entry["full_name"] = str(body["full_name"])
|
user_entry["full_name"] = str(body["full_name"])
|
||||||
if "avatar" in body:
|
if "avatar" in body:
|
||||||
user_entry["avatar"] = str(body["avatar"])
|
avatar_val = str(body["avatar"])
|
||||||
|
if avatar_val.startswith("/"):
|
||||||
|
avatar_dir = config.get("avatar_dir") or (
|
||||||
|
os.path.dirname(os.path.realpath(_config_path)) if _config_path else None
|
||||||
|
)
|
||||||
|
if not avatar_dir:
|
||||||
|
return web.json_response({"error": "Local avatars not configured"}, status=400)
|
||||||
|
if not os.path.realpath(avatar_val).startswith(os.path.realpath(avatar_dir) + os.sep):
|
||||||
|
return web.json_response({"error": "Avatar path outside allowed directory"}, status=400)
|
||||||
|
user_entry["avatar"] = avatar_val
|
||||||
if "notification_channels" in body:
|
if "notification_channels" in body:
|
||||||
visible = _visible_channels_for_user(user)
|
visible = _visible_channels_for_user(user)
|
||||||
user_entry["notification_channels"] = [
|
user_entry["notification_channels"] = [
|
||||||
|
|||||||
@@ -321,9 +321,15 @@
|
|||||||
var c = 0;
|
var c = 0;
|
||||||
var HBD_VERSION = "{{ hbd_version }}";
|
var HBD_VERSION = "{{ hbd_version }}";
|
||||||
|
|
||||||
|
function escHtml(s) {
|
||||||
|
var d = document.createElement('div');
|
||||||
|
d.textContent = String(s);
|
||||||
|
return d.innerHTML;
|
||||||
|
}
|
||||||
|
|
||||||
function hostNameHtml(data) {
|
function hostNameHtml(data) {
|
||||||
var rawName = data.raw_name || data.name.replace(/<[^>]+>/g, '').replace('*', '').trim();
|
var rawName = data.raw_name || data.name.replace(/<[^>]+>/g, '').replace('*', '').trim();
|
||||||
var nameHtml = data.name;
|
var nameHtml = escHtml(data.name);
|
||||||
if (!data.hbc_version || data.hbc_version !== HBD_VERSION) {
|
if (!data.hbc_version || data.hbc_version !== HBD_VERSION) {
|
||||||
nameHtml += ' 🥀';
|
nameHtml += ' 🥀';
|
||||||
}
|
}
|
||||||
@@ -410,11 +416,11 @@
|
|||||||
c_critical.innerHTML = "";
|
c_critical.innerHTML = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
c_ipv4addr.innerHTML = data.connections[0].addr;
|
c_ipv4addr.innerHTML = escHtml(data.connections[0].addr);
|
||||||
c_ipv4state.innerHTML = data.connections[0].state;
|
c_ipv4state.innerHTML = escHtml(data.connections[0].state);
|
||||||
if (data.connections.length > 1) {
|
if (data.connections.length > 1) {
|
||||||
c_ipv6addr.innerHTML = data.connections[1].addr;
|
c_ipv6addr.innerHTML = escHtml(data.connections[1].addr);
|
||||||
c_ipv6state.innerHTML = data.connections[1].state;
|
c_ipv6state.innerHTML = escHtml(data.connections[1].state);
|
||||||
}
|
}
|
||||||
var table = document.getElementById("ntablebody"); // find table to append to
|
var table = document.getElementById("ntablebody"); // find table to append to
|
||||||
table.appendChild(row); // append row to table
|
table.appendChild(row); // append row to table
|
||||||
@@ -477,7 +483,7 @@
|
|||||||
|
|
||||||
for (var i = 0; i < data.connections.length; i++) {
|
for (var i = 0; i < data.connections.length; i++) {
|
||||||
// Offset by 2 for the warning/critical count columns
|
// Offset by 2 for the warning/critical count columns
|
||||||
name_idx[data.name].cells[3 + i * 4].innerHTML = data.connections[i].addr;
|
name_idx[data.name].cells[3 + i * 4].innerHTML = escHtml(data.connections[i].addr);
|
||||||
name_idx[data.name].cells[6 + i * 4].innerHTML = formatTS(
|
name_idx[data.name].cells[6 + i * 4].innerHTML = formatTS(
|
||||||
data.connections[i].statetime
|
data.connections[i].statetime
|
||||||
);
|
);
|
||||||
@@ -497,7 +503,7 @@
|
|||||||
state = '<span class="state-overdue">overdue</span>';
|
state = '<span class="state-overdue">overdue</span>';
|
||||||
latency = "-";
|
latency = "-";
|
||||||
} else {
|
} else {
|
||||||
state = "<b>" + data.connections[i].state + "</b>";
|
state = "<b>" + escHtml(data.connections[i].state) + "</b>";
|
||||||
latency = "-";
|
latency = "-";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -558,12 +564,12 @@
|
|||||||
+ ' ' + _p(_d.getHours()) + ':' + _p(_d.getMinutes()) + ':' + _p(_d.getSeconds());
|
+ ' ' + _p(_d.getHours()) + ':' + _p(_d.getMinutes()) + ':' + _p(_d.getSeconds());
|
||||||
var lvl = (msg.level || "INFO").toLowerCase();
|
var lvl = (msg.level || "INFO").toLowerCase();
|
||||||
var hostVal = msg.host || '';
|
var hostVal = msg.host || '';
|
||||||
var html = '<div class="log-entry log-' + lvl + '" data-level="' + lvl + '" data-host="' + hostVal.replace(/"/g, '&quot;') + '">';
|
var html = '<div class="log-entry log-' + escHtml(lvl) + '" data-level="' + escHtml(lvl) + '" data-host="' + escHtml(hostVal) + '">';
|
||||||
html += '<span class="log-ts">' + ts_str + '</span>';
|
html += '<span class="log-ts">' + ts_str + '</span>';
|
||||||
html += '<span class="log-level">' + (msg.level || "") + '</span>';
|
html += '<span class="log-level">' + escHtml(msg.level || "") + '</span>';
|
||||||
if (msg.host) html += '<span class="log-host">' + msg.host + '</span>';
|
if (msg.host) html += '<span class="log-host">' + escHtml(msg.host) + '</span>';
|
||||||
if (msg.service) html += '<span class="log-service">' + msg.service + '</span>';
|
if (msg.service) html += '<span class="log-service">' + escHtml(msg.service) + '</span>';
|
||||||
html += '<span class="log-msg">' + msg.message + '</span>';
|
html += '<span class="log-msg">' + escHtml(msg.message) + '</span>';
|
||||||
html += '</div>';
|
html += '</div>';
|
||||||
msgs.insertAdjacentHTML(state.history ? "beforeend" : "afterbegin", html);
|
msgs.insertAdjacentHTML(state.history ? "beforeend" : "afterbegin", html);
|
||||||
applyLogFilters();
|
applyLogFilters();
|
||||||
@@ -621,7 +627,7 @@
|
|||||||
<tbody id="ntablebody">
|
<tbody id="ntablebody">
|
||||||
{% for host in hosts %}
|
{% for host in hosts %}
|
||||||
<tr class="{% if host.alert_critical_unacked > 0 or host.alert_critical_acked > 0 %}row-critical{% elif host.alert_warning_unacked > 0 or host.alert_warning_acked > 0 %}row-warning{% endif %}">
|
<tr class="{% if host.alert_critical_unacked > 0 or host.alert_critical_acked > 0 %}row-critical{% elif host.alert_warning_unacked > 0 or host.alert_warning_acked > 0 %}row-warning{% endif %}">
|
||||||
<td data-name="{{ host.name }}"><a class="host-link" href="/plugins#{{ host.raw_name | urlencode }}">{{ host.name }}{% if not host.hbc_version or host.hbc_version != hbd_version %} 🥀{% endif %}</a></td>
|
<td data-name="{{ host.name }}"><a class="host-link" href="/plugins#{{ host.name | urlencode }}">{{ host.name }}{% if not host.hbc_version or host.hbc_version != hbd_version %} 🥀{% endif %}</a></td>
|
||||||
<td style="text-align: center; color: #ff9800; font-weight: bold;">
|
<td style="text-align: center; color: #ff9800; font-weight: bold;">
|
||||||
{%- set warning_unacked = host.alert_warning_unacked -%}
|
{%- set warning_unacked = host.alert_warning_unacked -%}
|
||||||
{%- set warning_acked = host.alert_warning_acked -%}
|
{%- set warning_acked = host.alert_warning_acked -%}
|
||||||
|
|||||||
@@ -1554,6 +1554,10 @@ class ThresholdChecker:
|
|||||||
configured = self.get_thresholds_for_host(hostname)
|
configured = self.get_thresholds_for_host(hostname)
|
||||||
stale = []
|
stale = []
|
||||||
for mp in host.alert_states:
|
for mp in host.alert_states:
|
||||||
|
# connectivity.* and rtt are managed by the connection state
|
||||||
|
# machine, not by threshold config — never purge them.
|
||||||
|
if mp == "rtt" or mp.startswith("connectivity"):
|
||||||
|
continue
|
||||||
if self._find_threshold(configured, mp)[0] is not None:
|
if self._find_threshold(configured, mp)[0] is not None:
|
||||||
continue
|
continue
|
||||||
# Also match wildcard pool/partition thresholds (e.g. "zfs_monitor.*.status"
|
# Also match wildcard pool/partition thresholds (e.g. "zfs_monitor.*.status"
|
||||||
|
|||||||
+46
-6
@@ -266,10 +266,15 @@ def restore_connection_timers(hbdclass, ctx):
|
|||||||
for afam, conn in list(host.connections.items()):
|
for afam, conn in list(host.connections.items()):
|
||||||
state = conn.getstate()
|
state = conn.getstate()
|
||||||
if state == hbdclass.Connection.DOWN:
|
if state == hbdclass.Connection.DOWN:
|
||||||
|
_set_connectivity_alert(host, afam, "CRITICAL")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
on_overdue, on_unknown = _make_timer_callbacks(uname, host, ctx)
|
on_overdue, on_unknown = _make_timer_callbacks(uname, host, ctx)
|
||||||
|
|
||||||
|
if state == hbdclass.Connection.UNKNOWN:
|
||||||
|
_set_connectivity_alert(host, afam, "CRITICAL")
|
||||||
|
continue
|
||||||
|
|
||||||
if state == hbdclass.Connection.UP and interval > 0:
|
if state == hbdclass.Connection.UP and interval > 0:
|
||||||
elapsed = now - conn.lastbeat
|
elapsed = now - conn.lastbeat
|
||||||
# Give hosts one full (interval + grace) of extra time on startup
|
# Give hosts one full (interval + grace) of extra time on startup
|
||||||
@@ -300,6 +305,10 @@ def restore_connection_timers(hbdclass, ctx):
|
|||||||
"Restored OVERDUE timer %s/%s: %.0fs remaining",
|
"Restored OVERDUE timer %s/%s: %.0fs remaining",
|
||||||
uname, afam, remaining,
|
uname, afam, remaining,
|
||||||
)
|
)
|
||||||
|
# Ensure the connectivity alert is set — it may be missing if
|
||||||
|
# hbd was shut down before the on_overdue callback had a chance
|
||||||
|
# to record it.
|
||||||
|
_set_connectivity_alert(host, afam, "CRITICAL")
|
||||||
restored += 1
|
restored += 1
|
||||||
|
|
||||||
logger.info("Restored timers for %d connection(s)", restored)
|
logger.info("Restored timers for %d connection(s)", restored)
|
||||||
@@ -389,16 +398,33 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
if k not in ("ID", "plugin", "id", "name")}
|
if k not in ("ID", "plugin", "id", "name")}
|
||||||
# Store plugin data with timestamp
|
# Store plugin data with timestamp
|
||||||
host.add_plugin_data(plugin_name, plugin_data, timestamp=now)
|
host.add_plugin_data(plugin_name, plugin_data, timestamp=now)
|
||||||
# Reset stale timer — 3× the heartbeat interval (min 60 s)
|
# Reset stale timer using the observed send interval for this plugin.
|
||||||
stale_timeout = max(host.interval * 3, 60)
|
# We need two samples to know the real interval; on the first sample
|
||||||
host.reset_plugin_timer(plugin_name, stale_timeout,
|
# we cancel any leftover timer but don't set a new one, to avoid
|
||||||
_make_plugin_stale_callback(uname, ctx))
|
# false-stale firing for slow plugins (e.g. nagios_runner at 300 s).
|
||||||
|
history = host.plugin_data.get(plugin_name, [])
|
||||||
|
if len(history) >= 2:
|
||||||
|
plugin_interval = max(history[-1][0] - history[-2][0], 1)
|
||||||
|
host.reset_plugin_timer(plugin_name, plugin_interval * 3,
|
||||||
|
_make_plugin_stale_callback(uname, ctx))
|
||||||
|
# Remove alert states for metrics present in the previous sample
|
||||||
|
# but absent now (e.g. a nagios check removed from configuration).
|
||||||
|
prev_keys = set(history[-2][1].keys())
|
||||||
|
curr_keys = set(plugin_data.keys())
|
||||||
|
for metric_name in prev_keys - curr_keys:
|
||||||
|
metric_path = f"{plugin_name}.{metric_name}"
|
||||||
|
if host.alert_states.pop(metric_path, None) is not None:
|
||||||
|
eventlog(uname, "INFO", f"stale check removed: {metric_path}")
|
||||||
|
if (prev_keys - curr_keys) and msg_to_websockets:
|
||||||
|
msg_to_websockets("host", host.stateinfo())
|
||||||
|
else:
|
||||||
|
host.cancel_plugin_timer(plugin_name)
|
||||||
|
|
||||||
# If os_info reports an owner and none is configured server-side, apply it
|
# If os_info reports an owner and none is configured server-side, apply it
|
||||||
if plugin_name == "os_info":
|
if plugin_name == "os_info":
|
||||||
config_owner = config_mod.get_host_access(cfg, uname).get("owner")
|
config_owner = config_mod.get_host_access(cfg, uname).get("owner")
|
||||||
default_owner = config_mod.get_default_owner(cfg)
|
default_owner = config_mod.get_default_owner(cfg)
|
||||||
inferred_owner = plugin_data.get("owner", config_owner or default_owner)
|
inferred_owner = config_owner or plugin_data.get("owner") or default_owner
|
||||||
host.owner = inferred_owner
|
host.owner = inferred_owner
|
||||||
logger.info(f"owner for {uname} is {host.owner}")
|
logger.info(f"owner for {uname} is {host.owner}")
|
||||||
if DEBUG > 1:
|
if DEBUG > 1:
|
||||||
@@ -453,6 +479,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
boot = msg.get("boot", 0)
|
boot = msg.get("boot", 0)
|
||||||
|
|
||||||
if boot:
|
if boot:
|
||||||
|
# hbc was started with a -b flag
|
||||||
eventlog(uname, "INFO", "booted")
|
eventlog(uname, "INFO", "booted")
|
||||||
if host.watched:
|
if host.watched:
|
||||||
asyncio.create_task(notify_mod.send_notification(
|
asyncio.create_task(notify_mod.send_notification(
|
||||||
@@ -460,11 +487,24 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
notify_mod.Notification(title=f"[INFO] {uname}", body=f"{host.name} booted", level="INFO"),
|
notify_mod.Notification(title=f"[INFO] {uname}", body=f"{host.name} booted", level="INFO"),
|
||||||
))
|
))
|
||||||
if message:
|
if message:
|
||||||
eventlog(uname, "INFO", "msg: %s" % message, service=service)
|
eventlog(uname, "INFO", message, service=service)
|
||||||
|
|
||||||
if conn.getstate() != hbdcls.Connection.UP:
|
if conn.getstate() != hbdcls.Connection.UP:
|
||||||
|
# Transition to UP and log/notify if appropriate
|
||||||
lasts = conn.state
|
lasts = conn.state
|
||||||
d = conn.newstate(hbdcls.Connection.UP, now)
|
d = conn.newstate(hbdcls.Connection.UP, now)
|
||||||
|
# On reboot, pre-boot plugin data and derived alerts are stale.
|
||||||
|
# Cancel all plugin timers and wipe plugin state so timers restart
|
||||||
|
# cleanly from the first two post-boot samples.
|
||||||
|
for pname in list(host.plugin_timers):
|
||||||
|
host.cancel_plugin_timer(pname)
|
||||||
|
host.plugin_data.clear()
|
||||||
|
stale_plugin_keys = [
|
||||||
|
k for k in host.alert_states
|
||||||
|
if k not in ("rtt",) and not k.startswith("connectivity.")
|
||||||
|
]
|
||||||
|
for k in stale_plugin_keys:
|
||||||
|
del host.alert_states[k]
|
||||||
# Clear connectivity alert now that the host is back up
|
# Clear connectivity alert now that the host is back up
|
||||||
_set_connectivity_alert(host, conn.afam, "OK")
|
_set_connectivity_alert(host, conn.afam, "OK")
|
||||||
# Don't log/notify RECOVER for a brand-new host seen for the first time —
|
# Don't log/notify RECOVER for a brand-new host seen for the first time —
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,49 @@
|
|||||||
|
# PyInstaller spec for hbc_windows.exe
|
||||||
|
# Build with: pyinstaller hbc_windows.spec
|
||||||
|
#
|
||||||
|
# Requirements (on Windows):
|
||||||
|
# pip install pyinstaller
|
||||||
|
|
||||||
|
block_cipher = None
|
||||||
|
|
||||||
|
a = Analysis(
|
||||||
|
['hbc_windows.py'],
|
||||||
|
pathex=[],
|
||||||
|
binaries=[],
|
||||||
|
datas=[],
|
||||||
|
hiddenimports=[],
|
||||||
|
hookspath=[],
|
||||||
|
hooksconfig={},
|
||||||
|
runtime_hooks=[],
|
||||||
|
excludes=['tkinter', 'unittest', 'email', 'html', 'http', 'urllib', 'xml'],
|
||||||
|
win_no_prefer_redirects=False,
|
||||||
|
win_private_assemblies=False,
|
||||||
|
cipher=block_cipher,
|
||||||
|
noarchive=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
pyz = PYZ(a.pure, a.zlib_archive, cipher=block_cipher)
|
||||||
|
|
||||||
|
exe = EXE(
|
||||||
|
pyz,
|
||||||
|
a.scripts,
|
||||||
|
a.binaries,
|
||||||
|
a.zipfiles,
|
||||||
|
a.datas,
|
||||||
|
[],
|
||||||
|
name='hbc_windows',
|
||||||
|
debug=False,
|
||||||
|
bootloader_ignore_signals=False,
|
||||||
|
strip=False,
|
||||||
|
upx=False,
|
||||||
|
upx_exclude=[],
|
||||||
|
runtime_tmpdir=None,
|
||||||
|
console=True,
|
||||||
|
disable_windowed_traceback=False,
|
||||||
|
argv_emulation=False,
|
||||||
|
target_arch=None,
|
||||||
|
codesign_identity=None,
|
||||||
|
entitlements_file=None,
|
||||||
|
icon=None,
|
||||||
|
version=None,
|
||||||
|
)
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
#Requires -RunAsAdministrator
|
||||||
|
<#
|
||||||
|
.SYNOPSIS
|
||||||
|
Install hbc_windows.exe as a Windows Service using NSSM.
|
||||||
|
|
||||||
|
.DESCRIPTION
|
||||||
|
Installs the HeartBeat Client as a Windows Service that starts automatically.
|
||||||
|
Requires NSSM (Non-Sucking Service Manager) in PATH or alongside this script.
|
||||||
|
Requires hbc_windows.exe built via: pyinstaller hbc_windows.spec
|
||||||
|
|
||||||
|
.PARAMETER Server
|
||||||
|
HBD server hostname or IP address (required).
|
||||||
|
|
||||||
|
.PARAMETER ExePath
|
||||||
|
Path to hbc_windows.exe. Defaults to the directory containing this script.
|
||||||
|
|
||||||
|
.PARAMETER ServiceName
|
||||||
|
Windows service name. Default: heartbeat-client
|
||||||
|
|
||||||
|
.PARAMETER ConfigFile
|
||||||
|
Path to hbc.json config file. Optional.
|
||||||
|
|
||||||
|
.PARAMETER LogFile
|
||||||
|
Path to log file. Default: C:\ProgramData\heartbeat\hbc.log
|
||||||
|
|
||||||
|
.PARAMETER Interval
|
||||||
|
Heartbeat interval in seconds. Default: 10
|
||||||
|
|
||||||
|
.EXAMPLE
|
||||||
|
.\install_hbc_windows.ps1 -Server hbd.example.com
|
||||||
|
.\install_hbc_windows.ps1 -Server hbd.example.com -ConfigFile C:\ProgramData\heartbeat\hbc.json
|
||||||
|
#>
|
||||||
|
|
||||||
|
param(
|
||||||
|
[Parameter(Mandatory = $true)]
|
||||||
|
[string]$Server,
|
||||||
|
|
||||||
|
[string]$ExePath = "",
|
||||||
|
[string]$ServiceName = "heartbeat-client",
|
||||||
|
[string]$ConfigFile = "",
|
||||||
|
[string]$LogFile = "C:\ProgramData\heartbeat\hbc.log",
|
||||||
|
[int]$Interval = 10
|
||||||
|
)
|
||||||
|
|
||||||
|
Set-StrictMode -Version Latest
|
||||||
|
$ErrorActionPreference = "Stop"
|
||||||
|
|
||||||
|
# Locate hbc_windows.exe
|
||||||
|
if ($ExePath -eq "") {
|
||||||
|
$ExePath = Join-Path $PSScriptRoot "hbc_windows.exe"
|
||||||
|
}
|
||||||
|
if (-not (Test-Path $ExePath)) {
|
||||||
|
Write-Error "hbc_windows.exe not found at: $ExePath`nBuild it first with: pyinstaller hbc_windows.spec"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Locate NSSM
|
||||||
|
$nssm = Get-Command nssm -ErrorAction SilentlyContinue
|
||||||
|
if (-not $nssm) {
|
||||||
|
$nssmLocal = Join-Path $PSScriptRoot "nssm.exe"
|
||||||
|
if (Test-Path $nssmLocal) {
|
||||||
|
$nssm = $nssmLocal
|
||||||
|
} else {
|
||||||
|
Write-Error "nssm.exe not found in PATH or alongside this script.`nDownload from https://nssm.cc/download"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$nssm = $nssm.Source
|
||||||
|
}
|
||||||
|
|
||||||
|
# Build argument list
|
||||||
|
$args_list = "--daemon $Server"
|
||||||
|
if ($ConfigFile -ne "") {
|
||||||
|
$args_list = "--daemon -c `"$ConfigFile`" $Server"
|
||||||
|
}
|
||||||
|
if ($LogFile -ne "") {
|
||||||
|
$args_list = "$args_list --log-file `"$LogFile`""
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create data directory
|
||||||
|
$dataDir = "C:\ProgramData\heartbeat"
|
||||||
|
if (-not (Test-Path $dataDir)) {
|
||||||
|
New-Item -ItemType Directory -Path $dataDir | Out-Null
|
||||||
|
Write-Host "Created $dataDir"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Remove existing service if present
|
||||||
|
$existing = Get-Service -Name $ServiceName -ErrorAction SilentlyContinue
|
||||||
|
if ($existing) {
|
||||||
|
Write-Host "Removing existing service '$ServiceName'..."
|
||||||
|
& $nssm stop $ServiceName 2>$null
|
||||||
|
& $nssm remove $ServiceName confirm
|
||||||
|
}
|
||||||
|
|
||||||
|
# Install service
|
||||||
|
Write-Host "Installing service '$ServiceName'..."
|
||||||
|
& $nssm install $ServiceName $ExePath $args_list
|
||||||
|
if ($LASTEXITCODE -ne 0) {
|
||||||
|
Write-Error "nssm install failed (exit $LASTEXITCODE)"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Configure service
|
||||||
|
& $nssm set $ServiceName DisplayName "HeartBeat Client"
|
||||||
|
& $nssm set $ServiceName Description "Sends heartbeat and plugin metrics to the HBD monitoring server."
|
||||||
|
& $nssm set $ServiceName Start SERVICE_AUTO_START
|
||||||
|
& $nssm set $ServiceName AppStdout (Join-Path $dataDir "nssm_stdout.log")
|
||||||
|
& $nssm set $ServiceName AppStderr (Join-Path $dataDir "nssm_stderr.log")
|
||||||
|
& $nssm set $ServiceName AppRotateFiles 1
|
||||||
|
& $nssm set $ServiceName AppRotateBytes 5242880
|
||||||
|
|
||||||
|
# Start service
|
||||||
|
Write-Host "Starting service '$ServiceName'..."
|
||||||
|
& $nssm start $ServiceName
|
||||||
|
if ($LASTEXITCODE -ne 0) {
|
||||||
|
Write-Warning "Service installed but failed to start — check logs in $dataDir"
|
||||||
|
} else {
|
||||||
|
Write-Host "Service '$ServiceName' started successfully."
|
||||||
|
Write-Host "Log file: $LogFile"
|
||||||
|
Write-Host ""
|
||||||
|
Write-Host "Useful commands:"
|
||||||
|
Write-Host " nssm status $ServiceName"
|
||||||
|
Write-Host " nssm stop $ServiceName"
|
||||||
|
Write-Host " nssm restart $ServiceName"
|
||||||
|
Write-Host " nssm remove $ServiceName confirm"
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user