Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | f640574e4f | | |
| | 9a19424279 | | |
| | ca8ba84e65 | | |
| | f3d08d1c9e | | |
| | 1e4263b793 | | |
+1
-1
@@ -14,4 +14,4 @@ Install options:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
__all__ = ["__version__"]
|
__all__ = ["__version__"]
|
||||||
__version__ = "5.2.0"
|
__version__ = "5.2.2"
|
||||||
|
|||||||
+2
-3
@@ -172,9 +172,8 @@ class HeartbeatProtocol(asyncio.DatagramProtocol):
|
|||||||
self.logger.error(f"Error processing datagram: {e}", exc_info=True)
|
self.logger.error(f"Error processing datagram: {e}", exc_info=True)
|
||||||
|
|
||||||
def error_received(self, exc):
|
def error_received(self, exc):
|
||||||
"""Handle protocol errors."""
|
"""Handle protocol errors — close transport so the heartbeat sender retries."""
|
||||||
self.logger.warning(f"Protocol error on {self.connection.addr}: {exc} — dropping connection")
|
self.logger.warning(f"Protocol error on {self.connection.addr}: {exc} — will retry")
|
||||||
self.connection._dead = True
|
|
||||||
self.connection.close()
|
self.connection.close()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -475,6 +475,8 @@ def run(config, config_path=None):
|
|||||||
if config.get("debug", 0) > 0:
|
if config.get("debug", 0) > 0:
|
||||||
log_level = logging.DEBUG
|
log_level = logging.DEBUG
|
||||||
logging.basicConfig(level=log_level)
|
logging.basicConfig(level=log_level)
|
||||||
|
if not config.get("debug", 0):
|
||||||
|
logging.getLogger("aiohttp.access").propagate = False
|
||||||
load_pickled_hosts(config, hbdclass)
|
load_pickled_hosts(config, hbdclass)
|
||||||
|
|
||||||
notify_mod.initlog(logfile=config.get("logfile", "messages.log"))
|
notify_mod.initlog(logfile=config.get("logfile", "messages.log"))
|
||||||
|
|||||||
@@ -439,7 +439,7 @@
|
|||||||
<span class="alert-level ${level}">${alert.level}</span>
|
<span class="alert-level ${level}">${alert.level}</span>
|
||||||
<a class="alert-hostname" href="/plugins#${alert.hostname}">${alert.hostname}</a>
|
<a class="alert-hostname" href="/plugins#${alert.hostname}">${alert.hostname}</a>
|
||||||
</div>
|
</div>
|
||||||
<div class="alert-metric">${alert.metric_path}</div>
|
<div class="alert-metric">${alert.metric_path.includes('.') ? alert.metric_path.slice(alert.metric_path.indexOf('.') + 1) : alert.metric_path}</div>
|
||||||
<div class="alert-details">
|
<div class="alert-details">
|
||||||
<span>${valueText}</span>
|
<span>${valueText}</span>
|
||||||
<span class="alert-duration">Active for ${duration}</span>
|
<span class="alert-duration">Active for ${duration}</span>
|
||||||
|
|||||||
+18
-11
@@ -1043,7 +1043,10 @@ class ThresholdChecker:
|
|||||||
|
|
||||||
# Format operator symbol
|
# Format operator symbol
|
||||||
op_symbol = threshold.operator.value
|
op_symbol = threshold.operator.value
|
||||||
|
|
||||||
|
# Short metric label: strip the plugin-name prefix for readability
|
||||||
|
short_path = metric_path.partition(".")[2] or metric_path
|
||||||
|
|
||||||
# Use a display-friendly value (inf is the sentinel for "overdue")
|
# Use a display-friendly value (inf is the sentinel for "overdue")
|
||||||
import math
|
import math
|
||||||
display_value = "overdue" if isinstance(value, float) and math.isinf(value) else value
|
display_value = "overdue" if isinstance(value, float) and math.isinf(value) else value
|
||||||
@@ -1065,25 +1068,25 @@ class ThresholdChecker:
|
|||||||
|
|
||||||
if new_level == AlertLevel.OK:
|
if new_level == AlertLevel.OK:
|
||||||
lvl = "RECOVER"
|
lvl = "RECOVER"
|
||||||
message = f"{metric_path} = {display_value} ({old_level.name} -> OK)"
|
message = f"{short_path} = {display_value} ({old_level.name} -> OK)"
|
||||||
elif new_level == AlertLevel.WARNING:
|
elif new_level == AlertLevel.WARNING:
|
||||||
lvl = "WARNING"
|
lvl = "WARNING"
|
||||||
if has_display:
|
if has_display:
|
||||||
message = f"{metric_path} = {display_value} {_fmt()}"
|
message = f"{short_path} = {display_value} {_fmt()}"
|
||||||
else:
|
else:
|
||||||
message = f"{metric_path} = {display_value}"
|
message = f"{short_path} = {display_value}"
|
||||||
elif new_level == AlertLevel.CRITICAL:
|
elif new_level == AlertLevel.CRITICAL:
|
||||||
lvl = "CRITICAL"
|
lvl = "CRITICAL"
|
||||||
if has_display:
|
if has_display:
|
||||||
message = f"{metric_path} = {display_value} {_fmt()}"
|
message = f"{short_path} = {display_value} {_fmt()}"
|
||||||
else:
|
else:
|
||||||
message = f"{metric_path} = {display_value}"
|
message = f"{short_path} = {display_value}"
|
||||||
else:
|
else:
|
||||||
lvl = "UNKNOWN"
|
lvl = "UNKNOWN"
|
||||||
if has_display:
|
if has_display:
|
||||||
message = f"{metric_path} = {display_value} {_fmt()}"
|
message = f"{short_path} = {display_value} {_fmt()}"
|
||||||
else:
|
else:
|
||||||
message = f"{metric_path} = {display_value}"
|
message = f"{short_path} = {display_value}"
|
||||||
|
|
||||||
# Formatted threshold info stored on AlertState for the UI
|
# Formatted threshold info stored on AlertState for the UI
|
||||||
formatted_threshold_msg = _fmt() if has_display and new_level != AlertLevel.OK else None
|
formatted_threshold_msg = _fmt() if has_display and new_level != AlertLevel.OK else None
|
||||||
@@ -1157,6 +1160,9 @@ class ThresholdChecker:
|
|||||||
Returns:
|
Returns:
|
||||||
Formatted display string
|
Formatted display string
|
||||||
"""
|
"""
|
||||||
|
if not display_format:
|
||||||
|
display_format = "(threshold: {op_symbol} {threshold_value})" if threshold_value is not None else ""
|
||||||
|
|
||||||
# Build format context with standard variables
|
# Build format context with standard variables
|
||||||
format_context = {
|
format_context = {
|
||||||
'value': value,
|
'value': value,
|
||||||
@@ -1338,7 +1344,8 @@ class ThresholdChecker:
|
|||||||
|
|
||||||
# Format operator symbol
|
# Format operator symbol
|
||||||
op_symbol = threshold.operator.value
|
op_symbol = threshold.operator.value
|
||||||
|
short_path = metric_path.partition(".")[2] or metric_path
|
||||||
|
|
||||||
# Time to re-notify
|
# Time to re-notify
|
||||||
if threshold_value is not None:
|
if threshold_value is not None:
|
||||||
# Use display format string
|
# Use display format string
|
||||||
@@ -1351,9 +1358,9 @@ class ThresholdChecker:
|
|||||||
check_name=check_name,
|
check_name=check_name,
|
||||||
metric_name=metric_name,
|
metric_name=metric_name,
|
||||||
)
|
)
|
||||||
message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} {threshold_info}, ongoing for {int(now - alert_state.since)}s"
|
message = f"REMINDER ({alert_state.level.name}): {host_name} - {short_path} = {value} {threshold_info}, ongoing for {int(now - alert_state.since)}s"
|
||||||
else:
|
else:
|
||||||
message = f"REMINDER ({alert_state.level.name}): {host_name} - {metric_path} = {value} (ongoing for {int(now - alert_state.since)}s)"
|
message = f"REMINDER ({alert_state.level.name}): {host_name} - {short_path} = {value} (ongoing for {int(now - alert_state.since)}s)"
|
||||||
|
|
||||||
from . import hbdclass
|
from . import hbdclass
|
||||||
host = hbdclass.Host.hosts.get(host_name)
|
host = hbdclass.Host.hosts.get(host_name)
|
||||||
|
|||||||
+1
-2
@@ -336,8 +336,7 @@ def handle_datagram(msg: dict, addr, transport, ctx: dict):
|
|||||||
# Apply user-access settings from config
|
# Apply user-access settings from config
|
||||||
access = config_mod.get_host_access(cfg, uname)
|
access = config_mod.get_host_access(cfg, uname)
|
||||||
host.apply_access(access["owner"], access["managers"], access["monitors"])
|
host.apply_access(access["owner"], access["managers"], access["monitors"])
|
||||||
if verbose:
|
logger.info("New host signed on: %s (dyn=%s, access=%s)", uname, host.dyn, access)
|
||||||
print(("XX: New host, num now %s" % (len(hbdcls.Host.hosts))))
|
|
||||||
newh = True
|
newh = True
|
||||||
else:
|
else:
|
||||||
host = hbdcls.Host.hosts[uname]
|
host = hbdcls.Host.hosts[uname]
|
||||||
|
|||||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "hbd"
|
name = "hbd"
|
||||||
version = "5.2.0"
|
version = "5.2.2"
|
||||||
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
|
description = "Heartbeat monitoring system — client (hbc) and server (hbd)"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|||||||
+2
-3
@@ -41,7 +41,7 @@ from pathlib import Path
|
|||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
# updated by scripts/bumpminor.sh
|
# updated by scripts/bumpminor.sh
|
||||||
__version__ = "5.2.0"
|
__version__ = "5.2.2"
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Protocol (mirrors hbd/common/proto.py)
|
# Protocol (mirrors hbd/common/proto.py)
|
||||||
@@ -797,8 +797,7 @@ class _HeartbeatProtocol(asyncio.DatagramProtocol):
|
|||||||
self._log.error("datagram error: %s", e)
|
self._log.error("datagram error: %s", e)
|
||||||
|
|
||||||
def error_received(self, exc):
|
def error_received(self, exc):
|
||||||
self._log.warning("protocol error on %s: %s — dropping connection", self._conn.addr, exc)
|
self._log.warning("protocol error on %s: %s — will retry", self._conn.addr, exc)
|
||||||
self._conn._dead = True
|
|
||||||
self._conn.close()
|
self._conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user