Refactor monitor, add threshold testing
This commit is contained in:
+135
-5
@@ -42,6 +42,11 @@ class Connection:
|
||||
self.statetime = self.lastbeat
|
||||
self.deltastatetime = "computed"
|
||||
self.state = Connection.UNKNOWN
|
||||
|
||||
# Timer-based reachability monitoring
|
||||
self.overdue_timer = None
|
||||
self.overdue_callback = None
|
||||
self.timeout_duration = None
|
||||
|
||||
if host:
|
||||
Connection.htab[addr] = self.host.name
|
||||
@@ -49,6 +54,27 @@ class Connection:
|
||||
log(self.host.name, "dns update %s" % self.addr)
|
||||
Host.dnsQ.put((self.host.name, self.addr))
|
||||
|
||||
def __getstate__(self):
    """Return a picklable snapshot of this connection's attributes.

    The snapshot is a shallow copy of the instance dictionary in which the
    three timer bookkeeping fields are forced to None, so timer handles and
    callbacks are never handed to pickle. The live instance is not modified.
    """
    # Timer state is runtime-only; reset_overdue_timer() repopulates it.
    return {
        **self.__dict__,
        'overdue_timer': None,
        'overdue_callback': None,
        'timeout_duration': None,
    }
|
||||
|
||||
def __setstate__(self, state):
    """Rebuild the instance from a pickled attribute dictionary.

    Every entry of ``state`` is merged into the instance dictionary. Any of
    the three timer bookkeeping attributes that is still missing afterwards
    (for example in a pickle that predates them) is created with value None.
    """
    self.__dict__.update(state)
    # Older snapshots may lack these fields; make sure they always exist.
    for field in ('overdue_timer', 'overdue_callback', 'timeout_duration'):
        if not hasattr(self, field):
            setattr(self, field, None)
|
||||
|
||||
def registerDns(self):
    """Put this connection's (host name, address) pair on ``Host.dnsQ``."""
    entry = (self.host.name, self.addr)
    Host.dnsQ.put(entry)
|
||||
|
||||
@@ -123,7 +149,18 @@ class Connection:
|
||||
return d
|
||||
|
||||
def jsons(self):
    """Serialize this connection's attributes to a JSON string.

    The timer bookkeeping fields (``overdue_timer``, ``overdue_callback``,
    ``timeout_duration``) are left out, and the ``host`` back-pointer is
    replaced by the host's ``name`` (or None when no host is set). All other
    attributes are passed to ``json.dumps`` unchanged.

    Returns:
        The JSON text produced by ``json.dumps``.
    """
    # The previous unconditional `return json.dumps(self.__dict__)` made the
    # filtering below unreachable and tried to dump the timer handle,
    # callback and host object directly; it has been removed.
    skipped = ('overdue_timer', 'overdue_callback', 'timeout_duration')
    data = {}
    for key, value in self.__dict__.items():
        if key in skipped:
            continue
        if key == 'host':
            # Emit the host's name instead of the object itself.
            data[key] = value.name if value else None
        else:
            data[key] = value
    return json.dumps(data)
|
||||
|
||||
# set new state, return number of secs in previous state
|
||||
def newstate(self, state, now, when=0):
|
||||
@@ -151,10 +188,87 @@ class Connection:
|
||||
except Exception:
|
||||
pass
|
||||
self.addr = addr
|
||||
Connection.htab[addr] = self.host.name
|
||||
Connection.htab[addr] = self.host.nameconnection_count
|
||||
if self.host.isDynDns():
|
||||
Host.dnsQ.put((self.host.name, self.addr))
|
||||
return r
|
||||
|
||||
def reset_overdue_timer(self, timeout_seconds, callback):
    """Reset the overdue timer for this connection.

    Cancels any pending timer, records the new timeout and callback on the
    instance, and schedules ``callback(self)`` to be awaited as a task on the
    running event loop after ``timeout_seconds``.

    If no event loop is running in the current thread, nothing is scheduled
    and ``overdue_timer`` is left as None; the timeout and callback are still
    recorded.

    Args:
        timeout_seconds: Delay in seconds before the callback is invoked.
        callback: Async function called with this connection when the
            timer expires.
    """
    import asyncio

    # Cancel existing timer if any
    if self.overdue_timer and not self.overdue_timer.cancelled():
        self.overdue_timer.cancel()
    # Never keep a stale (cancelled) handle around if scheduling fails below.
    self.overdue_timer = None

    # Store parameters for later reference
    self.timeout_duration = timeout_seconds
    self.overdue_callback = callback

    async def timer_expired():
        await callback(self)

    try:
        # get_running_loop() raises RuntimeError when no loop is running,
        # unlike get_event_loop(), which may return or create an idle loop
        # on which the timer would never fire.
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No event loop running yet
        return

    self.overdue_timer = loop.call_later(
        timeout_seconds,
        lambda: loop.create_task(timer_expired()),
    )
|
||||
|
||||
def cancel_overdue_timer(self):
    """Stop any pending overdue timer and forget all timer bookkeeping.

    A timer that is set and not yet cancelled is cancelled; any exception
    raised while doing so is ignored (best effort). Afterwards
    ``overdue_timer``, ``overdue_callback`` and ``timeout_duration`` are all
    reset to None, whether or not a timer was present.
    """
    timer = self.overdue_timer
    if timer:
        try:
            already_cancelled = timer.cancelled()
            if not already_cancelled:
                timer.cancel()
        except Exception:
            # Cancellation is best effort; the references are dropped anyway.
            pass
    self.overdue_timer = self.overdue_callback = self.timeout_duration = None
|
||||
|
||||
def get_avg_rtt(self):
    """Return the mean of the strictly positive samples in ``self.rtts``.

    Samples that are zero or negative are ignored. Returns 0 when no
    positive sample is present.
    """
    positive = tuple(filter(lambda sample: sample > 0, self.rtts))
    if not positive:
        return 0
    return sum(positive) / len(positive)
|
||||
|
||||
def get_current_rtt(self):
    """Return the last entry of ``self.rtts``, or 0 when it is empty."""
    if not self.rtts:
        return 0
    return self.rtts[-1]
|
||||
|
||||
def check_rtt_threshold(self, warning_threshold=None, critical_threshold=None):
    """Classify the current RTT against optional thresholds.

    The value comes from ``self.get_current_rtt()``. A non-positive RTT is
    never classified. A threshold that is None (or otherwise falsy) is
    treated as disabled. The critical threshold is checked before the
    warning threshold.

    Args:
        warning_threshold: RTT above which the level is 'WARNING'.
        critical_threshold: RTT above which the level is 'CRITICAL'.

    Returns:
        Tuple ``(level, rtt)`` where level is None, 'WARNING' or 'CRITICAL'.
    """
    rtt = self.get_current_rtt()
    level = None
    if rtt > 0:
        if critical_threshold and rtt > critical_threshold:
            level = 'CRITICAL'
        elif warning_threshold and rtt > warning_threshold:
            level = 'WARNING'
    return (level, rtt)
|
||||
|
||||
|
||||
#
|
||||
@@ -224,14 +338,30 @@ class Host:
|
||||
def stateinfo(self):
|
||||
ddict = {}
|
||||
for d in self.__dict__:
|
||||
if d in ["alert_states", "plugin_data"]:
|
||||
continue
|
||||
if d == "connections":
|
||||
cl = []
|
||||
for c in ["IPv4", "IPv6"]:
|
||||
if c not in self.connections:
|
||||
continue
|
||||
# dirty ugly hack: fix conn to host backpointer
|
||||
cld = copy.deepcopy(self.connections[c].__dict__)
|
||||
cld["host"] = cld["host"].name
|
||||
# Create connection dict, excluding non-serializable timer objects
|
||||
conn = self.connections[c]
|
||||
cld = {}
|
||||
for key, value in conn.__dict__.items():
|
||||
# Skip timer-related fields that can't be serialized
|
||||
if key in ['overdue_timer', 'overdue_callback', 'timeout_duration']:
|
||||
continue
|
||||
# Handle host backpointer by converting to name
|
||||
if key == 'host':
|
||||
cld[key] = value.name if value else None
|
||||
else:
|
||||
# Safe copy for serializable values
|
||||
try:
|
||||
cld[key] = copy.deepcopy(value)
|
||||
except Exception:
|
||||
# If deepcopy fails, use shallow copy
|
||||
cld[key] = value
|
||||
cl.append(cld)
|
||||
ddict[d] = cl
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user