major rework for ver 3.0
This commit is contained in:
@@ -22,6 +22,8 @@ import urllib
|
||||
import httplib
|
||||
import threading
|
||||
import Queue
|
||||
import md5
|
||||
import zlib
|
||||
|
||||
from subprocess import Popen, STDOUT, PIPE
|
||||
|
||||
@@ -31,6 +33,7 @@ SEND_PUSHOVER=True
|
||||
|
||||
DEBUG = 0
|
||||
|
||||
MAXRECV = 32767
|
||||
LOGFILE = "/home/andreas/public_html/messages/andreas"
|
||||
PICKFILE = "/var/tmp/hbd.pick"
|
||||
AEMAIL = ["andreas@wrede.ca"]
|
||||
@@ -92,6 +95,50 @@ class LogDevice:
|
||||
self.fh.flush()
|
||||
|
||||
|
||||
def dicttos(ID, d, compress=False):
|
||||
s = []
|
||||
for k in d:
|
||||
if type(d[k]) == type(1.2):
|
||||
s.append("%s=%0.5f" % (k, d[k]))
|
||||
else:
|
||||
s.append("%s=%s" % (k, d[k]))
|
||||
pk = ";".join(s)
|
||||
if compress:
|
||||
zpk = zlib.compress(pk, 6)
|
||||
ID = "!"+ID
|
||||
else:
|
||||
zpk = pk
|
||||
return ID + ":" + zpk
|
||||
|
||||
|
||||
def stodict(msg):
|
||||
d = {}
|
||||
r0 = msg.split(':',1)
|
||||
if len(r0) == 1:
|
||||
return None
|
||||
if r0[0][0] == '!': # compressed
|
||||
pk = zlib.uncompress(r0[1])
|
||||
d['ID'] = r0[0][1:]
|
||||
else:
|
||||
pk = r0[1]
|
||||
d['ID'] = r0[0]
|
||||
r = pk.split(';')
|
||||
for v in r:
|
||||
vr = v.split('=', 1)
|
||||
k = vr[0].strip()
|
||||
if len(vr) == 1:
|
||||
d[k] = None
|
||||
else:
|
||||
v = vr[1].strip()
|
||||
if v[0].isdigit():
|
||||
v = eval(v)
|
||||
d[k] = v
|
||||
return d
|
||||
|
||||
|
||||
def oldmtodict(msg):
|
||||
return stodict('HTB:'+msg)
|
||||
|
||||
class Host:
|
||||
up = "up"
|
||||
down = "down"
|
||||
@@ -112,11 +159,21 @@ class Host:
|
||||
self.interval = 0
|
||||
self.doesack = -1
|
||||
self.cmds = []
|
||||
self.cver = 0
|
||||
self.latency = 0
|
||||
self.hdwcounts = [[0,0],[0,0],[0,0]]
|
||||
num += 1
|
||||
hosts[name] = self
|
||||
|
||||
|
||||
def setcver(self, cver):
|
||||
self.cver = cver
|
||||
|
||||
|
||||
def isDynDns(self):
|
||||
return self.name in dyndnshosts
|
||||
|
||||
|
||||
def newaddr(self, addr):
|
||||
if isIPv4(addr):
|
||||
if self.addr4:
|
||||
@@ -127,10 +184,14 @@ class Host:
|
||||
del htab[self.addr4]
|
||||
self.addr4 = addr
|
||||
htab[addr] = self.name
|
||||
if self.isDynDns():
|
||||
dnsQ.put((self.name, self.addr4))
|
||||
else:
|
||||
r = "new addr %s" % (addr)
|
||||
self.addr4 = addr
|
||||
htab[addr] = self.name
|
||||
if self.isDynDns():
|
||||
dnsQ.put((self.name, self.addr4))
|
||||
else:
|
||||
if self.addr6:
|
||||
if self.addr6 == addr:
|
||||
@@ -140,10 +201,14 @@ class Host:
|
||||
del htab[self.addr6]
|
||||
self.addr6 = addr
|
||||
htab[addr] = self.name
|
||||
if self.isDynDns():
|
||||
dnsQ.put((self.name, self.addr6))
|
||||
else:
|
||||
r = "new addr %s" % (addr)
|
||||
self.addr6 = addr
|
||||
htab[addr] = self.name
|
||||
if self.isDynDns():
|
||||
dnsQ.put((self.name, self.addr6))
|
||||
return r
|
||||
|
||||
|
||||
@@ -182,6 +247,12 @@ class Host:
|
||||
self.addr6 = self.addr
|
||||
del self.addr
|
||||
|
||||
try:
|
||||
a=self.latency
|
||||
except:
|
||||
self.latency = 0
|
||||
|
||||
|
||||
|
||||
def getstate(self):
|
||||
return self.state
|
||||
@@ -190,6 +261,8 @@ class Host:
|
||||
def dispstate(self):
|
||||
if self.state in ["down", "overdue"]:
|
||||
state = "<b>%s</b>" % self.state
|
||||
elif self.state in ["up", "UP"]:
|
||||
state = "%0.1f" % self.latency
|
||||
else:
|
||||
state = "%s" % self.state
|
||||
return state
|
||||
@@ -224,21 +297,22 @@ class Host:
|
||||
return s
|
||||
|
||||
|
||||
|
||||
def email(s, msg):
|
||||
if not SEND_EMAIL:
|
||||
return
|
||||
ret = "OK"
|
||||
toaddrs = AEMAIL
|
||||
fromaddr = "aew.heartbeat@wrede.ca"
|
||||
fromemail = "aew.heartbeat@wrede.ca"
|
||||
subj = "Info from %s: %s" % (NAME, s)
|
||||
date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime())
|
||||
body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromaddr, subj, date, msg)
|
||||
body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromemail, subj, date, msg)
|
||||
try:
|
||||
server = smtplib.SMTP(SMTPSERVER)
|
||||
if DEBUG > 0: server.set_debuglevel(1)
|
||||
server.sendmail(fromaddr, toaddrs, body)
|
||||
server.sendmail(fromemail, toaddrs, body)
|
||||
except smtplib.SMTPRecipientsRefused, errs:
|
||||
log("cannot send email: %s\n" % (errs))
|
||||
log(None, "cannot send email: %s\n" % (errs))
|
||||
ret = "Fail"
|
||||
except:
|
||||
print("smtp error: "+traceback.format_exc())
|
||||
@@ -293,9 +367,9 @@ answer
|
||||
|
||||
""" % D
|
||||
|
||||
if DEBUG > 0: log("DBG: nsup %s" % nsup)
|
||||
if DEBUG > 0: log(None, "DBG: nsup %s" % nsup)
|
||||
cmd = ["/usr/local/bin/nsupdate", "-k", "/etc/dhcpc/K%(domain)s.+157+00000." % D, "-v"]
|
||||
if DEBUG > 0: log("DBG: cmd %s" % cmd)
|
||||
if DEBUG > 0: log(None, "DBG: cmd %s" % cmd)
|
||||
try:
|
||||
p = Popen(cmd, shell=False, bufsize=1, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
|
||||
except OSError, e:
|
||||
@@ -357,17 +431,26 @@ def checkoverdue():
|
||||
continue
|
||||
timeout = hosts[h].interval+grace
|
||||
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
|
||||
m = "%s is overdue" % h
|
||||
if h in watchhosts:
|
||||
email("overdue", m)
|
||||
pushover(m)
|
||||
email("overdue", "%s is overdue" % h)
|
||||
pushover("%s is overdue" % h)
|
||||
hosts[h].newstate(Host.overdue, grace)
|
||||
log(m)
|
||||
log(h, "overdue")
|
||||
|
||||
|
||||
def log(m, service="heartbeat"):
|
||||
def log(host, m, service=None):
|
||||
if DEBUG > 0: print "Log: %s" % m
|
||||
msg = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))+": "+m+"\n"
|
||||
ts = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))
|
||||
if service:
|
||||
srv = "service %s: " % service
|
||||
else:
|
||||
srv = ""
|
||||
if host:
|
||||
hst = "%s " % host
|
||||
else:
|
||||
hst = ""
|
||||
|
||||
msg = "%s: %s%s%s\n" % (ts, hst, srv, m)
|
||||
msgs.append(msg)
|
||||
if logfmt == "msg":
|
||||
m2 = "%d|%s|%s\n" % (now, service, m)
|
||||
@@ -393,34 +476,63 @@ def dnsupdatethread():
|
||||
|
||||
#
|
||||
#
|
||||
def fromaddr(name, addr, boot, interval, acks):
|
||||
global htab
|
||||
|
||||
newh=False
|
||||
#
|
||||
#
|
||||
def readsock(sock):
|
||||
if DEBUG > 3: sys.stderr.write("readsock recfrom start")
|
||||
data, addr = sock.recvfrom(MAXRECV)
|
||||
# now = time.time()
|
||||
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addr))
|
||||
msg = stodict(data)
|
||||
if not msg: # Old hbc client
|
||||
if verbose: print "old hbc:", data
|
||||
oldclient = True
|
||||
msg = oldmtodict(data)
|
||||
else:
|
||||
oldclient = False
|
||||
if verbose: print "readsock = %s, %s" % (msg,addr)
|
||||
name = msg.get('name', "unknown")
|
||||
if not name in hosts: # was: hosts.has_key(name):
|
||||
host = Host(name)
|
||||
newh=True
|
||||
else:
|
||||
host = hosts[name]
|
||||
newh=False
|
||||
|
||||
host.doesack = acks
|
||||
res = host.newaddr(addr)
|
||||
if res:
|
||||
m = "%s %s" % (host.name, res)
|
||||
log(m)
|
||||
if name in dyndnshosts and isIPv4(addr): # don't try and cat ptr to IPv6 addr
|
||||
if DEBUG > 0: print "dbg: dynhost and %s: %s" % (isIPv4(addr), addr)
|
||||
dnsQ.put((name, addr))
|
||||
host.doesack = msg.get('acks', -1)
|
||||
host.setcver(msg.get('ver', 0))
|
||||
host.lastbeat = now
|
||||
|
||||
interval = msg.get('interval', 0)
|
||||
shutdown = msg.get('shutdown', 0)
|
||||
service = msg.get('service', "unknown")
|
||||
message = msg.get('msg', None)
|
||||
boot = msg.get('boot', 0)
|
||||
host.latency = now - msg.get('time', 0)
|
||||
|
||||
if boot:
|
||||
log(name, "booted")
|
||||
if name in watchhosts:
|
||||
email("address change", m)
|
||||
email("booted", m)
|
||||
pushover(m)
|
||||
if message:
|
||||
log(name, "msg: %s" % message, service=service)
|
||||
if name in watchhosts:
|
||||
email("msg", m)
|
||||
pushover(m)
|
||||
|
||||
res = host.newaddr(addr[0])
|
||||
if res:
|
||||
log(name, res)
|
||||
if name in watchhosts:
|
||||
email("address change", "%s %s" % (host.name, res))
|
||||
pushover(m)
|
||||
|
||||
host.lastbeat = now
|
||||
if host.getstate() != Host.up and interval > 0:
|
||||
lasts = host.state
|
||||
d = host.newstate(Host.up)
|
||||
m = "%s, back after being %s for %s" % (host.name, lasts, dur(d))
|
||||
log(m)
|
||||
m = "back after being %s for %s" % (lasts, dur(d))
|
||||
log(name, m)
|
||||
if name in watchhosts:
|
||||
email("back", name)
|
||||
pushover("%s is back" % name)
|
||||
@@ -430,101 +542,78 @@ def fromaddr(name, addr, boot, interval, acks):
|
||||
host.upcount += 1
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
def readsock(sock):
|
||||
global htab
|
||||
if DEBUG > 3: sys.stderr.write("readsock recfrom start")
|
||||
data, addr = sock.recvfrom(1024)
|
||||
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addr))
|
||||
pairs = string.split(data, ';')
|
||||
boot = 0
|
||||
shutdown = 0
|
||||
name = "unknown"
|
||||
service = "unknown"
|
||||
msg = None
|
||||
interval = 0
|
||||
deltaT = 0.0
|
||||
acks = -1
|
||||
for pair in pairs:
|
||||
l = string.split(pair, "=")
|
||||
key = l[0]
|
||||
if len(l) != 2:
|
||||
val = "0"
|
||||
else:
|
||||
val = l[1]
|
||||
if key == 'boot':
|
||||
boot += 1
|
||||
elif key == 'shutdown':
|
||||
shutdown += 1
|
||||
elif key == 'interval':
|
||||
interval = int(val)
|
||||
elif key == 'name':
|
||||
name = shortname(val)
|
||||
elif key == 'msg':
|
||||
msg = val
|
||||
elif key == 'service':
|
||||
service = val
|
||||
elif key == 'time':
|
||||
try:
|
||||
deltaT = now-float(val)
|
||||
except:
|
||||
pass
|
||||
elif key == 'acks':
|
||||
try:
|
||||
acks = int(val)
|
||||
except:
|
||||
acks = -1
|
||||
|
||||
if boot:
|
||||
if acks == -1:
|
||||
a = "(%s)" % acks
|
||||
else:
|
||||
a = ""
|
||||
m = "%s booted, deltaT %0.2g sec %s" % (name, deltaT, a)
|
||||
log(m)
|
||||
if name in watchhosts:
|
||||
email("booted", m)
|
||||
pushover(m)
|
||||
if msg:
|
||||
m = "%s msg: %s" % (name, msg)
|
||||
log(m, service=service)
|
||||
if name in watchhosts:
|
||||
email("msg", m)
|
||||
pushover(m)
|
||||
|
||||
fromaddr(name, addr[0], boot, interval, acks)
|
||||
if shutdown:
|
||||
m = "%s shutdown" % name
|
||||
log(m)
|
||||
log(name, "shutdown")
|
||||
if name in watchhosts:
|
||||
email("shutdown", m)
|
||||
email("shutdown", "%s shutdown" % name)
|
||||
pushover(m)
|
||||
try:
|
||||
hosts[name].newstate(Host.down)
|
||||
host.newstate(Host.down)
|
||||
except:
|
||||
pass
|
||||
if interval > 0:
|
||||
try:
|
||||
hosts[name].interval = interval
|
||||
host.interval = interval
|
||||
except:
|
||||
pass
|
||||
|
||||
rmsg="ACK"
|
||||
if len(hosts[name].cmds):
|
||||
rmsg=hosts[name].cmds[0]
|
||||
msg="command '%s' initiated" % hosts[name].cmds[0]
|
||||
email("%s cmd exec" % name, msg)
|
||||
pushover(msg)
|
||||
del hosts[name].cmds[0]
|
||||
log("%s command initiated" % name)
|
||||
try:
|
||||
ss=sock.sendto(rmsg, addr)
|
||||
rmsg = {'time': time.time()}
|
||||
op = 'ACK'
|
||||
if host.cver < 1:
|
||||
opkt = 'ACK'
|
||||
rmsg = 'ACK'
|
||||
else:
|
||||
opkt = dicttos('ACK', rmsg)
|
||||
ss=sock.sendto(opkt, addr)
|
||||
if verbose: print "sendto1: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])
|
||||
|
||||
# send any commands we have queued
|
||||
while len(host.cmds):
|
||||
op, rmsg = host.cmds[0]
|
||||
if op == 'CMD':
|
||||
email("%s cmd exec" % name, "command '%s' sent" % rmsg)
|
||||
del host.cmds[0]
|
||||
log(name, "command sent")
|
||||
if host.cver < 1:
|
||||
rmsg = rmsg['cmd']
|
||||
elif op == 'UPD':
|
||||
del host.cmds[0]
|
||||
log(name, "update initiated")
|
||||
if host.cver < 1:
|
||||
log(name," ver 0 does not support UPD")
|
||||
continue
|
||||
if host.cver < 1:
|
||||
opkt = rmsg
|
||||
op = ""
|
||||
else:
|
||||
opkt = dicttos(op, rmsg, True)
|
||||
try:
|
||||
ss=sock.sendto(opkt, addr)
|
||||
except Exception as e:
|
||||
print "opkt len is %s" % len(opkt)
|
||||
print "cannot send: %s" % e
|
||||
|
||||
if verbose: print "sendto2: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])
|
||||
if DEBUG > 2: print "msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
def updatecode(ucode, uname):
|
||||
|
||||
fail = None
|
||||
try:
|
||||
fh = open(ucode, "r")
|
||||
new_code = fh.read()
|
||||
fh.close()
|
||||
except Exception as e:
|
||||
fail = "cannot read new code: %s" % e
|
||||
if not fail:
|
||||
m = md5.new()
|
||||
m.update(new_code)
|
||||
icsum = m.hexdigest()
|
||||
rmsg = {'csum': icsum, 'code': new_code.encode('base64','strict') }
|
||||
hosts[uname].cmds.append(('UPD',rmsg))
|
||||
return fail
|
||||
#
|
||||
#
|
||||
|
||||
@@ -563,6 +652,7 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
def buildpage(self):
|
||||
res=self.buildhead(refresh=60)
|
||||
res.append("<H2>Heartbeat status %s</h2><h4> %s (%s)</H4>" % (VER, time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT')))
|
||||
|
||||
res.append("<table>")
|
||||
res.append("<tr><th>Host</th><th>State</th><th>Hr</th><th>Dy</th><th>Wk</th><th>IP4 Addr</th><th>IP6 Addr</th><th>Last change</th></tr>\n")
|
||||
hosts_sorted = hosts.keys()
|
||||
@@ -571,6 +661,7 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
res.append("<tr><td>%-24s</td><td>%-7s</td>%s<td>%-16s</td><td>%-16s</td><td>%-17s</td></tr>\n" % \
|
||||
(h, hosts[h].dispstate(), hosts[h].dispstats(), hosts[h].addr4, hosts[h].addr6, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(hosts[h].statetime))))
|
||||
res.append("</table>")
|
||||
|
||||
res.append("<h4>Log of Events</h4>")
|
||||
for m in msgs[len(msgs)-30:]:
|
||||
res.append("%s<BR>" % m)
|
||||
@@ -624,9 +715,14 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
if uarg[1][:2] == "c=":
|
||||
ucmd=uarg[1][2:]
|
||||
if ucmd != "" and uname != "" and hosts.has_key(uname):
|
||||
hosts[uname].cmds.append(urllib.unquote(ucmd))
|
||||
rmsg = {'cmd': urllib.unquote(ucmd)}
|
||||
hosts[uname].cmds.append(('CMD', rmsg))
|
||||
res=self.buildhead()
|
||||
res.append("2Done")
|
||||
else:
|
||||
code=400
|
||||
cause='arg 2 error'
|
||||
res=self.builderror(code, cause, "ucmd %s uname %s" % (ucmd, uname))
|
||||
|
||||
elif upar[0] == "/d": # drop host /d?h=melschserver
|
||||
if len(uarg) != 1 or len(uarg[0]) < 3:
|
||||
@@ -637,8 +733,8 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
if uarg[0][:2] == "h=":
|
||||
uname=uarg[0][2:]
|
||||
if uname != "" and hosts.has_key(uname):
|
||||
log(uname, "dropped")
|
||||
del hosts[uname]
|
||||
log("%s dropped" % uname)
|
||||
res=self.buildhead()
|
||||
res.append("Done")
|
||||
|
||||
@@ -657,13 +753,36 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
else:
|
||||
ll="name %s not found" % uname
|
||||
res.append(ll)
|
||||
log(ll)
|
||||
log(uname, ll)
|
||||
|
||||
elif upar[0] == "/u": # update
|
||||
uname=""
|
||||
ucode=""
|
||||
if len(uarg) != 2 or len(uarg[0]) < 3 or len(uarg[1]) < 3:
|
||||
code=400
|
||||
cause='Argument error'
|
||||
res=self.builderror(code, cause, "need h= and c= arguments")
|
||||
else:
|
||||
if uarg[0][:2] == "h=":
|
||||
uname=uarg[0][2:]
|
||||
if uarg[1][:2] == "c=":
|
||||
ucode=uarg[1][2:]
|
||||
if ucode != "" and uname != "" and hosts.has_key(uname):
|
||||
err = updatecode(ucode, urllib.unquote(uname))
|
||||
res=self.buildhead()
|
||||
res.append("3 Done: %s" % err if err else "OK")
|
||||
else:
|
||||
code=400
|
||||
cause='Argument error'
|
||||
res=self.builderror(code, cause, "host not found")
|
||||
|
||||
|
||||
|
||||
elif upar[0] == "/r": # restart
|
||||
res=self.buildhead()
|
||||
res.append("restart request")
|
||||
xsig=signal.SIGHUP
|
||||
log("restart request")
|
||||
log(None, "restart request")
|
||||
|
||||
else:
|
||||
code=404
|
||||
@@ -715,7 +834,7 @@ def closeup():
|
||||
except:
|
||||
pass
|
||||
|
||||
log("restarting")
|
||||
log(None, "restarting")
|
||||
try:
|
||||
logf.close()
|
||||
except:
|
||||
@@ -879,7 +998,7 @@ if os.path.exists(PICKFILE):
|
||||
for h in hosts.keys():
|
||||
hosts[h].fixup()
|
||||
for h in drophosts:
|
||||
if h in hosts: # was: hosts.has_key(h):
|
||||
if h in hosts:
|
||||
del hosts[h]
|
||||
|
||||
|
||||
@@ -887,7 +1006,7 @@ now = time.time()
|
||||
startsec = int(now) % interval
|
||||
|
||||
|
||||
log("Starting %s" % VER)
|
||||
log(None, "Starting %s" % VER)
|
||||
atexit.register(on_exit)
|
||||
|
||||
ilist = []
|
||||
|
||||
Reference in New Issue
Block a user