major rework for ver 3.0

This commit is contained in:
2016-04-22 00:00:51 -04:00
parent b3fe352b13
commit f90038beae
2 changed files with 528 additions and 245 deletions
+293 -129
View File
@@ -11,6 +11,11 @@ import string
import select import select
import errno import errno
import traceback import traceback
import urllib2
import md5
import shutil
import zlib
import subprocess
try: try:
import lockfile import lockfile
@@ -33,123 +38,269 @@ import syslog
PORT = 50003 PORT = 50003
INTERVAL = 10 INTERVAL = 10
PIDFILE = '/tmp/hbc.pid' PIDFILE = '/tmp/hbc.pid'
DBG = False VER = 1
MAXRECV = 32767
socks = None running = True
up = True dorestart = False
ackcount = 0
class NullDevice: class NullDevice:
def write(self, s): def write(self, s):
pass pass
class Conn:
def __init__(self, conId, addr, port, af):
self.conId = conId
self.addr = addr
self.port = port
self.af = af
self.ackcount = 0 # num of accks received
self.lastack = 0 # time() last ACK was received
self.send = 0
self.lastsend = 0 # time() last msg was sent
self.rtts = []
self.sock=socket.socket(af, socket.SOCK_DGRAM)
self.sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \
self.sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1)
def sendto(self, msg, ID = 'HTB'): # default ID is HearTBeat
msg['name'] = iam
msg['id'] = self.conId
msg['ver'] = VER
msg['time'] = time.time()
m = dicttos(ID, msg)
mz = zlib.compress(m,9)
if verbose: print "conn.send('%s', (%s:%s) %s>%s)" % (msg, self.addr, self.port, len(m), len(mz))
self.sock.sendto(m, (self.addr, self.port))
self.send += 1
self.lastsend = time.time()
def ack(self, msgDict):
self.lastack = time.time()
self.lastacksent = float(msgDict.get('time','0'))
if verbose: print "ack RTT: %0.1f ms" % ((self.lastack - self.lastacksent) * 1000.0)
self.ackcount += 1
def close(self):
if self.sock:
self.sock.close()
self.sock = None
def dicttos(ID, d, compress=False):
s = []
for k in d:
if type(d[k]) == type(1.2):
s.append("%s=%0.5f" % (k, d[k]))
else:
s.append("%s=%s" % (k, d[k]))
pk = ";".join(s)
if compress:
zpk = zlib.compress(pk, 6)
ID = "!"+ID
else:
zpk = pk
return ID + ":" + zpk
def stodict(msg):
d = {}
r0 = msg.split(':',1)
if len(r0) == 1:
return None
if r0[0][0] == '!': # compressed
pk = zlib.decompress(msg[len(r0[0])+1:])
d['ID'] = r0[0][1:]
else:
pk = r0[1]
d['ID'] = r0[0]
r = pk.split(';')
for v in r:
vr = v.split('=', 1)
k = vr[0].strip()
if len(vr) == 1:
d[k] = None
else:
v = vr[1].strip()
try:
if v[0].isdigit():
v = eval(v)
except:
pass
d[k] = v
return d
def syslogtrace(note): def syslogtrace(note):
logm = '%s hbc died: \n%s' % (note, traceback.format_exc()) logm = '%s hbc died: \n%s' % (note, traceback.format_exc())
for l in logm.split('\n'): for l in logm.split('\n'):
syslog.syslog(syslog.LOG_ERR, ' tb: %s' % l) syslog.syslog(syslog.LOG_ERR, ' tb: %s' % l)
if verbose:
print logm
def getsock(host):
try: conId = 1
rs=socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP) def createConnections(hosts):
except socket.gaierror: global conId
logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc()) for host in hosts:
if DBG: print logm if verbose: print "createConnections for %s" % host
return None try:
socks = [] rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
for r in rs: except socket.gaierror:
if DBG: print "address %s" % str(r) logm = '%s hbc died: \n%s' % ('createConnections', traceback.format_exc())
if r[0] in [10, 28, 30]: if verbose: print logm
af_type=socket.AF_INET6 return None
elif r[0] == 2: for r in rs:
af_type=socket.AF_INET if verbose: print "address %s" % str(r)
else: if r[0] in [10, 28, 30]:
print "dont know this net type: %s" % r[0][0] af=socket.AF_INET6
sys.exit(1) elif r[0] == 2:
if verbose: af=socket.AF_INET
syslog.syslog(syslog.LOG_ERR, "socktype: %s" % af_type) else:
sock=socket.socket(af_type, socket.SOCK_DGRAM) print "dont know this net type: %s" % r[0][0]
sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \ sys.exit(1)
sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1)
if verbose: syslog.syslog(syslog.LOG_ERR, "get socket %s" % sock)
socks.append(sock)
return socks addr = r[4][0]
conns[conId] = Conn(conId, addr, hb_port, af)
conId += 1
def socksend(msg, tohost): def doexec(conn, data):
global socks try:
ro = subprocess.check_output(data, stderr=subprocess.STDOUT, shell=True)
fail = "OK"
except subprocess.CalledProcessError as e:
ro = str(e)
fail = "CalledProcessError"
except Exception as e:
syslogtrace('System')
ro = None
fail = "cmd failed: %s" % e
msg={'service': 'command', 'msg': fail+" "+ro}
conns[conn].sendto(msg)
if socks == None:
socks = getsock(tohost[0]) def doupdate(conn, msgDict):
for sock in socks: fail = None
if DBG: print "socksend: sending msg=%s on socket=%s" % (msg, sock) try:
sock.sendto(msg, tohost) code = msgDict['code'].decode('base64')
if verbose: syslog.syslog(syslog.LOG_ERR, "msg %s sent" % msg) csum = msgDict['csum']
except:
fail = "csum/code missing"
if not fail:
fail = doupdateone(code, csum)
msg={'service': 'update', 'msg': fail if fail else "OK"}
conns[conn].sendto(msg)
return fail
def doupdateone(code, csum):
m = md5.new()
m.update(code)
icsum = m.hexdigest()
if icsum != csum:
return "checksum error"
fn = sys.argv[0]
ofn = "%.sav" % fn
try:
shutil.copy2(fn, ofn)
except Exception as e:
return "cannot make backup copy: %s" % e
try:
fh=open(fn, "w")
fh.write(code)
fh.close()
except Exception as e:
return "cannot write new code: %s" % e
return None
def restart():
if verbose: print "restart: execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs)
os.execv(sys.argv[0], [sys.argv[0]]+cmdargs)
print "should not be here"
def process(): def process():
global up, socks, ackcount global running, dorestart
if socks == None: ifiles = {}
socks=getsock(tohost[0]) conIds = {}
for conn in conns:
ifiles[conns[conn].sock.fileno()] = conns[conn].sock
conIds[conns[conn].sock.fileno()] = conn
nextReport = time.time()
ackcount=0 while running:
lastT=time.time() while time.time() < nextReport:
ifiles = [] sleep=nextReport - time.time()
for sock in socks: if verbose: print "process: sleep %s" % sleep
ifiles.append(sock.fileno()) if sleep <= 0:
while up: break
sleep=(lastT+interval) - time.time()
if verbose: syslog.syslog(syslog.LOG_ERR, "sleep %s" % sleep)
if sleep > 0:
try: try:
r=select.select(ifiles,[],[],sleep) r=select.select(ifiles.keys(),[],[],sleep)
except KeyboardInterrupt:
running = False
break
except: except:
if up: if running:
syslogtrace('select') syslogtrace('select')
break break
if verbose: syslog.syslog(syslog.LOG_ERR, "r is %s" % str(r)) if verbose: print "process: r is %s" % str(r)
cont = False for rfh in r[0]:
for sock in socks: conn = conIds[rfh]
if sock.fileno() in r[0]: data, addr = ifiles[rfh].recvfrom(MAXRECV)
data, addr = sock.recvfrom(1024) if verbose: print "sock.recvfrom: %s (%s) %s" % (addr, len(data), data[:4])
if data == "ACK": msgDict = stodict(data)
ackcount+=1 if verbose: print "sock.recvfrom: %s (%s) %s" % (addr, len(data), str(msgDict)[:80])
else: if msgDict['ID'] == "ACK":
try: conns[conn].ack(msgDict)
os.system(data) elif msgDict['ID'] == "UPD":
except: if doupdate(conn, msgDict) == None:
syslogtrace('System') if verbose: print "process: restart after update"
pass dorestart = True
cont = True break
continue elif msgDict['ID'] == "CMD":
if cont: doexec(conn, msgDict['cmd'])
continue else:
lastT=time.time() doexec(conn, data) # deprecated until no more VER - hbc
for hb_host in hb_hosts: if dorestart:
try: running = False
msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount) break
if verbose: syslog.syslog(syslog.LOG_ERR, "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)) if not running:
socksend(msg, (hb_host, hb_port)) break
except: for conn in conns:
logm = '%s hbc died: \n%s' % ('socksend', traceback.format_exc()) msg={'interval': interval, 'acks': conns[conn].ackcount}
if DBG: print logm conns[conn].sendto(msg)
pass nextReport += interval
def cleanup(a, b): if verbose: print "process: done running"
global up, socks, ackcount
up = False def cleanup():
syslog.syslog(syslog.LOG_ERR, 'exit a=%s b=%s' % (str(a), str(b))) global running
msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount) running = False
for hb_host in hb_hosts: for conn in conns:
if verbose: syslog.syslog(syslog.LOG_ERR, "hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)) msg={'shutdown': 1, 'acks': conns[conn].ackcount}
socksend(msg, (hb_host, hb_port)) conns[conn].sendto(msg)
conns[conn].close()
time.sleep(1) time.sleep(1)
for sock in socks: closeall()
sock.close()
def closeall():
for conn in conns:
conns[conn].close()
syslog.syslog(syslog.LOG_ERR, 'exit')
msgonly=False msgonly=False
@@ -158,9 +309,11 @@ verbose=False
fdaemon=False fdaemon=False
optlist=[] optlist=[]
args=[] args=[]
msgboot=[] msgboot={}
home=os.environ['HOME'] home=os.environ['HOME']
configfile="%s/.hbrc" % home configfile="%s/.hbrc" % home
cmdargs = []
try: try:
optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v') optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
@@ -169,22 +322,28 @@ except:
for o,a in optlist: for o,a in optlist:
if o == '-b': if o == '-b':
msgboot.append("boot=1") msgboot['boot']=1
msgonly=True
elif o == '-c': elif o == '-c':
configfile=a configfile=a
cmdargs += [o, a]
elif o == '-d': elif o == '-d':
fdaemon=True fdaemon=True
cmdargs += [o]
elif o == '-h': elif o == '-h':
helpflag=True helpflag=True
elif o == '-m': elif o == '-m':
msgboot.append("service=%s" % "service") msgboot['service'] = "service"
a.replace(';',':') msgboot['msg'] = a
msgboot.append("msg=%s" % a)
msgonly=True msgonly=True
elif o == '-v': elif o == '-v':
verbose=True verbose=True
cmdargs += [o]
cmdargs += args
if verbose: print "cmdargs for restart are %s" % cmdargs
if helpflag: if helpflag:
print "hbc HeartBeatClient" print "hbc HeartBeatClient"
print "usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]" print "usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]"
@@ -217,7 +376,7 @@ hb_hosts=[]
iam=socket.gethostname() iam=socket.gethostname()
try: try:
f=open(configfile,"r") f=open(configfile,"r")
if verbose: print "notice: using config file %s" % configfile if verbose: print "notice: using config file %s" % configfile
except: except:
if verbose: print "warning: running without config file: %s" % configfile if verbose: print "warning: running without config file: %s" % configfile
@@ -244,50 +403,43 @@ if f:
if len(args) != 0: if len(args) != 0:
hb_hosts=args hb_hosts=args
if len(hb_hosts) == 0: if len(hb_hosts) == 0:
print "no hb server specified" print "no hb server specified"
sys.exit(1) sys.exit(1)
#
if verbose: if verbose:
print "notice: hb_hosts: %s" % str(hb_hosts) print "notice: hb_hosts: %s" % str(hb_hosts)
print "notice: hb_port: %s" % hb_port print "notice: hb_port: %s" % hb_port
print "notice: interval: %s" % interval print "notice: interval: %s" % interval
print "notice: iam: %s" % iam print "notice: iam: %s" % iam
print "notice: msgonly: %s" % msgonly
print "notice: msgboot: %s" % msgboot
if not msgonly: if not msgonly:
msgboot.append("interval=%s" % interval) msgboot['interval'] = interval
conns = {}
createConnections(hb_hosts)
if verbose: print "%s connections created" % (len(conns))
if len(msgboot) > 0: if len(msgboot) > 0:
if DBG: print "on boot" if verbose: print "on boot"
msgboot.append("name=%s" % iam) msgboot['acks'] = 0
msgboot.append("time=%s" % time.time()) for conn in conns:
msgboot.append("acks=0") conns[conn].sendto(msgboot)
msg=";".join(msgboot)
while 1:
fail=0
for hb_host in hb_hosts:
try:
if DBG: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)
socksend(msg, (hb_host, hb_port))
except:
logm = '%s hbc died: \n%s' % ('socksend2', traceback.format_exc())
if DBG: print logm
fail=1
if fail:
time.sleep(10)
else:
break
if verbose: print "msgboot done msgonly=%s" % msgonly
if msgonly: if msgonly:
sys.exit(0) if verbose: print "msgboot done msgonly=%s" % msgonly
closeall()
sys.exit(0)
#
# #
syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON) syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON)
if fdaemon: if fdaemon:
@@ -326,22 +478,34 @@ if fdaemon:
} }
context.files_preserve = [] context.files_preserve = []
for sock in socks: for conn in conns:
context.files_preserve += [sock, sock.fileno()] context.files_preserve += [conns[conn].sock, conns[conn].sock.fileno()]
with context: with context:
syslog.syslog(syslog.LOG_ERR, 'starting heartbeat to %s' % ','.join(hb_hosts)) syslog.syslog(syslog.LOG_ERR, 'starting heartbeat to %s' % ','.join(hb_hosts))
up = True running = True
try: try:
process() process()
except: except:
syslogtrace('process') syslogtrace('process')
cleanup(0, None) if verbose: print "main: cleanup"
cleanup()
if dorestart:
if verbose: print "main: restart"
restart()
else: else:
up = True running = True
try: try:
if verbose: print "starting loop process"
process() process()
except: except Exception as e:
if verbose: print "err: process exit: %s" % e
syslogtrace('process') syslogtrace('process')
cleanup(0, None) if verbose: print "main: cleanup"
cleanup()
if dorestart:
if verbose: print "main: restart"
restart()
+235 -116
View File
@@ -22,6 +22,8 @@ import urllib
import httplib import httplib
import threading import threading
import Queue import Queue
import md5
import zlib
from subprocess import Popen, STDOUT, PIPE from subprocess import Popen, STDOUT, PIPE
@@ -31,6 +33,7 @@ SEND_PUSHOVER=True
DEBUG = 0 DEBUG = 0
MAXRECV = 32767
LOGFILE = "/home/andreas/public_html/messages/andreas" LOGFILE = "/home/andreas/public_html/messages/andreas"
PICKFILE = "/var/tmp/hbd.pick" PICKFILE = "/var/tmp/hbd.pick"
AEMAIL = ["andreas@wrede.ca"] AEMAIL = ["andreas@wrede.ca"]
@@ -92,6 +95,50 @@ class LogDevice:
self.fh.flush() self.fh.flush()
def dicttos(ID, d, compress=False):
s = []
for k in d:
if type(d[k]) == type(1.2):
s.append("%s=%0.5f" % (k, d[k]))
else:
s.append("%s=%s" % (k, d[k]))
pk = ";".join(s)
if compress:
zpk = zlib.compress(pk, 6)
ID = "!"+ID
else:
zpk = pk
return ID + ":" + zpk
def stodict(msg):
d = {}
r0 = msg.split(':',1)
if len(r0) == 1:
return None
if r0[0][0] == '!': # compressed
pk = zlib.uncompress(r0[1])
d['ID'] = r0[0][1:]
else:
pk = r0[1]
d['ID'] = r0[0]
r = pk.split(';')
for v in r:
vr = v.split('=', 1)
k = vr[0].strip()
if len(vr) == 1:
d[k] = None
else:
v = vr[1].strip()
if v[0].isdigit():
v = eval(v)
d[k] = v
return d
def oldmtodict(msg):
return stodict('HTB:'+msg)
class Host: class Host:
up = "up" up = "up"
down = "down" down = "down"
@@ -112,11 +159,21 @@ class Host:
self.interval = 0 self.interval = 0
self.doesack = -1 self.doesack = -1
self.cmds = [] self.cmds = []
self.cver = 0
self.latency = 0
self.hdwcounts = [[0,0],[0,0],[0,0]] self.hdwcounts = [[0,0],[0,0],[0,0]]
num += 1 num += 1
hosts[name] = self hosts[name] = self
def setcver(self, cver):
self.cver = cver
def isDynDns(self):
return self.name in dyndnshosts
def newaddr(self, addr): def newaddr(self, addr):
if isIPv4(addr): if isIPv4(addr):
if self.addr4: if self.addr4:
@@ -127,10 +184,14 @@ class Host:
del htab[self.addr4] del htab[self.addr4]
self.addr4 = addr self.addr4 = addr
htab[addr] = self.name htab[addr] = self.name
if self.isDynDns():
dnsQ.put((self.name, self.addr4))
else: else:
r = "new addr %s" % (addr) r = "new addr %s" % (addr)
self.addr4 = addr self.addr4 = addr
htab[addr] = self.name htab[addr] = self.name
if self.isDynDns():
dnsQ.put((self.name, self.addr4))
else: else:
if self.addr6: if self.addr6:
if self.addr6 == addr: if self.addr6 == addr:
@@ -140,10 +201,14 @@ class Host:
del htab[self.addr6] del htab[self.addr6]
self.addr6 = addr self.addr6 = addr
htab[addr] = self.name htab[addr] = self.name
if self.isDynDns():
dnsQ.put((self.name, self.addr6))
else: else:
r = "new addr %s" % (addr) r = "new addr %s" % (addr)
self.addr6 = addr self.addr6 = addr
htab[addr] = self.name htab[addr] = self.name
if self.isDynDns():
dnsQ.put((self.name, self.addr6))
return r return r
@@ -182,6 +247,12 @@ class Host:
self.addr6 = self.addr self.addr6 = self.addr
del self.addr del self.addr
try:
a=self.latency
except:
self.latency = 0
def getstate(self): def getstate(self):
return self.state return self.state
@@ -190,6 +261,8 @@ class Host:
def dispstate(self): def dispstate(self):
if self.state in ["down", "overdue"]: if self.state in ["down", "overdue"]:
state = "<b>%s</b>" % self.state state = "<b>%s</b>" % self.state
elif self.state in ["up", "UP"]:
state = "%0.1f" % self.latency
else: else:
state = "%s" % self.state state = "%s" % self.state
return state return state
@@ -224,21 +297,22 @@ class Host:
return s return s
def email(s, msg): def email(s, msg):
if not SEND_EMAIL: if not SEND_EMAIL:
return return
ret = "OK" ret = "OK"
toaddrs = AEMAIL toaddrs = AEMAIL
fromaddr = "aew.heartbeat@wrede.ca" fromemail = "aew.heartbeat@wrede.ca"
subj = "Info from %s: %s" % (NAME, s) subj = "Info from %s: %s" % (NAME, s)
date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime()) date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime())
body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromaddr, subj, date, msg) body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromemail, subj, date, msg)
try: try:
server = smtplib.SMTP(SMTPSERVER) server = smtplib.SMTP(SMTPSERVER)
if DEBUG > 0: server.set_debuglevel(1) if DEBUG > 0: server.set_debuglevel(1)
server.sendmail(fromaddr, toaddrs, body) server.sendmail(fromemail, toaddrs, body)
except smtplib.SMTPRecipientsRefused, errs: except smtplib.SMTPRecipientsRefused, errs:
log("cannot send email: %s\n" % (errs)) log(None, "cannot send email: %s\n" % (errs))
ret = "Fail" ret = "Fail"
except: except:
print("smtp error: "+traceback.format_exc()) print("smtp error: "+traceback.format_exc())
@@ -293,9 +367,9 @@ answer
""" % D """ % D
if DEBUG > 0: log("DBG: nsup %s" % nsup) if DEBUG > 0: log(None, "DBG: nsup %s" % nsup)
cmd = ["/usr/local/bin/nsupdate", "-k", "/etc/dhcpc/K%(domain)s.+157+00000." % D, "-v"] cmd = ["/usr/local/bin/nsupdate", "-k", "/etc/dhcpc/K%(domain)s.+157+00000." % D, "-v"]
if DEBUG > 0: log("DBG: cmd %s" % cmd) if DEBUG > 0: log(None, "DBG: cmd %s" % cmd)
try: try:
p = Popen(cmd, shell=False, bufsize=1, stdin=PIPE, stdout=PIPE, stderr=STDOUT) p = Popen(cmd, shell=False, bufsize=1, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
except OSError, e: except OSError, e:
@@ -357,17 +431,26 @@ def checkoverdue():
continue continue
timeout = hosts[h].interval+grace timeout = hosts[h].interval+grace
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout: if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
m = "%s is overdue" % h
if h in watchhosts: if h in watchhosts:
email("overdue", m) email("overdue", "%s is overdue" % h)
pushover(m) pushover("%s is overdue" % h)
hosts[h].newstate(Host.overdue, grace) hosts[h].newstate(Host.overdue, grace)
log(m) log(h, "overdue")
def log(m, service="heartbeat"): def log(host, m, service=None):
if DEBUG > 0: print "Log: %s" % m if DEBUG > 0: print "Log: %s" % m
msg = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))+": "+m+"\n" ts = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))
if service:
srv = "service %s: " % service
else:
srv = ""
if host:
hst = "%s " % host
else:
hst = ""
msg = "%s: %s%s%s\n" % (ts, hst, srv, m)
msgs.append(msg) msgs.append(msg)
if logfmt == "msg": if logfmt == "msg":
m2 = "%d|%s|%s\n" % (now, service, m) m2 = "%d|%s|%s\n" % (now, service, m)
@@ -393,34 +476,63 @@ def dnsupdatethread():
# #
# #
def fromaddr(name, addr, boot, interval, acks): #
global htab #
def readsock(sock):
newh=False if DEBUG > 3: sys.stderr.write("readsock recfrom start")
data, addr = sock.recvfrom(MAXRECV)
# now = time.time()
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addr))
msg = stodict(data)
if not msg: # Old hbc client
if verbose: print "old hbc:", data
oldclient = True
msg = oldmtodict(data)
else:
oldclient = False
if verbose: print "readsock = %s, %s" % (msg,addr)
name = msg.get('name', "unknown")
if not name in hosts: # was: hosts.has_key(name): if not name in hosts: # was: hosts.has_key(name):
host = Host(name) host = Host(name)
newh=True newh=True
else: else:
host = hosts[name] host = hosts[name]
newh=False
host.doesack = acks host.doesack = msg.get('acks', -1)
res = host.newaddr(addr) host.setcver(msg.get('ver', 0))
if res: host.lastbeat = now
m = "%s %s" % (host.name, res)
log(m) interval = msg.get('interval', 0)
if name in dyndnshosts and isIPv4(addr): # don't try and cat ptr to IPv6 addr shutdown = msg.get('shutdown', 0)
if DEBUG > 0: print "dbg: dynhost and %s: %s" % (isIPv4(addr), addr) service = msg.get('service', "unknown")
dnsQ.put((name, addr)) message = msg.get('msg', None)
boot = msg.get('boot', 0)
host.latency = now - msg.get('time', 0)
if boot:
log(name, "booted")
if name in watchhosts: if name in watchhosts:
email("address change", m) email("booted", m)
pushover(m)
if message:
log(name, "msg: %s" % message, service=service)
if name in watchhosts:
email("msg", m)
pushover(m)
res = host.newaddr(addr[0])
if res:
log(name, res)
if name in watchhosts:
email("address change", "%s %s" % (host.name, res))
pushover(m) pushover(m)
host.lastbeat = now
if host.getstate() != Host.up and interval > 0: if host.getstate() != Host.up and interval > 0:
lasts = host.state lasts = host.state
d = host.newstate(Host.up) d = host.newstate(Host.up)
m = "%s, back after being %s for %s" % (host.name, lasts, dur(d)) m = "back after being %s for %s" % (lasts, dur(d))
log(m) log(name, m)
if name in watchhosts: if name in watchhosts:
email("back", name) email("back", name)
pushover("%s is back" % name) pushover("%s is back" % name)
@@ -430,101 +542,78 @@ def fromaddr(name, addr, boot, interval, acks):
host.upcount += 1 host.upcount += 1
#
#
def readsock(sock):
global htab
if DEBUG > 3: sys.stderr.write("readsock recfrom start")
data, addr = sock.recvfrom(1024)
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addr))
pairs = string.split(data, ';')
boot = 0
shutdown = 0
name = "unknown"
service = "unknown"
msg = None
interval = 0
deltaT = 0.0
acks = -1
for pair in pairs:
l = string.split(pair, "=")
key = l[0]
if len(l) != 2:
val = "0"
else:
val = l[1]
if key == 'boot':
boot += 1
elif key == 'shutdown':
shutdown += 1
elif key == 'interval':
interval = int(val)
elif key == 'name':
name = shortname(val)
elif key == 'msg':
msg = val
elif key == 'service':
service = val
elif key == 'time':
try:
deltaT = now-float(val)
except:
pass
elif key == 'acks':
try:
acks = int(val)
except:
acks = -1
if boot:
if acks == -1:
a = "(%s)" % acks
else:
a = ""
m = "%s booted, deltaT %0.2g sec %s" % (name, deltaT, a)
log(m)
if name in watchhosts:
email("booted", m)
pushover(m)
if msg:
m = "%s msg: %s" % (name, msg)
log(m, service=service)
if name in watchhosts:
email("msg", m)
pushover(m)
fromaddr(name, addr[0], boot, interval, acks)
if shutdown: if shutdown:
m = "%s shutdown" % name log(name, "shutdown")
log(m)
if name in watchhosts: if name in watchhosts:
email("shutdown", m) email("shutdown", "%s shutdown" % name)
pushover(m) pushover(m)
try: try:
hosts[name].newstate(Host.down) host.newstate(Host.down)
except: except:
pass pass
if interval > 0: if interval > 0:
try: try:
hosts[name].interval = interval host.interval = interval
except: except:
pass pass
rmsg="ACK" rmsg = {'time': time.time()}
if len(hosts[name].cmds): op = 'ACK'
rmsg=hosts[name].cmds[0] if host.cver < 1:
msg="command '%s' initiated" % hosts[name].cmds[0] opkt = 'ACK'
email("%s cmd exec" % name, msg) rmsg = 'ACK'
pushover(msg) else:
del hosts[name].cmds[0] opkt = dicttos('ACK', rmsg)
log("%s command initiated" % name) ss=sock.sendto(opkt, addr)
try: if verbose: print "sendto1: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])
ss=sock.sendto(rmsg, addr)
# send any commands we have queued
while len(host.cmds):
op, rmsg = host.cmds[0]
if op == 'CMD':
email("%s cmd exec" % name, "command '%s' sent" % rmsg)
del host.cmds[0]
log(name, "command sent")
if host.cver < 1:
rmsg = rmsg['cmd']
elif op == 'UPD':
del host.cmds[0]
log(name, "update initiated")
if host.cver < 1:
log(name," ver 0 does not support UPD")
continue
if host.cver < 1:
opkt = rmsg
op = ""
else:
opkt = dicttos(op, rmsg, True)
try:
ss=sock.sendto(opkt, addr)
except Exception as e:
print "opkt len is %s" % len(opkt)
print "cannot send: %s" % e
if verbose: print "sendto2: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])
if DEBUG > 2: print "msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss) if DEBUG > 2: print "msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss)
except:
pass
def updatecode(ucode, uname):
fail = None
try:
fh = open(ucode, "r")
new_code = fh.read()
fh.close()
except Exception as e:
fail = "cannot read new code: %s" % e
if not fail:
m = md5.new()
m.update(new_code)
icsum = m.hexdigest()
rmsg = {'csum': icsum, 'code': new_code.encode('base64','strict') }
hosts[uname].cmds.append(('UPD',rmsg))
return fail
# #
# #
@@ -563,6 +652,7 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
def buildpage(self): def buildpage(self):
res=self.buildhead(refresh=60) res=self.buildhead(refresh=60)
res.append("<H2>Heartbeat status %s</h2><h4> %s (%s)</H4>" % (VER, time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT'))) res.append("<H2>Heartbeat status %s</h2><h4> %s (%s)</H4>" % (VER, time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT')))
res.append("<table>") res.append("<table>")
res.append("<tr><th>Host</th><th>State</th><th>Hr</th><th>Dy</th><th>Wk</th><th>IP4 Addr</th><th>IP6 Addr</th><th>Last change</th></tr>\n") res.append("<tr><th>Host</th><th>State</th><th>Hr</th><th>Dy</th><th>Wk</th><th>IP4 Addr</th><th>IP6 Addr</th><th>Last change</th></tr>\n")
hosts_sorted = hosts.keys() hosts_sorted = hosts.keys()
@@ -571,6 +661,7 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
res.append("<tr><td>%-24s</td><td>%-7s</td>%s<td>%-16s</td><td>%-16s</td><td>%-17s</td></tr>\n" % \ res.append("<tr><td>%-24s</td><td>%-7s</td>%s<td>%-16s</td><td>%-16s</td><td>%-17s</td></tr>\n" % \
(h, hosts[h].dispstate(), hosts[h].dispstats(), hosts[h].addr4, hosts[h].addr6, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(hosts[h].statetime)))) (h, hosts[h].dispstate(), hosts[h].dispstats(), hosts[h].addr4, hosts[h].addr6, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(hosts[h].statetime))))
res.append("</table>") res.append("</table>")
res.append("<h4>Log of Events</h4>") res.append("<h4>Log of Events</h4>")
for m in msgs[len(msgs)-30:]: for m in msgs[len(msgs)-30:]:
res.append("%s<BR>" % m) res.append("%s<BR>" % m)
@@ -624,9 +715,14 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
if uarg[1][:2] == "c=": if uarg[1][:2] == "c=":
ucmd=uarg[1][2:] ucmd=uarg[1][2:]
if ucmd != "" and uname != "" and hosts.has_key(uname): if ucmd != "" and uname != "" and hosts.has_key(uname):
hosts[uname].cmds.append(urllib.unquote(ucmd)) rmsg = {'cmd': urllib.unquote(ucmd)}
hosts[uname].cmds.append(('CMD', rmsg))
res=self.buildhead() res=self.buildhead()
res.append("2Done") res.append("2Done")
else:
code=400
cause='arg 2 error'
res=self.builderror(code, cause, "ucmd %s uname %s" % (ucmd, uname))
elif upar[0] == "/d": # drop host /d?h=melschserver elif upar[0] == "/d": # drop host /d?h=melschserver
if len(uarg) != 1 or len(uarg[0]) < 3: if len(uarg) != 1 or len(uarg[0]) < 3:
@@ -637,8 +733,8 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
if uarg[0][:2] == "h=": if uarg[0][:2] == "h=":
uname=uarg[0][2:] uname=uarg[0][2:]
if uname != "" and hosts.has_key(uname): if uname != "" and hosts.has_key(uname):
log(uname, "dropped")
del hosts[uname] del hosts[uname]
log("%s dropped" % uname)
res=self.buildhead() res=self.buildhead()
res.append("Done") res.append("Done")
@@ -657,13 +753,36 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
else: else:
ll="name %s not found" % uname ll="name %s not found" % uname
res.append(ll) res.append(ll)
log(ll) log(uname, ll)
elif upar[0] == "/u": # update
uname=""
ucode=""
if len(uarg) != 2 or len(uarg[0]) < 3 or len(uarg[1]) < 3:
code=400
cause='Argument error'
res=self.builderror(code, cause, "need h= and c= arguments")
else:
if uarg[0][:2] == "h=":
uname=uarg[0][2:]
if uarg[1][:2] == "c=":
ucode=uarg[1][2:]
if ucode != "" and uname != "" and hosts.has_key(uname):
err = updatecode(ucode, urllib.unquote(uname))
res=self.buildhead()
res.append("3 Done: %s" % err if err else "OK")
else:
code=400
cause='Argument error'
res=self.builderror(code, cause, "host not found")
elif upar[0] == "/r": # restart elif upar[0] == "/r": # restart
res=self.buildhead() res=self.buildhead()
res.append("restart request") res.append("restart request")
xsig=signal.SIGHUP xsig=signal.SIGHUP
log("restart request") log(None, "restart request")
else: else:
code=404 code=404
@@ -715,7 +834,7 @@ def closeup():
except: except:
pass pass
log("restarting") log(None, "restarting")
try: try:
logf.close() logf.close()
except: except:
@@ -879,7 +998,7 @@ if os.path.exists(PICKFILE):
for h in hosts.keys(): for h in hosts.keys():
hosts[h].fixup() hosts[h].fixup()
for h in drophosts: for h in drophosts:
if h in hosts: # was: hosts.has_key(h): if h in hosts:
del hosts[h] del hosts[h]
@@ -887,7 +1006,7 @@ now = time.time()
startsec = int(now) % interval startsec = int(now) % interval
log("Starting %s" % VER) log(None, "Starting %s" % VER)
atexit.register(on_exit) atexit.register(on_exit)
ilist = [] ilist = []