From bf416c29730ba46a5613845ab83e4fbff47a6117 Mon Sep 17 00:00:00 2001 From: andreas Date: Sat, 16 Jun 2012 14:18:26 +0000 Subject: [PATCH] add a watchlist with hostnames that should trigger an email --- hbd | 95 +++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/hbd b/hbd index ea0d4b9..4573f8a 100755 --- a/hbd +++ b/hbd @@ -1,13 +1,16 @@ #!/usr/bin/env python -# $Id: hbd,v 1.30 2012/06/09 15:21:07 andreas Exp $ +# $Id: hbd,v 1.31 2012/06/16 14:18:26 andreas Exp $ # Wait for heartbeat messages and act on them (or their absence) # -import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle +import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle, smtplib, traceback False=0 True=1 LOGFILE="/home/andreas/public_html/messages/andreas" PICKFILE="/tmp/hbd.pick" +AEMAIL=["andreas@wrede.ca"] +NAME="heatbeat" +SMTPSERVER="localhost" hosts={} htab={} @@ -49,8 +52,8 @@ def shortname(name): return r[0] class NullDevice: - def write(self, s): - pass + def write(self, s): + pass class Host: up="up" @@ -93,6 +96,30 @@ class Host: displaystatetime(self.name) return s +def email(s, msg): + ret="OK" + toaddrs=AEMAIL + fromaddr="aew.heartbeat@wrede.ca" + subj="Info from %s: %s" % (NAME, s) + date=time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime()) + body="To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromaddr, subj, date, msg) + try: + server = smtplib.SMTP(SMTPSERVER) + if DEBUG: server.set_debuglevel(1) + server.sendmail(fromaddr, toaddrs, body) + except smtplib.SMTPRecipientsRefused, errs: + log("cannot send email: %s\n" % (errs)) + ret="Fail" + except: + print("smtp error: "+traceback.format_exc()) + saveandrestart() + try: + server.quit() + except: + pass + return ret + + # # @@ -119,7 +146,10 @@ def addhost(name, addr): if visual: displayaddr(sname) htab[addr]=sname - log("%s, changed address to %s" % (sname, addr)) + m="%s, changed address to %s" % (sname, addr) + log(m) + if name in watchhosts: + email("address change", m) else: hosts[sname]=Host(sname, addr) s=hosts.keys() @@ -184,7 +214,10 @@ def checkoverdue(): gr=5*grace timeout=hosts[h].interval+gr if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout: - log("%s is overdue" % h) + m="%s is overdue" % h + log(m) + if h in watchhosts: + email("overdue", m) hosts[h].newstate(Host.overdue, gr) # @@ -291,7 +324,10 @@ def fromaddr(name, addr, boot, interval, acks): if host.getstate() != Host.up and interval > 0: lasts=host.state d=host.newstate(Host.up) - log("%s, back after being %s for %s" % (host.name, lasts, dur(d))) + m="%s, back after being %s for %s" % (host.name, lasts, dur(d)) + log(m) + if name in watchhosts: + email("back", name) host.upcount+=1 # @@ -342,12 +378,21 @@ def readsock(): a="(%s)" % acks else: a="" - log("%s booted, deltaT %0.2g sec %s" % (name, deltaT,a)) + m="%s booted, deltaT %0.2g sec %s" % (name, deltaT,a) + log(m) + if name in watchhosts: + email("booted", m) if msg: - log("%s msg: %s" % (name, msg), service=service) + m="%s msg: %s" % (name, msg) + log(m, service=service) + if name in watchhosts: + email("msg", m) fromaddr(name, addr[0], boot, interval, acks) if shutdown: - log("%s shutdown" % name) + m="%s shutdown" % name + log(m) + if name in watchhosts: + email("shutdown", m) try: hosts[name].newstate(Host.down) except: @@ -442,6 +487,17 @@ class HtmlHandler(SocketServer.BaseRequestHandler): except: pass +def saveandrestart(): + sock.close() + serv.socket.close() + pickf=open(PICKFILE, 'w') + pick=cPickle.Pickler(pickf) + pick.dump(hosts) + pick.dump(htab) + pick.dump(msgs) + pickf.close() + + os.execv(sys.argv[0],[sys.argv[0]]+cmdargs) # # Main @@ -459,7 +515,7 @@ configfile="%s/.hbrc" % home try: optlist, args = getopt.getopt(sys.argv[1:], 'c:dfh:v') except: - helpflag=True + helpflag=True for o,a in optlist: if o == '-c': @@ -513,6 +569,7 @@ logfmt="text" interval=INTERVAL grace=GRACE reportstrict=False +watchhosts=[] try: f=open(configfile,"r") @@ -544,6 +601,8 @@ if f: logfmt=r[1] elif r[0] == 'reportstrict': reportstrict=r[1] in ["True","true","TRUE","1"] + elif r[0] == 'watchhosts': + watchhosts=eval(r[1]) f.close() if len(args) != 0: @@ -581,7 +640,7 @@ ilist=[] sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM) sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \ - sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1) + sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1) sock.bind(("",hb_port)) ilist.append(sock) @@ -659,14 +718,6 @@ while up: if sig == signal.SIGHUP: - sock.close() - serv.socket.close() - pickf=open(PICKFILE, 'w') - pick=cPickle.Pickler(pickf) - pick.dump(hosts) - pick.dump(htab) - pick.dump(msgs) - pickf.close() - - os.execv(sys.argv[0],[sys.argv[0]]+cmdargs) + saveandrestart() +