add a watchlist with hostnames that should trigger an email

This commit is contained in:
andreas
2012-06-16 14:18:26 +00:00
parent 6146e0a4dc
commit bf416c2973
+72 -21
View File
@@ -1,13 +1,16 @@
#!/usr/bin/env python
# $Id: hbd,v 1.30 2012/06/09 15:21:07 andreas Exp $
# $Id: hbd,v 1.31 2012/06/16 14:18:26 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence)
#
import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle
import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle, smtplib, traceback
False=0
True=1
LOGFILE="/home/andreas/public_html/messages/andreas"
PICKFILE="/tmp/hbd.pick"
AEMAIL=["andreas@wrede.ca"]
NAME="heatbeat"
SMTPSERVER="localhost"
hosts={}
htab={}
@@ -49,8 +52,8 @@ def shortname(name):
return r[0]
class NullDevice:
def write(self, s):
pass
def write(self, s):
pass
class Host:
up="up"
@@ -93,6 +96,30 @@ class Host:
displaystatetime(self.name)
return s
def email(s, msg):
    """Mail a notification to every address listed in AEMAIL.

    s   -- short event tag; it is placed in the Subject line after NAME
    msg -- text used as the body of the message

    Returns "OK" when sendmail() completed without raising, or "Fail"
    when the SMTP server refused the recipients (the refusal is logged).
    Any other error is printed together with its traceback and
    saveandrestart() is called.
    """
    ret="OK"
    toaddrs=AEMAIL
    # Deliberately not named "fromaddr": a module-level function already
    # uses that name.
    sender="aew.heartbeat@wrede.ca"
    subj="Info from %s: %s" % (NAME, s)
    date=time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime())
    # Name every recipient in the To: header, not just the first one, so
    # the header agrees with the envelope recipients handed to sendmail().
    body="To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (", ".join(toaddrs), sender, subj, date, msg)
    server=None
    try:
        server = smtplib.SMTP(SMTPSERVER)
        if DEBUG: server.set_debuglevel(1)
        server.sendmail(sender, toaddrs, body)
    except smtplib.SMTPRecipientsRefused:
        # Fetch the exception through sys.exc_info() so that neither the
        # "except X, e" nor the "except X as e" spelling is needed; this
        # form is accepted by every Python version.
        errs=sys.exc_info()[1]
        log("cannot send email: %s\n" % (errs))
        ret="Fail"
    except Exception:
        # Exception rather than a bare except: KeyboardInterrupt and
        # SystemExit should not trigger a restart of the daemon.
        print("smtp error: "+traceback.format_exc())
        saveandrestart()
    # Closing the session is best effort only; there is nothing to close
    # when the connection was never established.
    if server is not None:
        try:
            server.quit()
        except Exception:
            pass
    return ret
#
#
@@ -119,7 +146,10 @@ def addhost(name, addr):
if visual:
displayaddr(sname)
htab[addr]=sname
log("%s, changed address to %s" % (sname, addr))
m="%s, changed address to %s" % (sname, addr)
log(m)
if name in watchhosts:
email("address change", m)
else:
hosts[sname]=Host(sname, addr)
s=hosts.keys()
@@ -184,7 +214,10 @@ def checkoverdue():
gr=5*grace
timeout=hosts[h].interval+gr
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
log("%s is overdue" % h)
m="%s is overdue" % h
log(m)
if h in watchhosts:
email("overdue", m)
hosts[h].newstate(Host.overdue, gr)
#
@@ -291,7 +324,10 @@ def fromaddr(name, addr, boot, interval, acks):
if host.getstate() != Host.up and interval > 0:
lasts=host.state
d=host.newstate(Host.up)
log("%s, back after being %s for %s" % (host.name, lasts, dur(d)))
m="%s, back after being %s for %s" % (host.name, lasts, dur(d))
log(m)
if name in watchhosts:
email("back", name)
host.upcount+=1
#
@@ -342,12 +378,21 @@ def readsock():
a="(%s)" % acks
else:
a=""
log("%s booted, deltaT %0.2g sec %s" % (name, deltaT,a))
m="%s booted, deltaT %0.2g sec %s" % (name, deltaT,a)
log(m)
if name in watchhosts:
email("booted", m)
if msg:
log("%s msg: %s" % (name, msg), service=service)
m="%s msg: %s" % (name, msg)
log(m, service=service)
if name in watchhosts:
email("msg", m)
fromaddr(name, addr[0], boot, interval, acks)
if shutdown:
log("%s shutdown" % name)
m="%s shutdown" % name
log(m)
if name in watchhosts:
email("shutdown", m)
try:
hosts[name].newstate(Host.down)
except:
@@ -442,6 +487,17 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
except:
pass
def saveandrestart():
    """Close the sockets, pickle the daemon state and re-exec the program.

    hosts, htab and msgs are dumped, in that order, into PICKFILE using a
    single Pickler.  The process image is then replaced by a fresh run of
    sys.argv[0] with the arguments held in cmdargs, so this function does
    not return when os.execv() succeeds.
    NOTE(review): presumably the new process reloads PICKFILE at startup;
    confirm against the startup code.
    """
    # Release both sockets before the exec.
    sock.close()
    serv.socket.close()
    pickf=open(PICKFILE, 'w')
    try:
        pick=cPickle.Pickler(pickf)
        # The dump order is part of the file format; keep it unchanged.
        for state in (hosts, htab, msgs):
            pick.dump(state)
    finally:
        # Close the file even when pickling fails, so the handle is not
        # leaked while the exception propagates.
        pickf.close()
    os.execv(sys.argv[0],[sys.argv[0]]+cmdargs)
#
# Main
@@ -459,7 +515,7 @@ configfile="%s/.hbrc" % home
try:
optlist, args = getopt.getopt(sys.argv[1:], 'c:dfh:v')
except:
helpflag=True
helpflag=True
for o,a in optlist:
if o == '-c':
@@ -513,6 +569,7 @@ logfmt="text"
interval=INTERVAL
grace=GRACE
reportstrict=False
watchhosts=[]
try:
f=open(configfile,"r")
@@ -544,6 +601,8 @@ if f:
logfmt=r[1]
elif r[0] == 'reportstrict':
reportstrict=r[1] in ["True","true","TRUE","1"]
elif r[0] == 'watchhosts':
watchhosts=eval(r[1])
f.close()
if len(args) != 0:
@@ -581,7 +640,7 @@ ilist=[]
sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \
sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1)
sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1)
sock.bind(("",hb_port))
ilist.append(sock)
@@ -659,14 +718,6 @@ while up:
if sig == signal.SIGHUP:
sock.close()
serv.socket.close()
pickf=open(PICKFILE, 'w')
pick=cPickle.Pickler(pickf)
pick.dump(hosts)
pick.dump(htab)
pick.dump(msgs)
pickf.close()
saveandrestart()
os.execv(sys.argv[0],[sys.argv[0]]+cmdargs)