add a watchlist with hostnames that should trigger an email
This commit is contained in:
@@ -1,13 +1,16 @@
|
||||
#!/usr/bin/env python
|
||||
# $Id: hbd,v 1.30 2012/06/09 15:21:07 andreas Exp $
|
||||
# $Id: hbd,v 1.31 2012/06/16 14:18:26 andreas Exp $
|
||||
# Wait for heartbeat messages and act on them (or their absence)
|
||||
#
|
||||
import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle
|
||||
import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle, smtplib, traceback
|
||||
|
||||
False=0
|
||||
True=1
|
||||
LOGFILE="/home/andreas/public_html/messages/andreas"
|
||||
PICKFILE="/tmp/hbd.pick"
|
||||
AEMAIL=["andreas@wrede.ca"]
|
||||
NAME="heatbeat"
|
||||
SMTPSERVER="localhost"
|
||||
|
||||
hosts={}
|
||||
htab={}
|
||||
@@ -49,8 +52,8 @@ def shortname(name):
|
||||
return r[0]
|
||||
|
||||
class NullDevice:
|
||||
def write(self, s):
|
||||
pass
|
||||
def write(self, s):
|
||||
pass
|
||||
|
||||
class Host:
|
||||
up="up"
|
||||
@@ -93,6 +96,30 @@ class Host:
|
||||
displaystatetime(self.name)
|
||||
return s
|
||||
|
||||
def email(s, msg):
    """Send a notification mail to every address in AEMAIL.

    s   -- short event tag; becomes part of the Subject line
    msg -- message text; becomes the mail body

    Returns "OK" when the message was handed to the SMTP server and
    "Fail" when the server refused the recipients (the refusal is logged).
    Any other error while talking to the server prints the traceback and
    calls saveandrestart().
    """
    ret = "OK"
    toaddrs = AEMAIL
    fromaddr = "aew.heartbeat@wrede.ca"
    subj = "Info from %s: %s" % (NAME, s)
    date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime())
    # List every recipient in the To: header, not just the first one.
    # Joining also avoids an IndexError when the recipient list is empty.
    body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (
        ", ".join(toaddrs), fromaddr, subj, date, msg)

    # Stays None when the connection itself fails, so cleanup can tell.
    server = None
    try:
        server = smtplib.SMTP(SMTPSERVER)
        if DEBUG:
            server.set_debuglevel(1)
        server.sendmail(fromaddr, toaddrs, body)
    except smtplib.SMTPRecipientsRefused:
        # sys.exc_info() instead of "except X, e" / "except X as e":
        # this form parses on every Python version.
        errs = sys.exc_info()[1]
        log("cannot send email: %s\n" % (errs,))
        ret = "Fail"
    except Exception:
        print("smtp error: " + traceback.format_exc())
        saveandrestart()

    if server is not None:
        try:
            server.quit()
        except (smtplib.SMTPException, socket.error):
            # Best effort: the connection may already be gone.
            pass
    return ret
|
||||
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
@@ -119,7 +146,10 @@ def addhost(name, addr):
|
||||
if visual:
|
||||
displayaddr(sname)
|
||||
htab[addr]=sname
|
||||
log("%s, changed address to %s" % (sname, addr))
|
||||
m="%s, changed address to %s" % (sname, addr)
|
||||
log(m)
|
||||
if name in watchhosts:
|
||||
email("address change", m)
|
||||
else:
|
||||
hosts[sname]=Host(sname, addr)
|
||||
s=hosts.keys()
|
||||
@@ -184,7 +214,10 @@ def checkoverdue():
|
||||
gr=5*grace
|
||||
timeout=hosts[h].interval+gr
|
||||
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
|
||||
log("%s is overdue" % h)
|
||||
m="%s is overdue" % h
|
||||
log(m)
|
||||
if h in watchhosts:
|
||||
email("overdue", m)
|
||||
hosts[h].newstate(Host.overdue, gr)
|
||||
|
||||
#
|
||||
@@ -291,7 +324,10 @@ def fromaddr(name, addr, boot, interval, acks):
|
||||
if host.getstate() != Host.up and interval > 0:
|
||||
lasts=host.state
|
||||
d=host.newstate(Host.up)
|
||||
log("%s, back after being %s for %s" % (host.name, lasts, dur(d)))
|
||||
m="%s, back after being %s for %s" % (host.name, lasts, dur(d))
|
||||
log(m)
|
||||
if name in watchhosts:
|
||||
email("back", name)
|
||||
host.upcount+=1
|
||||
|
||||
#
|
||||
@@ -342,12 +378,21 @@ def readsock():
|
||||
a="(%s)" % acks
|
||||
else:
|
||||
a=""
|
||||
log("%s booted, deltaT %0.2g sec %s" % (name, deltaT,a))
|
||||
m="%s booted, deltaT %0.2g sec %s" % (name, deltaT,a)
|
||||
log(m)
|
||||
if name in watchhosts:
|
||||
email("booted", m)
|
||||
if msg:
|
||||
log("%s msg: %s" % (name, msg), service=service)
|
||||
m="%s msg: %s" % (name, msg)
|
||||
log(m, service=service)
|
||||
if name in watchhosts:
|
||||
email("msg", m)
|
||||
fromaddr(name, addr[0], boot, interval, acks)
|
||||
if shutdown:
|
||||
log("%s shutdown" % name)
|
||||
m="%s shutdown" % name
|
||||
log(m)
|
||||
if name in watchhosts:
|
||||
email("shutdown", m)
|
||||
try:
|
||||
hosts[name].newstate(Host.down)
|
||||
except:
|
||||
@@ -442,6 +487,17 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
|
||||
except:
|
||||
pass
|
||||
|
||||
def saveandrestart():
    """Save the monitoring state to PICKFILE and re-exec this program.

    Closes sock and serv.socket, pickles hosts, htab and msgs (in that
    order) into PICKFILE, then replaces the running process with a fresh
    copy of sys.argv[0] started with cmdargs.  Does not return when the
    exec succeeds.
    """
    # NOTE(review): presumably closed so the re-exec'd process can bind the
    # same ports again -- confirm against the startup code.
    sock.close()
    serv.socket.close()

    pickf = open(PICKFILE, 'w')
    try:
        pick = cPickle.Pickler(pickf)
        # Keep this order; whatever reloads PICKFILE presumably reads the
        # objects back in the same sequence -- verify against the loader.
        for state in (hosts, htab, msgs):
            pick.dump(state)
    finally:
        # Close (and flush) the file even if one of the dumps fails.
        pickf.close()

    os.execv(sys.argv[0], [sys.argv[0]] + cmdargs)
|
||||
|
||||
#
|
||||
# Main
|
||||
@@ -459,7 +515,7 @@ configfile="%s/.hbrc" % home
|
||||
try:
|
||||
optlist, args = getopt.getopt(sys.argv[1:], 'c:dfh:v')
|
||||
except:
|
||||
helpflag=True
|
||||
helpflag=True
|
||||
|
||||
for o,a in optlist:
|
||||
if o == '-c':
|
||||
@@ -513,6 +569,7 @@ logfmt="text"
|
||||
interval=INTERVAL
|
||||
grace=GRACE
|
||||
reportstrict=False
|
||||
watchhosts=[]
|
||||
|
||||
try:
|
||||
f=open(configfile,"r")
|
||||
@@ -544,6 +601,8 @@ if f:
|
||||
logfmt=r[1]
|
||||
elif r[0] == 'reportstrict':
|
||||
reportstrict=r[1] in ["True","true","TRUE","1"]
|
||||
elif r[0] == 'watchhosts':
|
||||
watchhosts=eval(r[1])
|
||||
f.close()
|
||||
|
||||
if len(args) != 0:
|
||||
@@ -581,7 +640,7 @@ ilist=[]
|
||||
|
||||
sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \
|
||||
sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1)
|
||||
sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1)
|
||||
|
||||
sock.bind(("",hb_port))
|
||||
ilist.append(sock)
|
||||
@@ -659,14 +718,6 @@ while up:
|
||||
|
||||
|
||||
if sig == signal.SIGHUP:
|
||||
sock.close()
|
||||
serv.socket.close()
|
||||
pickf=open(PICKFILE, 'w')
|
||||
pick=cPickle.Pickler(pickf)
|
||||
pick.dump(hosts)
|
||||
pick.dump(htab)
|
||||
pick.dump(msgs)
|
||||
pickf.close()
|
||||
|
||||
os.execv(sys.argv[0],[sys.argv[0]]+cmdargs)
|
||||
saveandrestart()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user