always pickle current state when a log msg is set
drop strict reporting - it's useless
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# $Id: hbd,v 1.33 2012/09/22 17:51:45 andreas Exp $
|
# $Id: hbd,v 1.34 2012/09/22 19:17:53 andreas Exp $
|
||||||
# Wait for heartbeat messages and act on them (or their absence)
|
# Wait for heartbeat messages and act on them (or their absence)
|
||||||
#
|
#
|
||||||
import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle, smtplib, traceback
|
import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle, smtplib, traceback
|
||||||
@@ -7,7 +7,7 @@ import time, os, string, sys, socket, atexit, select, SocketServer, getopt, sign
|
|||||||
False=0
|
False=0
|
||||||
True=1
|
True=1
|
||||||
LOGFILE="/home/andreas/public_html/messages/andreas"
|
LOGFILE="/home/andreas/public_html/messages/andreas"
|
||||||
PICKFILE="/tmp/hbd.pick"
|
PICKFILE="/var/tmp/hbd.pick"
|
||||||
AEMAIL=["andreas@wrede.ca"]
|
AEMAIL=["andreas@wrede.ca"]
|
||||||
NAME="heatbeat"
|
NAME="heatbeat"
|
||||||
SMTPSERVER="localhost"
|
SMTPSERVER="localhost"
|
||||||
@@ -208,22 +208,17 @@ def checkoverdue():
|
|||||||
for h in hosts.keys():
|
for h in hosts.keys():
|
||||||
if hosts[h].state == Host.down:
|
if hosts[h].state == Host.down:
|
||||||
continue
|
continue
|
||||||
if reportstrict:
|
timeout=hosts[h].interval+grace
|
||||||
gr=grace
|
|
||||||
else:
|
|
||||||
gr=5*grace
|
|
||||||
timeout=hosts[h].interval+gr
|
|
||||||
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
|
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
|
||||||
m="%s is overdue" % h
|
m="%s is overdue" % h
|
||||||
log(m)
|
|
||||||
if h in watchhosts:
|
if h in watchhosts:
|
||||||
email("overdue", m)
|
email("overdue", m)
|
||||||
hosts[h].newstate(Host.overdue, gr)
|
hosts[h].newstate(Host.overdue, grace)
|
||||||
|
log(m)
|
||||||
|
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
#
|
|
||||||
def displaytime():
|
def displaytime():
|
||||||
maxY,maxX=stdscr.getmaxyx()
|
maxY,maxX=stdscr.getmaxyx()
|
||||||
stdscr.addstr(0,maxX-8, time.strftime("%H:%M:%S", time.localtime(now)), curses.A_BOLD)
|
stdscr.addstr(0,maxX-8, time.strftime("%H:%M:%S", time.localtime(now)), curses.A_BOLD)
|
||||||
@@ -303,6 +298,7 @@ def log(m, service="heartbeat"):
|
|||||||
msgw.addstr(msg)
|
msgw.addstr(msg)
|
||||||
msgw.clrtoeol()
|
msgw.clrtoeol()
|
||||||
msgw.refresh()
|
msgw.refresh()
|
||||||
|
pickleit()
|
||||||
|
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
@@ -493,6 +489,10 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
|
|||||||
def saveandrestart():
|
def saveandrestart():
|
||||||
sock.close()
|
sock.close()
|
||||||
serv.socket.close()
|
serv.socket.close()
|
||||||
|
log("restarting")
|
||||||
|
os.execv(sys.argv[0],[sys.argv[0]]+cmdargs)
|
||||||
|
|
||||||
|
def pickleit():
|
||||||
pickf=open(PICKFILE, 'w')
|
pickf=open(PICKFILE, 'w')
|
||||||
pick=cPickle.Pickler(pickf)
|
pick=cPickle.Pickler(pickf)
|
||||||
pick.dump(hosts)
|
pick.dump(hosts)
|
||||||
@@ -500,7 +500,6 @@ def saveandrestart():
|
|||||||
pick.dump(msgs)
|
pick.dump(msgs)
|
||||||
pickf.close()
|
pickf.close()
|
||||||
|
|
||||||
os.execv(sys.argv[0],[sys.argv[0]]+cmdargs)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Main
|
# Main
|
||||||
@@ -508,7 +507,6 @@ def saveandrestart():
|
|||||||
|
|
||||||
helpflag=False
|
helpflag=False
|
||||||
forground=False
|
forground=False
|
||||||
restart=None
|
|
||||||
optlist=[]
|
optlist=[]
|
||||||
args=[]
|
args=[]
|
||||||
home=os.environ['HOME']
|
home=os.environ['HOME']
|
||||||
@@ -554,7 +552,7 @@ hb_port=50003
|
|||||||
interval=20
|
interval=20
|
||||||
hbd_port=50004
|
hbd_port=50004
|
||||||
hbd_host=www.domain.com
|
hbd_host=www.domain.com
|
||||||
grace=1
|
grace=2
|
||||||
"""
|
"""
|
||||||
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -571,7 +569,6 @@ logfile=LOGFILE
|
|||||||
logfmt="text"
|
logfmt="text"
|
||||||
interval=INTERVAL
|
interval=INTERVAL
|
||||||
grace=GRACE
|
grace=GRACE
|
||||||
reportstrict=False
|
|
||||||
watchhosts=[]
|
watchhosts=[]
|
||||||
drophosts=[]
|
drophosts=[]
|
||||||
|
|
||||||
@@ -603,8 +600,6 @@ if f:
|
|||||||
logfile=r[1]
|
logfile=r[1]
|
||||||
elif r[0] == 'logfmt':
|
elif r[0] == 'logfmt':
|
||||||
logfmt=r[1]
|
logfmt=r[1]
|
||||||
elif r[0] == 'reportstrict':
|
|
||||||
reportstrict=r[1] in ["True","true","TRUE","1"]
|
|
||||||
elif r[0] == 'watchhosts':
|
elif r[0] == 'watchhosts':
|
||||||
watchhosts=eval(r[1])
|
watchhosts=eval(r[1])
|
||||||
elif r[0] == 'drophosts':
|
elif r[0] == 'drophosts':
|
||||||
@@ -622,11 +617,13 @@ logf=initlog(logfile)
|
|||||||
if os.path.exists(PICKFILE):
|
if os.path.exists(PICKFILE):
|
||||||
pickf=open(PICKFILE, 'r')
|
pickf=open(PICKFILE, 'r')
|
||||||
pick=cPickle.Unpickler(pickf)
|
pick=cPickle.Unpickler(pickf)
|
||||||
|
try:
|
||||||
hosts=pick.load()
|
hosts=pick.load()
|
||||||
htab=pick.load()
|
htab=pick.load()
|
||||||
msgs=pick.load()
|
msgs=pick.load()
|
||||||
pickf.close()
|
pickf.close()
|
||||||
# os.unlink(PICKFILE)
|
except:
|
||||||
|
os.unlink(PICKFILE)
|
||||||
for h in drophosts:
|
for h in drophosts:
|
||||||
if hosts.has_key(h):
|
if hosts.has_key(h):
|
||||||
del hosts[h]
|
del hosts[h]
|
||||||
@@ -641,9 +638,7 @@ if visual:
|
|||||||
display()
|
display()
|
||||||
stdscr.nodelay(1)
|
stdscr.nodelay(1)
|
||||||
|
|
||||||
if verbose:
|
log("Starting")
|
||||||
if restart: log("Restarting")
|
|
||||||
else: log("Starting")
|
|
||||||
atexit.register(on_exit)
|
atexit.register(on_exit)
|
||||||
|
|
||||||
ilist=[]
|
ilist=[]
|
||||||
@@ -658,7 +653,7 @@ ilist.append(sock)
|
|||||||
serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler)
|
serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler)
|
||||||
ilist.append(serv.fileno())
|
ilist.append(serv.fileno())
|
||||||
|
|
||||||
if not forground and not restart:
|
if not forground:
|
||||||
pid=os.fork()
|
pid=os.fork()
|
||||||
if pid > 0:
|
if pid > 0:
|
||||||
if verbose: print "daemoinizing... pid=%d" % pid
|
if verbose: print "daemoinizing... pid=%d" % pid
|
||||||
@@ -680,7 +675,7 @@ sig=0
|
|||||||
signal.signal(signal.SIGTERM, handler)
|
signal.signal(signal.SIGTERM, handler)
|
||||||
signal.signal(signal.SIGHUP, handler)
|
signal.signal(signal.SIGHUP, handler)
|
||||||
|
|
||||||
next=int(now)+1
|
next=int(now)+15 # 15 seconds time to settle after (re-)start
|
||||||
sleep=next - now
|
sleep=next - now
|
||||||
while up:
|
while up:
|
||||||
if visual:
|
if visual:
|
||||||
|
|||||||
Reference in New Issue
Block a user