always pickle current state whenever a log msg is logged

drop strict reporting - it's useless
This commit is contained in:
andreas
2012-09-22 19:17:53 +00:00
parent 1df065cf9b
commit e65d9df061
+17 -22
View File
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# $Id: hbd,v 1.33 2012/09/22 17:51:45 andreas Exp $
# $Id: hbd,v 1.34 2012/09/22 19:17:53 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence)
#
import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle, smtplib, traceback
@@ -7,7 +7,7 @@ import time, os, string, sys, socket, atexit, select, SocketServer, getopt, sign
False=0
True=1
LOGFILE="/home/andreas/public_html/messages/andreas"
PICKFILE="/tmp/hbd.pick"
PICKFILE="/var/tmp/hbd.pick"
AEMAIL=["andreas@wrede.ca"]
NAME="heatbeat"
SMTPSERVER="localhost"
@@ -208,22 +208,17 @@ def checkoverdue():
for h in hosts.keys():
if hosts[h].state == Host.down:
continue
if reportstrict:
gr=grace
else:
gr=5*grace
timeout=hosts[h].interval+gr
timeout=hosts[h].interval+grace
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
m="%s is overdue" % h
log(m)
if h in watchhosts:
email("overdue", m)
hosts[h].newstate(Host.overdue, gr)
hosts[h].newstate(Host.overdue, grace)
log(m)
#
#
#
#
def displaytime():
maxY,maxX=stdscr.getmaxyx()
stdscr.addstr(0,maxX-8, time.strftime("%H:%M:%S", time.localtime(now)), curses.A_BOLD)
@@ -303,6 +298,7 @@ def log(m, service="heartbeat"):
msgw.addstr(msg)
msgw.clrtoeol()
msgw.refresh()
pickleit()
#
#
@@ -493,6 +489,10 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
def saveandrestart():
sock.close()
serv.socket.close()
log("restarting")
os.execv(sys.argv[0],[sys.argv[0]]+cmdargs)
def pickleit():
pickf=open(PICKFILE, 'w')
pick=cPickle.Pickler(pickf)
pick.dump(hosts)
@@ -500,7 +500,6 @@ def saveandrestart():
pick.dump(msgs)
pickf.close()
os.execv(sys.argv[0],[sys.argv[0]]+cmdargs)
#
# Main
@@ -508,7 +507,6 @@ def saveandrestart():
helpflag=False
forground=False
restart=None
optlist=[]
args=[]
home=os.environ['HOME']
@@ -554,7 +552,7 @@ hb_port=50003
interval=20
hbd_port=50004
hbd_host=www.domain.com
grace=1
grace=2
"""
sys.exit(1)
@@ -571,7 +569,6 @@ logfile=LOGFILE
logfmt="text"
interval=INTERVAL
grace=GRACE
reportstrict=False
watchhosts=[]
drophosts=[]
@@ -603,8 +600,6 @@ if f:
logfile=r[1]
elif r[0] == 'logfmt':
logfmt=r[1]
elif r[0] == 'reportstrict':
reportstrict=r[1] in ["True","true","TRUE","1"]
elif r[0] == 'watchhosts':
watchhosts=eval(r[1])
elif r[0] == 'drophosts':
@@ -622,11 +617,13 @@ logf=initlog(logfile)
if os.path.exists(PICKFILE):
pickf=open(PICKFILE, 'r')
pick=cPickle.Unpickler(pickf)
try:
hosts=pick.load()
htab=pick.load()
msgs=pick.load()
pickf.close()
# os.unlink(PICKFILE)
except:
os.unlink(PICKFILE)
for h in drophosts:
if hosts.has_key(h):
del hosts[h]
@@ -641,9 +638,7 @@ if visual:
display()
stdscr.nodelay(1)
if verbose:
if restart: log("Restarting")
else: log("Starting")
log("Starting")
atexit.register(on_exit)
ilist=[]
@@ -658,7 +653,7 @@ ilist.append(sock)
serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler)
ilist.append(serv.fileno())
if not forground and not restart:
if not forground:
pid=os.fork()
if pid > 0:
if verbose: print "daemoinizing... pid=%d" % pid
@@ -680,7 +675,7 @@ sig=0
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGHUP, handler)
next=int(now)+1
next=int(now)+15 # 15 seconds time to settle after (re-)start
sleep=next - now
while up:
if visual: