From e65d9df0617b65b9c14eb9c4635204c46c86f987 Mon Sep 17 00:00:00 2001 From: andreas Date: Sat, 22 Sep 2012 19:17:53 +0000 Subject: [PATCH] always pickle current state when a log msg is set drop strict reporting - it's useless --- hbd | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/hbd b/hbd index 6997971..2e53366 100755 --- a/hbd +++ b/hbd @@ -1,5 +1,5 @@ #!/usr/bin/env python -# $Id: hbd,v 1.33 2012/09/22 17:51:45 andreas Exp $ +# $Id: hbd,v 1.34 2012/09/22 19:17:53 andreas Exp $ # Wait for heartbeat messages and act on them (or their absence) # import time, os, string, sys, socket, atexit, select, SocketServer, getopt, signal, cPickle, smtplib, traceback @@ -7,7 +7,7 @@ import time, os, string, sys, socket, atexit, select, SocketServer, getopt, sign False=0 True=1 LOGFILE="/home/andreas/public_html/messages/andreas" -PICKFILE="/tmp/hbd.pick" +PICKFILE="/var/tmp/hbd.pick" AEMAIL=["andreas@wrede.ca"] NAME="heatbeat" SMTPSERVER="localhost" @@ -208,22 +208,17 @@ def checkoverdue(): for h in hosts.keys(): if hosts[h].state == Host.down: continue - if reportstrict: - gr=grace - else: - gr=5*grace - timeout=hosts[h].interval+gr + timeout=hosts[h].interval+grace if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout: m="%s is overdue" % h - log(m) if h in watchhosts: email("overdue", m) - hosts[h].newstate(Host.overdue, gr) + hosts[h].newstate(Host.overdue, grace) + log(m) # # # -# def displaytime(): maxY,maxX=stdscr.getmaxyx() stdscr.addstr(0,maxX-8, time.strftime("%H:%M:%S", time.localtime(now)), curses.A_BOLD) @@ -303,6 +298,7 @@ def log(m, service="heartbeat"): msgw.addstr(msg) msgw.clrtoeol() msgw.refresh() + pickleit() # # @@ -493,6 +489,10 @@ class HtmlHandler(SocketServer.BaseRequestHandler): def saveandrestart(): sock.close() serv.socket.close() + log("restarting") + os.execv(sys.argv[0],[sys.argv[0]]+cmdargs) + +def pickleit(): pickf=open(PICKFILE, 'w') pick=cPickle.Pickler(pickf) 
pick.dump(hosts) @@ -500,7 +500,6 @@ def saveandrestart(): pick.dump(msgs) pickf.close() - os.execv(sys.argv[0],[sys.argv[0]]+cmdargs) # # Main @@ -508,7 +507,6 @@ def saveandrestart(): helpflag=False forground=False -restart=None optlist=[] args=[] home=os.environ['HOME'] @@ -554,7 +552,7 @@ hb_port=50003 interval=20 hbd_port=50004 hbd_host=www.domain.com -grace=1 +grace=2 """ sys.exit(1) @@ -571,7 +569,6 @@ logfile=LOGFILE logfmt="text" interval=INTERVAL grace=GRACE -reportstrict=False watchhosts=[] drophosts=[] @@ -603,8 +600,6 @@ if f: logfile=r[1] elif r[0] == 'logfmt': logfmt=r[1] - elif r[0] == 'reportstrict': - reportstrict=r[1] in ["True","true","TRUE","1"] elif r[0] == 'watchhosts': watchhosts=eval(r[1]) elif r[0] == 'drophosts': @@ -622,11 +617,13 @@ logf=initlog(logfile) if os.path.exists(PICKFILE): pickf=open(PICKFILE, 'r') pick=cPickle.Unpickler(pickf) - hosts=pick.load() - htab=pick.load() - msgs=pick.load() - pickf.close() -# os.unlink(PICKFILE) + try: + hosts=pick.load() + htab=pick.load() + msgs=pick.load() + pickf.close() + except: + os.unlink(PICKFILE) for h in drophosts: if hosts.has_key(h): del hosts[h] @@ -641,9 +638,7 @@ if visual: display() stdscr.nodelay(1) -if verbose: - if restart: log("Restarting") - else: log("Starting") +log("Starting") atexit.register(on_exit) ilist=[] @@ -658,7 +653,7 @@ ilist.append(sock) serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler) ilist.append(serv.fileno()) -if not forground and not restart: +if not forground: pid=os.fork() if pid > 0: if verbose: print "daemoinizing... pid=%d" % pid @@ -680,7 +675,7 @@ sig=0 signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGHUP, handler) -next=int(now)+1 +next=int(now)+15 # 15 seconds time to settle after (re-)start sleep=next - now while up: if visual: