major changes to both hbc and hbd

This commit is contained in:
andreas
2005-07-19 20:31:05 +00:00
parent b818ffc4c8
commit 203f92d61f
2 changed files with 204 additions and 49 deletions
+51 -17
View File
@@ -1,9 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
# $Id: hbc,v 1.2 2005/07/15 14:25:06 andreas Exp $ # $Id: hbc,v 1.3 2005/07/19 20:31:05 andreas Exp $
import sys, time, socket, os, signal, getopt, string import sys, time, socket, os, signal, getopt, string
PORT=50003 PORT=50003
INTERVAL=20 INTERVAL=10
class NullDevice:
def write(self, s):
pass
def handler(signum, frame): def handler(signum, frame):
@@ -12,9 +16,10 @@ def handler(signum, frame):
return return
sys.exit(0) sys.exit(0)
helpflag=0 msgonly=False
verbose=0 helpflag=False
daemon=0 verbose=False
daemon=False
optlist=[] optlist=[]
args=[] args=[]
msgboot=[] msgboot=[]
@@ -24,7 +29,7 @@ configfile="%s/.hbrc" % home
try: try:
optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v') optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
except: except:
helpflag=1 helpflag=True
for o,a in optlist: for o,a in optlist:
if o == '-b': if o == '-b':
@@ -32,24 +37,37 @@ for o,a in optlist:
elif o == '-c': elif o == '-c':
configfile=a configfile=a
elif o == '-d': elif o == '-d':
daemon=1 daemon=True
elif o == '-h': elif o == '-h':
helpflag=1 helpflag=True
elif o == '-m': elif o == '-m':
msgboot.append("service=%s" % "service") msgboot.append("service=%s" % "service")
a.replace(';',':')
msgboot.append("msg=%s" % a) msgboot.append("msg=%s" % a)
msgonly=True
elif o == '-v': elif o == '-v':
verbose+=1 verbose=True
if helpflag: if helpflag:
print "hbc HeartBeatClient" print "hbc HeartBeatClient"
print "usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]" print "usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]"
print print
print " -b indicate machine boot"
print " -c configfile"
print " -d daemonize"
print " -h this help"
print " -m send a message"
print " -v verbose"
print
print """ config file can contain print """ config file can contain
hb_hosts=('host1', 'host2', ..._ hb_hosts=('host1', 'host2', ..._
hb_port=50003 hb_port=50003
interval=20 interval=20
logfile=...
logfmt={|test|msg}
grace=SECONDS
reportstrict={True|False}
""" """
sys.exit(1) sys.exit(1)
@@ -76,6 +94,8 @@ if f:
r=l[:-1].split('=') r=l[:-1].split('=')
if r[0] == 'hb_hosts': if r[0] == 'hb_hosts':
hb_hosts=eval(r[1]) hb_hosts=eval(r[1])
if verbose:
print "notice: cfg hb_hosts: %s" % hb_hosts
elif r[0] == 'interval': elif r[0] == 'interval':
interval=eval(r[1]) interval=eval(r[1])
elif r[0] == 'hb_port': elif r[0] == 'hb_port':
@@ -99,7 +119,7 @@ if verbose:
print "notice: interval: %s" % interval print "notice: interval: %s" % interval
print "notice: iam: %s" % iam print "notice: iam: %s" % iam
if daemon: if not msgonly:
msgboot.append("interval=%s" % interval) msgboot.append("interval=%s" % interval)
if len(msgboot) > 0: if len(msgboot) > 0:
@@ -118,15 +138,27 @@ if len(msgboot) > 0:
else: else:
break break
if msgonly:
sys.exit(0)
if not daemon: if daemon:
sys.exit(0) pid=os.fork()
if pid > 0:
if verbose:
print "daemoinizing... pid=%d" % pid
sys.exit(0)
os.close(0)
os.close(1)
os.close(2)
sys.stdin.close()
sys.stdout = NullDevice()
sys.stderr = NullDevice()
os.chdir("/")
os.setsid()
os.umask(0)
pid=os.fork()
if pid > 0:
if verbose:
print "daemoinizing... pid=%d" % pid
sys.exit(0)
msg="interval=%s;name=%s" % (interval, iam) msg="interval=%s;name=%s" % (interval, iam)
up=1 up=1
@@ -152,3 +184,5 @@ for hb_host in hb_hosts:
sock.sendto(msg, (hb_host, hb_port)) sock.sendto(msg, (hb_host, hb_port))
time.sleep(1) time.sleep(1)
sock.close() sock.close()
+153 -32
View File
@@ -1,23 +1,23 @@
#!/usr/bin/env python #!/usr/bin/env python
# $Id: hbd,v 1.1 2005/07/14 19:26:01 andreas Exp $ # $Id: hbd,v 1.2 2005/07/19 20:31:05 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence) # Wait for heartbeat messages and act on them (or their absence)
# #
import time, os, string, sys, socket, curses, atexit, select, SocketServer import time, os, string, sys, socket, curses, atexit, select, SocketServer, getopt
LOGF="/home/andreas/public_html/messages/andreas" LOGFILE="/home/andreas/public_html/messages/andreas"
hosts={} hosts={}
num=0 num=0
upcount=0 upcount=0
PORT=50003 PORT=50003
TPORT=50004 TPORT=50004
THOST="10.99.1.4" THOST=""
DEBUG=False DEBUG=False
verbose=False verbose=False
INTERVAL=10 INTERVAL=10
GRACE=10 GRACE=2
visual=0 visual=0
@@ -29,11 +29,14 @@ msgw=None
msgwB=None msgwB=None
msgwHeight=10 msgwHeight=10
class NullDevice:
def write(self, s):
pass
class Host: class Host:
up="up" up="up"
down="down" down="down"
overdue="overdue" overdue="overdue"
OVERDUE="OVERDUE"
def __init__(self, name, addr): def __init__(self, name, addr):
global num global num
@@ -46,12 +49,13 @@ class Host:
self.uppercent="n/a" self.uppercent="n/a"
self.state="up" self.state="up"
self.statetime=self.lastbeat self.statetime=self.lastbeat
self.interval=INTERVAL self.interval=0
num+=1 num+=1
def getstate(self): def getstate(self):
return self.state return self.state
# set new state, return number of secs in previous state
def newstate(self, state): def newstate(self, state):
self.state=state self.state=state
now=time.time() now=time.time()
@@ -109,9 +113,8 @@ def on_exit():
print "exit" print "exit"
def initlog(): def initlog(logfile):
global logf return open(logfile,"a")
logf=open(LOGF,"a")
# #
# #
# #
@@ -135,7 +138,7 @@ def initwin():
msgw.setscrreg(0, msgwHeight-1) msgw.setscrreg(0, msgwHeight-1)
msgw.scrollok(1) msgw.scrollok(1)
stdscr.addstr(0,0, "WatchArnsberg Version 1.0", curses.A_BOLD) stdscr.addstr(0,0, "hbd Version 1.0", curses.A_BOLD)
stdscr.refresh() stdscr.refresh()
msgwB.refresh() msgwB.refresh()
# #
@@ -146,11 +149,13 @@ def checkoverdue():
for h in hosts: for h in hosts:
if hosts[h].state == Host.down: if hosts[h].state == Host.down:
continue continue
if hosts[h].state == Host.up and now-hosts[h].lastbeat > hosts[h].interval+GRACE: if reportstrict:
timeout=hosts[h].interval+grace
else:
timeout=hosts[h].interval*5+grace
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
if reportstrict: log("%s is overdue" % h)
hosts[h].newstate(Host.overdue) hosts[h].newstate(Host.overdue)
elif hosts[h].state == Host.overdue and now-hosts[h].lastbeat > hosts[h].interval*5+GRACE:
log("%s is overdue" % h)
hosts[h].newstate(Host.OVERDUE)
# #
# #
@@ -165,7 +170,7 @@ def displaytime():
attr=0 attr=0
if verbose and hosts[h].state != Host.down: if verbose and hosts[h].state != Host.down:
d=dur(now-hosts[h].lastbeat) d=dur(now-hosts[h].lastbeat)
if hosts[h].state == Host.OVERDUE: if hosts[h].state == Host.overdue:
attr=curses.A_BOLD attr=curses.A_BOLD
win.addstr(hosts[h].num+1, 25, "%8s" % d, attr) win.addstr(hosts[h].num+1, 25, "%8s" % d, attr)
win.addstr(hosts[h].num+1, 53, "%3s" % hosts[h].uppercent ) win.addstr(hosts[h].num+1, 53, "%3s" % hosts[h].uppercent )
@@ -223,7 +228,10 @@ def display():
def log(m, service="heartbeat"): def log(m, service="heartbeat"):
msg=time.strftime("%b %d %H:%M:%S")+": "+m+"\n" msg=time.strftime("%b %d %H:%M:%S")+": "+m+"\n"
msgs.append(msg) msgs.append(msg)
m2="%d|%s|%s\n" % (now, service, m) if logfmt == "msg":
m2="%d|%s|%s\n" % (now, service, m)
else:
m2=msg
logf.write(m2) logf.write(m2)
logf.flush() logf.flush()
if msgw != None: if msgw != None:
@@ -236,18 +244,17 @@ def log(m, service="heartbeat"):
# #
# #
def fromaddr(name, addr, boot): def fromaddr(name, addr, boot, interval):
global htab global htab
if not htab.has_key(addr): if not htab.has_key(addr):
addhost(name, addr) addhost(name, addr)
host=hosts[htab[addr]] host=hosts[htab[addr]]
host.lastbeat=now host.lastbeat=now
if host.getstate() != Host.up: if host.getstate() != Host.up and interval > 0:
lasts=host.state lasts=host.state
d=host.newstate(Host.up) d=host.newstate(Host.up)
if lasts != 'overdue': log("%s, back after being %s for %s" % (host.name, lasts, dur(d)))
log("%s, back after being %s for %s" % (host.name, lasts, dur(d)))
host.upcount+=1 host.upcount+=1
# #
@@ -261,7 +268,7 @@ def readsock():
shutdown=0 shutdown=0
name="unknown" name="unknown"
msg=None msg=None
interval=INTERVAL interval=0
for pair in pairs: for pair in pairs:
l=string.split(pair,"=") l=string.split(pair,"=")
key=l[0] key=l[0]
@@ -285,11 +292,12 @@ def readsock():
log("%s booted" % name) log("%s booted" % name)
if msg: if msg:
log("%s msg: %s" % (name, msg),service=service) log("%s msg: %s" % (name, msg),service=service)
fromaddr(name, addr[0], boot) fromaddr(name, addr[0], boot, interval)
if shutdown: if shutdown:
log("%s shutdown" % name) log("%s shutdown" % name)
hosts[name].newstate(Host.down) hosts[name].newstate(Host.down)
hosts[name].interval=interval if interval > 0:
hosts[name].interval=interval
# #
@@ -300,7 +308,7 @@ def updatestats():
upcount+=1 upcount+=1
for h in hosts: for h in hosts:
if upcount > 0: if upcount > 0:
hosts[h].uppercent="%3.0f" % ((hosts[h].upcount*hosts[h].interval*100.0)/(upcount*INTERVAL)) hosts[h].uppercent="%3.0f" % ((hosts[h].upcount*hosts[h].interval*100.0)/(upcount*interval))
# #
# #
# #
@@ -339,7 +347,7 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
cause="OK" cause="OK"
self.request.send("HTTP/1.0 %s %s\r\n" % (code, cause)) self.request.send("HTTP/1.0 %s %s\r\n" % (code, cause))
self.request.send("Date: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now))) self.request.send("Date: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now)))
self.request.send("Server: WatchArnsberg\r\n") self.request.send("Server: hbd\r\n")
self.request.send("Last-Modified: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now))) self.request.send("Last-Modified: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now)))
self.request.send("Accept-Ranges: bytes\r\n") self.request.send("Accept-Ranges: bytes\r\n")
self.request.send("Connection: close\r\n") self.request.send("Connection: close\r\n")
@@ -353,7 +361,7 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
res.append('<h1>%s</h1>' % (cause)) res.append('<h1>%s</h1>' % (cause))
res.append('<p>The requested URL %s was not found on this server.</p>' % uri) res.append('<p>The requested URL %s was not found on this server.</p>' % uri)
res.append('<hr>') res.append('<hr>')
res.append('<address>WatchArnsberg (Unix) Server at somewhere.planix.com Port %d</address>' % TPORT) res.append('<address>hbd (Unix) Server at %s Port %d</address>' % (hbd_host, hbd_port))
res.append('</body></html>') res.append('</body></html>')
else: else:
@@ -383,11 +391,106 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
# Main # Main
# #
helpflag=False
forground=False
optlist=[]
args=[]
home=os.environ['HOME']
configfile="%s/.hbrc" % home
try:
optlist, args = getopt.getopt(sys.argv[1:], 'c:dfhv')
except:
helpflag=True
initlog() for o,a in optlist:
if o == '-c':
configfile=a
if o == '-d':
visual=True
elif o == '-f':
forground=True
elif o == '-h':
helpflag=True
elif o == '-v':
verbose=True
if helpflag:
print "hbc HeartBeatDaemon"
print "usage: hbd [-dfhv] [-c configfile]"
print
print " -c configfile"
print " -d display"
print " -f run in foreground"
print " -h this help"
print " -v verbose"
print
print """ config file can contain
logfile=/var/log/heartbeat.log
logfmt=[text|msg]
hb_port=50003
interval=20
hbd_port=50004
hbd_host=www.domain.com
grace=1
"""
sys.exit(1)
if visual:
forground=True
#
# set defaults
hb_port=PORT
hbd_host=THOST
hbd_port=TPORT
logfile=LOGFILE
logfmt="text"
interval=INTERVAL
grace=GRACE
reportstrict=False
try:
f=open(configfile,"r")
if verbose: print "notice: using config file %s" % configfile
except:
print "warning: running without conifig file: %s" % configfile
f=None
if f:
while 1:
l=f.readline()
if len(l) == 0:
break
r=l[:-1].split('=')
if r[0] == 'interval':
interval=eval(r[1])
elif r[0] == 'grace':
grace=eval(r[1])
elif r[0] == 'hbd_port':
hbd_port=eval(r[1])
elif r[0] == 'hbd_host':
hbd_host=r[1]
elif r[0] == 'hb_port':
hb_port=eval(r[1])
elif r[0] == 'logfile':
logfile=r[1]
elif r[0] == 'logfmt':
logfmt=r[1]
elif r[0] == 'reportstrict':
reportstrict=r[1]
f.close()
if len(args) != 0:
print "error: args"
sys.exit(1)
logf=initlog(logfile)
now=time.time() now=time.time()
startsec=int(now) % INTERVAL startsec=int(now) % interval
htab={} htab={}
if visual: if visual:
@@ -401,12 +504,30 @@ if DEBUG: log("Starting")
ilist=[] ilist=[]
sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM) sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("",PORT)) sock.bind(("",hb_port))
ilist.append(sock) ilist.append(sock)
serv=SocketServer.TCPServer((THOST,TPORT),HtmlHandler) serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler)
ilist.append(serv.fileno()) ilist.append(serv.fileno())
if not forground:
pid=os.fork()
if pid > 0:
if verbose:
print "daemoinizing... pid=%d" % pid
sys.exit(0)
verbose=False
os.close(0)
os.close(1)
os.close(2)
sys.stdin.close()
sys.stdout = NullDevice()
sys.stderr = NullDevice()
os.chdir("/")
os.setsid()
os.umask(0)
next=int(now)+1 next=int(now)+1
sleep=next - now sleep=next - now
while 1: while 1:
@@ -441,7 +562,7 @@ while 1:
serv.handle_request() serv.handle_request()
if now >= next: if now >= next:
next=now+1 next=now+1
if int(now) % INTERVAL == startsec: if int(now) % interval == startsec:
updatestats() updatestats()
checkoverdue() checkoverdue()
if visual: if visual: