major changes to both hbc and hbd

This commit is contained in:
andreas
2005-07-19 20:31:05 +00:00
parent b818ffc4c8
commit 203f92d61f
2 changed files with 204 additions and 49 deletions
+48 -14
View File
@@ -1,9 +1,13 @@
#!/usr/bin/env python
# $Id: hbc,v 1.2 2005/07/15 14:25:06 andreas Exp $
# $Id: hbc,v 1.3 2005/07/19 20:31:05 andreas Exp $
import sys, time, socket, os, signal, getopt, string
PORT=50003
INTERVAL=20
INTERVAL=10
class NullDevice:
    """File-like sink that discards everything written to it.

    Installed as sys.stdout / sys.stderr once the process has detached
    from its terminal, so later prints have somewhere harmless to go.
    """

    def write(self, s):
        """Accept and drop the string s."""
        pass

    def flush(self):
        """Do nothing; present so callers that flush the stream do not fail."""
        pass
def handler(signum, frame):
@@ -12,9 +16,10 @@ def handler(signum, frame):
return
sys.exit(0)
helpflag=0
verbose=0
daemon=0
msgonly=False
helpflag=False
verbose=False
daemon=False
optlist=[]
args=[]
msgboot=[]
@@ -24,7 +29,7 @@ configfile="%s/.hbrc" % home
try:
optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
except:
helpflag=1
helpflag=True
for o,a in optlist:
if o == '-b':
@@ -32,24 +37,37 @@ for o,a in optlist:
elif o == '-c':
configfile=a
elif o == '-d':
daemon=1
daemon=True
elif o == '-h':
helpflag=1
helpflag=True
elif o == '-m':
msgboot.append("service=%s" % "service")
a.replace(';',':')
msgboot.append("msg=%s" % a)
msgonly=True
elif o == '-v':
verbose+=1
verbose=True
# Usage summary: shown for -h and whenever getopt rejected the command line
# (both set helpflag).  Single-argument print(...) calls behave the same as
# the print statement under Python 2.
if helpflag:
    print("hbc HeartBeatClient")
    print("usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]")
    print("")
    print(" -b indicate machine boot")
    print(" -c configfile")
    print(" -d daemonize")
    print(" -h this help")
    print(" -m send a message")
    print(" -v verbose")
    print("")
    # Fixed in the example below: the hb_hosts tuple is now closed with ")"
    # and logfmt lists "text" (the value hbd actually compares against).
    print(""" config file can contain
hb_hosts=('host1', 'host2', ...)
hb_port=50003
interval=20
logfile=...
logfmt={text|msg}
grace=SECONDS
reportstrict={True|False}
""")
    sys.exit(1)
@@ -76,6 +94,8 @@ if f:
r=l[:-1].split('=')
if r[0] == 'hb_hosts':
hb_hosts=eval(r[1])
if verbose:
print "notice: cfg hb_hosts: %s" % hb_hosts
elif r[0] == 'interval':
interval=eval(r[1])
elif r[0] == 'hb_port':
@@ -99,7 +119,7 @@ if verbose:
print "notice: interval: %s" % interval
print "notice: iam: %s" % iam
if daemon:
if not msgonly:
msgboot.append("interval=%s" % interval)
if len(msgboot) > 0:
@@ -118,16 +138,28 @@ if len(msgboot) > 0:
else:
break
if not daemon:
if msgonly:
sys.exit(0)
pid=os.fork()
if pid > 0:
if daemon:
pid=os.fork()
if pid > 0:
if verbose:
print "daemoinizing... pid=%d" % pid
sys.exit(0)
os.close(0)
os.close(1)
os.close(2)
sys.stdin.close()
sys.stdout = NullDevice()
sys.stderr = NullDevice()
os.chdir("/")
os.setsid()
os.umask(0)
msg="interval=%s;name=%s" % (interval, iam)
up=1
signal.signal(signal.SIGTERM, handler)
@@ -152,3 +184,5 @@ for hb_host in hb_hosts:
sock.sendto(msg, (hb_host, hb_port))
time.sleep(1)
sock.close()
+150 -29
View File
@@ -1,23 +1,23 @@
#!/usr/bin/env python
# $Id: hbd,v 1.1 2005/07/14 19:26:01 andreas Exp $
# $Id: hbd,v 1.2 2005/07/19 20:31:05 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence)
#
import time, os, string, sys, socket, curses, atexit, select, SocketServer
import time, os, string, sys, socket, curses, atexit, select, SocketServer, getopt
LOGF="/home/andreas/public_html/messages/andreas"
LOGFILE="/home/andreas/public_html/messages/andreas"
hosts={}
num=0
upcount=0
PORT=50003
TPORT=50004
THOST="10.99.1.4"
THOST=""
DEBUG=False
verbose=False
INTERVAL=10
GRACE=10
GRACE=2
visual=0
@@ -29,11 +29,14 @@ msgw=None
msgwB=None
msgwHeight=10
class NullDevice:
    """File-like sink that discards everything written to it.

    Used as the replacement for sys.stdout / sys.stderr after hbd has
    forked into the background.
    """

    def write(self, s):
        """Accept and drop the string s."""
        pass

    def flush(self):
        """Do nothing; present so callers that flush the stream do not fail."""
        pass
class Host:
up="up"
down="down"
overdue="overdue"
OVERDUE="OVERDUE"
def __init__(self, name, addr):
global num
@@ -46,12 +49,13 @@ class Host:
self.uppercent="n/a"
self.state="up"
self.statetime=self.lastbeat
self.interval=INTERVAL
self.interval=0
num+=1
def getstate(self):
return self.state
# set new state, return number of secs in previous state
def newstate(self, state):
self.state=state
now=time.time()
@@ -109,9 +113,8 @@ def on_exit():
print "exit"
def initlog():
global logf
logf=open(LOGF,"a")
# Open the log file named by `logfile` in append mode and return the open
# file object.  Nothing is stored here; the main script assigns the result
# to the global `logf` that log() writes to.
def initlog(logfile):
return open(logfile,"a")
#
#
#
@@ -135,7 +138,7 @@ def initwin():
msgw.setscrreg(0, msgwHeight-1)
msgw.scrollok(1)
stdscr.addstr(0,0, "WatchArnsberg Version 1.0", curses.A_BOLD)
stdscr.addstr(0,0, "hbd Version 1.0", curses.A_BOLD)
stdscr.refresh()
msgwB.refresh()
#
@@ -146,11 +149,13 @@ def checkoverdue():
for h in hosts:
if hosts[h].state == Host.down:
continue
if hosts[h].state == Host.up and now-hosts[h].lastbeat > hosts[h].interval+GRACE:
if reportstrict:
timeout=hosts[h].interval+grace
else:
timeout=hosts[h].interval*5+grace
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
if reportstrict: log("%s is overdue" % h)
hosts[h].newstate(Host.overdue)
elif hosts[h].state == Host.overdue and now-hosts[h].lastbeat > hosts[h].interval*5+GRACE:
log("%s is overdue" % h)
hosts[h].newstate(Host.OVERDUE)
#
#
@@ -165,7 +170,7 @@ def displaytime():
attr=0
if verbose and hosts[h].state != Host.down:
d=dur(now-hosts[h].lastbeat)
if hosts[h].state == Host.OVERDUE:
if hosts[h].state == Host.overdue:
attr=curses.A_BOLD
win.addstr(hosts[h].num+1, 25, "%8s" % d, attr)
win.addstr(hosts[h].num+1, 53, "%3s" % hosts[h].uppercent )
@@ -223,7 +228,10 @@ def display():
def log(m, service="heartbeat"):
msg=time.strftime("%b %d %H:%M:%S")+": "+m+"\n"
msgs.append(msg)
if logfmt == "msg":
m2="%d|%s|%s\n" % (now, service, m)
else:
m2=msg
logf.write(m2)
logf.flush()
if msgw != None:
@@ -236,17 +244,16 @@ def log(m, service="heartbeat"):
#
#
def fromaddr(name, addr, boot):
def fromaddr(name, addr, boot, interval):
global htab
if not htab.has_key(addr):
addhost(name, addr)
host=hosts[htab[addr]]
host.lastbeat=now
if host.getstate() != Host.up:
if host.getstate() != Host.up and interval > 0:
lasts=host.state
d=host.newstate(Host.up)
if lasts != 'overdue':
log("%s, back after being %s for %s" % (host.name, lasts, dur(d)))
host.upcount+=1
@@ -261,7 +268,7 @@ def readsock():
shutdown=0
name="unknown"
msg=None
interval=INTERVAL
interval=0
for pair in pairs:
l=string.split(pair,"=")
key=l[0]
@@ -285,10 +292,11 @@ def readsock():
log("%s booted" % name)
if msg:
log("%s msg: %s" % (name, msg),service=service)
fromaddr(name, addr[0], boot)
fromaddr(name, addr[0], boot, interval)
if shutdown:
log("%s shutdown" % name)
hosts[name].newstate(Host.down)
if interval > 0:
hosts[name].interval=interval
@@ -300,7 +308,7 @@ def updatestats():
upcount+=1
for h in hosts:
if upcount > 0:
hosts[h].uppercent="%3.0f" % ((hosts[h].upcount*hosts[h].interval*100.0)/(upcount*INTERVAL))
hosts[h].uppercent="%3.0f" % ((hosts[h].upcount*hosts[h].interval*100.0)/(upcount*interval))
#
#
#
@@ -339,7 +347,7 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
cause="OK"
self.request.send("HTTP/1.0 %s %s\r\n" % (code, cause))
self.request.send("Date: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now)))
self.request.send("Server: WatchArnsberg\r\n")
self.request.send("Server: hbd\r\n")
self.request.send("Last-Modified: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now)))
self.request.send("Accept-Ranges: bytes\r\n")
self.request.send("Connection: close\r\n")
@@ -353,7 +361,7 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
res.append('<h1>%s</h1>' % (cause))
res.append('<p>The requested URL %s was not found on this server.</p>' % uri)
res.append('<hr>')
res.append('<address>WatchArnsberg (Unix) Server at somewhere.planix.com Port %d</address>' % TPORT)
res.append('<address>hbd (Unix) Server at %s Port %d</address>' % (hbd_host, hbd_port))
res.append('</body></html>')
else:
@@ -383,11 +391,106 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
# Main
#
helpflag=False
forground=False
optlist=[]
args=[]
home=os.environ['HOME']
configfile="%s/.hbrc" % home
try:
optlist, args = getopt.getopt(sys.argv[1:], 'c:dfhv')
except:
helpflag=True
initlog()
# Apply the command-line options returned by getopt to the module-level
# settings.  `o` is the option flag, `a` its argument (only -c takes one).
for o,a in optlist:
if o == '-c':
# use this config file instead of the default $HOME/.hbrc
configfile=a
# NOTE(review): this is a plain `if`, not `elif`, so it starts a second
# chain; harmless because `o` can only match one flag per iteration.
if o == '-d':
# display mode
visual=True
elif o == '-f':
# stay in the foreground (do not fork)
forground=True
elif o == '-h':
helpflag=True
elif o == '-v':
verbose=True
# Usage summary: shown for -h and whenever getopt rejected the command line
# (both set helpflag).  Single-argument print(...) calls behave the same as
# the print statement under Python 2.
if helpflag:
    # This is the daemon's help, so the banner names hbd (it said "hbc").
    print("hbd HeartBeatDaemon")
    print("usage: hbd [-dfhv] [-c configfile]")
    print("")
    print(" -c configfile")
    print(" -d display")
    print(" -f run in foreground")
    print(" -h this help")
    print(" -v verbose")
    print("")
    # reportstrict is listed because the config parser below accepts it.
    print(""" config file can contain
logfile=/var/log/heartbeat.log
logfmt=[text|msg]
hb_port=50003
interval=20
hbd_port=50004
hbd_host=www.domain.com
grace=1
reportstrict=[True|False]
""")
    sys.exit(1)
if visual:
forground=True
#
# set defaults
hb_port=PORT
hbd_host=THOST
hbd_port=TPORT
logfile=LOGFILE
logfmt="text"
interval=INTERVAL
grace=GRACE
reportstrict=False
# Load "key=value" overrides from the config file.  A missing or unreadable
# file is not fatal: the defaults assigned above simply stay in effect.
try:
    f=open(configfile,"r")
except (IOError, OSError):
    print("warning: running without conifig file: %s" % configfile)
    f=None
else:
    if verbose: print("notice: using config file %s" % configfile)

if f:
    try:
        for l in f:
            # Strip only the line terminator; slicing off the last character
            # would eat part of the value on a final line without a newline.
            # Split on the first "=" only so values may contain "=".
            r=l.rstrip("\r\n").split('=',1)
            if len(r) != 2:
                # blank line or no "=" at all: nothing to assign
                continue
            # NOTE(review): eval() runs whatever expression the config file
            # contains; only point hbd at a config file you trust.
            if r[0] == 'interval':
                interval=eval(r[1])
            elif r[0] == 'grace':
                grace=eval(r[1])
            elif r[0] == 'hbd_port':
                hbd_port=eval(r[1])
            elif r[0] == 'hbd_host':
                hbd_host=r[1]
            elif r[0] == 'hb_port':
                hb_port=eval(r[1])
            elif r[0] == 'logfile':
                logfile=r[1]
            elif r[0] == 'logfmt':
                logfmt=r[1]
            elif r[0] == 'reportstrict':
                # Convert to a real boolean: the raw string "False" is
                # truthy and would switch strict reporting on.
                reportstrict=r[1].strip().lower() in ('true','1','yes','on')
    finally:
        # close the file even if a value failed to evaluate
        f.close()
if len(args) != 0:
print "error: args"
sys.exit(1)
logf=initlog(logfile)
now=time.time()
startsec=int(now) % INTERVAL
startsec=int(now) % interval
htab={}
if visual:
@@ -401,12 +504,30 @@ if DEBUG: log("Starting")
ilist=[]
sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("",PORT))
sock.bind(("",hb_port))
ilist.append(sock)
serv=SocketServer.TCPServer((THOST,TPORT),HtmlHandler)
serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler)
ilist.append(serv.fileno())
# Detach from the terminal unless -f (or the display mode) asked to stay in
# the foreground.  The parent exits right after the fork; the child carries on
# as the daemon.
if not forground:
    pid=os.fork()
    if pid > 0:
        # parent: report the child's pid if asked, then leave
        if verbose:
            print("daemoinizing... pid=%d" % pid)
        sys.exit(0)
    # child: nothing left to print to, so stop being chatty
    verbose=False
    # Point fds 0-2 at /dev/null instead of just closing them.  Closed
    # descriptors would be handed out again by the next open()/socket() call,
    # and stray writes to "stdout" would then land in that file or socket.
    _devnull=os.open("/dev/null", os.O_RDWR)
    for _fd in (0, 1, 2):
        os.dup2(_devnull, _fd)
    if _devnull > 2:
        os.close(_devnull)
    # sys.stdin is deliberately not closed: that would release fd 0 again.
    sys.stdout = NullDevice()
    sys.stderr = NullDevice()
    os.chdir("/")
    os.setsid()
    os.umask(0)
next=int(now)+1
sleep=next - now
while 1:
@@ -441,7 +562,7 @@ while 1:
serv.handle_request()
if now >= next:
next=now+1
if int(now) % INTERVAL == startsec:
if int(now) % interval == startsec:
updatestats()
checkoverdue()
if visual: