major changes to both hbc and hbd
This commit is contained in:
@@ -1,9 +1,13 @@
|
||||
#!/usr/bin/env python
|
||||
# $Id: hbc,v 1.2 2005/07/15 14:25:06 andreas Exp $
|
||||
# $Id: hbc,v 1.3 2005/07/19 20:31:05 andreas Exp $
|
||||
import sys, time, socket, os, signal, getopt, string
|
||||
|
||||
PORT=50003
|
||||
INTERVAL=20
|
||||
INTERVAL=10
|
||||
|
||||
class NullDevice:
    """File-like sink that throws away everything written to it.

    Installed as sys.stdout / sys.stderr once the process has detached
    from its terminal, so stray output has somewhere harmless to go.
    """

    def write(self, s):
        """Accept the string *s* and discard it."""
        return None
|
||||
|
||||
|
||||
def handler(signum, frame):
|
||||
@@ -12,9 +16,10 @@ def handler(signum, frame):
|
||||
return
|
||||
sys.exit(0)
|
||||
|
||||
helpflag=0
|
||||
verbose=0
|
||||
daemon=0
|
||||
msgonly=False
|
||||
helpflag=False
|
||||
verbose=False
|
||||
daemon=False
|
||||
optlist=[]
|
||||
args=[]
|
||||
msgboot=[]
|
||||
@@ -24,7 +29,7 @@ configfile="%s/.hbrc" % home
|
||||
try:
|
||||
optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
|
||||
except:
|
||||
helpflag=1
|
||||
helpflag=True
|
||||
|
||||
for o,a in optlist:
|
||||
if o == '-b':
|
||||
@@ -32,24 +37,37 @@ for o,a in optlist:
|
||||
elif o == '-c':
|
||||
configfile=a
|
||||
elif o == '-d':
|
||||
daemon=1
|
||||
daemon=True
|
||||
elif o == '-h':
|
||||
helpflag=1
|
||||
helpflag=True
|
||||
elif o == '-m':
|
||||
msgboot.append("service=%s" % "service")
|
||||
a.replace(';',':')
|
||||
msgboot.append("msg=%s" % a)
|
||||
msgonly=True
|
||||
elif o == '-v':
|
||||
verbose+=1
|
||||
verbose=True
|
||||
|
||||
|
||||
if helpflag:
|
||||
print "hbc HeartBeatClient"
|
||||
print "usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]"
|
||||
print
|
||||
print " -b indicate machine boot"
|
||||
print " -c configfile"
|
||||
print " -d daemonize"
|
||||
print " -h this help"
|
||||
print " -m send a message"
|
||||
print " -v verbose"
|
||||
print
|
||||
print """ config file can contain
|
||||
hb_hosts=('host1', 'host2', ..._
|
||||
hb_port=50003
|
||||
interval=20
|
||||
logfile=...
|
||||
logfmt={|test|msg}
|
||||
grace=SECONDS
|
||||
reportstrict={True|False}
|
||||
"""
|
||||
|
||||
sys.exit(1)
|
||||
@@ -76,6 +94,8 @@ if f:
|
||||
r=l[:-1].split('=')
|
||||
if r[0] == 'hb_hosts':
|
||||
hb_hosts=eval(r[1])
|
||||
if verbose:
|
||||
print "notice: cfg hb_hosts: %s" % hb_hosts
|
||||
elif r[0] == 'interval':
|
||||
interval=eval(r[1])
|
||||
elif r[0] == 'hb_port':
|
||||
@@ -99,7 +119,7 @@ if verbose:
|
||||
print "notice: interval: %s" % interval
|
||||
print "notice: iam: %s" % iam
|
||||
|
||||
if daemon:
|
||||
if not msgonly:
|
||||
msgboot.append("interval=%s" % interval)
|
||||
|
||||
if len(msgboot) > 0:
|
||||
@@ -118,16 +138,28 @@ if len(msgboot) > 0:
|
||||
else:
|
||||
break
|
||||
|
||||
|
||||
if not daemon:
|
||||
if msgonly:
|
||||
sys.exit(0)
|
||||
|
||||
if daemon:
|
||||
pid=os.fork()
|
||||
if pid > 0:
|
||||
if verbose:
|
||||
print "daemoinizing... pid=%d" % pid
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
os.close(0)
|
||||
os.close(1)
|
||||
os.close(2)
|
||||
sys.stdin.close()
|
||||
sys.stdout = NullDevice()
|
||||
sys.stderr = NullDevice()
|
||||
os.chdir("/")
|
||||
os.setsid()
|
||||
os.umask(0)
|
||||
|
||||
|
||||
msg="interval=%s;name=%s" % (interval, iam)
|
||||
up=1
|
||||
signal.signal(signal.SIGTERM, handler)
|
||||
@@ -152,3 +184,5 @@ for hb_host in hb_hosts:
|
||||
sock.sendto(msg, (hb_host, hb_port))
|
||||
time.sleep(1)
|
||||
sock.close()
|
||||
|
||||
|
||||
|
||||
@@ -1,23 +1,23 @@
|
||||
#!/usr/bin/env python
|
||||
# $Id: hbd,v 1.1 2005/07/14 19:26:01 andreas Exp $
|
||||
# $Id: hbd,v 1.2 2005/07/19 20:31:05 andreas Exp $
|
||||
# Wait for heartbeat messages and act on them (or their absence)
|
||||
#
|
||||
import time, os, string, sys, socket, curses, atexit, select, SocketServer
|
||||
import time, os, string, sys, socket, curses, atexit, select, SocketServer, getopt
|
||||
|
||||
|
||||
LOGF="/home/andreas/public_html/messages/andreas"
|
||||
LOGFILE="/home/andreas/public_html/messages/andreas"
|
||||
|
||||
hosts={}
|
||||
num=0
|
||||
upcount=0
|
||||
PORT=50003
|
||||
TPORT=50004
|
||||
THOST="10.99.1.4"
|
||||
THOST=""
|
||||
DEBUG=False
|
||||
verbose=False
|
||||
|
||||
INTERVAL=10
|
||||
GRACE=10
|
||||
GRACE=2
|
||||
|
||||
visual=0
|
||||
|
||||
@@ -29,11 +29,14 @@ msgw=None
|
||||
msgwB=None
|
||||
msgwHeight=10
|
||||
|
||||
class NullDevice:
    """File-like sink that throws away everything written to it.

    Used as the replacement for sys.stdout / sys.stderr when the daemon
    runs in the background.
    """

    def write(self, s):
        """Accept the string *s* and discard it."""
        return None
|
||||
|
||||
class Host:
|
||||
up="up"
|
||||
down="down"
|
||||
overdue="overdue"
|
||||
OVERDUE="OVERDUE"
|
||||
|
||||
def __init__(self, name, addr):
|
||||
global num
|
||||
@@ -46,12 +49,13 @@ class Host:
|
||||
self.uppercent="n/a"
|
||||
self.state="up"
|
||||
self.statetime=self.lastbeat
|
||||
self.interval=INTERVAL
|
||||
self.interval=0
|
||||
num+=1
|
||||
|
||||
def getstate(self):
|
||||
return self.state
|
||||
|
||||
# set new state, return number of secs in previous state
|
||||
def newstate(self, state):
|
||||
self.state=state
|
||||
now=time.time()
|
||||
@@ -109,9 +113,8 @@ def on_exit():
|
||||
print "exit"
|
||||
|
||||
|
||||
def initlog():
|
||||
global logf
|
||||
logf=open(LOGF,"a")
|
||||
def initlog(logfile):
    """Open *logfile* for appending and return the open file object.

    Existing content is kept; the file is created if it does not exist.
    Errors from open() propagate to the caller.
    """
    handle = open(logfile, "a")
    return handle
|
||||
#
|
||||
#
|
||||
#
|
||||
@@ -135,7 +138,7 @@ def initwin():
|
||||
msgw.setscrreg(0, msgwHeight-1)
|
||||
msgw.scrollok(1)
|
||||
|
||||
stdscr.addstr(0,0, "WatchArnsberg Version 1.0", curses.A_BOLD)
|
||||
stdscr.addstr(0,0, "hbd Version 1.0", curses.A_BOLD)
|
||||
stdscr.refresh()
|
||||
msgwB.refresh()
|
||||
#
|
||||
@@ -146,11 +149,13 @@ def checkoverdue():
|
||||
for h in hosts:
|
||||
if hosts[h].state == Host.down:
|
||||
continue
|
||||
if hosts[h].state == Host.up and now-hosts[h].lastbeat > hosts[h].interval+GRACE:
|
||||
if reportstrict:
|
||||
timeout=hosts[h].interval+grace
|
||||
else:
|
||||
timeout=hosts[h].interval*5+grace
|
||||
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
|
||||
if reportstrict: log("%s is overdue" % h)
|
||||
hosts[h].newstate(Host.overdue)
|
||||
elif hosts[h].state == Host.overdue and now-hosts[h].lastbeat > hosts[h].interval*5+GRACE:
|
||||
log("%s is overdue" % h)
|
||||
hosts[h].newstate(Host.OVERDUE)
|
||||
|
||||
#
|
||||
#
|
||||
@@ -165,7 +170,7 @@ def displaytime():
|
||||
attr=0
|
||||
if verbose and hosts[h].state != Host.down:
|
||||
d=dur(now-hosts[h].lastbeat)
|
||||
if hosts[h].state == Host.OVERDUE:
|
||||
if hosts[h].state == Host.overdue:
|
||||
attr=curses.A_BOLD
|
||||
win.addstr(hosts[h].num+1, 25, "%8s" % d, attr)
|
||||
win.addstr(hosts[h].num+1, 53, "%3s" % hosts[h].uppercent )
|
||||
@@ -223,7 +228,10 @@ def display():
|
||||
def log(m, service="heartbeat"):
|
||||
msg=time.strftime("%b %d %H:%M:%S")+": "+m+"\n"
|
||||
msgs.append(msg)
|
||||
if logfmt == "msg":
|
||||
m2="%d|%s|%s\n" % (now, service, m)
|
||||
else:
|
||||
m2=msg
|
||||
logf.write(m2)
|
||||
logf.flush()
|
||||
if msgw != None:
|
||||
@@ -236,17 +244,16 @@ def log(m, service="heartbeat"):
|
||||
|
||||
#
|
||||
#
|
||||
def fromaddr(name, addr, boot):
|
||||
def fromaddr(name, addr, boot, interval):
|
||||
global htab
|
||||
|
||||
if not htab.has_key(addr):
|
||||
addhost(name, addr)
|
||||
host=hosts[htab[addr]]
|
||||
host.lastbeat=now
|
||||
if host.getstate() != Host.up:
|
||||
if host.getstate() != Host.up and interval > 0:
|
||||
lasts=host.state
|
||||
d=host.newstate(Host.up)
|
||||
if lasts != 'overdue':
|
||||
log("%s, back after being %s for %s" % (host.name, lasts, dur(d)))
|
||||
host.upcount+=1
|
||||
|
||||
@@ -261,7 +268,7 @@ def readsock():
|
||||
shutdown=0
|
||||
name="unknown"
|
||||
msg=None
|
||||
interval=INTERVAL
|
||||
interval=0
|
||||
for pair in pairs:
|
||||
l=string.split(pair,"=")
|
||||
key=l[0]
|
||||
@@ -285,10 +292,11 @@ def readsock():
|
||||
log("%s booted" % name)
|
||||
if msg:
|
||||
log("%s msg: %s" % (name, msg),service=service)
|
||||
fromaddr(name, addr[0], boot)
|
||||
fromaddr(name, addr[0], boot, interval)
|
||||
if shutdown:
|
||||
log("%s shutdown" % name)
|
||||
hosts[name].newstate(Host.down)
|
||||
if interval > 0:
|
||||
hosts[name].interval=interval
|
||||
|
||||
|
||||
@@ -300,7 +308,7 @@ def updatestats():
|
||||
upcount+=1
|
||||
for h in hosts:
|
||||
if upcount > 0:
|
||||
hosts[h].uppercent="%3.0f" % ((hosts[h].upcount*hosts[h].interval*100.0)/(upcount*INTERVAL))
|
||||
hosts[h].uppercent="%3.0f" % ((hosts[h].upcount*hosts[h].interval*100.0)/(upcount*interval))
|
||||
#
|
||||
#
|
||||
#
|
||||
@@ -339,7 +347,7 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
|
||||
cause="OK"
|
||||
self.request.send("HTTP/1.0 %s %s\r\n" % (code, cause))
|
||||
self.request.send("Date: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now)))
|
||||
self.request.send("Server: WatchArnsberg\r\n")
|
||||
self.request.send("Server: hbd\r\n")
|
||||
self.request.send("Last-Modified: %s\r\n" % time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now)))
|
||||
self.request.send("Accept-Ranges: bytes\r\n")
|
||||
self.request.send("Connection: close\r\n")
|
||||
@@ -353,7 +361,7 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
|
||||
res.append('<h1>%s</h1>' % (cause))
|
||||
res.append('<p>The requested URL %s was not found on this server.</p>' % uri)
|
||||
res.append('<hr>')
|
||||
res.append('<address>WatchArnsberg (Unix) Server at somewhere.planix.com Port %d</address>' % TPORT)
|
||||
res.append('<address>hbd (Unix) Server at %s Port %d</address>' % (hbd_host, hbd_port))
|
||||
res.append('</body></html>')
|
||||
|
||||
else:
|
||||
@@ -383,11 +391,106 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
|
||||
# Main
|
||||
#
|
||||
|
||||
helpflag=False
|
||||
forground=False
|
||||
optlist=[]
|
||||
args=[]
|
||||
home=os.environ['HOME']
|
||||
configfile="%s/.hbrc" % home
|
||||
try:
|
||||
optlist, args = getopt.getopt(sys.argv[1:], 'c:dfhv')
|
||||
except:
|
||||
helpflag=True
|
||||
|
||||
initlog()
|
||||
# Apply the parsed command-line switches.  Every option sets exactly one
# flag, so the tests form a single if/elif chain (the '-d' test used to
# start a second, independent 'if').
for o,a in optlist:
    if o == '-c':
        configfile=a
    elif o == '-d':
        visual=True
    elif o == '-f':
        forground=True
    elif o == '-h':
        helpflag=True
    elif o == '-v':
        verbose=True
|
||||
|
||||
|
||||
if helpflag:
|
||||
print "hbc HeartBeatDaemon"
|
||||
print "usage: hbd [-dfhv] [-c configfile]"
|
||||
print
|
||||
print " -c configfile"
|
||||
print " -d display"
|
||||
print " -f run in foreground"
|
||||
print " -h this help"
|
||||
print " -v verbose"
|
||||
print
|
||||
print """ config file can contain
|
||||
logfile=/var/log/heartbeat.log
|
||||
logfmt=[text|msg]
|
||||
hb_port=50003
|
||||
interval=20
|
||||
hbd_port=50004
|
||||
hbd_host=www.domain.com
|
||||
grace=1
|
||||
"""
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
if visual:
|
||||
forground=True
|
||||
#
|
||||
# set defaults
|
||||
|
||||
hb_port=PORT
|
||||
hbd_host=THOST
|
||||
hbd_port=TPORT
|
||||
logfile=LOGFILE
|
||||
logfmt="text"
|
||||
interval=INTERVAL
|
||||
grace=GRACE
|
||||
reportstrict=False
|
||||
|
||||
try:
|
||||
f=open(configfile,"r")
|
||||
if verbose: print "notice: using config file %s" % configfile
|
||||
except:
|
||||
print "warning: running without conifig file: %s" % configfile
|
||||
f=None
|
||||
|
||||
if f:
|
||||
while 1:
|
||||
l=f.readline()
|
||||
if len(l) == 0:
|
||||
break
|
||||
r=l[:-1].split('=')
|
||||
if r[0] == 'interval':
|
||||
interval=eval(r[1])
|
||||
elif r[0] == 'grace':
|
||||
grace=eval(r[1])
|
||||
elif r[0] == 'hbd_port':
|
||||
hbd_port=eval(r[1])
|
||||
elif r[0] == 'hbd_host':
|
||||
hbd_host=r[1]
|
||||
elif r[0] == 'hb_port':
|
||||
hb_port=eval(r[1])
|
||||
elif r[0] == 'logfile':
|
||||
logfile=r[1]
|
||||
elif r[0] == 'logfmt':
|
||||
logfmt=r[1]
|
||||
elif r[0] == 'reportstrict':
|
||||
reportstrict=r[1]
|
||||
f.close()
|
||||
|
||||
if len(args) != 0:
|
||||
print "error: args"
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
logf=initlog(logfile)
|
||||
|
||||
now=time.time()
|
||||
startsec=int(now) % INTERVAL
|
||||
startsec=int(now) % interval
|
||||
|
||||
htab={}
|
||||
if visual:
|
||||
@@ -401,12 +504,30 @@ if DEBUG: log("Starting")
|
||||
ilist=[]
|
||||
|
||||
sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
sock.bind(("",PORT))
|
||||
sock.bind(("",hb_port))
|
||||
ilist.append(sock)
|
||||
|
||||
serv=SocketServer.TCPServer((THOST,TPORT),HtmlHandler)
|
||||
serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler)
|
||||
ilist.append(serv.fileno())
|
||||
|
||||
if not forground:
|
||||
pid=os.fork()
|
||||
if pid > 0:
|
||||
if verbose:
|
||||
print "daemoinizing... pid=%d" % pid
|
||||
sys.exit(0)
|
||||
|
||||
verbose=False
|
||||
os.close(0)
|
||||
os.close(1)
|
||||
os.close(2)
|
||||
sys.stdin.close()
|
||||
sys.stdout = NullDevice()
|
||||
sys.stderr = NullDevice()
|
||||
os.chdir("/")
|
||||
os.setsid()
|
||||
os.umask(0)
|
||||
|
||||
next=int(now)+1
|
||||
sleep=next - now
|
||||
while 1:
|
||||
@@ -441,7 +562,7 @@ while 1:
|
||||
serv.handle_request()
|
||||
if now >= next:
|
||||
next=now+1
|
||||
if int(now) % INTERVAL == startsec:
|
||||
if int(now) % interval == startsec:
|
||||
updatestats()
|
||||
checkoverdue()
|
||||
if visual:
|
||||
|
||||
Reference in New Issue
Block a user