rename the main daemon
This commit is contained in:
@@ -0,0 +1,457 @@
|
||||
#!/usr/bin/env python
|
||||
# $Id: hbd,v 1.1 2005/07/14 19:26:01 andreas Exp $
|
||||
# Wait for heartbeat messages and act on them (or their absence)
|
||||
#
|
||||
import time, os, string, sys, socket, curses, atexit, select, SocketServer
|
||||
|
||||
|
||||
# Log file that initlog() opens for appending and log() writes to.
LOGF = "/home/andreas/public_html/messages/andreas"

# Known hosts, keyed by host name; the values are Host instances.
hosts = {}
# Display row number handed to the next Host created.
num = 0
# Number of statistics ticks counted so far by updatestats().
upcount = 0

# UDP port the heartbeat socket is bound to.
PORT = 50003
# TCP port and local address of the HTML status server.
TPORT = 50004
THOST = "10.99.1.4"

# Runtime switches; the main loop toggles these on the 'd' and 'v' keys.
DEBUG = False
verbose = False

# Default heartbeat interval and the extra slack allowed before a host is
# considered late (both compared against time.time() differences, i.e. seconds).
INTERVAL = 10
GRACE = 10

# Non-zero enables the curses user interface.
visual = 0

# Formatted log messages kept in memory for the curses and HTML views.
msgs = []

# Curses handles; they stay None until initcurses()/initwin() create them.
stdscr = None
win = None
msgw = None
msgwB = None
# Height of the message window; recomputed by initwin().
msgwHeight = 10
|
||||
|
||||
class Host:
    """Bookkeeping for one monitored host."""

    # Values taken by the `state` attribute.
    up = "up"
    down = "down"
    overdue = "overdue"
    OVERDUE = "OVERDUE"

    def __init__(self, name, addr):
        """Create a host in state "up", stamped with the current time."""
        global num
        # Claim the next display row from the module-wide counter.
        self.num = num
        num += 1
        created = time.time()
        self.name = name
        self.addr = addr
        self.lastbeat = created
        self.statetime = created
        self.upcount = 0
        self.uppercent = "n/a"
        self.state = Host.up
        self.interval = INTERVAL

    def getstate(self):
        """Return the current state string."""
        return self.state

    def newstate(self, state):
        """Switch to `state`; return the seconds spent in the previous state."""
        changed = time.time()
        elapsed = changed - self.statetime
        self.state = state
        self.statetime = changed
        if visual:
            displaystatetime(self.name)
        return elapsed
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
def dur(sec):
    """Format a duration in seconds as "H:MM:SS", or "M:SS" below one hour.

    sec -- duration in seconds; any fractional part is truncated.
           Negative values are treated as zero.

    Returns the formatted string, e.g. "1:01:01", "1:01" or "0:05".
    """
    total = max(int(sec), 0)
    # divmod floors on every Python version; "/" only does so for ints on
    # Python 2 and would yield fractional hours/minutes under true division.
    h, rest = divmod(total, 3600)
    m, s = divmod(rest, 60)
    if h > 0:
        return "%d:%02d:%02d" % (h, m, s)
    # Also covers m == 0, which renders as "0:SS".
    return "%d:%02d" % (m, s)
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def addhost(name, addr):
    """Register host `name` at `addr`, or record a new address for a known host.

    Keeps the address-to-name table htab in step with hosts.
    """
    if name not in hosts:
        hosts[name] = Host(name, addr)
        # Renumber every host so the display rows follow sorted name order.
        ordered = list(hosts.keys())
        ordered.sort()
        for row, hostname in enumerate(ordered):
            hosts[hostname].num = row
        htab[addr] = name
        if visual:
            display()
        return

    # Known host seen under a new address: move its htab entry.
    entry = hosts[name]
    del htab[entry.addr]
    entry.addr = addr
    if visual:
        displayaddr(name)
    htab[addr] = name
    log("%s, changed address to %s" % (name, addr))
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def on_exit():
    """Exit hook: leave curses mode when visual is set, close the log, print "exit"."""
    if visual:
        exitcurses()
    logf.close()
    print("exit")
|
||||
|
||||
|
||||
def initlog():
    """Open LOGF in append mode and keep the file object in the global logf."""
    global logf
    logf = open(LOGF, "a")
|
||||
#
|
||||
#
|
||||
#
|
||||
def initwin():
    """Create the host table, message frame and message windows.

    Sizes are derived from the current stdscr dimensions and the number of
    entries in htab; the results are stored in the globals win, msgwB, msgw
    and msgwHeight.
    """
    global win, msgw, msgwB, msgwHeight

    rows, cols = stdscr.getmaxyx()
    top = 2
    left = 0
    # One row per htab entry plus two more (presumably the border rows).
    tableheight = len(htab) + 2
    if DEBUG:
        log("initwin called with %d" % tableheight)

    # Host table; its lower corners are drawn as tee characters.
    win = curses.newwin(tableheight, cols, top, left)
    win.border(0, 0, 0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

    # Frame around the message area, starting on the table's last row.
    msgwB = curses.newwin(0, 0, tableheight + 1, left)
    msgwB.border(0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

    # Scrolling message window placed inside that frame.
    msgwHeight = rows - tableheight - 3
    msgw = curses.newwin(msgwHeight, cols - 2, tableheight + 2, left + 1)
    msgw.setscrreg(0, msgwHeight - 1)
    msgw.scrollok(1)

    stdscr.addstr(0, 0, "WatchArnsberg Version 1.0", curses.A_BOLD)
    stdscr.refresh()
    msgwB.refresh()
|
||||
#
|
||||
#
|
||||
#
|
||||
def checkoverdue():
    """Escalate hosts whose last heartbeat is too old.

    up      -> overdue  after more than interval + GRACE seconds of silence
    overdue -> OVERDUE  after more than interval*5 + GRACE seconds (logged)
    Hosts in state down are left alone.
    """
    for name in hosts:
        host = hosts[name]
        if host.state == Host.down:
            continue
        silent = now - host.lastbeat
        if host.state == Host.up:
            if silent > host.interval + GRACE:
                host.newstate(Host.overdue)
        elif host.state == Host.overdue:
            if silent > host.interval * 5 + GRACE:
                log("%s is overdue" % name)
                host.newstate(Host.OVERDUE)
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
def displaytime():
    """Redraw the clock on stdscr and each host's state and up-percentage."""
    rows, cols = stdscr.getmaxyx()
    clock = time.strftime("%H:%M:%S", time.localtime(now))
    stdscr.addstr(0, cols - 8, clock, curses.A_BOLD)

    for host in hosts.values():
        # In verbose mode show the time since the last beat instead of the
        # state name, except for hosts that are down.
        if verbose and host.state != Host.down:
            text = dur(now - host.lastbeat)
        else:
            text = host.getstate()
        if host.state == Host.OVERDUE:
            attr = curses.A_BOLD
        else:
            attr = 0
        row = host.num + 1
        win.addstr(row, 25, "%8s" % text, attr)
        win.addstr(row, 53, "%3s" % host.uppercent)
    win.refresh()
    stdscr.refresh()
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def displaystatetime(h, refresh=1):
    """Write the time of host h's last state change into its table row.

    h       -- host name (key into hosts)
    refresh -- when true, refresh the table window afterwards
    """
    host = hosts[h]
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(host.statetime))
    win.addstr(host.num + 1, 60, "%-17s" % stamp)
    if refresh:
        win.refresh()
|
||||
#
|
||||
#
|
||||
#
|
||||
def displayaddr(h, refresh=1):
    """Write host h's address into its table row.

    h       -- host name (key into hosts)
    refresh -- when true, refresh the table window afterwards
    """
    host = hosts[h]
    win.addstr(host.num + 1, 35, "%-16s" % host.addr)
    if refresh:
        win.refresh()
|
||||
#
|
||||
#
|
||||
#
|
||||
def displaybody():
    """Draw name, address and last-change time for every host, then refresh once."""
    for name, host in hosts.items():
        win.addstr(host.num + 1, 1, "%-24s" % (name))
        # The per-column helpers are told not to refresh; one refresh
        # follows after all rows are written.
        if host.addr is not None:
            displayaddr(name, 0)
        if host.statetime is not None:
            displaystatetime(name, 0)
    win.refresh()
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def displaymsgs():
    """Repaint the message window with the last msgwHeight entries of msgs."""
    recent = msgs[len(msgs) - msgwHeight:]
    for row, text in enumerate(recent):
        msgw.addstr(row, 0, text)
    msgw.refresh()
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def display():
    """Rebuild the whole curses screen; does nothing unless visual is set."""
    if not visual:
        return
    initwin()
    displaytime()
    displaybody()
    displaymsgs()
|
||||
|
||||
# Upper bound on the number of formatted messages kept in msgs.  The views
# only show the most recent ones (msgwHeight in curses, 30 in HTML), so
# older entries can be dropped.
MAXMSGS = 1000


def log(m, service="heartbeat"):
    """Record message m in memory, in the log file and in the message window.

    m       -- message text
    service -- tag written to the second field of the log file line

    The log file line has the form "<now as integer>|<service>|<m>".
    """
    msg = time.strftime("%b %d %H:%M:%S") + ": " + m + "\n"
    msgs.append(msg)
    # Trim in place so the list cannot grow without bound while the daemon
    # runs; slicing from the end keeps the newest entries.
    if len(msgs) > MAXMSGS:
        del msgs[:-MAXMSGS]

    logf.write("%d|%s|%s\n" % (now, service, m))
    logf.flush()

    # msgw stays None until the curses windows have been created.
    if msgw is not None:
        msgw.addstr(msg)
        msgw.clrtoeol()
        msgw.refresh()
|
||||
|
||||
#
|
||||
#
|
||||
def fromaddr(name, addr, boot):
    """Credit a heartbeat received from address addr.

    name -- host name carried in the packet; used only to register the
            address when it is not yet in htab
    addr -- source address of the packet
    boot -- accepted but not used here

    The host is looked up through htab by address.  Its lastbeat is set to
    now, and a host that was not up is switched back to up.
    """
    if addr not in htab:
        addhost(name, addr)
    host = hosts[htab[addr]]
    host.lastbeat = now
    previous = host.getstate()
    if previous != Host.up:
        elapsed = host.newstate(Host.up)
        # Recovery from the lower-case "overdue" state is not logged.
        if previous != 'overdue':
            log("%s, back after being %s for %s" % (host.name, previous, dur(elapsed)))
    # NOTE(review): counted for every heartbeat; the source's indentation was
    # lost, so confirm this was not meant to sit inside the branch above.
    host.upcount += 1
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def readsock():
    """Read one heartbeat datagram from sock and act on it.

    The payload is a ";"-separated list of "key=value" pairs.  Recognised
    keys are boot, shutdown, interval, name, msg and service; anything else
    is ignored.  A pair without "=" gets the value "0".
    """
    data, addr = sock.recvfrom(1024)
    boot = 0
    shutdown = 0
    name = "unknown"
    msg = None
    interval = INTERVAL
    # Needs a default: a packet with "msg" but no "service" would otherwise
    # hit an unbound local below.  Same default as log().
    service = "heartbeat"

    for pair in data.split(';'):
        # Split on the first "=" only, so a value may itself contain "=".
        fields = pair.split("=", 1)
        key = fields[0]
        if len(fields) == 2:
            val = fields[1]
        else:
            val = "0"

        if key == 'boot':
            boot += 1
        elif key == 'shutdown':
            shutdown += 1
        elif key == 'interval':
            try:
                interval = int(val)
            except ValueError:
                # Malformed number from the network: keep the current value
                # rather than let the exception escape.
                pass
        elif key == 'name':
            name = val
        elif key == 'msg':
            msg = val
        elif key == 'service':
            service = val

    if boot:
        log("%s booted" % name)
    if msg:
        log("%s msg: %s" % (name, msg), service=service)

    fromaddr(name, addr[0], boot)

    # fromaddr() resolves the host by source address, so `name` is not
    # guaranteed to be a key of hosts (packet without a name, or a known
    # address announcing a different name).  Fall back to the host that
    # owns the address instead of raising KeyError.
    host = hosts.get(name)
    if host is None:
        host = hosts[htab[addr[0]]]

    if shutdown:
        log("%s shutdown" % name)
        host.newstate(Host.down)
    host.interval = interval
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def updatestats():
    """Count one statistics tick and recompute every host's uppercent.

    uppercent is formatted as "%3.0f" from
    host.upcount * host.interval * 100 / (upcount * INTERVAL).
    """
    global upcount
    upcount += 1
    if upcount <= 0:
        return
    expected = upcount * INTERVAL
    for host in hosts.values():
        host.uppercent = "%3.0f" % ((host.upcount * host.interval * 100.0) / expected)
|
||||
#
|
||||
#
|
||||
#
|
||||
def initcurses():
    """Start curses mode (no echo, cbreak, keypad on) and store the screen in stdscr."""
    global stdscr
    stdscr = curses.initscr()
    curses.noecho()
    curses.cbreak()
    stdscr.keypad(1)
    if DEBUG:
        sys.stderr.write("curses init done: %s\n" % stdscr)
|
||||
|
||||
def exitcurses():
    """Undo the terminal settings made by initcurses() and end curses mode."""
    curses.nocbreak()
    stdscr.keypad(0)
    curses.echo()
    curses.endwin()
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
def _htmlescape(text):
    """Return str(text) with the HTML special characters & < > " escaped."""
    text = str(text)
    # "&" must be replaced first so the entities added below stay intact.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ('"', "&quot;")):
        text = text.replace(raw, entity)
    return text


class HtmlHandler(SocketServer.BaseRequestHandler):
    """Request handler that serves the heartbeat status page at "/".

    "/" yields the status page, any other URI a 404 page, and a request
    without a usable GET line a 400 page.
    """

    def handle(self):
        """Read one HTTP request from self.request and send the response."""
        uri = self._readrequest()
        if uri is None:
            code, cause = 400, "Bad Request"
        elif uri != "/":
            code, cause = 404, "Not Found"
        else:
            code, cause = 200, "OK"

        stamp = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(now))
        head = [
            "HTTP/1.0 %s %s" % (code, cause),
            "Date: %s" % stamp,
            "Server: WatchArnsberg",
            "Last-Modified: %s" % stamp,
            "Accept-Ranges: bytes",
            "Connection: close",
            "Content-Type: text/html; charset=ISO-8859-1",
        ]
        if code == 200:
            body = self._statuspage()
        else:
            body = self._errorpage(code, cause, uri)

        try:
            # sendall() keeps writing until everything is sent; a single
            # send() may transmit only part of the data.
            self.request.sendall("\r\n".join(head) + "\r\n\r\n" + "\n".join(body))
        except socket.error:
            # The client went away; there is nobody left to answer.
            pass

    def _readrequest(self):
        """Consume the request head; return the GET URI, or None if absent."""
        uri = None
        f = self.request.makefile()
        try:
            while 1:
                line = f.readline().strip()
                if not line:
                    # Blank line (or EOF) ends the request head.
                    break
                words = line.split()
                # Require the URI word so a bare "GET" cannot raise IndexError.
                if words[0] == "GET" and len(words) > 1:
                    uri = words[1]
        finally:
            f.close()
        return uri

    def _errorpage(self, code, cause, uri):
        """Return the lines of the HTML error page for code/cause."""
        if code == 404:
            # The URI comes from the client, so it is escaped before being
            # echoed back into the page.
            detail = '<p>The requested URL %s was not found on this server.</p>' % _htmlescape(uri)
        else:
            detail = '<p>The request could not be understood by this server.</p>'
        return [
            '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
            '<html><head>',
            '<title>%s %s</title>' % (code, cause),
            '</head><body>',
            '<h1>%s</h1>' % (cause),
            detail,
            '<hr>',
            '<address>WatchArnsberg (Unix) Server at somewhere.planix.com Port %d</address>' % TPORT,
            '</body></html>',
        ]

    def _statuspage(self):
        """Return the lines of the status page built from hosts and msgs."""
        res = [
            '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
            "<html>",
            "<head>",
            "<meta http-equiv=Refresh content=%d>\n" % 60,
            "</head>",
            '<body BGCOLOR="#FFFFFF" LINK="#008000" VLINK="#008000" BACKGROUND="/~andreas/images/tile.marble.gif">',
            "<H2>Heartbeat status at %s</H2>" % time.strftime("%H:%M:%S", time.localtime(now)),
            "<table>",
            "<tr><th>Host</th><th>State</th><th>IP Addr</th><th>Res</th><th>Last change</th></tr>\n",
        ]
        for h in hosts:
            host = hosts[h]
            changed = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(host.statetime))
            # Host names arrive in heartbeat packets, so every cell is escaped.
            res.append("<tr><td>%-24s</td><td>%-7s</td><td>%-16s</td><td>%-3s</td><td>%-17s</td></tr>\n" % (
                _htmlescape(h), _htmlescape(host.state), _htmlescape(host.addr),
                _htmlescape(host.uppercent), changed))
        res.append("</table>")
        res.append("<P>")
        # Only the 30 most recent messages are shown; message text can carry
        # packet content and is escaped as well.
        for m in msgs[-30:]:
            res.append("%s<BR>" % _htmlescape(m))
        res.append("</body>")
        res.append("</html>")
        return res
|
||||
|
||||
|
||||
#
|
||||
# Main
|
||||
#
|
||||
|
||||
|
||||
initlog()

now = time.time()
# Second within each INTERVAL at which the periodic statistics tick fires.
startsec = int(now) % INTERVAL

# Source address -> host name.
htab = {}
if visual:
    initcurses()
    display()
    stdscr.nodelay(1)

atexit.register(on_exit)
if DEBUG:
    log("Starting")

# Objects handed to select(): the UDP heartbeat socket and the descriptor
# of the TCP status server.
ilist = []

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("", PORT))
ilist.append(sock)

serv = SocketServer.TCPServer((THOST, TPORT), HtmlHandler)
ilist.append(serv.fileno())

# Time of the next once-per-second tick, and how long select() may block.
next = int(now) + 1
sleep = next - now
while 1:
    if visual:
        # stdscr is in nodelay mode, so getch() returns at once.
        c = stdscr.getch()
        if c == ord('c'):
            msgs = []
            display()
        elif c == ord('q'):
            break  # Exit the while()
        elif c == ord('d'):
            DEBUG = not DEBUG
        elif c == ord('v'):
            verbose = not verbose

    try:
        sr = select.select(ilist, [], [], sleep)
        now = time.time()
    except KeyboardInterrupt:
        sys.exit(0)
    except select.error:
        # sys.exc_info() is used instead of "except X, v" so this parses on
        # both Python 2 and Python 3.
        value = sys.exc_info()[1]
        if value.args[0] != 4:  # interrupted system call
            sys.stdout.write("%s %s\n" % (select.error, value))
            continue
        # Interrupted by a signal (presumably a terminal resize): rebuild
        # the curses screen from scratch.
        if visual:
            exitcurses()
            initcurses()
            display()
        continue

    for fh in sr[0]:
        if fh == sock:
            readsock()
        if fh == serv.fileno():
            serv.handle_request()

    if now >= next:
        # Re-align to the next whole second.  Using now+1 here accumulates
        # the wake-up latency of every tick until a whole second is skipped,
        # and the INTERVAL test below can then be missed for that round.
        next = int(now) + 1
        if int(now) % INTERVAL == startsec:
            updatestats()
            # NOTE(review): kept inside the INTERVAL tick; the source's
            # indentation was lost, so confirm it was not run every second.
            checkoverdue()
        if visual:
            stdscr.move(1, 0)
            stdscr.clrtoeol()
            displaytime()

    sleep = next - now
    if sleep < 0:
        sys.stderr.write("sleep is negaitive! %s next=%s\n" % (sleep, next))
        sleep = 0
    if DEBUG:
        sys.stderr.write("sleep=%s next=%s\n" % (sleep, next))
|
||||
|
||||
Reference in New Issue
Block a user