Files
heartbeat/watcharnsberg
T
2005-02-27 16:41:40 +00:00

385 lines
7.0 KiB
Python
Executable File

#!/usr/bin/env python
# $Id: watcharnsberg,v 1.2 2005/02/27 16:41:40 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence)
#
import time, os, string, sys, socket, curses, atexit, select
# Path of the log file; initlog() opens it in append mode.
LOGF="/home/andreas/public_html/messages/andreas"
# Host objects keyed by host name.
hosts={}
# Row number handed to the next Host created (Host.__init__ increments it).
num=0
# Number of accounting periods counted so far by dominute().
upcount=0
# UDP port the listening socket is bound to.
PORT=50003
# Debug output switch; toggled with the 'd' key when the curses UI is on.
DEBUG=False
# When true the status column shows time since the last heartbeat instead
# of the state name; toggled with the 'v' key when the curses UI is on.
verbose=False
# Default heartbeat interval in seconds; also the period of dominute().
INTERVAL=10
# Extra seconds allowed on top of a host's interval before it is overdue.
GRACE=10
# Non-zero enables the curses user interface.
visual=0
# Formatted log lines, appended by log() and shown in the message window.
msgs=[]
# curses windows: whole screen, host table, message area and its border.
# They stay None until initcurses()/initwin() create them.
stdscr=None
win=None
msgw=None
msgwB=None
# Height of the message window; recomputed by initwin().
msgwHeight=10
class Host:
    """Bookkeeping record for one monitored host.

    Attributes set by the constructor:
      name, addr  -- host name and the address it was registered with
      num         -- row index taken from the module-level counter `num`
      last        -- time of the most recent heartbeat
      upcount     -- heartbeat counter (starts at 0)
      up          -- 1 while the host counts as up, 0 once it is "down"
      uppercent   -- display string for the uptime percentage ("n/a" initially)
      state       -- current state name, initially "up"
      statetime   -- time at which the current state was entered
      interval    -- expected heartbeat interval, initially INTERVAL
    """

    def __init__(self, name, addr):
        global num
        created = time.time()
        self.name = name
        self.addr = addr
        self.num = num
        self.last = created
        self.upcount = 0
        self.up = 1
        self.uppercent = "n/a"
        self.state = "up"
        self.statetime = created
        self.interval = INTERVAL
        # Reserve the next row for the next host.
        num += 1

    def getstate(self):
        """Return the current state name."""
        return self.state

    def newstate(self, state):
        """Enter `state` and return how many seconds the previous state lasted.

        Only "up" and "down" change the `up` flag; any other state name
        leaves it as it was.  When the curses UI is active the state
        timestamp shown for this host is redrawn.
        """
        stamp = time.time()
        elapsed = stamp - self.statetime
        self.state = state
        self.statetime = stamp
        flag = {"up": 1, "down": 0}.get(state)
        if flag is not None:
            self.up = flag
        if visual:
            displaystatetime(self.name)
        return elapsed
#
#
def dur(sec):
    """Format a duration given in seconds as a short clock-style string.

    `sec` is truncated to an integer first.  The result is "H:MM:SS" when
    at least one hour has elapsed, "M:SS" when at least one minute has
    elapsed, and "0:SS" otherwise.
    """
    sec = int(sec)
    # divmod performs floor division regardless of the interpreter's
    # division mode, so the result does not depend on classic "/" semantics.
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    if h > 0:
        return "%d:%02d:%02d" % (h, m, s)
    if m > 0:
        return "%d:%02d" % (m, s)
    return "0:%02d" % s
#
#
#
def addhost(name, addr):
    """Register host `name` at `addr`, or move a known host to a new address.

    For a new name a Host is created, all hosts are renumbered in sorted
    name order, the address is entered in htab and, in visual mode, the
    whole screen is rebuilt.  For a known name the old address mapping is
    replaced, the address column is redrawn in visual mode and the change
    is logged.
    """
    if name not in hosts:
        hosts[name] = Host(name, addr)
        # Keep display rows in alphabetical order of host name.
        ordered = list(hosts)
        ordered.sort()
        for row, hostname in enumerate(ordered):
            hosts[hostname].num = row
        htab[addr] = name
        if visual:
            display()
        return

    known = hosts[name]
    del htab[known.addr]
    known.addr = addr
    if visual:
        displayaddr(name)
    htab[addr] = name
    log("%s, changed address to %s" % (name, addr))
#
#
#
def on_exit():
    """Exit handler: restore the terminal if curses is active, close the log."""
    if visual:
        exitcurses()
    logf.close()
    print("exit")
def initlog():
    """Open the file named by LOGF for appending and store it in global logf."""
    global logf
    logf = open(LOGF, "a")
#
#
#
def initwin():
    """(Re)create the curses windows below the title line.

    Builds the host table window (`win`, len(htab)+2 rows starting at
    screen row 2), the border window of the message area (`msgwB`) and the
    scrolling message window inside it (`msgw`), stores the message window
    height in `msgwHeight`, and draws the title on stdscr.
    """
    global win, msgw, msgwB, msgwHeight
    rows, cols = stdscr.getmaxyx()
    left = 0
    top = 2
    height = len(htab) + 2
    if DEBUG:
        log("initwin called with %d" % height)

    # Host table; its bottom corners are drawn as tee characters.
    win = curses.newwin(height, cols, top, left)
    win.border(0, 0, 0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

    # Frame of the message area; its top corners are tee characters too.
    msgwB = curses.newwin(0, 0, height + 1, left)
    msgwB.border(0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

    # Scrolling message window placed inside that frame.
    msgwHeight = rows - height - 3
    msgw = curses.newwin(msgwHeight, cols - 2, height + 2, left + 1)
    msgw.setscrreg(0, msgwHeight - 1)
    msgw.scrollok(1)

    stdscr.addstr(0, 0, "WatchArnsberg Version 1.0", curses.A_BOLD)
    stdscr.refresh()
    msgwB.refresh()
#
#
#
def displaytime():
    """Update the clock and every host's status column; flag overdue hosts.

    Reads the module globals now, hosts, visual, verbose and GRACE.  The
    overdue detection (log entry plus Host.newstate("overdue")) runs even
    when the curses UI is off; only the drawing is guarded by `visual`.
    """
    # NOTE(review): the nesting below was reconstructed from a listing that
    # had lost its indentation.  In particular, confirm whether the
    # state-change check and the bold check belong directly under the first
    # "overdue" test (as written here) or under the 5x-interval test.
    if visual:
        # Clock in the top-right corner of the screen.
        maxY,maxX=stdscr.getmaxyx()
        stdscr.addstr(0,maxX-8, time.strftime("%H:%M:%S", time.localtime(now)), curses.A_BOLD)
#       stdscr.addstr(0,67,"%s.%04d" % (time.strftime("%H:%M:%S", time.localtime(now)),int((now-int(now))*10000) ), curses.A_BOLD)
    for h in hosts:
        if hosts[h].last != 0:
            attr=0
            # Verbose mode shows the time since the last heartbeat,
            # otherwise the state name is shown.
            if verbose:
                d=dur(now-hosts[h].last)
            else:
                d=hosts[h].getstate()
            if not hosts[h].up:
                # A host that is not up always shows its state name.
                d=hosts[h].getstate()
            elif now-hosts[h].last > hosts[h].interval+GRACE:
                # No heartbeat for more than one interval plus grace.
                d="overdue"
                if now-hosts[h].last > hosts[h].interval*5+GRACE:
                    d="OVERDUE"
                # Log and change state only on the transition.
                if hosts[h].getstate() != "overdue":
                    log("%s is overdue" % h)
                    hosts[h].newstate("overdue")
                # Highlight hosts missing for more than 60 intervals.
                if now-hosts[h].last > hosts[h].interval*60+GRACE:
                    attr=curses.A_BOLD
            if visual:
                # Status text at column 25, uptime percentage at column 53.
                win.addstr(hosts[h].num+1, 25, "%8s" % d, attr)
                win.addstr(hosts[h].num+1, 53, "%3s" % hosts[h].uppercent )
    if visual:
        win.refresh()
        stdscr.refresh()
#
#
#
def displaystatetime(h, refresh=1):
    """Draw the state-change timestamp of host `h` at column 60 of its row.

    The host window is refreshed afterwards unless `refresh` is false.
    """
    host = hosts[h]
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(host.statetime))
    win.addstr(host.num + 1, 60, "%-17s" % stamp)
    if refresh:
        win.refresh()
#
#
#
def displayaddr(h, refresh=1):
    """Draw the address of host `h` at column 35 of its row.

    The host window is refreshed afterwards unless `refresh` is false.
    """
    host = hosts[h]
    win.addstr(host.num + 1, 35, "%-16s" % host.addr)
    if refresh:
        win.refresh()
#
#
#
def displaybody():
    """Draw name, address and state timestamp for every host, then refresh.

    The per-field helpers are called with refresh disabled so the window is
    refreshed only once, after all rows have been written.
    """
    for name, host in hosts.items():
        win.addstr(host.num + 1, 1, "%-24s" % name)
        if host.addr is not None:
            displayaddr(name, 0)
        if host.statetime is not None:
            displaystatetime(name, 0)
    win.refresh()
#
#
#
def displaymsgs():
    """Redraw the message window with the most recent log lines.

    At most msgwHeight entries from the end of `msgs` are written, one per
    row starting at row 0, and the window is refreshed.
    """
    # Clamp the start index at 0: with fewer messages than rows the plain
    # difference is negative and would slice from the END of the list,
    # dropping the oldest messages even though there is room for them.
    first = max(0, len(msgs) - msgwHeight)
    for row, text in enumerate(msgs[first:]):
        msgw.addstr(row, 0, text)
    msgw.refresh()
#
#
#
def display():
    """Rebuild the whole screen.

    In visual mode the windows are recreated first.  displaytime() is
    always called; the host rows and the message window are redrawn only
    in visual mode.
    """
    if visual:
        initwin()
    displaytime()
    if not visual:
        return
    displaybody()
    displaymsgs()
def log(m, service="heartbeat"):
    """Record message `m` in memory, in the log file and on screen.

    The timestamped text is appended to `msgs`; a "time|service|message"
    line, stamped with the global `now`, is written to the log file and
    flushed immediately; and, once the message window exists, the text is
    appended there as well.

    m       -- message text, without a trailing newline
    service -- service tag stored in the log file line
    """
    msg = time.strftime("%b %d %H:%M:%S") + ": " + m + "\n"
    msgs.append(msg)
    logf.write("%d|%s|%s\n" % (now, service, m))
    logf.flush()
    # msgw stays None until initwin() has created the message window.
    if msgw is not None:
        msgw.addstr(msg)
        msgw.clrtoeol()
        msgw.refresh()
#
#
def fromaddr(name, addr, boot):
    """Account for a heartbeat received from `addr`.

    An address not yet in htab is registered via addhost().  The host
    mapped to the address gets its last-seen time set to the global `now`
    and its heartbeat counter incremented; if it was not in state "up" it
    is switched back and the length of the outage is logged.

    `boot` is accepted for the caller's convenience but not used here.
    """
    if addr not in htab:
        addhost(name, addr)
    host = hosts[htab[addr]]
    host.last = now
    previous = host.getstate()
    if previous != "up":
        outage = host.newstate("up")
        log("%s, back after being %s for %s" % (host.name, previous, dur(outage)))
    host.upcount += 1
#
#
#
def readsock():
    """Read one heartbeat datagram from `sock` and act on it.

    The payload is a ';'-separated list of "key=value" pairs.  Recognised
    keys are boot, shutdown, interval, name, msg and service; a pair that
    does not split into exactly one key and one value is treated as having
    the value "0".  Boot and message events are logged, the heartbeat is
    accounted for via fromaddr(), a shutdown marks the host "down", and
    the host's expected interval is updated.
    """
    data, addr = sock.recvfrom(1024)
    boot = 0
    shutdown = 0
    name = "unknown"
    msg = None
    # Same default as log(); without it a packet carrying "msg" but no
    # "service" key would hit an unbound local below.
    service = "heartbeat"
    interval = INTERVAL
    for pair in data.split(';'):
        fields = pair.split("=")
        key = fields[0]
        if len(fields) != 2:
            val = "0"
        else:
            val = fields[1]
        if key == 'boot':
            boot += 1
        elif key == 'shutdown':
            shutdown += 1
        elif key == 'interval':
            # The datagram is untrusted input: a malformed number must not
            # take the whole monitor down.
            try:
                interval = int(val)
            except ValueError:
                log("ignoring invalid interval %r from %s" % (val, addr[0]))
        elif key == 'name':
            name = val
        elif key == 'msg':
            msg = val
        elif key == 'service':
            service = val
    if boot:
        log("%s booted" % name)
    if msg:
        log("%s msg: %s" % (name, msg), service=service)
    fromaddr(name, addr[0], boot)
    # fromaddr() only registers `name` for addresses it has not seen yet, so
    # a known address announcing a different (or no) name would not be in
    # `hosts`; fall back to the host registered for the sending address.
    host = hosts.get(name)
    if host is None:
        host = hosts[htab[addr[0]]]
    if shutdown:
        log("%s shutdown" % name)
        host.newstate("down")
    host.interval = interval
#
#
#
def dominute():
    """Count one accounting period and recompute every host's uptime string.

    The percentage is the host's heartbeat count times its interval,
    relative to the total time covered by the periods counted so far
    (upcount * INTERVAL).
    """
    global upcount
    upcount += 1
    if upcount <= 0:
        return
    for host in hosts.values():
        host.uppercent = "%3.0f" % (
            (host.upcount * host.interval * 100.0) / (upcount * INTERVAL))
def initcurses():
    """Start curses and store the screen window in global stdscr.

    Echo is switched off, cbreak mode is switched on and keypad
    translation is enabled for the screen window.
    """
    global stdscr
    stdscr = curses.initscr()
    curses.noecho()
    curses.cbreak()
    stdscr.keypad(1)
    if DEBUG:
        sys.stderr.write("curses init done: %s\n" % (stdscr,))
def exitcurses():
    """Undo the terminal settings made by initcurses() and end curses mode."""
    curses.nocbreak()
    stdscr.keypad(0)
    curses.echo()
    curses.endwin()
#
# Main
#
# NOTE(review): the block nesting of this section was reconstructed from a
# listing that had lost its indentation -- confirm against the original.
import errno

initlog()
now = time.time()
# Second-within-INTERVAL at start-up; dominute() runs whenever the clock
# reaches this offset again.
startsec = int(now) % INTERVAL
# Maps a sender address to the host name registered for it.
htab = {}
if visual:
    initcurses()
    display()
    # Make getch() non-blocking so key polling never stalls the loop.
    stdscr.nodelay(1)
atexit.register(on_exit)
if DEBUG:
    log("Starting")
ilist = []
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("", PORT))
ilist.append(sock)
# `next` is the whole second at which the next tick is due; `sleep` is the
# select() timeout that gets us there.
next = int(now) + 1
sleep = next - now
while 1:
    if visual:
        c = stdscr.getch()
        if c == ord('c'):
            # Clear the message history and redraw.
            msgs = []
            display()
        elif c == ord('q'):
            break  # Exit the while()
        elif c == ord('d'):
            DEBUG = not DEBUG
        elif c == ord('v'):
            verbose = not verbose
    try:
        sr = select.select(ilist, [], [], sleep)
        now = time.time()
    except KeyboardInterrupt:
        sys.exit(0)
    except select.error:
        # Fetch the exception via sys.exc_info() so this parses on every
        # Python version.
        value = sys.exc_info()[1]
        if value.args[0] != errno.EINTR:  # anything but an interrupted call
            raise os.error(value)
        # select() was interrupted by a signal: restart curses and redraw.
        if visual:
            exitcurses()
            initcurses()
            display()
        continue
    for fh in sr[0]:
        if fh == sock:
            readsock()
    if now >= next:
        # Resynchronise to the clock instead of stepping by one second.
        # After a stall longer than a second a plain "next += 1" stays
        # behind `now`, the timeout below becomes negative and select()
        # fails with an error that is not EINTR.
        next = int(now) + 1
        if int(now) % INTERVAL == startsec:
            dominute()
        if visual:
            if DEBUG:
                stdscr.addstr(1, 0, "len(htab) is %s, Next is %s" % (len(htab), next))
            else:
                stdscr.move(1, 0)
                stdscr.clrtoeol()
        displaytime()
    sleep = next - now
    if DEBUG:
        sys.stderr.write("sleep=%s next=%s\n" % (sleep, next))