#!/usr/bin/env python
# $Id: watcharnsberg,v 1.5 2005/05/06 14:39:27 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence)
#
import time, os, string, sys, socket, curses, atexit, select, SocketServer


# Log file that log() appends to; opened by initlog().
LOGF="/home/andreas/public_html/messages/andreas"

# Host table: host name -> Host instance (filled by addhost()).
hosts={}
# Row number handed to the next Host(); incremented by Host.__init__.
num=0
# Number of statistics ticks so far; incremented by updatestats().
upcount=0
# UDP port the heartbeat socket is bound to.
PORT=50003
# TCP port and address the HtmlHandler status server listens on.
TPORT=50004
THOST="10.99.1.4"
# Runtime switches; in visual mode the 'd' and 'v' keys toggle them.
DEBUG=False
verbose=False

# Default heartbeat interval of a host, in seconds (compared against
# time.time() differences in checkoverdue()).
INTERVAL=10
# Extra seconds of slack added to the interval before a host counts as late.
GRACE=10

# Non-zero enables the curses user interface.
visual=0

# Formatted log lines kept in memory for the message pane and the HTML pages.
msgs=[]

# curses handles; they stay None until initcurses()/initwin() create them.
stdscr=None
win=None
msgw=None
msgwB=None
# Height of the message pane; recomputed by initwin().
msgwHeight=10

class Host:
	"""Bookkeeping record for one monitored host.

	The class attributes up, down, overdue and OVERDUE are the values
	stored in ``state``.

	Instance attributes:
	  name       -- host name (also its key in the ``hosts`` table)
	  addr       -- address the host was last seen at
	  num        -- row index used by the display functions
	  lastbeat   -- time.time() value recorded at construction; updated
	                by fromaddr() on every heartbeat
	  upcount    -- number of heartbeats counted by fromaddr()
	  uppercent  -- formatted availability figure, "n/a" until
	                updatestats() fills it in
	  state      -- one of the state constants above
	  statetime  -- time.time() value of the last state change
	  interval   -- expected heartbeat interval in seconds
	"""
	up="up"
	down="down"
	overdue="overdue"
	OVERDUE="OVERDUE"

	def __init__(self, name, addr):
		"""Create a host in state ``up`` and take the next row number.

		name -- host name
		addr -- address the first heartbeat came from
		"""
		global num
		self.name=name
		self.addr=addr
		self.num=num
		self.lastbeat=time.time()
		self.upcount=0
		self.uppercent="n/a"
		self.state=Host.up
		# The state is considered to have started with the first beat.
		self.statetime=self.lastbeat
		self.interval=INTERVAL
		num+=1

	def getstate(self):
		"""Return the current state string."""
		return self.state

	def newstate(self, state):
		"""Switch to *state* and return the seconds spent in the old state.

		In visual mode the host's "last change" column is redrawn.
		"""
		self.state=state
		now=time.time()
		s=now-self.statetime
		self.statetime=now
		if visual:
			displaystatetime(self.name)
		return s


#
#
def dur(sec):
	"""Format a duration in seconds as "H:MM:SS" or "M:SS".

	sec -- duration in seconds (int or float); fractions are truncated
	       and negative values (e.g. after a clock step) count as 0

	Returns "H:MM:SS" when the duration is an hour or longer, otherwise
	"M:SS" (so anything under a minute reads "0:SS").
	"""
	sec=max(int(sec), 0)
	# divmod() floors explicitly, so the result does not depend on the
	# meaning of the "/" operator.
	m, s=divmod(sec, 60)
	h, m=divmod(m, 60)
	if h > 0:
		return "%d:%02d:%02d" % (h, m, s)
	return "%d:%02d" % (m, s)


#
#
#
def addhost(name, addr):
	"""Register host *name* at *addr*, or record a known host's new address.

	name -- host name taken from the heartbeat packet
	addr -- sender address of that packet

	For a known host the address table entry is moved to *addr* and the
	change is logged.  For a new host a Host record is created and all
	hosts are renumbered so that their display rows follow the sorted
	host names.
	"""
	if name in hosts:
		host=hosts[name]
		# Only drop the old mapping if it still points at this host; a
		# blind "del" would raise KeyError on a missing entry or remove
		# a mapping that meanwhile belongs to another host.
		if htab.get(host.addr) == name:
			del htab[host.addr]
		host.addr=addr
		if visual:
			displayaddr(name)
		htab[addr]=name
		log("%s, changed address to %s" % (name, addr))
		return

	hosts[name]=Host(name, addr)
	names=list(hosts.keys())
	names.sort()
	for row, n in enumerate(names):
		hosts[n].num=row
	htab[addr]=name
	if visual:
		# The table grew by one row, so rebuild all windows.
		display()
		
#
#
#
def on_exit():
	"""atexit hook: restore the terminal, close the log file, say goodbye."""
	try:
		if visual:
			exitcurses()
	finally:
		# Close the log even if the curses teardown raises.
		logf.close()
	print("exit")


def initlog():
	"""Open the log file LOGF for appending and keep it in global ``logf``."""
	global logf
	logf = open(LOGF, "a")
#
#
#
def initwin():
	"""Create the curses windows for the host table and the message pane.

	Sets the globals ``win`` (host table), ``msgwB`` (frame around the
	messages), ``msgw`` (scrolling message area inside that frame) and
	``msgwHeight`` (number of lines in ``msgw``), then draws the title.
	"""
	global win, msgw, msgwB, msgwHeight

	rows, cols = stdscr.getmaxyx()

	left = 0
	top = 2
	# One row per known address plus two border rows.
	tableHeight = len(htab) + 2
	if DEBUG:
		log("initwin called with %d" % tableHeight)

	win = curses.newwin(tableHeight, cols, top, left)
	# Tee characters in the last two (bottom corner) positions.
	win.border(0, 0, 0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

	# Frame for the message area, with tee characters as top corners.
	msgwB = curses.newwin(0, 0, tableHeight + 1, left)
	msgwB.border(0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

	# The message window sits one cell inside the frame and scrolls.
	msgwHeight = rows - tableHeight - 3
	msgw = curses.newwin(msgwHeight, cols - 2, tableHeight + 2, left + 1)
	msgw.setscrreg(0, msgwHeight - 1)
	msgw.scrollok(1)

	stdscr.addstr(0, 0, "WatchArnsberg Version 1.0", curses.A_BOLD)
	stdscr.refresh()
	msgwB.refresh()
#
#
#
def checkoverdue():
	"""Escalate hosts whose heartbeat is late.

	A host in state ``up`` becomes ``overdue`` once it has been silent
	for more than its interval plus GRACE seconds; an ``overdue`` host
	becomes ``OVERDUE`` (and is logged) after five intervals plus GRACE.
	Hosts that are ``down`` are left alone.
	"""
	for name, host in hosts.items():
		if host.state == Host.down:
			continue
		silent = now - host.lastbeat
		if host.state == Host.up:
			if silent > host.interval + GRACE:
				host.newstate(Host.overdue)
		elif host.state == Host.overdue:
			if silent > host.interval * 5 + GRACE:
				log("%s is overdue" % name)
				host.newstate(Host.OVERDUE)

#
#
#
#
def displaytime():
	"""Redraw the clock and the state and percentage columns of every host.

	In verbose mode the state column shows the time since the last
	heartbeat instead, except for hosts that are down.  Hosts in state
	OVERDUE are drawn in bold.
	"""
	cols = stdscr.getmaxyx()[1]
	clock = time.strftime("%H:%M:%S", time.localtime(now))
	stdscr.addstr(0, cols - 8, clock, curses.A_BOLD)

	for host in hosts.values():
		row = host.num + 1
		if verbose and host.state != Host.down:
			text = dur(now - host.lastbeat)
		else:
			text = host.getstate()
		if host.state == Host.OVERDUE:
			attr = curses.A_BOLD
		else:
			attr = 0
		win.addstr(row, 25, "%8s" % text, attr)
		win.addstr(row, 53, "%3s" % host.uppercent)
	win.refresh()
	stdscr.refresh()

#
#
#
def displaystatetime(h, refresh=1):
	"""Draw the time of host *h*'s last state change in its table row.

	h       -- host name (key into ``hosts``)
	refresh -- when true, refresh the table window immediately
	"""
	host = hosts[h]
	stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(host.statetime))
	win.addstr(host.num + 1, 60, "%-17s" % stamp)
	if not refresh:
		return
	win.refresh()
#
#
#
def displayaddr(h, refresh=1):
	"""Draw the address of host *h* in its table row.

	h       -- host name (key into ``hosts``)
	refresh -- when true, refresh the table window immediately
	"""
	host = hosts[h]
	win.addstr(host.num + 1, 35, "%-16s" % host.addr)
	if not refresh:
		return
	win.refresh()
#
#
#
def displaybody():
	"""Draw name, address and last-change time of every host, then refresh."""
	for name, host in hosts.items():
		win.addstr(host.num + 1, 1, "%-24s" % name)
		# The per-column helpers are told not to refresh; one refresh
		# at the end covers the whole table.
		if host.addr is not None:
			displayaddr(name, 0)
		if host.statetime is not None:
			displaystatetime(name, 0)
	win.refresh()


#
#
#
def displaymsgs():
	"""Write the newest ``msgwHeight`` messages into the message window."""
	first = len(msgs) - msgwHeight
	for row, text in enumerate(msgs[first:]):
		msgw.addstr(row, 0, text)
	msgw.refresh()

#
#
#
def display():
	"""Rebuild and redraw the whole screen; does nothing unless visual."""
	if not visual:
		return
	initwin()
	displaytime()
	displaybody()
	displaymsgs()

def log(m, service="heartbeat"):
	"""Record message *m* in memory, in the log file and on screen.

	m       -- message text without trailing newline
	service -- tag written to the second field of the log file line

	The in-memory line is stamped with the current local time; the log
	file line is "<now>|<service>|<m>", where ``now`` is the global
	timestamp maintained by the main loop.
	"""
	msg=time.strftime("%b %d %H:%M:%S")+": "+m+"\n"
	msgs.append(msg)
	# Bound the in-memory history so a long-running daemon does not grow
	# without limit.  The readers in this file show at most the last 30
	# entries or one message pane of them.
	if len(msgs) > 1000:
		del msgs[:-1000]
	logf.write("%d|%s|%s\n" % (now, service, m))
	logf.flush()
	if msgw is not None:
		msgw.addstr(msg)
		msgw.clrtoeol()
		msgw.refresh()

#
#
def fromaddr(name, addr, boot):
	"""Count a heartbeat that arrived from *addr*.

	name -- host name from the packet; only used to register the host
	        when *addr* is not in the address table yet
	addr -- sender address; the heartbeat is credited to the host
	        registered for this address
	boot -- not used by this function

	A host that was not ``up`` is switched back to ``up``.  Its return
	is logged unless it was only in the mild ``overdue`` state.
	"""
	if addr not in htab:
		addhost(name, addr)
	host=hosts[htab[addr]]
	host.lastbeat=now
	previous=host.getstate()
	if previous != Host.up:
		away=host.newstate(Host.up)
		if previous != Host.overdue:
			log("%s, back after being %s for %s" % (host.name, previous, dur(away)))
	host.upcount+=1

#
#
#
def readsock():
	"""Receive one heartbeat datagram and apply it to the host table.

	The payload is a ';'-separated list of key=value pairs.  Recognised
	keys are boot, shutdown, interval, name, msg and service; anything
	else is ignored.  A key without a value gets the value "0".

	The packet comes from the network, so it is treated as untrusted: a
	missing service, a malformed interval or an unexpected name must not
	raise and take the daemon down.
	"""
	data, addr = sock.recvfrom(1024)
	boot=0
	shutdown=0
	name="unknown"
	msg=None
	# Same default as log() uses, so a "msg" without "service" is fine.
	service="heartbeat"
	interval=INTERVAL
	for pair in data.split(';'):
		# Split on the first '=' only so that values may contain '='.
		l=pair.split("=", 1)
		key=l[0]
		if len(l) == 2:
			val=l[1]
		else:
			val="0"
		if key == 'boot':
			boot+=1
		elif key == 'shutdown':
			shutdown+=1
		elif key == 'interval':
			# Ignore values that are not positive integers and keep
			# the default interval instead.
			try:
				n=int(val)
			except ValueError:
				n=0
			if n > 0:
				interval=n
		elif key == 'name':
			name=val
		elif key == 'msg':
			msg=val
		elif key == 'service':
			service=val
	if boot:
		log("%s booted" % name)
	if msg:
		log("%s msg: %s" % (name, msg),service=service)
	fromaddr(name, addr[0], boot)
	# fromaddr() credits the beat to the host registered for the sender
	# address, which is not necessarily called `name` (for instance when
	# the packet carried no name).  Fall back to that host rather than
	# raising KeyError on an unknown name.
	host=hosts.get(name)
	if host is None:
		host=hosts[htab[addr[0]]]
	if shutdown:
		log("%s shutdown" % name)
		host.newstate(Host.down)
	host.interval=interval


#
#
#
def updatestats():
	"""Count one statistics tick and recompute every host's ``uppercent``.

	The figure is the heartbeats received, weighted by the host's own
	interval, as a percentage of the time covered by the ticks so far.
	"""
	global upcount
	upcount += 1
	if upcount <= 0:
		return
	covered = upcount * INTERVAL
	for host in hosts.values():
		host.uppercent = "%3.0f" % ((host.upcount * host.interval * 100.0) / covered)
#
#
#
def genhtml(now):
	"""Write the static HTML status page.

	now -- timestamp (seconds since the epoch) shown in the page heading

	The page lists every host with state, address, percentage and time
	of last change, followed by the last 30 log messages and the output
	of the external trailer script.
	"""
	def esc(text):
		# Host names and messages originate from UDP packets, so they
		# must not be able to inject markup into the page.
		text=str(text).replace("&", "&amp;")
		return text.replace("<", "&lt;").replace(">", "&gt;")

	out=[]
	out.append("<html>")
	out.append("<head>")
	out.append("<meta http-equiv=Refresh content=%d>\n" % 10)
	out.append("</head>")
	out.append('<body BGCOLOR="#FFFFFF" LINK="#008000" VLINK="#008000" BACKGROUND="/~andreas/images/tile.marble.gif">')
	out.append("<H2>Heartbeat status at %s</H2>" % time.strftime("%H:%M:%S", time.localtime(now)))
	out.append("<table>")
	out.append("<tr><th>Host</th><th>State</th><th>IP Addr</th><th>Res</th><th>Last change</th></tr>\n" )
	for h in hosts:
		changed=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(hosts[h].statetime))
		out.append("<tr><td>%-24s</td><td>%-7s</td><td>%-16s</td><td>%-3s</td><td>%-17s</td></tr>\n" %  (esc(h), esc(hosts[h].state), esc(hosts[h].addr), esc(hosts[h].uppercent), changed))
	out.append("</table>")
	out.append("<P>")
	for m in msgs[-30:]:
		out.append("%s<BR>" % esc(m))

	os.environ['SCRIPT_FILENAME']="/home/andreas/bin/watcharnsberg"
	g=os.popen("/home/andreas/cgi-bin/trailer.py", "r")
	try:
		out.extend(g.readlines())
	finally:
		g.close()

	# Open the file only once the whole page is assembled: the window in
	# which a reader sees a truncated page stays short and the handle is
	# closed even if the write fails.
	f=open("/home/andreas/public_html/private/watcharnsberg.html","w")
	try:
		f.write("".join(out))
	finally:
		f.close()
	
#
#
#
	

def initcurses():
	"""Start curses and keep the screen in global ``stdscr``.

	Echo is switched off, cbreak mode is switched on and keypad mode is
	enabled on the screen.
	"""
	global stdscr
	stdscr = curses.initscr()
	curses.noecho()
	curses.cbreak()
	stdscr.keypad(1)
	if DEBUG:
		sys.stderr.write("curses init done: %s\n" % stdscr)

def exitcurses():
	"""Undo the terminal settings made by initcurses() and end curses."""
	curses.nocbreak()
	stdscr.keypad(0)
	curses.echo()
	curses.endwin()

#
#
#
class HtmlHandler(SocketServer.BaseRequestHandler):
	"""Minimal HTTP/1.0 handler that serves the heartbeat status page.

	"GET /" returns the status table and the last 30 log messages, any
	other GET target returns 404, and a request without a usable GET
	line returns 400.
	"""

	def _esc(self, text):
		"""Return *text* as a string with HTML metacharacters escaped."""
		text=str(text).replace("&", "&amp;")
		return text.replace("<", "&lt;").replace(">", "&gt;")

	def _requesturi(self):
		"""Read the request head and return the GET target, or None.

		Reading stops at the first empty line or at end of input.
		"""
		uri=None
		f=self.request.makefile()
		try:
			while 1:
				line=f.readline().strip()
				if not line:
					break
				r=line.split()
				# Tolerate short request lines such as "GET /".
				if r[0] == "GET" and len(r) > 1:
					uri=r[1]
		finally:
			f.close()
		return uri

	def _errorpage(self, code, cause, uri):
		"""Return the lines of the HTML error page for *code*."""
		if uri is None:
			detail='<p>Your browser sent a request that this server could not understand.</p>'
		else:
			# The URI is client-controlled, so escape it before echoing.
			detail='<p>The requested URL %s was not found on this server.</p>' % self._esc(uri)
		return [
			'<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
			'<html><head>',
			'<title>%s %s</title>' % (code, cause),
			'</head><body>',
			'<h1>%s</h1>' % (cause),
			detail,
			'<hr>',
			'<address>WatchArnsberg (Unix) Server at somewhere.planix.com Port %d</address>' % TPORT,
			'</body></html>',
		]

	def _statuspage(self):
		"""Return the lines of the HTML status page."""
		res=[]
		res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
		res.append("<html>")
		res.append("<head>")
		res.append("<meta http-equiv=Refresh content=%d>\n" % 60)
		res.append("</head>")
		res.append('<body BGCOLOR="#FFFFFF" LINK="#008000" VLINK="#008000" BACKGROUND="/~andreas/images/tile.marble.gif">')
		res.append("<H2>Heartbeat status at %s</H2>" % time.strftime("%H:%M:%S", time.localtime(now)))
		res.append("<table>")
		res.append("<tr><th>Host</th><th>State</th><th>IP Addr</th><th>Res</th><th>Last change</th></tr>\n" )
		for h in hosts:
			changed=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(hosts[h].statetime))
			# Names and messages come from UDP packets: escape them.
			res.append("<tr><td>%-24s</td><td>%-7s</td><td>%-16s</td><td>%-3s</td><td>%-17s</td></tr>\n" %  (self._esc(h), self._esc(hosts[h].state), self._esc(hosts[h].addr), self._esc(hosts[h].uppercent), changed))
		res.append("</table>")
		res.append("<P>")
		for m in msgs[-30:]:
			res.append("%s<BR>" % self._esc(m))
		return res

	def handle(self):
		"""Answer one HTTP request on ``self.request``.

		NOTE(review): the main loop calls serv.handle_request()
		directly, so a client that connects and sends nothing keeps
		this method, and with it the monitor, waiting in readline().
		"""
		uri=self._requesturi()
		if uri is None:
			code=400
			cause="Bad Request"
		elif uri != "/":
			code=404
			cause="Not Found"
		else:
			code=200
			cause="OK"

		stamp=time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(now))
		head=[
			"HTTP/1.0 %s %s" % (code, cause),
			"Date: %s" % stamp,
			"Server: WatchArnsberg",
			"Last-Modified: %s" % stamp,
			"Accept-Ranges: bytes",
			"Connection: close",
			"Content-Type: text/html; charset=ISO-8859-1",
			"",
			"",
		]
		if code == 200:
			res=self._statuspage()
		else:
			res=self._errorpage(code, cause, uri)

		try:
			# sendall() keeps writing until everything is sent; a plain
			# send() may stop after part of the data.
			self.request.sendall("\r\n".join(head) + "\n".join(res))
		except socket.error:
			# The client went away; there is nobody left to answer.
			pass

	
#
# Main
#


initlog()

# `now` is the shared clock of the program: it is refreshed after every
# select() call and read by the logging, display and timeout code.
now=time.time()
# Position within INTERVAL at which the statistics tick fires.
startsec=int(now) % INTERVAL

# Address table: sender address -> host name.
htab={}
if visual:
	initcurses()
	display()
	stdscr.nodelay(1)

atexit.register(on_exit)
if DEBUG: log("Starting")

# Objects handed to select(): the UDP heartbeat socket and the file
# descriptor of the TCP status server.
ilist=[]

sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("",PORT))
ilist.append(sock)

serv=SocketServer.TCPServer((THOST,TPORT),HtmlHandler)
ilist.append(serv.fileno())

# `next` is the time of the next once-per-second tick and `sleep` the
# select() timeout that leads up to it.
next=int(now)+1
sleep=next - now
while 1:
	if visual:
		c = stdscr.getch()
		if c == ord('c'): msgs=[]; display()
		elif c == ord('q'): break  # Exit the while()
		elif c == ord('d'): DEBUG=not DEBUG
		elif c == ord('v'): verbose=not verbose
#		elif c == ord('p'): PrintDocument()
#		elif c == ord('x'): x = y = 0

	try:
		sr=select.select(ilist,[],[],sleep)
		now=time.time()
	except KeyboardInterrupt:
		sys.exit(0)
	except select.error, value:
		if value[0] != 4:	# interrupted system call
			print select.error, value
			#raise os.error, value
			continue
		# select() was interrupted: restart curses and redraw the
		# whole screen before going on.
		if visual:
			exitcurses()
			initcurses()
			display()
		continue
	for fh in sr[0]:
		if fh == sock:
			readsock()
		if fh == serv.fileno():
			serv.handle_request()
	if now >= next:
		# Re-arm on the next whole second.  With "now+1" the tick
		# phase drifts by the wake-up latency of every round until a
		# second is skipped altogether; if that is the startsec one,
		# updatestats() is missed and the percentages come out too
		# high.
		next=int(now)+1
		if int(now) % INTERVAL == startsec:
			updatestats()
##		if int(now) % 60 == 0:
##			genhtml(now)
		checkoverdue()
		if visual:
			stdscr.move(1 , 0)
			stdscr.clrtoeol()
			displaytime()

	sleep=next-now
	if sleep < 0:
		sys.stderr.write("sleep is negaitive! %s next=%s\n" % (sleep, next))
		sleep=0
	if DEBUG: sys.stderr.write("sleep=%s next=%s\n" % (sleep, next))

