#!/usr/bin/env python
# $Id: hbd,v 1.9 2006/04/28 12:15:13 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence)
#
import time, os, string, sys, socket, atexit, select, SocketServer, getopt

# Integer stand-ins for the boolean constants used throughout this script.
False=0
True=1
# Default log file; the "logfile" config key overrides it.
LOGFILE="/home/andreas/public_html/messages/andreas"

# Host objects keyed by host name (filled in by addhost()).
hosts={}
# Counter read and incremented by Host.__init__ for each new host.
num=0
# Number of times updatestats() has run.
upcount=0
# Default UDP port for incoming heartbeats ("hb_port" config key).
PORT=50003
# Default TCP port for the HTML status page ("hbd_port" config key).
TPORT=50004
# Default bind host for the status page ("hbd_host" config key).
THOST=""
# Extra diagnostics on stderr/log; toggled with the 'd' key in the display.
DEBUG=False
# Set by -v; toggled with the 'v' key in the display.
verbose=False

# Default for "interval" (seconds); used by the main loop and updatestats().
INTERVAL=10
# Default for "grace" (seconds); slack added by checkoverdue().
GRACE=2

# Non-zero when the curses display was requested with -d.
visual=0

# Formatted log lines, oldest first; appended to by log().
msgs=[]

# curses objects: created by initcurses() (stdscr) and initwin() (the rest).
stdscr=None
win=None
msgw=None
msgwB=None
# Height of the message window; recomputed by initwin().
msgwHeight=10

class NullDevice:
    """Write-only sink that silently discards everything.

    The script installs instances of this class as sys.stdout and
    sys.stderr after detaching from the terminal.
    """

    def write(self, s):
        """Accept and drop the string *s*."""
        pass

    def flush(self):
        """Do nothing; provided so code that flushes the stream does not fail."""
        pass

class Host:
    """State record for one monitored host.

    Attributes set by __init__:
      name       -- host name as passed in
      addr       -- address as passed in
      num        -- value of the module-level counter `num` at creation
      lastbeat   -- creation time, from time.time()
      upcount    -- starts at 0
      state      -- Host.up, Host.down or Host.overdue; starts as Host.up
      uppercent  -- starts as the string "n/a"
      statetime  -- time the current state was entered; starts at lastbeat
      interval   -- starts at 0
    """

    # The three states a host can be in.
    up = "up"
    down = "down"
    overdue = "overdue"

    def __init__(self, name, addr):
        """Create a host in state "up" and bump the module counter `num`."""
        global num
        self.name = name
        self.addr = addr
        self.num = num
        self.lastbeat = time.time()
        self.upcount = 0
        self.state = Host.up
        self.uppercent = "n/a"
        self.statetime = self.lastbeat
        self.interval = 0
        num += 1

    def getstate(self):
        """Return the current state string."""
        return self.state

    def newstate(self, state, when=0):
        """Switch to *state* and return the seconds spent in the old state.

        *when* is subtracted from the current time before it is recorded,
        i.e. it backdates the change by that many seconds.  When the
        module-level `visual` flag is set, the host's row is redrawn via
        displaystatetime().
        """
        self.state = state
        now = time.time() - when
        elapsed = now - self.statetime
        self.statetime = now
        if visual:
            displaystatetime(self.name)
        return elapsed


#
#
def dur(sec):
	"""Format a duration given in seconds as "H:MM:SS" or "M:SS".

	sec is truncated to whole seconds first.  Durations under a minute
	come out as "0:SS".  A negative duration is rendered as the matching
	positive duration with a leading "-".
	"""
	sec=int(sec)
	sign=""
	if sec < 0:
		sign="-"
		sec=-sec
	# divmod always floors, so the result does not depend on how the
	# interpreter treats "/" between integers.
	m, s=divmod(sec, 60)
	h, m=divmod(m, 60)
	if h > 0:
		return "%s%d:%02d:%02d" % (sign, h, m, s)
	return "%s%d:%02d" % (sign, m, s)


#
#
#
def addhost(name, addr):
	"""Register host *name* at *addr*, or move a known host to *addr*.

	For a known name the old address is dropped from `htab`, the new one
	is recorded and the change is logged.  For a new name a Host is
	created, every host's `num` is reassigned in sorted-name order, and
	the screen is rebuilt when the display is active.
	"""
	if name in hosts:
		host=hosts[name]
		del htab[host.addr]
		host.addr=addr
		if visual:
			displayaddr(name)
		htab[addr]=name
		log("%s, changed address to %s" % (name, addr))
		return

	hosts[name]=Host(name, addr)
	# Renumber so that `num` follows the alphabetical order of the names.
	for row, hostname in enumerate(sorted(hosts.keys())):
		hosts[hostname].num=row
	htab[addr]=name
	if visual:
		display()
		
#
#
#
def on_exit():
	"""Exit hook: leave curses mode if active, close the log, report "exit"."""
	if visual:
		exitcurses()
	logf.close()
	sys.stdout.write("exit\n")


def initlog(logfile):
	"""Open *logfile* in append mode and return the file object."""
	handle=open(logfile, "a")
	return handle
#
#
#
def initwin():
	"""(Re)create the curses windows: host table, message frame, message pane.

	Sizes come from the current terminal size and the number of entries
	in `htab`.  Sets the module-level win, msgwB, msgw and msgwHeight.
	"""
	global win, msgw, msgwB, msgwHeight

	rows, cols=stdscr.getmaxyx()
	left=0
	top=2
	tableheight=len(htab)+2
	if DEBUG:
		log("initwin called with %d" % tableheight)

	# Host table, as wide as the screen.
	win=curses.newwin(tableheight, cols, top, left)
	win.border(0, 0, 0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

	# Frame around the message pane, starting one row below the table.
	msgwB=curses.newwin(0, 0, tableheight+1, left)
	msgwB.border(0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)

	# Scrolling message pane placed inside that frame.
	msgwHeight=rows-tableheight-3
	msgw=curses.newwin(msgwHeight, cols-2, tableheight+2, left+1)
	msgw.setscrreg(0, msgwHeight-1)
	msgw.scrollok(1)

	stdscr.addstr(0, 0, "hbd Version 1.0", curses.A_BOLD)
	stdscr.refresh()
	msgwB.refresh()
	msgwB.refresh()
#
#
#
def checkoverdue():
	"""Flag hosts that are "up" but whose last heartbeat is too old.

	A host is overdue once more than its own interval plus the slack has
	passed since `lastbeat`.  The slack is `grace` when `reportstrict` is
	set and five times `grace` otherwise.  The state change is backdated
	by the slack.
	"""
	if reportstrict:
		slack=grace
	else:
		slack=5*grace
	for name, host in hosts.items():
		# Only hosts currently "up" can become overdue.
		if host.state != Host.up:
			continue
		if now-host.lastbeat > host.interval+slack:
			log("%s is overdue" % name)
			host.newstate(Host.overdue, slack)

#
#
#
#
def displaytime():
	"""Redraw the clock and each host's state and up-percentage columns.

	In verbose mode the state column shows the time since the last
	heartbeat instead of the state name, except for hosts that are down.
	Overdue hosts are drawn in bold.
	"""
	cols=stdscr.getmaxyx()[1]
	clock=time.strftime("%H:%M:%S", time.localtime(now))
	stdscr.addstr(0, cols-8, clock, curses.A_BOLD)

	for host in hosts.values():
		row=host.num+1
		if verbose and host.state != Host.down:
			text=dur(now-host.lastbeat)
		else:
			text=host.getstate()
		if host.state == Host.overdue:
			attr=curses.A_BOLD
		else:
			attr=0
		win.addstr(row, 25, "%8s" % text, attr)
		win.addstr(row, 53, "%3s" % host.uppercent)
	win.refresh()
	stdscr.refresh()

#
#
#
def displaystatetime(h, refresh=1):
	"""Write the last state-change time of host *h* into its table row.

	The window is refreshed afterwards unless *refresh* is false.
	"""
	host=hosts[h]
	stamp=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(host.statetime))
	win.addstr(host.num+1, 60, "%-17s" % stamp)
	if not refresh:
		return
	win.refresh()
#
#
#
def displayaddr(h, refresh=1):
	"""Write the address of host *h* into its table row.

	The window is refreshed afterwards unless *refresh* is false.
	"""
	host=hosts[h]
	win.addstr(host.num+1, 35, "%-16s" % host.addr)
	if not refresh:
		return
	win.refresh()
#
#
#
def displaybody():
	"""Draw name, address and state-change time for every host, then refresh."""
	for name, host in hosts.items():
		win.addstr(host.num+1, 1, "%-24s" % name)
		# The per-field helpers are told not to refresh; one refresh
		# at the end covers the whole table.
		if host.addr is not None:
			displayaddr(name, 0)
		if host.statetime is not None:
			displaystatetime(name, 0)
	win.refresh()


#
#
#
def displaymsgs():
	"""Repaint the message pane with the newest messages that fit in it.

	At most `msgwHeight` entries from the end of `msgs` are drawn, one
	per row starting at the top of `msgw`.
	"""
	# Clamp the start index at zero: a negative index would count from the
	# end of the list and hide messages that still fit in the pane.
	first=max(0, len(msgs)-msgwHeight)
	y=0
	for m in msgs[first:]:
		msgw.addstr(y, 0, m)
		y+=1
	msgw.refresh()

#
#
#
def display():
	"""Rebuild and repaint the whole screen; does nothing unless `visual` is set."""
	if not visual:
		return
	initwin()
	displaytime()
	displaybody()
	displaymsgs()

def log(m, service="heartbeat", keep=1000):
	"""Record message *m* in memory, in the log file and in the message pane.

	m       -- message text without a trailing newline
	service -- service tag; only used when `logfmt` is "msg"
	keep    -- maximum number of entries retained in `msgs`; older ones
	           are dropped so a long-running daemon does not grow without
	           bound.  A value <= 0 disables trimming.

	With `logfmt` "msg" the file gets "<now>|<service>|<m>" (using the
	module-level `now`); otherwise it gets the timestamped text line.
	"""
	stamp=time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))
	msg=stamp+": "+m+"\n"
	msgs.append(msg)
	if keep > 0 and len(msgs) > keep:
		# Trim in place so every reference to the list sees the result.
		del msgs[:-keep]

	if logfmt == "msg":
		m2="%d|%s|%s\n" % (now, service, m)
	else:
		m2=msg
	logf.write(m2)
	logf.flush()

	# msgw only exists while the curses display is active.
	if msgw is not None:
		msgw.addstr(msg)
		msgw.clrtoeol()
		msgw.refresh()

#
#
def fromaddr(name, addr, boot, interval):
	"""Account for a heartbeat received from *addr*.

	An unknown address is first registered under *name*.  The host that
	owns the address then gets its `lastbeat` set to `now` and its
	`upcount` incremented.  If it was not "up" and *interval* is positive
	it is switched back to "up" and the outage is logged.  *boot* is
	accepted but not used here.
	"""
	if addr not in htab:
		addhost(name, addr)
	host=hosts[htab[addr]]
	host.lastbeat=now
	if host.getstate() != Host.up and interval > 0:
		previous=host.state
		elapsed=host.newstate(Host.up)
		log("%s, back after being %s for %s" % (host.name, previous, dur(elapsed)))
	host.upcount+=1

#
#
#
def readsock():
	"""Receive one heartbeat datagram from `sock` and act on it.

	The payload is a ';'-separated list of key=value pairs.  Recognised
	keys: boot, shutdown, interval, name, msg, service, time.  A pair
	without '=' gets the value "0".  Only the first '=' separates key and
	value, so values may themselves contain '='.

	Malformed numbers are ignored rather than raised, because the data
	comes straight off the network.
	"""
	data, addr=sock.recvfrom(1024)
	boot=0
	shutdown=0
	name="unknown"
	msg=None
	# Same default as log(), so a msg without a service key still logs.
	service="heartbeat"
	interval=0
	deltaT=0.0
	for pair in data.split(';'):
		fields=pair.split('=', 1)
		key=fields[0]
		if len(fields) == 2:
			val=fields[1]
		else:
			val="0"
		if key == 'boot':
			boot+=1
		elif key == 'shutdown':
			shutdown+=1
		elif key == 'interval':
			try:
				interval=int(val)
			except ValueError:
				pass
		elif key == 'name':
			name=val
		elif key == 'msg':
			msg=val
		elif key == 'service':
			service=val
		elif key == 'time':
			try:
				deltaT=now-float(val)
			except ValueError:
				pass
	if boot:
		log("%s booted, deltaT %0.2g sec" % (name, deltaT))
	if msg:
		log("%s %0.2g msg: %s" % (name, deltaT, msg),service=service)
	fromaddr(name, addr[0], boot, interval)
	# fromaddr() keys on the address, so the sender's name may not be a
	# registered host; in that case there is nothing to update by name.
	host=hosts.get(name)
	if shutdown:
		log("%s shutdown" % name)
		if host is not None:
			host.newstate(Host.down)
	if interval > 0 and host is not None:
		host.interval=interval


#
#
#
def updatestats():
	"""Count one statistics round and recompute every host's `uppercent`.

	The figure is the host's upcount*interval relative to the global
	upcount*interval, as a percentage formatted with "%3.0f".
	"""
	global upcount
	upcount+=1
	if upcount <= 0:
		return
	total=upcount*interval
	for host in hosts.values():
		host.uppercent="%3.0f" % ((host.upcount*host.interval*100.0)/total)
#
#
#
def initcurses():
	"""Start curses and store the main screen in the module-level `stdscr`.

	Echo is turned off, cbreak mode is turned on and keypad handling is
	enabled on the screen.
	"""
	global stdscr
	screen=curses.initscr()
	stdscr=screen
	curses.noecho()
	curses.cbreak()
	screen.keypad(1)
	if DEBUG:
		sys.stderr.write("curses init done: %s\n" % screen)

def exitcurses():
	"""Reverse the settings made by initcurses() and end curses mode."""
	curses.nocbreak()
	stdscr.keypad(0)
	curses.echo()
	curses.endwin()

#
#
#
class HtmlHandler(SocketServer.BaseRequestHandler):
    """Serve the heartbeat status page over HTTP/1.0.

    "GET /" returns the status page, any other URI gets a 404 page, and a
    request without a usable GET line gets a 400 page.
    """

    def _escape(self, text):
        """Return str(text) with HTML metacharacters replaced by entities."""
        text = str(text)
        for char, entity in (("&", "&amp;"), ("<", "&lt;"),
                             (">", "&gt;"), ('"', "&quot;")):
            text = text.replace(char, entity)
        return text

    def handle(self):
        """Read the request headers, then send one complete response."""
        uri = None
        f = self.request.makefile()
        try:
            # Consume header lines up to the blank line (or end of input).
            while 1:
                line = f.readline().strip()
                if len(line) == 0:
                    break
                r = line.split()
                # Only the URI is needed; the protocol version is optional.
                if r[0] == "GET" and len(r) > 1:
                    uri = r[1]
        finally:
            f.close()

        if uri is None:
            code = 400
            cause = "Bad Request"
            detail = "The request could not be understood by this server."
        elif uri != "/":
            code = 404
            cause = "Not Found"
            # The URI is attacker-controlled, so it must be escaped.
            detail = ("The requested URL %s was not found on this server."
                      % self._escape(uri))
        else:
            code = 200
            cause = "OK"
            detail = None

        stamp = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(now))
        head = [
            "HTTP/1.0 %s %s" % (code, cause),
            "Date: %s" % stamp,
            "Server: hbd",
            "Last-Modified: %s" % stamp,
            "Accept-Ranges: bytes",
            "Connection: close",
            "Content-Type: text/html; charset=ISO-8859-1",
            "",
            "",
        ]

        res = []
        if code != 200:
            res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
            res.append('<html><head>')
            res.append('<title>%s %s</title>' % (code, cause))
            res.append('</head><body>')
            res.append('<h1>%s</h1>' % (cause))
            res.append('<p>%s</p>' % detail)
            res.append('<hr>')
            res.append('<address>hbd (Unix) Server at %s Port %d</address>' % (hbd_host, hbd_port))
            res.append('</body></html>')
        else:
            res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
            res.append("<html>")
            res.append("<head>")
            res.append("<meta http-equiv=Refresh content=%d>\n" % 60)
            res.append("</head>")
            res.append('<body BGCOLOR="#FFFFFF" LINK="#008000" VLINK="#008000" BACKGROUND="/~andreas/images/tile.marble.gif">')
            res.append("<H2>Heartbeat status at %s</H2>" % time.strftime("%H:%M:%S", time.localtime(now)))
            res.append("<table>")
            res.append("<tr><th>Host</th><th>State</th><th>IP Addr</th><th>Res</th><th>Last change</th></tr>\n")
            # Host names and addresses arrive over the network: escape them.
            for h in sorted(hosts.keys()):
                host = hosts[h]
                changed = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(host.statetime))
                res.append("<tr><td>%-24s</td><td>%-7s</td><td>%-16s</td><td>%-3s</td><td>%-17s</td></tr>\n" % (self._escape(h), host.state, self._escape(host.addr), host.uppercent, changed))
            res.append("</table>")
            res.append("<P>")
            # msgs[-30:] yields the whole list when it has fewer than 30
            # entries; a computed start index could go negative and drop
            # messages.
            for m in msgs[-30:]:
                res.append("%s<BR>" % self._escape(m))
            res.append("</body>")
            res.append("</html>")

        # Best effort: the client may already have gone away.  sendall()
        # keeps writing until the whole response is out.
        try:
            self.request.sendall("\r\n".join(head) + "\n".join(res))
        except socket.error:
            pass

	
#
# Main
#

helpflag=False
forground=False
optlist=[]  
args=[]
home=os.environ['HOME']
configfile="%s/.hbrc" % home
try:
	optlist, args = getopt.getopt(sys.argv[1:], 'c:dfhv')
except:
    helpflag=True

for o,a in optlist:
	if o == '-c':
		configfile=a
	if o == '-d':
		visual=True
	elif o == '-f':
		forground=True
	elif o == '-h':
		helpflag=True
	elif o == '-v':
		verbose=True


if helpflag:
	print "hbc HeartBeatDaemon"
	print "usage: hbd [-dfhv] [-c configfile]"
	print
	print "	-c configfile"
	print "	-d display"
	print "	-f run in foreground"
	print "	-h this help"
	print "	-v verbose"
	print
	print """ config file can contain 
logfile=/var/log/heartbeat.log
logfmt=[text|msg]
hb_port=50003
interval=20
hbd_port=50004
hbd_host=www.domain.com
grace=1
"""

	sys.exit(1)

if visual:
	forground=True
#
# set defaults

hb_port=PORT
hbd_host=THOST
hbd_port=TPORT
logfile=LOGFILE
logfmt="text"
interval=INTERVAL
grace=GRACE
reportstrict=False

try:
	f=open(configfile,"r")	
	if verbose: print "notice: using config file %s" % configfile
except:
	print "warning: running without conifig file: %s" % configfile
	f=None

if f:
	while 1:
		l=f.readline()
		if len(l) == 0:
			break
		if verbose: print "  %s" % l[:-1]
		r=l[:-1].split('=')
		if r[0] == 'interval':
			interval=eval(r[1])
		elif r[0] == 'grace':
			grace=eval(r[1])
		elif r[0] == 'hbd_port':
			hbd_port=eval(r[1])
		elif r[0] == 'hbd_host':
			hbd_host=r[1]
		elif r[0] == 'hb_port':
			hb_port=eval(r[1])
		elif r[0] == 'logfile':
			logfile=r[1]
		elif r[0] == 'logfmt':
			logfmt=r[1]
		elif r[0] == 'reportstrict':
			reportstrict=r[1] in ["True","true","TRUE","1"]
	f.close()

# Startup: open the log, optionally start the display, create the sockets
# and, unless running in the foreground, detach from the terminal.

# Positional arguments are not accepted.
if len(args) != 0:
	print "error: args"
	sys.exit(1)


if verbose: print "notice: logging to %s" % logfile
logf=initlog(logfile)

# `now` is the shared clock read by log(), checkoverdue(), the display
# functions and the HTTP handler; the main loop refreshes it after select().
now=time.time()
# Phase within `interval` at startup; the main loop calls updatestats()
# on seconds that share this phase.
startsec=int(now) % interval

# Maps a source address to the name of the host registered for it.
htab={}
if visual:
	# curses is only imported when the display was requested.
	import curses
	initcurses()
	display()
	# NOTE(review): nodelay(1) should make getch() non-blocking so the
	# main loop can poll the keyboard -- confirm against the curses docs.
	stdscr.nodelay(1)

if verbose: log("Starting")
atexit.register(on_exit)

# Objects handed to select() in the main loop.
ilist=[]

# UDP socket on which heartbeat datagrams arrive.
sock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("",hb_port))
ilist.append(sock)

# TCP server for the HTML status page; registered by file descriptor.
serv=SocketServer.TCPServer((hbd_host,hbd_port),HtmlHandler)
ilist.append(serv.fileno())

if not forground:
	# Detach: the parent exits, the child continues as the daemon.
	pid=os.fork()
	if pid > 0:
		if verbose:
			print "daemoinizing... pid=%d" % pid
		sys.exit(0)

	verbose=False
	# Close the standard descriptors and route stray output to a sink.
	os.close(0)
	os.close(1)
	os.close(2)
	sys.stdin.close()
	sys.stdout = NullDevice()
	sys.stderr = NullDevice()
	os.chdir("/")
	os.setsid() 
	os.umask(0) 

# Main loop: poll the keyboard (display mode only), wait in select() for a
# heartbeat datagram or a status-page connection until the next tick is due,
# then run the periodic work.
# `next` is the time of the next tick; `sleep` is the select() timeout.
next=int(now)+1
sleep=next - now
while 1:
	if visual:
		# Keys: c = clear messages, q = quit, d = toggle DEBUG,
		# v = toggle verbose.
		c = stdscr.getch()
		if c == ord('c'): msgs=[]; display()
		elif c == ord('q'): break  # Exit the while()
		elif c == ord('d'): DEBUG=not DEBUG
		elif c == ord('v'): verbose=not verbose
#		elif c == ord('p'): PrintDocument()
#		elif c == ord('x'): x = y = 0

	try:
		sr=select.select(ilist,[],[],sleep)
		now=time.time()
	except KeyboardInterrupt:
		sys.exit(0)
	except select.error, value:
		if value[0] != 4:	# 4 = interrupted system call
			print select.error, value
			#raise os.error, value
			continue
		# select() was interrupted: rebuild the display before retrying.
		# NOTE(review): presumably this handles terminal resize signals --
		# confirm.
		if visual:
			exitcurses()
			initcurses()
			display()
		continue
	# Dispatch whatever became readable.
	for fh in sr[0]:
		if fh == sock:
			readsock()
		if fh == serv.fileno():
			serv.handle_request()
	# Tick: at most once per second.
	if now >= next:
		next=now+1
		if int(now) % interval == startsec:
			updatestats()
		checkoverdue()
		if visual:
			stdscr.move(1 , 0)
			stdscr.clrtoeol()
			displaytime()

	# Wait only for the remainder of the current tick.
	sleep=next-now
	if sleep < 0:
		sys.stderr.write("sleep is negaitive! %s next=%s\n" % (sleep, next))
		sleep=0
	if DEBUG: sys.stderr.write("sleep=%s next=%s\n" % (sleep, next))

