#!/usr/bin/env python
# $Id: hbd,v 1.38 2013/07/14 02:25:05 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence)
#
# Daemon version; shown on the HTTP status page and in the "Starting" log line.
VER = 3.00

import time
import os
import string
import sys
import socket
import atexit
import select
import SocketServer
import BaseHTTPServer
import getopt
import signal
import cPickle
import smtplib
import traceback
import urllib
import httplib
import threading
import Queue
import md5
import zlib

from subprocess import Popen, STDOUT, PIPE


# Notification switches: email() and pushover() return immediately when
# their switch is False.
SEND_EMAIL=False
SEND_PUSHOVER=True

# Debug level; each -x on the command line adds one.
DEBUG = 0

# Buffer size handed to recvfrom() for one heartbeat datagram.
MAXRECV = 32767
# Maximum number of rtt samples kept per Connection.
MAXRTTS = 10
# Default log file (used unless the config file sets "logfile").
LOGFILE = "/home/andreas/public_html/messages/andreas"
# State file written by pickleit() and reloaded at startup.
PICKFILE = "/var/tmp/hbd.pick"
# Recipients of notification emails.
AEMAIL = ["andreas@wrede.ca"]
# Name used in the subject line of notification emails.
# NOTE(review): spelled "heatbeat" - confirm whether "heartbeat" was meant.
NAME = "heatbeat"
SMTPSERVER = "localhost"

# Table of hosts: short host name -> Host instance
hosts = {}
# map of addrs to names
htab = {}

# Formatted log lines; the status page shows the most recent ones.
msgs = []

# Running counter copied into Host.num when a Host is created.
num = 0
#AEW upcount = 0
# Default UDP port for heartbeats and default TCP port/host for the HTTP server.
PORT = 50003
TPORT = 50004
THOST = ""

# Set by -v.
verbose = False

# Defaults for the "interval" and "grace" settings (seconds).
INTERVAL = 10
GRACE = 2

# Timestamps are rendered with time.localtime(), so pin the zone here.
# NOTE(review): time.tzset() is never called after this assignment - confirm
# the change takes effect on the target platform.
os.environ['TZ'] = 'EST5EDT'

# strftime formats for hour, day of month and week of year.  The main loop
# snapshots every host's counters whenever one of these values changes;
# lastfm holds the value last seen for each format.
tsfm=["%H","%d","%U"]
lastfm=["","",""]

def handler(signum, frame):
	"""Signal handler: record the signal number in the global ``sig``.

	While the main loop is running it only notes the signal (the loop
	checks ``sig`` itself).  If the loop has already stopped, the process
	exits with status 2.
	"""
	global running, sig
	sig = signum
	if running:
		if verbose:
			sys.stderr.write("signal: %s running: %s frame: %s" % (sig, running, frame))
		return
	if verbose:
		sys.stderr.write("NOT runing signal: %s running: %d" % (sig, running))
	sys.exit(2)


def shortname(name):
	"""Return *name* up to (not including) its first dot.

	"web.example.com" -> "web"; a name without a dot is returned unchanged.
	"""
	# str.split instead of the deprecated string.split() module function.
	return name.split('.', 1)[0]


def isIPv4(addr):
	"""Return True when the first '.' in *addr* is not its first character.

	Any address text containing a dot after position 0 counts as IPv4;
	everything else is treated as IPv6 by the callers.
	"""
	first_dot = addr.find('.')
	return first_dot > 0


class NullDevice:
	"""File-like sink that discards everything written to it."""

	def write(self, s):
		"""Accept *s* and do nothing with it."""
		return None


class LogDevice:
	"""File-like object that appends to /tmp/log1 and flushes after every write."""

	def __init__(self):
		"""Open /tmp/log1 for appending."""
		self.fh = open("/tmp/log1","a")

	def write(self, s):
		"""Append *s* to the log file and flush immediately."""
		out = self.fh
		out.write(s)
		out.flush()


def dicttos(ID, d, compress=False):
	"""Serialize dict *d* into a wire message "ID:key=value;key=value...".

	Floats are written with five decimals, everything else with %s.
	With compress=True the payload after the colon is zlib-compressed
	(level 6) and the ID is prefixed with "!" so the receiver can tell.

	NOTE(review): keys and values are not escaped, so a ';' or '=' inside a
	value will not survive parsing by stodict() - confirm callers never
	send such values.
	"""
	fields = []
	for key, value in d.items():
		if isinstance(value, float):
			fields.append("%s=%0.5f" % (key, value))
		else:
			fields.append("%s=%s" % (key, value))
	payload = ";".join(fields)
	if compress:
		payload = zlib.compress(payload, 6)
		ID = "!"+ID
	return ID + ":" + payload


def _tonumber(text):
	"""Return *text* as int or float when it parses as one, else unchanged."""
	for convert in (int, float):
		try:
			return convert(text)
		except ValueError:
			pass
	return text


def stodict(msg):
	"""Parse a wire message "ID:key=value;key=value..." into a dict.

	Returns None when *msg* contains no ':' (callers treat that as a
	legacy message without an ID).  Otherwise the result holds the ID
	under 'ID' plus one entry per field:

	- a field without '=' maps to None;
	- a value starting with a digit becomes an int or float when it
	  parses as one (e.g. "10", "1.5") and stays a string otherwise
	  (e.g. "10.0.0.1");
	- any other value stays a string.

	An ID starting with '!' marks a zlib-compressed payload.

	The data comes straight off the network, so values are converted with
	int()/float() rather than eval(): eval() would execute arbitrary
	expressions sent by a peer and raised on inputs such as "10.0.0.1".
	"""
	parts = msg.split(':', 1)
	if len(parts) == 1:
		return None
	ident, payload = parts
	d = {}
	if ident.startswith('!'):		# compressed
		payload = zlib.decompress(payload)
		d['ID'] = ident[1:]
	else:
		d['ID'] = ident
	for field in payload.split(';'):
		kv = field.split('=', 1)
		key = kv[0].strip()
		if len(kv) == 1:
			d[key] = None
			continue
		value = kv[1].strip()
		# value[:1] is safe on an empty value ("key=")
		if value[:1].isdigit():
			value = _tonumber(value)
		d[key] = value
	return d


def oldmtodict(msg):
	"""Parse a legacy message that has no ID by treating it as an 'HTB' message."""
	with_id = 'HTB:'+msg
	return stodict(with_id)


class Connection:
	"""One sender (identified by *cid*) of heartbeats for a host.

	rtts collects the round-trip values reported by that sender and
	starts out with a single 0 entry.
	"""

	def __init__(self, name, cid, addr):
		"""Remember host *name*, connection id *cid* and the sender's socket *addr*."""
		self.rtts = [0]
		self.addr = addr
		self.cid = cid
		self.name = name


class Host:
	"""State tracked for one monitored host.

	A new instance adds itself to the module-level ``hosts`` table.
	Instances are pickled together with that table, so any attribute added
	to __init__ must also be back-filled in fixup() for instances loaded
	from an older pickle.
	"""

	# values of self.state
	up = "up"
	down = "down"
	overdue = "overdue"

	# dispstats() only computes the hour/day/week figures when this is True.
	# The default keeps the "N/A" placeholder cells that were being shown.
	showstats = False

	def __init__(self, name):
		"""Create host *name* in state "up" and register it in ``hosts``."""
		global num
		self.name = name
		self.addr4 = None			# last IPv4 source address seen
		self.addr6 = None			# last IPv6 source address seen
		self.num = num
		self.lastbeat = time.time()	# time of the most recent heartbeat
		self.upcount = 0
		self.state = Host.up
		self.statetime = self.lastbeat	# when self.state was last changed
		self.interval = 0			# heartbeat interval announced by the host
		self.doesack = -1			# 'acks' value from the last heartbeat, -1 if none
		self.cmds = []				# queued (op, payload) tuples to send to the host
		self.cver = 0				# client version from the last heartbeat
		self.connections = {}		# connection id -> Connection
		self.latency = 0
		# [doesack, upcount] snapshots taken at the last hour/day/week change
		self.hdwcounts = [[0,0],[0,0],[0,0]]
		num += 1
		hosts[name] = self


	def setcver(self, cver):
		"""Record the client version reported by the host."""
		self.cver = cver


	def isDynDns(self):
		"""Return True if this host is listed in the ``dyndnshosts`` setting."""
		return self.name in dyndnshosts


	def newaddr(self, addr):
		"""Record *addr* as the host's current IPv4 or IPv6 address.

		Returns None when the address is unchanged, otherwise a short
		description ("new addr ..." or "changed from ... to ...").  On a
		change the addr -> name table ``htab`` is updated and, for dynamic
		DNS hosts, a (name, addr) update is queued on ``dnsQ``.
		"""
		attr = 'addr4' if isIPv4(addr) else 'addr6'
		old = getattr(self, attr)
		if old and old == addr:
			return None
		if old:
			r = "changed from %s to %s" % (old, addr)
			# Only drop the old mapping if it is still ours: another host
			# may have taken over that address (e.g. behind the same NAT),
			# and an unconditional del raised KeyError or removed the other
			# host's entry.
			if htab.get(old) == self.name:
				del htab[old]
		else:
			r = "new addr %s" % (addr)
		setattr(self, attr, addr)
		htab[addr] = self.name
		if self.isDynDns():
			dnsQ.put((self.name, addr))
		return r


	# called when reloading class from pickle, add new fields here
	def fixup(self):
		"""Back-fill attributes missing from instances loaded from an old pickle."""
		if not hasattr(self, 'cmds'):
			self.cmds = []

		if not hasattr(self, 'hdwcounts'):
			self.hdwcounts = [[self.doesack, self.upcount] for _ in range(3)]

		# Legacy instances carried a single 'addr'.  When addr4/addr6 exist,
		# 'addr' is (re)set from them, addr6 taking precedence even if None.
		# NOTE(review): that precedence is kept as found - confirm it is wanted.
		has4 = hasattr(self, 'addr4')
		has6 = hasattr(self, 'addr6')
		if has4:
			self.addr = self.addr4
		if has6:
			self.addr = self.addr6
		if not (has4 and has6):
			print("fix %s: addr to addr4/6 fixup" % self.name)
			if isIPv4(self.addr):
				self.addr4 = self.addr
				self.addr6 = None
			else:
				self.addr4 = None
				self.addr6 = self.addr
			del self.addr

		if not hasattr(self, 'latency'):
			self.latency = 0

		if not hasattr(self, 'cver'):
			self.cver = 0

		# NOTE(review): the previous probe called .append() on the dict, which
		# always raised, so connections were reset on every reload.  That
		# effective behaviour is kept; connections are re-created as
		# heartbeats arrive.  Confirm the reset is intended.
		self.connections = {}


	def getstate(self):
		"""Return the current state string."""
		return self.state


	def dispstate(self):
		"""Return the HTML text for the State column.

		down/overdue are shown in bold; for "up" the latest rtt of every
		connection is listed (numbers with one decimal, anything else as
		text); any other state is shown as is.
		"""
		if self.state in ["down", "overdue"]:
			return "<b>%s</b>" % self.state
		if self.state not in ["up", "UP"]:
			return "%s" % self.state
		state = ""
		for conn in self.connections.values():
			rtt = conn.rtts[-1]
			try:
				state += " %5.1f" % rtt
			except (TypeError, ValueError):
				# rtt is not numeric (e.g. the default "up")
				state += " %5s" % rtt
		return state


	def dispstats(self):
		"""Return the three Hr/Dy/Wk table cells for this host.

		With showstats False (the default), or when the host never reported
		'acks', placeholder "N/A" cells are returned.  Otherwise each cell
		shows 100 - (acks gained * 100 / heartbeats counted) since the last
		hour/day/week snapshot, "" when that rounds to 0 and "-" when no
		heartbeat was counted in the period.
		"""
		placeholder = '<td align="right">N/A</td><td></td><td></td>'
		if not self.showstats or self.doesack == -1:
			return placeholder
		if self.upcount <= 0:
			return "<td>(%s)</td><td></td><td></td>" % (self.doesack)
		cells = []
		for acked, counted in self.hdwcounts[:3]:
			span = self.upcount - counted
			if span != 0:
				vs = "%0.0f" % (100.0 - (((self.doesack - acked) * 100.0) / span))
				if vs == "0":
					vs = ""
			else:
				vs = "-"
			cells.append('<td align="right">%s</td>' % vs)
		return "".join(cells)


	def htmldisp(self, header=False):
		"""Return this host as an HTML table row, or the header row if *header*.

		NOTE(review): the host name arrives in heartbeat packets and is not
		HTML-escaped here - confirm the status page is only reachable by
		trusted users.
		"""
		if header:
			return "<tr><th>Host</th><th>State</th><th>Hr</th><th>Dy</th><th>Wk</th><th>IP4 Addr</th><th>IP6 Addr</th><th>Last change</th><th>Ver</th></tr>\n"

		ipv4addr = self.addr4 if self.addr4 else ""
		ipv6addr = self.addr6 if self.addr6 else ""
		lastts = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self.statetime))
		return "<tr><td>%-24s</td><td>%-7s</td>%s<td>%-16s</td><td>%-16s</td><td>%-17s</td><td>%s</td></tr>\n" % \
		(self.name, self.dispstate(), self.dispstats(), ipv4addr, ipv6addr, lastts, self.cver)


	# set new state, return number of secs in previous state
	def newstate(self, state, when=0):
		"""Switch to *state* and return the seconds spent in the previous one.

		*when* back-dates the change by that many seconds.
		"""
		self.state = state
		now = time.time()-when
		s = now-self.statetime
		self.statetime = now
		return s



def email(s, msg):
	"""Send a notification email with subject suffix *s* and body *msg*.

	Does nothing (returns None) unless SEND_EMAIL is set.  Returns "OK"
	on success and "Fail" when the server refuses the recipients.  Any
	other SMTP/connection error is printed and the daemon is restarted
	via saveandrestart().
	"""
	if not SEND_EMAIL:
		return
	ret = "OK"
	toaddrs = AEMAIL
	fromemail = "aew.heartbeat@wrede.ca"
	subj = "Info from %s: %s" % (NAME, s)
	date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime())
	body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromemail, subj, date, msg)
	# stays None if the connection itself fails, so quit() is not attempted
	server = None
	try:
		server = smtplib.SMTP(SMTPSERVER)
		if DEBUG > 0: server.set_debuglevel(1)
		server.sendmail(fromemail, toaddrs, body)
	except smtplib.SMTPRecipientsRefused as errs:
		log(None, "cannot send email: %s\n" % (errs))
		ret = "Fail"
	except Exception:
		# Exception rather than a bare except, so a SystemExit raised by the
		# signal handler is not turned into a restart.
		print("smtp error: "+traceback.format_exc())
		saveandrestart()
	if server is not None:
		try:
			server.quit()
		except Exception:
			pass
	return ret


def pushover(msg):
	"""Send *msg* as a Pushover notification (best effort).

	Does nothing unless SEND_PUSHOVER is set.  Failures are ignored
	(reported on stderr only when DEBUG is on).

	NOTE(review): the API token and user key are hard-coded below; consider
	moving them to the config file.
	"""
	if not SEND_PUSHOVER:
		return
	# This runs in the main loop, so never wait on the network indefinitely.
	conn = httplib.HTTPSConnection("api.pushover.net:443", timeout=10)
	try:
		conn.request("POST", "/1/messages.json",
			urllib.urlencode({
			"token": "ac7NLX2rPjXFareeDgLpXNoDf4iFmf",
			"user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK",
			"message": msg, }), { "Content-type": "application/x-www-form-urlencoded" })
		conn.getresponse()
	except Exception as e:
		if DEBUG > 0: sys.stderr.write("pushover failed: %s\n" % e)
	finally:
		conn.close()


# nsupdate:  set the DNS A (IPv4) or AAAA (IPv6) record for a fqdn
#	return: None if ok, else error text
def nsupdate(hostname, newip):
	"""Point <hostname>.dy.wapanafa.org at *newip* by running nsupdate.

	An address containing ':' is written as an AAAA record, anything else
	as an A record; a TXT record with the update time is replaced as well.
	Returns None when nsupdate reports "status: NOERROR", otherwise the
	error text or the raw nsupdate output.

	NOTE(review): *hostname* is inserted into the nsupdate script as is -
	confirm callers only pass names from the host table.
	"""
	D = {}
	D['domain'] = 'dy.wapanafa.org'
	D['fqdn'] = '%s.dy.wapanafa.org' % hostname
	D['dnsttl'] = '5'
	D['newip'] = newip
	D['ts'] = time.strftime('%Y-%m-%d.%H:%M:%S', time.gmtime())
	# "in" rather than find() > 0: IPv6 addresses may start with ':' ("::1")
	if ":" in newip:
		D['rrtype'] = 'AAAA'
	else:
		D['rrtype'] = 'A'
	nsup = """update delete %(fqdn)s %(rrtype)s
update add %(fqdn)s %(dnsttl)s %(rrtype)s %(newip)s
update delete %(fqdn)s TXT
update add %(fqdn)s %(dnsttl)s TXT "Created: %(ts)s"
send
answer

""" % D

	if DEBUG > 0: log(None, "DBG: nsup %s" % nsup)
	cmd = ["/usr/local/bin/nsupdate", "-k", "/etc/dhcpc/K%(domain)s.+157+00000." % D, "-v"]
	if DEBUG > 0: log(None, "DBG: cmd %s" % cmd)
	try:
		p = Popen(cmd, shell=False, bufsize=1, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
	except OSError as e:
		return "nsupdate: execution failed: %s" % e
	except Exception:
		return "nsupdate: some error occured"

	# stderr is merged into stdout, so only the first element is of interest
	(output, err) = p.communicate(nsup)
	if output.find('status: NOERROR') >= 0:
		return None
	return output


#
def dur(sec):
	"""Format a duration in seconds as "h:mm:ss", "m:ss" or "0:ss".

	Fractions of a second are dropped.
	"""
	sec = int(sec)
	# divmod keeps the arithmetic integral regardless of how "/" behaves
	h, rest = divmod(sec, 3600)
	m, s = divmod(rest, 60)
	if h > 0:
		return "%d:%02d:%02d" % (h, m, s)
	if m > 0:
		return "%d:%02d" % (m, s)
	return "0:%02d" % s


def fixsort():
	"""Renumber all hosts so Host.num follows the sorted order of their names."""
	for position, hostname in enumerate(sorted(hosts.keys())):
		hosts[hostname].num = position

#
def on_exit():
	"""atexit hook: close the log file (ignoring any failure) and print "exit"."""
	if DEBUG > 0:
		sys.stderr.write("on_exit\n")
	try:
		logf.close()
	except:
		pass
	print("exit")


def initlog(logfile):
	"""Open *logfile* for appending ("a+"); if that fails, open it for writing.

	Returns the open file object.  An error from the fallback open is not
	caught.
	"""
	try:
		handle = open(logfile, "a+")
	except:
		handle = open(logfile, "w")
	return handle


#
#
def checkoverdue():
	"""Mark every "up" host whose last heartbeat is too old as overdue.

	A host is overdue once more than its announced interval plus the
	global grace period has passed since its last heartbeat (measured
	against the global ``now``).  Watched hosts trigger an email and a
	Pushover message; the state change is back-dated by the grace period.
	"""
	for h in hosts.keys():
		host = hosts[h]
		# only hosts currently "up" can become overdue
		if host.state != Host.up:
			continue
		limit = host.interval+grace
		if now-host.lastbeat > limit:
			if h in watchhosts:
				text = "%s is overdue" % h
				email("overdue", text)
				pushover(text)
			host.newstate(Host.overdue, grace)
			log(h, "overdue")


def log(host, m, service=None):
	"""Record event *m*, optionally attributed to *host* and *service*.

	The timestamped line is appended to ``msgs`` (shown on the status
	page) and written to the log file - in "time|service|text" form when
	logfmt is "msg", as the same text line otherwise.  The state pickle is
	rewritten afterwards.

	NOTE(review): ``msgs`` is never trimmed and is pickled on every call -
	confirm the growth is acceptable for long-running daemons.
	"""
	if DEBUG > 0: print("Log: %s" % m)
	stamp = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))
	srv = ("service %s: " % service) if service else ""
	hst = ("%s " % host) if host else ""

	msg = "%s: %s%s%s\n" % (stamp, hst, srv, m)
	msgs.append(msg)
	if logfmt == "msg":
		line = "%d|%s|%s\n" % (now, service, m)
	else:
		line = msg
	logf.write(line)
	logf.flush()
	pickleit()


def dnsupdatethread():
	"""Worker loop: apply the (name, addr) DNS updates queued on ``dnsQ``.

	Each update is sent with nsupdate(); the outcome is logged and a
	failure is also emailed.  An unexpected exception while handling one
	update is reported on stderr instead of ending the thread, so later
	updates are still processed.
	"""
	while True:
		name, addr = dnsQ.get()
		try:
			m = "changed address to %s" % (addr)
			err = nsupdate(name, addr)
			if err:
				m += ", DNS update failed: %s" % err
				email("error: nsupdate failed", "%s: %s" % (name, m))
			else:
				m += ", DNS updated."
			log(name, m)
		except Exception:
			sys.stderr.write("dns update for %s failed: %s\n" % (name, traceback.format_exc()))
		finally:
			# always balance the get(), even when the update blew up
			dnsQ.task_done()

#
#
#
#
def readsock(sock):
	"""Receive one heartbeat datagram from *sock*, update state and reply.

	Steps, in order:
	- parse the message (messages without an ID are treated as coming from
	  an old client);
	- look up or create the Host and its Connection;
	- record acks, client version, latency and rtt;
	- log boot, message, address change, "back" and shutdown events, and
	  notify by email/Pushover for watched hosts;
	- send an ACK, followed by any commands queued for the host.

	Uses the global ``now`` set by the main loop as the receive time.
	"""
	if DEBUG > 3: sys.stderr.write("readsock recfrom start")
	data, addr = sock.recvfrom(MAXRECV)
	if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addr))
	msg = stodict(data)
	if not msg:		# Old hbc client
		if verbose: print("old hbc: %s" % data)
		msg = oldmtodict(data)
	if verbose: print("readsock = %s, %s" % (msg,addr))
	# the name may be missing, empty or parsed as a number
	name = shortname(str(msg.get('name') or "unknown"))
	newh = name not in hosts
	if newh:
		host = Host(name)
	else:
		host = hosts[name]

	cid = msg.get('id', 0)
	if cid not in host.connections:
		host.connections[cid] = Connection(name, cid, addr)
	conn = host.connections[cid]

	host.doesack = msg.get('acks', -1)
	host.setcver(msg.get('ver', 0))
	host.lastbeat = now

	interval = msg.get('interval', 0)
	shutdown = msg.get('shutdown', 0)
	service = msg.get('service', "unknown")
	message = msg.get('msg', None)
	boot = msg.get('boot', 0)
	host.latency = now - msg.get('time', 0)
	rtt = msg.get('rtt',"up")
	conn.rtts.append(rtt)
	if len(conn.rtts) > MAXRTTS:
		del conn.rtts[0]

	if boot:
		log(name, "booted")
		if name in watchhosts:
			# the notification text was previously an undefined name here
			m = "%s booted" % name
			email("booted", m)
			pushover(m)
	if message:
		log(name, "msg: %s" % message, service=service)
		if name in watchhosts:
			email("msg", message)
			pushover(message)

	res = host.newaddr(addr[0])
	if res:
		log(name, res)
		if name in watchhosts:
			email("address change", "%s %s" % (host.name, res))
			pushover("%s %s" % (host.name, res))

	if host.getstate() != Host.up and interval > 0:
		lasts = host.state
		d = host.newstate(Host.up)
		m = "back after being %s for %s" % (lasts, dur(d))
		log(name, m)
		if name in watchhosts:
			email("back", name)
			pushover("%s is back" % name)
	if boot or newh:
		host.upcount = host.doesack
	else:
		host.upcount += 1

	if shutdown:
		log(name, "shutdown")
		if name in watchhosts:
			email("shutdown", "%s shutdown" % name)
			pushover("%s hbc shutdown" % name)
		try:
			host.newstate(Host.down)
		except Exception:
			pass
	if interval > 0:
		host.interval = interval

	# version 0 clients expect the bare text, newer ones a dict message
	rmsg = {'time': time.time()}
	op = 'ACK'
	if host.cver < 1:
		opkt = 'ACK'
		rmsg = 'ACK'
	else:
		opkt = dicttos('ACK', rmsg)
	ss = None
	try:
		ss = sock.sendto(opkt, addr)
	except Exception as e:
		print("cannot send ACK: %s" % e)
	if verbose: print("sendto1: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50]))

	# send any commands we have queued
	while len(host.cmds):
		# pop first so that no entry, whatever its op, can be retried forever
		op, rmsg = host.cmds.pop(0)
		if op == 'CMD':
			email("%s cmd exec" % name, "command '%s' sent" %  rmsg)
			log(name, "command sent")
			if host.cver < 1:
				rmsg = rmsg['cmd']
		elif op == 'UPD':
			log(name, "update initiated")
			if host.cver < 1:
				log(name," ver 0 does not support UPD")
				continue
		if host.cver < 1:
			opkt = rmsg
			op = ""
		else:
			opkt = dicttos(op, rmsg, True)
		try:
			ss = sock.sendto(opkt, addr)
		except Exception as e:
			print("opkt len is %s" % len(opkt))
			print("cannot send: %s" % e)

		if verbose: print("sendto2: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50]))
		if DEBUG > 2: print("msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss))



def updatecode(ucode, uname):
	"""Queue an 'UPD' command that sends the file *ucode* to host *uname*.

	The payload carries the file's MD5 hex digest ('csum') and its
	base64-encoded contents ('code').  Returns None on success, otherwise
	a text describing why nothing was queued.

	NOTE(review): *ucode* is a server-side path taken from the HTTP
	request - confirm the HTTP port is only reachable by trusted users.
	"""
	import hashlib	# replaces the deprecated md5 module; same digest

	try:
		with open(ucode, "r") as fh:
			new_code = fh.read()
	except Exception as e:
		return "cannot read new code: %s" % e
	host = hosts.get(uname)
	if host is None:
		return "unknown host: %s" % uname
	icsum = hashlib.md5(new_code).hexdigest()
	rmsg = {'csum': icsum, 'code': new_code.encode('base64','strict') }
	host.cmds.append(('UPD',rmsg))
	return None
#
#

#class HttpServer(BaseHTTPServer.HTTPServer):
class HttpServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
	"""HTTP server for the status page; ThreadingMixIn handles each request in its own thread."""
	# allow rebinding the port right after a restart
	allow_reuse_address = True
	# NOTE(review): takes no self and is not called anywhere in this file -
	# looks like a leftover marker; confirm before removing.
	def threaded():
		pass
#
#
class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
	"""Request handler for the status page and the control URLs.

	GET routes:
	  /                      status page
	  /c?h=<host>&c=<cmd>    queue a command for a host
	  /d?h=<host>            drop a host from the table
	  /n?h=<host>            (re-)register the host's address(es) in DNS
	  /u?h=<host>&c=<file>   queue a code update read from a server-side file
	  /r                     ask the daemon to restart
	Anything else answers 404.

	NOTE(review): none of these routes is authenticated - confirm the HTTP
	port is only reachable by trusted users.
	"""

	def _sendheaders(self, code=200, cause=None):
		"""Send the status line and the headers shared by HEAD and GET."""
		self.send_response(code, cause)
		self.send_header("Last-Modified", time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(now)))
		self.send_header("Content-Type","text/html; charset = ISO-8859-1")
		self.end_headers()


	def _esc(self, text):
		"""Return *text* with the HTML special characters &, < and > escaped."""
		return str(text).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


	def _argvalue(self, arg, key):
		"""Return the value of query argument *arg* if it is "<key>=...", else ""."""
		prefix = key + "="
		if arg[:len(prefix)] == prefix:
			return arg[len(prefix):]
		return ""


	def _argerror(self, lcause, cause='Argument error'):
		"""Return a (400, cause, page) result for a bad request."""
		return 400, cause, self.builderror(400, cause, lcause)


	def _done(self, text):
		"""Return a (200, "OK", page) result whose page ends with *text*."""
		res = self.buildhead()
		res.append(text)
		return 200, "OK", res


	def do_HEAD(self):
		"""Answer a HEAD request with a 200 status and the standard headers."""
		self._sendheaders(200)


	def buildhead(self, title="Heartbeat", refresh=None):
		"""Return the opening lines of an HTML page as a list.

		*refresh*, if given, is the auto-reload period in seconds.
		"""
		res=[]
		res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
		res.append("<html>")
		res.append("<head>")
		res.append('<title>%s</title>' % (title))
		if refresh:
			res.append("<meta http-equiv = Refresh content = %d>\n" % refresh)
		res.append("</head>")
		res.append('<body BGCOLOR = "#FFFFFF" LINK = "#008000" VLINK = "#008000">')
		return res


	def buildpage(self):
		"""Return the status page: a table of all hosts and the latest log lines."""
		res=self.buildhead(refresh=60)
		res.append("<H2>Heartbeat status %s</h2><h4> %s (%s)</H4>" % (VER, time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT')))

		res.append("<table>")
		hosts_sorted = sorted(hosts.keys())
		# the header row is produced by a Host instance; none exists yet on
		# a fresh start
		if hosts_sorted:
			res.append(hosts[hosts_sorted[0]].htmldisp(True))
		for h in hosts_sorted:
			res.append(hosts[h].htmldisp())
		res.append("</table>")

		# the more hosts, the fewer log lines (but at least 3)
		le = max(40 - len(hosts), 3)
		res.append("<h4>Log of Events</h4>")
		for m in msgs[-le:]:
			# log lines contain text sent by hosts
			res.append("%s<BR>" % self._esc(m))
		res.append("</body></html>")
		return res

	def builderror(self, code, cause, lcause):
		"""Return an error page for status *code* with heading *cause* and detail *lcause*."""
		res=[]
		res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
		res.append('<html><head>')
		res.append('<title>%s %s</title>' % (code, cause))
		res.append('</head><body>')
		res.append('<h1>%s</h1>' % (cause))
		# lcause may echo parts of the request
		res.append('<p>%s</p>' % self._esc(lcause))
		res.append('<hr>')
		res.append('<address>hbd (Unix) Server at %s:%s</address>' % (hbd_host, hbd_port))
		res.append('</body></html>')
		return res


	def _get_command(self, uarg):
		"""/c: queue a command for a host.  Returns (code, cause, page)."""
		if len(uarg) != 2 or len(uarg[0]) < 3 or len(uarg[1]) < 3:
			return self._argerror("need h= and c= arguments")
		uname = self._argvalue(uarg[0], "h")
		ucmd = self._argvalue(uarg[1], "c")
		if ucmd != "" and uname != "" and uname in hosts:
			rmsg = {'cmd': urllib.unquote(ucmd)}
			hosts[uname].cmds.append(('CMD', rmsg))
			return self._done("2Done")
		return self._argerror("ucmd %s uname %s" % (ucmd, uname), cause='arg 2 error')


	def _get_drop(self, uarg):
		"""/d: remove a host from the table.  Returns (code, cause, page)."""
		if len(uarg) != 1 or len(uarg[0]) < 3:
			return self._argerror("need h= argument")
		uname = self._argvalue(uarg[0], "h")
		if uname != "" and uname in hosts:
			# delete first so the pickle written by log() no longer has the host
			del hosts[uname]
			log(uname, "dropped")
			return self._done("Done")
		return self._argerror("host not found")


	def _get_register(self, uarg):
		"""/n: push the host's known address(es) to DNS.  Returns (code, cause, page)."""
		if len(uarg) != 1 or len(uarg[0]) < 3:
			return self._argerror("need h= argument")
		uname = self._argvalue(uarg[0], "h")
		host = hosts.get(uname) if uname != "" else None
		if host is None:
			ll="name %s not found" % uname
		else:
			addrs = [a for a in (getattr(host, 'addr4', None), getattr(host, 'addr6', None)) if a]
			if addrs:
				errs = [e for e in [nsupdate(uname, a) for a in addrs] if e]
				err = "; ".join(errs) if errs else None
				ll="nsupdate request: %s" % err
			else:
				ll="no address known for %s" % uname
		log(uname, ll)
		return self._done(ll)


	def _get_update(self, uarg):
		"""/u: queue a code update for a host.  Returns (code, cause, page)."""
		if len(uarg) != 2 or len(uarg[0]) < 3 or len(uarg[1]) < 3:
			return self._argerror("need h= and c= arguments")
		uname = urllib.unquote(self._argvalue(uarg[0], "h"))
		ucode = self._argvalue(uarg[1], "c")
		if ucode != "" and uname != "" and uname in hosts:
			err = updatecode(ucode, uname)
			return self._done("3 Done: %s" % (err if err else "OK"))
		return self._argerror("host not found")


	def do_GET(self):
		"""Dispatch a GET request to its route and send the response.

		The status line is sent only after the route has run, so error
		pages carry their real status code.
		"""
		global sig
		xsig = 0

		if DEBUG > 2: sys.stderr.write("handle\n")
		uri = self.path
		upar = uri.split("?")
		if len(upar) == 1:
			uarg=[]
		else:
			uarg = upar[1].split("&")

		if DEBUG > 2: sys.stderr.write("handle = %s\n" % (uri))
		code = 200
		cause = "OK"
		if uri == "/":
			res=self.buildpage()

		elif upar[0] == "/c":	# command on host /c?h=melschserver&c=sudo%20ls
			code, cause, res = self._get_command(uarg)

		elif upar[0] == "/d":	# drop host  /d?h=melschserver
			code, cause, res = self._get_drop(uarg)

		elif upar[0] == "/n":   # register name
			code, cause, res = self._get_register(uarg)

		elif upar[0] == "/u":   # update
			code, cause, res = self._get_update(uarg)

		elif upar[0] == "/r":   # restart
			res=self.buildhead()
			res.append("restart request")
			xsig=signal.SIGHUP
			log(None, "restart request")

		else:
			code=404
			cause="Not Found"
			res=self.builderror(code, cause, "The requested URL was not found on this server.")

		self._sendheaders(code, cause)
		self.wfile.write("\n".join(res))

		# picked up by the main loop, which then stops and restarts
		if xsig:
			sig = xsig

def setrunning(new):
	"""Set the global ``running`` flag that keeps the main loop going."""
	global running
	running = new
	if DEBUG > 0:
		sys.stderr.write("running is now = %s\n" % (running))


def closeup():
	"""Shut everything down before a restart or exit.

	Clears the running flag, closes both UDP sockets, stops and closes
	the HTTP server, logs "restarting", closes the log file and puts
	SIGHUP back to its default action.  Each step is best effort: a
	failure in one does not prevent the next.
	"""
	def attempt(action):
		# run one cleanup step, ignoring whatever goes wrong
		try:
			action()
		except:
			pass

	setrunning(False)
	attempt(lambda: sock.close())
	attempt(lambda: sock6.close())

	if DEBUG > 0: sys.stderr.write("asking http server to stop\n")
	try:
		serv.shutdown()
		if DEBUG > 0: sys.stderr.write("http server stopped\n")
	except Exception as e:
		if DEBUG > 0: sys.stderr.write("http server did NOT stop: %s\n" % str(e))

	attempt(lambda: serv.server_close())

	log(None, "restarting")
	attempt(lambda: logf.close())

	# only SIGHUP is reset; the SIGTERM handler stays installed
	signal.signal(signal.SIGHUP, signal.SIG_DFL)


def restart():
	"""Replace the current process with a fresh copy of this program.

	The new process gets the options collected in ``cmdargs``.
	NOTE(review): sys.argv[0] is used as given; if it is a relative path,
	the chdir done when daemonizing may make the exec fail - confirm.
	"""
	program = sys.argv[0]
	argv = [program]+cmdargs
	print("execv %s %s" % (program, argv))
	os.execv(program, argv)
	print("should not be here")

def saveandrestart():
	"""Shut down cleanly (closeup) and then re-exec the daemon (restart)."""
	for step in (closeup, restart):
		step()


def pickleit():
	"""Save hosts, htab, msgs and lastfm (in that order) to PICKFILE.

	The data is written to a temporary file next to PICKFILE and renamed
	into place, so a crash or error while writing never leaves a
	truncated state file behind.  The temporary name includes the process
	and thread id because log() calls this from several threads.
	"""
	tmpname = "%s.%d.%s" % (PICKFILE, os.getpid(), threading.current_thread().ident)
	done = False
	try:
		with open(tmpname, 'w') as pickf:
			pick = cPickle.Pickler(pickf)
			pick.dump(hosts)
			pick.dump(htab)
			pick.dump(msgs)
			pick.dump(lastfm)
		os.rename(tmpname, PICKFILE)
		done = True
	finally:
		if not done:
			# do not leave a half-written temporary file behind
			try:
				os.unlink(tmpname)
			except OSError:
				pass


#
# Main
#

# Command line handling.  cmdargs collects the options that must be passed
# on when the daemon re-executes itself.
helpflag = False
forground = False
optlist = []
args = []
# expanduser also works when HOME is not set (e.g. when started at boot)
home = os.path.expanduser("~")
cmdargs = []
configfile = "%s/.hbrc" % home

try:
	# -h takes no argument; -d is accepted but has no effect
	optlist, args = getopt.getopt(sys.argv[1:], 'c:dfhvx')
except getopt.GetoptError:
	helpflag = True

for o, a in optlist:
	if o == '-c':
		configfile = a
		cmdargs += [o, a]
	elif o == '-f':
		forground = True
		cmdargs += [o]
	elif o == '-h':
		helpflag = True
	elif o == '-v':
		verbose = True
		cmdargs += [o]
	elif o == '-x':
		DEBUG += 1
		cmdargs += [o]

if helpflag:
	print("hbd HeartBeatDaemon")
	print("usage: hbd [-dfhvx] [-c configfile]")
	print("")
	print("	-c configfile")
	print("	-d display")
	print("	-f run in foreground")
	print("	-h this help")
	print("	-v verbose")
	print("	-x increase debug lvl")
	print("")
	print(""" config file can contain
logfile = /var/log/heartbeat.log
logfmt = [text|msg]
hb_port = 50003
interval = 20
hbd_port = 50004
hbd_host = www.domain.com
grace = 2
""")

	sys.exit(1)

#
# set defaults

hb_port = PORT
hbd_host = THOST
hbd_port = TPORT
logfile = LOGFILE
logfmt = "text"
interval = INTERVAL
grace = GRACE
watchhosts = []
dyndnshosts = []
drophosts = []

import ast

# settings that may be overridden from the config file
_CFGKEYS = ('interval', 'grace', 'hbd_port', 'hbd_host', 'hb_port',
	'logfile', 'logfmt', 'watchhosts', 'dyndnshosts', 'drophosts')


def _cfgvalue(text):
	"""Convert the text after '=' into a config value.

	Python literals (numbers, quoted strings, lists) are converted with
	ast.literal_eval; anything else, such as an unquoted path or host
	name, is kept as the plain string.  This replaces eval(), which ran
	arbitrary expressions and rejected the unquoted values shown in the
	help text.
	"""
	text = text.strip()
	try:
		return ast.literal_eval(text)
	except (ValueError, SyntaxError):
		return text


try:
	f = open(configfile, "r")
	if verbose:
		print("notice: using config file %s" % configfile)
except IOError:
	print("warning: running without config file: %s" % configfile)
	f = None

if f:
	try:
		for l in f:
			# strip only the line end, so a last line without a newline
			# keeps its final character
			l = l.rstrip("\r\n")
			if verbose:
				print("  %s" % l)
			r = l.split('=', 1)
			key = r[0].strip()
			# lines without '=' and unknown keys (comments etc.) are ignored
			if len(r) != 2 or key not in _CFGKEYS:
				continue
			globals()[key] = _cfgvalue(r[1])
	finally:
		f.close()

if len(args) != 0:
	print("error: args")
	sys.exit(1)


if verbose:
	print("notice: logging to %s" % logfile)
logf = initlog(logfile)

# Restore the state saved by pickleit().
# NOTE(review): unpickling runs code chosen by whoever wrote the file, and
# PICKFILE lives in a shared temp directory - confirm only the daemon can
# write it.
if os.path.exists(PICKFILE):
	pickf = open(PICKFILE, 'r')
	try:
		pick = cPickle.Unpickler(pickf)
		hosts = pick.load()
		htab = pick.load()
		msgs = pick.load()
		try:
			lastfm = pick.load()
		except Exception:
			# older state files end after msgs
			lastfm = ["","",""]
	except Exception:
		# unreadable state: discard it and carry on with what was loaded
		os.unlink(PICKFILE)
	finally:
		pickf.close()
	for h in hosts.keys():
		hosts[h].fixup()
	for h in drophosts:
		if h in hosts:
			del hosts[h]


now = time.time()
startsec = int(now) % interval


log(None, "Starting %s" % VER)
atexit.register(on_exit)

# sockets watched by the main loop
ilist = []

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
# SO_REUSEADDR only affects a bind() that follows it
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind(("", hb_port))
ilist.append(sock)

sock6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
sock6.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock6.bind(("", hb_port))
ilist.append(sock6)

if not forground:
	# daemonize: the parent exits, the child detaches from the terminal
	pid = os.fork()
	if pid > 0:
		if verbose:
			print("daemoinizing... pid = %d" % pid)
		sys.exit(0)

	verbose = False
	os.close(0)
	os.close(1)
	os.close(2)
	sys.stdin.close()
	# with debugging on, keep stdout/stderr in /tmp/log1, else discard them
	if DEBUG > 0:
		sys.stdout = LogDevice()
		sys.stderr = LogDevice()
	else:
		sys.stdout = NullDevice()
		sys.stderr = NullDevice()
	os.chdir("/tmp")
	os.setsid()
	os.umask(0)

try:
	serv = HttpServer((hbd_host, hbd_port), HttpHandler)
except Exception:
	print("failed to start server on %s:%s" % (hbd_host, hbd_port))
	sys.exit(1)

# the HTTP server and the DNS updater run as daemon threads
servthread = threading.Thread(target=serv.serve_forever)
servthread.daemon = True
servthread.start()

dnsQ = Queue.Queue()
dnsT = threading.Thread(target=dnsupdatethread)
dnsT.daemon = True
dnsT.start()

running = True
sig = 0
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGHUP, handler)

next = int(now)+15   # 15 seconds time to settle after (re-)start
sleep = 1
firstcheck = int(now) + 15

# Main loop: wait for heartbeats, handle them, check for overdue hosts
# about once a second, and stop when a signal has been recorded in sig.
while running:
	sr = None
	if DEBUG > 2: sys.stderr.write("about to sleep = %s\n" % (sleep))
	try:
		sr = select.select(ilist, [], [], sleep)
		now = time.time()
	except KeyboardInterrupt:
		sys.stderr.write("Keyboard Interrupt!\n")
		running = False
		closeup()
		continue
	except select.error as value:
		# errno 4 is EINTR (interrupted by a signal) and is expected;
		# anything else is reported.  Either way, go around again.
		if value[0] != 4:
			sys.stderr.write("select err %s %s" % (select.error, value))
		continue
	except Exception as e:
		if DEBUG > 2: sys.stderr.write("select exception %s\n" % (str(e)))
		sys.exit(1)
	if DEBUG > 2: sys.stderr.write("woke from sleep = %s (%s)\n" % (str(sr), str(ilist)))
	for fh in sr[0]:
		if fh in [sock, sock6]:
			# one bad packet must not take the whole daemon down
			try:
				readsock(fh)
			except Exception as e:
				sys.stderr.write("readsock failed: %s\n" % traceback.format_exc())
				log(None, "error handling packet: %s" % e)
		else:
			sys.stderr.write("what happend just now?\n")
	if DEBUG > 2: sys.stderr.write("done handling, running is %s, sig is %s\n" % (running, sig))

	# check hour/day/week: when one of them rolls over, snapshot every
	# host's counters for that period
	for v, fm in enumerate(tsfm):
		ts=time.strftime(fm, time.localtime(now))
		if ts != lastfm[v]:
			lastfm[v]=ts
			for h in hosts.keys():
				hosts[h].hdwcounts[v] = [hosts[h].doesack, hosts[h].upcount]

	if now >= next and now >= firstcheck:
		next = now+1
		checkoverdue()

	sleep = next-now
	if sleep < 0:
		sys.stderr.write("sleep is negative! %s next = %s\n" % (sleep, next))
		sleep = 0
	if DEBUG > 2: sys.stderr.write("sleep = %s next = %s\n" % (sleep, next))

	if sig != 0:
		setrunning(False)



# SIGHUP (also set by the /r URL) means restart; any other signal just ends
if sig == signal.SIGHUP:
	if DEBUG > 0: sys.stderr.write("signal 1 saveandrestart\n")
	saveandrestart()
