Files
heartbeat/hbc
T

317 lines
6.4 KiB
Python
Executable File

#!/usr/bin/env python
# $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $
# Requirements on Linux:
# python-filelock
# python-daemon, version 1.61 or newer
# on *bsd
# py27-lockfile
# py-27-daemon
# or run sudo easy_install-2.7 lockfile daemon
import sys
import time
import socket
import os
import signal
import getopt
import string
import select
import errno
import traceback
import lockfile
import daemon
import daemon.pidfile
import syslog
# Default UDP port of the heartbeat servers (initial value of hb_port).
PORT = 50003
# Default number of seconds between two heartbeats (initial value of interval).
INTERVAL = 10
# PID/lock file used when running as a daemon.
PIDFILE = '/tmp/hbc.pid'
# When True, extra debug output is printed to stdout.
DBG = False
# Shared UDP socket; created on first use by socksend()/process().
sock = None
# Main-loop flag: process() keeps running while this is True, cleanup() clears it.
up = True
# Number of "ACK" replies received; reported in every heartbeat message.
ackcount = 0
class NullDevice:
    """File-like sink: whatever is written to it is thrown away."""

    def write(self, s):
        """Accept *s* and discard it; always returns None."""
        return None
def syslogtrace(note):
    """Send the traceback of the exception being handled to syslog.

    note -- short tag naming the place where the failure was caught; it
            starts the first logged line.

    The report is split on newlines and every piece becomes its own
    syslog entry, prefixed with ' tb: '.
    """
    header_lines = ('%s hbc died: ' % note).split('\n')
    trace_lines = traceback.format_exc().split('\n')
    for entry in header_lines + trace_lines:
        syslog.syslog(' tb: %s' % entry)
def getsock(host):
    """Create a UDP socket whose address family matches *host*.

    host -- name or address of a heartbeat server.  It is resolved with
            getaddrinfo() and the family of the first answer is used.

    Returns the new datagram socket with SO_REUSEADDR switched on, or
    None when *host* cannot be resolved.  Terminates the program with
    exit status 1 when the first answer is neither IPv4 nor IPv6.
    """
    try:
        # Only the address family of the answer is used below, so the
        # port passed here just has to be a valid number.
        r = socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP)
    except socket.gaierror:
        logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc())
        if DBG:
            print(logm)
        return None
    # Compare against the platform's own constants.  The numeric value
    # of AF_INET6 differs between systems (28 on FreeBSD, 30 on macOS,
    # 10 on Linux), so fixed numbers reject IPv6 servers on some hosts.
    family = r[0][0]
    if family == getattr(socket, 'AF_INET6', None):
        af_type = socket.AF_INET6
    elif family == socket.AF_INET:
        af_type = socket.AF_INET
    else:
        print("dont know this net type: %s" % family)
        sys.exit(1)
    if verbose:
        syslog.syslog("socktype: %s" % af_type)
    new_sock = socket.socket(af_type, socket.SOCK_DGRAM)
    reuse = new_sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR) | 1
    new_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, reuse)
    if verbose:
        syslog.syslog("get socket %s" % new_sock)
    return new_sock
def socksend(msg, tohost):
    """Send *msg* as one UDP datagram to *tohost*.

    msg    -- payload string.
    tohost -- (host, port) pair naming the receiver.

    The module-wide socket is created on first use, from the host part
    of *tohost*, and reused by every later call.

    Raises socket.error when no socket could be created because the host
    did not resolve, or when the send itself fails.
    """
    global sock
    if sock is None:
        sock = getsock(tohost[0])
    if DBG:
        print("socksend: sending msg=%s on socket=%s" % (msg, sock))
    if sock is None:
        # getsock() returns None for an unresolvable host; fail with a
        # meaningful error instead of an AttributeError on None.sendto.
        raise socket.error("no socket available: cannot resolve %s" % tohost[0])
    sock.sendto(msg, tohost)
    if verbose:
        syslog.syslog("msg %s sent" % msg)
def process():
    """Run the heartbeat loop until the global *up* flag is cleared.

    Once per *interval* seconds a heartbeat datagram is sent to every
    host in *hb_hosts*.  While waiting for the next beat the socket is
    watched for incoming datagrams: the payload "ACK" increments
    *ackcount*, any other payload is handed to os.system().

    Reads the module globals hb_hosts, hb_port, interval, iam and
    verbose; updates sock and ackcount.
    """
    global up, sock, ackcount
    if sock is None:
        # Any configured server will do for choosing the address family.
        sock = getsock(hb_hosts[0])
    ackcount = 0
    last_beat = time.time()
    while up:
        remaining = (last_beat + interval) - time.time()
        if verbose:
            syslog.syslog("sleep %s" % remaining)
        if remaining > 0:
            try:
                r = select.select([sock.fileno()], [], [], remaining)
            except BaseException:
                # A signal interrupting select() ends up here as well.
                # When cleanup() has already cleared `up` this is the
                # normal way out, so only unexpected failures are logged.
                if up:
                    syslogtrace('select')
                break
            if verbose:
                syslog.syslog("r is %s" % str(r))
            if sock.fileno() in r[0]:
                data, addr = sock.recvfrom(1024)
                if data == "ACK":
                    ackcount += 1
                else:
                    # SECURITY: the datagram is executed as a shell
                    # command without any check of who sent it.  Anyone
                    # able to reach this UDP socket can run commands as
                    # the user running hbc.  Left in place because it is
                    # part of the protocol; restrict it (sender check,
                    # command whitelist) before using on open networks.
                    try:
                        os.system(data)
                    except Exception:
                        syslogtrace('System')
                # Go back to waiting for the rest of the interval.
                continue
        last_beat = time.time()
        for hb_host in hb_hosts:
            try:
                msg = "interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount)
                if verbose:
                    syslog.syslog("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
                socksend(msg, (hb_host, hb_port))
            except Exception:
                # A failed beat to one server must not stop the others.
                # Exception (not a bare except) lets SystemExit raised by
                # a terminating signal handler pass through.
                logm = '%s hbc died: \n%s' % ('socksend', traceback.format_exc())
                if DBG:
                    print(logm)
                if verbose:
                    syslog.syslog("heartbeat to %s failed" % hb_host)
def cleanup(a, b):
    """Stop the main loop, announce the shutdown and close the socket.

    a, b -- only logged.  The function is registered as a signal handler
            (which passes signal number and stack frame) and is also
            called directly as cleanup(0, None) at the end of the script.

    Safe to call more than once: after the first run has cleared the
    global *up* flag, later calls return immediately instead of sending
    on the already closed socket.
    """
    global up, sock, ackcount
    if not up:
        # Shutdown was already announced by an earlier call.
        return
    up = False
    syslog.syslog('exit a=%s b=%s' % (str(a), str(b)))
    msg = "shutdown=1;name=%s;acks=%s" % (iam, ackcount)
    for hb_host in hb_hosts:
        if verbose:
            syslog.syslog("hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
        try:
            socksend(msg, (hb_host, hb_port))
        except Exception:
            # One unreachable server must not keep the remaining servers
            # from being told, nor the socket from being closed.
            syslogtrace('cleanup')
    # Presumably gives the datagrams time to leave before the close.
    time.sleep(1)
    if sock is not None:
        sock.close()
# ---- command line handling -------------------------------------------
# msgonly:  -m was given; send one message and exit
# helpflag: -h was given or the options could not be parsed
# verbose:  -v, extra progress output
# fdaemon:  -d, detach and run as a daemon
# msgboot:  key=value parts of the first message that is sent
msgonly = False
helpflag = False
verbose = False
fdaemon = False
optlist = []
args = []
msgboot = []
# expanduser() gives the same directory as $HOME when that is set and
# falls back to the password database when it is not (for example when
# started from a boot script), where os.environ['HOME'] raises KeyError.
home = os.path.expanduser('~')
configfile = "%s/.hbrc" % home
try:
    optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
except getopt.GetoptError:
    # Unknown option or missing argument: show the usage text.
    helpflag = True
for o, a in optlist:
    if o == '-b':
        msgboot.append("boot=1")
    elif o == '-c':
        configfile = a
    elif o == '-d':
        fdaemon = True
    elif o == '-h':
        helpflag = True
    elif o == '-m':
        msgboot.append("service=%s" % "service")
        # ';' separates the fields of a message, so it must not appear
        # inside the text.  str.replace() returns a new string; the
        # result has to be kept.
        a = a.replace(';', ':')
        msgboot.append("msg=%s" % a)
        msgonly = True
    elif o == '-v':
        verbose = True
if helpflag:
    print("hbc HeartBeatClient")
    print("usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]")
    print("")
    print(" -b indicate machine boot")
    print(" -c configfile")
    print(" -d daemonize")
    print(" -h this help")
    print(" -m send a message")
    print(" -v verbose")
    print("")
    print(""" config file can contain
hb_hosts=('host1', 'host2', ...)
hb_port=50003
interval=20
logfile=...
logfmt={|test|msg}
grace=SECONDS
reportstrict={True|False}
""")
    sys.exit(1)
#
# set defaults
hb_port = PORT
interval = INTERVAL
hb_hosts = []
iam = socket.gethostname()
# ---- optional config file --------------------------------------------
# One "key=value" per line; recognised keys are hb_hosts, interval,
# hb_port and name.  All other lines are ignored.
try:
    f = open(configfile, "r")
    if verbose:
        print("notice: using config file %s" % configfile)
except (IOError, OSError):
    if verbose:
        print("warning: running without config file: %s" % configfile)
    f = None
if f:
    # `with` closes the file even when a value fails to evaluate.
    with f:
        for l in f:
            # partition() keeps '=' characters inside the value and copes
            # with a line that has no '=' at all; rstrip() removes the
            # line end without eating the last character of a final line
            # that lacks a newline.
            key, sep, value = l.rstrip('\r\n').partition('=')
            if not sep:
                continue
            key = key.strip()
            # NOTE(security): values are passed to eval(), i.e. the config
            # file is executed as Python expressions.  Keep the file
            # writable by its owner only.
            if key == 'hb_hosts':
                hb_hosts = eval(value)
                if isinstance(hb_hosts, str):
                    # ('host1') is a plain string, not a tuple; without
                    # this it would be iterated character by character.
                    hb_hosts = [hb_hosts]
                if verbose:
                    print("notice: cfg hb_hosts: %s" % hb_hosts)
            elif key == 'interval':
                interval = eval(value)
            elif key == 'hb_port':
                hb_port = eval(value)
            elif key == 'name':
                iam = eval(value)
                if verbose:
                    print("name set to %s" % iam)
# Hosts named on the command line replace those from the config file.
if args:
    hb_hosts = args
if not hb_hosts:
    print("no hb server specified")
    sys.exit(1)
if verbose:
    print("notice: hb_hosts: %s" % str(hb_hosts))
    print("notice: hb_port: %s" % hb_port)
    print("notice: interval: %s" % interval)
    print("notice: iam: %s" % iam)
# ---- initial message --------------------------------------------------
# Sent once before the heartbeat loop starts.  It carries the boot flag
# (-b) and/or the free-text message (-m), plus name, time and ack count.
if not msgonly:
    msgboot.append("interval=%s" % interval)
if len(msgboot) > 0:
    if DBG:
        print("on boot")
    msgboot.append("name=%s" % iam)
    msgboot.append("time=%s" % time.time())
    msgboot.append("acks=0")
    msg = ";".join(msgboot)
    # Keep trying every 10 seconds until each server has been sent the
    # message.  Only the servers that failed are retried, so a server
    # that already got the message does not receive duplicates.
    pending = list(hb_hosts)
    while pending:
        failed = []
        for hb_host in pending:
            try:
                if DBG:
                    print("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
                socksend(msg, (hb_host, hb_port))
            except Exception:
                # Exception rather than a bare except: Ctrl-C and
                # sys.exit() must be able to end the retry loop.
                logm = '%s hbc died: \n%s' % ('socksend2', traceback.format_exc())
                if DBG:
                    print(logm)
                failed.append(hb_host)
        pending = failed
        if pending:
            time.sleep(10)
    if verbose:
        print("msgboot done msgonly=%s" % msgonly)
if msgonly:
    # -m: the message was the whole job.
    sys.exit(0)
#
# ---- daemon setup and main run ----------------------------------------
# NOTE(review): this copy of the file has lost its indentation.  The
# comments below describe the statements in order; the exact nesting
# (in particular whether the final process()/cleanup() calls sit inside
# `with context:` or run in foreground mode too) must be confirmed
# against a correctly indented copy.
#
# Log to syslog with the process id, using the daemon facility.
syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON)
# -d given: guard against a second instance, then detach.
if fdaemon:
pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1)
# Read the PID recorded by a previous run, if any.
try:
opid = pidfile.read_pid()
except:
opid = None
if verbose: print "opid %s" % opid
if opid:
# Signal 0 delivers nothing; it only tests whether the PID can be signalled.
# NOTE(review): the bare except also treats "permission denied" (process
# exists but belongs to another user) as "not running" -- confirm intended.
try:
os.kill(opid, 0)
is_running = True
except:
is_running = False
if verbose: print "is_running %s" % is_running
if is_running:
print "process still alive %s" % opid
sys.exit(1)
# The recorded process is gone: drop the leftover PID file.
print "warning: stale pid file removed"
os.unlink(PIDFILE)
print "daemoinizing... %s" % os.getpid()
context = daemon.DaemonContext(
working_directory='/tmp',
umask=0o002,
pidfile=pidfile,
)
# SIGTERM runs cleanup(); SIGHUP is mapped to the name 'terminate'
# (presumably resolved by python-daemon to its own terminate handler --
# verify against the python-daemon documentation).
context.signal_map = {
signal.SIGTERM: cleanup,
signal.SIGHUP: 'terminate',
# signal.SIGUSR1: reload_program_config,
}
# Keep the heartbeat socket open across daemonization.
# NOTE(review): assumes sock was already created while the initial
# message was sent; sock.fileno() fails if sock is still None.
context.files_preserve = [sock, sock.fileno()]
with context:
syslog.syslog('starting heartbeat to %s' % ','.join(hb_hosts))
up = True
# Run the heartbeat loop; any failure is logged, then the shutdown
# message is sent.
# NOTE(review): cleanup() may already have run as the SIGTERM handler by
# the time this direct call is reached.
try:
process()
except:
syslogtrace('process')
cleanup(0, None)