#!/usr/bin/env python
# $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $
"""hbc -- HeartBeatClient.

Sends ';'-separated key=value heartbeat messages over UDP to one or more
heartbeat hosts.  At start-up a boot/interval message is sent (and retried
every 10 seconds until every host accepted it).  With -d the program then
daemonizes and sends a heartbeat every `interval` seconds; datagrams
received in between are either counted (payload "ACK") or handed to
os.system().

The print calls use the single-argument parenthesised form, which behaves
the same as the print statement under Python 2.
"""

import sys
import time
import socket
import os
import signal
import getopt
import string
import select
import errno
import traceback
try:
    import lockfile
    import daemon
    import daemon.pidfile
except ImportError:
    print("""
require
on Linux
    python-filelock
    python-daemon vs 1.61 or >
on *bsd
    py27-lockfile
    py27-daemon
or run
    sudo easy_install-2.7 lockfile python-daemon""")
    sys.exit(1)

import syslog

PORT = 50003              # default heartbeat server port
INTERVAL = 10             # default seconds between heartbeats
PIDFILE = '/tmp/hbc.pid'
DBG = False               # set True for debug output on stdout

socks = None              # list of UDP sockets, created lazily by socksend()
up = True                 # main-loop flag, cleared by cleanup()
ackcount = 0              # number of "ACK" datagrams received so far


class NullDevice:
    """File-like sink that discards everything written to it."""

    def write(self, s):
        pass


def syslogtrace(note):
    """Write the current exception traceback to syslog, one line at a time.

    note -- short tag naming the place where the exception was caught.
    """
    logm = '%s hbc died: \n%s' % (note, traceback.format_exc())
    for line in logm.split('\n'):
        syslog.syslog(' tb: %s' % line)


def getsock(host):
    """Create one UDP socket per address that `host` resolves to.

    Returns a list of sockets, or None when the name cannot be resolved.
    Exits the program when the resolver reports an unknown address family.
    """
    try:
        # The port is only needed to satisfy getaddrinfo(); the real
        # destination port is passed to sendto() by socksend().
        rs = socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP)
    except socket.gaierror:
        logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc())
        if DBG:
            print(logm)
        return None
    result = []
    for r in rs:
        if DBG:
            print("address %s" % str(r))
        family = r[0]
        # 10, 28 and 30 are the numeric AF_INET6 values the original code
        # accepted; the platform's own constant is accepted as well.
        if family in [socket.AF_INET6, 10, 28, 30]:
            af_type = socket.AF_INET6
        elif family in [socket.AF_INET, 2]:
            af_type = socket.AF_INET
        else:
            # was r[0][0], which raised TypeError because r[0] is an int
            print("dont know this net type: %s" % family)
            sys.exit(1)
        if verbose:
            syslog.syslog("socktype: %s" % af_type)
        sock = socket.socket(af_type, socket.SOCK_DGRAM)
        sock.setsockopt(
            socket.SOL_SOCKET, socket.SO_REUSEADDR,
            sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR) | 1)
        if verbose:
            syslog.syslog("get socket %s" % sock)
        result.append(sock)
    return result


def socksend(msg, tohost):
    """Send `msg` to the (host, port) tuple `tohost` on every open socket.

    The sockets are created on first use from the host in `tohost`.
    Raises socket.error when that host cannot be resolved; `socks` stays
    None in that case so the next call tries the lookup again.
    """
    global socks
    if socks is None:
        socks = getsock(tohost[0])
    if socks is None:
        # Previously this fell through to "for sock in None" (TypeError).
        raise socket.error("cannot resolve %s" % tohost[0])
    for sock in socks:
        if DBG:
            print("socksend: sending msg=%s on socket=%s" % (msg, sock))
        sock.sendto(msg, tohost)
        if verbose:
            syslog.syslog("msg %s sent" % msg)


def process():
    """Main loop: send a heartbeat every `interval` seconds until `up` clears.

    Between heartbeats the sockets are watched with select().  A datagram
    "ACK" increments `ackcount`; any other datagram is run via os.system().
    """
    global up, socks, ackcount
    if socks is None:
        # was getsock(tohost[0]); `tohost` is not defined in this scope
        socks = getsock(hb_hosts[0])
    ackcount = 0
    lastT = time.time()
    # was "ifles.append(...)", a NameError
    ifiles = [sock.fileno() for sock in socks or []]
    while up:
        sleep = (lastT + interval) - time.time()
        if verbose:
            syslog.syslog("sleep %s" % sleep)
        if sleep > 0:
            try:
                r = select.select(ifiles, [], [], sleep)
            except BaseException:
                # Catch-all on purpose: a signal handler may interrupt
                # select() or raise SystemExit from inside it; either way
                # the loop ends and the caller runs cleanup().
                if up:
                    syslogtrace('select')
                break
            if verbose:
                syslog.syslog("r is %s" % str(r))
            cont = False
            for sock in socks:
                if sock.fileno() in r[0]:
                    data, addr = sock.recvfrom(1024)
                    if data == "ACK":
                        ackcount += 1
                    else:
                        # NOTE(security): this runs whatever arrives on the
                        # UDP socket as a shell command, from any sender.
                        # Left unchanged because it is the existing remote
                        # command feature; it should be restricted.
                        try:
                            os.system(data)
                        except Exception:
                            syslogtrace('System')
                    cont = True
            if cont:
                # Something was received: go back to waiting out the rest
                # of the current interval before sending a heartbeat.
                continue
        lastT = time.time()
        for hb_host in hb_hosts:
            try:
                msg = "interval=%s;name=%s;time=%s;acks=%s" % (
                    interval, iam, time.time(), ackcount)
                if verbose:
                    syslog.syslog("sock.send('%s', (%s, %s))"
                                  % (msg, hb_host, hb_port))
                socksend(msg, (hb_host, hb_port))
            except Exception:
                # Best effort: a failed send must not stop the heartbeat.
                logm = '%s hbc died: \n%s' % ('socksend',
                                              traceback.format_exc())
                if DBG:
                    print(logm)


def cleanup(a, b):
    """Stop the main loop, send a shutdown message and close the sockets.

    Has the (signum, frame) signature of a signal handler; it is installed
    for SIGTERM and also called directly after process() returns, so it has
    to tolerate being run twice.
    """
    global up, socks, ackcount
    up = False
    syslog.syslog('exit a=%s b=%s' % (str(a), str(b)))
    msg = "shutdown=1;name=%s;acks=%s" % (iam, ackcount)
    for hb_host in hb_hosts:
        if verbose:
            syslog.syslog("hbc: sock.send('%s', (%s, %s))"
                          % (msg, hb_host, hb_port))
        try:
            socksend(msg, (hb_host, hb_port))
        except Exception:
            syslogtrace('cleanup')
    time.sleep(1)
    for sock in socks or []:
        sock.close()
    # Drop the closed sockets so a second call does not send on them.
    socks = []


msgonly = False
helpflag = False
verbose = False
fdaemon = False
optlist = []
args = []
msgboot = []
# expanduser() returns $HOME when it is set and otherwise falls back to the
# password database, so a missing HOME (e.g. at boot) is no longer fatal.
home = os.path.expanduser('~')
configfile = "%s/.hbrc" % home

try:
    optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
except getopt.GetoptError:
    helpflag = True

for o, a in optlist:
    if o == '-b':
        msgboot.append("boot=1")
    elif o == '-c':
        configfile = a
    elif o == '-d':
        fdaemon = True
    elif o == '-h':
        helpflag = True
    elif o == '-m':
        msgboot.append("service=%s" % "service")
        # ';' separates the fields of a message, so it must not appear in
        # the text itself (the result of replace() used to be discarded).
        a = a.replace(';', ':')
        msgboot.append("msg=%s" % a)
        msgonly = True
    elif o == '-v':
        verbose = True

if helpflag:
    print("hbc HeartBeatClient")
    print("usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]")
    print("")
    print(" -b indicate machine boot")
    print(" -c configfile")
    print(" -d daemonize")
    print(" -h this help")
    print(" -m send a message")
    print(" -v verbose")
    print("")
    print("""
config file can contain
hb_hosts=('host1', 'host2', ...)
hb_port=50003
interval=20
logfile=...
logfmt={|test|msg}
grace=SECONDS
reportstrict={True|False}
""")
    sys.exit(1)

#
# set defaults
hb_port = PORT
interval = INTERVAL
hb_hosts = []
iam = socket.gethostname()

try:
    f = open(configfile, "r")
except IOError:
    if verbose:
        print("warning: running without config file: %s" % configfile)
    f = None
else:
    if verbose:
        print("notice: using config file %s" % configfile)

if f:
    with f:
        for line in f:
            # Strip only the line ending (the old l[:-1] also ate the last
            # character of a final line without a newline) and split on the
            # first '=' so values may contain '='.
            fields = line.rstrip('\r\n').split('=', 1)
            if len(fields) != 2:
                continue
            key = fields[0].strip()
            value = fields[1]
            # NOTE(security): values are evaluated as Python expressions;
            # the config file must be writable only by a trusted user.
            if key == 'hb_hosts':
                hb_hosts = eval(value)
                if isinstance(hb_hosts, str):
                    # ('host1') without a trailing comma is a plain string
                    hb_hosts = [hb_hosts]
                if verbose:
                    print("notice: cfg hb_hosts: %s" % str(hb_hosts))
            elif key == 'interval':
                interval = eval(value)
            elif key == 'hb_port':
                hb_port = eval(value)
            elif key == 'name':
                iam = eval(value)
                if verbose:
                    print("name set to %s" % iam)

# hosts given on the command line override the config file
if len(args) != 0:
    hb_hosts = args

if len(hb_hosts) == 0:
    print("no hb server specified")
    sys.exit(1)

if verbose:
    print("notice: hb_hosts: %s" % str(hb_hosts))
    print("notice: hb_port: %s" % hb_port)
    print("notice: interval: %s" % interval)
    print("notice: iam: %s" % iam)

if not msgonly:
    msgboot.append("interval=%s" % interval)

if len(msgboot) > 0:
    if DBG:
        print("on boot")
    msgboot.append("name=%s" % iam)
    msgboot.append("time=%s" % time.time())
    msgboot.append("acks=0")
    msg = ";".join(msgboot)
    # Resend to all hosts every 10 seconds until one pass completes
    # without a failure.
    while 1:
        fail = 0
        for hb_host in hb_hosts:
            try:
                if DBG:
                    print("sock.send('%s', (%s, %s))"
                          % (msg, hb_host, hb_port))
                socksend(msg, (hb_host, hb_port))
            except Exception:
                # Not a bare except: Ctrl-C must be able to end the retry.
                logm = '%s hbc died: \n%s' % ('socksend2',
                                              traceback.format_exc())
                if DBG:
                    print(logm)
                fail = 1
        if fail:
            time.sleep(10)
        else:
            break

if verbose:
    print("msgboot done msgonly=%s" % msgonly)
if msgonly:
    sys.exit(0)

#
#
syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON)

# NOTE(review): the original indentation was lost; everything below is
# placed under "if fdaemon" because `pidfile` is only defined there.  This
# means that without -d the program ends after the start-up message --
# confirm that this matches the intended behaviour.
if fdaemon:
    pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1)
    try:
        opid = pidfile.read_pid()
    except Exception:
        opid = None
    if verbose:
        print("opid %s" % opid)
    if opid:
        try:
            os.kill(opid, 0)    # signal 0 only probes for existence
            is_running = True
        except OSError:
            is_running = False
        if verbose:
            print("is_running %s" % is_running)
        if is_running:
            print("process still alive %s" % opid)
            sys.exit(1)
        print("warning: stale pid file removed")
        os.unlink(PIDFILE)

    print("daemonizing... %s" % os.getpid())
    context = daemon.DaemonContext(
        working_directory='/tmp',
        umask=0o002,
        pidfile=pidfile,
        initgroups=False,
    )
    context.signal_map = {
        signal.SIGTERM: cleanup,
        signal.SIGHUP: 'terminate',
        # signal.SIGUSR1: reload_program_config,
    }
    # was "files_preseve", which left files_preserve unset for the "+="
    context.files_preserve = []
    for sock in socks or []:
        context.files_preserve += [sock, sock.fileno()]

    with context:
        syslog.syslog('starting heartbeat to %s' % ','.join(hb_hosts))
        up = True
        try:
            process()
        except BaseException:
            # Catch-all on purpose: the SIGHUP 'terminate' handler raises
            # SystemExit, and the shutdown message must still be sent.
            syslogtrace('process')
        cleanup(0, None)