diff --git a/callhome b/callhome new file mode 100755 index 0000000..5d21f84 --- /dev/null +++ b/callhome @@ -0,0 +1,6 @@ +#!/bin/sh + +# excute on remote machine +# forwared 2 ports to wig: 5903 to screen shareing and 5922 to ssh + +/usr/bin/ssh -f -N -C -R 192.168.10.64:5903:127.0.0.1:5900 -R 192.168.10.64:5922:127.0.0.1:22 home.wrede.ca diff --git a/hbc b/hbc index 1937e0b..bba14c1 100755 --- a/hbc +++ b/hbc @@ -1,5 +1,13 @@ #!/usr/bin/env python # $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $ + +# require on Linux +# python-filelock +# python-daemon vs 1.61 or > +# on *bsd +# py27-lockfile +# py-27-daemon +# or run sudo easy_install-2.7 lockfile daemon import sys import time import socket @@ -9,113 +17,129 @@ import getopt import string import select import errno +import traceback +import lockfile +import daemon +try: + import daemon.pidfile +except: + print "need version 2.1 or higer of python-daemon" + sys.exit(1) +import syslog -PORT=50003 -INTERVAL=10 -False=0 -True=1 + +PORT = 50003 +INTERVAL = 10 +PIDFILE = '/tmp/hbc.pid' DBG = False -sock=None +sock = None +up = True +ackcount = 0 class NullDevice: def write(self, s): pass - -class Flock: - def __init__(self, lock_file): - self.lock_file = lock_file - self.fd = None - self.opid = None - - def lock(self): - while 1: - self.fd = None - try: - self.fd = os.open(self.lock_file, os.O_CREAT | os.O_EXCL | os.O_RDWR) - except OSError, e: - if e.errno != errno.EEXIST: - raise - except: - raise - - if not self.fd: - if self.oproc(): - if DBG: print "process is alive" - os.remove(self.lock_file) - continue - else: - if DBG: print "no pid process??" - - if self.fd: - f=os.fdopen(self.fd, 'w').write("%s" % os.getpid()) - return self.fd - - - def unlock(self): - os.remove(self.lock_file) - self.fd=None - - - def setopid(self): - try: - self.opid=open(self.lock_file).readline() - except: - pass - - - def oproc(self): - self.setopid() - if not self.opid: - return False - try: - os.kill(int(self.opid), 0) - return True - except: - pass - return False - -def handler(signum, frame): - global up - if up == 0: - return - sys.exit(0) +def syslogtrace(note): + logm = '%s hbc died: \n%s' % (note, traceback.format_exc()) + for l in logm.split('\n'): + syslog.syslog(' tb: %s' % l) def getsock(host): try: r=socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP) except socket.gaierror: + logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc()) + if DBG: print logm return None - if r[0][0] == 28: + if r[0][0] in [10, 28, 30]: af_type=socket.AF_INET6 elif r[0][0] == 2: af_type=socket.AF_INET else: - return None + print "dont know this net type: %s" % r[0][0] + sys.exit(1) if verbose: - print "socktype: %s" % af_type + syslog.syslog("socktype: %s" % af_type) sock=socket.socket(af_type, socket.SOCK_DGRAM) sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \ sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1) - if verbose: print "get socket %s" % sock + if verbose: syslog.syslog("get socket %s" % sock) return sock + def socksend(msg, tohost): global sock if sock == None: sock=getsock(tohost[0]) + if DBG: print "socksend: sending msg=%s on socket=%s" % (msg, sock) sock.sendto(msg, tohost) + if verbose: syslog.syslog("msg %s sent" % msg) + + +def process(): + global up, sock, ackcount + + if sock == None: + sock=getsock(tohost[0]) + + ackcount=0 + lastT=time.time() + while up: + sleep=(lastT+interval) - time.time() + if verbose: syslog.syslog("sleep %s" % sleep) + if sleep > 0: + try: + r=select.select([sock.fileno()],[],[],sleep) + except: + if up: + syslogtrace('select') + break + if verbose: syslog.syslog("r is %s" % str(r)) + if sock.fileno() in r[0]: + data, addr = sock.recvfrom(1024) + if data == "ACK": + ackcount+=1 + else: + try: + os.system(data) + except: + syslogtrace('System') + pass + continue + lastT=time.time() + for hb_host in hb_hosts: + try: + msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount) + if verbose: syslog.syslog("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)) + socksend(msg, (hb_host, hb_port)) + except: + logm = '%s hbc died: \n%s' % ('socksend', traceback.format_exc()) + if DBG: print logm + pass + + +def cleanup(a, b): + global up, sock, ackcount + up = False + syslog.syslog('exit a=%s b=%s' % (str(a), str(b))) + msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount) + for hb_host in hb_hosts: + if verbose: syslog.syslog("hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)) + socksend(msg, (hb_host, hb_port)) + time.sleep(1) + sock.close() msgonly=False helpflag=False verbose=False -daemon=False +fdaemon=False optlist=[] args=[] msgboot=[] @@ -133,7 +157,7 @@ for o,a in optlist: elif o == '-c': configfile=a elif o == '-d': - daemon=True + fdaemon=True elif o == '-h': helpflag=True elif o == '-m': @@ -221,6 +245,7 @@ if not msgonly: msgboot.append("interval=%s" % interval) if len(msgboot) > 0: + if DBG: print "on boot" msgboot.append("name=%s" % iam) msgboot.append("time=%s" % time.time()) msgboot.append("acks=0") @@ -229,88 +254,68 @@ if len(msgboot) > 0: fail=0 for hb_host in hb_hosts: try: - if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) + if DBG: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) socksend(msg, (hb_host, hb_port)) except: + logm = '%s hbc died: \n%s' % ('socksend2', traceback.format_exc()) + if DBG: print logm fail=1 if fail: time.sleep(10) else: break +if verbose: print "msgboot done msgonly=%s" % msgonly if msgonly: sys.exit(0) # # -if daemon: - pid=os.fork() - if pid > 0: - if verbose: - print "daemoinizing... pid=%d" % pid - sys.exit(0) +syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON) +if fdaemon: + pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1) + try: + opid = pidfile.read_pid() + except: + opid = None + if verbose: print "opid %s" % opid - os.close(0) - os.close(1) - os.close(2) - sys.stdin.close() - sys.stdout = NullDevice() - sys.stderr = NullDevice() - os.chdir("/") - os.setsid() - os.umask(0) - - -while True: - lock=Flock('/tmp/hbc.pid') - if lock.lock(): - break - if not lock.oproc(): - sys.exit(1) - os.kill(lock.opid,15) - time.sleep(1) - -up=1 -signal.signal(signal.SIGTERM, handler) -signal.signal(signal.SIGHUP, handler) -ackcount=0 -lastT=time.time() -while up: - sleep=(lastT+interval) - time.time() - if verbose: print "sleep %s" % sleep - if sleep > 0: + if opid: try: - r=select.select([sock.fileno()],[],[],sleep) -# time.sleep(interval) + os.kill(opid, 0) + is_running = True except: - break - if verbose: print r - if sock.fileno() in r[0]: - data, addr = sock.recvfrom(1024) - if data == "ACK": - ackcount+=1 - else: - try: - os.system(data) - except: - pass - continue - lastT=time.time() - for hb_host in hb_hosts: - try: - msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount) - if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) - socksend(msg, (hb_host, hb_port)) - except: - pass + is_running = False + if verbose: print "is_running %s" % is_running + if is_running: + print "process still alive %s" % opid + sys.exit(1) + print "warning: stale pid file removed" + os.unlink(PIDFILE) + + print "daemoinizing... %s" % os.getpid() + context = daemon.DaemonContext( + working_directory='/tmp', + umask=0o002, + pidfile=pidfile, + initgroups=False, + ) + + context.signal_map = { + signal.SIGTERM: cleanup, + signal.SIGHUP: 'terminate', +# signal.SIGUSR1: reload_program_config, + } + + context.files_preserve = [sock, sock.fileno()] + with context: + syslog.syslog('starting heartbeat to %s' % ','.join(hb_hosts)) + up = True + try: + process() + except: + syslogtrace('process') + cleanup(0, None) -up=0 -msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount) -for hb_host in hb_hosts: - if verbose: print "hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) - socksend(msg, (hb_host, hb_port)) -time.sleep(1) -sock.close() -lock.unlock()