From cc7c75496997455b161f2e3f75c5c0a6e2feecbd Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Sat, 29 Aug 2015 22:55:14 -0400 Subject: [PATCH 1/7] add callhome script --- callhome | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 callhome diff --git a/callhome b/callhome new file mode 100755 index 0000000..5d21f84 --- /dev/null +++ b/callhome @@ -0,0 +1,6 @@ +#!/bin/sh + +# excute on remote machine +# forwared 2 ports to wig: 5903 to screen shareing and 5922 to ssh + +/usr/bin/ssh -f -N -C -R 192.168.10.64:5903:127.0.0.1:5900 -R 192.168.10.64:5922:127.0.0.1:22 home.wrede.ca From a59dd3b13826d9395ee621b2679366891d94542e Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Tue, 1 Sep 2015 09:50:53 -0400 Subject: [PATCH 2/7] make lock work if pid file is empty --- hbc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/hbc b/hbc index 1937e0b..191ebe7 100755 --- a/hbc +++ b/hbc @@ -12,9 +12,7 @@ import errno PORT=50003 INTERVAL=10 -False=0 -True=1 -DBG = False +DBG = True sock=None @@ -31,6 +29,7 @@ class Flock: self.opid = None def lock(self): + if DBG: print "lock it" while 1: self.fd = None try: @@ -38,9 +37,10 @@ class Flock: except OSError, e: if e.errno != errno.EEXIST: raise + if DBG: print "create error %s" % e.errno except: raise - + if DBG: print "lock() - self.fd = '%s'" % self.fd if not self.fd: if self.oproc(): if DBG: print "process is alive" @@ -62,8 +62,13 @@ class Flock: def setopid(self): try: self.opid=open(self.lock_file).readline() + if DBG: print "setopid = '%s'" % self.opid + if self.opid == '': + os.remove(self.lock_file) except: - pass + self.opid = '' + if self.opid == '': + self.opid = None def oproc(self): @@ -110,6 +115,7 @@ def socksend(msg, tohost): if sock == None: sock=getsock(tohost[0]) sock.sendto(msg, tohost) + if verbose: print "msg %s sent" % msg msgonly=False @@ -238,6 +244,7 @@ if len(msgboot) > 0: else: break +if verbose: print "msgboot done msgonly=%s" % msgonly if msgonly: sys.exit(0) @@ -265,7 +272,9 @@ if daemon: while True: lock=Flock('/tmp/hbc.pid') + if DBG: print "get lock" if lock.lock(): + if DBG: print "got lock" break if not lock.oproc(): sys.exit(1) From 1404bab59181c9ac5962791ea7292d9819009905 Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Wed, 2 Sep 2015 14:51:20 -0400 Subject: [PATCH 3/7] replace homebrow lock with package lockfile --- hbc | 165 +++++++++++++++++++++--------------------------------------- 1 file changed, 57 insertions(+), 108 deletions(-) diff --git a/hbc b/hbc index 191ebe7..dacffab 100755 --- a/hbc +++ b/hbc @@ -1,5 +1,9 @@ #!/usr/bin/env python # $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $ + +# requre python-filelock on Linux +# require py27-lockfile on *bsd +# or run sudo easy_install-2.7 lockfile import sys import time import socket @@ -9,6 +13,8 @@ import getopt import string import select import errno +import traceback +from lockfile import FileLock PORT=50003 INTERVAL=10 @@ -21,67 +27,6 @@ class NullDevice: pass - -class Flock: - def __init__(self, lock_file): - self.lock_file = lock_file - self.fd = None - self.opid = None - - def lock(self): - if DBG: print "lock it" - while 1: - self.fd = None - try: - self.fd = os.open(self.lock_file, os.O_CREAT | os.O_EXCL | os.O_RDWR) - except OSError, e: - if e.errno != errno.EEXIST: - raise - if DBG: print "create error %s" % e.errno - except: - raise - if DBG: print "lock() - self.fd = '%s'" % self.fd - if not self.fd: - if self.oproc(): - if DBG: print "process is alive" - os.remove(self.lock_file) - continue - else: - if DBG: print "no pid process??" - - if self.fd: - f=os.fdopen(self.fd, 'w').write("%s" % os.getpid()) - return self.fd - - - def unlock(self): - os.remove(self.lock_file) - self.fd=None - - - def setopid(self): - try: - self.opid=open(self.lock_file).readline() - if DBG: print "setopid = '%s'" % self.opid - if self.opid == '': - os.remove(self.lock_file) - except: - self.opid = '' - if self.opid == '': - self.opid = None - - - def oproc(self): - self.setopid() - if not self.opid: - return False - try: - os.kill(int(self.opid), 0) - return True - except: - pass - return False - def handler(signum, frame): global up if up == 0: @@ -117,6 +62,38 @@ def socksend(msg, tohost): sock.sendto(msg, tohost) if verbose: print "msg %s sent" % msg +def process(): + ackcount=0 + lastT=time.time() + while up: + sleep=(lastT+interval) - time.time() + if verbose: print "sleep %s" % sleep + if sleep > 0: + try: + r=select.select([sock.fileno()],[],[],sleep) + # time.sleep(interval) + except: + break + if verbose: print r + if sock.fileno() in r[0]: + data, addr = sock.recvfrom(1024) + if data == "ACK": + ackcount+=1 + else: + try: + os.system(data) + except: + pass + continue + lastT=time.time() + for hb_host in hb_hosts: + try: + msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount) + if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) + socksend(msg, (hb_host, hb_port)) + except: + pass + msgonly=False helpflag=False @@ -259,61 +236,33 @@ if daemon: sys.exit(0) - os.close(0) - os.close(1) - os.close(2) - sys.stdin.close() - sys.stdout = NullDevice() - sys.stderr = NullDevice() + if not DBG: + os.close(0) + os.close(1) + os.close(2) + sys.stdin.close() + sys.stdout = NullDevice() + sys.stderr = NullDevice() os.chdir("/") os.setsid() os.umask(0) -while True: - lock=Flock('/tmp/hbc.pid') - if DBG: print "get lock" - if lock.lock(): - if DBG: print "got lock" - break - if not lock.oproc(): - sys.exit(1) - os.kill(lock.opid,15) - time.sleep(1) +lock = FileLock('/tmp/hbc.pid') +lock.acquire() up=1 signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGHUP, handler) -ackcount=0 -lastT=time.time() -while up: - sleep=(lastT+interval) - time.time() - if verbose: print "sleep %s" % sleep - if sleep > 0: - try: - r=select.select([sock.fileno()],[],[],sleep) -# time.sleep(interval) - except: - break - if verbose: print r - if sock.fileno() in r[0]: - data, addr = sock.recvfrom(1024) - if data == "ACK": - ackcount+=1 - else: - try: - os.system(data) - except: - pass - continue - lastT=time.time() - for hb_host in hb_hosts: - try: - msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount) - if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) - socksend(msg, (hb_host, hb_port)) - except: - pass + + + +try: + process() +except: + data='hbc died:\n'+traceback.format_exc() + open("/tmp/hbc.log","a").write(data) + sys.exit(1) up=0 msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount) @@ -322,4 +271,4 @@ for hb_host in hb_hosts: socksend(msg, (hb_host, hb_port)) time.sleep(1) sock.close() -lock.unlock() +lock.release() From f54ddd0bc9a4d9a22e6f7a060fe76aa10f41eb4f Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Fri, 4 Sep 2015 15:52:16 -0400 Subject: [PATCH 4/7] use daemon and PIDLock --- hbc | 130 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 71 insertions(+), 59 deletions(-) diff --git a/hbc b/hbc index dacffab..0378f59 100755 --- a/hbc +++ b/hbc @@ -1,9 +1,13 @@ #!/usr/bin/env python # $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $ -# requre python-filelock on Linux -# require py27-lockfile on *bsd -# or run sudo easy_install-2.7 lockfile +# require on Linux +# python-filelock +# python-daemon vs 1.61 or > +# on *bsd +# py27-lockfile +# py-27-daemon +# or run sudo easy_install-2.7 lockfile daemon import sys import time import socket @@ -14,24 +18,29 @@ import string import select import errno import traceback -from lockfile import FileLock +import lockfile +import daemon +import daemon.pidfile +import syslog + PORT=50003 INTERVAL=10 DBG = True -sock=None +sock = None +up = True +ackcount = 0 class NullDevice: def write(self, s): pass -def handler(signum, frame): - global up - if up == 0: - return - sys.exit(0) +def syslogtrace(note): + logm = '%s hbc died: \n%s' % (note, traceback.format_exc()) + for l in logm.split('\n'): + syslog.syslog(' tb: %s' % l) def getsock(host): @@ -46,35 +55,43 @@ def getsock(host): else: return None if verbose: - print "socktype: %s" % af_type + syslog.syslog("socktype: %s" % af_type) sock=socket.socket(af_type, socket.SOCK_DGRAM) sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \ sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1) - if verbose: print "get socket %s" % sock + if verbose: syslog.syslog("get socket %s" % sock) return sock + def socksend(msg, tohost): global sock if sock == None: sock=getsock(tohost[0]) sock.sendto(msg, tohost) - if verbose: print "msg %s sent" % msg + if verbose: syslog.syslog("msg %s sent" % msg) + def process(): + global up, sock, ackcount + + if sock == None: + sock=getsock(tohost[0]) + ackcount=0 lastT=time.time() while up: sleep=(lastT+interval) - time.time() - if verbose: print "sleep %s" % sleep + if verbose: syslog.syslog("sleep %s" % sleep) if sleep > 0: try: r=select.select([sock.fileno()],[],[],sleep) - # time.sleep(interval) except: + if up: + syslogtrace('select') break - if verbose: print r + if verbose: syslog.syslog("r is %s" % str(r)) if sock.fileno() in r[0]: data, addr = sock.recvfrom(1024) if data == "ACK": @@ -83,22 +100,35 @@ def process(): try: os.system(data) except: + syslogtrace('System') pass continue lastT=time.time() for hb_host in hb_hosts: try: msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount) - if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) + if verbose: syslog.syslog("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)) socksend(msg, (hb_host, hb_port)) except: pass +def cleanup(a, b): + global up, sock, ackcount + up = False + syslog.syslog('exit a=%s b=%s' % (str(a), str(b))) + msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount) + for hb_host in hb_hosts: + if verbose: syslog.syslog("hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)) + socksend(msg, (hb_host, hb_port)) + time.sleep(1) + sock.close() + + msgonly=False helpflag=False verbose=False -daemon=False +fdaemon=False optlist=[] args=[] msgboot=[] @@ -116,7 +146,7 @@ for o,a in optlist: elif o == '-c': configfile=a elif o == '-d': - daemon=True + fdaemon=True elif o == '-h': helpflag=True elif o == '-m': @@ -228,47 +258,29 @@ if msgonly: # # -if daemon: - pid=os.fork() - if pid > 0: - if verbose: - print "daemoinizing... pid=%d" % pid - sys.exit(0) +syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON) +if fdaemon: + print "daemoinizing... %s" % os.getpid() + context = daemon.DaemonContext( + working_directory='/tmp', + umask=0o002, + pidfile=daemon.pidfile.TimeoutPIDLockFile('/tmp/hbc.pid', acquire_timeout=-1), + ) - if not DBG: - os.close(0) - os.close(1) - os.close(2) - sys.stdin.close() - sys.stdout = NullDevice() - sys.stderr = NullDevice() - os.chdir("/") - os.setsid() - os.umask(0) + context.signal_map = { + signal.SIGTERM: cleanup, + signal.SIGHUP: 'terminate', +# signal.SIGUSR1: reload_program_config, + } + context.files_preserve = [sock, sock.fileno()] + with context: + syslog.syslog('starting heartbeat to %s' % ','.join(hb_hosts)) + up = True + try: + process() + except: + syslogtrace('process') + cleanup(0, None) -lock = FileLock('/tmp/hbc.pid') -lock.acquire() - -up=1 -signal.signal(signal.SIGTERM, handler) -signal.signal(signal.SIGHUP, handler) - - - -try: - process() -except: - data='hbc died:\n'+traceback.format_exc() - open("/tmp/hbc.log","a").write(data) - sys.exit(1) - -up=0 -msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount) -for hb_host in hb_hosts: - if verbose: print "hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) - socksend(msg, (hb_host, hb_port)) -time.sleep(1) -sock.close() -lock.release() From 993a903cedcdd42cf48c90d31c8119057afeb47f Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Fri, 4 Sep 2015 16:12:13 -0400 Subject: [PATCH 5/7] handle stale pid file --- hbc | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/hbc b/hbc index 0378f59..1f73857 100755 --- a/hbc +++ b/hbc @@ -24,8 +24,9 @@ import daemon.pidfile import syslog -PORT=50003 -INTERVAL=10 +PORT = 50003 +INTERVAL = 10 +PIDFILE = '/tmp/hbc.pid' DBG = True sock = None @@ -260,12 +261,32 @@ if msgonly: # syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON) if fdaemon: - print "daemoinizing... %s" % os.getpid() + pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1) + try: + opid = pidfile.read_pid() + except: + opid = None + if verbose: print "opid %s" % opid + + if opid: + try: + os.kill(opid, 0) + is_running = True + except: + is_running = False + if verbose: print "is_running %s" % is_running + if is_running: + print "process still alive %s" % opid + sys.exit(1) + print "warning: stale pid file removed" + os.unlink(PIDFILE) + + print "daemoinizing... %s" % os.getpid() context = daemon.DaemonContext( working_directory='/tmp', umask=0o002, - pidfile=daemon.pidfile.TimeoutPIDLockFile('/tmp/hbc.pid', acquire_timeout=-1), + pidfile=pidfile, ) context.signal_map = { From 721fea572306ac911eec50b5ddcda1cc02470398 Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Fri, 4 Sep 2015 16:34:45 -0400 Subject: [PATCH 6/7] handle MacOS v6 quirk; more DBG --- hbc | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/hbc b/hbc index 1f73857..da971c5 100755 --- a/hbc +++ b/hbc @@ -27,7 +27,7 @@ import syslog PORT = 50003 INTERVAL = 10 PIDFILE = '/tmp/hbc.pid' -DBG = True +DBG = False sock = None up = True @@ -48,13 +48,16 @@ def getsock(host): try: r=socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP) except socket.gaierror: + logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc()) + if DBG: print logm return None - if r[0][0] == 28: + if r[0][0] in [28, 30]: af_type=socket.AF_INET6 elif r[0][0] == 2: af_type=socket.AF_INET else: - return None + print "dont know this net type: %s" % r[0][0] + sys.exit(1) if verbose: syslog.syslog("socktype: %s" % af_type) sock=socket.socket(af_type, socket.SOCK_DGRAM) @@ -70,6 +73,7 @@ def socksend(msg, tohost): if sock == None: sock=getsock(tohost[0]) + if DBG: print "socksend: sending msg=%s on socket=%s" % (msg, sock) sock.sendto(msg, tohost) if verbose: syslog.syslog("msg %s sent" % msg) @@ -111,6 +115,8 @@ def process(): if verbose: syslog.syslog("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)) socksend(msg, (hb_host, hb_port)) except: + logm = '%s hbc died: \n%s' % ('socksend', traceback.format_exc()) + if DBG: print logm pass @@ -235,6 +241,7 @@ if not msgonly: msgboot.append("interval=%s" % interval) if len(msgboot) > 0: + if DBG: print "on boot" msgboot.append("name=%s" % iam) msgboot.append("time=%s" % time.time()) msgboot.append("acks=0") @@ -243,9 +250,11 @@ if len(msgboot) > 0: fail=0 for hb_host in hb_hosts: try: - if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) + if DBG: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) socksend(msg, (hb_host, hb_port)) except: + logm = '%s hbc died: \n%s' % ('socksend2', traceback.format_exc()) + if DBG: print logm fail=1 if fail: time.sleep(10) From 993b42b3395a056e036a9e9518151e2c6693c6ec Mon Sep 17 00:00:00 2001 From: Andreas Wrede Date: Sun, 29 Nov 2015 15:16:39 -0500 Subject: [PATCH 7/7] make sure we have the right daemon --- hbc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hbc b/hbc index da971c5..bba14c1 100755 --- a/hbc +++ b/hbc @@ -20,7 +20,11 @@ import errno import traceback import lockfile import daemon -import daemon.pidfile +try: + import daemon.pidfile +except: + print "need version 2.1 or higer of python-daemon" + sys.exit(1) import syslog @@ -51,7 +55,7 @@ def getsock(host): logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc()) if DBG: print logm return None - if r[0][0] in [28, 30]: + if r[0][0] in [10, 28, 30]: af_type=socket.AF_INET6 elif r[0][0] == 2: af_type=socket.AF_INET @@ -296,6 +300,7 @@ if fdaemon: working_directory='/tmp', umask=0o002, pidfile=pidfile, + initgroups=False, ) context.signal_map = {