diff --git a/hbc b/hbc index 8c282f5..02217d7 100755 --- a/hbc +++ b/hbc @@ -17,48 +17,13 @@ import zlib import subprocess import syslog -try: - import lockfile - import daemon - import daemon.pidfile -except: - print """ - require on Linux - python-filelock - python-daemon vs 1.61 or > - run sudo easy_install-2.7 lockfile python-daemon - on *bsd - py27-lockfile - py27-daemon - run sudo pkg install -y py27-lockfile py27-daemon -""" - sys.exit(1) - - -# N.B. daemon tries to close resource.RLIMIT_NOFILE file descriptors -# which on FreeBSD in close to a million -# hack: replace the function in daemon with ths one: - - - -def log(msg): - if fdaemon: - syslog.syslog(syslog.LOG_ERR, msg) - else: - print msg - - -def get_maximum_file_descriptors(): - return 2048 - -daemon.get_maximum_file_descriptors = get_maximum_file_descriptors - +import syslog PORT = 50003 INTERVAL = 10 PIDFILE = '/tmp/hbc.pid' -VER = 3 +VER = 4 MAXRECV = 32767 running = True @@ -66,8 +31,8 @@ dorestart = False warned1 = False class NullDevice: - def write(self, s): - pass + def write(self, s): + pass class Conn: @@ -84,7 +49,7 @@ class Conn: self.rtts = [0] self.sock=socket.socket(af, socket.SOCK_DGRAM) self.sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \ - self.sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1) + self.sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1) def sendto(self, msg, ID = 'HTB'): # default ID is HearTBeat global warned1 @@ -175,9 +140,10 @@ def syslogtrace(note): logm = '%s hbc died: \n%s' % (note, traceback.format_exc()) log(logm) for l in logm.split('\n'): - log(' tb: %s' % l) - - + syslog.syslog(syslog.LOG_ERR, ' tb: %s' % l) + if verbose: + print logm + conId = 1 def createConnections(hosts): @@ -185,7 +151,7 @@ def createConnections(hosts): for host in hosts: if verbose: log("createConnections for %s" % host) try: - rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP) + rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP) except socket.gaierror: logm = '%s hbc died: \n%s' % ('createConnections', traceback.format_exc()) if verbose: log(logm) @@ -197,7 +163,7 @@ def createConnections(hosts): elif r[0] == 2: af=socket.AF_INET else: - log("dont know this net type: %s" % r[0][0]) + print "dont know this net type: %s" % r[0][0] sys.exit(1) addr = r[4][0] @@ -226,7 +192,7 @@ def doupdate(conn, msgDict): code = msgDict['code'].decode('base64') csum = msgDict['csum'] except: - fail = "csum/code missing" + fail = "csum/code missing" if not fail: fail = doupdateone(code, csum) @@ -244,10 +210,10 @@ def doupdateone(code, csum): m.update(code) icsum = m.hexdigest() if icsum != csum: - return "checksum error" + return "checksum error" fn = sys.argv[0] - ofn = "%.sav" % fn + ofn = "%.sav" % fn try: shutil.copy2(fn, ofn) except Exception as e: @@ -264,7 +230,9 @@ def doupdateone(code, csum): def restart(): - log('restart %s' % (sys.argv[0])) + if verbose: + print "restart: execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs) + syslog.syslog(syslog.LOG_ERR, 'restart %s' % (sys.argv[0])) e = "fallthrough" try: os.execv(sys.argv[0], [sys.argv[0]]+cmdargs) @@ -310,17 +278,17 @@ def process(): if verbose: print "sock.recvfrom: %s (%s) %s" % (addr, len(data), data[:4]) msgDict = stodict(data) if verbose: print "sock.recvfrom: %s (%s) %s" % (addr, len(data), str(msgDict)[:80]) - if msgDict['ID'] == "ACK": + if msgDict['ID'] == "ACK": conns[conn].ack(msgDict) - elif msgDict['ID'] == "UPD": + elif msgDict['ID'] == "UPD": if doupdate(conn, msgDict) == None: if verbose: print "process: restart after update" dorestart = True break - elif msgDict['ID'] == "CMD": + elif msgDict['ID'] == "CMD": doexec(conn, msgDict['cmd']) else: - doexec(conn, data) # deprecated until no more VER - hbc + doexec(conn, data) # deprecated until no more VER - hbc if dorestart: running = False break @@ -341,7 +309,7 @@ def cleanup(): global running if verbose: log('cleanup') running = False - for conn in conns: + for conn in conns: msg={'shutdown': 1, 'acks': conns[conn].ackcount} conns[conn].sendto(msg) conns[conn].close() @@ -350,16 +318,60 @@ def cleanup(): def closeall(): - if verbose: log('closecall') - for conn in conns: + if verbose: syslog.syslog(syslog.LOG_ERR, 'closecall') + for conn in conns: conns[conn].close() +def daemonize(working_dir="/", stdin='/dev/zero', stdout='/dev/null', stderr='/dev/null'): + """ + Does the UNIX double-fork magic, see Stevens' "Advanced Programming in the + UNIX Environment" for details (ISBN 0201563177) + http://www.yendor.com/programming/unix/apue/proc/fork2.c + """ + + try: + # first fork + pid = os.fork() + if pid > 0: + # exit from first parent + os._exit(0) + except OSError, e: + sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror)) + os._exit(1) + + # decouple from parent environment + os.chdir(working_dir) + os.setsid() + os.umask(0) + # second fork + try: + pid = os.fork() + if pid > 0: + # exit from second parent + os._exit(0) + except OSError, e: + sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror)) + sys.exit(1) + + # redirects standard file descriptors + sys.stdout.flush() + sys.stderr.flush() + si = file(stdin, 'r') + so = file(stdout, 'a+') + se = file(stderr, 'a+', 0) + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + + + msgonly=False helpflag=False verbose=False fdaemon=False -optlist=[] +daemonized = False +optlist=[] args=[] msgboot={} home=os.environ['HOME'] @@ -368,9 +380,9 @@ cmdargs = [] try: - optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v') + optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v') except: - helpflag=True + helpflag=True for o,a in optlist: if o == '-b': @@ -442,7 +454,7 @@ if f: if r[0] == 'hb_hosts': hb_hosts=eval(r[1]) if verbose: - print "notice: cfg hb_hosts: %s" % hb_hosts + print "notice: cfg hb_hosts: %s" % hb_hosts elif r[0] == 'interval': interval=eval(r[1]) elif r[0] == 'hb_port': @@ -487,7 +499,7 @@ if verbose: if len(msgboot) > 0: if verbose: print "on boot" - msgboot['acks'] = 0 + msgboot['acks'] = 0 for conn in conns: conns[conn].sendto(msgboot) @@ -497,63 +509,20 @@ if msgonly: sys.exit(0) # - syslog.openlog('hbc', syslog.LOG_PID, syslog.LOG_DAEMON) if fdaemon: + print "daemoinizing." + daemonize() + daemonized = True + syslog.syslog(syslog.LOG_ERR, 'starting heartbeat to %s' % ','.join(hb_hosts)) - pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1) - try: - opid = pidfile.read_pid() - except: - opid = None +running = True +try: + process() +except: + syslogtrace('process') + if verbose: print "err: process exit: %s" % e - if opid: - try: - os.kill(opid, 0) - is_running = True - except: - is_running = False - if verbose: print "is_running %s" % is_running - if is_running: - print "process still alive %s" % opid - sys.exit(1) - print "warning: stale pid file removed" - os.unlink(PIDFILE) - - print "daemoinizing... %s" % os.getpid() - context = daemon.DaemonContext( - working_directory='/tmp', - umask=0o022, - pidfile=pidfile, - detach_process=True, -# initgroups=False, - ) - - context.signal_map = { -# signal.SIGHUP: cleanup, - signal.SIGTERM: 'terminate', -# signal.SIGUSR1: reload_program_config, - } - - context.files_preserve = [] - for conn in conns: - context.files_preserve += [conns[conn].sock, conns[conn].sock.fileno()] - with context: - log('starting heartbeat to %s' % ','.join(hb_hosts)) - running = True - try: - process() - except: - syslogtrace('process') - -else: - running = True - try: - if verbose: print "starting loop process" - process() - except Exception as e: - if verbose: print "err: process exit: %s" % e - syslogtrace('process') if verbose: log( "main: cleanup") cleanup() if dorestart: