Merge branch 'master' of git.wrede.ca:andreas/heartbeat
sequence
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
# execute on remote machine
|
||||
# forward 2 ports to wig: 5903 for screen sharing and 5922 for ssh
|
||||
|
||||
/usr/bin/ssh -f -N -C -R 192.168.10.64:5903:127.0.0.1:5900 -R 192.168.10.64:5922:127.0.0.1:22 home.wrede.ca
|
||||
@@ -1,5 +1,13 @@
|
||||
#!/usr/bin/env python
|
||||
# $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $
|
||||
|
||||
# required on Linux
|
||||
# python-filelock
|
||||
# python-daemon version 1.61 or later
|
||||
# on *bsd
|
||||
# py27-lockfile
|
||||
# py-27-daemon
|
||||
# or run sudo easy_install-2.7 lockfile daemon
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
@@ -9,113 +17,129 @@ import getopt
|
||||
import string
|
||||
import select
|
||||
import errno
|
||||
import traceback
|
||||
import lockfile
|
||||
import daemon
|
||||
try:
|
||||
import daemon.pidfile
|
||||
except:
|
||||
print "need version 2.1 or higer of python-daemon"
|
||||
sys.exit(1)
|
||||
import syslog
|
||||
|
||||
|
||||
PORT = 50003
|
||||
INTERVAL = 10
|
||||
False=0
|
||||
True=1
|
||||
PIDFILE = '/tmp/hbc.pid'
|
||||
DBG = False
|
||||
|
||||
sock = None
|
||||
up = True
|
||||
ackcount = 0
|
||||
|
||||
class NullDevice:
    """File-like sink: anything passed to write() is discarded."""

    def write(self, s):
        """Accept ``s`` and ignore it; always returns None."""
        return None
|
||||
|
||||
|
||||
|
||||
class Flock:
    """Lock based on exclusive creation of a pid file.

    Holding the lock means having created ``lock_file`` with
    ``O_CREAT | O_EXCL``; the creator writes its own process id into it.
    """

    def __init__(self, lock_file):
        self.lock_file = lock_file  # path of the lock/pid file
        self.fd = None              # descriptor from os.open(), None when not held
        self.opid = None            # raw first line last read from the lock file

    def lock(self):
        """Try to create the lock file and record this process id in it.

        Returns the descriptor number obtained from ``os.open`` on success,
        otherwise a false value. Any ``OSError`` other than EEXIST propagates.
        """
        # NOTE(review): the nesting below is reconstructed from a source whose
        # indentation was lost -- confirm against the real file.
        while True:
            self.fd = None
            try:
                self.fd = os.open(self.lock_file,
                                  os.O_CREAT | os.O_EXCL | os.O_RDWR)
            except OSError as err:
                # An existing lock file is the expected failure; re-raise the rest.
                if err.errno != errno.EEXIST:
                    raise

            # NOTE(review): a descriptor of 0 is treated like "not acquired"
            # by this truthiness test.
            if self.fd:
                os.fdopen(self.fd, 'w').write("%s" % os.getpid())
                return self.fd

            if self.oproc():
                if DBG: print("process is alive")
                # NOTE(review): removes the lock file although the recorded
                # process answered the signal-0 probe, then retries.
                os.remove(self.lock_file)
                continue

            if DBG: print("no pid process??")
            return self.fd

    def unlock(self):
        """Delete the lock file and forget the descriptor."""
        os.remove(self.lock_file)
        self.fd = None

    def setopid(self):
        """Store the first line of the lock file in ``self.opid``.

        Any failure (for example a missing file) is ignored and leaves
        ``self.opid`` unchanged.
        """
        try:
            self.opid = open(self.lock_file).readline()
        except:
            pass

    def oproc(self):
        """Return True when the pid recorded in the lock file can be signalled.

        Refreshes ``self.opid`` first, then probes the process with
        ``os.kill(pid, 0)``; an empty pid or any error yields False.
        """
        self.setopid()
        if self.opid:
            try:
                os.kill(int(self.opid), 0)
            except:
                return False
            return True
        return False
|
||||
|
||||
def handler(signum, frame):
    """Signal handler that exits the process unless ``up`` is 0.

    ``signum`` and ``frame`` are accepted to match the signal-handler
    signature and are not used. When the module-level ``up`` flag equals 0
    the call returns without doing anything; otherwise ``sys.exit(0)`` is
    invoked.
    """
    global up
    if up != 0:
        sys.exit(0)
|
||||
def syslogtrace(note):
    """Send the current traceback to syslog, one record per line.

    The logged text is ``note`` followed by ' hbc died: ' and the output of
    ``traceback.format_exc()``; every line is prefixed with ' tb: '.
    """
    report = '%s hbc died: \n%s' % (note, traceback.format_exc())
    records = report.split('\n')
    for record in records:
        syslog.syslog(' tb: %s' % record)
|
||||
|
||||
|
||||
def getsock(host):
    """Create a UDP socket whose address family matches ``host``.

    ``host`` is resolved with ``socket.getaddrinfo`` and the family of the
    first result selects between an IPv6 and an IPv4 datagram socket.
    SO_REUSEADDR is switched on before the socket is returned.

    Returns the new socket, or None when the name cannot be resolved or the
    resolved family is neither IPv4 nor IPv6.
    """
    try:
        r = socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP)
    except socket.gaierror:
        logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc())
        if DBG: print(logm)
        return None

    # Compare against the platform's own constants: the numeric value of
    # AF_INET6 differs between operating systems, so literal numbers only
    # match on some hosts.
    family = r[0][0]
    if family == socket.AF_INET6:
        af_type = socket.AF_INET6
    elif family == socket.AF_INET:
        af_type = socket.AF_INET
    else:
        # NOTE(review): unreachable print/sys.exit(1) statements used to follow
        # this return; confirm whether exiting was the intended behaviour.
        return None

    if verbose:
        print("socktype: %s" % af_type)
        syslog.syslog("socktype: %s" % af_type)

    new_sock = socket.socket(af_type, socket.SOCK_DGRAM)
    reuse = new_sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR) | 1
    new_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, reuse)
    if verbose: print("get socket %s" % new_sock)
    if verbose: syslog.syslog("get socket %s" % new_sock)

    return new_sock
|
||||
|
||||
|
||||
def socksend(msg, tohost):
    """Send ``msg`` as one datagram to ``tohost`` over the shared socket.

    ``tohost`` is handed unchanged to ``sendto``; its first element is used
    as the host name when the module-level ``sock`` still has to be created
    through getsock().
    """
    global sock

    if sock is None:
        sock = getsock(tohost[0])
    if DBG:
        print("socksend: sending msg=%s on socket=%s" % (msg, sock))
    sock.sendto(msg, tohost)
    if verbose:
        syslog.syslog("msg %s sent" % msg)
|
||||
|
||||
|
||||
def process():
    """Run the heartbeat loop until the module-level ``up`` flag is false.

    Every ``interval`` seconds a status message is sent to each entry of
    ``hb_hosts``. While waiting, the socket is watched with ``select``:
    a datagram equal to "ACK" increments ``ackcount``, any other datagram is
    passed to ``os.system``. A failing ``select`` ends the loop.
    """
    global up, sock, ackcount

    if sock is None:
        # NOTE(review): `tohost` is not assigned in this function; unless it
        # exists as a module-level name this line raises NameError -- confirm.
        sock = getsock(tohost[0])

    ackcount = 0
    last_beat = time.time()
    while up:
        remaining = (last_beat + interval) - time.time()
        if verbose:
            syslog.syslog("sleep %s" % remaining)

        if not remaining > 0:
            # The interval has elapsed: send one heartbeat to every host.
            last_beat = time.time()
            for hb_host in hb_hosts:
                try:
                    msg = "interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount)
                    if verbose:
                        syslog.syslog("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
                    socksend(msg, (hb_host, hb_port))
                except:
                    # Best effort: a failed send must not stop the loop.
                    logm = '%s hbc died: \n%s' % ('socksend', traceback.format_exc())
                    if DBG:
                        print(logm)
            continue

        # Still inside the interval: wait for incoming data or the timeout.
        try:
            r = select.select([sock.fileno()], [], [], remaining)
        except:
            # Only log when the failure was not caused by a shutdown in progress.
            if up:
                syslogtrace('select')
            break
        if verbose:
            syslog.syslog("r is %s" % str(r))

        if sock.fileno() not in r[0]:
            continue
        data, addr = sock.recvfrom(1024)
        if data == "ACK":
            ackcount += 1
            continue
        try:
            # NOTE(review): this runs whatever arrives on the UDP socket as a
            # shell command; the sender is not checked here.
            os.system(data)
        except:
            syslogtrace('System')
|
||||
|
||||
|
||||
def cleanup(a, b):
    """Announce shutdown to every heartbeat host and close the socket.

    ``a`` and ``b`` are only written to syslog. The module-level ``up`` flag
    is cleared first, a "shutdown=1" message carrying ``iam`` and
    ``ackcount`` is sent to each entry of ``hb_hosts``, and after a one
    second pause the shared socket is closed.
    """
    global up, sock, ackcount

    up = False
    syslog.syslog('exit a=%s b=%s' % (str(a), str(b)))

    farewell = "shutdown=1;name=%s;acks=%s" % (iam, ackcount)
    for peer in hb_hosts:
        if verbose:
            syslog.syslog("hbc: sock.send('%s', (%s, %s))" % (farewell, peer, hb_port))
        socksend(farewell, (peer, hb_port))

    # NOTE(review): the pause is placed after the loop (once), reconstructed
    # from a source without indentation -- confirm.
    time.sleep(1)
    sock.close()
|
||||
|
||||
|
||||
msgonly=False
|
||||
helpflag=False
|
||||
verbose=False
|
||||
daemon=False
|
||||
fdaemon=False
|
||||
optlist=[]
|
||||
args=[]
|
||||
msgboot=[]
|
||||
@@ -133,7 +157,7 @@ for o,a in optlist:
|
||||
elif o == '-c':
|
||||
configfile=a
|
||||
elif o == '-d':
|
||||
daemon=True
|
||||
fdaemon=True
|
||||
elif o == '-h':
|
||||
helpflag=True
|
||||
elif o == '-m':
|
||||
@@ -221,6 +245,7 @@ if not msgonly:
|
||||
msgboot.append("interval=%s" % interval)
|
||||
|
||||
if len(msgboot) > 0:
|
||||
if DBG: print "on boot"
|
||||
msgboot.append("name=%s" % iam)
|
||||
msgboot.append("time=%s" % time.time())
|
||||
msgboot.append("acks=0")
|
||||
@@ -229,88 +254,68 @@ if len(msgboot) > 0:
|
||||
fail=0
|
||||
for hb_host in hb_hosts:
|
||||
try:
|
||||
if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)
|
||||
if DBG: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)
|
||||
socksend(msg, (hb_host, hb_port))
|
||||
except:
|
||||
logm = '%s hbc died: \n%s' % ('socksend2', traceback.format_exc())
|
||||
if DBG: print logm
|
||||
fail=1
|
||||
if fail:
|
||||
time.sleep(10)
|
||||
else:
|
||||
break
|
||||
|
||||
if verbose: print "msgboot done msgonly=%s" % msgonly
|
||||
if msgonly:
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
if daemon:
|
||||
pid=os.fork()
|
||||
if pid > 0:
|
||||
if verbose:
|
||||
print "daemoinizing... pid=%d" % pid
|
||||
sys.exit(0)
|
||||
syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON)
|
||||
if fdaemon:
|
||||
|
||||
pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1)
|
||||
try:
|
||||
opid = pidfile.read_pid()
|
||||
except:
|
||||
opid = None
|
||||
if verbose: print "opid %s" % opid
|
||||
|
||||
os.close(0)
|
||||
os.close(1)
|
||||
os.close(2)
|
||||
sys.stdin.close()
|
||||
sys.stdout = NullDevice()
|
||||
sys.stderr = NullDevice()
|
||||
os.chdir("/")
|
||||
os.setsid()
|
||||
os.umask(0)
|
||||
|
||||
|
||||
while True:
|
||||
lock=Flock('/tmp/hbc.pid')
|
||||
if lock.lock():
|
||||
break
|
||||
if not lock.oproc():
|
||||
if opid:
|
||||
try:
|
||||
os.kill(opid, 0)
|
||||
is_running = True
|
||||
except:
|
||||
is_running = False
|
||||
if verbose: print "is_running %s" % is_running
|
||||
if is_running:
|
||||
print "process still alive %s" % opid
|
||||
sys.exit(1)
|
||||
os.kill(lock.opid,15)
|
||||
time.sleep(1)
|
||||
print "warning: stale pid file removed"
|
||||
os.unlink(PIDFILE)
|
||||
|
||||
up=1
|
||||
signal.signal(signal.SIGTERM, handler)
|
||||
signal.signal(signal.SIGHUP, handler)
|
||||
ackcount=0
|
||||
lastT=time.time()
|
||||
while up:
|
||||
sleep=(lastT+interval) - time.time()
|
||||
if verbose: print "sleep %s" % sleep
|
||||
if sleep > 0:
|
||||
try:
|
||||
r=select.select([sock.fileno()],[],[],sleep)
|
||||
# time.sleep(interval)
|
||||
except:
|
||||
break
|
||||
if verbose: print r
|
||||
if sock.fileno() in r[0]:
|
||||
data, addr = sock.recvfrom(1024)
|
||||
if data == "ACK":
|
||||
ackcount+=1
|
||||
else:
|
||||
try:
|
||||
os.system(data)
|
||||
except:
|
||||
pass
|
||||
continue
|
||||
lastT=time.time()
|
||||
for hb_host in hb_hosts:
|
||||
try:
|
||||
msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount)
|
||||
if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)
|
||||
socksend(msg, (hb_host, hb_port))
|
||||
except:
|
||||
pass
|
||||
print "daemoinizing... %s" % os.getpid()
|
||||
context = daemon.DaemonContext(
|
||||
working_directory='/tmp',
|
||||
umask=0o002,
|
||||
pidfile=pidfile,
|
||||
initgroups=False,
|
||||
)
|
||||
|
||||
context.signal_map = {
|
||||
signal.SIGTERM: cleanup,
|
||||
signal.SIGHUP: 'terminate',
|
||||
# signal.SIGUSR1: reload_program_config,
|
||||
}
|
||||
|
||||
context.files_preserve = [sock, sock.fileno()]
|
||||
with context:
|
||||
syslog.syslog('starting heartbeat to %s' % ','.join(hb_hosts))
|
||||
up = True
|
||||
try:
|
||||
process()
|
||||
except:
|
||||
syslogtrace('process')
|
||||
cleanup(0, None)
|
||||
|
||||
up=0
|
||||
msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount)
|
||||
for hb_host in hb_hosts:
|
||||
if verbose: print "hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)
|
||||
socksend(msg, (hb_host, hb_port))
|
||||
time.sleep(1)
|
||||
sock.close()
|
||||
lock.unlock()
|
||||
|
||||
Reference in New Issue
Block a user