Merge branch 'master' of git.wrede.ca:andreas/heartbeat

sequence
This commit is contained in:
2016-01-04 18:58:07 +01:00
2 changed files with 147 additions and 136 deletions
Executable
+6
View File
@@ -0,0 +1,6 @@
#!/bin/sh
# Execute on the remote machine.
# Forwards 2 ports to wig: 5903 to screen sharing and 5922 to ssh.
#   -f  put ssh into the background before it starts forwarding
#   -N  do not run a remote command (port forwarding only)
#   -C  compress the traffic
#   -R  listen on 192.168.10.64:<port> on the home.wrede.ca side and relay
#       each connection back to 127.0.0.1:<port> on the machine running this
#       script (5900 = screen sharing, 22 = ssh)
/usr/bin/ssh -f -N -C -R 192.168.10.64:5903:127.0.0.1:5900 -R 192.168.10.64:5922:127.0.0.1:22 home.wrede.ca
+141 -136
View File
@@ -1,5 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
# $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $ # $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $
# Requirements on Linux:
#   python-filelock
#   python-daemon version 1.61 or later
# Requirements on *BSD:
#   py27-lockfile
#   py-27-daemon
# Alternatively, run: sudo easy_install-2.7 lockfile daemon
import sys import sys
import time import time
import socket import socket
@@ -9,113 +17,129 @@ import getopt
import string import string
import select import select
import errno import errno
import traceback
import lockfile
import daemon
try:
import daemon.pidfile
except:
print "need version 2.1 or higer of python-daemon"
sys.exit(1)
import syslog
PORT=50003
INTERVAL=10 PORT = 50003
False=0 INTERVAL = 10
True=1 PIDFILE = '/tmp/hbc.pid'
DBG = False DBG = False
sock=None sock = None
up = True
ackcount = 0
class NullDevice: class NullDevice:
def write(self, s): def write(self, s):
pass pass
def syslogtrace(note):
class Flock: logm = '%s hbc died: \n%s' % (note, traceback.format_exc())
def __init__(self, lock_file): for l in logm.split('\n'):
self.lock_file = lock_file syslog.syslog(' tb: %s' % l)
self.fd = None
self.opid = None
def lock(self):
while 1:
self.fd = None
try:
self.fd = os.open(self.lock_file, os.O_CREAT | os.O_EXCL | os.O_RDWR)
except OSError, e:
if e.errno != errno.EEXIST:
raise
except:
raise
if not self.fd:
if self.oproc():
if DBG: print "process is alive"
os.remove(self.lock_file)
continue
else:
if DBG: print "no pid process??"
if self.fd:
f=os.fdopen(self.fd, 'w').write("%s" % os.getpid())
return self.fd
def unlock(self):
os.remove(self.lock_file)
self.fd=None
def setopid(self):
try:
self.opid=open(self.lock_file).readline()
except:
pass
def oproc(self):
self.setopid()
if not self.opid:
return False
try:
os.kill(int(self.opid), 0)
return True
except:
pass
return False
def handler(signum, frame):
    """Signal handler: exit the process unless shutdown is already under way.

    ``signum`` and ``frame`` are the standard signal-handler arguments and are
    not used.  When the module-level ``up`` flag is already 0 the handler does
    nothing; otherwise it terminates the interpreter with exit status 0.
    """
    global up
    if up != 0:
        sys.exit(0)
def getsock(host): def getsock(host):
try: try:
r=socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP) r=socket.getaddrinfo(host, 50001, 0, 0, socket.SOL_UDP)
except socket.gaierror: except socket.gaierror:
logm = '%s hbc died: \n%s' % ('getsock', traceback.format_exc())
if DBG: print logm
return None return None
if r[0][0] == 28: if r[0][0] in [10, 28, 30]:
af_type=socket.AF_INET6 af_type=socket.AF_INET6
elif r[0][0] == 2: elif r[0][0] == 2:
af_type=socket.AF_INET af_type=socket.AF_INET
else: else:
return None print "dont know this net type: %s" % r[0][0]
sys.exit(1)
if verbose: if verbose:
print "socktype: %s" % af_type syslog.syslog("socktype: %s" % af_type)
sock=socket.socket(af_type, socket.SOCK_DGRAM) sock=socket.socket(af_type, socket.SOCK_DGRAM)
sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \ sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR, \
sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1) sock.getsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR) | 1)
if verbose: print "get socket %s" % sock if verbose: syslog.syslog("get socket %s" % sock)
return sock return sock
def socksend(msg, tohost): def socksend(msg, tohost):
global sock global sock
if sock == None: if sock == None:
sock=getsock(tohost[0]) sock=getsock(tohost[0])
if DBG: print "socksend: sending msg=%s on socket=%s" % (msg, sock)
sock.sendto(msg, tohost) sock.sendto(msg, tohost)
if verbose: syslog.syslog("msg %s sent" % msg)
def process():
    """Run the heartbeat loop until the module-level ``up`` flag is cleared.

    Each pass waits on the shared UDP socket for whatever is left of the
    current interval.  A datagram that arrives during the wait is handled at
    once: "ACK" increments ``ackcount``, anything else is handed to
    ``os.system``.  When the wait times out (or the interval has already
    elapsed) one heartbeat message is sent to every host in ``hb_hosts``.

    Reads the module globals ``interval``, ``verbose``, ``DBG``, ``iam``,
    ``hb_hosts`` and ``hb_port``; rebinds ``sock`` and ``ackcount``.
    """
    global up, sock, ackcount
    if sock is None:
        # The original read ``tohost[0]`` here, but ``tohost`` is only a
        # parameter of socksend(); no module-level ``tohost`` is visible, so
        # that would raise NameError.  Resolve the first heartbeat host.
        sock = getsock(hb_hosts[0])
    ackcount = 0
    lastT = time.time()
    while up:
        # Time left until the next heartbeat is due.
        sleep = (lastT + interval) - time.time()
        if verbose:
            syslog.syslog("sleep %s" % sleep)
        if sleep > 0:
            try:
                r = select.select([sock.fileno()], [], [], sleep)
            except:
                # Deliberately broad: this is also how the loop notices that a
                # signal interrupted the wait or that cleanup() closed the
                # socket.  Only log when we were not already shutting down.
                if up:
                    syslogtrace('select')
                break
            if verbose:
                syslog.syslog("r is %s" % str(r))
            if sock.fileno() in r[0]:
                data, addr = sock.recvfrom(1024)
                if data == "ACK":
                    ackcount += 1
                else:
                    # NOTE(security): this executes whatever an unauthenticated
                    # UDP peer sends as a shell command.  Flagged for review,
                    # behaviour intentionally left unchanged.
                    try:
                        os.system(data)
                    except Exception:
                        # Narrowed from a bare except so a termination request
                        # (SystemExit / KeyboardInterrupt) is not swallowed.
                        syslogtrace('System')
                # Incoming traffic does not trigger a heartbeat; go back and
                # wait out the remainder of the interval.
                continue
        lastT = time.time()
        for hb_host in hb_hosts:
            try:
                msg = "interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount)
                if verbose:
                    syslog.syslog("sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
                socksend(msg, (hb_host, hb_port))
            except Exception:
                # Best effort: one unreachable host must not stop the others.
                if DBG:
                    logm = '%s hbc died: \n%s' % ('socksend', traceback.format_exc())
                    print(logm)
def cleanup(a, b):
    """Announce shutdown to every heartbeat host and close the socket.

    Used both as a signal handler (``a`` is then the signal number and ``b``
    the stack frame) and as a plain call at the end of the run; the two
    arguments are only written to syslog.

    Clears the module-level ``up`` flag so the heartbeat loop stops, sends a
    "shutdown=1" message to each host in ``hb_hosts``, waits one second and
    closes the shared socket.
    """
    global up, sock, ackcount
    up = False
    syslog.syslog('exit a=%s b=%s' % (str(a), str(b)))
    msg = "shutdown=1;name=%s;acks=%s" % (iam, ackcount)
    try:
        for hb_host in hb_hosts:
            if verbose:
                syslog.syslog("hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port))
            try:
                socksend(msg, (hb_host, hb_port))
            except Exception:
                # A failed send (unreachable host, or the socket was already
                # closed by an earlier cleanup call) must not prevent the
                # remaining hosts from being notified.
                syslogtrace('cleanup')
        # Pause before closing the socket, as the original did.
        time.sleep(1)
    finally:
        # Always release the socket, even if a notification failed; closing
        # an already-closed socket is harmless.
        if sock is not None:
            sock.close()
msgonly=False msgonly=False
helpflag=False helpflag=False
verbose=False verbose=False
daemon=False fdaemon=False
optlist=[] optlist=[]
args=[] args=[]
msgboot=[] msgboot=[]
@@ -133,7 +157,7 @@ for o,a in optlist:
elif o == '-c': elif o == '-c':
configfile=a configfile=a
elif o == '-d': elif o == '-d':
daemon=True fdaemon=True
elif o == '-h': elif o == '-h':
helpflag=True helpflag=True
elif o == '-m': elif o == '-m':
@@ -221,6 +245,7 @@ if not msgonly:
msgboot.append("interval=%s" % interval) msgboot.append("interval=%s" % interval)
if len(msgboot) > 0: if len(msgboot) > 0:
if DBG: print "on boot"
msgboot.append("name=%s" % iam) msgboot.append("name=%s" % iam)
msgboot.append("time=%s" % time.time()) msgboot.append("time=%s" % time.time())
msgboot.append("acks=0") msgboot.append("acks=0")
@@ -229,88 +254,68 @@ if len(msgboot) > 0:
fail=0 fail=0
for hb_host in hb_hosts: for hb_host in hb_hosts:
try: try:
if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) if DBG: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)
socksend(msg, (hb_host, hb_port)) socksend(msg, (hb_host, hb_port))
except: except:
logm = '%s hbc died: \n%s' % ('socksend2', traceback.format_exc())
if DBG: print logm
fail=1 fail=1
if fail: if fail:
time.sleep(10) time.sleep(10)
else: else:
break break
if verbose: print "msgboot done msgonly=%s" % msgonly
if msgonly: if msgonly:
sys.exit(0) sys.exit(0)
# #
# #
if daemon: syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_DAEMON)
pid=os.fork() if fdaemon:
if pid > 0:
if verbose:
print "daemoinizing... pid=%d" % pid
sys.exit(0)
pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1)
try:
opid = pidfile.read_pid()
except:
opid = None
if verbose: print "opid %s" % opid
os.close(0) if opid:
os.close(1)
os.close(2)
sys.stdin.close()
sys.stdout = NullDevice()
sys.stderr = NullDevice()
os.chdir("/")
os.setsid()
os.umask(0)
while True:
lock=Flock('/tmp/hbc.pid')
if lock.lock():
break
if not lock.oproc():
sys.exit(1)
os.kill(lock.opid,15)
time.sleep(1)
up=1
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGHUP, handler)
ackcount=0
lastT=time.time()
while up:
sleep=(lastT+interval) - time.time()
if verbose: print "sleep %s" % sleep
if sleep > 0:
try: try:
r=select.select([sock.fileno()],[],[],sleep) os.kill(opid, 0)
# time.sleep(interval) is_running = True
except: except:
break is_running = False
if verbose: print r if verbose: print "is_running %s" % is_running
if sock.fileno() in r[0]: if is_running:
data, addr = sock.recvfrom(1024) print "process still alive %s" % opid
if data == "ACK": sys.exit(1)
ackcount+=1 print "warning: stale pid file removed"
else: os.unlink(PIDFILE)
try:
os.system(data) print "daemoinizing... %s" % os.getpid()
except: context = daemon.DaemonContext(
pass working_directory='/tmp',
continue umask=0o002,
lastT=time.time() pidfile=pidfile,
for hb_host in hb_hosts: initgroups=False,
try: )
msg="interval=%s;name=%s;time=%s;acks=%s" % (interval, iam, time.time(), ackcount)
if verbose: print "sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port) context.signal_map = {
socksend(msg, (hb_host, hb_port)) signal.SIGTERM: cleanup,
except: signal.SIGHUP: 'terminate',
pass # signal.SIGUSR1: reload_program_config,
}
context.files_preserve = [sock, sock.fileno()]
with context:
syslog.syslog('starting heartbeat to %s' % ','.join(hb_hosts))
up = True
try:
process()
except:
syslogtrace('process')
cleanup(0, None)
up=0
msg="shutdown=1;name=%s;acks=%s" % (iam, ackcount)
for hb_host in hb_hosts:
if verbose: print "hbc: sock.send('%s', (%s, %s))" % (msg, hb_host, hb_port)
socksend(msg, (hb_host, hb_port))
time.sleep(1)
sock.close()
lock.unlock()