Merge branch 'master' of git.wrede.ca:andreas/heartbeat
sync
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# $Id: hbc,v 1.9 2012/03/29 02:08:36 andreas Exp $
|
||||
|
||||
# NEW
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
@@ -11,63 +11,39 @@ import string
|
||||
import select
|
||||
import errno
|
||||
import traceback
|
||||
import md5
|
||||
from hashlib import md5
|
||||
import shutil
|
||||
import zlib
|
||||
import subprocess
|
||||
import syslog
|
||||
|
||||
try:
|
||||
import lockfile
|
||||
import daemon
|
||||
import daemon.pidfile
|
||||
except:
|
||||
print """
|
||||
require on Linux
|
||||
python-filelock
|
||||
python-daemon vs 1.61 or >
|
||||
run sudo easy_install-2.7 lockfile python-daemon
|
||||
on *bsd
|
||||
py27-lockfile
|
||||
py27-daemon
|
||||
run sudo pkg install -y py27-lockfile py27-daemon
|
||||
"""
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# N.B. daemon tries to close resource.RLIMIT_NOFILE file descriptors
|
||||
# which on FreeBSD is close to a million
|
||||
# hack: replace the function in daemon with this one:
|
||||
|
||||
|
||||
|
||||
def log(msg):
|
||||
if fdaemon:
|
||||
syslog.syslog(syslog.LOG_ERR, msg)
|
||||
else:
|
||||
print msg
|
||||
|
||||
|
||||
def get_maximum_file_descriptors():
|
||||
return 2048
|
||||
|
||||
daemon.get_maximum_file_descriptors = get_maximum_file_descriptors
|
||||
|
||||
import syslog
|
||||
|
||||
|
||||
PORT = 50003
|
||||
INTERVAL = 10
|
||||
REOPENC = 6
|
||||
PIDFILE = '/tmp/hbc.pid'
|
||||
VER = 2
|
||||
VER = 5
|
||||
MAXRECV = 32767
|
||||
|
||||
running = True
|
||||
dorestart = False
|
||||
warned1 = False
|
||||
|
||||
def log(msg):
    """Emit *msg* to syslog when running as a daemon, otherwise to stdout.

    Reads the module-level ``fdaemon`` flag at call time. Daemon-mode
    messages are all sent with priority ``syslog.LOG_ERR``.
    """
    if fdaemon:
        syslog.syslog(syslog.LOG_ERR, msg)
    else:
        print(msg)
|
||||
|
||||
def handler(signum, frame):
    """Signal handler: call cleanup() on SIGTERM and ignore every other signal.

    *frame* is accepted because the signal module passes it, but is unused.
    """
    if signum == signal.SIGTERM:
        cleanup()
|
||||
|
||||
class NullDevice:
    """Minimal write-only sink: everything passed to write() is discarded."""

    def write(self, s):
        """Accept *s* and do nothing with it."""
        pass
|
||||
|
||||
|
||||
class Conn:
|
||||
@@ -82,35 +58,54 @@ class Conn:
|
||||
self.send = 0
|
||||
self.lastsend = 0 # time() last msg was sent
|
||||
self.rtts = [0]
|
||||
self.sock=socket.socket(af, socket.SOCK_DGRAM)
|
||||
self.sock = None
|
||||
|
||||
|
||||
def __str__(self):
    """Return a short description built from the address, port and address family."""
    return "Con(%s, %s %s)" % (self.addr, self.port, self.af)
|
||||
|
||||
|
||||
def open(self):
    """Create a fresh UDP socket for ``self.af`` and store it in ``self.sock``.

    SO_REUSEADDR is switched on by OR-ing 1 into the option's current value.
    """
    self.sock = socket.socket(self.af, socket.SOCK_DGRAM)
    current = self.sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR)
    self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, current | 1)
|
||||
|
||||
|
||||
def sendto(self, msg, ID = 'HTB'): # default ID is HearTBeat
    """Stamp *msg*, serialise it with dicttos() and send it as one UDP datagram.

    *msg* is modified in place: the keys 'name', 'id', 'ver' and 'time' are
    (over)written before serialisation.

    Whenever ``self.send`` is a multiple of REOPENC (including 0, i.e. before
    the first successful send) the connection is closed first, and a socket
    is opened if ``self.sock`` is falsy.
    NOTE(review): this presumes close() resets self.sock to None - confirm.

    On socket.error the problem is logged once per process (module-level
    ``warned1``), the connection is closed and the method returns without
    touching the send counters. On success ``self.send`` is incremented and
    ``self.lastsend`` records the send time.
    """
    global warned1

    if self.send % REOPENC == 0:
        self.close()
    if not self.sock:
        self.open()

    msg['name'] = shortname(iam)
    msg['id'] = self.conId
    msg['ver'] = VER
    msg['time'] = time.time()
    m = dicttos(ID, msg) # always compress
    if verbose:
        log("conn.send('%s', (%s:%s) %s)" % (msg, self.addr, self.port, len(m)))

    try:
        self.sock.sendto(m, (self.addr, self.port))
    except socket.error as e:
        if not warned1:
            log("socket error: %s %s:%s" % (e, self.addr, self.port))
        warned1 = True
        self.close()
        return

    self.send += 1
    self.lastsend = time.time()
|
||||
|
||||
|
||||
def ack(self, msgDict):
|
||||
self.lastack = time.time()
|
||||
self.lastacksent = float(msgDict.get('time','0'))
|
||||
if verbose: log("ack RTT: %0.1f ms" % ((self.lastack - self.lastsend) * 1000.0))
|
||||
self.rtts.append((self.lastack - self.lastsend) * 1000.0)
|
||||
def ack(self, msgDict, now):
|
||||
try:
|
||||
self.lastack = msgDict['time']
|
||||
mul = 2
|
||||
except:
|
||||
self.lastack = now
|
||||
mul = 1
|
||||
rtt = (self.lastack - self.lastsend) * mul
|
||||
if verbose: log("ack RTT: %0.1f ms (now %s)" % (rtt * 1000.0, now))
|
||||
self.rtts.append(rtt * 1000.0)
|
||||
if len(self.rtts) > 10:
|
||||
del self.rtts[0]
|
||||
self.ackcount += 1
|
||||
@@ -123,11 +118,11 @@ class Conn:
|
||||
|
||||
|
||||
def shortname(name):
    """Return the part of *name* before the first '.' (all of it if there is none)."""
    return name.split('.')[0]
|
||||
|
||||
|
||||
def dicttos(ID, d, compress=False):
|
||||
def dicttos(ID, d):
|
||||
s = []
|
||||
for k in d:
|
||||
if type(d[k]) == type(1.2):
|
||||
@@ -135,15 +130,35 @@ def dicttos(ID, d, compress=False):
|
||||
else:
|
||||
s.append("%s=%s" % (k, d[k]))
|
||||
pk = ";".join(s)
|
||||
if compress:
|
||||
zpk = zlib.compress(pk, 6)
|
||||
ID = "!"+ID
|
||||
else:
|
||||
zpk = pk
|
||||
return ID + ":" + zpk
|
||||
|
||||
zpk = zlib.compress(pk.encode(), 6)
|
||||
ID = "!" + ID + ":"
|
||||
return ID.encode() + zpk
|
||||
|
||||
def stodict(msg):
    """Parse a wire message into a dict; return None if it is malformed.

    Two framings are accepted:

    * compressed: bytes of the form ``!<ID>:<zlib data>``; the data
      decompresses to ``k=v;k=v;...``
    * plain: ``<ID>:k=v;k=v;...`` given as bytes or str

    The result always carries the message identifier under 'ID'. A field
    without '=' maps to None. A value whose first character is a digit is
    converted with ast.literal_eval; if that fails it is kept as a string.
    Every other value stays a (stripped) string.

    None is returned when the ':' separator is missing or a compressed
    payload cannot be decompressed or decoded.
    """
    # Local import so the block does not depend on the file's import list.
    import ast

    d = {}
    is_bytes = isinstance(msg, (bytes, bytearray))
    if is_bytes and msg[:1] == b"!":
        # Split on the first ':' instead of assuming a three-letter ID.
        ident, sep, payload = bytes(msg[1:]).partition(b":")
        if not sep:
            return None
        try:
            pk = zlib.decompress(payload).decode()
            d['ID'] = ident.decode()
        except (zlib.error, UnicodeDecodeError):
            return None
    else:
        text = msg.decode(errors="replace") if is_bytes else msg
        ident, sep, pk = text.partition(':')
        if not sep:
            return None
        d['ID'] = ident

    for field in pk.split(';'):
        key, sep, value = field.partition('=')
        key = key.strip()
        if not sep:
            d[key] = None
            continue
        value = value.strip()
        if value[:1].isdigit():
            # The data comes off the network: literal_eval parses numbers
            # (and other literals) without executing code, unlike eval().
            try:
                value = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                pass  # not a literal after all (e.g. an IP address)
        d[key] = value

    if verbose: print("msg is %s" % d)
    return d
|
||||
|
||||
|
||||
def XXstodict(msg):
|
||||
d = {}
|
||||
r0 = msg.split(':',1)
|
||||
if len(r0) == 1:
|
||||
@@ -175,9 +190,10 @@ def syslogtrace(note):
|
||||
logm = '%s hbc died: \n%s' % (note, traceback.format_exc())
|
||||
log(logm)
|
||||
for l in logm.split('\n'):
|
||||
log(' tb: %s' % l)
|
||||
|
||||
|
||||
syslog.syslog(syslog.LOG_ERR, ' tb: %s' % l)
|
||||
if verbose:
|
||||
print(logm)
|
||||
|
||||
|
||||
conId = 1
|
||||
def createConnections(hosts):
|
||||
@@ -185,7 +201,7 @@ def createConnections(hosts):
|
||||
for host in hosts:
|
||||
if verbose: log("createConnections for %s" % host)
|
||||
try:
|
||||
rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
|
||||
rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
|
||||
except socket.gaierror:
|
||||
logm = '%s hbc died: \n%s' % ('createConnections', traceback.format_exc())
|
||||
if verbose: log(logm)
|
||||
@@ -197,11 +213,13 @@ def createConnections(hosts):
|
||||
elif r[0] == 2:
|
||||
af=socket.AF_INET
|
||||
else:
|
||||
log("dont know this net type: %s" % r[0][0])
|
||||
print("dont know this net type: %s" % r[0][0])
|
||||
sys.exit(1)
|
||||
|
||||
addr = r[4][0]
|
||||
conns[conId] = Conn(conId, addr, hb_port, af)
|
||||
if verbose:
|
||||
print("cons[%s] = %s" % (conId, str(conns[conId])))
|
||||
conId += 1
|
||||
|
||||
|
||||
@@ -214,7 +232,7 @@ def doexec(conn, data):
|
||||
fail = "CalledProcessError"
|
||||
except Exception as e:
|
||||
syslogtrace('System')
|
||||
ro = None
|
||||
ro = "N/A"
|
||||
fail = "cmd failed: %s" % e
|
||||
msg={'service': 'command', 'msg': fail+" "+ro}
|
||||
conns[conn].sendto(msg)
|
||||
@@ -226,7 +244,7 @@ def doupdate(conn, msgDict):
|
||||
code = msgDict['code'].decode('base64')
|
||||
csum = msgDict['csum']
|
||||
except:
|
||||
fail = "csum/code missing"
|
||||
fail = "csum/code missing"
|
||||
if not fail:
|
||||
fail = doupdateone(code, csum)
|
||||
|
||||
@@ -244,10 +262,10 @@ def doupdateone(code, csum):
|
||||
m.update(code)
|
||||
icsum = m.hexdigest()
|
||||
if icsum != csum:
|
||||
return "checksum error"
|
||||
return "checksum error"
|
||||
|
||||
fn = sys.argv[0]
|
||||
ofn = "%.sav" % fn
|
||||
ofn = "%s.sav" % fn
|
||||
try:
|
||||
shutil.copy2(fn, ofn)
|
||||
except Exception as e:
|
||||
@@ -264,38 +282,43 @@ def doupdateone(code, csum):
|
||||
|
||||
|
||||
def restart():
    """Replace the running process with a fresh copy of this script.

    Calls os.execv on sys.argv[0] with the module-level ``cmdargs``. execv
    does not return on success, so the statements after it run only when
    the exec failed; they report the reason on stdout and through log().
    """
    if verbose:
        print("restart: execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs))
    syslog.syslog(syslog.LOG_ERR, 'restart %s' % (sys.argv[0]))

    # Keep the failure in its own variable: Python 3 unbinds the
    # `except ... as` name when the handler exits, so it cannot be read
    # afterwards.
    reason = "fallthrough"
    try:
        os.execv(sys.argv[0], [sys.argv[0]]+cmdargs)
    except Exception as err:
        reason = err

    print("should not be here:", str(reason))
    log('restart failed: %s' % reason)
|
||||
|
||||
|
||||
def process():
|
||||
global running, dorestart
|
||||
|
||||
ifiles = {}
|
||||
conIds = {}
|
||||
for conn in conns:
|
||||
ifiles[conns[conn].sock.fileno()] = conns[conn].sock
|
||||
conIds[conns[conn].sock.fileno()] = conn
|
||||
nextReport = time.time()
|
||||
|
||||
while running:
|
||||
while time.time() < nextReport:
|
||||
ifiles = {}
|
||||
conIds = {}
|
||||
for conn in conns:
|
||||
if conns[conn].sock:
|
||||
ifiles[conns[conn].sock.fileno()] = conns[conn].sock
|
||||
conIds[conns[conn].sock.fileno()] = conn
|
||||
|
||||
sleep=nextReport - time.time()
|
||||
if sleep <= 0:
|
||||
break
|
||||
try:
|
||||
r=select.select(ifiles.keys(),[],[],sleep)
|
||||
r=select.select(list(ifiles.keys()),[],[],sleep)
|
||||
now = time.time() # nb: delay from actual packet arrival to select is ca. 105ms!
|
||||
except KeyboardInterrupt:
|
||||
running = False
|
||||
break
|
||||
except SystemExit:
|
||||
log('daemon exit, running=: %s' % running)
|
||||
log('daemon exit, running was %s' % running)
|
||||
if running:
|
||||
running = False
|
||||
break
|
||||
@@ -307,38 +330,45 @@ def process():
|
||||
for rfh in r[0]:
|
||||
conn = conIds[rfh]
|
||||
data, addr = ifiles[rfh].recvfrom(MAXRECV)
|
||||
if verbose: print "sock.recvfrom: %s (%s) %s" % (addr, len(data), data[:4])
|
||||
# if verbose: print("sock.recvfrom: %s (%s) %s" % (addr, len(data), data[:4]))
|
||||
msgDict = stodict(data)
|
||||
if verbose: print "sock.recvfrom: %s (%s) %s" % (addr, len(data), str(msgDict)[:80])
|
||||
if msgDict['ID'] == "ACK":
|
||||
conns[conn].ack(msgDict)
|
||||
elif msgDict['ID'] == "UPD":
|
||||
if verbose: print("sock.recvfrom: %s (%s) %s" % (addr, len(data), str(msgDict)[:80]))
|
||||
if msgDict == None:
|
||||
print("bad backet from %s (%s) %s" % (addr, len(data), data))
|
||||
elif msgDict['ID'] == "ACK":
|
||||
conns[conn].ack(msgDict, now)
|
||||
elif msgDict['ID'] == "UPD":
|
||||
if doupdate(conn, msgDict) == None:
|
||||
if verbose: print "process: restart after update"
|
||||
if verbose: print("process: restart after update")
|
||||
dorestart = True
|
||||
break
|
||||
elif msgDict['ID'] == "CMD":
|
||||
elif msgDict['ID'] == "CMD":
|
||||
doexec(conn, msgDict['cmd'])
|
||||
else:
|
||||
doexec(conn, data) # deprecated until no more VER - hbc
|
||||
doexec(conn, data) # deprecated until no more VER - hbc
|
||||
if dorestart:
|
||||
running = False
|
||||
break
|
||||
if not running:
|
||||
break
|
||||
for conn in conns:
|
||||
# msg={'interval': interval, 'acks': conns[conn].ackcount, 'rtt': conns[conn].rtts[-1]}
|
||||
msg={'acks': conns[conn].ackcount, 'rtt': conns[conn].rtts[-1]}
|
||||
conns[conn].sendto(msg)
|
||||
nextReport = time.time() + interval
|
||||
time.sleep(0.1) #N.B. Linux (i.e. Rasperry Pi 3 drops the second pkg unless delayed
|
||||
if nextReport + interval >= time.time():
|
||||
nextReport += interval
|
||||
else:
|
||||
nextReport = time.time() + interval
|
||||
|
||||
if verbose: log( "process: done running")
|
||||
|
||||
def cleanup():
|
||||
global running
|
||||
if not running:
|
||||
return
|
||||
if verbose: log('cleanup')
|
||||
running = False
|
||||
for conn in conns:
|
||||
for conn in conns:
|
||||
msg={'shutdown': 1, 'acks': conns[conn].ackcount}
|
||||
conns[conn].sendto(msg)
|
||||
conns[conn].close()
|
||||
@@ -347,16 +377,60 @@ def cleanup():
|
||||
|
||||
|
||||
def closeall():
    """Call close() on every connection held in the module-level ``conns`` dict.

    In verbose mode a 'closecall' marker is written to syslog first.
    """
    if verbose: syslog.syslog(syslog.LOG_ERR, 'closecall')
    for connection in conns.values():
        connection.close()
|
||||
|
||||
|
||||
def daemonize(working_dir="/", stdin='/dev/zero', stdout='/dev/null', stderr='/dev/null'):
    """
    Does the UNIX double-fork magic, see Stevens' "Advanced Programming in the
    UNIX Environment" for details (ISBN 0201563177)
    http://www.yendor.com/programming/unix/apue/proc/fork2.c

    Steps: fork and exit the parent, chdir to *working_dir*, start a new
    session (setsid), clear the umask, fork again and exit that parent,
    then point the process's stdin/stdout/stderr descriptors at the files
    named by *stdin*, *stdout* and *stderr*.

    A failed first fork ends the process with os._exit(1); a failed second
    fork raises SystemExit(1). Only the surviving grandchild returns.
    """
    try:
        # first fork
        pid = os.fork()
        if pid > 0:
            # exit from first parent
            os._exit(0)
    except OSError as e:
        sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
        os._exit(1)

    # decouple from parent environment
    os.chdir(working_dir)
    os.setsid()
    os.umask(0)

    # second fork
    try:
        pid = os.fork()
        if pid > 0:
            # exit from second parent
            os._exit(0)
    except OSError as e:
        sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)

    # redirects standard file descriptors
    sys.stdout.flush()
    sys.stderr.flush()
    # dup2 copies each descriptor onto 0/1/2, so the originals are closed
    # explicitly on leaving the with-block instead of waiting for GC.
    with open(stdin, 'r') as si, open(stdout, 'a+') as so, open(stderr, 'a+') as se:
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())
|
||||
|
||||
|
||||
|
||||
msgonly=False
|
||||
helpflag=False
|
||||
verbose=False
|
||||
fdaemon=False
|
||||
optlist=[]
|
||||
daemonized = False
|
||||
optlist=[]
|
||||
args=[]
|
||||
msgboot={}
|
||||
home=os.environ['HOME']
|
||||
@@ -365,9 +439,9 @@ cmdargs = []
|
||||
|
||||
|
||||
try:
|
||||
optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
|
||||
optlist, args = getopt.getopt(sys.argv[1:], 'bc:dhm:v')
|
||||
except:
|
||||
helpflag=True
|
||||
helpflag=True
|
||||
|
||||
for o,a in optlist:
|
||||
if o == '-b':
|
||||
@@ -390,20 +464,20 @@ for o,a in optlist:
|
||||
|
||||
|
||||
cmdargs += args
|
||||
if verbose: print "cmdargs for restart are %s" % cmdargs
|
||||
if verbose: print("cmdargs for restart are %s" % cmdargs)
|
||||
|
||||
if helpflag:
|
||||
print "hbc HeartBeatClient"
|
||||
print "usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]"
|
||||
print
|
||||
print " -b indicate machine boot"
|
||||
print " -c configfile"
|
||||
print " -d daemonize"
|
||||
print " -h this help"
|
||||
print " -m send a message"
|
||||
print " -v verbose"
|
||||
print
|
||||
print """ config file can contain
|
||||
print("hbc HeartBeatClient")
|
||||
print("usage: hbc [-bdhv] [-c configfile] [-m msg][host1 [..]]")
|
||||
print()
|
||||
print(" -b indicate machine boot")
|
||||
print(" -c configfile")
|
||||
print(" -d daemonize")
|
||||
print(" -h this help")
|
||||
print(" -m send a message")
|
||||
print(" -v verbose")
|
||||
print()
|
||||
print(""" config file can contain
|
||||
hb_hosts=('host1', 'host2', ..._
|
||||
hb_port=50003
|
||||
interval=20
|
||||
@@ -411,7 +485,7 @@ logfile=...
|
||||
logfmt={|test|msg}
|
||||
grace=SECONDS
|
||||
reportstrict={True|False}
|
||||
"""
|
||||
""")
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
@@ -425,9 +499,9 @@ iam=socket.gethostname()
|
||||
|
||||
try:
|
||||
f=open(configfile,"r")
|
||||
if verbose: print "notice: using config file %s" % configfile
|
||||
if verbose: print("notice: using config file %s" % configfile)
|
||||
except:
|
||||
if verbose: print "warning: running without config file: %s" % configfile
|
||||
if verbose: print("warning: running without config file: %s" % configfile)
|
||||
f=None
|
||||
|
||||
if f:
|
||||
@@ -439,14 +513,14 @@ if f:
|
||||
if r[0] == 'hb_hosts':
|
||||
hb_hosts=eval(r[1])
|
||||
if verbose:
|
||||
print "notice: cfg hb_hosts: %s" % hb_hosts
|
||||
print("notice: cfg hb_hosts: %s" % hb_hosts)
|
||||
elif r[0] == 'interval':
|
||||
interval=eval(r[1])
|
||||
elif r[0] == 'hb_port':
|
||||
hb_port=eval(r[1])
|
||||
elif r[0] == 'name':
|
||||
iam=eval(r[1])
|
||||
if verbose: print "name set to %s" % iam
|
||||
if verbose: print("name set to %s" % iam)
|
||||
f.close()
|
||||
|
||||
if len(args) != 0:
|
||||
@@ -454,17 +528,17 @@ if len(args) != 0:
|
||||
|
||||
|
||||
if len(hb_hosts) == 0:
|
||||
print "no hb server specified"
|
||||
print("no hb server specified")
|
||||
sys.exit(1)
|
||||
|
||||
#
|
||||
if verbose:
|
||||
print "notice: hb_hosts: %s" % str(hb_hosts)
|
||||
print "notice: hb_port: %s" % hb_port
|
||||
print "notice: interval: %s" % interval
|
||||
print "notice: iam: %s" % iam
|
||||
print "notice: msgonly: %s" % msgonly
|
||||
print "notice: msgboot: %s" % msgboot
|
||||
print("notice: hb_hosts: %s" % str(hb_hosts))
|
||||
print("notice: hb_port: %s" % hb_port)
|
||||
print("notice: interval: %s" % interval)
|
||||
print("notice: iam: %s" % iam)
|
||||
print("notice: msgonly: %s" % msgonly)
|
||||
print("notice: msgboot: %s" % msgboot)
|
||||
|
||||
if not msgonly:
|
||||
msgboot['interval'] = interval
|
||||
@@ -483,74 +557,32 @@ if verbose:
|
||||
log("%s connections created" % (len(conns)))
|
||||
|
||||
if len(msgboot) > 0:
|
||||
if verbose: print "on boot"
|
||||
msgboot['acks'] = 0
|
||||
if verbose: print("on boot")
|
||||
msgboot['acks'] = 0
|
||||
for conn in conns:
|
||||
conns[conn].sendto(msgboot)
|
||||
|
||||
if msgonly:
|
||||
if verbose: print "msgboot done msgonly=%s" % msgonly
|
||||
if verbose: print("msgboot done msgonly=%s" % msgonly)
|
||||
closeall()
|
||||
sys.exit(0)
|
||||
|
||||
#
|
||||
|
||||
syslog.openlog('hbc', syslog.LOG_PID, syslog.LOG_DAEMON)
|
||||
if fdaemon:
|
||||
print("daemoinizing.")
|
||||
daemonize()
|
||||
daemonized = True
|
||||
syslog.syslog(syslog.LOG_ERR, 'starting heartbeat to %s' % ','.join(hb_hosts))
|
||||
|
||||
pidfile = daemon.pidfile.TimeoutPIDLockFile(PIDFILE, acquire_timeout=-1)
|
||||
try:
|
||||
opid = pidfile.read_pid()
|
||||
except:
|
||||
opid = None
|
||||
signal.signal(signal.SIGTERM, handler)
|
||||
running = True
|
||||
try:
|
||||
process()
|
||||
except:
|
||||
syslogtrace('process')
|
||||
if verbose: print("err: process exit: %s" % e)
|
||||
|
||||
if opid:
|
||||
try:
|
||||
os.kill(opid, 0)
|
||||
is_running = True
|
||||
except:
|
||||
is_running = False
|
||||
if verbose: print "is_running %s" % is_running
|
||||
if is_running:
|
||||
print "process still alive %s" % opid
|
||||
sys.exit(1)
|
||||
print "warning: stale pid file removed"
|
||||
os.unlink(PIDFILE)
|
||||
|
||||
print "daemoinizing... %s" % os.getpid()
|
||||
context = daemon.DaemonContext(
|
||||
working_directory='/tmp',
|
||||
umask=0o022,
|
||||
pidfile=pidfile,
|
||||
detach_process=True,
|
||||
# initgroups=False,
|
||||
)
|
||||
|
||||
context.signal_map = {
|
||||
# signal.SIGHUP: cleanup,
|
||||
signal.SIGTERM: 'terminate',
|
||||
# signal.SIGUSR1: reload_program_config,
|
||||
}
|
||||
|
||||
context.files_preserve = []
|
||||
for conn in conns:
|
||||
context.files_preserve += [conns[conn].sock, conns[conn].sock.fileno()]
|
||||
with context:
|
||||
log('starting heartbeat to %s' % ','.join(hb_hosts))
|
||||
running = True
|
||||
try:
|
||||
process()
|
||||
except:
|
||||
syslogtrace('process')
|
||||
|
||||
else:
|
||||
running = True
|
||||
try:
|
||||
if verbose: print "starting loop process"
|
||||
process()
|
||||
except Exception as e:
|
||||
if verbose: print "err: process exit: %s" % e
|
||||
syslogtrace('process')
|
||||
if verbose: log( "main: cleanup")
|
||||
cleanup()
|
||||
if dorestart:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
# $Id: hbd,v 1.38 2013/07/14 02:25:05 andreas Exp $
|
||||
# Wait for heartbeat messages and act on them (or their absence)
|
||||
#
|
||||
VER = 3.00
|
||||
VER = 4.00
|
||||
|
||||
import time
|
||||
import os
|
||||
@@ -11,31 +11,33 @@ import sys
|
||||
import socket
|
||||
import atexit
|
||||
import select
|
||||
import SocketServer
|
||||
import BaseHTTPServer
|
||||
import socketserver
|
||||
import http.server
|
||||
import getopt
|
||||
import signal
|
||||
import cPickle
|
||||
import pickle
|
||||
import smtplib
|
||||
import traceback
|
||||
import urllib
|
||||
import urlparse
|
||||
import httplib
|
||||
import urllib.request, urllib.parse, urllib.error
|
||||
import urllib.parse
|
||||
import http.client
|
||||
import threading
|
||||
import Queue
|
||||
import md5
|
||||
import queue
|
||||
from hashlib import md5
|
||||
import json
|
||||
import zlib
|
||||
|
||||
from subprocess import Popen, STDOUT, PIPE
|
||||
|
||||
from hbdclass import *
|
||||
#from hbdclass import *
|
||||
import hbdclass
|
||||
|
||||
|
||||
SEND_EMAIL=False
|
||||
SEND_PUSHOVER=True
|
||||
|
||||
DEBUG = 0
|
||||
hbdclass.DEBUG = DEBUG
|
||||
|
||||
MAXRECV = 32767
|
||||
LOGFILE = "/home/andreas/public_html/messages/andreas"
|
||||
@@ -55,6 +57,7 @@ verbose = False
|
||||
|
||||
INTERVAL = 10
|
||||
GRACE = 2
|
||||
DROPOVERDUE = 7*24*3600
|
||||
|
||||
os.environ['TZ'] = 'EST5EDT'
|
||||
|
||||
@@ -104,7 +107,7 @@ def handler(signum, frame):
|
||||
|
||||
|
||||
def shortname(name):
    """Return the part of *name* before the first '.' (all of it if there is none)."""
    return name.split('.')[0]
|
||||
|
||||
|
||||
@@ -131,22 +134,22 @@ def dicttos(ID, d, compress=False):
|
||||
s.append("%s=%s" % (k, d[k]))
|
||||
pk = ";".join(s)
|
||||
if compress:
|
||||
zpk = zlib.compress(pk, 6)
|
||||
ID = "!"+ID
|
||||
zpk = zlib.compress(pk.encode(), 6)
|
||||
ID = "!" + ID + ":"
|
||||
opk = ID.encode() + zpk
|
||||
else:
|
||||
zpk = pk
|
||||
return ID + ":" + zpk
|
||||
opk = ID + ":" + zpk
|
||||
return opk
|
||||
|
||||
|
||||
def stodict(msg):
|
||||
d = {}
|
||||
r0 = msg.split(':',1)
|
||||
if len(r0) == 1:
|
||||
return None
|
||||
if r0[0][0] == '!': # compressed
|
||||
pk = zlib.decompress(msg[len(r0[0])+1:])
|
||||
d['ID'] = r0[0][1:]
|
||||
if len(msg) > 0 and chr(msg[0]) == "!":
|
||||
pk = zlib.decompress(msg[5:]).decode()
|
||||
d['ID'] = msg[1:4].decode()
|
||||
else:
|
||||
r0 = msg.split(':',1)
|
||||
pk = r0[1]
|
||||
d['ID'] = r0[0]
|
||||
r = pk.split(';')
|
||||
@@ -180,11 +183,11 @@ def email(s, msg):
|
||||
server = smtplib.SMTP(SMTPSERVER)
|
||||
if DEBUG > 0: server.set_debuglevel(1)
|
||||
server.sendmail(fromemail, toaddrs, body)
|
||||
except smtplib.SMTPRecipientsRefused, errs:
|
||||
except smtplib.SMTPRecipientsRefused as errs:
|
||||
log(None, "cannot send email: %s\n" % (errs))
|
||||
ret = "Fail"
|
||||
except:
|
||||
print("smtp error: "+traceback.format_exc())
|
||||
print(("smtp error: "+traceback.format_exc()))
|
||||
saveandrestart()
|
||||
try:
|
||||
server.quit()
|
||||
@@ -196,10 +199,10 @@ def email(s, msg):
|
||||
def pushover(msg):
|
||||
if not SEND_PUSHOVER:
|
||||
return
|
||||
conn = httplib.HTTPSConnection("api.pushover.net:443")
|
||||
conn = http.client.HTTPSConnection("api.pushover.net:443")
|
||||
try:
|
||||
conn.request("POST", "/1/messages.json",
|
||||
urllib.urlencode({
|
||||
urllib.parse.urlencode({
|
||||
"token": "ac7NLX2rPjXFareeDgLpXNoDf4iFmf",
|
||||
"user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK",
|
||||
"message": msg, }), { "Content-type": "application/x-www-form-urlencoded" })
|
||||
@@ -241,12 +244,12 @@ answer
|
||||
if DEBUG > 0: log(None, "DBG: cmd %s" % cmd)
|
||||
try:
|
||||
p = Popen(cmd, shell=False, bufsize=1, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
|
||||
except OSError, e:
|
||||
except OSError as e:
|
||||
return "nsupdate: execution failed: %s" % e
|
||||
except:
|
||||
return "nsupdate: some error occured"
|
||||
|
||||
(output, err) = p.communicate(nsup)
|
||||
(output, err) = p.communicate(nsup.encode())
|
||||
if output.find('status: NOERROR') >= 0:
|
||||
return None
|
||||
return output
|
||||
@@ -255,9 +258,9 @@ answer
|
||||
#
|
||||
def dur(sec):
|
||||
sec = int(sec)
|
||||
h = sec / 3600
|
||||
m = (sec - h * 3600) / 60
|
||||
s = (sec - h * 3600) % 60
|
||||
h = int(sec / 3600)
|
||||
m = int((sec - h * 3600) / 60)
|
||||
s = int((sec - h * 3600) % 60)
|
||||
if h > 0:
|
||||
return "%d:%02d:%02d" % (h, m, s)
|
||||
if m > 0:
|
||||
@@ -266,11 +269,11 @@ def dur(sec):
|
||||
|
||||
|
||||
def fixsort():
    """Renumber all hosts by sorted key order.

    Each entry of ``hbdclass.Host.hosts`` gets its 0-based position in the
    sorted list of keys stored in its ``num`` attribute.
    """
    hosts = hbdclass.Host.hosts
    for position, key in enumerate(sorted(hosts)):
        hosts[key].num = position
|
||||
|
||||
#
|
||||
@@ -280,7 +283,7 @@ def on_exit():
|
||||
logf.close()
|
||||
except:
|
||||
pass
|
||||
print "exit"
|
||||
print("exit")
|
||||
|
||||
|
||||
def initlog(logfile):
|
||||
@@ -295,16 +298,18 @@ def initlog(logfile):
|
||||
#
|
||||
def checkoverdue():
|
||||
now = time.time()
|
||||
for h in Host.hosts.keys():
|
||||
for h in list(hbdclass.Host.hosts.keys()):
|
||||
pmsg = []
|
||||
for c in Host.hosts[h].connections:
|
||||
conn = Host.hosts[h].connections[c]
|
||||
if conn.state == Connection.down:
|
||||
for c in hbdclass.Host.hosts[h].connections:
|
||||
conn = hbdclass.Host.hosts[h].connections[c]
|
||||
if conn.state == hbdclass.Connection.down:
|
||||
continue
|
||||
timeout = Host.hosts[h].interval + grace
|
||||
if conn.state == Connection.up and (now - conn.lastbeat) > timeout:
|
||||
conn.newstate(Connection.overdue, now, grace)
|
||||
timeout = hbdclass.Host.hosts[h].interval + grace
|
||||
if conn.state == hbdclass.Connection.up and (now - conn.lastbeat) > timeout:
|
||||
conn.newstate(hbdclass.Connection.overdue, now, grace)
|
||||
pmsg.append(conn.afam)
|
||||
if conn.state == hbdclass.Connection.overdue and (now - conn.lastbeat) > DROPOVERDUE:
|
||||
conn.newstate(hbdclass.Connection.unknown, conn.lastbeat)
|
||||
if pmsg != []:
|
||||
if h in watchhosts:
|
||||
email("overdue", "%s overdue" % " and ".join(pmsg))
|
||||
@@ -313,8 +318,9 @@ def checkoverdue():
|
||||
|
||||
|
||||
def log(host, m, service=None):
|
||||
if DEBUG > 0: print "Log: %s" % m
|
||||
ts = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))
|
||||
if DEBUG > 0: print("Log: %s %s" % (host, m))
|
||||
now = time.time()
|
||||
ts = time.strftime("%b %d %H:%M:%S", time.localtime(now))
|
||||
if service:
|
||||
srv = "service %s: " % service
|
||||
else:
|
||||
@@ -327,7 +333,7 @@ def log(host, m, service=None):
|
||||
msg = "%s: %s%s%s\n" % (ts, hst, srv, m)
|
||||
msgs.append(msg)
|
||||
if logfmt == "msg":
|
||||
m2 = "%d|%s|%s\n" % (now, service, m)
|
||||
m2 = "%d|%s|%s\n" % (now, hst, m)
|
||||
else:
|
||||
m2 = msg
|
||||
logf.write(m2)
|
||||
@@ -337,7 +343,7 @@ def log(host, m, service=None):
|
||||
|
||||
def dnsupdatethread():
|
||||
while True:
|
||||
name, addr = Host.dnsQ.get()
|
||||
name, addr = hbdclass.Host.dnsQ.get()
|
||||
m = "changed address to %s" % (addr)
|
||||
err = nsupdate(name, addr)
|
||||
if err:
|
||||
@@ -345,7 +351,7 @@ def dnsupdatethread():
|
||||
email("error: nsupdate failed", "%s: %s" % (name, m))
|
||||
else:
|
||||
m += ", DNS updated."
|
||||
Host.dnsQ.task_done()
|
||||
hbdclass.Host.dnsQ.task_done()
|
||||
log(name, m)
|
||||
|
||||
#
|
||||
@@ -355,30 +361,38 @@ def dnsupdatethread():
|
||||
def readsock(sock):
|
||||
global now
|
||||
if DEBUG > 3: sys.stderr.write("readsock recfrom start")
|
||||
data, addrp = sock.recvfrom(MAXRECV)
|
||||
now = time.time()
|
||||
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addrp))
|
||||
msg = stodict(data)
|
||||
data, addrp = sock.recvfrom(MAXRECV)
|
||||
if DEBUG > 3: sys.stderr.write("readsock = %s, %s\n" % (data,addrp))
|
||||
try:
|
||||
msg = stodict(data)
|
||||
except:
|
||||
return
|
||||
if DEBUG > 3: sys.stderr.write("msg is %s" % str(msg))
|
||||
if not msg: # Old hbc client
|
||||
if verbose: print "old hbc:", data
|
||||
if verbose: print(("old hbc:", data))
|
||||
oldclient = True
|
||||
msg = oldmtodict(data)
|
||||
else:
|
||||
oldclient = False
|
||||
if DEBUG > 2: print "readsock = %s, %s" % (msg,addrp)
|
||||
if DEBUG > 2: print(("readsock = %s, %s" % (msg,addrp)))
|
||||
|
||||
addr = addrp[0:2]
|
||||
name = shortname(msg.get('name', "unknown"))
|
||||
if not name in Host.hosts: # was: hosts.has_key(name):
|
||||
host = Host(name)
|
||||
if verbose: print "XX: New host, num now %s" % (len(Host.hosts))
|
||||
if not name in hbdclass.Host.hosts: # was: hosts.has_key(name):
|
||||
host = hbdclass.Host(name)
|
||||
host.dyn = name in dyndnshosts
|
||||
if verbose: print(("XX: New host, num now %s" % (len(hbdclass.Host.hosts))))
|
||||
newh=True
|
||||
else:
|
||||
host = Host.hosts[name]
|
||||
host = hbdclass.Host.hosts[name]
|
||||
newh=False
|
||||
|
||||
cid = msg.get('id', 0)
|
||||
rtt = msg.get('rtt',None)
|
||||
try:
|
||||
rtt = float(msg.get('rtt',None))
|
||||
except:
|
||||
rtt = None
|
||||
|
||||
if msg['ID'] == 'HTB':
|
||||
host.doesack = msg.get('acks', -1)
|
||||
@@ -408,9 +422,9 @@ def readsock(sock):
|
||||
email("address change", "%s %s" % (host.name, res))
|
||||
pushover("%s %s" % (host.name, res))
|
||||
|
||||
if conn.getstate() != Connection.up: # XXX and interval > 0:
|
||||
if conn.getstate() != hbdclass.Connection.up: # XXX and interval > 0:
|
||||
lasts = conn.state
|
||||
d = conn.newstate(Connection.up, now)
|
||||
d = conn.newstate(hbdclass.Connection.up, now)
|
||||
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
|
||||
log(name, m)
|
||||
if name in watchhosts:
|
||||
@@ -428,7 +442,7 @@ def readsock(sock):
|
||||
if name in watchhosts:
|
||||
email("shutdown", "%s %s shutdown" % (name, conn.afam))
|
||||
pushover("%s %s shutdown" % (name, conn.afam))
|
||||
conn.newstate(Connection.down, now)
|
||||
conn.newstate(hbdclass.Connection.down, now)
|
||||
|
||||
if interval > 0:
|
||||
host.interval = interval
|
||||
@@ -440,8 +454,11 @@ def readsock(sock):
|
||||
rmsg = 'ACK'
|
||||
else:
|
||||
opkt = dicttos('ACK', rmsg, host.cver > 1) # clients w/ ver 2+ can cope
|
||||
ss=sock.sendto(opkt, addr)
|
||||
if DEBUG > 2: print "sendto1: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])
|
||||
try:
|
||||
ss=sock.sendto(opkt, addr)
|
||||
except:
|
||||
pass # XXX return pkg failes
|
||||
if DEBUG > 2: print(("sendto1: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])))
|
||||
|
||||
# send any commands we have queued
|
||||
while len(host.cmds):
|
||||
@@ -466,11 +483,11 @@ def readsock(sock):
|
||||
try:
|
||||
ss=sock.sendto(opkt, addr)
|
||||
except Exception as e:
|
||||
print "opkt len is %s" % len(opkt)
|
||||
print "cannot send: %s" % e
|
||||
print(("opkt len is %s" % len(opkt)))
|
||||
print(("cannot send: %s" % e))
|
||||
|
||||
if verbose: print "sendto2: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])
|
||||
if DEBUG > 2: print "msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss)
|
||||
if verbose: print(("sendto2: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50])))
|
||||
if DEBUG > 2: print(("msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss)))
|
||||
|
||||
|
||||
|
||||
@@ -488,19 +505,19 @@ def updatecode(ucode, uname):
|
||||
m.update(new_code)
|
||||
icsum = m.hexdigest()
|
||||
rmsg = {'csum': icsum, 'code': new_code.encode('base64','strict') }
|
||||
Host.hosts[uname].cmds.append(('UPD',rmsg))
|
||||
hbdclass.Host.hosts[uname].cmds.append(('UPD',rmsg))
|
||||
return fail
|
||||
|
||||
#
|
||||
# Web Server
|
||||
#
|
||||
class HttpServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
    """HTTPServer combined with ThreadingMixIn, using the Python 3 module names."""

    # Class attribute read by socketserver when the listening socket is bound.
    allow_reuse_address = True
|
||||
def threaded():
|
||||
pass
|
||||
#
|
||||
#
|
||||
class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
class HttpHandler(http.server.BaseHTTPRequestHandler):
|
||||
|
||||
server_version = "HeartbeatHTTP/%s" % VER
|
||||
|
||||
@@ -509,8 +526,9 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
|
||||
|
||||
def handle(self):
|
||||
return http.server.BaseHTTPRequestHandler.handle(self)
|
||||
try:
|
||||
return BaseHTTPServer.BaseHTTPRequestHandler.handle(self)
|
||||
return http.server.BaseHTTPRequestHandler.handle(self)
|
||||
except Exception as e:
|
||||
self.log_error("Request went away: %r", e)
|
||||
self.close_connection = 1
|
||||
@@ -525,7 +543,7 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
self.send_response(code)
|
||||
self.send_header("Last-Modified", time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(now)))
|
||||
# self.send_header("Accept-Ranges","bytes")
|
||||
# self.send_header("Connection","close")
|
||||
# self.send_header("hbdclass.Connection","close")
|
||||
for h in headerdict:
|
||||
self.send_header(h, headerdict[h])
|
||||
self.end_headers()
|
||||
@@ -549,10 +567,8 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
def buildpage(self):
|
||||
res=self.buildhead(refresh=60, extras=tcss)
|
||||
res.append("<H2>Heartbeat status %s</h2>" % VER)
|
||||
|
||||
|
||||
res += ubHost.buildhosttable()
|
||||
res += ubHost.buildmsgtable(msgs)
|
||||
res += hbdclass.ubHost.buildhosttable()
|
||||
res += hbdclass.ubHost.buildmsgtable(msgs)
|
||||
res.append('<p> %s (%s)</p>' % (time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT')))
|
||||
res.append("</body></html>")
|
||||
return res
|
||||
@@ -576,11 +592,11 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
global sig
|
||||
code = 200
|
||||
xsig = 0
|
||||
rqAcceptEncoding = self.headers.getheader('Accept-encoding',{})
|
||||
rqAcceptEncoding = self.headers.get('Accept-encoding',{})
|
||||
headerdict = {"Content-Type": "text/html; charset = ISO-8859-1" }
|
||||
if DEBUG > 2: sys.stderr.write("handle\n")
|
||||
qr = urlparse.urlparse(self.path)
|
||||
qa = urlparse.parse_qs(qr.query)
|
||||
qr = urllib.parse.urlparse(self.path)
|
||||
qa = urllib.parse.parse_qs(qr.query)
|
||||
|
||||
if DEBUG > 2: sys.stderr.write("handle = %s\n" % (qr.geturl()))
|
||||
if qr.path == "/":
|
||||
@@ -591,10 +607,10 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
ucmd=qa.get('c', [None])[0]
|
||||
if not ucmd or not uname:
|
||||
code, res=self.builderror(400, 'Argument error', "need h= and c= arguments")
|
||||
elif not Host.hosts.has_key(uname):
|
||||
elif uname not in hbdclass.Host.hosts:
|
||||
code, res=self.builderror(400, 'Data error', "h=%s not found" % uname)
|
||||
else:
|
||||
Host.hosts[uname].cmds.append(('CMD', {'cmd': urllib.unquote(ucmd)}))
|
||||
hbdclass.Host.hosts[uname].cmds.append(('CMD', {'cmd': urllib.parse.unquote(ucmd)}))
|
||||
res=self.buildhead()
|
||||
res.append("cmd %s queued for host %s" % (uname, ucmd))
|
||||
|
||||
@@ -602,11 +618,12 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
uname=qa.get('h',[None])[0]
|
||||
if not uname:
|
||||
code, res=self.builderror(400, 'Argument error', "need h= argument")
|
||||
if not uname in Host.hosts:
|
||||
if not uname in hbdclass.Host.hosts:
|
||||
code, res=self.builderror(400, 'Data error', "h=%s not found" % uname)
|
||||
else:
|
||||
log(uname, "dropped")
|
||||
del Host.hosts[uname]
|
||||
# for addr in hbdclass.Host.hosts[uname].0i
|
||||
del hbdclass.Host.hosts[uname]
|
||||
res=self.buildhead()
|
||||
res.append("Done")
|
||||
|
||||
@@ -614,30 +631,39 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
uname=qa.get('h',[None])[0]
|
||||
if not uname:
|
||||
code, res=self.builderror(400, 'Argument error', "need h= argument")
|
||||
if not uname in Host.hosts:
|
||||
if not uname in hbdclass.Host.hosts:
|
||||
code, res=self.builderror(400, 'Data error', "h=%s not found" % uname)
|
||||
else:
|
||||
ll = Host.hosts[uname].registerDns()
|
||||
ll = hbdclass.Host.hosts[uname].registerDns()
|
||||
res.append(ll)
|
||||
log(uname, ll)
|
||||
|
||||
elif qr.path == "/u": # update
|
||||
uname=qa.get('h',[None])[0]
|
||||
uname=urllib.parse.unquote(qa.get('h',[None])[0])
|
||||
ucode=qa.get('c', [None])[0]
|
||||
if not ucode or not uname:
|
||||
code, res=self.builderror(400, 'Argument error', "need h= and c= arguments")
|
||||
elif not Host.hosts.has_key(uname):
|
||||
elif uname != 'All' and uname not in hbdclass.Host.hosts:
|
||||
code, res=self.builderror(400, 'Data error', "h=%s not found" % uname)
|
||||
else:
|
||||
err = updatecode(ucode, urllib.unquote(uname))
|
||||
res=self.buildhead()
|
||||
res.append("3 Done: %s" % err if err else "OK")
|
||||
if uname != 'All':
|
||||
names = [uname]
|
||||
else:
|
||||
names = []
|
||||
for n in hbdclass.Host.hosts:
|
||||
if hbdclass.Host.hosts[n].cver >= 2: # earliest version that supports update
|
||||
names.append(n)
|
||||
for n in names:
|
||||
err = updatecode(ucode, n)
|
||||
res.append("update started for %s: %s<br>" % (n, err if err else "OK"))
|
||||
res.append("Done")
|
||||
|
||||
elif qr.path == "/api/0/hosts": # api access to host table
|
||||
headerdict = {"Content-Type": "application/json; charset=utf-8" }
|
||||
l=[]
|
||||
for h in Host.hosts:
|
||||
l.append(Host.hosts[h].jsons())
|
||||
for h in hbdclass.Host.hosts:
|
||||
l.append(hbdclass.Host.hosts[h].jsons())
|
||||
res=["["+",".join(l)+"]"]
|
||||
|
||||
elif qr.path == "/api/0/messages": # api access to host table
|
||||
@@ -657,9 +683,9 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
|
||||
if 'deflate' in rqAcceptEncoding:
|
||||
headerdict['Content-Encoding'] = "deflate"
|
||||
towrite = zlib.compress(string.join(res, "\n"), 6)
|
||||
towrite = zlib.compress("\n".join(res).encode(), 6)
|
||||
else:
|
||||
towrite = string.join(res, "\n")
|
||||
towrite = "\n".join(res)
|
||||
headerdict['Content-Length'] = len(towrite)
|
||||
headerdict['Cache-Control'] = 'private, must-revalidate, max-age=0'
|
||||
headerdict['Expires'] = 'Thu, 01 Jan 1970 00:00:00 GMT'
|
||||
@@ -710,9 +736,9 @@ def closeup():
|
||||
|
||||
|
||||
def restart():
    """Replace the running process with a fresh copy of this script.

    Re-executes ``sys.argv[0]`` with ``cmdargs`` appended after the program
    name.  ``os.execv`` does not return on success and raises ``OSError``
    on failure, so the final print is only a tripwire.
    """
    argv = [sys.argv[0]] + cmdargs
    if verbose:
        print("execv %s %s" % (sys.argv[0], argv))

    # exec* swaps the process image without flushing Python-level buffers,
    # so pending output would be lost.  The getattr guard tolerates streams
    # that were replaced by write-only stand-ins without a flush() method.
    for stream in (sys.stdout, sys.stderr):
        flush = getattr(stream, "flush", None)
        if flush is not None:
            flush()

    os.execv(sys.argv[0], argv)
    print("should not be here")
|
||||
|
||||
def saveandrestart():
|
||||
closeup()
|
||||
@@ -720,9 +746,9 @@ def saveandrestart():
|
||||
|
||||
|
||||
def pickleit():
    """Persist the daemon state to ``pickfile``.

    Writes three objects into one pickle stream, in this order:
    ``hbdclass.Host.hosts``, ``msgs`` and ``lastfm``.  A reader has to
    ``load()`` them back in the same sequence.
    """
    # pickle needs a binary file on Python 3; the context manager closes the
    # handle even when one of the dumps raises.
    with open(pickfile, 'wb') as pickf:
        pick = pickle.Pickler(pickf)
        for state in (hbdclass.Host.hosts, msgs, lastfm):
            pick.dump(state)
|
||||
@@ -762,17 +788,17 @@ for o, a in optlist:
|
||||
cmdargs += [o]
|
||||
|
||||
if helpflag:
|
||||
print "hbc HeartBeatDaemon"
|
||||
print "usage: hbd [-dfhvx] [-c configfile]"
|
||||
print
|
||||
print " -c configfile"
|
||||
print " -d display"
|
||||
print " -f run in foreground"
|
||||
print " -h this help"
|
||||
print " -v verbose"
|
||||
print " -x increase debug lvl"
|
||||
print
|
||||
print """ config file can contain
|
||||
print("hbc HeartBeatDaemon")
|
||||
print("usage: hbd [-dfhvx] [-c configfile]")
|
||||
print()
|
||||
print(" -c configfile")
|
||||
print(" -d display")
|
||||
print(" -f run in foreground")
|
||||
print(" -h this help")
|
||||
print(" -v verbose")
|
||||
print(" -x increase debug lvl")
|
||||
print()
|
||||
print(""" config file can contain
|
||||
logfile = /var/log/heartbeat.log
|
||||
logfmt = [text|msg]
|
||||
hb_port = 50003
|
||||
@@ -780,7 +806,7 @@ interval = 20
|
||||
hbd_port = 50004
|
||||
hbd_host = www.domain.com
|
||||
grace = 2
|
||||
"""
|
||||
""")
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
@@ -802,21 +828,28 @@ drophosts = []
|
||||
try:
|
||||
f = open(configfile, "r")
|
||||
if verbose:
|
||||
print "notice: using config file %s" % configfile
|
||||
print(("notice: using config file %s" % configfile))
|
||||
except:
|
||||
print "warning: running without config file: %s" % configfile
|
||||
print(("warning: running without config file: %s" % configfile))
|
||||
f = None
|
||||
|
||||
if f:
|
||||
while 1:
|
||||
l = f.readline()
|
||||
if len(l) == 0:
|
||||
ls = f.readline()
|
||||
if len(ls) == 0:
|
||||
break
|
||||
l = ls[:-1].strip()
|
||||
if len(l) == 0 or l[0] == "#":
|
||||
continue
|
||||
if verbose:
|
||||
print " %s" % l[:-1]
|
||||
r = l[:-1].split('=')
|
||||
print((" %s" % l))
|
||||
r = l.split('=')
|
||||
o = r[0].strip()
|
||||
a = eval(r[1].strip())
|
||||
try:
|
||||
a = eval(r[1].strip())
|
||||
except Exception as e:
|
||||
print(("error: %s" % str(r)))
|
||||
sys.exit(1)
|
||||
if o == 'interval':
|
||||
interval = a
|
||||
elif o == 'grace':
|
||||
@@ -842,38 +875,39 @@ if f:
|
||||
f.close()
|
||||
|
||||
if len(args) != 0:
|
||||
print "error: args"
|
||||
print("error: args")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if verbose:
|
||||
print "notice: logging to %s" % logfile
|
||||
print(("notice: logging to %s" % logfile))
|
||||
logf = initlog(logfile)
|
||||
|
||||
if 1 and os.path.exists(pickfile):
|
||||
if verbose: print "opening pickls %s" % pickfile
|
||||
pickf = open(pickfile, 'r')
|
||||
pick = cPickle.Unpickler(pickf)
|
||||
if verbose: print(("opening pickls %s" % pickfile))
|
||||
pickf = open(pickfile, 'rb')
|
||||
pick = pickle.Unpickler(pickf)
|
||||
try:
|
||||
Host.hosts = pick.load()
|
||||
hbdclass.Host.hosts = pick.load()
|
||||
msgs = pick.load()
|
||||
try:
|
||||
lastfm = pick.load()
|
||||
except:
|
||||
lastfm = ["","",""]
|
||||
pickf.close()
|
||||
except:
|
||||
except Exception as e:
|
||||
print(("load pickled failed: %s" % e))
|
||||
os.unlink(pickfile)
|
||||
Connection.htab = {}
|
||||
for h in Host.hosts.keys():
|
||||
Host.hosts[h].dyn = h in dyndnshosts
|
||||
Host.hosts[h].fixup()
|
||||
hbdclass.Connection.htab = {}
|
||||
for h in list(hbdclass.Host.hosts.keys()):
|
||||
hbdclass.Host.hosts[h].dyn = h in dyndnshosts
|
||||
hbdclass.Host.hosts[h].fixup()
|
||||
for h in drophosts:
|
||||
if h in Host.hosts:
|
||||
del Host.hosts[h]
|
||||
if verbose: print "%s pickled hosts loaded" % len(Host.hosts)
|
||||
if h in hbdclass.Host.hosts:
|
||||
del hbdclass.Host.hosts[h]
|
||||
if verbose: print(("%s pickled hosts loaded" % len(hbdclass.Host.hosts)))
|
||||
else:
|
||||
if verbose: print "no pickled data"
|
||||
if verbose: print("no pickled data")
|
||||
|
||||
|
||||
now = time.time()
|
||||
@@ -901,7 +935,7 @@ if not forground:
|
||||
pid = os.fork()
|
||||
if pid > 0:
|
||||
if verbose:
|
||||
print "daemoinizing... pid = %d" % pid
|
||||
print(("daemoinizing... pid = %d" % pid))
|
||||
sys.exit(0)
|
||||
|
||||
verbose = False
|
||||
@@ -922,14 +956,14 @@ if not forground:
|
||||
try:
|
||||
serv = HttpServer((hbd_host, hbd_port), HttpHandler)
|
||||
except:
|
||||
print "failed to start server on %s:%s" % (hbd_host, hbd_port)
|
||||
print(("failed to start server on %s:%s" % (hbd_host, hbd_port)))
|
||||
sys.exit(1)
|
||||
|
||||
servthread = threading.Thread(target=serv.serve_forever)
|
||||
servthread.daemon = True
|
||||
servthread.start()
|
||||
|
||||
Host.dnsQ = Queue.Queue()
|
||||
hbdclass.Host.dnsQ = queue.Queue()
|
||||
dnsT = threading.Thread(target=dnsupdatethread)
|
||||
dnsT.daemon = True
|
||||
dnsT.start()
|
||||
@@ -939,13 +973,13 @@ sig = 0
|
||||
signal.signal(signal.SIGTERM, handler)
|
||||
signal.signal(signal.SIGHUP, handler)
|
||||
|
||||
next = int(now)+15 # 15 seconds time to settle after (re-)start
|
||||
rnext = int(now)+15 # 15 seconds time to settle after (re-)start
|
||||
sleep = 1
|
||||
firstcheck = int(now) + 15
|
||||
|
||||
while running:
|
||||
sr = None
|
||||
if DEBUG > 2: sys.stderr.write("about to sleep = %s\n" % (sleep))
|
||||
if DEBUG > 3: sys.stderr.write("about to sleep = %s\n" % (sleep))
|
||||
try:
|
||||
sr = select.select(ilist, [], [], sleep)
|
||||
now = time.time()
|
||||
@@ -954,7 +988,7 @@ while running:
|
||||
running = False
|
||||
closeup()
|
||||
continue
|
||||
except select.error, value:
|
||||
except select.error as value:
|
||||
if value[0] != 4: # interrupted system call
|
||||
sys.stderr.write("select err %s %s" % (select.error, value))
|
||||
#raise os.error, value
|
||||
@@ -963,7 +997,7 @@ while running:
|
||||
except Exception as e:
|
||||
if DEBUG > 2: sys.stderr.write("select exception %s\n" % (str(e)))
|
||||
sys.exit(1)
|
||||
if DEBUG > 2: sys.stderr.write("woke from sleep = %s (%s)\n" % (str(sr), str(ilist)))
|
||||
if DEBUG > 3: sys.stderr.write("woke from sleep = %s (%s)\n" % (str(sr), str(ilist)))
|
||||
for fh in sr[0]:
|
||||
if fh in [sock, sock6]:
|
||||
readsock(fh)
|
||||
@@ -971,26 +1005,26 @@ while running:
|
||||
# serv.handle_request()
|
||||
else:
|
||||
sys.stderr.write("what happend just now?\n")
|
||||
if DEBUG > 2: sys.stderr.write("done handling, running is %s, sig is %s\n" % (running, sig))
|
||||
if DEBUG > 3: sys.stderr.write("done handling, running is %s, sig is %s\n" % (running, sig))
|
||||
|
||||
# check hour/day/week
|
||||
for v in xrange(3):
|
||||
for v in range(3):
|
||||
fm=tsfm[v]
|
||||
ts=time.strftime(tsfm[v], time.localtime(now))
|
||||
if ts != lastfm[v]:
|
||||
lastfm[v]=ts
|
||||
for h in Host.hosts.keys():
|
||||
Host.hosts[h].hdwcounts[v] = [Host.hosts[h].doesack, Host.hosts[h].upcount]
|
||||
for h in list(hbdclass.Host.hosts.keys()):
|
||||
hbdclass.Host.hosts[h].hdwcounts[v] = [hbdclass.Host.hosts[h].doesack, hbdclass.Host.hosts[h].upcount]
|
||||
|
||||
if now >= next and now >= firstcheck:
|
||||
next = now+1
|
||||
if now >= rnext and now >= firstcheck:
|
||||
rnext = now+1
|
||||
checkoverdue()
|
||||
|
||||
sleep = next-now
|
||||
sleep = rnext-now
|
||||
if sleep < 0:
|
||||
sys.stderr.write("sleep is negative! %s next = %s\n" % (sleep, next))
|
||||
sys.stderr.write("sleep is negative! %s next = %s\n" % (sleep, rnext))
|
||||
sleep = 0
|
||||
if DEBUG > 2: sys.stderr.write("sleep = %s next = %s\n" % (sleep, next))
|
||||
if DEBUG > 3: sys.stderr.write("sleep = %s next = %s\n" % (sleep, rnext))
|
||||
|
||||
if sig != 0:
|
||||
setrunning(False)
|
||||
|
||||
+37
-9
@@ -13,7 +13,13 @@ num = 0
|
||||
|
||||
MAXRTTS = 10
|
||||
|
||||
#
|
||||
DEBUG=2
|
||||
|
||||
def log(host, m):
    """Print a debug line for *host* carrying message *m*.

    Nothing is printed while the module-level ``DEBUG`` is falsy.
    """
    if not DEBUG:
        return
    print("class log: %s %s" % (host, m))
|
||||
|
||||
|
||||
class Connection:
|
||||
# map of addrs to names
|
||||
|
||||
@@ -38,6 +44,7 @@ class Connection:
|
||||
r = "new addr %s" % (addr)
|
||||
Connection.htab[addr] = self.host.name
|
||||
if self.host.isDynDns():
|
||||
log(self.host.name, "dns update %s" % self.addr)
|
||||
Host.dnsQ.put((self.host.name, self.addr))
|
||||
|
||||
def registerDns(self):
|
||||
@@ -51,6 +58,8 @@ class Connection:
|
||||
d['addr'] = self.addr
|
||||
if self.rtts[-1]:
|
||||
d['rtt'] = "%0.1f" % self.rtts[-1]
|
||||
elif self.state == Connection.unknown:
|
||||
d['rtt'] = ""
|
||||
else:
|
||||
d['rtt'] = "?"
|
||||
d['lastbeat'] = self.lastbeat
|
||||
@@ -60,18 +69,29 @@ class Connection:
|
||||
d['state'] = self.state
|
||||
if self.state == Connection.up:
|
||||
d['rttstate'] = d['rtt']
|
||||
elif self.state == Connection.overdue:
|
||||
d['rttstate'] = ''
|
||||
else:
|
||||
d['rttstate'] = d['state']
|
||||
d['statetime'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self.statetime))
|
||||
delta = now - self.statetime
|
||||
if delta > 86400:
|
||||
d['deltastatetime'] = time.strftime("%d %H:%M:%S", time.gmtime(delta))
|
||||
|
||||
if self.state == Connection.unknown:
|
||||
d['deltastatetime'] = ''
|
||||
elif delta > 86400:
|
||||
# d['deltastatetime'] = time.strftime("%d %H:%M:%S", time.gmtime(delta))
|
||||
d['deltastatetime'] = "%0.1f days" % (delta / 86400.)
|
||||
elif delta > 3600:
|
||||
d['deltastatetime'] = time.strftime("%H:%M:%S", time.gmtime(delta))
|
||||
# d['deltastatetime'] = time.strftime("%H:%M:%S", time.gmtime(delta))
|
||||
d['deltastatetime'] = time.strftime("%k:%M hrs", time.gmtime(delta))
|
||||
# d['deltastatetime'] = "%0.1f hrs" % (delta / 3600.)
|
||||
elif delta > 60:
|
||||
d['deltastatetime'] = time.strftime("%M:%S", time.gmtime(delta))
|
||||
# d['deltastatetime'] = time.strftime("%M:%S", time.gmtime(delta))
|
||||
d['deltastatetime'] = time.strftime("%M:%S mins", time.gmtime(delta))
|
||||
# d['deltastatetime'] = "%0.1f mins" % (delta / 60.)
|
||||
else:
|
||||
d['deltastatetime'] = time.strftime("%S", time.gmtime(delta))
|
||||
# d['deltastatetime'] = time.strftime("%S", time.gmtime(delta))
|
||||
d['deltastatetime'] = "%i secs" % (delta)
|
||||
|
||||
else:
|
||||
d['addr'] = ''
|
||||
@@ -159,6 +179,8 @@ class Host:
|
||||
def statedict(self):
|
||||
d = {}
|
||||
d['name'] = self.name
|
||||
if self.dyn:
|
||||
d['name'] += "*"
|
||||
d['dyn'] = str(self.dyn)
|
||||
d['ver'] = str(self.cver)
|
||||
d['num'] = self.num
|
||||
@@ -246,11 +268,13 @@ class Host:
|
||||
state = "<b>%s</b>" % self.state
|
||||
elif self.state in ["up", "UP"]:
|
||||
state = ""
|
||||
for x in self.connections.keys():
|
||||
for x in list(self.connections.keys()):
|
||||
try:
|
||||
state += " %5.1f" % (self.connections[x].rtts[-1])
|
||||
except:
|
||||
state += " %5s" % (self.connections[x].rtts[-1])
|
||||
elif self.state in ["unknown", "UNKNOWN"]:
|
||||
state = ""
|
||||
else:
|
||||
state = "%s" % self.state
|
||||
return state
|
||||
@@ -261,7 +285,7 @@ class Host:
|
||||
if self.upcount > 0:
|
||||
# return "(%0.1f%%) %s %s %s " % ((self.doesack * 100.0) / self.upcount, self.doesack, self.upcount, self.hdwcounts)
|
||||
r = ""
|
||||
for v in xrange(3):
|
||||
for v in range(3):
|
||||
a,u = self.hdwcounts[v]
|
||||
if (self.upcount - u) != 0:
|
||||
vs = "%0.0f" % (100.0 - (((self.doesack - a) * 100.0) / (self.upcount - u)))
|
||||
@@ -311,15 +335,19 @@ class Host:
|
||||
|
||||
|
||||
def buildhosttable(self, short=False):
    """Return the host status table as a list of HTML fragments.

    The list holds the opening ``<table>`` tag, one header row, one row per
    entry of ``Host.hosts`` ordered by key, and the closing tag.

    ``short`` is passed through unchanged to ``ubHost.htmltable``.
    """
    if DEBUG > 1:
        print("DBG buildhosttable: start")

    res = [
        '<table id="ntable" class="sortable">',
        ubHost.htmltable('th', ubHost.headerdict(), short),
    ]
    # sorted() works on the dict directly; on Python 3 a keys() view has no
    # .sort() method.
    for h in sorted(Host.hosts):
        res.append(ubHost.htmltable('td', Host.hosts[h].statedict(), short))
    res.append("</table>")

    if DEBUG > 1:
        print("DBG buildhosttable: %s" % res)
    return res
|
||||
|
||||
|
||||
|
||||
+5
-5
@@ -1,12 +1,12 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import sys
|
||||
import httplib, urllib
|
||||
import http.client, urllib.request, urllib.parse, urllib.error
|
||||
|
||||
def pushover(msg):
|
||||
conn = httplib.HTTPSConnection("api.pushover.net:443")
|
||||
conn = http.client.HTTPSConnection("api.pushover.net:443")
|
||||
conn.request("POST", "/1/messages.json",
|
||||
urllib.urlencode({
|
||||
urllib.parse.urlencode({
|
||||
"token": "ac7NLX2rPjXFareeDgLpXNoDf4iFmf",
|
||||
"user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK",
|
||||
"message": msg,
|
||||
@@ -18,6 +18,6 @@ def pushover(msg):
|
||||
|
||||
v=" ".join(sys.argv[1:])
|
||||
if pushover(v):
|
||||
print "delivered"
|
||||
print("delivered")
|
||||
else:
|
||||
print "NOT delivered"
|
||||
print("NOT delivered")
|
||||
|
||||
+6
-6
@@ -1,13 +1,13 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import sys
|
||||
import httplib, urllib
|
||||
import http.client, urllib.request, urllib.parse, urllib.error
|
||||
import getopt
|
||||
|
||||
def pushover(msg, title=""):
|
||||
conn = httplib.HTTPSConnection("api.pushover.net:443")
|
||||
conn = http.client.HTTPSConnection("api.pushover.net:443")
|
||||
conn.request("POST", "/1/messages.json",
|
||||
urllib.urlencode({
|
||||
urllib.parse.urlencode({
|
||||
"token": "aNY2xeYydxzabzihTjb3P2LMHhqhr2",
|
||||
"user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK",
|
||||
"message": msg,
|
||||
@@ -29,7 +29,7 @@ title="Nagios"
|
||||
optslist, args = [], []
|
||||
try:
|
||||
optslist, args = getopt.getopt(sys.argv[1:], 'ht:v')
|
||||
except getopt.error, cause:
|
||||
except getopt.error as cause:
|
||||
helpflag=True
|
||||
|
||||
lastyear=0
|
||||
@@ -44,6 +44,6 @@ v=" ".join(args)
|
||||
rc=pushover(v, title)
|
||||
if verbose:
|
||||
if rc:
|
||||
print "delivered"
|
||||
print("delivered")
|
||||
else:
|
||||
print "NOT delivered"
|
||||
print("NOT delivered")
|
||||
|
||||
Reference in New Issue
Block a user