1129 lines
24 KiB
Python
Executable File
1129 lines
24 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# $Id: hbd,v 1.38 2013/07/14 02:25:05 andreas Exp $
|
|
# Wait for heartbeat messages and act on them (or their absence)
|
|
#
|
|
VER = 3.00
|
|
|
|
import time
|
|
import os
|
|
import string
|
|
import sys
|
|
import socket
|
|
import atexit
|
|
import select
|
|
import SocketServer
|
|
import BaseHTTPServer
|
|
import getopt
|
|
import signal
|
|
import cPickle
|
|
import smtplib
|
|
import traceback
|
|
import urllib
|
|
import httplib
|
|
import threading
|
|
import Queue
|
|
import md5
|
|
import zlib
|
|
|
|
from subprocess import Popen, STDOUT, PIPE
|
|
|
|
|
|
# Notification channels: email() and pushover() return immediately when
# their flag is False.
SEND_EMAIL = False
SEND_PUSHOVER = True

# Debug verbosity; each -x on the command line adds one.
DEBUG = 0

MAXRECV = 32767  # buffer size passed to recvfrom() for heartbeat datagrams
LOGFILE = "/home/andreas/public_html/messages/andreas"  # default log file
PICKFILE = "/var/tmp/hbd.pick"  # pickled state: hosts, htab, msgs, lastfm
AEMAIL = ["andreas@wrede.ca"]  # recipients of notification mail
# NOTE(review): probably meant "heartbeat"; it appears in mail subjects, so
# the value is left unchanged.
NAME = "heatbeat"
SMTPSERVER = "localhost"

# Table of Hosts
hosts = {}
# map of addrs to names
htab = {}

# Formatted log lines, newest last.
msgs = []

# Sequence number handed to the next Host created.
num = 0
#AEW upcount = 0
PORT = 50003   # default UDP port for heartbeats
TPORT = 50004  # default TCP port for the HTTP status server
THOST = ""     # default bind address for the HTTP status server

verbose = False

INTERVAL = 10
GRACE = 2

os.environ['TZ'] = 'EST5EDT'
# The time module is already imported at this point; tzset() makes it
# re-read TZ instead of relying on platform-specific behaviour.
if hasattr(time, "tzset"):
    time.tzset()

# strftime formats for hour / day-of-month / week-of-year, and the last
# value seen for each (used to detect period rollover in the main loop).
tsfm = ["%H", "%d", "%U"]
lastfm = ["", "", ""]
|
|
|
|
def handler(signum, frame):
    """Signal handler: remember the signal number in the global ``sig``.

    While the daemon is running the signal is only recorded (the main loop
    inspects ``sig``). If the daemon is already stopping, the process exits
    with status 2.
    """
    global running, sig
    sig = signum
    if running:
        if verbose:
            sys.stderr.write("signal: %s running: %s frame: %s" % (sig, running, frame))
        return
    # Not running any more: a further signal terminates the process.
    if verbose:
        sys.stderr.write("NOT runing signal: %s running: %d" % (sig, running))
    sys.exit(2)
|
|
|
|
|
|
def shortname(name):
    """Return the part of *name* before the first '.' (the unqualified host name).

    A name without a dot is returned unchanged.
    """
    # str.split replaces the deprecated string.split() module function.
    return name.split('.', 1)[0]
|
|
|
|
|
|
def isIPv4(addr):
    """Return True when *addr* contains a '.' that is not its first character.

    NOTE(review): an IPv4-mapped IPv6 address such as '::ffff:1.2.3.4' also
    satisfies this test -- confirm whether such addresses can reach here.
    """
    first_dot = addr.find('.')
    return first_dot > 0
|
|
|
|
class NullDevice:
    """File-like sink whose write() discards everything."""

    def write(self, s):
        # Intentionally drop the data.
        return None
|
|
|
|
class LogDevice:
    """File-like object that appends every write to a log file and flushes."""

    def __init__(self, path="/tmp/log1"):
        """Open *path* for appending (default: the original /tmp/log1)."""
        self.fh = open(path, "a")

    def write(self, s):
        """Append *s* and flush immediately so nothing is lost on a crash."""
        self.fh.write(s)
        self.fh.flush()
|
|
|
|
|
|
def dicttos(ID, d, compress=False):
    """Serialise dict *d* into the wire format ``ID:k1=v1;k2=v2``.

    Float values are written with five decimals; everything else uses
    ``%s``. With *compress* true the payload is zlib-compressed (level 6)
    and the ID is prefixed with '!' so the receiver knows to decompress.
    """
    parts = []
    for key, val in d.items():
        if isinstance(val, float):
            parts.append("%s=%0.5f" % (key, val))
        else:
            parts.append("%s=%s" % (key, val))
    payload = ";".join(parts)
    if compress:
        return "!" + ID + ":" + zlib.compress(payload, 6)
    return ID + ":" + payload
|
|
|
|
|
|
def _stodict_number(text):
    """Convert *text* to int or float; return it unchanged if it is neither."""
    for conv in (int, float):
        try:
            return conv(text)
        except ValueError:
            pass
    return text


def stodict(msg):
    """Parse a wire message ``ID:k1=v1;k2=v2`` into a dict.

    Returns None when *msg* contains no ':' at all. Otherwise the result
    holds the message ID under 'ID' plus one entry per ';'-separated item.
    An item without '=' maps to None. A value starting with a digit is
    converted to int or float when possible, otherwise kept as a string.
    An ID starting with '!' marks a zlib-compressed payload.
    """
    d = {}
    r0 = msg.split(':', 1)
    if len(r0) == 1:
        return None
    if r0[0].startswith('!'):  # compressed
        # zlib has no uncompress(); the original call raised AttributeError.
        pk = zlib.decompress(r0[1])
        d['ID'] = r0[0][1:]
    else:
        pk = r0[1]
        d['ID'] = r0[0]
    for item in pk.split(';'):
        vr = item.split('=', 1)
        k = vr[0].strip()
        if len(vr) == 1:
            d[k] = None
            continue
        v = vr[1].strip()
        # SECURITY: this data arrives over UDP. The original ran eval() on
        # it, which executes arbitrary expressions and raises on values such
        # as "1.2.3.4"; plain numeric conversion is used instead.
        if v[:1].isdigit():
            v = _stodict_number(v)
        d[k] = v
    return d
|
|
|
|
|
|
def oldmtodict(msg):
    """Parse a message from an old client that sends no ID prefix.

    The payload is given the ID 'HTB' and handed to stodict().
    """
    prefixed = 'HTB:' + msg
    return stodict(prefixed)
|
|
|
|
class Host:
    """State kept for one heartbeat client.

    Instances register themselves in the global ``hosts`` table and are
    pickled with it, so attributes added later must also be handled in
    fixup().
    """

    # Values stored in self.state.
    up = "up"
    down = "down"
    overdue = "overdue"

    def __init__(self, name):
        """Create a host named *name* in state 'up' and add it to ``hosts``."""
        global num
        self.name = name
        self.addr4 = None
        self.addr6 = None
        self.num = num
        self.lastbeat = time.time()
        self.upcount = 0
        self.state = Host.up
        self.statetime = self.lastbeat
        self.interval = 0
        self.doesack = -1
        self.cmds = []
        self.cver = 0
        self.latency = 0
        # [doesack, upcount] snapshots taken at the last hour/day/week rollover.
        self.hdwcounts = [[0, 0], [0, 0], [0, 0]]
        num += 1
        hosts[name] = self

    def setcver(self, cver):
        """Record the client protocol version."""
        self.cver = cver

    def isDynDns(self):
        """Return True if this host is listed in the global ``dyndnshosts``."""
        return self.name in dyndnshosts

    def newaddr(self, addr):
        """Record the address a heartbeat came from.

        Updates addr4 or addr6 (chosen by isIPv4), keeps ``htab`` in step and
        queues a DNS update for dynamic-DNS hosts.

        Returns None when the address is unchanged, otherwise a short text
        describing the change.
        """
        field = "addr4" if isIPv4(addr) else "addr6"
        old = getattr(self, field)
        if old:
            if old == addr:
                return None
            r = "changed from %s to %s" % (old, addr)
            # Only drop the old mapping if it is still ours: two hosts can
            # share one address, and the original unconditional del raised
            # KeyError once the other host had already removed the entry.
            if htab.get(old) == self.name:
                del htab[old]
        else:
            r = "new addr %s" % (addr)
        setattr(self, field, addr)
        htab[addr] = self.name
        if self.isDynDns():
            dnsQ.put((self.name, addr))
        return r

    # called when reloading class from pickle, add new fields here
    def fixup(self):
        """Add attributes that are missing on instances loaded from an older pickle."""
        if not hasattr(self, "cmds"):
            self.cmds = []

        if not hasattr(self, "hdwcounts"):
            self.hdwcounts = [[self.doesack, self.upcount] for _ in range(3)]

        if hasattr(self, "addr4") and hasattr(self, "addr6"):
            # Legacy alias. The original assigned addr4 and then overwrote it
            # with addr6, leaving None for IPv4-only hosts; prefer whichever
            # address is set.
            self.addr = self.addr4 or self.addr6
        else:
            # Old pickle with a single 'addr' field: split it into addr4/addr6.
            print("fix %s: addr to addr4/6 fixup" % self.name)
            legacy = getattr(self, "addr", None)
            if legacy and isIPv4(legacy):
                self.addr4 = legacy
                self.addr6 = None
            else:
                self.addr4 = None
                self.addr6 = legacy
            if hasattr(self, "addr"):
                del self.addr

        if not hasattr(self, "latency"):
            self.latency = 0

    def getstate(self):
        """Return the current state string."""
        return self.state

    def dispstate(self):
        """Return the HTML table-cell text for the state column.

        down/overdue are shown in bold; for an up host the latency is shown
        instead of the state name.
        """
        if self.state in [Host.down, Host.overdue]:
            return "<b>%s</b>" % self.state
        if self.state in ["up", "UP"]:
            return "%0.1f" % self.latency
        return "%s" % self.state

    def dispstats(self):
        """Return three HTML cells (hour, day, week) with per-period statistics.

        Each cell shows 100 minus the percentage of acks per heartbeat since
        the period's snapshot; blank when that is 0, '-' when no heartbeat
        was counted in the period.
        """
        if self.doesack == -1:
            # Fixed markup: the original emitted '<td></td<td></td>>'.
            return '<td align="right">N/A</td><td></td><td></td>'
        if self.upcount <= 0:
            return "<td>(%s)</td><td></td><td></td>" % (self.doesack)
        r = ""
        for a, u in self.hdwcounts[:3]:
            beats = self.upcount - u
            if beats != 0:
                vs = "%0.0f" % (100.0 - (((self.doesack - a) * 100.0) / beats))
                if vs == "0":
                    vs = ""
            else:
                vs = "-"
            r += '<td align="right">%s</td>' % vs
        return r

    # set new state, return number of secs in previous state
    def newstate(self, state, when=0):
        """Switch to *state*, dated *when* seconds in the past.

        Returns the number of seconds spent in the previous state.
        """
        self.state = state
        now = time.time() - when
        s = now - self.statetime
        self.statetime = now
        return s
|
|
|
|
|
|
|
|
def email(s, msg):
    """Mail a notification with subject suffix *s* and body *msg* to AEMAIL.

    Returns None when mail is disabled (SEND_EMAIL false), "OK" on success
    and "Fail" when the recipients were refused. Any other SMTP problem is
    printed and triggers saveandrestart(), as before.
    """
    if not SEND_EMAIL:
        return
    ret = "OK"
    toaddrs = AEMAIL
    fromemail = "aew.heartbeat@wrede.ca"
    subj = "Info from %s: %s" % (NAME, s)
    date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime())
    body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromemail, subj, date, msg)
    # Bound up front so the cleanup below is safe when SMTP() itself fails.
    server = None
    try:
        server = smtplib.SMTP(SMTPSERVER)
        if DEBUG > 0:
            server.set_debuglevel(1)
        server.sendmail(fromemail, toaddrs, body)
    except smtplib.SMTPRecipientsRefused as errs:
        log(None, "cannot send email: %s\n" % (errs))
        ret = "Fail"
    except Exception:
        print("smtp error: " + traceback.format_exc())
        saveandrestart()
    if server is not None:
        try:
            server.quit()
        except Exception:
            # Best effort: the connection may already be gone.
            pass
    return ret
|
|
|
|
|
|
def pushover(msg):
    """Send *msg* as a push notification through the Pushover HTTPS API.

    Does nothing when SEND_PUSHOVER is false. Delivery is best effort:
    failures are ignored so a notification problem cannot stop the daemon.
    """
    if not SEND_PUSHOVER:
        return
    # A timeout keeps a hung network from blocking the caller (the main
    # heartbeat loop) indefinitely.
    conn = httplib.HTTPSConnection("api.pushover.net:443", timeout=30)
    try:
        try:
            # NOTE(review): API token and user key are hard-coded in the
            # source; consider moving them to the config file.
            conn.request("POST", "/1/messages.json",
                urllib.urlencode({
                    "token": "ac7NLX2rPjXFareeDgLpXNoDf4iFmf",
                    "user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK",
                    "message": msg, }), { "Content-type": "application/x-www-form-urlencoded" })
            conn.getresponse()
        except Exception:
            # Deliberately best effort.
            pass
    finally:
        # The original never closed the connection.
        conn.close()
|
|
|
|
|
|
# nsupdate: set the DNS A record for a fqdn
|
|
# return: None if ok, else error text
|
|
def nsupdate(hostname, newip):
    """Set the DNS address record of ``<hostname>.dy.wapanafa.org`` to *newip*.

    Runs /usr/local/bin/nsupdate and feeds it a script that replaces the
    A (IPv4) or AAAA (IPv6) record and a TXT record holding a UTC timestamp.

    Returns None when the tool's output contains 'status: NOERROR',
    otherwise an error text (the tool's output or a launch error).
    """
    D = {}
    D['domain'] = 'dy.wapanafa.org'
    D['fqdn'] = '%s.dy.wapanafa.org' % hostname
    D['dnsttl'] = '5'
    D['newip'] = newip
    D['ts'] = time.strftime('%Y-%m-%d.%H:%M:%S', time.gmtime())
    # Any ':' means IPv6. The original tested find(":") > 0, which treated
    # addresses starting with ':' (e.g. "::1") as IPv4.
    D['rrtype'] = 'AAAA' if ':' in newip else 'A'
    nsup = """update delete %(fqdn)s %(rrtype)s
update add %(fqdn)s %(dnsttl)s %(rrtype)s %(newip)s
update delete %(fqdn)s TXT
update add %(fqdn)s %(dnsttl)s TXT "Created: %(ts)s"
send
answer

""" % D

    if DEBUG > 0:
        log(None, "DBG: nsup %s" % nsup)
    cmd = ["/usr/local/bin/nsupdate", "-k", "/etc/dhcpc/K%(domain)s.+157+00000." % D, "-v"]
    if DEBUG > 0:
        log(None, "DBG: cmd %s" % cmd)
    try:
        p = Popen(cmd, shell=False, bufsize=1, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    except OSError as e:
        return "nsupdate: execution failed: %s" % e
    except Exception:
        return "nsupdate: some error occured"

    # stderr is merged into stdout, so only the first element is of interest.
    output = p.communicate(nsup)[0]
    if output.find('status: NOERROR') >= 0:
        return None
    return output
|
|
|
|
|
|
#
|
|
def dur(sec):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or '0:SS'.

    Fractions of a second are truncated.
    """
    hours, rest = divmod(int(sec), 3600)
    minutes, seconds = divmod(rest, 60)
    if hours > 0:
        return "%d:%02d:%02d" % (hours, minutes, seconds)
    if minutes > 0:
        return "%d:%02d" % (minutes, seconds)
    return "0:%02d" % seconds
|
|
|
|
|
|
def fixsort():
    """Renumber all hosts so that ``num`` follows the alphabetical name order."""
    names = hosts.keys()
    names.sort()
    for position, hostname in enumerate(names):
        hosts[hostname].num = position
|
|
|
|
#
|
|
def on_exit():
    """atexit hook: close the log file (ignoring any error) and print 'exit'."""
    if DEBUG > 0:
        sys.stderr.write("on_exit\n")
    try:
        logf.close()
    except:
        # The log may be unset or already closed; nothing more to do here.
        pass
    print("exit")
|
|
|
|
|
|
def initlog(logfile):
    """Open *logfile* for appending and return the file object.

    If opening in append mode fails, the file is opened for writing
    instead; an error from that second attempt propagates to the caller.
    """
    try:
        return open(logfile, "a+")
    except (IOError, OSError):
        # Narrowed from a bare except so programming errors are not hidden.
        pass
    return open(logfile, "w")
|
|
|
|
|
|
#
|
|
#
|
|
def checkoverdue():
    """Mark every 'up' host overdue whose last heartbeat is too old.

    A host is overdue when more than its interval plus the global grace
    period has passed since its last heartbeat. Watched hosts also trigger
    an e-mail and a push notification.
    """
    for h in hosts.keys():
        entry = hosts[h]
        if entry.state == Host.down:
            continue
        timeout = entry.interval + grace
        late = now - entry.lastbeat > timeout
        if not (entry.state == Host.up and late):
            continue
        if h in watchhosts:
            email("overdue", "%s is overdue" % h)
            pushover("%s is overdue" % h)
        # Date the state change back by the grace period.
        entry.newstate(Host.overdue, grace)
        log(h, "overdue")
|
|
|
|
|
|
def log(host, m, service=None):
    """Record event text *m*, optionally tagged with *host* and *service*.

    The line is appended to the in-memory history ``msgs``, written to the
    log file (plain text, or 'epoch|service|text' when logfmt is "msg"),
    and the daemon state is then pickled.
    """
    if DEBUG > 0:
        print("Log: %s" % m)
    ts = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))
    srv = "service %s: " % service if service else ""
    hst = "%s " % host if host else ""

    msg = "%s: %s%s%s\n" % (ts, hst, srv, m)
    msgs.append(msg)
    # Bound the history: it was growing forever and is pickled in full on
    # every call, while the status page only shows the newest 30 lines.
    del msgs[:-1000]
    if logfmt == "msg":
        m2 = "%d|%s|%s\n" % (now, service, m)
    else:
        m2 = msg
    logf.write(m2)
    logf.flush()
    pickleit()
|
|
|
|
|
|
def dnsupdatethread():
    """Worker thread: apply queued (name, addr) DNS updates forever.

    Each item taken from ``dnsQ`` is passed to nsupdate(); the outcome is
    logged and failures are also mailed.
    """
    while True:
        name, addr = dnsQ.get()
        m = "%s changed address to %s" % (name, addr)
        err = nsupdate(name, addr)
        if err:
            m += ", DNS failed: %s" % err
            email("error: nsupdate failed", m)
        else:
            m += ", DNS updated."
        dnsQ.task_done()
        # log() needs (host, text); the original log(m) raised TypeError and
        # killed this thread after the first update. The text already names
        # the host, so no host tag is passed.
        log(None, m)
|
|
|
|
#
|
|
#
|
|
#
|
|
#
|
|
def readsock(sock):
    """Receive one heartbeat datagram from *sock* and act on it.

    Parses the message, creates or updates the sending Host, logs and
    notifies about boot / message / address change / recovery / shutdown,
    replies with an ACK and then sends any commands queued for the host.
    A packet that cannot be parsed is reported on stderr and dropped.
    """
    if DEBUG > 3:
        sys.stderr.write("readsock recfrom start")
    data, addr = sock.recvfrom(MAXRECV)
    if DEBUG > 2:
        sys.stderr.write("readsock = %s, %s\n" % (data, addr))
    try:
        msg = stodict(data)
        if not msg:  # Old hbc client
            if verbose:
                print("old hbc: %s" % data)
            msg = oldmtodict(data)
    except Exception as e:
        # Network input: one bad packet must not take the daemon down.
        sys.stderr.write("readsock: dropping bad packet from %s: %s\n" % (addr[0], e))
        return
    if verbose:
        print("readsock = %s, %s" % (msg, addr))
    # str() guards against a purely numeric name, which parses to a number.
    name = shortname(str(msg.get('name', "unknown")))
    newh = name not in hosts
    if newh:
        host = Host(name)
    else:
        host = hosts[name]

    host.doesack = msg.get('acks', -1)
    host.setcver(msg.get('ver', 0))
    host.lastbeat = now

    interval = msg.get('interval', 0)
    shutdown = msg.get('shutdown', 0)
    service = msg.get('service', "unknown")
    message = msg.get('msg', None)
    boot = msg.get('boot', 0)
    host.latency = now - msg.get('time', 0)
    watched = name in watchhosts

    # The notification texts below replace uses of a variable 'm' that was
    # not yet assigned at these points (UnboundLocalError for watched hosts).
    if boot:
        log(name, "booted")
        if watched:
            note = "%s booted" % name
            email("booted", note)
            pushover(note)
    if message:
        log(name, "msg: %s" % message, service=service)
        if watched:
            note = "%s msg: %s" % (name, message)
            email("msg", note)
            pushover(note)

    res = host.newaddr(addr[0])
    if res:
        log(name, res)
        if watched:
            note = "%s %s" % (host.name, res)
            email("address change", note)
            pushover(note)

    if host.getstate() != Host.up and interval > 0:
        lasts = host.state
        d = host.newstate(Host.up)
        m = "back after being %s for %s" % (lasts, dur(d))
        log(name, m)
        if watched:
            email("back", name)
            pushover("%s is back" % name)
    # upcount counts heartbeats; it restarts from the client's ack counter
    # on boot or for a host seen for the first time.
    if boot or newh:
        host.upcount = host.doesack
    else:
        host.upcount += 1

    if shutdown:
        log(name, "shutdown")
        if watched:
            note = "%s shutdown" % name
            email("shutdown", note)
            pushover(note)
        host.newstate(Host.down)
    if interval > 0:
        host.interval = interval

    rmsg = {'time': time.time()}
    op = 'ACK'
    if host.cver < 1:
        # Version-0 clients expect the bare word.
        opkt = 'ACK'
        rmsg = 'ACK'
    else:
        opkt = dicttos('ACK', rmsg)
    ss = None
    try:
        ss = sock.sendto(opkt, addr)
    except Exception as e:
        print("cannot send ACK: %s" % e)
    if verbose:
        print("sendto1: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50]))

    # send any commands we have queued
    while host.cmds:
        # Pop first so an entry with an unknown op cannot loop forever.
        op, rmsg = host.cmds.pop(0)
        if op == 'CMD':
            email("%s cmd exec" % name, "command '%s' sent" % rmsg)
            log(name, "command sent")
            if host.cver < 1:
                rmsg = rmsg['cmd']
        elif op == 'UPD':
            log(name, "update initiated")
            if host.cver < 1:
                log(name, " ver 0 does not support UPD")
                continue
        if host.cver < 1:
            opkt = rmsg
            op = ""
        else:
            opkt = dicttos(op, rmsg, True)
        try:
            ss = sock.sendto(opkt, addr)
        except Exception as e:
            print("opkt len is %s" % len(opkt))
            print("cannot send: %s" % e)

        if verbose:
            print("sendto2: %s (%s) %s %s" % (addr, len(opkt), op, str(rmsg)[:50]))
    if DEBUG > 2:
        print("msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss))
|
|
|
|
|
|
|
|
def updatecode(ucode, uname):
    """Queue a code update for host *uname* from the file at path *ucode*.

    The file content is sent base64-encoded together with its MD5 checksum
    as an 'UPD' command the next time the host sends a heartbeat.

    Returns None on success, otherwise an error text.

    SECURITY: the HTTP handler passes *ucode* straight from the request URL,
    so any file readable by the daemon can be queued -- restrict access to
    the status port.
    """
    target = hosts.get(uname)
    if target is None:
        # Previously a KeyError raised only after the file had been read.
        return "unknown host: %s" % uname
    try:
        # 'with' closes the file even when read() fails.
        with open(ucode, "r") as fh:
            new_code = fh.read()
    except Exception as e:
        return "cannot read new code: %s" % e
    m = md5.new()
    m.update(new_code)
    icsum = m.hexdigest()
    rmsg = {'csum': icsum, 'code': new_code.encode('base64', 'strict')}
    target.cmds.append(('UPD', rmsg))
    return None
|
|
#
|
|
#
|
|
|
|
#class HttpServer(BaseHTTPServer.HTTPServer):
|
|
class HttpServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
    """HTTP status server that handles each request in its own thread."""

    # Allow re-binding the port right after a restart.
    allow_reuse_address = True

    def threaded(self):
        """Placeholder with no behaviour.

        The original definition had no ``self`` parameter and therefore
        could not be called at all.
        """
        pass
|
|
#
|
|
#
|
|
class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler for the status page and the small control API.

    Routes:
      /               status table plus the newest log lines
      /c?h=HOST&c=CMD queue a command for HOST
      /d?h=HOST       drop HOST from the table
      /n?h=HOST       run nsupdate for HOST's current address
      /u?h=HOST&c=PATH queue a code update read from PATH
      /r              request a restart (same as SIGHUP)
    """

    def _esc(self, text):
        """Escape *text* for inclusion in HTML."""
        return (str(text).replace("&", "&amp;").replace("<", "&lt;")
                .replace(">", "&gt;").replace('"', "&quot;"))

    def _arg(self, item, key):
        """Return the value of a 'key=value' query item, or '' if the key differs."""
        prefix = key + "="
        if item.startswith(prefix):
            return item[len(prefix):]
        return ""

    def _send_headers(self, code=200, cause=None):
        """Send the status line and the response headers."""
        self.send_response(code, cause)
        self.send_header("Last-Modified", time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(now)))
        # self.send_header("Accept-Ranges","bytes")
        # self.send_header("Connection","close")
        self.send_header("Content-Type", "text/html; charset = ISO-8859-1")
        self.end_headers()

    def do_HEAD(self):
        """Answer HEAD with 200 and the standard headers."""
        self._send_headers(200)

    def buildhead(self, title="Heartbeat", refresh=None):
        """Return the opening HTML lines; *refresh* adds an auto-refresh in seconds."""
        res = []
        res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
        res.append("<html>")
        res.append("<head>")
        res.append('<title>%s</title>' % (title))
        if refresh:
            res.append("<meta http-equiv = Refresh content = %d>\n" % refresh)
        res.append("</head>")
        res.append('<body BGCOLOR = "#FFFFFF" LINK = "#008000" VLINK = "#008000">')
        return res

    def buildpage(self):
        """Return the status page as a list of HTML lines."""
        res = self.buildhead(refresh=60)
        res.append("<H2>Heartbeat status %s</h2><h4> %s (%s)</H4>" % (VER, time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT')))

        res.append("<table>")
        res.append("<tr><th>Host</th><th>State</th><th>Hr</th><th>Dy</th><th>Wk</th><th>IP4 Addr</th><th>IP6 Addr</th><th>Last change</th><th>Ver</th></tr>\n")
        for h in sorted(hosts.keys()):
            entry = hosts[h]
            # Host names come from the network, so they are escaped.
            res.append("<tr><td>%-24s</td><td>%-7s</td>%s<td>%-16s</td><td>%-16s</td><td>%-17s</td><td>%s</td></tr>\n" % \
                (self._esc(h), entry.dispstate(), entry.dispstats(), entry.addr4, entry.addr6, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(entry.statetime)), self._esc(entry.cver)))
        res.append("</table>")

        res.append("<h4>Log of Events</h4>")
        # msgs[-30:] also works with fewer than 30 entries; the original
        # msgs[len(msgs)-30:] used a negative start then and hid lines.
        for m in msgs[-30:]:
            res.append("%s<BR>" % self._esc(m))
        res.append("</body></html>")
        return res

    def builderror(self, code, cause, lcause):
        """Return an error page for status *code* with short and long cause texts."""
        res = []
        res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
        res.append('<html><head>')
        res.append('<title>%s %s</title>' % (code, self._esc(cause)))
        res.append('</head><body>')
        res.append('<h1>%s</h1>' % (self._esc(cause)))
        # lcause can echo request parameters, so it must be escaped.
        res.append('<p>%s</p>' % self._esc(lcause))
        res.append('<hr>')
        res.append('<address>hbd (Unix) Server at %s:%s</address>' % (hbd_host, hbd_port))
        res.append('</body></html>')
        return res

    def do_GET(self):
        """Dispatch a GET request to one of the routes and send the page.

        The page is built before the status line is sent, so error pages
        carry their real status code (the original always answered 200).
        """
        global sig
        xsig = 0

        if DEBUG > 2:
            sys.stderr.write("handle\n")
        uri = self.path
        upar = uri.split("?")
        if len(upar) == 1:
            uarg = []
        else:
            uarg = upar[1].split("&")

        if DEBUG > 2:
            sys.stderr.write("handle = %s\n" % (uri))
        code = 200
        cause = "OK"
        if uri == "/":
            res = self.buildpage()

        elif upar[0] == "/c":  # command on host /c?h=melschserver&c=sudo%20ls
            uname = ""
            ucmd = ""
            if len(uarg) != 2 or len(uarg[0]) < 3 or len(uarg[1]) < 3:
                code = 400
                cause = 'Argument error'
                res = self.builderror(code, cause, "need h= and c= arguments")
            else:
                uname = self._arg(uarg[0], "h")
                ucmd = self._arg(uarg[1], "c")
                if ucmd != "" and uname != "" and uname in hosts:
                    rmsg = {'cmd': urllib.unquote(ucmd)}
                    hosts[uname].cmds.append(('CMD', rmsg))
                    res = self.buildhead()
                    res.append("2Done")
                else:
                    code = 400
                    cause = 'arg 2 error'
                    res = self.builderror(code, cause, "ucmd %s uname %s" % (ucmd, uname))

        elif upar[0] == "/d":  # drop host /d?h=melschserver
            if len(uarg) != 1 or len(uarg[0]) < 3:
                code = 400
                cause = 'Argument error'
                res = self.builderror(code, cause, "need h= argument")
            else:
                uname = self._arg(uarg[0], "h")
                if uname != "" and uname in hosts:
                    log(uname, "dropped")
                    del hosts[uname]
                    res = self.buildhead()
                    res.append("Done")
                else:
                    # Previously 'res' stayed unset here and the request
                    # died with NameError.
                    code = 400
                    cause = 'Argument error'
                    res = self.builderror(code, cause, "host not found")

        elif upar[0] == "/n":  # register name
            if len(uarg) != 1 or len(uarg[0]) < 3:
                code = 400
                cause = 'Argument error'
                res = self.builderror(code, cause, "need h= argument")
            else:
                res = self.buildhead()
                uname = self._arg(uarg[0], "h")
                if uname != "" and uname in hosts:
                    # Host objects store addr4/addr6; the 'addr' attribute the
                    # original read is not set on newly created hosts.
                    addr = hosts[uname].addr4 or hosts[uname].addr6
                    if addr:
                        err = nsupdate(uname, addr)
                        ll = "nsupdate request: %s" % err
                    else:
                        ll = "name %s has no known address" % uname
                else:
                    ll = "name %s not found" % uname
                res.append(self._esc(ll))
                log(uname, ll)

        elif upar[0] == "/u":  # update
            uname = ""
            ucode = ""
            if len(uarg) != 2 or len(uarg[0]) < 3 or len(uarg[1]) < 3:
                code = 400
                cause = 'Argument error'
                res = self.builderror(code, cause, "need h= and c= arguments")
            else:
                # Unquote before the lookup so the name that is checked is
                # the name that is used.
                uname = urllib.unquote(self._arg(uarg[0], "h"))
                ucode = self._arg(uarg[1], "c")
                if ucode != "" and uname != "" and uname in hosts:
                    err = updatecode(ucode, uname)
                    res = self.buildhead()
                    res.append(self._esc("3 Done: %s" % err) if err else "OK")
                else:
                    code = 400
                    cause = 'Argument error'
                    res = self.builderror(code, cause, "host not found")

        elif upar[0] == "/r":  # restart
            res = self.buildhead()
            res.append("restart request")
            xsig = signal.SIGHUP
            log(None, "restart request")

        else:
            code = 404
            cause = "Not Found"
            res = self.builderror(code, cause, "The requested URL was not found on this server.")

        self._send_headers(code, cause)
        self.wfile.write("\n".join(res))

        if xsig:
            # Picked up by the main loop, which then restarts the daemon.
            sig = xsig
|
|
|
|
def setrunning(new):
    """Set the global ``running`` flag that controls the main loop."""
    global running
    if DEBUG > 0:
        sys.stderr.write("running is now = %s\n" % (new))
    running = new
|
|
|
|
|
|
def closeup():
    """Stop the daemon's services before a restart or exit.

    Clears the running flag, closes both heartbeat sockets, stops and
    closes the HTTP server, logs 'restarting', closes the log file and
    restores the default SIGHUP action. Every step is best effort.
    """
    setrunning(False)
    try:
        sock.close()
    except Exception:
        pass
    try:
        sock6.close()
    except Exception:
        pass

    if DEBUG > 0:
        sys.stderr.write("asking http server to stop\n")
    try:
        serv.shutdown()
        if DEBUG > 0:
            sys.stderr.write("http server stopped\n")
    except Exception as e:
        if DEBUG > 0:
            sys.stderr.write("http server did NOT stop: %s\n" % str(e))

    try:
        serv.server_close()
    except Exception:
        pass

    log(None, "restarting")
    try:
        logf.close()
    except Exception:
        pass

    # signal.signal(signal.SIGTERM, 0)
    # SIG_DFL is the named form of the literal 0 used before.
    signal.signal(signal.SIGHUP, signal.SIG_DFL)
|
|
|
|
|
|
def restart():
    """Replace the current process with a fresh run of this script.

    The new process gets the options collected in the global ``cmdargs``.
    """
    program = sys.argv[0]
    argv = [program] + cmdargs
    print("execv %s %s" % (program, argv))
    os.execv(program, argv)
    # Only reached if execv returns.
    print("should not be here")
|
|
|
|
def saveandrestart():
    """Shut the services down with closeup(), then re-exec via restart()."""
    closeup()
    restart()
|
|
|
|
|
|
def pickleit():
    """Save hosts, htab, msgs and lastfm (in that order) to PICKFILE.

    The data is written to a temporary file next to PICKFILE and renamed
    over it afterwards, so an interrupted write leaves the previous state
    file intact. The file is closed even when pickling fails.
    """
    tmpname = PICKFILE + ".tmp"
    pickf = open(tmpname, 'w')
    try:
        pick = cPickle.Pickler(pickf)
        pick.dump(hosts)
        pick.dump(htab)
        pick.dump(msgs)
        pick.dump(lastfm)
    finally:
        pickf.close()
    os.rename(tmpname, PICKFILE)
|
|
|
|
|
|
#
|
|
# Main
|
|
#
|
|
|
|
helpflag = False
forground = False
optlist = []
args = []
# expanduser returns $HOME when it is set and still works when it is not.
home = os.path.expanduser("~")
cmdargs = []  # options passed on again when the daemon re-execs itself
configfile = "%s/.hbrc" % home

try:
    # -h takes no argument; the original spec 'h:' demanded one.
    optlist, args = getopt.getopt(sys.argv[1:], 'c:dfhvx')
except getopt.GetoptError:
    helpflag = True

for o, a in optlist:
    if o == '-c':
        configfile = a
        cmdargs += [o, a]
    elif o == '-f':
        forground = True
        cmdargs += [o]
    elif o == '-h':
        helpflag = True
    elif o == '-v':
        verbose = True
        cmdargs += [o]
    elif o == '-x':
        DEBUG += 1
        cmdargs += [o]
    # -d is accepted but has no effect.

if helpflag:
    print("hbc HeartBeatDaemon")
    print("usage: hbd [-dfhvx] [-c configfile]")
    print("")
    print(" -c configfile")
    print(" -d display")
    print(" -f run in foreground")
    print(" -h this help")
    print(" -v verbose")
    print(" -x increase debug lvl")
    print("")
    print(""" config file can contain
logfile = /var/log/heartbeat.log
logfmt = [text|msg]
hb_port = 50003
interval = 20
hbd_port = 50004
hbd_host = www.domain.com
grace = 2
""")

    sys.exit(1)

#
# set defaults

hb_port = PORT
hbd_host = THOST
hbd_port = TPORT
logfile = LOGFILE
logfmt = "text"
interval = INTERVAL
grace = GRACE
watchhosts = []
dyndnshosts = []
drophosts = []

try:
    f = open(configfile, "r")
    if verbose:
        print("notice: using config file %s" % configfile)
except (IOError, OSError):
    print("warning: running without config file: %s" % configfile)
    f = None

if f:
    import ast

    # Values are read as Python literals (numbers, quoted strings, lists).
    # Anything that is not a literal is kept as plain text, so the unquoted
    # form shown in the help above works too. The original eval() accepted
    # arbitrary expressions and did not strip whitespace around '='.
    conf = {}
    for l in f:
        l = l.rstrip("\n")
        if verbose:
            print(" %s" % l)
        r = l.split('=', 1)
        if len(r) != 2:
            continue
        raw = r[1].strip()
        try:
            conf[r[0].strip()] = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            conf[r[0].strip()] = raw
    f.close()

    interval = conf.get('interval', interval)
    grace = conf.get('grace', grace)
    hbd_port = conf.get('hbd_port', hbd_port)
    hbd_host = conf.get('hbd_host', hbd_host)
    hb_port = conf.get('hb_port', hb_port)
    logfile = conf.get('logfile', logfile)
    logfmt = conf.get('logfmt', logfmt)
    watchhosts = conf.get('watchhosts', watchhosts)
    dyndnshosts = conf.get('dyndnshosts', dyndnshosts)
    drophosts = conf.get('drophosts', drophosts)

if len(args) != 0:
    print("error: args")
    sys.exit(1)

if verbose:
    print("notice: logging to %s" % logfile)
logf = initlog(logfile)

# Restore the state saved by pickleit(); an unreadable file is discarded.
if os.path.exists(PICKFILE):
    pickf = open(PICKFILE, 'r')
    try:
        pick = cPickle.Unpickler(pickf)
        try:
            hosts = pick.load()
            htab = pick.load()
            msgs = pick.load()
            try:
                lastfm = pick.load()
            except Exception:
                # Older state files carry no lastfm entry.
                lastfm = ["", "", ""]
        except Exception:
            os.unlink(PICKFILE)
    finally:
        pickf.close()
    for h in hosts.keys():
        hosts[h].fixup()
    for h in drophosts:
        if h in hosts:
            del hosts[h]

now = time.time()
startsec = int(now) % interval

log(None, "Starting %s" % VER)
atexit.register(on_exit)

ilist = []

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
# SO_REUSEADDR must be set before bind() to have any effect (the IPv6
# socket below already did it in this order).
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind(("", hb_port))
ilist.append(sock)

sock6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
sock6.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock6.bind(("", hb_port))
ilist.append(sock6)

#ilist.append(serv.fileno())

if not forground:
    pid = os.fork()
    if pid > 0:
        if verbose:
            print("daemoinizing... pid = %d" % pid)
        sys.exit(0)

    # Child: detach from the terminal.
    verbose = False
    os.close(0)
    os.close(1)
    os.close(2)
    sys.stdin.close()
    if DEBUG > 0:
        sys.stdout = LogDevice()
        sys.stderr = LogDevice()
    else:
        sys.stdout = NullDevice()
        sys.stderr = NullDevice()
    os.chdir("/tmp")
    os.setsid()
    os.umask(0)

try:
    serv = HttpServer((hbd_host, hbd_port), HttpHandler)
except Exception:
    print("failed to start server on %s:%s" % (hbd_host, hbd_port))
    sys.exit(1)

servthread = threading.Thread(target=serv.serve_forever)
servthread.daemon = True
servthread.start()

dnsQ = Queue.Queue()
dnsT = threading.Thread(target=dnsupdatethread)
dnsT.daemon = True
dnsT.start()

running = True
sig = 0
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGHUP, handler)

# 'next' and 'sleep' were renamed; 'next' shadowed the builtin.
next_check = int(now) + 15  # 15 seconds time to settle after (re-)start
sleep_for = 1
firstcheck = int(now) + 15

while running:
    sr = None
    if DEBUG > 2:
        sys.stderr.write("about to sleep = %s\n" % (sleep_for))
    try:
        sr = select.select(ilist, [], [], sleep_for)
        now = time.time()
    except KeyboardInterrupt:
        sys.stderr.write("Keyboard Interrupt!\n")
        running = False
        closeup()
        continue
    except select.error as value:
        if value[0] != 4:  # 4 == EINTR, interrupted system call
            sys.stderr.write("select err %s %s" % (select.error, value))
            #raise os.error, value
        continue
    except Exception as e:
        if DEBUG > 2:
            sys.stderr.write("select exception %s\n" % (str(e)))
        sys.exit(1)
    if DEBUG > 2:
        sys.stderr.write("woke from sleep = %s (%s)\n" % (str(sr), str(ilist)))
    for fh in sr[0]:
        if fh in [sock, sock6]:
            readsock(fh)
        # elif fh == serv.fileno():
        #     serv.handle_request()
        else:
            sys.stderr.write("what happend just now?\n")
    if DEBUG > 2:
        sys.stderr.write("done handling, running is %s, sig is %s\n" % (running, sig))

    # check hour/day/week: when a period rolls over, snapshot every host's
    # counters so dispstats() can report per-period figures.
    for v in range(3):
        ts = time.strftime(tsfm[v], time.localtime(now))
        if ts != lastfm[v]:
            lastfm[v] = ts
            for h in hosts.keys():
                hosts[h].hdwcounts[v] = [hosts[h].doesack, hosts[h].upcount]

    if now >= next_check and now >= firstcheck:
        next_check = now + 1
        checkoverdue()

    sleep_for = next_check - now
    if sleep_for < 0:
        sys.stderr.write("sleep is negative! %s next = %s\n" % (sleep_for, next_check))
        sleep_for = 0
    if DEBUG > 2:
        sys.stderr.write("sleep = %s next = %s\n" % (sleep_for, next_check))

    if sig != 0:
        # A signal handler or the /r request asked us to stop.
        setrunning(False)

if sig == signal.SIGHUP:
    if DEBUG > 0:
        sys.stderr.write("signal 1 saveandrestart\n")
    saveandrestart()
|