Merge branch 'master' of git.wrede.ca:andreas/heartbeat

untobandle
This commit is contained in:
2015-02-14 17:24:31 -05:00
+85 -202
View File
@@ -2,7 +2,7 @@
# $Id: hbd,v 1.38 2013/07/14 02:25:05 andreas Exp $ # $Id: hbd,v 1.38 2013/07/14 02:25:05 andreas Exp $
# Wait for heartbeat messages and act on them (or their absence) # Wait for heartbeat messages and act on them (or their absence)
# #
VER = 1.52 VER = 1.62
import time import time
import os import os
@@ -50,7 +50,6 @@ verbose = False
INTERVAL = 10 INTERVAL = 10
GRACE = 2 GRACE = 2
visual = 0
os.environ['TZ'] = 'EST5EDT' os.environ['TZ'] = 'EST5EDT'
stdscr = None stdscr = None
@@ -101,16 +100,22 @@ class Host:
self.interval = 0 self.interval = 0
self.doesack = -1 self.doesack = -1
self.cmds = [] self.cmds = []
self.hdwcounts = [0,0,0]
num += 1 num += 1
# called when reloading class from pickle # called when reloading class from pickle, add new fields here
def fixup(self): def fixup(self):
try: try:
a=self.cmds a=self.cmds
except: except:
self.cmds=[] self.cmds=[]
try:
a=self.hdwcounts
except:
self.hdwcounts = [0,0,0]
def getstate(self): def getstate(self):
return self.state return self.state
@@ -120,7 +125,10 @@ class Host:
else: else:
state = "%s" % self.state state = "%s" % self.state
if self.doesack != -1: if self.doesack != -1:
return "%s(%s)" % (state, self.doesack) if self.upcount > 0:
return "%s(%0.1f%%) %s %s " % (state, (self.doesack * 100.0) / self.upcount, self.doesack, self.upcount)
else:
return "%s(%s)" % (state, self.doesack)
return state return state
# set new state, return number of secs in previous state # set new state, return number of secs in previous state
@@ -129,8 +137,6 @@ class Host:
now = time.time()-when now = time.time()-when
s = now-self.statetime s = now-self.statetime
self.statetime = now self.statetime = now
if visual:
displaystatetime(self.name)
return s return s
@@ -165,13 +171,15 @@ def pushover(msg):
if not SEND_PUSHOVER: if not SEND_PUSHOVER:
return return
conn = httplib.HTTPSConnection("api.pushover.net:443") conn = httplib.HTTPSConnection("api.pushover.net:443")
conn.request("POST", "/1/messages.json", try:
urllib.urlencode({ conn.request("POST", "/1/messages.json",
"token": "ac7NLX2rPjXFareeDgLpXNoDf4iFmf", urllib.urlencode({
"user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK", "token": "ac7NLX2rPjXFareeDgLpXNoDf4iFmf",
"message": msg, "user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK",
}), { "Content-type": "application/x-www-form-urlencoded" }) "message": msg, }), { "Content-type": "application/x-www-form-urlencoded" })
conn.getresponse() conn.getresponse()
except:
pass
# nsupdate: set the DNS A record for a fqdn # nsupdate: set the DNS A record for a fqdn
@@ -226,8 +234,6 @@ def addhost(name, addr):
if sname in hosts: # was: hosts.has_key(sname): if sname in hosts: # was: hosts.has_key(sname):
del htab[hosts[sname].addr] del htab[hosts[sname].addr]
hosts[sname].addr = addr hosts[sname].addr = addr
if visual:
displayaddr(sname)
htab[addr] = sname htab[addr] = sname
m = "%s, changed address to %s" % (sname, addr) m = "%s, changed address to %s" % (sname, addr)
log(m) log(m)
@@ -240,14 +246,10 @@ def addhost(name, addr):
hosts[n].num = x hosts[n].num = x
x += 1 x += 1
htab[addr] = sname htab[addr] = sname
if visual:
display()
# #
def on_exit(): def on_exit():
if visual:
exitcurses()
if DEBUG: if DEBUG:
sys.stderr.write("on_exit\n") sys.stderr.write("on_exit\n")
logf.close() logf.close()
@@ -259,33 +261,6 @@ def initlog(logfile):
# #
#
def initwin():
global win, msgw, msgwB, msgwHeight
maxY, maxX = stdscr.getmaxyx()
begin_x = 0
begin_y = 2
height = len(htab)+2
if DEBUG:
log("initwin called with %d" % height)
win = curses.newwin(height, maxX, begin_y, begin_x)
a = win.border(0, 0, 0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)
msgwB = curses.newwin(0, 0, height+1, begin_x)
msgwB.border(0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE)
msgwHeight = maxY-height-3
msgw = curses.newwin(msgwHeight, maxX-2, height+2, begin_x+1)
msgw.setscrreg(0, msgwHeight-1)
msgw.scrollok(1)
stdscr.addstr(0, 0, "hbd Version %s" % VER, curses.A_BOLD)
stdscr.refresh()
msgwB.refresh()
# #
def checkoverdue(): def checkoverdue():
@@ -302,73 +277,6 @@ def checkoverdue():
log(m) log(m)
#
#
def displaytime():
maxY, maxX = stdscr.getmaxyx()
stdscr.addstr(0, maxX-8, time.strftime("%H:%M:%S", time.localtime(now)), curses.A_BOLD)
for h in hosts.keys():
d = hosts[h].getstate()
attr = 0
if verbose and hosts[h].state != Host.down:
d = dur(now-hosts[h].lastbeat)
if hosts[h].state == Host.overdue:
attr = curses.A_BOLD
win.addstr(hosts[h].num+1, 25, "%8s" % d, attr)
win.refresh()
stdscr.refresh()
#
#
def displaystatetime(h, refresh=1):
win.addstr(hosts[h].num+1, 60, "%-17s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(hosts[h].statetime)))
if refresh:
win.refresh()
#
#
def displayaddr(h, refresh=1):
win.addstr(hosts[h].num+1, 35, "%-16s" % hosts[h].addr)
if refresh:
win.refresh()
#
#
def displaybody():
for h in hosts.keys():
win.addstr(hosts[h].num+1, 1, "%-25s" % (h))
if hosts[h].addr is not None:
displayaddr(h, 0)
if hosts[h].statetime is not None:
displaystatetime(h, 0)
win.refresh()
#
#
def displaymsgs():
global msgw, msgs
y = 0
for m in msgs[len(msgs)-msgwHeight:]:
msgw.addstr(y, 0, m)
y += 1
msgw.refresh()
#
#
def display():
if visual:
initwin()
displaytime()
displaybody()
displaymsgs()
def log(m, service="heartbeat"): def log(m, service="heartbeat"):
if DEBUG: print "Log: %s" % m if DEBUG: print "Log: %s" % m
msg = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))+": "+m+"\n" msg = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))+": "+m+"\n"
@@ -394,8 +302,11 @@ def log(m, service="heartbeat"):
def fromaddr(name, addr, boot, interval, acks): def fromaddr(name, addr, boot, interval, acks):
global htab global htab
newh=False
if not name in hosts: # was: hosts.has_key(name): if not name in hosts: # was: hosts.has_key(name):
addhost(name, addr) addhost(name, addr)
newh=True
host = hosts[name] host = hosts[name]
host.doesack = acks host.doesack = acks
if host.addr != addr: if host.addr != addr:
@@ -425,7 +336,10 @@ def fromaddr(name, addr, boot, interval, acks):
if name in watchhosts: if name in watchhosts:
email("back", name) email("back", name)
pushover("%s is back" % name) pushover("%s is back" % name)
host.upcount += 1 if boot or newh:
host.upcount = host.doesack
else:
host.upcount += 1
# #
@@ -524,24 +438,6 @@ def readsock(sock):
# #
# #
#
def initcurses():
global stdscr
stdscr = curses.initscr()
curses.noecho()
curses.cbreak()
stdscr.keypad(1)
if DEBUG:
sys.stderr.write("curses init done: %s\n" % stdscr)
def exitcurses():
curses.nocbreak()
stdscr.keypad(0)
curses.echo()
curses.endwin()
class HtmlServer(SocketServer.TCPServer): class HtmlServer(SocketServer.TCPServer):
allow_reuse_address = True allow_reuse_address = True
# #
@@ -577,6 +473,18 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
res.append("</body></html>") res.append("</body></html>")
return res return res
def builderror(self, code, cause, lcause):
res=[]
res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
res.append('<html><head>')
res.append('<title>%s %s</title>' % (code, cause))
res.append('</head><body>')
res.append('<h1>%s</h1>' % (cause))
res.append('<p>%s</p>' % lcause)
res.append('<hr>')
res.append('<address>hbd (Unix) Server at %s Port %s</address>' % (hbd_host, hbd_port))
res.append('</body></html>')
return res
def handle(self): def handle(self):
global sig, running global sig, running
@@ -612,35 +520,50 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
elif upar[0] == "/c": # command on host /c?h=melschserver&c=sudo%20ls elif upar[0] == "/c": # command on host /c?h=melschserver&c=sudo%20ls
uname="" uname=""
ucmd="" ucmd=""
if uarg[0][:2] == "h=": if len(uarg) != 2 or len(uarg[0]) < 3 or len(uarg[1]) < 3:
uname=uarg[0][2:] code=400
if uarg[1][:2] == "c=": cause='Argument error'
ucmd=uarg[1][2:] res=self.builderror(code, cause, "need h= and c= arguments")
if ucmd != "" and uname != "" and hosts.has_key(uname): else:
hosts[uname].cmds.append(urllib.unquote(ucmd)) if uarg[0][:2] == "h=":
res=self.buildhead() uname=uarg[0][2:]
res.append("2Done") if uarg[1][:2] == "c=":
ucmd=uarg[1][2:]
if ucmd != "" and uname != "" and hosts.has_key(uname):
hosts[uname].cmds.append(urllib.unquote(ucmd))
res=self.buildhead()
res.append("2Done")
elif upar[0] == "/d": # drop host /d?h=melschserver elif upar[0] == "/d": # drop host /d?h=melschserver
if uarg[0][:2] == "h=": if len(uarg) != 1 or len(uarg[0]) < 3:
uname=uarg[0][2:] code=400
if uname != "" and hosts.has_key(uname): cause='Argument error'
del hosts[uname] res=self.builderror(code, cause, "need h= argument")
log("%s dropped" % uname) else:
res=self.buildhead() if uarg[0][:2] == "h=":
res.append("Done") uname=uarg[0][2:]
if uname != "" and hosts.has_key(uname):
del hosts[uname]
log("%s dropped" % uname)
res=self.buildhead()
res.append("Done")
elif upar[0] == "/n": # register name elif upar[0] == "/n": # register name
res=self.buildhead() if len(uarg) != 1 or len(uarg[0]) < 3:
if uarg[0][:2] == "h=": code=400
uname=uarg[0][2:] cause='Argument error'
if uname != "" and hosts.has_key(uname): res=self.builderror(code, cause, "need h= argument")
err = nsupdate(uname, hosts[uname].addr)
ll="nsupdate request: %s" % err
else: else:
ll="name %s not found" % uname res=self.buildhead()
res.append(ll) if uarg[0][:2] == "h=":
log(ll) uname=uarg[0][2:]
if uname != "" and hosts.has_key(uname):
err = nsupdate(uname, hosts[uname].addr)
ll="nsupdate request: %s" % err
else:
ll="name %s not found" % uname
res.append(ll)
log(ll)
elif upar[0] == "/r": # restart elif upar[0] == "/r": # restart
res=self.buildhead() res=self.buildhead()
@@ -650,18 +573,10 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
log("restart request") log("restart request")
else: else:
code = 404 code=404
cause = "Not Found" cause="Not Found"
res=[] res=self.builderror(code, cause, "The requested URL was not found on this server.")
res.append('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">')
res.append('<html><head>')
res.append('<title>%s %s</title>' % (code, cause))
res.append('</head><body>')
res.append('<h1>%s</h1>' % (cause))
res.append('<p>The requested URL %s was not found on this server.</p>' % uri)
res.append('<hr>')
res.append('<address>hbd (Unix) Server at %s Port %s</address>' % (hbd_host, hbd_port))
res.append('</body></html>')
self.request.send("HTTP/1.0 %s %s\r\n" % (code, cause)) self.request.send("HTTP/1.0 %s %s\r\n" % (code, cause))
for h in headers: for h in headers:
@@ -676,6 +591,7 @@ class HtmlHandler(SocketServer.BaseRequestHandler):
def saveandrestart(): def saveandrestart():
sock.close() sock.close()
sock6.close()
# serv.shutdown() #N.B. dont shutdown() as we don't use serv_forever # serv.shutdown() #N.B. dont shutdown() as we don't use serv_forever
serv.server_close() serv.server_close()
log("restarting") log("restarting")
@@ -712,10 +628,7 @@ for o, a in optlist:
if o == '-c': if o == '-c':
configfile = a configfile = a
cmdargs += [o, a] cmdargs += [o, a]
if o == '-d': if o == '-f':
visual = True
cmdargs += [o]
elif o == '-f':
forground = True forground = True
cmdargs += [o] cmdargs += [o]
elif o == '-h': elif o == '-h':
@@ -750,8 +663,6 @@ grace = 2
sys.exit(1) sys.exit(1)
if visual:
forground = True
# #
# set defaults # set defaults
@@ -833,11 +744,6 @@ if os.path.exists(PICKFILE):
now = time.time() now = time.time()
startsec = int(now) % interval startsec = int(now) % interval
if visual:
import curses
initcurses()
display()
stdscr.nodelay(1)
log("Starting %s" % VER) log("Starting %s" % VER)
atexit.register(on_exit) atexit.register(on_exit)
@@ -884,21 +790,6 @@ signal.signal(signal.SIGHUP, handler)
next = int(now)+15 # 15 seconds time to settle after (re-)start next = int(now)+15 # 15 seconds time to settle after (re-)start
sleep = next - now sleep = next - now
while running: while running:
if visual:
c = stdscr.getch()
if c == ord('c'):
msgs = []
display()
elif c == ord('q'):
break # Exit the while()
elif c == ord('d'):
DEBUG = not DEBUG
elif c == ord('v'):
verbose = not verbose
# elif c == ord('p'):
# PrintDocument()
# elif c == ord('x'):
# x = y = 0
if DEBUG: if DEBUG:
sys.stderr.write("about to sleep = %s\n" % (sleep)) sys.stderr.write("about to sleep = %s\n" % (sleep))
@@ -912,10 +803,6 @@ while running:
print select.error, value print select.error, value
#raise os.error, value #raise os.error, value
continue continue
if visual:
exitcurses()
initcurses()
display()
continue continue
except: except:
sys.exit(1) sys.exit(1)
@@ -929,10 +816,6 @@ while running:
if now >= next: if now >= next:
next = now+1 next = now+1
checkoverdue() checkoverdue()
if visual:
stdscr.move(1, 0)
stdscr.clrtoeol()
displaytime()
sleep = next-now sleep = next-now
if sleep < 0: if sleep < 0: