#!/usr/bin/env python # $Id: hbd,v 1.38 2013/07/14 02:25:05 andreas Exp $ # Wait for heartbeat messages and act on them (or their absence) # VER = 1.52 import time import os import string import sys import socket import atexit import select import SocketServer import getopt import signal import cPickle import smtplib import traceback import urllib import httplib from subprocess import Popen, STDOUT, PIPE SEND_EMAIL=False SEND_PUSHOVER=True False = 0 True = 1 LOGFILE = "/home/andreas/public_html/messages/andreas" PICKFILE = "/var/tmp/hbd.pick" AEMAIL = ["andreas@wrede.ca"] NAME = "heatbeat" SMTPSERVER = "localhost" hosts = {} htab = {} msgs = [] num = 0 upcount = 0 PORT = 50003 TPORT = 50004 THOST = "" DEBUG = False verbose = False INTERVAL = 10 GRACE = 2 visual = 0 os.environ['TZ'] = 'EST5EDT' stdscr = None win = None msgw = None msgwB = None msgwHeight = 10 def handler(signum, frame): global running, sig sig = signum if not running: if verbose: print "NOT runing signal: %s running: %d" % (sig, running) return # signal.signal(sig, handler) if verbose: print "signal: %s running: %s frame: %s" % (sig, running, frame) running = False # sys.exit(0) def shortname(name): r = string.split(name, '.') return r[0] class NullDevice: def write(self, s): pass class Host: up = "up" down = "down" overdue = "overdue" def __init__(self, name, addr): global num self.name = shortname(name) self.addr = addr self.num = num self.lastbeat = time.time() self.upcount = 0 self.state = Host.up self.state = "up" self.statetime = self.lastbeat self.interval = 0 self.doesack = -1 self.cmds = [] num += 1 # called when reloading class from pickle def fixup(self): try: a=self.cmds except: self.cmds=[] def getstate(self): return self.state def dispstate(self): if self.state in ["down", "overdue"]: state = "%s" % self.state else: state = "%s" % self.state if self.doesack != -1: return "%s(%s)" % (state, self.doesack) return state # set new state, return number of secs in previous state def newstate(self, state, when=0): self.state = state now = time.time()-when s = now-self.statetime self.statetime = now if visual: displaystatetime(self.name) return s def email(s, msg): if not SEND_EMAIL: return ret = "OK" toaddrs = AEMAIL fromaddr = "aew.heartbeat@wrede.ca" subj = "Info from %s: %s" % (NAME, s) date = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.localtime()) body = "To: %s\nFrom: %s\nSubject: %s\nDate: %s\n\n%s" % (toaddrs[0], fromaddr, subj, date, msg) try: server = smtplib.SMTP(SMTPSERVER) if DEBUG: server.set_debuglevel(1) server.sendmail(fromaddr, toaddrs, body) except smtplib.SMTPRecipientsRefused, errs: log("cannot send email: %s\n" % (errs)) ret = "Fail" except: print("smtp error: "+traceback.format_exc()) saveandrestart() try: server.quit() except: pass return ret def pushover(msg): if not SEND_PUSHOVER: return conn = httplib.HTTPSConnection("api.pushover.net:443") conn.request("POST", "/1/messages.json", urllib.urlencode({ "token": "ac7NLX2rPjXFareeDgLpXNoDf4iFmf", "user": "uDhH33UjQQDYtNzJb1ThRiWb9ingGK", "message": msg, }), { "Content-type": "application/x-www-form-urlencoded" }) conn.getresponse() # nsupdate: set the DNS A record for a fqdn # return: None if ok, else error text def nsupdate(hostname, newip): D = {} D['domain'] = 'dy.wapanafa.org' D['fqdn'] = '%s.dy.wapanafa.org' % hostname D['dnsttl'] = '5' D['newip'] = newip D['ts'] = time.strftime('%Y-%m-%d.%H:%M:%S', time.gmtime()) nsup = """update delete %(fqdn)s A update add %(fqdn)s %(dnsttl)s A %(newip)s update delete %(fqdn)s TXT update add %(fqdn)s %(dnsttl)s TXT "Created: %(ts)s" send answer """ % D # log("DBG: nsup %s" % nsup) cmd = ["/usr/bin/nsupdate", "-k", "/etc/dhcpc/K%(domain)s.+157+00000." % D, "-v"] # log("DBG: cmd %s" % cmd) try: p = Popen(cmd, shell=False, bufsize=1, stdin=PIPE, stdout=PIPE, stderr=STDOUT) except OSError, e: return "nsupdate: execution failed: %s" % e except: return "nsupdate: some error occured" (output, err) = p.communicate(nsup) if output.find('status: NOERROR') >= 0: return None return output # def dur(sec): sec = int(sec) h = sec / 3600 m = (sec - h * 3600) / 60 s = (sec - h * 3600) % 60 if h > 0: return "%d:%02d:%02d" % (h, m, s) if m > 0: return "%d:%02d" % (m, s) return "0:%02d" % s # def addhost(name, addr): sname = shortname(name) if sname in hosts: # was: hosts.has_key(sname): del htab[hosts[sname].addr] hosts[sname].addr = addr if visual: displayaddr(sname) htab[addr] = sname m = "%s, changed address to %s" % (sname, addr) log(m) else: hosts[sname] = Host(sname, addr) s = hosts.keys() s.sort() x = 0 for n in s: hosts[n].num = x x += 1 htab[addr] = sname if visual: display() # def on_exit(): if visual: exitcurses() if DEBUG: sys.stderr.write("on_exit\n") logf.close() print "exit" def initlog(logfile): return open(logfile, "a") # # def initwin(): global win, msgw, msgwB, msgwHeight maxY, maxX = stdscr.getmaxyx() begin_x = 0 begin_y = 2 height = len(htab)+2 if DEBUG: log("initwin called with %d" % height) win = curses.newwin(height, maxX, begin_y, begin_x) a = win.border(0, 0, 0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE) msgwB = curses.newwin(0, 0, height+1, begin_x) msgwB.border(0, 0, 0, 0, curses.ACS_LTEE, curses.ACS_RTEE) msgwHeight = maxY-height-3 msgw = curses.newwin(msgwHeight, maxX-2, height+2, begin_x+1) msgw.setscrreg(0, msgwHeight-1) msgw.scrollok(1) stdscr.addstr(0, 0, "hbd Version %s" % VER, curses.A_BOLD) stdscr.refresh() msgwB.refresh() # def checkoverdue(): for h in hosts.keys(): if hosts[h].state == Host.down: continue timeout = hosts[h].interval+grace if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout: m = "%s is overdue" % h if h in watchhosts: email("overdue", m) pushover(m) hosts[h].newstate(Host.overdue, grace) log(m) # # def displaytime(): maxY, maxX = stdscr.getmaxyx() stdscr.addstr(0, maxX-8, time.strftime("%H:%M:%S", time.localtime(now)), curses.A_BOLD) for h in hosts.keys(): d = hosts[h].getstate() attr = 0 if verbose and hosts[h].state != Host.down: d = dur(now-hosts[h].lastbeat) if hosts[h].state == Host.overdue: attr = curses.A_BOLD win.addstr(hosts[h].num+1, 25, "%8s" % d, attr) win.refresh() stdscr.refresh() # # def displaystatetime(h, refresh=1): win.addstr(hosts[h].num+1, 60, "%-17s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(hosts[h].statetime))) if refresh: win.refresh() # # def displayaddr(h, refresh=1): win.addstr(hosts[h].num+1, 35, "%-16s" % hosts[h].addr) if refresh: win.refresh() # # def displaybody(): for h in hosts.keys(): win.addstr(hosts[h].num+1, 1, "%-25s" % (h)) if hosts[h].addr is not None: displayaddr(h, 0) if hosts[h].statetime is not None: displaystatetime(h, 0) win.refresh() # # def displaymsgs(): global msgw, msgs y = 0 for m in msgs[len(msgs)-msgwHeight:]: msgw.addstr(y, 0, m) y += 1 msgw.refresh() # # def display(): if visual: initwin() displaytime() displaybody() displaymsgs() def log(m, service="heartbeat"): if DEBUG: print "Log: %s" % m msg = time.strftime("%b %d %H:%M:%S", time.localtime(time.time()))+": "+m+"\n" msgs.append(msg) if logfmt == "msg": m2 = "%d|%s|%s\n" % (now, service, m) else: m2 = msg logf.write(m2) logf.flush() if msgw is not None: y, x = msgw.getyx() # if y > = msgwHeight-1: # msgw.scroll() msgw.addstr(msg) msgw.clrtoeol() msgw.refresh() pickleit() # # def fromaddr(name, addr, boot, interval, acks): global htab if not name in hosts: # was: hosts.has_key(name): addhost(name, addr) host = hosts[name] host.doesack = acks if host.addr != addr: if host.addr in htab: # was: htab.has_key(host.addr): del htab[host.addr] host.addr = addr htab[addr] = name m = "%s changed address to %s" % (host.name, addr) if name in dyndnshosts: err = nsupdate(name, addr) if err: m += ", DNS failed: %s" % err else: m += ", DNS updated." log(m) if name in watchhosts: email("address change", m) pushover(m) host.lastbeat = now if host.getstate() != Host.up and interval > 0: lasts = host.state d = host.newstate(Host.up) m = "%s, back after being %s for %s" % (host.name, lasts, dur(d)) log(m) if name in watchhosts: email("back", name) pushover("%s is back" % name) host.upcount += 1 # # def readsock(sock): global htab, win data, addr = sock.recvfrom(1024) pairs = string.split(data, ';') boot = 0 shutdown = 0 name = "unknown" service = "unknown" msg = None interval = 0 deltaT = 0.0 acks = -1 for pair in pairs: l = string.split(pair, "=") key = l[0] if len(l) != 2: val = "0" else: val = l[1] if key == 'boot': boot += 1 elif key == 'shutdown': shutdown += 1 elif key == 'interval': interval = int(val) elif key == 'name': name = shortname(val) elif key == 'msg': msg = val elif key == 'service': service = val elif key == 'time': try: deltaT = now-float(val) except: pass elif key == 'acks': try: acks = int(val) except: acks = -1 if boot: if acks == -1: a = "(%s)" % acks else: a = "" m = "%s booted, deltaT %0.2g sec %s" % (name, deltaT, a) log(m) if name in watchhosts: email("booted", m) pushover(m) if msg: m = "%s msg: %s" % (name, msg) log(m, service=service) if name in watchhosts: email("msg", m) pushover(m) fromaddr(name, addr[0], boot, interval, acks) if shutdown: m = "%s shutdown" % name log(m) if name in watchhosts: email("shutdown", m) pushover(m) try: hosts[name].newstate(Host.down) except: pass if interval > 0: try: hosts[name].interval = interval except: pass rmsg="ACK" if len(hosts[name].cmds): rmsg=hosts[name].cmds[0] msg="command '%s' initiated" % hosts[name].cmds[0] email("%s cmd exec" % name, msg) pushover(msg) del hosts[name].cmds[0] log("%s command initiated" % name) try: ss=sock.sendto(rmsg, addr) if DEBUG: log("msg from %s,%s, sent %s bytes back" % (addr[0], addr[1], ss)) except: pass # # # def initcurses(): global stdscr stdscr = curses.initscr() curses.noecho() curses.cbreak() stdscr.keypad(1) if DEBUG: sys.stderr.write("curses init done: %s\n" % stdscr) def exitcurses(): curses.nocbreak() stdscr.keypad(0) curses.echo() curses.endwin() class HtmlServer(SocketServer.TCPServer): allow_reuse_address = True # # class HtmlHandler(SocketServer.BaseRequestHandler): def buildhead(self, title="Heartbeat", refresh=None): res=[] res.append('') res.append("") res.append("
") res.append('| Host | State | IP Addr | Last change |
|---|---|---|---|
| %-24s | %-7s | %-16s | %-17s |
The requested URL %s was not found on this server.
' % uri) res.append('