part 2 of redo: state is in connections
This commit is contained in:
@@ -37,7 +37,6 @@ SEND_PUSHOVER=True
|
||||
DEBUG = 0
|
||||
|
||||
MAXRECV = 32767
|
||||
MAXRTTS = 10
|
||||
LOGFILE = "/home/andreas/public_html/messages/andreas"
|
||||
PICKFILE = "/var/tmp/hbd.pick"
|
||||
AEMAIL = ["andreas@wrede.ca"]
|
||||
@@ -66,6 +65,10 @@ tcss = """<script src="https://home.wrede.ca/pr/sorttable.js"></script>
|
||||
<style>
|
||||
#ntable {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
#wide-ntable {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
@@ -262,11 +265,11 @@ def dur(sec):
|
||||
|
||||
|
||||
def fixsort():
|
||||
s = hosts.keys()
|
||||
s = Host.hosts.keys()
|
||||
s.sort()
|
||||
x = 0
|
||||
for n in s:
|
||||
hosts[n].num = x
|
||||
Host.hosts[n].num = x
|
||||
x += 1
|
||||
|
||||
#
|
||||
@@ -290,17 +293,22 @@ def initlog(logfile):
|
||||
#
|
||||
#
|
||||
def checkoverdue():
|
||||
|
||||
for h in hosts.keys():
|
||||
if hosts[h].state == Host.down:
|
||||
continue
|
||||
timeout = hosts[h].interval+grace
|
||||
if hosts[h].state == Host.up and now-hosts[h].lastbeat > timeout:
|
||||
now = time.time()
|
||||
for h in Host.hosts.keys():
|
||||
pmsg = []
|
||||
for c in Host.hosts[h].connections:
|
||||
conn = Host.hosts[h].connections[c]
|
||||
if conn.state == Connection.down:
|
||||
continue
|
||||
timeout = Host.hosts[h].interval + grace
|
||||
if conn.state == Connection.up and (now - conn.lastbeat) > timeout:
|
||||
conn.newstate(Connection.overdue, now, grace)
|
||||
pmsg.append(conn.afam)
|
||||
if pmsg != []:
|
||||
if h in watchhosts:
|
||||
email("overdue", "%s is overdue" % h)
|
||||
pushover("%s is overdue" % h)
|
||||
hosts[h].newstate(Host.overdue, grace)
|
||||
log(h, "overdue")
|
||||
email("overdue", "%s overdue" % ",".join(pmsg))
|
||||
pushover("%s %s overdue" % (h, join(pmsg)))
|
||||
lof(h, "%s overdue" % join(pmsg))
|
||||
|
||||
|
||||
def log(host, m, service=None):
|
||||
@@ -344,10 +352,11 @@ def dnsupdatethread():
|
||||
#
|
||||
#
|
||||
def readsock(sock):
|
||||
global now
|
||||
if DEBUG > 3: sys.stderr.write("readsock recfrom start")
|
||||
data, addr = sock.recvfrom(MAXRECV)
|
||||
data, addrp = sock.recvfrom(MAXRECV)
|
||||
now = time.time()
|
||||
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addr))
|
||||
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addrp))
|
||||
msg = stodict(data)
|
||||
if not msg: # Old hbc client
|
||||
if verbose: print "old hbc:", data
|
||||
@@ -355,35 +364,30 @@ def readsock(sock):
|
||||
msg = oldmtodict(data)
|
||||
else:
|
||||
oldclient = False
|
||||
if verbose: print "readsock = %s, %s" % (msg,addr)
|
||||
if verbose: print "readsock = %s, %s" % (msg,addrp)
|
||||
|
||||
addr = addrp[0:2]
|
||||
name = shortname(msg.get('name', "unknown"))
|
||||
if not name in hosts: # was: hosts.has_key(name):
|
||||
if not name in Host.hosts: # was: hosts.has_key(name):
|
||||
host = Host(name)
|
||||
if verbose: print "XX: New host, num now %s" % (len(Host.hosts))
|
||||
newh=True
|
||||
else:
|
||||
host = hosts[name]
|
||||
host = Host.hosts[name]
|
||||
newh=False
|
||||
|
||||
cid = msg.get('id', 0)
|
||||
if cid not in host.connections:
|
||||
host.connections[cid] = Connection(name, cid, addr)
|
||||
conn = host.connections[cid]
|
||||
|
||||
rtt = msg.get('rtt',None)
|
||||
|
||||
if msg['ID'] == 'HTB':
|
||||
host.doesack = msg.get('acks', -1)
|
||||
host.lastbeat = now
|
||||
host.setcver(msg.get('ver', 0))
|
||||
|
||||
interval = msg.get('interval', 0)
|
||||
interval = int(msg.get('interval', 0))
|
||||
shutdown = msg.get('shutdown', 0)
|
||||
service = msg.get('service', "unknown")
|
||||
message = msg.get('msg', None)
|
||||
boot = msg.get('boot', 0)
|
||||
host.latency = now - msg.get('time', 0)
|
||||
rtt = msg.get('rtt',"up")
|
||||
conn.rtts.append(rtt)
|
||||
if len(conn.rtts) > MAXRTTS:
|
||||
del conn.rtts[0]
|
||||
|
||||
if boot:
|
||||
log(name, "booted")
|
||||
@@ -396,21 +400,22 @@ def readsock(sock):
|
||||
email("msg", message)
|
||||
pushover(message)
|
||||
|
||||
res = host.newaddr(addr[0])
|
||||
conn, res = host.conndata(cid, addr[0], rtt, now)
|
||||
if res:
|
||||
log(name, res)
|
||||
if name in watchhosts:
|
||||
email("address change", "%s %s" % (host.name, res))
|
||||
pushover("%s %s" % (host.name, res))
|
||||
|
||||
if host.getstate() != Host.up and interval > 0:
|
||||
lasts = host.state
|
||||
d = host.newstate(Host.up)
|
||||
m = "back after being %s for %s" % (lasts, dur(d))
|
||||
if conn.getstate() != Connection.up and interval > 0:
|
||||
lasts = conn.state
|
||||
d = conn.newstate(Connection.up, now)
|
||||
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
|
||||
log(name, m)
|
||||
if name in watchhosts:
|
||||
email("back", name)
|
||||
pushover("%s is back" % name)
|
||||
email("%s back" % conn.afam, name)
|
||||
pushover("%s %s is back" % (name, conn.afam))
|
||||
|
||||
if boot or newh:
|
||||
host.upcount = host.doesack
|
||||
else:
|
||||
@@ -418,19 +423,14 @@ def readsock(sock):
|
||||
|
||||
|
||||
if shutdown:
|
||||
log(name, "shutdown")
|
||||
log(name, "%s shutdown" % conn.afam)
|
||||
if name in watchhosts:
|
||||
email("shutdown", "%s shutdown" % name)
|
||||
pushover("%s hbc shutdown" % name)
|
||||
try:
|
||||
host.newstate(Host.down)
|
||||
except:
|
||||
pass
|
||||
email("shutdown", "%s %s shutdown" % (name, conn.afam))
|
||||
pushover("%s %s shutdown" % (name, conn.afam))
|
||||
conn.newstate(Connection.down, now)
|
||||
|
||||
if interval > 0:
|
||||
try:
|
||||
host.interval = interval
|
||||
except:
|
||||
pass
|
||||
host.interval = interval
|
||||
|
||||
rmsg = {'time': time.time()}
|
||||
op = 'ACK'
|
||||
@@ -487,7 +487,7 @@ def updatecode(ucode, uname):
|
||||
m.update(new_code)
|
||||
icsum = m.hexdigest()
|
||||
rmsg = {'csum': icsum, 'code': new_code.encode('base64','strict') }
|
||||
hosts[uname].cmds.append(('UPD',rmsg))
|
||||
Host.hosts[uname].cmds.append(('UPD',rmsg))
|
||||
return fail
|
||||
#
|
||||
#
|
||||
@@ -535,20 +535,22 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
|
||||
def buildpage(self):
|
||||
res=self.buildhead(refresh=60, extras=tcss)
|
||||
res.append("<H2>Heartbeat status %s</h2><h4> %s (%s)</H4>" % (VER, time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT')))
|
||||
res.append("<H2>Heartbeat status %s</h2>" % VER)
|
||||
|
||||
res.append('<table id="ntable" class="sortable">')
|
||||
hosts_sorted = hosts.keys()
|
||||
hosts_sorted.sort()
|
||||
res.append(hosts[hosts_sorted[0]].htmldisp(True))
|
||||
for h in hosts_sorted:
|
||||
res.append(hosts[h].htmldisp())
|
||||
res.append(ubHost.htmlheaders())
|
||||
hosts_sorted = Host.hosts.keys()
|
||||
if len(hosts_sorted):
|
||||
hosts_sorted.sort()
|
||||
for h in hosts_sorted:
|
||||
res.append(Host.hosts[h].htmldisp())
|
||||
res.append("</table>")
|
||||
|
||||
le = max(40 - len(hosts), 3)
|
||||
le = max(40 - len(Host.hosts), 3)
|
||||
res.append("<h4>Log of Events</h4>")
|
||||
for m in msgs[len(msgs)-le:]:
|
||||
res.append("%s<BR>" % m)
|
||||
res.append('<p> %s (%s)</p>' % (time.strftime("%H:%M:%S", time.localtime(now)), os.environ.get('TZ', 'CET-1CDT')))
|
||||
res.append("</body></html>")
|
||||
return res
|
||||
|
||||
@@ -597,9 +599,9 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
uname=uarg[0][2:]
|
||||
if uarg[1][:2] == "c=":
|
||||
ucmd=uarg[1][2:]
|
||||
if ucmd != "" and uname != "" and hosts.has_key(uname):
|
||||
if ucmd != "" and uname != "" and Host.hosts.has_key(uname):
|
||||
rmsg = {'cmd': urllib.unquote(ucmd)}
|
||||
hosts[uname].cmds.append(('CMD', rmsg))
|
||||
Host.hosts[uname].cmds.append(('CMD', rmsg))
|
||||
res=self.buildhead()
|
||||
res.append("2Done")
|
||||
else:
|
||||
@@ -615,9 +617,9 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
else:
|
||||
if uarg[0][:2] == "h=":
|
||||
uname=uarg[0][2:]
|
||||
if uname != "" and hosts.has_key(uname):
|
||||
if uname != "" and Host.hosts.has_key(uname):
|
||||
log(uname, "dropped")
|
||||
del hosts[uname]
|
||||
del Host.hosts[uname]
|
||||
res=self.buildhead()
|
||||
res.append("Done")
|
||||
|
||||
@@ -630,8 +632,8 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
res=self.buildhead()
|
||||
if uarg[0][:2] == "h=":
|
||||
uname=uarg[0][2:]
|
||||
if uname != "" and hosts.has_key(uname):
|
||||
err = nsupdate(uname, hosts[uname].addr)
|
||||
if uname != "" and Host.hosts.has_key(uname):
|
||||
err = nsupdate(uname, Host.hosts[uname].addr)
|
||||
ll="nsupdate request: %s" % err
|
||||
else:
|
||||
ll="name %s not found" % uname
|
||||
@@ -650,7 +652,7 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
uname=uarg[0][2:]
|
||||
if uarg[1][:2] == "c=":
|
||||
ucode=uarg[1][2:]
|
||||
if ucode != "" and uname != "" and hosts.has_key(uname):
|
||||
if ucode != "" and uname != "" and Host.hosts.has_key(uname):
|
||||
err = updatecode(ucode, urllib.unquote(uname))
|
||||
res=self.buildhead()
|
||||
res.append("3 Done: %s" % err if err else "OK")
|
||||
@@ -664,8 +666,8 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
elif upar[0] == "/api/0/hosts": # api access to host table
|
||||
headerdict = {"Content-Type": "application/json; charset=utf-8" }
|
||||
l=[]
|
||||
for h in hosts:
|
||||
l.append(hosts[h].jsons())
|
||||
for h in Host.hosts:
|
||||
l.append(Host.hosts[h].jsons())
|
||||
res=["[",",".join(l),"]"]
|
||||
elif upar[0] == "/api/0/messages": # api access to host table
|
||||
headerdict = {"Content-Type": "application/json; charset=utf-8" }
|
||||
@@ -739,7 +741,7 @@ def closeup():
|
||||
|
||||
|
||||
def restart():
|
||||
print "execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs)
|
||||
if verbose: print "execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs)
|
||||
os.execv(sys.argv[0], [sys.argv[0]]+cmdargs)
|
||||
print "should not be here"
|
||||
|
||||
@@ -749,10 +751,9 @@ def saveandrestart():
|
||||
|
||||
|
||||
def pickleit():
|
||||
pickf = open(PICKFILE, 'w')
|
||||
pickf = open(pickfile, 'w')
|
||||
pick = cPickle.Pickler(pickf)
|
||||
pick.dump(hosts)
|
||||
pick.dump(htab)
|
||||
pick.dump(Host.hosts)
|
||||
pick.dump(msgs)
|
||||
pick.dump(lastfm)
|
||||
pickf.close()
|
||||
@@ -820,6 +821,7 @@ grace = 2
|
||||
hb_port = PORT
|
||||
hbd_host = THOST
|
||||
hbd_port = TPORT
|
||||
pickfile = PICKFILE
|
||||
logfile = LOGFILE
|
||||
logfmt = "text"
|
||||
interval = INTERVAL
|
||||
@@ -844,26 +846,30 @@ if f:
|
||||
if verbose:
|
||||
print " %s" % l[:-1]
|
||||
r = l[:-1].split('=')
|
||||
if r[0] == 'interval':
|
||||
interval = eval(r[1])
|
||||
elif r[0] == 'grace':
|
||||
grace = eval(r[1])
|
||||
elif r[0] == 'hbd_port':
|
||||
hbd_port = eval(r[1])
|
||||
elif r[0] == 'hbd_host':
|
||||
hbd_host = eval(r[1])
|
||||
elif r[0] == 'hb_port':
|
||||
hb_port = eval(r[1])
|
||||
elif r[0] == 'logfile':
|
||||
logfile = eval(r[1])
|
||||
elif r[0] == 'logfmt':
|
||||
logfmt = eval(r[1])
|
||||
elif r[0] == 'watchhosts':
|
||||
watchhosts = eval(r[1])
|
||||
elif r[0] == 'dyndnshosts':
|
||||
dyndnshosts = eval(r[1])
|
||||
elif r[0] == 'drophosts':
|
||||
drophosts = eval(r[1])
|
||||
o = r[0].strip()
|
||||
a = eval(r[1].strip())
|
||||
if o == 'interval':
|
||||
interval = a
|
||||
elif o == 'grace':
|
||||
grace = a
|
||||
elif o == 'hbd_port':
|
||||
hbd_port = a
|
||||
elif o == 'hbd_host':
|
||||
hbd_host = a
|
||||
elif o == 'pickfile':
|
||||
pickfile = a
|
||||
elif o == 'hb_port':
|
||||
hb_port = a
|
||||
elif o == 'logfile':
|
||||
logfile = a
|
||||
elif o == 'logfmt':
|
||||
logfmt = a
|
||||
elif o == 'watchhosts':
|
||||
watchhosts = a
|
||||
elif o == 'dyndnshosts':
|
||||
dyndnshosts = a
|
||||
elif o == 'drophosts':
|
||||
drophosts = a
|
||||
f.close()
|
||||
|
||||
if len(args) != 0:
|
||||
@@ -875,12 +881,12 @@ if verbose:
|
||||
print "notice: logging to %s" % logfile
|
||||
logf = initlog(logfile)
|
||||
|
||||
if os.path.exists(PICKFILE):
|
||||
pickf = open(PICKFILE, 'r')
|
||||
if 1 and os.path.exists(pickfile):
|
||||
if verbose: print "opening pickls %s" % pickfile
|
||||
pickf = open(pickfile, 'r')
|
||||
pick = cPickle.Unpickler(pickf)
|
||||
try:
|
||||
hosts = pick.load()
|
||||
htab = pick.load()
|
||||
Host.hosts = pick.load()
|
||||
msgs = pick.load()
|
||||
try:
|
||||
lastfm = pick.load()
|
||||
@@ -888,12 +894,17 @@ if os.path.exists(PICKFILE):
|
||||
lastfm = ["","",""]
|
||||
pickf.close()
|
||||
except:
|
||||
os.unlink(PICKFILE)
|
||||
for h in hosts.keys():
|
||||
hosts[h].fixup()
|
||||
os.unlink(pickfile)
|
||||
Connection.htab = {}
|
||||
for h in Host.hosts.keys():
|
||||
Host.hosts[h].dyn = h in dyndnshosts
|
||||
Host.hosts[h].fixup()
|
||||
for h in drophosts:
|
||||
if h in hosts:
|
||||
del hosts[h]
|
||||
if h in Host.hosts:
|
||||
del Host.hosts[h]
|
||||
if verbose: print "%s pickled hosts loaded" % len(Host.hosts)
|
||||
else:
|
||||
if verbose: print "no pickled data"
|
||||
|
||||
|
||||
now = time.time()
|
||||
@@ -999,8 +1010,8 @@ while running:
|
||||
ts=time.strftime(tsfm[v], time.localtime(now))
|
||||
if ts != lastfm[v]:
|
||||
lastfm[v]=ts
|
||||
for h in hosts.keys():
|
||||
hosts[h].hdwcounts[v] = [hosts[h].doesack, hosts[h].upcount]
|
||||
for h in Host.hosts.keys():
|
||||
Host.hosts[h].hdwcounts[v] = [Host.hosts[h].doesack, Host.hosts[h].upcount]
|
||||
|
||||
if now >= next and now >= firstcheck:
|
||||
next = now+1
|
||||
|
||||
Reference in New Issue
Block a user