part 2 of redo: state is in connections

This commit is contained in:
2016-04-26 21:59:15 +02:00
parent b1fef35d03
commit 21f8fe8842
2 changed files with 278 additions and 229 deletions
+110 -99
View File
@@ -37,7 +37,6 @@ SEND_PUSHOVER=True
DEBUG = 0
MAXRECV = 32767
MAXRTTS = 10
LOGFILE = "/home/andreas/public_html/messages/andreas"
PICKFILE = "/var/tmp/hbd.pick"
AEMAIL = ["andreas@wrede.ca"]
@@ -66,6 +65,10 @@ tcss = """<script src="https://home.wrede.ca/pr/sorttable.js"></script>
<style>
#ntable {
border-collapse: collapse;
}
#wide-ntable {
border-collapse: collapse;
width: 100%;
}
@@ -262,11 +265,11 @@ def dur(sec):
def fixsort():
    """Renumber all known hosts by their position in name order.

    The keys of ``Host.hosts`` are visited in sorted order and each host's
    ``num`` attribute is set to its zero-based rank.
    """
    for rank, name in enumerate(sorted(Host.hosts.keys())):
        Host.hosts[name].num = rank
#
@@ -290,17 +293,22 @@ def initlog(logfile):
#
#
def checkoverdue():
    """Flag connections whose heartbeat is late and report them per host.

    For every host in ``Host.hosts``, each connection that is in state
    ``Connection.up`` and whose ``lastbeat`` is older than the host's
    ``interval`` plus the module-level ``grace`` is moved to
    ``Connection.overdue``.  The ``afam`` labels of the affected connections
    are collected; if there are any, one log entry is written for the host,
    and hosts listed in ``watchhosts`` additionally get an e-mail and a
    pushover notification.
    """
    now = time.time()
    for h in Host.hosts.keys():
        host = Host.hosts[h]
        pmsg = []
        for conn in host.connections.values():
            if conn.state == Connection.down:
                continue
            timeout = host.interval + grace
            if conn.state == Connection.up and (now - conn.lastbeat) > timeout:
                conn.newstate(Connection.overdue, now, grace)
                pmsg.append(conn.afam)
        if not pmsg:
            continue
        # Space-joined text is what string.join() would have produced; using
        # the str method avoids depending on a bare `join` name being imported.
        overdue = " ".join(pmsg)
        if h in watchhosts:
            email("overdue", "%s overdue" % ",".join(pmsg))
            pushover("%s %s overdue" % (h, overdue))
        # Every overdue event is logged; only watched hosts raise notifications.
        log(h, "%s overdue" % overdue)
def log(host, m, service=None):
@@ -344,10 +352,11 @@ def dnsupdatethread():
#
#
def readsock(sock):
global now
if DEBUG > 3: sys.stderr.write("readsock recfrom start")
data, addr = sock.recvfrom(MAXRECV)
data, addrp = sock.recvfrom(MAXRECV)
now = time.time()
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addr))
if DEBUG > 2: sys.stderr.write("readsock = %s, %s\n" % (data,addrp))
msg = stodict(data)
if not msg: # Old hbc client
if verbose: print "old hbc:", data
@@ -355,35 +364,30 @@ def readsock(sock):
msg = oldmtodict(data)
else:
oldclient = False
if verbose: print "readsock = %s, %s" % (msg,addr)
if verbose: print "readsock = %s, %s" % (msg,addrp)
addr = addrp[0:2]
name = shortname(msg.get('name', "unknown"))
if not name in hosts: # was: hosts.has_key(name):
if not name in Host.hosts: # was: hosts.has_key(name):
host = Host(name)
if verbose: print "XX: New host, num now %s" % (len(Host.hosts))
newh=True
else:
host = hosts[name]
host = Host.hosts[name]
newh=False
cid = msg.get('id', 0)
if cid not in host.connections:
host.connections[cid] = Connection(name, cid, addr)
conn = host.connections[cid]
rtt = msg.get('rtt',None)
if msg['ID'] == 'HTB':
host.doesack = msg.get('acks', -1)
host.lastbeat = now
host.setcver(msg.get('ver', 0))
interval = msg.get('interval', 0)
interval = int(msg.get('interval', 0))
shutdown = msg.get('shutdown', 0)
service = msg.get('service', "unknown")
message = msg.get('msg', None)
boot = msg.get('boot', 0)
host.latency = now - msg.get('time', 0)
rtt = msg.get('rtt',"up")
conn.rtts.append(rtt)
if len(conn.rtts) > MAXRTTS:
del conn.rtts[0]
if boot:
log(name, "booted")
@@ -396,21 +400,22 @@ def readsock(sock):
email("msg", message)
pushover(message)
res = host.newaddr(addr[0])
conn, res = host.conndata(cid, addr[0], rtt, now)
if res:
log(name, res)
if name in watchhosts:
email("address change", "%s %s" % (host.name, res))
pushover("%s %s" % (host.name, res))
if host.getstate() != Host.up and interval > 0:
lasts = host.state
d = host.newstate(Host.up)
m = "back after being %s for %s" % (lasts, dur(d))
if conn.getstate() != Connection.up and interval > 0:
lasts = conn.state
d = conn.newstate(Connection.up, now)
m = "%s back after being %s for %s" % (conn.afam, lasts, dur(d))
log(name, m)
if name in watchhosts:
email("back", name)
pushover("%s is back" % name)
email("%s back" % conn.afam, name)
pushover("%s %s is back" % (name, conn.afam))
if boot or newh:
host.upcount = host.doesack
else:
@@ -418,19 +423,14 @@ def readsock(sock):
if shutdown:
log(name, "shutdown")
log(name, "%s shutdown" % conn.afam)
if name in watchhosts:
email("shutdown", "%s shutdown" % name)
pushover("%s hbc shutdown" % name)
try:
host.newstate(Host.down)
except:
pass
email("shutdown", "%s %s shutdown" % (name, conn.afam))
pushover("%s %s shutdown" % (name, conn.afam))
conn.newstate(Connection.down, now)
if interval > 0:
try:
host.interval = interval
except:
pass
host.interval = interval
rmsg = {'time': time.time()}
op = 'ACK'
@@ -487,7 +487,7 @@ def updatecode(ucode, uname):
m.update(new_code)
icsum = m.hexdigest()
rmsg = {'csum': icsum, 'code': new_code.encode('base64','strict') }
hosts[uname].cmds.append(('UPD',rmsg))
Host.hosts[uname].cmds.append(('UPD',rmsg))
return fail
#
#
@@ -535,20 +535,22 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
def buildpage(self):
    """Assemble the HTML status page and return it as a list of fragments.

    The page consists of the head produced by ``self.buildhead`` (with a
    60 second refresh and the ``tcss`` extras), a sortable table with one
    ``htmldisp()`` row per host in name order, the tail of the ``msgs``
    event log, and a footer showing the time of ``now`` and the ``TZ``
    environment setting.
    """
    page = self.buildhead(refresh=60, extras=tcss)
    page.append("<H2>Heartbeat status %s</h2>" % VER)
    page.append('<table id="ntable" class="sortable">')
    # NOTE(review): `ubHost` is not defined in the visible part of the file;
    # presumably `Host.htmlheaders()` was intended -- confirm.
    page.append(ubHost.htmlheaders())
    for name in sorted(Host.hosts.keys()):
        page.append(Host.hosts[name].htmldisp())
    page.append("</table>")
    # Show fewer log lines as the host table grows, but never fewer than 3.
    le = max(40 - len(Host.hosts), 3)
    page.append("<h4>Log of Events</h4>")
    first = len(msgs) - le
    for entry in msgs[first:]:
        page.append("%s<BR>" % entry)
    stamp = time.strftime("%H:%M:%S", time.localtime(now))
    zone = os.environ.get('TZ', 'CET-1CDT')
    page.append('<p> %s (%s)</p>' % (stamp, zone))
    page.append("</body></html>")
    return page
@@ -597,9 +599,9 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
uname=uarg[0][2:]
if uarg[1][:2] == "c=":
ucmd=uarg[1][2:]
if ucmd != "" and uname != "" and hosts.has_key(uname):
if ucmd != "" and uname != "" and Host.hosts.has_key(uname):
rmsg = {'cmd': urllib.unquote(ucmd)}
hosts[uname].cmds.append(('CMD', rmsg))
Host.hosts[uname].cmds.append(('CMD', rmsg))
res=self.buildhead()
res.append("2Done")
else:
@@ -615,9 +617,9 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
else:
if uarg[0][:2] == "h=":
uname=uarg[0][2:]
if uname != "" and hosts.has_key(uname):
if uname != "" and Host.hosts.has_key(uname):
log(uname, "dropped")
del hosts[uname]
del Host.hosts[uname]
res=self.buildhead()
res.append("Done")
@@ -630,8 +632,8 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
res=self.buildhead()
if uarg[0][:2] == "h=":
uname=uarg[0][2:]
if uname != "" and hosts.has_key(uname):
err = nsupdate(uname, hosts[uname].addr)
if uname != "" and Host.hosts.has_key(uname):
err = nsupdate(uname, Host.hosts[uname].addr)
ll="nsupdate request: %s" % err
else:
ll="name %s not found" % uname
@@ -650,7 +652,7 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
uname=uarg[0][2:]
if uarg[1][:2] == "c=":
ucode=uarg[1][2:]
if ucode != "" and uname != "" and hosts.has_key(uname):
if ucode != "" and uname != "" and Host.hosts.has_key(uname):
err = updatecode(ucode, urllib.unquote(uname))
res=self.buildhead()
res.append("3 Done: %s" % err if err else "OK")
@@ -664,8 +666,8 @@ class HttpHandler(BaseHTTPServer.BaseHTTPRequestHandler):
elif upar[0] == "/api/0/hosts": # api access to host table
headerdict = {"Content-Type": "application/json; charset=utf-8" }
l=[]
for h in hosts:
l.append(hosts[h].jsons())
for h in Host.hosts:
l.append(Host.hosts[h].jsons())
res=["[",",".join(l),"]"]
elif upar[0] == "/api/0/messages": # api access to host table
headerdict = {"Content-Type": "application/json; charset=utf-8" }
@@ -739,7 +741,7 @@ def closeup():
def restart():
print "execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs)
if verbose: print "execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs)
os.execv(sys.argv[0], [sys.argv[0]]+cmdargs)
print "should not be here"
@@ -749,10 +751,9 @@ def saveandrestart():
def pickleit():
    """Persist the runtime state to the file named by ``pickfile``.

    ``Host.hosts``, ``msgs`` and ``lastfm`` are dumped, in that order, through
    a single ``cPickle.Pickler``; a reader has to load them back in the same
    order.
    """
    pickf = open(pickfile, 'w')
    try:
        pick = cPickle.Pickler(pickf)
        pick.dump(Host.hosts)
        pick.dump(msgs)
        pick.dump(lastfm)
    finally:
        # Release the file handle even when one of the dumps raises.
        pickf.close()
@@ -820,6 +821,7 @@ grace = 2
hb_port = PORT
hbd_host = THOST
hbd_port = TPORT
pickfile = PICKFILE
logfile = LOGFILE
logfmt = "text"
interval = INTERVAL
@@ -844,26 +846,30 @@ if f:
if verbose:
print " %s" % l[:-1]
r = l[:-1].split('=')
if r[0] == 'interval':
interval = eval(r[1])
elif r[0] == 'grace':
grace = eval(r[1])
elif r[0] == 'hbd_port':
hbd_port = eval(r[1])
elif r[0] == 'hbd_host':
hbd_host = eval(r[1])
elif r[0] == 'hb_port':
hb_port = eval(r[1])
elif r[0] == 'logfile':
logfile = eval(r[1])
elif r[0] == 'logfmt':
logfmt = eval(r[1])
elif r[0] == 'watchhosts':
watchhosts = eval(r[1])
elif r[0] == 'dyndnshosts':
dyndnshosts = eval(r[1])
elif r[0] == 'drophosts':
drophosts = eval(r[1])
o = r[0].strip()
a = eval(r[1].strip())
if o == 'interval':
interval = a
elif o == 'grace':
grace = a
elif o == 'hbd_port':
hbd_port = a
elif o == 'hbd_host':
hbd_host = a
elif o == 'pickfile':
pickfile = a
elif o == 'hb_port':
hb_port = a
elif o == 'logfile':
logfile = a
elif o == 'logfmt':
logfmt = a
elif o == 'watchhosts':
watchhosts = a
elif o == 'dyndnshosts':
dyndnshosts = a
elif o == 'drophosts':
drophosts = a
f.close()
if len(args) != 0:
@@ -875,12 +881,12 @@ if verbose:
print "notice: logging to %s" % logfile
logf = initlog(logfile)
if os.path.exists(PICKFILE):
pickf = open(PICKFILE, 'r')
if 1 and os.path.exists(pickfile):
if verbose: print "opening pickls %s" % pickfile
pickf = open(pickfile, 'r')
pick = cPickle.Unpickler(pickf)
try:
hosts = pick.load()
htab = pick.load()
Host.hosts = pick.load()
msgs = pick.load()
try:
lastfm = pick.load()
@@ -888,12 +894,17 @@ if os.path.exists(PICKFILE):
lastfm = ["","",""]
pickf.close()
except:
os.unlink(PICKFILE)
for h in hosts.keys():
hosts[h].fixup()
os.unlink(pickfile)
Connection.htab = {}
for h in Host.hosts.keys():
Host.hosts[h].dyn = h in dyndnshosts
Host.hosts[h].fixup()
for h in drophosts:
if h in hosts:
del hosts[h]
if h in Host.hosts:
del Host.hosts[h]
if verbose: print "%s pickled hosts loaded" % len(Host.hosts)
else:
if verbose: print "no pickled data"
now = time.time()
@@ -999,8 +1010,8 @@ while running:
ts=time.strftime(tsfm[v], time.localtime(now))
if ts != lastfm[v]:
lastfm[v]=ts
for h in hosts.keys():
hosts[h].hdwcounts[v] = [hosts[h].doesack, hosts[h].upcount]
for h in Host.hosts.keys():
Host.hosts[h].hdwcounts[v] = [Host.hosts[h].doesack, Host.hosts[h].upcount]
if now >= next and now >= firstcheck:
next = now+1