improve logging

This commit is contained in:
2016-04-29 04:17:33 +02:00
parent f36b8b979e
commit b11e2248f5
+37 -26
View File
@@ -16,6 +16,7 @@ import md5
import shutil import shutil
import zlib import zlib
import subprocess import subprocess
import syslog
try: try:
import lockfile import lockfile
@@ -39,12 +40,20 @@ except:
# which on FreeBSD is close to a million # which on FreeBSD is close to a million
# hack: replace the function in daemon with this one: # hack: replace the function in daemon with this one:
def log(msg):
    """Emit a single log message to the appropriate sink.

    When the module-level flag ``fdaemon`` is true the message is sent to
    syslog; otherwise it is printed to standard output.

    Every message is logged at ``syslog.LOG_ERR`` priority, regardless of
    how important the caller considers it.

    msg -- the text to log.
    """
    # Foreground mode: nothing to route, just print and leave.
    if not fdaemon:
        print(msg)
        return
    # Daemon mode: hand the message to syslog.
    syslog.syslog(syslog.LOG_ERR, msg)
def get_maximum_file_descriptors(): def get_maximum_file_descriptors():
return 2048 return 2048
daemon.get_maximum_file_descriptors = get_maximum_file_descriptors daemon.get_maximum_file_descriptors = get_maximum_file_descriptors
import syslog
PORT = 50003 PORT = 50003
@@ -87,7 +96,8 @@ class Conn:
msg['time'] = time.time() msg['time'] = time.time()
m = dicttos(ID, msg) m = dicttos(ID, msg)
mz = zlib.compress(m,9) mz = zlib.compress(m,9)
if verbose: print "conn.send('%s', (%s:%s) %s>%s)" % (msg, self.addr, self.port, len(m), len(mz)) if verbose:
log("conn.send('%s', (%s:%s) %s>%s)" % (msg, self.addr, self.port, len(m), len(mz)))
try: try:
self.sock.sendto(m, (self.addr, self.port)) self.sock.sendto(m, (self.addr, self.port))
except socket.error as e: except socket.error as e:
@@ -101,7 +111,7 @@ class Conn:
def ack(self, msgDict): def ack(self, msgDict):
self.lastack = time.time() self.lastack = time.time()
self.lastacksent = float(msgDict.get('time','0')) self.lastacksent = float(msgDict.get('time','0'))
if verbose: print "ack RTT: %0.1f ms" % ((self.lastack - self.lastsend) * 1000.0) if verbose: log("ack RTT: %0.1f ms" % ((self.lastack - self.lastsend) * 1000.0))
self.rtts.append((self.lastack - self.lastsend) * 1000.0) self.rtts.append((self.lastack - self.lastsend) * 1000.0)
if len(self.rtts) > 10: if len(self.rtts) > 10:
del self.rtts[0] del self.rtts[0]
@@ -165,10 +175,9 @@ def stodict(msg):
def syslogtrace(note): def syslogtrace(note):
logm = '%s hbc died: \n%s' % (note, traceback.format_exc()) logm = '%s hbc died: \n%s' % (note, traceback.format_exc())
log(logm)
for l in logm.split('\n'): for l in logm.split('\n'):
syslog.syslog(syslog.LOG_ERR, ' tb: %s' % l) log(' tb: %s' % l)
if verbose:
print logm
@@ -176,21 +185,21 @@ conId = 1
def createConnections(hosts): def createConnections(hosts):
global conId global conId
for host in hosts: for host in hosts:
if verbose: print "createConnections for %s" % host if verbose: log("createConnections for %s" % host)
try: try:
rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP) rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
except socket.gaierror: except socket.gaierror:
logm = '%s hbc died: \n%s' % ('createConnections', traceback.format_exc()) logm = '%s hbc died: \n%s' % ('createConnections', traceback.format_exc())
if verbose: print logm if verbose: log(logm)
return None return None
for r in rs: for r in rs:
if verbose: print "address %s" % str(r) if verbose: log("address %s" % str(r))
if r[0] in [10, 24, 28, 30]: # for Linux, NetBSD, FreeBSD if r[0] in [10, 24, 28, 30]: # for Linux, NetBSD, FreeBSD
af=socket.AF_INET6 af=socket.AF_INET6
elif r[0] == 2: elif r[0] == 2:
af=socket.AF_INET af=socket.AF_INET
else: else:
print "dont know this net type: %s" % r[0][0] log("dont know this net type: %s" % r[0][0])
sys.exit(1) sys.exit(1)
addr = r[4][0] addr = r[4][0]
@@ -226,7 +235,7 @@ def doupdate(conn, msgDict):
msg={'service': 'update', 'msg': fail if fail else "OK"} msg={'service': 'update', 'msg': fail if fail else "OK"}
conns[conn].sendto(msg) conns[conn].sendto(msg)
if not fail: if not fail:
syslog.syslog(syslog.LOG_ERR, 'hc updates, fs = %s' % (len(code))) log('hc updates, fs = %s' % (len(code)))
return fail return fail
@@ -257,16 +266,14 @@ def doupdateone(code, csum):
def restart(): def restart():
if verbose: log('restart %s' % (sys.argv[0]))
print "restart: execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs)
syslog.syslog(syslog.LOG_ERR, 'restart %s' % (sys.argv[0]))
e = "fallthrough" e = "fallthrough"
try: try:
os.execv(sys.argv[0], [sys.argv[0]]+cmdargs) os.execv(sys.argv[0], [sys.argv[0]]+cmdargs)
except Exception as e: except Exception as e:
pass pass
print "should not be here:", str(e) print "should not be here:", str(e)
syslog.syslog(syslog.LOG_ERR, 'restart failed: %s' % e) log('restart failed: %s' % e)
def process(): def process():
@@ -282,7 +289,6 @@ def process():
while running: while running:
while time.time() < nextReport: while time.time() < nextReport:
sleep=nextReport - time.time() sleep=nextReport - time.time()
if verbose: print "process: sleep %s" % sleep
if sleep <= 0: if sleep <= 0:
break break
try: try:
@@ -291,7 +297,7 @@ def process():
running = False running = False
break break
except SystemExit: except SystemExit:
syslog.syslog(syslog.LOG_ERR, 'daemon exit, running=: %s' % running) log('daemon exit, running=: %s' % running)
if running: if running:
running = False running = False
break break
@@ -300,7 +306,6 @@ def process():
syslogtrace('select') syslogtrace('select')
running = False running = False
break break
if verbose: print "process: r is %s" % str(r)
for rfh in r[0]: for rfh in r[0]:
conn = conIds[rfh] conn = conIds[rfh]
data, addr = ifiles[rfh].recvfrom(MAXRECV) data, addr = ifiles[rfh].recvfrom(MAXRECV)
@@ -329,11 +334,11 @@ def process():
conns[conn].sendto(msg) conns[conn].sendto(msg)
nextReport = time.time() + interval nextReport = time.time() + interval
if verbose: print "process: done running" if verbose: log( "process: done running")
def cleanup(): def cleanup():
global running global running
if verbose: syslog.syslog(syslog.LOG_ERR, 'cleanup') if verbose: log('cleanup')
running = False running = False
for conn in conns: for conn in conns:
msg={'shutdown': 1, 'acks': conns[conn].ackcount} msg={'shutdown': 1, 'acks': conns[conn].ackcount}
@@ -344,7 +349,7 @@ def cleanup():
def closeall(): def closeall():
if verbose: syslog.syslog(syslog.LOG_ERR, 'closecall') if verbose: log('closecall')
for conn in conns: for conn in conns:
conns[conn].close() conns[conn].close()
@@ -468,9 +473,16 @@ if not msgonly:
conns = {} conns = {}
createConnections(hb_hosts) while True:
log(" create connections")
createConnections(hb_hosts)
if len(conns) != 0:
break
log(" no connections yet, sleep a bit")
time.sleep(2)
if verbose: print "%s connections created" % (len(conns)) if verbose:
log("%s connections created" % (len(conns)))
if len(msgboot) > 0: if len(msgboot) > 0:
if verbose: print "on boot" if verbose: print "on boot"
@@ -493,7 +505,6 @@ if fdaemon:
opid = pidfile.read_pid() opid = pidfile.read_pid()
except: except:
opid = None opid = None
if verbose: print "opid %s" % opid
if opid: if opid:
try: try:
@@ -527,7 +538,7 @@ if fdaemon:
for conn in conns: for conn in conns:
context.files_preserve += [conns[conn].sock, conns[conn].sock.fileno()] context.files_preserve += [conns[conn].sock, conns[conn].sock.fileno()]
with context: with context:
syslog.syslog(syslog.LOG_ERR, 'starting heartbeat to %s' % ','.join(hb_hosts)) log('starting heartbeat to %s' % ','.join(hb_hosts))
running = True running = True
try: try:
process() process()
@@ -542,7 +553,7 @@ else:
except Exception as e: except Exception as e:
if verbose: print "err: process exit: %s" % e if verbose: print "err: process exit: %s" % e
syslogtrace('process') syslogtrace('process')
if verbose: print "main: cleanup" if verbose: log( "main: cleanup")
cleanup() cleanup()
if dorestart: if dorestart:
restart() restart()