improve logging
This commit is contained in:
@@ -16,6 +16,7 @@ import md5
|
|||||||
import shutil
|
import shutil
|
||||||
import zlib
|
import zlib
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import syslog
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import lockfile
|
import lockfile
|
||||||
@@ -39,12 +40,20 @@ except:
|
|||||||
# which on FreeBSD is close to a million
|
# which on FreeBSD is close to a million
|
||||||
# hack: replace the function in daemon with this one:
|
# hack: replace the function in daemon with this one:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def log(msg):
|
||||||
|
if fdaemon:
|
||||||
|
syslog.syslog(syslog.LOG_ERR, msg)
|
||||||
|
else:
|
||||||
|
print msg
|
||||||
|
|
||||||
|
|
||||||
def get_maximum_file_descriptors():
|
def get_maximum_file_descriptors():
|
||||||
return 2048
|
return 2048
|
||||||
|
|
||||||
daemon.get_maximum_file_descriptors = get_maximum_file_descriptors
|
daemon.get_maximum_file_descriptors = get_maximum_file_descriptors
|
||||||
|
|
||||||
import syslog
|
|
||||||
|
|
||||||
|
|
||||||
PORT = 50003
|
PORT = 50003
|
||||||
@@ -87,7 +96,8 @@ class Conn:
|
|||||||
msg['time'] = time.time()
|
msg['time'] = time.time()
|
||||||
m = dicttos(ID, msg)
|
m = dicttos(ID, msg)
|
||||||
mz = zlib.compress(m,9)
|
mz = zlib.compress(m,9)
|
||||||
if verbose: print "conn.send('%s', (%s:%s) %s>%s)" % (msg, self.addr, self.port, len(m), len(mz))
|
if verbose:
|
||||||
|
log("conn.send('%s', (%s:%s) %s>%s)" % (msg, self.addr, self.port, len(m), len(mz)))
|
||||||
try:
|
try:
|
||||||
self.sock.sendto(m, (self.addr, self.port))
|
self.sock.sendto(m, (self.addr, self.port))
|
||||||
except socket.error as e:
|
except socket.error as e:
|
||||||
@@ -101,7 +111,7 @@ class Conn:
|
|||||||
def ack(self, msgDict):
|
def ack(self, msgDict):
|
||||||
self.lastack = time.time()
|
self.lastack = time.time()
|
||||||
self.lastacksent = float(msgDict.get('time','0'))
|
self.lastacksent = float(msgDict.get('time','0'))
|
||||||
if verbose: print "ack RTT: %0.1f ms" % ((self.lastack - self.lastsend) * 1000.0)
|
if verbose: log("ack RTT: %0.1f ms" % ((self.lastack - self.lastsend) * 1000.0))
|
||||||
self.rtts.append((self.lastack - self.lastsend) * 1000.0)
|
self.rtts.append((self.lastack - self.lastsend) * 1000.0)
|
||||||
if len(self.rtts) > 10:
|
if len(self.rtts) > 10:
|
||||||
del self.rtts[0]
|
del self.rtts[0]
|
||||||
@@ -165,10 +175,9 @@ def stodict(msg):
|
|||||||
|
|
||||||
def syslogtrace(note):
|
def syslogtrace(note):
|
||||||
logm = '%s hbc died: \n%s' % (note, traceback.format_exc())
|
logm = '%s hbc died: \n%s' % (note, traceback.format_exc())
|
||||||
|
log(logm)
|
||||||
for l in logm.split('\n'):
|
for l in logm.split('\n'):
|
||||||
syslog.syslog(syslog.LOG_ERR, ' tb: %s' % l)
|
log(' tb: %s' % l)
|
||||||
if verbose:
|
|
||||||
print logm
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -176,21 +185,21 @@ conId = 1
|
|||||||
def createConnections(hosts):
|
def createConnections(hosts):
|
||||||
global conId
|
global conId
|
||||||
for host in hosts:
|
for host in hosts:
|
||||||
if verbose: print "createConnections for %s" % host
|
if verbose: log("createConnections for %s" % host)
|
||||||
try:
|
try:
|
||||||
rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
|
rs=socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
|
||||||
except socket.gaierror:
|
except socket.gaierror:
|
||||||
logm = '%s hbc died: \n%s' % ('createConnections', traceback.format_exc())
|
logm = '%s hbc died: \n%s' % ('createConnections', traceback.format_exc())
|
||||||
if verbose: print logm
|
if verbose: log(logm)
|
||||||
return None
|
return None
|
||||||
for r in rs:
|
for r in rs:
|
||||||
if verbose: print "address %s" % str(r)
|
if verbose: log("address %s" % str(r))
|
||||||
if r[0] in [10, 24, 28, 30]: # for Linux, NetBSD, FreeBSD
|
if r[0] in [10, 24, 28, 30]: # for Linux, NetBSD, FreeBSD
|
||||||
af=socket.AF_INET6
|
af=socket.AF_INET6
|
||||||
elif r[0] == 2:
|
elif r[0] == 2:
|
||||||
af=socket.AF_INET
|
af=socket.AF_INET
|
||||||
else:
|
else:
|
||||||
print "dont know this net type: %s" % r[0][0]
|
log("dont know this net type: %s" % r[0][0])
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
addr = r[4][0]
|
addr = r[4][0]
|
||||||
@@ -226,7 +235,7 @@ def doupdate(conn, msgDict):
|
|||||||
msg={'service': 'update', 'msg': fail if fail else "OK"}
|
msg={'service': 'update', 'msg': fail if fail else "OK"}
|
||||||
conns[conn].sendto(msg)
|
conns[conn].sendto(msg)
|
||||||
if not fail:
|
if not fail:
|
||||||
syslog.syslog(syslog.LOG_ERR, 'hc updates, fs = %s' % (len(code)))
|
log('hc updates, fs = %s' % (len(code)))
|
||||||
|
|
||||||
return fail
|
return fail
|
||||||
|
|
||||||
@@ -257,16 +266,14 @@ def doupdateone(code, csum):
|
|||||||
|
|
||||||
|
|
||||||
def restart():
|
def restart():
|
||||||
if verbose:
|
log('restart %s' % (sys.argv[0]))
|
||||||
print "restart: execv %s %s" % (sys.argv[0], [sys.argv[0]]+cmdargs)
|
|
||||||
syslog.syslog(syslog.LOG_ERR, 'restart %s' % (sys.argv[0]))
|
|
||||||
e = "fallthrough"
|
e = "fallthrough"
|
||||||
try:
|
try:
|
||||||
os.execv(sys.argv[0], [sys.argv[0]]+cmdargs)
|
os.execv(sys.argv[0], [sys.argv[0]]+cmdargs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
print "should not be here:", str(e)
|
print "should not be here:", str(e)
|
||||||
syslog.syslog(syslog.LOG_ERR, 'restart failed: %s' % e)
|
log('restart failed: %s' % e)
|
||||||
|
|
||||||
|
|
||||||
def process():
|
def process():
|
||||||
@@ -282,7 +289,6 @@ def process():
|
|||||||
while running:
|
while running:
|
||||||
while time.time() < nextReport:
|
while time.time() < nextReport:
|
||||||
sleep=nextReport - time.time()
|
sleep=nextReport - time.time()
|
||||||
if verbose: print "process: sleep %s" % sleep
|
|
||||||
if sleep <= 0:
|
if sleep <= 0:
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
@@ -291,7 +297,7 @@ def process():
|
|||||||
running = False
|
running = False
|
||||||
break
|
break
|
||||||
except SystemExit:
|
except SystemExit:
|
||||||
syslog.syslog(syslog.LOG_ERR, 'daemon exit, running=: %s' % running)
|
log('daemon exit, running=: %s' % running)
|
||||||
if running:
|
if running:
|
||||||
running = False
|
running = False
|
||||||
break
|
break
|
||||||
@@ -300,7 +306,6 @@ def process():
|
|||||||
syslogtrace('select')
|
syslogtrace('select')
|
||||||
running = False
|
running = False
|
||||||
break
|
break
|
||||||
if verbose: print "process: r is %s" % str(r)
|
|
||||||
for rfh in r[0]:
|
for rfh in r[0]:
|
||||||
conn = conIds[rfh]
|
conn = conIds[rfh]
|
||||||
data, addr = ifiles[rfh].recvfrom(MAXRECV)
|
data, addr = ifiles[rfh].recvfrom(MAXRECV)
|
||||||
@@ -329,11 +334,11 @@ def process():
|
|||||||
conns[conn].sendto(msg)
|
conns[conn].sendto(msg)
|
||||||
nextReport = time.time() + interval
|
nextReport = time.time() + interval
|
||||||
|
|
||||||
if verbose: print "process: done running"
|
if verbose: log( "process: done running")
|
||||||
|
|
||||||
def cleanup():
|
def cleanup():
|
||||||
global running
|
global running
|
||||||
if verbose: syslog.syslog(syslog.LOG_ERR, 'cleanup')
|
if verbose: log('cleanup')
|
||||||
running = False
|
running = False
|
||||||
for conn in conns:
|
for conn in conns:
|
||||||
msg={'shutdown': 1, 'acks': conns[conn].ackcount}
|
msg={'shutdown': 1, 'acks': conns[conn].ackcount}
|
||||||
@@ -344,7 +349,7 @@ def cleanup():
|
|||||||
|
|
||||||
|
|
||||||
def closeall():
|
def closeall():
|
||||||
if verbose: syslog.syslog(syslog.LOG_ERR, 'closecall')
|
if verbose: log('closecall')
|
||||||
for conn in conns:
|
for conn in conns:
|
||||||
conns[conn].close()
|
conns[conn].close()
|
||||||
|
|
||||||
@@ -468,9 +473,16 @@ if not msgonly:
|
|||||||
|
|
||||||
|
|
||||||
conns = {}
|
conns = {}
|
||||||
createConnections(hb_hosts)
|
while True:
|
||||||
|
log(" create connections")
|
||||||
|
createConnections(hb_hosts)
|
||||||
|
if len(conns) != 0:
|
||||||
|
break
|
||||||
|
log(" no connections yet, sleep a bit")
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
if verbose: print "%s connections created" % (len(conns))
|
if verbose:
|
||||||
|
log("%s connections created" % (len(conns)))
|
||||||
|
|
||||||
if len(msgboot) > 0:
|
if len(msgboot) > 0:
|
||||||
if verbose: print "on boot"
|
if verbose: print "on boot"
|
||||||
@@ -493,7 +505,6 @@ if fdaemon:
|
|||||||
opid = pidfile.read_pid()
|
opid = pidfile.read_pid()
|
||||||
except:
|
except:
|
||||||
opid = None
|
opid = None
|
||||||
if verbose: print "opid %s" % opid
|
|
||||||
|
|
||||||
if opid:
|
if opid:
|
||||||
try:
|
try:
|
||||||
@@ -527,7 +538,7 @@ if fdaemon:
|
|||||||
for conn in conns:
|
for conn in conns:
|
||||||
context.files_preserve += [conns[conn].sock, conns[conn].sock.fileno()]
|
context.files_preserve += [conns[conn].sock, conns[conn].sock.fileno()]
|
||||||
with context:
|
with context:
|
||||||
syslog.syslog(syslog.LOG_ERR, 'starting heartbeat to %s' % ','.join(hb_hosts))
|
log('starting heartbeat to %s' % ','.join(hb_hosts))
|
||||||
running = True
|
running = True
|
||||||
try:
|
try:
|
||||||
process()
|
process()
|
||||||
@@ -542,7 +553,7 @@ else:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
if verbose: print "err: process exit: %s" % e
|
if verbose: print "err: process exit: %s" % e
|
||||||
syslogtrace('process')
|
syslogtrace('process')
|
||||||
if verbose: print "main: cleanup"
|
if verbose: log( "main: cleanup")
|
||||||
cleanup()
|
cleanup()
|
||||||
if dorestart:
|
if dorestart:
|
||||||
restart()
|
restart()
|
||||||
|
|||||||
Reference in New Issue
Block a user