feat: async subprocess in nagios_runner with stderr capture and signal handling
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -21,8 +21,9 @@ nagios_runner:
|
||||
```
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hbd.client.plugin import MonitorPlugin
|
||||
@@ -76,7 +77,6 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
||||
# Extract configuration
|
||||
self.commands: List[Dict[str, str]] = config.get("commands", []) if config else []
|
||||
self.timeout: int = config.get("timeout", 30) if config else 30
|
||||
self.shell: bool = config.get("shell", True) if config else True
|
||||
self.interval = config.get("interval", 300) if config else 300
|
||||
|
||||
async def initialize(self) -> bool:
|
||||
@@ -135,7 +135,7 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
||||
for metric_name, metric_value in perfdata.items():
|
||||
results[f"{name}_{metric_name}"] = metric_value
|
||||
|
||||
self.logger.debug(
|
||||
self.logger.info(
|
||||
f"Executed {name}: {STATUS_NAMES.get(status_code, 'UNKNOWN')} - {output[:50]}"
|
||||
)
|
||||
|
||||
@@ -157,46 +157,49 @@ class NagiosRunnerPlugin(MonitorPlugin):
|
||||
self,
|
||||
command: str
|
||||
) -> Tuple[int, str, Dict[str, Any]]:
|
||||
"""Execute a Nagios plugin and parse its output.
|
||||
|
||||
Args:
|
||||
command: Command string to execute
|
||||
|
||||
Returns:
|
||||
Tuple of (status_code, output_message, performance_data_dict)
|
||||
"""
|
||||
"""Execute a Nagios plugin and parse its output."""
|
||||
try:
|
||||
# Run command
|
||||
result = subprocess.run(
|
||||
proc = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
shell=self.shell,
|
||||
capture_output=True,
|
||||
timeout=self.timeout,
|
||||
text=True
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
|
||||
status_code = result.returncode
|
||||
output = result.stdout.strip()
|
||||
|
||||
# Nagios plugins can return codes > 3, treat as UNKNOWN
|
||||
try:
|
||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=self.timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
proc.kill()
|
||||
await proc.communicate()
|
||||
self.logger.error(f"Command timed out: {command}")
|
||||
return NAGIOS_UNKNOWN, f"Command timed out after {self.timeout}s", {}
|
||||
|
||||
status_code = proc.returncode
|
||||
|
||||
if status_code < 0:
|
||||
return NAGIOS_UNKNOWN, f"Process killed by signal {-status_code}", {}
|
||||
|
||||
if status_code > 3:
|
||||
status_code = NAGIOS_UNKNOWN
|
||||
|
||||
# Parse performance data
|
||||
perfdata = self._parse_perfdata(output)
|
||||
|
||||
# Extract just the status message (before the pipe if present)
|
||||
if '|' in output:
|
||||
output_msg = output.split('|')[0].strip()
|
||||
|
||||
stdout = stdout_bytes.decode(errors="replace").strip()
|
||||
stderr = stderr_bytes.decode(errors="replace").strip()
|
||||
|
||||
# Parse perfdata from stdout before mixing in stderr
|
||||
perfdata = self._parse_perfdata(stdout)
|
||||
|
||||
# Build status message
|
||||
status_part = stdout.split('|')[0].strip() if '|' in stdout else stdout
|
||||
|
||||
if not stdout and stderr:
|
||||
output_msg = stderr
|
||||
elif stdout and stderr:
|
||||
output_msg = f"{status_part} [stderr: {stderr}]"
|
||||
else:
|
||||
output_msg = output
|
||||
|
||||
output_msg = status_part
|
||||
|
||||
return status_code, output_msg, perfdata
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
self.logger.error(f"Command timed out: {command}")
|
||||
return NAGIOS_UNKNOWN, f"Command timed out after {self.timeout}s", {}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error executing command: {e}")
|
||||
return NAGIOS_UNKNOWN, f"Execution error: {str(e)}", {}
|
||||
|
||||
Reference in New Issue
Block a user