93 lines
2.7 KiB
Python
93 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import subprocess
|
|
import json
|
|
import sys
|
|
import argparse
|
|
from datetime import datetime
|
|
from fnmatch import fnmatch
|
|
|
|
# Unit-name patterns ignored by the check unless -w/--whitelist overrides
# them. Entries are compared with fnmatch, so shell-style wildcards work.
DEFAULT_WHITELIST = [
    "emergency.service",
    "rescue.service",
    "rc-local.service",
    "getty@tty1.service",
    "systemd-udev-settle.service",
    "systemd-pcrphase*.service",
    "apt-daily*.service",
    "fstrim.service",
    "man-db.service",
    "modprobe@*.service",
]
|
|
|
|
def get_all_services():
    """Fetch all systemd units of type service as parsed JSON.

    Runs ``systemctl list-units --type=service --all`` with JSON output and
    decodes the result.

    Returns:
        list: The decoded JSON array produced by systemctl.

    On failure the function does not return: it prints an ``UNKNOWN - ...``
    status line and exits with status 3. This happens when systemctl cannot
    be started or exits non-zero, when its output cannot be decoded as JSON,
    or when the decoded document is not a JSON array.
    """
    cmd = [
        "systemctl", "list-units", "--type=service", "--all",
        "--no-legend", "--no-pager", "--output=json",
    ]
    try:
        output = subprocess.check_output(cmd, text=True)
        services = json.loads(output)
    except (OSError, subprocess.SubprocessError) as e:
        # Missing binary, permission problem or non-zero exit: this is not a
        # parse error, so say what actually went wrong.
        print(f"UNKNOWN - systemctl call failed: {e}")
        sys.exit(3)
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError.
        print(f"UNKNOWN - systemctl JSON parse failed: {e}")
        sys.exit(3)

    if not isinstance(services, list):
        # Callers iterate over the result and call .get() on each entry; any
        # other JSON shape would crash later with a traceback.
        print(
            "UNKNOWN - systemctl JSON parse failed: "
            f"expected a list, got {type(services).__name__}"
        )
        sys.exit(3)
    return services
|
|
|
|
def is_whitelisted(name, whitelist):
    """Tell whether *name* matches at least one pattern in *whitelist*.

    Each pattern is compared against *name* with ``fnmatch``. An empty
    whitelist never matches.
    """
    for glob in whitelist:
        if fnmatch(name, glob):
            return True
    return False
|
|
|
|
def check_services(services, whitelist):
    """Collect the units whose reported state counts as a problem.

    Entries without a usable unit name and units matching *whitelist* are
    skipped. A remaining unit is reported when any of these holds:

    * its ``active`` or ``sub`` value is ``"failed"``;
    * its ``active`` value is neither ``"active"`` nor ``"inactive"``;
    * it is ``"inactive"`` with a ``sub`` value other than ``"dead"`` or
      ``"exited"``.

    Returns:
        list: ``(name, active, sub)`` tuples, in the order the units appear
        in *services*.
    """
    flagged = []
    for entry in services:
        unit = entry.get("unit")

        # Skip unreadable/corrupted entries.
        if not unit or unit == "●":
            continue

        if is_whitelisted(unit, whitelist):
            continue

        active_state = entry.get("active")
        sub_state = entry.get("sub")

        has_failed = "failed" in (active_state, sub_state)
        unexpected_active = active_state not in ("active", "inactive")
        inactive_with_odd_sub = (
            active_state == "inactive" and sub_state not in ("dead", "exited")
        )

        if has_failed or unexpected_active or inactive_with_odd_sub:
            flagged.append((unit, active_state, sub_state))
    return flagged
|
|
|
|
def format_output(problems):
    """Build the status line and exit code for the check result.

    Returns:
        tuple: ``(message, code)``. With no problems the message starts with
        ``OK`` and the code is 0. Otherwise the message starts with
        ``CRITICAL``, lists every entry as ``name: active/sub`` separated by
        commas, and the code is 2.
    """
    if problems:
        entries = [
            f"{unit}: {state}/{substate}" for unit, state, substate in problems
        ]
        listing = ", ".join(entries)
        return f"CRITICAL - Problematic services: {listing}", 2
    return "OK - All systemd services are healthy", 0
|
|
|
|
def log_result(message, logfile="/var/log/nagios_systemd_check.log"):
    """Append a timestamped copy of *message* to *logfile*.

    Each call adds one line of the form ``[<datetime.now()>] <message>``.
    The file is written as UTF-8, so a message containing non-ASCII
    characters is stored the same way under any locale.

    Logging is best-effort: if the file cannot be opened or written, the
    error is ignored and nothing is raised.
    """
    try:
        # Explicit encoding: the locale default may be ASCII, in which case
        # non-ASCII text would raise and the log line would be lost.
        with open(logfile, "a", encoding="utf-8") as f:
            f.write(f"[{datetime.now()}] {message}\n")
    except (OSError, ValueError):
        # Deliberately silent: a logging failure must not alter the check's
        # own output or exit status.
        pass
|
|
|
|
def main():
    """Parse the command line, run the service check and exit with its status.

    Prints the status line built by ``format_output``, appends it to the log
    file when ``--log`` is given, and exits with the accompanying code.

    Invalid command-line arguments exit with status 3 instead of argparse's
    default of 2. In this script 2 is the CRITICAL code and 3 the UNKNOWN
    code, so a usage error must not look like a failed service. ``--help``
    still exits with 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-w", "--whitelist", nargs="*", default=DEFAULT_WHITELIST,
        help="Whitelist of service patterns to ignore (fnmatch supported)",
    )
    parser.add_argument(
        "--log", action="store_true",
        help="Log result to /var/log/nagios_systemd_check.log",
    )
    try:
        args = parser.parse_args()
    except SystemExit as exc:
        # argparse has already printed its usage message; only the exit
        # status is remapped. Successful exits (--help) pass through.
        if exc.code in (0, None):
            raise
        sys.exit(3)

    services = get_all_services()
    problems = check_services(services, args.whitelist)
    message, code = format_output(problems)

    print(message)
    if args.log:
        log_result(message)
    sys.exit(code)


# Run the check only when the file is executed as a script.
if __name__ == "__main__":
    main()
|