#!/usr/bin/env python3
# Reconstructed from patch 859746893ae46fb00d93db370bbd4ca204e8ea92
# ("Add check_systemd_services.py", author: gru, Sat, 7 Jun 2025 22:23:56 +0200).
"""Monitoring check that reports systemd services in a problematic state.

The script prints a single status line and exits with 0 when every
non-whitelisted service looks healthy ("OK"), 2 when at least one service is
problematic ("CRITICAL"), and 3 when the service list cannot be obtained
("UNKNOWN").
"""

import argparse
import json
import subprocess
import sys
from datetime import datetime
from fnmatch import fnmatch
from typing import Any, Dict, Iterable, List, Tuple

# Unit name patterns (fnmatch syntax) that are ignored unless the caller
# supplies a whitelist of their own.
DEFAULT_WHITELIST = [
    "emergency.service",
    "rescue.service",
    "rc-local.service",
    "getty@tty1.service",
    "systemd-udev-settle.service",
    "systemd-pcrphase*.service",
    "apt-daily*.service",
    "fstrim.service",
    "man-db.service",
    "modprobe@*.service",
]


def get_all_services() -> List[Dict[str, Any]]:
    """Return every systemd unit of type ``service`` as parsed JSON.

    Runs ``systemctl list-units`` with JSON output and decodes the result.

    Returns:
        The decoded JSON list, one entry per unit.

    Exits:
        Prints an ``UNKNOWN`` status line and exits with code 3 when
        ``systemctl`` cannot be started, exits non-zero, or its output is not
        a JSON list.
    """
    cmd = [
        "systemctl", "list-units", "--type=service", "--all",
        "--no-legend", "--no-pager", "--output=json",
    ]
    try:
        output = subprocess.check_output(cmd, text=True)
        services = json.loads(output)
        # check_services() iterates over the entries and calls .get() on each,
        # so anything other than a list is reported here as UNKNOWN rather
        # than surfacing later as a traceback.
        if not isinstance(services, list):
            raise ValueError(
                f"expected a JSON list, got {type(services).__name__}"
            )
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: systemctl missing or not executable.
        # SubprocessError: non-zero exit status.
        # ValueError: undecodable output or invalid/unexpected JSON.
        print(f"UNKNOWN - systemctl JSON parse failed: {e}")
        sys.exit(3)
    return services


def is_whitelisted(name: str, whitelist: Iterable[str]) -> bool:
    """Return True when *name* matches any fnmatch pattern in *whitelist*."""
    return any(fnmatch(name, pattern) for pattern in whitelist)


def _is_problematic(active: Any, sub: Any) -> bool:
    """Return True when an active/sub state pair should be reported."""
    if sub == "failed":
        return True
    # Anything other than plain active/inactive is reported; this includes
    # "failed" as well as a missing value (None).
    if active not in ("active", "inactive"):
        return True
    # An inactive unit is only accepted in the "dead" or "exited" sub-state.
    return active == "inactive" and sub not in ("dead", "exited")


def check_services(
    services: Iterable[Dict[str, Any]],
    whitelist: Iterable[str],
) -> List[Tuple[Any, Any, Any]]:
    """Collect the services whose state is considered problematic.

    Args:
        services: Mappings with the keys ``unit``, ``active`` and ``sub``
            (missing keys are treated as ``None``).
        whitelist: fnmatch patterns of unit names to ignore.

    Returns:
        A list of ``(unit, active, sub)`` tuples, in input order, for every
        non-whitelisted unit that is failed, in a state other than
        active/inactive, or inactive with a sub-state other than
        ``dead``/``exited``.
    """
    whitelist = list(whitelist)  # iterated once per service below
    problems = []
    for svc in services:
        name = svc.get("unit")
        active = svc.get("active")
        sub = svc.get("sub")

        if not name or name == "●":
            continue  # skip unreadable/corrupted entries

        if is_whitelisted(name, whitelist):
            continue

        if _is_problematic(active, sub):
            problems.append((name, active, sub))
    return problems


def format_output(problems: Iterable[Tuple[Any, Any, Any]]) -> Tuple[str, int]:
    """Build the status line and exit code for the given problem list.

    Returns:
        ``(message, 0)`` with an ``OK`` message when *problems* is empty,
        otherwise ``(message, 2)`` with a ``CRITICAL`` message listing each
        unit as ``name: active/sub``.
    """
    problems = list(problems)
    if not problems:
        return "OK - All systemd services are healthy", 0
    summary = ", ".join(
        f"{name}: {active}/{sub}" for name, active, sub in problems
    )
    return f"CRITICAL - Problematic services: {summary}", 2


def log_result(
    message: str,
    logfile: str = "/var/log/nagios_systemd_check.log",
) -> None:
    """Append *message*, prefixed with the current local time, to *logfile*.

    Logging is best-effort: a log file that cannot be opened or written is
    ignored so that it never changes the result of the check itself.
    """
    try:
        with open(logfile, "a", encoding="utf-8") as f:
            f.write(f"[{datetime.now()}] {message}\n")
    except OSError:
        pass


def main() -> None:
    """Parse the command line, run the check, print the result and exit."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-w", "--whitelist", nargs="*", default=DEFAULT_WHITELIST,
        help="Whitelist of service patterns to ignore (fnmatch supported)",
    )
    parser.add_argument(
        "--log", action="store_true",
        help="Log result to /var/log/nagios_systemd_check.log",
    )
    args = parser.parse_args()

    services = get_all_services()
    problems = check_services(services, args.whitelist)
    message, code = format_output(problems)

    print(message)
    if args.log:
        log_result(message)
    sys.exit(code)


if __name__ == "__main__":
    main()