#!/usr/bin/env python3
"""
service_restarter.py — Universal service watchdog and auto-restarter for systemd-managed services.

Features:
- Checks if a service is listening on a given port and IP using TCP/UDP
- Supports multiple check methods:
    • listen — direct socket connection (default)
    • curl   — HTTP GET request using curl
    • ping   — ICMP echo (host-level reachability only; --proto/--port are not probed)
    • nc     — use netcat to test open port
    • cmd    — run a custom command given via --check-cmd
- Restarts systemd service if check fails
- Configurable retry attempts and delay between retries
- Permission check (must be run as root)
- Quiet mode for cron use

Exit codes:
    0 — service is responding (possibly after a restart)
    2 — service is still not responding after the restart
    3 — not run as root, or --check-method cmd used without --check-cmd

Examples:
    # Basic TCP check:
    ./service_restarter.py --proto tcp --port 631 --service cups

    # With curl:
    ./service_restarter.py --proto tcp --port 80 --host 192.168.1.10 --service apache2 --check-method curl

    # With ping and retry config:
    ./service_restarter.py --proto tcp --port 22 --service sshd --check-method ping --retries 5 --delay 2

    # Using custom command:
    ./service_restarter.py --proto tcp --port 8080 --service myapp --check-method cmd --check-cmd 'curl -fs http://localhost:8080/health'

    # Cron-friendly:
    */5 * * * * root /usr/local/bin/service_restarter.py --proto tcp --port 631 --service cups --quiet
"""

import socket
import subprocess
import argparse
import sys
import time
import os
import shutil


def _command_succeeds(command, timeout, shell=False):
    """Run *command* silently and report whether it exited with status 0.

    Args:
        command: argv list, or a shell string when ``shell`` is True.
        timeout: seconds to wait before the command is treated as failed.
        shell: passed through to ``subprocess.run``.

    Returns:
        True only if the process ran and returned exit status 0; False on a
        non-zero status, a timeout, or when the program cannot be started.
    """
    try:
        result = subprocess.run(
            command,
            shell=shell,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=timeout,
        )
    except Exception:
        # Deliberately broad: a probe must never crash the watchdog. Missing
        # binary, timeout and malformed arguments all mean "check failed".
        return False
    return result.returncode == 0


def is_tcp_listening(host, port):
    """Return True if a TCP connection to (host, port) succeeds within 1 second."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(1)
        try:
            s.connect((host, port))
            return True
        except Exception:
            # Refused, timed out, unresolvable host, invalid port: all "down".
            return False


def is_udp_listening(host, port):
    """Return True if an empty UDP datagram to (host, port) gets a reply within 1 second.

    Note: a service that does not answer an empty datagram is reported as not
    listening, because the check waits for a response.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            s.settimeout(1)
            s.sendto(b'', (host, port))
            s.recvfrom(1024)
            return True
        except Exception:
            return False


def check_with_curl(host, port):
    """Return True if ``curl -fs http://host:port/`` exits 0 within 5 seconds."""
    return _command_succeeds(["curl", "-fs", f"http://{host}:{port}/"], timeout=5)


def check_with_ping(host):
    """Return True if a single ICMP echo to *host* succeeds (ping exits 0).

    The exit status is checked explicitly: merely being able to launch
    ``ping`` does not mean the host answered.
    """
    return _command_succeeds(["ping", "-c", "1", "-W", "2", host], timeout=3)


def check_with_nc(host, port):
    """Return True if ``nc -z`` reports the port open (exit status 0).

    Returns False when no ``nc`` executable is found on PATH.
    """
    nc = shutil.which("nc")
    if not nc:
        return False
    return _command_succeeds([nc, "-z", "-w", "2", host, str(port)], timeout=3)


def check_with_cmd(command):
    """Return True if the shell command string exits 0 within 5 seconds.

    The command comes from the operator's own --check-cmd argument and is
    intentionally run through the shell so pipelines and quoting work.
    """
    return _command_succeeds(command, timeout=5, shell=True)


def restart_service(service):
    """Restart *service* via ``systemctl restart`` and give it 2 seconds to come up.

    Returns:
        True if systemctl exited with status 0, False if it failed or could
        not be executed at all.
    """
    try:
        result = subprocess.run(["systemctl", "restart", service], check=False)
    except OSError:
        # systemctl missing or not executable: nothing was restarted.
        return False
    time.sleep(2)
    return result.returncode == 0


def log(msg, quiet):
    """Print *msg* unless *quiet* is set."""
    if not quiet:
        print(msg)


def main():
    """Parse arguments, probe the service with retries, and restart it if unreachable."""
    parser = argparse.ArgumentParser(
        description="Check if a service is reachable on a given host/proto/port and restart it via systemd if not."
    )
    parser.add_argument("--proto", required=True, choices=["tcp", "udp"], help="Protocol to check: tcp or udp (required)")
    parser.add_argument("--port", required=True, type=int, help="Port number to check (required)")
    parser.add_argument("--host", default="127.0.0.1", help="IP address to check (default: 127.0.0.1)")
    parser.add_argument("--service", required=True, help="Systemd service name to restart if check fails (required)")
    parser.add_argument("--check-method", choices=["listen", "curl", "ping", "nc", "cmd"], default="listen",
                        help="How to check service: listen (default), curl, ping, nc, cmd")
    parser.add_argument("--check-cmd", help="Custom shell command to run when --check-method is 'cmd'")
    parser.add_argument("--retries", type=int, default=3, help="Number of retries if service is unreachable (default: 3)")
    parser.add_argument("--delay", type=int, default=2, help="Delay between retries in seconds (default: 2)")
    parser.add_argument("--quiet", action="store_true", help="Suppress all output (for cron)")

    args = parser.parse_args()

    if os.geteuid() != 0:
        print("ERROR: This script must be run as root.")
        sys.exit(3)

    # Validate once, up front, instead of on every probe.
    if args.check_method == "cmd" and not args.check_cmd:
        log("ERROR: --check-cmd is required with --check-method cmd", args.quiet)
        sys.exit(3)

    listeners = {"tcp": is_tcp_listening, "udp": is_udp_listening}
    checks = {
        "listen": lambda: listeners[args.proto](args.host, args.port),
        "curl": lambda: check_with_curl(args.host, args.port),
        "ping": lambda: check_with_ping(args.host),
        "nc": lambda: check_with_nc(args.host, args.port),
        "cmd": lambda: check_with_cmd(args.check_cmd),
    }
    run_check = checks[args.check_method]

    # Always probe at least once: with --retries 0 (or negative) the service
    # would otherwise be restarted without ever being checked.
    attempts = max(1, args.retries)
    # time.sleep() rejects negative values.
    delay = max(0, args.delay)

    for attempt in range(1, attempts + 1):
        if run_check():
            log(f"OK: Service is responding on {args.proto.upper()} {args.host}:{args.port}", args.quiet)
            sys.exit(0)
        log(f"Attempt {attempt}/{attempts} failed.", args.quiet)
        if attempt < attempts:
            time.sleep(delay)

    log(f"FAIL: Service unreachable after {attempts} attempts. Restarting {args.service}...", args.quiet)
    if not restart_service(args.service):
        log(f"WARNING: 'systemctl restart {args.service}' did not complete successfully.", args.quiet)

    # One final check after restart
    if run_check():
        log(f"OK: {args.service} is responding after restart.", args.quiet)
        sys.exit(0)
    else:
        log(f"ERROR: {args.service} is still not responding after restart.", args.quiet)
        sys.exit(2)


if __name__ == "__main__":
    main()