Add service_restarter.py

This commit is contained in:
gru
2025-06-17 00:15:53 +02:00
parent 7677e337e3
commit ef8eba61cd

175
service_restarter.py Normal file
View File

@ -0,0 +1,175 @@
#!/usr/bin/env python3
"""
service_restarter.py — Universal service watchdog and auto-restarter for systemd-managed services.
Features:
- Checks if a service is listening on a given port and IP using TCP/UDP
- Supports multiple check methods:
• listen — direct socket connection (default)
• curl — HTTP GET request using curl
• ping — ICMP echo (host-level reachability only; --proto and --port are not probed)
• nc — use netcat to test open port
• cmd — run a custom command given via --check-cmd
- Restarts systemd service if check fails
- Configurable retry attempts and delay between retries
- Permission check (must be run as root)
- Quiet mode for cron use
Examples:
# Basic TCP check:
./service_restarter.py --proto tcp --port 631 --service cups
# With curl:
./service_restarter.py --proto tcp --port 80 --host 192.168.1.10 --service apache2 --check-method curl
# With ping and retry config:
./service_restarter.py --proto tcp --port 22 --service sshd --check-method ping --retries 5 --delay 2
# Using custom command:
./service_restarter.py --proto tcp --port 8080 --service myapp --check-method cmd --check-cmd 'curl -fs http://localhost:8080/health'
# Cron-friendly:
*/5 * * * * root /usr/local/bin/service_restarter.py --proto tcp --port 631 --service cups --quiet
"""
import socket
import subprocess
import argparse
import sys
import time
import os
import shutil
def is_tcp_listening(host, port):
    """Return True if a TCP connection to (host, port) can be established.

    Uses socket.create_connection so the address family is chosen from the
    host itself: IPv4 literals, IPv6 literals (e.g. "::1") and resolvable
    host names all work. Each connection attempt is limited to one second.

    Args:
        host: IP address or host name to connect to.
        port: TCP port number.

    Returns:
        True when the connection succeeds, False on refusal, timeout,
        resolution failure or an invalid host/port value.
    """
    try:
        # The context manager closes the probe connection immediately.
        with socket.create_connection((host, port), timeout=1):
            return True
    except (OSError, ValueError, OverflowError):
        # OSError covers refused/timeout/unreachable/gaierror; ValueError and
        # OverflowError cover malformed host strings and out-of-range ports.
        return False
def is_udp_listening(host, port, timeout=1):
    """Return True if a UDP port appears to be open on (host, port).

    UDP is connectionless, so a healthy service is free to ignore an empty
    probe datagram. Requiring a reply (as a naive check would) reports most
    working UDP services as down. Instead this uses a *connected* UDP socket:
    on a connected socket the OS surfaces an ICMP "port unreachable" answer
    as an error on the next receive, which is the reliable sign of a closed
    port. Silence until the timeout is treated as "listening".

    NOTE: a firewall that silently drops packets is indistinguishable from a
    quiet service with this method.

    Args:
        host: IP address or host name to probe.
        port: UDP port number.
        timeout: Seconds to wait for a reply or an ICMP error (default 1).

    Returns:
        True if a reply arrived or nothing came back within the timeout,
        False if the port was reported unreachable or the probe could not
        be sent at all.
    """
    try:
        # Resolve first so IPv4 and IPv6 targets both get the right family.
        family, socktype, proto, _canon, sockaddr = socket.getaddrinfo(
            host, port, type=socket.SOCK_DGRAM
        )[0]
        with socket.socket(family, socktype, proto) as s:
            s.settimeout(timeout)
            s.connect(sockaddr)
            s.send(b"")
            try:
                s.recv(1024)
            except socket.timeout:
                # No reply and no ICMP error: the port is open (or filtered).
                return True
            return True
    except (OSError, ValueError, OverflowError):
        # ConnectionRefusedError (ICMP port unreachable), resolution errors,
        # unreachable networks and invalid host/port values all land here.
        return False
def check_with_curl(host, port):
    """Probe http://host:port/ with `curl -fs` and report whether it succeeded.

    Returns True only when curl exits with status 0. Any exception while
    launching or waiting for curl (including the 5-second timeout) yields
    False.
    """
    try:
        completed = subprocess.run(
            ["curl", "-fs", f"http://{host}:{port}/"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=5,
        )
    except Exception:
        return False
    else:
        return completed.returncode == 0
def check_with_ping(host):
    """Return True if a single `ping` to host succeeds.

    Runs `ping -c 1 -W 2 host` with output discarded and a 3-second overall
    timeout. The result is based on ping's exit status: subprocess.run does
    not raise on a non-zero exit unless check=True is given, so the return
    code must be inspected explicitly.

    Args:
        host: Address or host name handed to ping.

    Returns:
        True when ping exits with status 0, False when it exits non-zero,
        times out, or cannot be started (e.g. ping is not installed).
    """
    try:
        result = subprocess.run(
            ["ping", "-c", "1", "-W", "2", host],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=3,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Missing binary, timeout, or an argument subprocess refuses to pass.
        return False
    return result.returncode == 0
def check_with_nc(host, port):
    """Return True if netcat can reach host:port.

    Runs `nc -z -w 2 host port` with output discarded and a 3-second overall
    timeout. The result is based on nc's exit status: subprocess.run does not
    raise on a non-zero exit unless check=True is given, so the return code
    must be inspected explicitly.

    Args:
        host: Address or host name to probe.
        port: Port number; converted to a string for the command line.

    Returns:
        True when nc exits with status 0. False when nc is not found on
        PATH, exits non-zero, times out, or cannot be started.
    """
    nc = shutil.which("nc")
    if not nc:
        return False
    try:
        result = subprocess.run(
            [nc, "-z", "-w", "2", host, str(port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=3,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Launch failure, timeout, or an argument subprocess refuses to pass.
        return False
    return result.returncode == 0
def check_with_cmd(command):
    """Run a user-supplied shell command and report whether it exited with 0.

    The command string is executed through the shell with its output
    discarded and a 5-second timeout. Any exception (including the timeout)
    is treated as a failed check.
    """
    try:
        exit_status = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=5,
        ).returncode
    except Exception:
        return False
    return exit_status == 0
def restart_service(service):
    """Run `systemctl restart <service>`, then pause for two seconds.

    The exit status of systemctl is not checked (check=False); the caller is
    expected to re-probe the service afterwards.
    """
    restart_cmd = ["systemctl", "restart", service]
    subprocess.run(restart_cmd, check=False)
    # Fixed pause before the caller probes the service again.
    time.sleep(2)
def log(msg, quiet):
    """Print msg to standard output unless quiet is truthy."""
    if quiet:
        return
    print(msg)
def main():
    """Parse CLI arguments, probe the service, and restart it if unreachable.

    Flow:
      1. Parse and validate the command line.
      2. Require root (effective UID 0).
      3. Run the selected check up to --retries times, sleeping --delay
         seconds between failed attempts; exit 0 on the first success.
      4. If every attempt fails, restart the systemd unit and probe once more.

    Exit codes:
      0 — the service responded (before or after the restart)
      2 — the service is still not responding after the restart
          (argparse also exits with 2 on usage errors)
      3 — not running as root, or --check-method cmd without --check-cmd
    """
    parser = argparse.ArgumentParser(
        description="Check if a service is reachable on a given host/proto/port and restart it via systemd if not."
    )
    parser.add_argument("--proto", required=True, choices=["tcp", "udp"], help="Protocol to check: tcp or udp (required)")
    parser.add_argument("--port", required=True, type=int, help="Port number to check (required)")
    parser.add_argument("--host", default="127.0.0.1", help="IP address to check (default: 127.0.0.1)")
    parser.add_argument("--service", required=True, help="Systemd service name to restart if check fails (required)")
    parser.add_argument("--check-method", choices=["listen", "curl", "ping", "nc", "cmd"], default="listen",
                        help="How to check service: listen (default), curl, ping, nc, cmd")
    parser.add_argument("--check-cmd", help="Custom shell command to run when --check-method is 'cmd'")
    parser.add_argument("--retries", type=int, default=3, help="Number of retries if service is unreachable (default: 3)")
    parser.add_argument("--delay", type=int, default=2, help="Delay between retries in seconds (default: 2)")
    parser.add_argument("--quiet", action="store_true", help="Suppress all output (for cron)")
    args = parser.parse_args()

    # With fewer than one attempt the check loop below would never run and
    # the service would be restarted without ever having been probed.
    if args.retries < 1:
        parser.error("--retries must be at least 1")
    # time.sleep() raises ValueError for negative durations.
    if args.delay < 0:
        parser.error("--delay must not be negative")

    if os.geteuid() != 0:
        print("ERROR: This script must be run as root.")
        sys.exit(3)

    # Validate once, up front, so the probe function below only dispatches.
    if args.check_method == "cmd" and not args.check_cmd:
        log("ERROR: --check-cmd is required with --check-method cmd", args.quiet)
        sys.exit(3)

    def run_check():
        """Run the probe selected by --check-method and return its boolean result."""
        method = args.check_method
        if method == "listen":
            # argparse restricts --proto to "tcp" or "udp".
            if args.proto == "tcp":
                return is_tcp_listening(args.host, args.port)
            return is_udp_listening(args.host, args.port)
        if method == "curl":
            return check_with_curl(args.host, args.port)
        if method == "ping":
            return check_with_ping(args.host)
        if method == "nc":
            return check_with_nc(args.host, args.port)
        if method == "cmd":
            return check_with_cmd(args.check_cmd)
        return False

    for attempt in range(1, args.retries + 1):
        if run_check():
            log(f"OK: Service is responding on {args.proto.upper()} {args.host}:{args.port}", args.quiet)
            sys.exit(0)
        log(f"Attempt {attempt}/{args.retries} failed.", args.quiet)
        # No point sleeping after the final attempt.
        if attempt < args.retries:
            time.sleep(args.delay)

    log(f"FAIL: Service unreachable after {args.retries} attempts. Restarting {args.service}...", args.quiet)
    restart_service(args.service)

    # One final check after restart
    if run_check():
        log(f"OK: {args.service} is responding after restart.", args.quiet)
        sys.exit(0)
    else:
        log(f"ERROR: {args.service} is still not responding after restart.", args.quiet)
        sys.exit(2)
# Run the watchdog only when executed as a script, not when imported.
if __name__ == "__main__":
    main()