Add service_restarter.py
This commit is contained in:
175
service_restarter.py
Normal file
175
service_restarter.py
Normal file
@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
service_restarter.py — Universal service watchdog and auto-restarter for systemd-managed services.
|
||||
|
||||
Features:
|
||||
- Checks if a service is listening on a given port and IP using TCP/UDP
|
||||
- Supports multiple check methods:
|
||||
• listen — direct socket connection (default)
|
||||
• curl — HTTP GET request using curl
|
||||
• ping — ICMP echo (host-level reachability only; --proto and --port are not probed)
|
||||
• nc — use netcat to test open port
|
||||
• cmd — run a custom command given via --check-cmd
|
||||
- Restarts systemd service if check fails
|
||||
- Configurable retry attempts and delay between retries
|
||||
- Permission check (must be run as root)
|
||||
- Quiet mode for cron use
|
||||
|
||||
Examples:
|
||||
# Basic TCP check:
|
||||
./service_restarter.py --proto tcp --port 631 --service cups
|
||||
|
||||
# With curl:
|
||||
./service_restarter.py --proto tcp --port 80 --host 192.168.1.10 --service apache2 --check-method curl
|
||||
|
||||
# With ping and retry config:
|
||||
./service_restarter.py --proto tcp --port 22 --service sshd --check-method ping --retries 5 --delay 2
|
||||
|
||||
# Using custom command:
|
||||
./service_restarter.py --proto tcp --port 8080 --service myapp --check-method cmd --check-cmd 'curl -fs http://localhost:8080/health'
|
||||
|
||||
# Cron-friendly:
|
||||
*/5 * * * * root /usr/local/bin/service_restarter.py --proto tcp --port 631 --service cups --quiet
|
||||
"""
|
||||
|
||||
import socket
|
||||
import subprocess
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
import shutil
|
||||
|
||||
def is_tcp_listening(host, port):
    """Return True if a TCP connection to ``host:port`` can be established.

    The connection attempt uses an IPv4 stream socket with a 1 second
    timeout. Any exception raised while connecting is treated as
    "not listening" and results in False.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.settimeout(1)
        try:
            probe.connect((host, port))
        except Exception:
            return False
        else:
            return True
|
||||
|
||||
def is_udp_listening(host, port, timeout=1):
    """Best-effort check whether something is bound to UDP ``host:port``.

    UDP has no handshake, so an empty datagram is sent over a *connected*
    socket and the outcome is interpreted as follows:

    * a reply arrives                      -> True
    * nothing arrives before ``timeout``   -> True (bound but silent; most
      UDP services do not answer an empty datagram)
    * any other error, e.g. the ICMP "port unreachable" that a closed port
      produces (seen as ``ConnectionRefusedError``) -> False

    Args:
        host: IPv4 address or host name to probe.
        port: UDP port number.
        timeout: Seconds to wait for a reply or an ICMP error (default 1).

    Returns:
        bool: False only when the probe positively failed.

    Note:
        If a firewall drops ICMP errors, a closed port cannot be told apart
        from a silent service and is reported as listening.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.settimeout(timeout)
        try:
            # connect() sends nothing for UDP; it only fixes the peer so the
            # kernel can report ICMP errors for this destination back to us
            # (Linux does not do that for unconnected sockets by default).
            s.connect((host, port))
            s.send(b"")
            s.recvfrom(1024)
            return True
        except socket.timeout:
            # No answer, but also no "port unreachable": assume the service
            # is up and simply ignored the empty datagram.
            return True
        except Exception:
            return False
|
||||
|
||||
def check_with_curl(host, port):
    """Return True if ``curl -fs http://host:port/`` exits with status 0.

    curl's output is discarded and the call is limited to 5 seconds. Any
    exception (curl not installed, timeout, ...) counts as a failed check.
    """
    try:
        url = f"http://{host}:{port}/"
        exit_code = subprocess.run(
            ["curl", "-fs", url],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=5,
        ).returncode
    except Exception:
        return False
    return exit_code == 0
|
||||
|
||||
def check_with_ping(host):
    """Return True if ``host`` answers a single ICMP echo request.

    Runs ``ping -c 1 -W 2 <host>`` with its output discarded and an overall
    limit of 3 seconds.

    Args:
        host: Address or host name to ping.

    Returns:
        bool: True only when ping exits with status 0. False when ping exits
        non-zero, cannot be started, or exceeds the time limit.
    """
    try:
        result = subprocess.run(
            ["ping", "-c", "1", "-W", "2", host],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=3,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # ping missing / not executable, malformed argument, or timed out.
        return False
    # A successful launch is not enough: ping signals "no reply" through a
    # non-zero exit status, so the status has to be inspected.
    return result.returncode == 0
|
||||
|
||||
def check_with_nc(host, port):
    """Return True if netcat can reach ``host:port``.

    Runs ``nc -z -w 2 <host> <port>`` with its output discarded and an
    overall limit of 3 seconds.

    Args:
        host: Address or host name to probe.
        port: Port number (converted to a string for the command line).

    Returns:
        bool: True only when nc exits with status 0. False when ``nc`` is not
        on PATH, exits non-zero, cannot be started, or exceeds the time limit.
    """
    nc = shutil.which("nc")
    if not nc:
        return False
    try:
        result = subprocess.run(
            [nc, "-z", "-w", "2", host, str(port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=3,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # nc not executable, malformed argument, or timed out.
        return False
    # nc reports a closed/unreachable port via a non-zero exit status, so
    # merely having run it does not mean the port is open.
    return result.returncode == 0
|
||||
|
||||
def check_with_cmd(command):
    """Run ``command`` through the shell and report whether it exited with 0.

    Output is discarded and the command is limited to 5 seconds. Any
    exception (including the timeout expiring) counts as a failed check.
    """
    run_options = {
        "shell": True,
        "stdout": subprocess.DEVNULL,
        "stderr": subprocess.DEVNULL,
        "timeout": 5,
    }
    try:
        completed = subprocess.run(command, **run_options)
    except Exception:
        return False
    return completed.returncode == 0
|
||||
|
||||
def restart_service(service):
    """Run ``systemctl restart <service>`` and then pause for two seconds.

    The exit status of systemctl is not checked (``check=False``).
    """
    restart_command = ["systemctl", "restart", service]
    subprocess.run(restart_command, check=False)
    # Presumably gives the unit a moment to come up before it is probed again.
    time.sleep(2)
|
||||
|
||||
def log(msg, quiet):
    """Print ``msg`` to standard output unless ``quiet`` is truthy."""
    if quiet:
        return
    print(msg)
|
||||
|
||||
def main(argv=None):
    """Probe a service and restart it through systemd when the probe fails.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, argparse reads ``sys.argv[1:]``.

    The function always terminates the process via ``sys.exit``:

        0 - the check succeeded, either directly or after the restart
        2 - the service still fails the check after the restart (this is
            also the status argparse uses for invalid arguments)
        3 - not running as root, or ``--check-method cmd`` was given
            without ``--check-cmd``
    """
    parser = argparse.ArgumentParser(
        description="Check if a service is reachable on a given host/proto/port and restart it via systemd if not."
    )
    parser.add_argument("--proto", required=True, choices=["tcp", "udp"], help="Protocol to check: tcp or udp (required)")
    parser.add_argument("--port", required=True, type=int, help="Port number to check (required)")
    parser.add_argument("--host", default="127.0.0.1", help="IP address to check (default: 127.0.0.1)")
    parser.add_argument("--service", required=True, help="Systemd service name to restart if check fails (required)")
    parser.add_argument("--check-method", choices=["listen", "curl", "ping", "nc", "cmd"], default="listen",
                        help="How to check service: listen (default), curl, ping, nc, cmd")
    parser.add_argument("--check-cmd", help="Custom shell command to run when --check-method is 'cmd'")
    parser.add_argument("--retries", type=int, default=3, help="Number of retries if service is unreachable (default: 3)")
    parser.add_argument("--delay", type=int, default=2, help="Delay between retries in seconds (default: 2)")
    parser.add_argument("--quiet", action="store_true", help="Suppress all output (for cron)")

    args = parser.parse_args(argv)

    # With fewer than one attempt the probe loop below would never execute
    # and the service would be restarted without ever being checked.
    if args.retries < 1:
        parser.error("--retries must be at least 1")
    # time.sleep() raises ValueError for negative durations.
    if args.delay < 0:
        parser.error("--delay must not be negative")

    if os.geteuid() != 0:
        print("ERROR: This script must be run as root.")
        sys.exit(3)

    # Validate once up front instead of on every probe.
    if args.check_method == "cmd" and not args.check_cmd:
        log("ERROR: --check-cmd is required with --check-method cmd", args.quiet)
        sys.exit(3)

    def run_check():
        """Run the probe selected by --check-method and return its result."""
        method = args.check_method
        if method == "listen":
            if args.proto == "tcp":
                return is_tcp_listening(args.host, args.port)
            return is_udp_listening(args.host, args.port)
        if method == "curl":
            return check_with_curl(args.host, args.port)
        if method == "ping":
            return check_with_ping(args.host)
        if method == "nc":
            return check_with_nc(args.host, args.port)
        if method == "cmd":
            return check_with_cmd(args.check_cmd)
        return False

    for attempt in range(1, args.retries + 1):
        if run_check():
            log(f"OK: Service is responding on {args.proto.upper()} {args.host}:{args.port}", args.quiet)
            sys.exit(0)
        log(f"Attempt {attempt}/{args.retries} failed.", args.quiet)
        # No point in waiting after the last attempt.
        if attempt < args.retries:
            time.sleep(args.delay)

    log(f"FAIL: Service unreachable after {args.retries} attempts. Restarting {args.service}...", args.quiet)
    restart_service(args.service)

    # One final check after restart
    if run_check():
        log(f"OK: {args.service} is responding after restart.", args.quiet)
        sys.exit(0)

    log(f"ERROR: {args.service} is still not responding after restart.", args.quiet)
    sys.exit(2)
|
||||
|
||||
# Run the watchdog only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user