Add service_restarter.py
This commit is contained in:
175
service_restarter.py
Normal file
175
service_restarter.py
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
service_restarter.py — Universal service watchdog and auto-restarter for systemd-managed services.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Checks if a service is listening on a given port and IP using TCP/UDP
|
||||||
|
- Supports multiple check methods:
|
||||||
|
• listen — direct socket connection (default)
|
||||||
|
• curl — HTTP GET request using curl
|
||||||
|
• ping — ICMP echo (host-level reachability only; --proto and --port are not probed)
|
||||||
|
• nc — use netcat to test open port
|
||||||
|
• cmd — run a custom command given via --check-cmd
|
||||||
|
- Restarts systemd service if check fails
|
||||||
|
- Configurable retry attempts and delay between retries
|
||||||
|
- Permission check (must be run as root)
|
||||||
|
- Quiet mode for cron use
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
# Basic TCP check:
|
||||||
|
./service_restarter.py --proto tcp --port 631 --service cups
|
||||||
|
|
||||||
|
# With curl:
|
||||||
|
./service_restarter.py --proto tcp --port 80 --host 192.168.1.10 --service apache2 --check-method curl
|
||||||
|
|
||||||
|
# With ping and retry config:
|
||||||
|
./service_restarter.py --proto tcp --port 22 --service sshd --check-method ping --retries 5 --delay 2
|
||||||
|
|
||||||
|
# Using custom command:
|
||||||
|
./service_restarter.py --proto tcp --port 8080 --service myapp --check-method cmd --check-cmd 'curl -fs http://localhost:8080/health'
|
||||||
|
|
||||||
|
# Cron-friendly:
|
||||||
|
*/5 * * * * root /usr/local/bin/service_restarter.py --proto tcp --port 631 --service cups --quiet
|
||||||
|
"""
|
||||||
|
|
||||||
|
import socket
|
||||||
|
import subprocess
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
def is_tcp_listening(host, port):
    """Return True if a TCP connection to ``host:port`` can be established.

    The address is resolved through ``socket.create_connection``, so IPv4
    literals, IPv6 literals and hostnames are all accepted.  (The earlier
    implementation created an ``AF_INET`` socket and therefore could never
    reach an IPv6 listener.)

    Args:
        host: Address or hostname to connect to.
        port: TCP port number.

    Returns:
        True when a connection succeeds (one-second timeout per connection
        attempt); False on refusal, timeout, resolution failure, or an
        invalid host/port value.
    """
    # Reject bad ports explicitly: the resolver is not guaranteed to refuse
    # out-of-range numbers, and a raw connect() used to fail on them.
    if not isinstance(port, int) or not 0 <= port <= 65535:
        return False
    try:
        with socket.create_connection((host, port), timeout=1):
            return True
    except (OSError, OverflowError, ValueError, TypeError):
        # OSError covers refusal, timeout and name-resolution errors; the
        # others cover malformed host values.  A probe must never crash.
        return False
|
||||||
|
|
||||||
|
def is_udp_listening(host, port):
    """Probe a UDP endpoint by sending an empty datagram and awaiting a reply.

    Args:
        host: IPv4 address (the socket is created with ``AF_INET``).
        port: UDP port number.

    Returns:
        True only if some datagram is received within one second of sending
        the probe; False on timeout or any other error.

    NOTE(review): a service that does not answer an empty datagram will be
    reported as not listening -- confirm this suits the monitored service.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.settimeout(1)
        probe.sendto(b'', (host, port))
        probe.recvfrom(1024)
    except Exception:
        return False
    else:
        return True
    finally:
        probe.close()
|
||||||
|
|
||||||
|
def check_with_curl(host, port):
    """Check the service with an HTTP GET issued through the ``curl`` binary.

    Runs ``curl -fs http://<host>:<port>/`` with output discarded and a
    five-second subprocess timeout.

    Args:
        host: Host part of the URL.
        port: Port part of the URL.

    Returns:
        True if curl exits with status 0; False on any non-zero status,
        timeout, or failure to launch curl.
    """
    try:
        target_url = f"http://{host}:{port}/"
        curl_argv = ["curl", "-fs", target_url]
        completed = subprocess.run(
            curl_argv,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=5,
        )
    except Exception:
        return False
    return completed.returncode == 0
|
||||||
|
|
||||||
|
def check_with_ping(host):
    """Check host-level reachability with a single ICMP echo via ``ping``.

    Runs ``ping -c 1 -W 2 <host>`` with output discarded and a three-second
    subprocess timeout.

    Args:
        host: Address or hostname to ping.

    Returns:
        True only if ``ping`` exits with status 0.  False if it exits
        non-zero, times out, or cannot be launched.
    """
    try:
        result = subprocess.run(
            ["ping", "-c", "1", "-W", "2", host],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=3,
        )
    except (OSError, subprocess.SubprocessError, ValueError, TypeError):
        # Missing binary, timeout, or an unusable host argument.
        return False
    # The exit status is the actual verdict; previously it was ignored and
    # any ping that merely finished was treated as success.
    return result.returncode == 0
|
||||||
|
|
||||||
|
def check_with_nc(host, port):
    """Check whether a port is open using netcat's zero-I/O scan mode.

    Runs ``nc -z -w 2 <host> <port>`` with output discarded and a
    three-second subprocess timeout.

    Args:
        host: Address or hostname to probe.
        port: Port number to probe (converted to ``str`` for the command line).

    Returns:
        True only if ``nc`` exits with status 0.  False if ``nc`` is not on
        PATH, exits non-zero, times out, or cannot be launched.
    """
    nc = shutil.which("nc")
    if not nc:
        return False
    try:
        result = subprocess.run(
            [nc, "-z", "-w", "2", host, str(port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=3,
        )
    except (OSError, subprocess.SubprocessError, ValueError, TypeError):
        # Launch failure, timeout, or an unusable host argument.
        return False
    # The exit status carries the result; previously it was ignored, so a
    # closed port was reported as open.
    return result.returncode == 0
|
||||||
|
|
||||||
|
def check_with_cmd(command):
    """Run a user-supplied shell command as the health check.

    The command string is handed to the shell (``shell=True``) with output
    discarded and a five-second subprocess timeout.

    Args:
        command: Shell command line to execute.

    Returns:
        True if the command exits with status 0; False on a non-zero status,
        timeout, or any error while launching it.
    """
    try:
        exit_status = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=5,
        ).returncode
    except Exception:
        return False
    return exit_status == 0
|
||||||
|
|
||||||
|
def restart_service(service, settle_delay=2):
    """Restart a systemd unit and give it a moment to come up.

    Runs ``systemctl restart <service>`` and then sleeps for *settle_delay*
    seconds so that a follow-up health check does not race the start-up.

    Args:
        service: Name of the systemd unit to restart.
        settle_delay: Seconds to wait after the restart command (default 2,
            matching the previously hard-coded value).

    Returns:
        True if ``systemctl`` exited with status 0, False if it exited
        non-zero or could not be executed at all.
    """
    try:
        completed = subprocess.run(["systemctl", "restart", service], check=False)
        succeeded = completed.returncode == 0
    except OSError:
        # systemctl missing or not executable: report failure instead of
        # aborting the watchdog with a traceback.
        succeeded = False
    if settle_delay > 0:
        time.sleep(settle_delay)
    return succeeded
|
||||||
|
|
||||||
|
def log(msg, quiet):
    """Print *msg* to standard output unless quiet mode is active.

    Args:
        msg: Text to print.
        quiet: When truthy, nothing is printed.
    """
    if quiet:
        return
    print(msg)
|
||||||
|
|
||||||
|
def main(argv=None):
    """Parse options, probe the service, and restart it if it stays down.

    The selected check is attempted up to ``--retries`` times with
    ``--delay`` seconds between attempts.  If every attempt fails, the
    systemd unit is restarted and checked once more.

    Args:
        argv: Optional list of command-line arguments; ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Exit codes:
        0 -- the service responded, either immediately or after the restart.
        2 -- the service still did not respond after the restart (argparse
             also uses 2 for malformed command lines).
        3 -- invalid option value, or not running as root.
    """
    parser = argparse.ArgumentParser(
        description="Check if a service is reachable on a given host/proto/port and restart it via systemd if not."
    )
    parser.add_argument("--proto", required=True, choices=["tcp", "udp"], help="Protocol to check: tcp or udp (required)")
    parser.add_argument("--port", required=True, type=int, help="Port number to check (required)")
    parser.add_argument("--host", default="127.0.0.1", help="IP address to check (default: 127.0.0.1)")
    parser.add_argument("--service", required=True, help="Systemd service name to restart if check fails (required)")
    parser.add_argument("--check-method", choices=["listen", "curl", "ping", "nc", "cmd"], default="listen",
                        help="How to check service: listen (default), curl, ping, nc, cmd")
    parser.add_argument("--check-cmd", help="Custom shell command to run when --check-method is 'cmd'")
    parser.add_argument("--retries", type=int, default=3, help="Number of retries if service is unreachable (default: 3)")
    parser.add_argument("--delay", type=int, default=2, help="Delay between retries in seconds (default: 2)")
    parser.add_argument("--quiet", action="store_true", help="Suppress all output (for cron)")

    args = parser.parse_args(argv)

    def fail_config(message):
        # Configuration problems are always reported (even with --quiet) on
        # stderr, so a broken cron entry does not fail silently.
        print(f"ERROR: {message}", file=sys.stderr)
        sys.exit(3)

    # Validate before touching anything.  With --retries < 1 the check loop
    # would never run and the service would be restarted unconditionally; a
    # negative --delay would make time.sleep() raise.
    if not 1 <= args.port <= 65535:
        fail_config("--port must be between 1 and 65535")
    if args.retries < 1:
        fail_config("--retries must be at least 1")
    if args.delay < 0:
        fail_config("--delay must not be negative")
    if args.check_method == "cmd" and not args.check_cmd:
        fail_config("--check-cmd is required with --check-method cmd")

    if os.geteuid() != 0:
        fail_config("This script must be run as root.")

    def run_check():
        """Run the configured check once and return its boolean result."""
        method = args.check_method
        if method == "listen":
            if args.proto == "udp":
                return is_udp_listening(args.host, args.port)
            return is_tcp_listening(args.host, args.port)
        if method == "curl":
            return check_with_curl(args.host, args.port)
        if method == "ping":
            # Host-level check only: port and protocol are not used.
            return check_with_ping(args.host)
        if method == "nc":
            return check_with_nc(args.host, args.port)
        if method == "cmd":
            return check_with_cmd(args.check_cmd)
        return False

    for attempt in range(1, args.retries + 1):
        if run_check():
            log(f"OK: Service is responding on {args.proto.upper()} {args.host}:{args.port}", args.quiet)
            sys.exit(0)
        log(f"Attempt {attempt}/{args.retries} failed.", args.quiet)
        # No point in sleeping after the last failed attempt.
        if attempt < args.retries:
            time.sleep(args.delay)

    log(f"FAIL: Service unreachable after {args.retries} attempts. Restarting {args.service}...", args.quiet)
    restart_service(args.service)

    # One final check after restart
    if run_check():
        log(f"OK: {args.service} is responding after restart.", args.quiet)
        sys.exit(0)

    log(f"ERROR: {args.service} is still not responding after restart.", args.quiet)
    sys.exit(2)
|
||||||
|
|
||||||
|
# Script entry point: run the watchdog only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user