Add check_arping
This commit is contained in:
167
check_arping
Normal file
167
check_arping
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
check_arp_ping.py — Nagios/Icinga plugin
|
||||
RTT i utrata pakietów przez arping. Wyjście kompatybilne z check_ping.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
# Plugin exit codes, ordered by increasing severity (UNKNOWN is the highest value).
OK = 0
WARNING = 1
CRITICAL = 2
UNKNOWN = 3

# Prefix of the status line when no --label is supplied.
DEFAULT_LABEL = "ARPING"
|
||||
|
||||
# Extracts one round-trip time (number + unit) from a single arping reply line.
#
# Two layouts are accepted:
#   * "... time=1.234 msec" / "... time 811.000 usec"  (explicit "time" prefix)
#   * "... [aa:bb:cc:dd:ee:ff]  0.657ms"                (bare value, no prefix)
# For the bare form the number must not be glued to a preceding word character,
# dot or colon, so fragments of IP/MAC addresses are never taken as the value.
# The unit must end at a word boundary, so tokens such as the interface name
# "usb0" are not misread as a "us" unit.
TIME_RE = re.compile(
    r"""(?ix)
    (?:time[=\s] | (?<![\w.:]))          # 'time=' / 'time ' prefix, or a free-standing number
    (?P<val>\d+(?:\.\d+)?)               # integer or decimal value
    \s*(?P<unit>msec|ms|usec|us|µs)\b    # unit: milliseconds or microseconds
    """
)
|
||||
|
||||
# Case-insensitive, verbose-mode pattern for keywords that mark an output line
# as a reply line ("reply", "response", "bytes from", "Unicast reply",
# "Unicast from"). It is used next to a plain `host in line` test to decide
# which lines are searched for a timing value.
HOST_LINE_RE = re.compile(
    r"""(?ix)
    (?:reply|response|bytes\s+from|Unicast\s+reply|Unicast\s+from)
    """
)
|
||||
|
||||
def run_arping(host: str, count: int, timeout: int, iface: Optional[str]) -> Tuple[int, str, str]:
    """Run the external ``arping`` binary and capture its result.

    Args:
        host: Target passed to arping as the last argument.
        count: Value for arping's ``-c`` option.
        timeout: Value for arping's ``-w`` option; the subprocess itself is
            given two extra seconds before it is abandoned.
        iface: Optional value for arping's ``-I`` option; omitted when falsy.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple. Two synthetic results are
        produced without a real exit status: ``(127, "", "arping not found")``
        when the binary is not on PATH and ``(124, "", "arping timed out")``
        when the subprocess exceeds its time limit.
    """
    if shutil.which("arping") is None:
        return (127, "", "arping not found")

    iface_args = ["-I", iface] if iface else []
    argv = ["arping", "-c", str(count), "-w", str(timeout), *iface_args, host]

    try:
        proc = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout + 2,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return (124, "", "arping timed out")
    else:
        captured_out = proc.stdout or ""
        captured_err = proc.stderr or ""
        return (proc.returncode, captured_out, captured_err)
|
||||
|
||||
|
||||
def extract_timings(output: str, host: str) -> List[float]:
    """Collect round-trip times, in milliseconds, from arping output.

    A line is inspected when it contains ``host`` as a substring or matches
    ``HOST_LINE_RE``; the first ``TIME_RE`` match on such a line contributes
    one sample. Lines without a timing match are ignored.

    Args:
        output: Captured arping stdout.
        host: Target host string as given on the command line.

    Returns:
        One float per parsed reply line, normalised to milliseconds, in
        output order. Empty when nothing could be parsed.
    """
    timings: List[float] = []
    for line in output.splitlines():
        if host not in line and not HOST_LINE_RE.search(line):
            continue
        m = TIME_RE.search(line)
        if m is None:
            continue
        value = float(m.group("val"))
        # TIME_RE is case-insensitive, and Python's `re` treats MICRO SIGN
        # (U+00B5) and GREEK SMALL LETTER MU (U+03BC) as equivalent, so the
        # captured unit may be spelled in ways an exact-string comparison
        # misses. Every millisecond unit starts with "m"; any other accepted
        # unit is a microsecond variant and is converted.
        if not m.group("unit").lower().startswith("m"):
            value /= 1000.0  # µs -> ms
        timings.append(value)
    return timings
|
||||
|
||||
|
||||
def compute_stats(samples: List[float]) -> Tuple[float, float, float]:
    """Summarise a non-empty list of samples.

    Args:
        samples: Measured values.

    Returns:
        ``(average, minimum, maximum)`` of ``samples``.

    Raises:
        ValueError: If ``samples`` is empty.
    """
    if len(samples) == 0:
        raise ValueError("no samples")
    total = sum(samples)
    lowest, highest = min(samples), max(samples)
    return (total / len(samples), lowest, highest)
|
||||
|
||||
|
||||
def fmt(x: float, d: int = 1) -> str:
    """Render ``x`` in fixed-point notation with ``d`` decimals.

    Any comma in the rendered text is replaced by a dot.
    """
    rendered = "{:.{places}f}".format(x, places=d)
    return rendered.replace(",", ".")
|
||||
|
||||
|
||||
def status_from(avg_ms: Optional[float], pl_pct: int, warn_rta: float, crit_rta: float,
                warn_pl: int, crit_pl: int) -> int:
    """Map average RTT and packet loss onto a plugin state.

    Each metric is graded on its own (a value at or above its critical
    threshold is CRITICAL, at or above its warning threshold is WARNING,
    otherwise OK) and the more severe of the two grades is returned. Grading
    both before returning matters: a merely "warning" RTT must not hide a
    "critical" packet loss.

    Args:
        avg_ms: Average round-trip time in ms, or None when unavailable
            (the RTT is then not graded).
        pl_pct: Packet loss in percent.
        warn_rta: RTT warning threshold (ms).
        crit_rta: RTT critical threshold (ms).
        warn_pl: Packet-loss warning threshold (%).
        crit_pl: Packet-loss critical threshold (%).

    Returns:
        One of OK, WARNING or CRITICAL.
    """
    rta_state = OK
    if avg_ms is not None:
        if avg_ms >= crit_rta:
            rta_state = CRITICAL
        elif avg_ms >= warn_rta:
            rta_state = WARNING

    if pl_pct >= crit_pl:
        pl_state = CRITICAL
    elif pl_pct >= warn_pl:
        pl_state = WARNING
    else:
        pl_state = OK

    # OK < WARNING < CRITICAL numerically, so max() picks the worse state.
    return max(rta_state, pl_state)
|
||||
|
||||
|
||||
def _perfdata_without_rta(args: argparse.Namespace, pl_pct: int) -> str:
    """Build the perfdata string used when no round-trip time is available."""
    # "U" is the perfdata marker for a value that could not be determined.
    return (
        f"'rta'=U;{args.warn_rta};{args.crit_rta};; "
        f"'pl'={pl_pct}%;{args.warn_pl};{args.crit_pl};0;100"
    )


def main() -> None:
    """Parse the command line, run arping and report a plugin result.

    Prints exactly one status line (with perfdata where applicable) to stdout
    and terminates the process through ``sys.exit`` with the matching plugin
    state; it never returns normally.
    """
    ap = argparse.ArgumentParser(description="Nagios/Icinga plugin: ARP ping (arping) z perfdata (rta, pl).")
    ap.add_argument("host")
    ap.add_argument("warn_rta", type=float, help="WARN RTT (ms)")
    ap.add_argument("crit_rta", type=float, help="CRIT RTT (ms)")
    # argparse %-formats help strings, so a literal percent sign must be "%%".
    ap.add_argument("--warn-pl", type=int, default=20, dest="warn_pl", help="WARN PL (%%) [20]")
    ap.add_argument("--crit-pl", type=int, default=80, dest="crit_pl", help="CRIT PL (%%) [80]")
    ap.add_argument("-c", "--count", type=int, default=5, help="Liczba prób [5]")
    ap.add_argument("-t", "--timeout", type=int, default=5, help="Timeout arping w s [5]")
    ap.add_argument("-I", "--interface", help="Interfejs (np. eth0)")
    ap.add_argument("--label", default=DEFAULT_LABEL, help="Etykieta [ARPING]")
    ap.add_argument("--version", action="version", version="check_arp_ping.py 2.1.0")

    try:
        args = ap.parse_args()
    except SystemExit as exc:
        # --help / --version exit with 0 and pass through unchanged. A usage
        # error would exit with 2, which a monitoring system reads as
        # CRITICAL; report it as UNKNOWN instead.
        if exc.code in (0, None):
            raise
        print(f"{DEFAULT_LABEL} UNKNOWN - invalid command line arguments")
        sys.exit(UNKNOWN)

    if args.warn_rta >= args.crit_rta:
        print(f"{args.label} UNKNOWN - Złe progi RTA: warn >= crit")
        sys.exit(UNKNOWN)
    if args.warn_pl >= args.crit_pl:
        print(f"{args.label} UNKNOWN - Złe progi PL: warn >= crit")
        sys.exit(UNKNOWN)
    if args.count <= 0:
        print(f"{args.label} UNKNOWN - count musi być > 0")
        sys.exit(UNKNOWN)

    rc, out, err = run_arping(args.host, args.count, args.timeout, args.interface)

    # With no parsed replies this evaluates to full (100%) loss.
    timings = extract_timings(out, args.host)
    received = len(timings)
    lost = max(0, args.count - received)
    pl_pct = int(round(100.0 * lost / args.count))

    # The arping binary is missing: a plugin problem, not a host problem.
    if rc == 127:
        print(f"{args.label} UNKNOWN - {err.strip() or 'arping not found'}")
        sys.exit(UNKNOWN)

    # Any other unexpected exit status (e.g. permission problems, hard timeout).
    if rc not in (0, 1, 2):  # 0 = ok, 1/2 can occur when replies are lost
        perf = _perfdata_without_rta(args, pl_pct)
        print(f"{args.label} CRITICAL - No response from host {args.host} | {perf}")
        sys.exit(CRITICAL)

    # If arping printed reply-like lines but no timing could be parsed,
    # distinguish genuine 100% loss from an unrecognised output format.
    any_reply_line = any((args.host in ln) or HOST_LINE_RE.search(ln) for ln in out.splitlines())

    if not timings:
        perf = _perfdata_without_rta(args, pl_pct)
        if pl_pct == 100 and not any_reply_line:
            # Genuinely 100% packet loss.
            print(f"{args.label} CRITICAL - Packet loss = {pl_pct}%, RTA = - ms | {perf}")
            sys.exit(CRITICAL)
        # Reply-like lines exist but carry no parsable time field.
        print(f"{args.label} UNKNOWN - Nie rozpoznano formatów czasu w wyjściu arping | {perf}")
        sys.exit(UNKNOWN)

    avg, mn, mx = compute_stats(timings)
    state = status_from(avg, pl_pct, args.warn_rta, args.crit_rta, args.warn_pl, args.crit_pl)
    state_str = {OK: "OK", WARNING: "WARNING", CRITICAL: "CRITICAL", UNKNOWN: "UNKNOWN"}[state]

    msg = f"Packet loss = {pl_pct}%, RTA = {fmt(avg)} ms"
    # Perfdata items are space-separated, so no space may appear inside an item.
    perf = (
        f"'rta'={fmt(avg)}ms;{args.warn_rta};{args.crit_rta};{fmt(mn)};{fmt(mx)} "
        f"'pl'={pl_pct}%;{args.warn_pl};{args.crit_pl};0;100"
    )
    print(f"{args.label} {state_str} - {msg} | {perf}")
    sys.exit(state)
|
||||
|
||||
|
||||
# Script entry point: any exception escaping main() is reported as an
# UNKNOWN plugin result instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except Exception as exc:
        print(f"{DEFAULT_LABEL} UNKNOWN - {exc}")
        sys.exit(UNKNOWN)
|
Reference in New Issue
Block a user