#!/usr/bin/env python3
"""Nagios-style health check for mounted XFS filesystems.

For every target mount point the script checks that the mount table lists it
as XFS, reads its usage from ``df -P -h`` and runs ``xfs_repair -n``. It then
prints one status line with perfdata and exits with the Nagios codes
0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN. When stdout is a terminal it also
prints a colored per-mount report.
"""
import argparse
import os
import re
import subprocess
import sys

ESC = {
    "reset": "\033[0m",
    "red": "\033[31m",
    "yellow": "\033[33m",
    "green": "\033[32m",
    "cyan": "\033[36m",
    "bold": "\033[1m",
}

STATUS_TEXT = {0: "OK", 1: "WARNING", 2: "CRITICAL", 3: "UNKNOWN"}

# Rank used when combining statuses: CRITICAL > UNKNOWN > WARNING > OK.
# A plain max() on the exit codes would let UNKNOWN (3) hide CRITICAL (2).
_SEVERITY = {0: 0, 1: 1, 3: 2, 2: 3}

# One line of `mount` output: "<device> on <mount point> type <fstype> (<options>)".
_MOUNT_LINE = re.compile(
    r"^(?P<device>.+?) on (?P<target>.+) type (?P<fstype>\S+)(?: \(.*\))?$"
)

# The capacity column of `df -P`, e.g. "85%".
_CAPACITY = re.compile(r"\d+%")

# Substrings of xfs_repair output that are treated as a failed check.
_REPAIR_FAILURE_MARKERS = ("UNRECOVERABLE", "could not", "errors detected")


def color_text(text, color):
    """Wrap *text* in the ANSI escape sequence for *color* (a key of ``ESC``)."""
    return f"{ESC[color]}{text}{ESC['reset']}"


def is_interactive():
    """Detect if running interactively (stdout is a terminal, i.e. not Nagios)."""
    return sys.stdout.isatty()


def status_color(status):
    """Return the color name used to display *status* (0 green, 1 yellow, else red)."""
    if status == 0:
        return "green"
    if status == 1:
        return "yellow"
    return "red"


def worst_status(current, new):
    """Return the more severe of two Nagios status codes.

    Severity order is CRITICAL > UNKNOWN > WARNING > OK, so an UNKNOWN result
    for one mount never masks a CRITICAL result for another.
    """
    return max(current, new, key=lambda code: _SEVERITY.get(code, -1))


def perf_label(mount):
    """Turn a mount point into a perfdata label prefix ('/' and ' ' become '_')."""
    return mount.replace('/', '_').replace(' ', '_')


def run_cmd(cmd):
    """Run *cmd* and return its combined stdout/stderr.

    *cmd* may be an argv list (run without a shell, so arguments need no
    quoting) or a string (run through the shell, kept for compatibility).
    Output of a successful command is stripped. A command that exits non-zero
    yields whatever it printed, and a command that cannot be started at all
    yields an empty string.
    """
    try:
        return subprocess.check_output(
            cmd,
            shell=isinstance(cmd, str),
            text=True,
            stderr=subprocess.STDOUT,
        ).strip()
    except subprocess.CalledProcessError as e:
        return e.output if e.output else ""
    except OSError:
        # Executable missing or not runnable: report "no output" like a failure.
        return ""


def parse_mount_table(text):
    """Parse `mount` output into a list of ``(device, mount_point, fstype)``.

    Lines that do not look like "<device> on <path> type <fstype> (...)" are
    skipped. Mount points containing spaces are preserved.
    """
    entries = []
    for line in text.splitlines():
        match = _MOUNT_LINE.match(line.strip())
        if match:
            entries.append((match["device"], match["target"], match["fstype"]))
    return entries


def find_xfs_mounts():
    """Return the mount points of all currently mounted XFS filesystems."""
    table = parse_mount_table(run_cmd(["mount"]))
    return [target for _device, target, fstype in table if fstype == "xfs"]


def is_xfs_mount(mount):
    """Check if *mount* is a mount point whose filesystem type is XFS.

    The mount point must match a mount-table entry exactly (after path
    normalization); a substring match is not enough. If the same mount point
    appears several times, the last entry decides, since later mounts shadow
    earlier ones.
    """
    wanted = os.path.normpath(mount)
    found_type = None
    for _device, target, fstype in parse_mount_table(run_cmd(["mount"])):
        if os.path.normpath(target) == wanted:
            found_type = fstype
    return found_type == "xfs"


def parse_df_output(output, mount):
    """Extract ``(usage_pct, size, avail)`` for *mount* from `df -P -h` output.

    Returns ``(None, None, None)`` when no data line for exactly that mount
    point is present. The header line is skipped because its capacity column
    is not of the form "<digits>%".
    """
    wanted = os.path.normpath(mount)
    for line in output.splitlines():
        # -P guarantees six columns; the last one (mount point) may hold spaces.
        parts = line.strip().split(None, 5)
        if len(parts) < 6:
            continue
        size, avail, capacity, target = parts[1], parts[3], parts[4], parts[5]
        if not _CAPACITY.fullmatch(capacity):
            continue
        if os.path.normpath(target) != wanted:
            continue
        return int(capacity[:-1]), size, avail
    return None, None, None


def check_disk_usage(mount):
    """Use df -P for consistent POSIX output; return ``(usage_pct, size, avail)``.

    All three values are ``None`` when df reports nothing usable for *mount*.
    """
    return parse_df_output(run_cmd(["df", "-P", "-h", "--", mount]), mount)


def check_xfs_repair(mount):
    """Run ``xfs_repair -n`` (no-modify mode) on *mount* and scan its output.

    Returns ``(ok, output)``; ``ok`` is False when the output contains one of
    the known failure markers.

    NOTE(review): xfs_repair(8) describes its operand as a device or image
    file and refuses writable mounted filesystems - confirm that passing a
    mount point gives a meaningful result. A missing xfs_repair binary also
    produces no marker and is therefore reported as ok.
    """
    output = run_cmd(["xfs_repair", "-n", mount])
    failed = any(marker in output for marker in _REPAIR_FAILURE_MARKERS)
    return not failed, output


def build_parser():
    """Build the command-line parser.

    ``-m/--mount`` is stored as ``mount_list`` and the positional as ``mount``.
    They must not share a destination: an absent ``nargs='?'`` positional
    stores its default (None) and would overwrite the value given with ``-m``.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        description="XFS Filesystem Health Check",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s                    # Check all XFS mounts
  %(prog)s --list             # List available XFS mounts
  %(prog)s /data              # Check specific mount
  %(prog)s -m /data,/var      # Check multiple mounts
  %(prog)s -w 85 -c 95        # Custom thresholds
""",
    )
    parser.add_argument('-h', '--help', action='store_true',
                        help='Show this help message')
    parser.add_argument('-l', '--list', action='store_true',
                        help='List all mounted XFS filesystems')
    parser.add_argument('-m', '--mount', dest='mount_list', metavar='MOUNTS',
                        help='Comma-separated XFS mount points to check')
    parser.add_argument('-w', '--warn-threshold', type=int, default=80,
                        help='Warning threshold %% (default: 80)')
    parser.add_argument('-c', '--crit-threshold', type=int, default=90,
                        help='Critical threshold %% (default: 90)')
    parser.add_argument('mount', nargs='?',
                        help='Single mount point to check')
    return parser


def resolve_target_mounts(args):
    """Return the mount points to check for the parsed arguments *args*.

    Mounts given with ``-m`` and/or positionally are split on commas, stripped,
    and de-duplicated in order. Only when neither was given are all mounted
    XFS filesystems discovered. An explicit but empty selection yields ``[]``.
    """
    requested = [value for value in (args.mount_list, args.mount)
                 if value is not None]
    if not requested:
        return find_xfs_mounts()
    names = (name.strip() for value in requested for name in value.split(','))
    return list(dict.fromkeys(name for name in names if name))


def check_mount(mount, warn_threshold, crit_threshold, verbose=False):
    """Check one mount point and return ``(status, perfdata)``.

    *status* is a Nagios code; *perfdata* is the perfdata fragment for this
    mount. With *verbose* set, a colored report is printed as the check runs.
    """
    if verbose:
        print(f"\n{color_text(mount, 'cyan')}")

    # Validate it's actually XFS
    if not is_xfs_mount(mount):
        if verbose:
            print(color_text(" UNKNOWN: Mount point not found or not XFS", "red"))
        return 3, f"{perf_label(mount)}_status=3"

    usage_pct, size, avail = check_disk_usage(mount)
    if usage_pct is None:
        if verbose:
            print(color_text(" UNKNOWN: Unable to read disk usage", "red"))
        return 3, f"{perf_label(mount)}_status=3"

    # Usage status: thresholds are exclusive (strictly greater than).
    if usage_pct > crit_threshold:
        usage_status = 2
    elif usage_pct > warn_threshold:
        usage_status = 1
    else:
        usage_status = 0
    if verbose:
        shown_pct = color_text(f"{usage_pct}%", status_color(usage_status))
        print(f" Usage: {shown_pct} (Size: {size}, Available: {avail})")

    # XFS repair check
    repair_ok, _repair_out = check_xfs_repair(mount)
    xfs_status = 0 if repair_ok else 2
    if verbose:
        if repair_ok:
            print(color_text(" XFS filesystem OK (xfs_repair dry-run)", "green"))
        else:
            print(color_text(" CRITICAL: XFS filesystem issues detected", "red"))

    # Nagios perfdata format.
    # NOTE(review): size/avail/xfs_status labels are not prefixed with the
    # mount name, so they repeat when several mounts are checked; left as is
    # to keep the output format stable for existing consumers.
    perf = (
        f"{perf_label(mount)}_used_pct={usage_pct};{warn_threshold};{crit_threshold};0;100"
        f" size={size} avail={avail} xfs_status={xfs_status}"
    )
    return worst_status(usage_status, xfs_status), perf


def main():
    """Parse arguments, check the selected mounts and exit with a Nagios code."""
    parser = build_parser()
    args = parser.parse_args()

    # Nagios: --help and --list exit 0 (OK)
    if args.help:
        parser.print_help()
        sys.exit(0)

    if args.list:
        mounts = find_xfs_mounts()
        if mounts:
            print(color_text("Available XFS mount points:", "cyan"))
            for m in mounts:
                print(f" -> {m}")
        else:
            print(color_text("No mounted XFS filesystems found.", "red"))
        sys.exit(0)

    # Determine target mounts
    target_mounts = resolve_target_mounts(args)

    # Nagios: No mounts = UNKNOWN (3) - SINGLE LINE
    if not target_mounts:
        print("UNKNOWN No XFS mount points found or specified|")
        sys.exit(3)

    warn_threshold = args.warn_threshold
    crit_threshold = args.crit_threshold
    interactive = is_interactive()

    # ONLY show verbose colorful output in INTERACTIVE mode
    if interactive:
        print(color_text("Checking XFS filesystems:", "bold"))
        print(f"Thresholds: WARN>{warn_threshold}% CRIT>{crit_threshold}%")
        print("=" * 60)

    global_status = 0
    all_perfdata = []
    for mount in target_mounts:
        mount_status, perf = check_mount(
            mount, warn_threshold, crit_threshold, verbose=interactive
        )
        global_status = worst_status(global_status, mount_status)
        all_perfdata.append(perf)

    perfdata = "| " + " ".join(all_perfdata)

    # Parseable Nagios status line; it is the first line of output whenever
    # stdout is not a terminal (the interactive report above is then skipped).
    print(f"{STATUS_TEXT[global_status]} XFS check: {len(target_mounts)} mount(s){perfdata}")

    # ONLY interactive: Additional colorful summary
    if interactive:
        print("\n" + "=" * 60)
        print(color_text(f"FINAL STATUS: {STATUS_TEXT[global_status]}",
                         status_color(global_status)))
        print(f"{perfdata}")

    # Nagios standard exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
    sys.exit(global_status)


if __name__ == "__main__":
    main()