#!/usr/bin/env python3
"""Nagios-compatible health check for mounted BTRFS filesystems.

For every selected mount point the script checks data usage, metadata usage
and the error count of the last scrub, prints a single Nagios status line
with perfdata, and exits with the matching Nagios code
(0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN).  When stdout is a terminal it also
prints a colourised per-mount report.
"""
import argparse
import re
import subprocess
import sys
from collections import defaultdict  # noqa: F401

ESC = {
    "reset": "\033[0m",
    "red": "\033[31m",
    "yellow": "\033[33m",
    "green": "\033[32m",
    "cyan": "\033[36m",
    "bold": "\033[1m",
}

# "<device> on <mount point> type btrfs (<options>)" as printed by `mount`.
# The mount point group is greedy so that paths containing spaces survive.
_MOUNT_LINE_RE = re.compile(r"^.*? on (.+) type btrfs(?: |$)")

# "Data, single: total=8.00GiB, used=5.50GiB" as printed by
# `btrfs filesystem df`.  The first group may be a "+"-joined list
# (mixed block groups); sizes may be raw bytes or carry a unit suffix.
_DF_LINE_RE = re.compile(
    r"^[ \t]*([\w+]+),[^:\n]*:[ \t]*"
    r"total=(\d+(?:\.\d+)?)([KMGTPE]?)i?B?,[ \t]*"
    r"used=(\d+(?:\.\d+)?)([KMGTPE]?)i?B?",
    re.MULTILINE | re.IGNORECASE,
)

# Power of 1024 for each size prefix accepted by _DF_LINE_RE.
_UNIT_EXPONENTS = {"": 0, "K": 1, "M": 2, "G": 3, "T": 4, "P": 5, "E": 6}

# Older `btrfs scrub status` format: "... with 3 errors".
_SCRUB_TOTAL_RE = re.compile(r"with (\d+) errors?", re.IGNORECASE)
# Newer format: "Error summary:    csum=2 read=1" / "no errors found".
_SCRUB_SUMMARY_RE = re.compile(
    r"^[ \t]*Error summary:(.*)$", re.MULTILINE | re.IGNORECASE
)


def color_text(text, color):
    """Return *text* wrapped in the ANSI sequence for *color* plus a reset."""
    return f"{ESC[color]}{text}{ESC['reset']}"


def is_interactive():
    """Detect if running interactively (not Nagios)"""
    return sys.stdout.isatty()


def run_cmd(cmd):
    """Run *cmd* and return its stripped, combined stdout/stderr.

    Args:
        cmd: An argument list (executed directly, no shell) or, for backward
            compatibility, a single string (executed through the shell).
            Prefer the list form whenever user input is part of the command.

    Returns:
        The command output, or "" when the command cannot be started,
        exits non-zero, or its arguments/output are invalid.
    """
    try:
        return subprocess.check_output(
            cmd,
            shell=isinstance(cmd, str),
            text=True,
            stderr=subprocess.STDOUT,
        ).strip()
    except (subprocess.SubprocessError, OSError, ValueError):
        # Best effort by design: callers treat "" as "command failed".
        return ""


def parse_btrfs_mounts(mount_output):
    """Return the BTRFS mount points listed in `mount` output, in order."""
    mount_points = []
    for line in mount_output.splitlines():
        match = _MOUNT_LINE_RE.match(line)
        if match:
            mount_points.append(match.group(1))
    return mount_points


def find_btrfs_mounts():
    """Find all BTRFS mount points"""
    return parse_btrfs_mounts(run_cmd(["mount"]))


def is_btrfs_mount(mount):
    """Check if mount point is actually BTRFS

    The path must equal a mounted BTRFS mount point exactly (a trailing
    slash is ignored); substring matches are not accepted.
    """
    if not mount:
        return False
    wanted = mount.rstrip("/") or "/"
    return wanted in find_btrfs_mounts()


def _size_to_bytes(number, unit_prefix):
    """Convert a size such as ("5.50", "G") to a byte count (1024-based)."""
    return float(number) * 1024 ** _UNIT_EXPONENTS[unit_prefix.upper()]


def parse_usage_percent(df_output, label):
    """Compute the used/total percentage for one block-group type.

    Args:
        df_output: Text printed by `btrfs filesystem df <mount>`.
        label: Block-group type to look for, e.g. "Data" or "Metadata"
            (case-insensitive, matched as a whole word).

    Returns:
        The usage rounded to an int percentage, summed over every line of
        that type (several profiles can coexist during a conversion);
        0 when the allocated total is zero; None when no line for *label*
        could be parsed.
    """
    wanted = label.lower()
    total = used = 0.0
    found = False
    for match in _DF_LINE_RE.finditer(df_output):
        if wanted not in match.group(1).lower().split("+"):
            continue
        found = True
        total += _size_to_bytes(match.group(2), match.group(3))
        used += _size_to_bytes(match.group(4), match.group(5))
    if not found:
        return None
    if total <= 0:
        return 0
    return round(used * 100 / total)


def parse_scrub_errors(scrub_output):
    """Return the number of errors reported by `btrfs scrub status`.

    Understands both the older "with N errors" line and the newer
    "Error summary:" line (sum of its "name=count" items).  Returns 0 when
    neither is present, e.g. when no scrub has run or the command failed.
    """
    total_match = _SCRUB_TOTAL_RE.search(scrub_output)
    if total_match:
        return int(total_match.group(1))
    summary_match = _SCRUB_SUMMARY_RE.search(scrub_output)
    if summary_match:
        counts = re.findall(r"\b\w+=(\d+)", summary_match.group(1))
        return sum(int(count) for count in counts)
    return 0


def _threshold_status(value, warn, crit):
    """Map *value* to a Nagios status: 2 if >= crit, 1 if >= warn, else 0."""
    if value >= crit:
        return 2
    if value >= warn:
        return 1
    return 0


def _status_color(status):
    """Return the colour name used to display a Nagios status code."""
    if status == 0:
        return "green"
    if status == 1:
        return "yellow"
    return "red"


def _perf_label(mount):
    """Turn a mount path into the prefix used for its perfdata labels."""
    return mount.replace('/', '_').replace(' ', '_')


def main():
    """Parse the command line, check the selected mounts and exit.

    Always terminates through sys.exit() with a Nagios exit code.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        description="BTRFS Filesystem Health Check",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s                          # Check all BTRFS mounts
  %(prog)s --list                   # List available BTRFS mounts
  %(prog)s /data                    # Check specific mount
  %(prog)s -m /data,/var            # Check multiple mounts
  %(prog)s -d 85 -D 95 -M 90 -C 98  # Custom thresholds
""",
    )
    parser.add_argument('-h', '--help', action='store_true',
                        help='Show this help message')
    parser.add_argument('-l', '--list', action='store_true',
                        help='List all mounted BTRFS filesystems')
    # -m and the positional argument get distinct destinations; sharing one
    # made the result depend on the order in which argparse stored them.
    parser.add_argument('-m', '--mount', dest='mount_list',
                        help='Comma-separated BTRFS mount points to check')
    parser.add_argument('-d', '--data-threshold', type=int, default=80,
                        help='Data usage warning threshold %% (default: 80)')
    parser.add_argument('-D', '--data-crit', type=int, default=90,
                        help='Data usage critical threshold %% (default: 90)')
    parser.add_argument('-M', '--meta-threshold', type=int, default=85,
                        help='Metadata warning threshold %% (default: 85)')
    parser.add_argument('-C', '--meta-crit', type=int, default=95,
                        help='Metadata critical threshold %% (default: 95)')
    parser.add_argument('-S', '--scrub-errors', type=int, default=0,
                        help='Max allowed scrub errors (default: 0)')
    parser.add_argument('mount_point', nargs='?', metavar='mount',
                        help='Single mount point to check')
    args = parser.parse_args()

    # Nagios: --help and --list exit 0 (OK)
    if args.help:
        parser.print_help()
        sys.exit(0)

    if args.list:
        mounts = find_btrfs_mounts()
        if mounts:
            print(color_text("Available BTRFS mount points:", "cyan"))
            for m in mounts:
                print(f" -> {m}")
        else:
            print(color_text("No mounted BTRFS filesystems found.", "red"))
        sys.exit(0)

    # Determine target mounts: -m wins, then the positional, then all mounts.
    if args.mount_list:
        target_mounts = [
            m.strip() for m in args.mount_list.split(',') if m.strip()
        ]
    elif args.mount_point:
        target_mounts = [args.mount_point]
    else:
        target_mounts = find_btrfs_mounts()

    # Nagios: No mounts = UNKNOWN (3) - SINGLE LINE
    if not target_mounts:
        print("UNKNOWN No BTRFS mount points found or specified|")
        sys.exit(3)

    data_warn = args.data_threshold
    data_crit = args.data_crit
    meta_warn = args.meta_threshold
    meta_crit = args.meta_crit
    max_scrub_err = args.scrub_errors

    interactive = is_interactive()
    global_status = 0
    all_perfdata = []

    # ONLY show verbose colorful output in INTERACTIVE mode
    if interactive:
        print(color_text("Checking BTRFS filesystems:", "bold"))
        print(
            f"Thresholds: Data(W>={data_warn}/C>={data_crit}) "
            f"Meta(W>={meta_warn}/C>={meta_crit}) Scrub(>{max_scrub_err})"
        )
        print("=" * 80)

    for mount in target_mounts:
        mount_name = _perf_label(mount)
        if interactive:
            print(f"\n{color_text(mount, 'cyan')}")

        # Validate mount point
        if not is_btrfs_mount(mount):
            if interactive:
                print(color_text(
                    " UNKNOWN: Mount point not found or not BTRFS", "red"))
            global_status = max(global_status, 3)
            all_perfdata.append(f"{mount_name}_status=3")
            continue

        # Usage: argument lists keep the mount path away from any shell.
        df_output = run_cmd(["btrfs", "filesystem", "df", mount])
        data_pct = parse_usage_percent(df_output, "Data")
        meta_pct = parse_usage_percent(df_output, "Metadata")
        if data_pct is None or meta_pct is None:
            # Unparseable output must not be reported as 0 % (i.e. OK).
            if interactive:
                print(color_text(" UNKNOWN: btrfs filesystem df failed", "red"))
            global_status = max(global_status, 3)
            all_perfdata.append(f"{mount_name}_status=3")
            continue

        if interactive:
            print(color_text(" BTRFS filesystem accessible", "green"))

        # Determine status
        data_status = _threshold_status(data_pct, data_warn, data_crit)
        meta_status = _threshold_status(meta_pct, meta_warn, meta_crit)

        if interactive:
            data_text = color_text(f"{data_pct}%", _status_color(data_status))
            meta_text = color_text(f"{meta_pct}%", _status_color(meta_status))
            print(f" Data: {data_text}")
            print(f" Metadata: {meta_text}")

        # Scrub status
        scrub_output = run_cmd(["btrfs", "scrub", "status", mount])
        scrub_errors = parse_scrub_errors(scrub_output)
        scrub_status = 2 if scrub_errors > max_scrub_err else 0

        if interactive:
            scrub_text = color_text(
                str(scrub_errors), _status_color(scrub_status))
            print(f" Scrub errors: {scrub_text}")

        # Final status for this mount
        mount_status = max(data_status, meta_status, scrub_status)
        global_status = max(global_status, mount_status)

        # Nagios perfdata format
        all_perfdata.append(
            f"{mount_name}_data_pct={data_pct};{data_warn};{data_crit} "
            f"{mount_name}_meta_pct={meta_pct};{meta_warn};{meta_crit} "
            f"{mount_name}_scrub_err={scrub_errors};{max_scrub_err}"
        )

    perfdata = "| " + " ".join(all_perfdata)
    status_text = {0: "OK", 1: "WARNING", 2: "CRITICAL", 3: "UNKNOWN"}

    # ALWAYS: Clean Nagios output FIRST LINE (parseable)
    print(
        f"{status_text[global_status]} BTRFS check: "
        f"{len(target_mounts)} mount(s){perfdata}"
    )

    # ONLY interactive: Additional colorful summary
    if interactive:
        print("\n" + "=" * 80)
        print(color_text(
            f"FINAL STATUS: {status_text[global_status]}",
            _status_color(global_status),
        ))
        print(f"{perfdata}")

    # Nagios standard exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
    sys.exit(global_status)


if __name__ == "__main__":
    main()