#!/usr/bin/env python3
# Origin: commit 36d8c52e8b80 "Add check_btrfs.py" (gru, 2025-12-02).
"""Nagios-style health check for mounted BTRFS filesystems.

For every target mount point the script reports how full the allocated Data
and Metadata chunks are (computed from ``btrfs filesystem df -b``) and how
many errors the last scrub recorded (from ``btrfs scrub status``).

Output follows the Nagios plugin layout: the first line carries the overall
status and the performance data, the remaining lines are a human-readable
report.  The exit code is 0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN).
"""
import subprocess
import sys
import re
import argparse
from collections import defaultdict

ESC = {
    "reset": "\033[0m",
    "red": "\033[31m",
    "yellow": "\033[33m",
    "green": "\033[32m",
    "cyan": "\033[36m",
    "bold": "\033[1m",
}

STATUS_TEXT = {0: "OK", 1: "WARNING", 2: "CRITICAL", 3: "UNKNOWN"}

# Rank used when aggregating results: UNKNOWN (3) must not hide CRITICAL (2),
# so the numeric exit codes cannot be compared directly.
_SEVERITY = {0: 0, 1: 1, 3: 2, 2: 3}

# One line of `mount` output: "<source> on <target> type <fstype> (<options>)".
_MOUNT_LINE = re.compile(
    r"^(?P<source>.+?) on (?P<target>.+) type (?P<fstype>\S+) \((?P<options>.*)\)$"
)

# One line of `btrfs filesystem df -b` output:
# "Data, single: total=8388608, used=65536".
_DF_LINE = re.compile(
    r"^(?P<label>[A-Za-z+]+),[^:]*:\s*total=(?P<total>\d+),\s*used=(?P<used>\d+)",
    re.MULTILINE,
)


class NagiosArgumentParser(argparse.ArgumentParser):
    """Argument parser whose usage errors exit with UNKNOWN (3).

    argparse exits with status 2 by default, which a Nagios-style scheduler
    would read as CRITICAL.
    """

    def error(self, message):
        print(f"UNKNOWN: {message}")
        self.print_usage(sys.stderr)
        sys.exit(3)


def color_text(text, color):
    """Wrap *text* in the ANSI escape sequence for *color* (a key of ESC)."""
    return f"{ESC[color]}{text}{ESC['reset']}"


def run_cmd(cmd):
    """Run *cmd* and return its stripped combined stdout/stderr.

    *cmd* may be an argument list (executed directly, no shell involved) or a
    string (executed through the shell, kept for backward compatibility).
    Prefer the list form whenever an argument comes from user input.

    Returns an empty string when the command cannot be started or exits with
    a non-zero status.
    """
    try:
        return subprocess.check_output(
            cmd,
            shell=isinstance(cmd, str),
            text=True,
            errors="replace",  # undecodable bytes must not abort the check
            stderr=subprocess.STDOUT,
        ).strip()
    except (subprocess.CalledProcessError, OSError):
        return ""


def parse_btrfs_mounts(mount_output):
    """Return the BTRFS mount points listed in `mount` command output.

    Only lines whose filesystem type is exactly ``btrfs`` are used, so a
    device or directory that merely contains "btrfs" in its name is ignored.
    Mount points are returned in order of appearance without duplicates.
    """
    found = []
    for line in mount_output.splitlines():
        parsed = _MOUNT_LINE.match(line.strip())
        if parsed is None or parsed.group("fstype") != "btrfs":
            continue
        target = parsed.group("target")
        if target not in found:
            found.append(target)
    return found


def find_btrfs_mounts():
    """Find all BTRFS mount points"""
    return parse_btrfs_mounts(run_cmd(["mount"]))


def is_btrfs_mount(mount, btrfs_mounts=None):
    """Check if mount point is actually BTRFS

    *mount* must equal a BTRFS mount point exactly (a trailing slash is
    ignored); a path that is only a prefix or substring of one does not count.
    *btrfs_mounts* may supply an already collected list of mount points; when
    omitted, the list is obtained from find_btrfs_mounts().
    """
    if not mount:
        return False
    if btrfs_mounts is None:
        btrfs_mounts = find_btrfs_mounts()
    wanted = mount.rstrip("/") or "/"
    return wanted in btrfs_mounts


def parse_usage_percent(df_output, kind):
    """Return used/total in percent for one block-group type.

    *df_output* is the output of ``btrfs filesystem df -b``; *kind* is a
    block-group name such as ``"Data"`` or ``"Metadata"``.  Lines for mixed
    block groups (``Data+Metadata``) count for both kinds, and several lines
    of the same kind are summed.

    Returns a float rounded to one decimal, or None when no matching line
    with a non-zero total is present.
    """
    total = 0
    used = 0
    for entry in _DF_LINE.finditer(df_output):
        if kind in entry.group("label").split("+"):
            total += int(entry.group("total"))
            used += int(entry.group("used"))
    if total == 0:
        return None
    return round(100 * used / total, 1)


def count_scrub_errors(scrub_output):
    """Return the number of errors reported by ``btrfs scrub status``.

    Two layouts are understood: an ``Error summary:`` line followed either by
    "no errors found" or by ``name=count`` pairs, and a "... with N errors"
    line.  Output matching neither layout (including empty output) yields 0.
    """
    summary = re.search(
        r"^\s*Error summary:\s*(.*)$", scrub_output, re.MULTILINE | re.IGNORECASE
    )
    if summary:
        return sum(int(n) for n in re.findall(r"\w+=(\d+)", summary.group(1)))
    legacy = re.findall(r"with (\d+) errors?", scrub_output, re.IGNORECASE)
    return sum(int(n) for n in legacy)


def threshold_status(value, warn, crit):
    """Map *value* to 2 when >= *crit*, 1 when >= *warn*, otherwise 0."""
    if value >= crit:
        return 2
    if value >= warn:
        return 1
    return 0


def worst_status(first, second):
    """Return the more severe of two status codes.

    Severity order is OK < WARNING < UNKNOWN < CRITICAL.
    """
    return max(first, second, key=lambda code: _SEVERITY.get(code, _SEVERITY[3]))


def perf_label(mount, metric):
    """Build a quoted performance-data label that is unique per mount point."""
    # "=" separates label and value and "'" delimits the label, so neither
    # may appear unescaped inside it.
    safe_mount = mount.replace("=", "_").replace("'", "''")
    return f"'{safe_mount}_{metric}'"


def main():
    """Parse the command line, check the target mounts and exit with the status."""
    parser = NagiosArgumentParser(
        add_help=False,
        description="BTRFS Filesystem Health Check",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s                          # Check all BTRFS mounts
  %(prog)s --list                   # List available BTRFS mounts
  %(prog)s /data                    # Check specific mount
  %(prog)s -m /data,/var            # Check multiple mounts
  %(prog)s -d 85 -D 95 -M 90 -C 98  # Custom thresholds
        """,
    )

    parser.add_argument('-h', '--help', action='store_true', help='Show this help message')
    parser.add_argument('-l', '--list', action='store_true', help='List all mounted BTRFS filesystems')
    # Stored separately from the positional argument: two actions writing to
    # the same destination overwrite each other.
    parser.add_argument('-m', '--mount', dest='mount_list', metavar='MOUNTS',
                        help='Comma-separated BTRFS mount points to check')
    parser.add_argument('-d', '--data-threshold', type=int, default=80, help='Data usage warning threshold %% (default: 80)')
    parser.add_argument('-D', '--data-crit', type=int, default=90, help='Data usage critical threshold %% (default: 90)')
    parser.add_argument('-M', '--meta-threshold', type=int, default=85, help='Metadata warning threshold %% (default: 85)')
    parser.add_argument('-C', '--meta-crit', type=int, default=95, help='Metadata critical threshold %% (default: 95)')
    parser.add_argument('-S', '--scrub-errors', type=int, default=0, help='Max allowed scrub errors (default: 0)')

    parser.add_argument('mount', nargs='?', help='Single mount point to check')

    args = parser.parse_args()

    # Nagios: --help and --list exit 0 (OK)
    if args.help:
        parser.print_help()
        sys.exit(0)

    # Escape sequences are only useful on a terminal; a scheduler capturing
    # the output would store them as garbage.
    use_color = sys.stdout.isatty()

    def paint(text, color):
        return color_text(text, color) if use_color else text

    known_mounts = find_btrfs_mounts()

    if args.list:
        if known_mounts:
            print(paint("Available BTRFS mount points:", "cyan"))
            for known in known_mounts:
                print(f"  -> {known}")
        else:
            print(paint("No mounted BTRFS filesystems found.", "red"))
        sys.exit(0)

    # Determine target mounts: explicit requests win, otherwise check all.
    requested = []
    if args.mount_list is not None:
        requested.extend(p.strip() for p in args.mount_list.split(',') if p.strip())
    if args.mount is not None and args.mount.strip():
        requested.append(args.mount.strip())
    if args.mount_list is not None or args.mount is not None:
        target_mounts = list(dict.fromkeys(requested))  # de-duplicate, keep order
    else:
        target_mounts = known_mounts

    # Nagios: No mounts = UNKNOWN (3)
    if not target_mounts:
        print("UNKNOWN No BTRFS mount points found or specified")
        sys.exit(3)

    data_warn = args.data_threshold
    data_crit = args.data_crit
    meta_warn = args.meta_threshold
    meta_crit = args.meta_crit
    max_scrub_err = args.scrub_errors

    global_status = 0
    all_perfdata = []
    status_colors = {0: "green", 1: "yellow", 2: "red", 3: "red"}

    # The report is buffered so that the status line can be printed first.
    # None of these lines may contain "|", which would start perfdata.
    report = [
        paint("Checking BTRFS filesystems:", "bold"),
        f"Thresholds: Data(W>={data_warn}/C>={data_crit}) "
        f"Meta(W>={meta_warn}/C>={meta_crit}) Scrub(>{max_scrub_err})",
        "=" * 80,
    ]

    for mount in target_mounts:
        report.append("")
        report.append(paint(mount, "cyan"))

        # Validate mount point
        if not is_btrfs_mount(mount, known_mounts):
            report.append(paint("  UNKNOWN: Mount point not found or not BTRFS", "red"))
            global_status = worst_status(global_status, 3)
            all_perfdata.append(f"{perf_label(mount, 'status')}=3")
            continue

        # -b prints raw byte counts, from which the percentages are computed.
        df_output = run_cmd(["btrfs", "filesystem", "df", "-b", mount])
        data_pct = parse_usage_percent(df_output, "Data")
        meta_pct = parse_usage_percent(df_output, "Metadata")
        if data_pct is None or meta_pct is None:
            report.append(paint("  UNKNOWN: btrfs filesystem df failed", "red"))
            global_status = worst_status(global_status, 3)
            all_perfdata.append(f"{perf_label(mount, 'status')}=3")
            continue

        report.append(paint("  BTRFS filesystem accessible", "green"))

        data_status = threshold_status(data_pct, data_warn, data_crit)
        meta_status = threshold_status(meta_pct, meta_warn, meta_crit)
        report.append(f"  Data: {paint(f'{data_pct:g}%', status_colors[data_status])}")
        report.append(f"  Metadata: {paint(f'{meta_pct:g}%', status_colors[meta_status])}")

        # Scrub status
        scrub_errors = count_scrub_errors(run_cmd(["btrfs", "scrub", "status", mount]))
        scrub_status = 2 if scrub_errors > max_scrub_err else 0
        report.append(
            f"  Scrub errors: {paint(str(scrub_errors), status_colors[scrub_status])}"
        )

        for part_status in (data_status, meta_status, scrub_status):
            global_status = worst_status(global_status, part_status)

        # Nagios perfdata format: 'label'=value[UOM];warn;crit;min;max
        all_perfdata.append(
            f"{perf_label(mount, 'data_pct')}={data_pct:g}%;{data_warn};{data_crit};0;100"
        )
        all_perfdata.append(
            f"{perf_label(mount, 'meta_pct')}={meta_pct:g}%;{meta_warn};{meta_crit};0;100"
        )
        all_perfdata.append(
            f"{perf_label(mount, 'scrub_err')}={scrub_errors};;{max_scrub_err};0"
        )

    report.append("")
    report.append("=" * 80)
    report.append(
        paint(f"FINAL STATUS: {STATUS_TEXT[global_status]}", status_colors[global_status])
    )

    # Nagios: the first output line is the status text plus perfdata; it is
    # kept free of colors so that it can be parsed.
    perfdata = " ".join(all_perfdata)
    print(f"{STATUS_TEXT[global_status]} BTRFS check complete | {perfdata}")
    print("\n".join(report))

    # Nagios standard exit codes
    sys.exit(global_status)


if __name__ == "__main__":
    main()