Add check_btrfs.py

2025-12-02 10:06:17 +01:00
parent 2ef1309061
commit 36d8c52e8b
1 changed files with 175 additions and 0 deletions
--- a/check_btrfs.py
+++ b/check_btrfs.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+import subprocess
+import sys
+import re
+import argparse
+from collections import defaultdict
+
+# ANSI escape sequences used to colorize terminal output, keyed by name.
+ESC = {
+    "reset": "\033[0m",
+    "red": "\033[31m",
+    "yellow": "\033[33m",
+    "green": "\033[32m",
+    "cyan": "\033[36m",
+    "bold": "\033[1m",
+}
+
+def color_text(text, color):
+    """Return *text* wrapped in the ANSI sequence for *color* plus a reset.
+
+    *color* must be a key of ``ESC``; an unknown name raises ``KeyError``.
+    """
+    return "{}{}{}".format(ESC[color], text, ESC["reset"])
+
+def run_cmd(cmd):
+    """Run *cmd* and return its combined stdout/stderr with whitespace stripped.
+
+    Args:
+        cmd: Either a shell command line (``str``), executed through the shell
+            so that pipes and quoting work, or an argument list, executed
+            directly without a shell.
+
+    Returns:
+        The stripped output, or ``""`` when the command cannot be started,
+        exits with a non-zero status, or produces undecodable output.
+    """
+    try:
+        output = subprocess.check_output(
+            cmd,
+            shell=isinstance(cmd, str),
+            text=True,
+            stderr=subprocess.STDOUT,
+        )
+    except (subprocess.SubprocessError, OSError, ValueError):
+        # Deliberately best-effort: callers treat "" as "command failed".
+        # Only command-related errors are swallowed, so KeyboardInterrupt and
+        # SystemExit still propagate (a bare ``except:`` would hide them).
+        return ""
+    return output.strip()
+
+def find_btrfs_mounts():
+    """Return the mount points of all mounted BTRFS filesystems.
+
+    Parses the output of ``mount``, whose lines are expected to look like
+    ``<device> on <mount point> type <fstype> (<options>)``. Only lines whose
+    filesystem-type field is exactly ``btrfs`` are used, so a device or path
+    that merely contains the text "btrfs" is not a false positive, and mount
+    points containing spaces are returned whole.
+
+    Returns:
+        A list of mount point paths in the order reported, without
+        duplicates; empty if none are found or ``mount`` fails.
+    """
+    btrfs_mounts = []
+    for line in run_cmd("mount").splitlines():
+        # Greedy mount-point group: the real " type <fs> (" is the last one.
+        fields = re.match(r'^.+? on (.+) type (\S+) \(', line)
+        if not fields or fields.group(2) != "btrfs":
+            continue
+        mount_point = fields.group(1)
+        if mount_point not in btrfs_mounts:
+            btrfs_mounts.append(mount_point)
+    return btrfs_mounts
+
+def is_btrfs_mount(mount):
+    """Return True if *mount* is exactly the mount point of a BTRFS filesystem.
+
+    The path is compared against the mount-point field of each ``mount``
+    output line (``<device> on <mount point> type <fstype> (<options>)``)
+    and the filesystem type must be ``btrfs``. Comparing whole fields avoids
+    substring matches (e.g. ``/`` or ``/data`` matching ``/data2``) and keeps
+    the user-supplied path out of any shell command line.
+
+    Args:
+        mount: Mount point path; trailing slashes are ignored.
+
+    Returns:
+        True when a matching BTRFS mount line exists, otherwise False
+        (including for an empty path or when ``mount`` fails).
+    """
+    if not mount:
+        return False
+    # "/data/" and "/data" name the same mount point; "/" must stay "/".
+    wanted = mount.rstrip("/") or "/"
+    for line in run_cmd("mount").splitlines():
+        fields = re.match(r'^.+? on (.+) type (\S+) \(', line)
+        if fields and fields.group(1) == wanted and fields.group(2) == "btrfs":
+            return True
+    return False
+
+# Binary multipliers for the size suffixes printed by `btrfs filesystem df`.
+_UNIT_FACTORS = {"": 1, "K": 1 << 10, "M": 1 << 20, "G": 1 << 30, "T": 1 << 40, "P": 1 << 50, "E": 1 << 60}
+
+# One allocation line, e.g. "Data, single: total=8.00GiB, used=6.52GiB".
+_DF_LINE_RE = re.compile(
+    r'^\s*([\w+]+),[^:\n]*:\s*total=([\d.]+)([KMGTPE]?)i?B?,\s*used=([\d.]+)([KMGTPE]?)i?B?',
+    re.IGNORECASE | re.MULTILINE,
+)
+
+_STATUS_TEXT = {0: "OK", 1: "WARNING", 2: "CRITICAL", 3: "UNKNOWN"}
+_STATUS_COLOR = {0: "green", 1: "yellow", 2: "red", 3: "red"}
+
+
+def _usage_percent(df_output, kind):
+    """Return used/total in percent for block-group type *kind*, or None.
+
+    `btrfs filesystem df` reports sizes, not percentages, so the value is
+    computed from the ``total=``/``used=`` pairs. Several lines of the same
+    type are summed; a mixed "Data+Metadata" line counts for both types.
+    None means no parsable line for *kind* was found.
+    """
+    total = used = 0.0
+    seen = False
+    for label, t_num, t_unit, u_num, u_unit in _DF_LINE_RE.findall(df_output):
+        if kind.lower() not in label.lower().split("+"):
+            continue
+        try:
+            total += float(t_num) * _UNIT_FACTORS[t_unit.upper()]
+            used += float(u_num) * _UNIT_FACTORS[u_unit.upper()]
+        except ValueError:
+            return None
+        seen = True
+    if not seen:
+        return None
+    return round(100.0 * used / total, 1) if total > 0 else 0.0
+
+
+def _count_scrub_errors(scrub_output):
+    """Return the number of errors reported by `btrfs scrub status` output.
+
+    Sums the numbers in "... with N errors" phrases and the ``key=N`` counters
+    on an "Error summary:" line. Output without either (e.g. "no errors
+    found", or an empty string) yields 0.
+    """
+    errors = sum(int(n) for n in re.findall(r'(\d+) errors?', scrub_output, re.IGNORECASE))
+    summary = re.search(r'^\s*Error summary:(.*)$', scrub_output, re.IGNORECASE | re.MULTILINE)
+    if summary:
+        errors += sum(int(n) for n in re.findall(r'\w+=(\d+)', summary.group(1)))
+    return errors
+
+
+def _threshold_status(value, warn, crit):
+    """Map *value* to a Nagios status: 2 if >= crit, 1 if >= warn, else 0."""
+    if value >= crit:
+        return 2
+    if value >= warn:
+        return 1
+    return 0
+
+
+def _perf_label(mount, metric, prefixed):
+    """Build a perfdata label, optionally prefixed with the mount point."""
+    label = f"{mount}_{metric}" if prefixed else metric
+    # Labels containing spaces must be single-quoted in Nagios perfdata.
+    return f"'{label}'" if " " in label else label
+
+
+def _check_mount(mount, args, prefixed):
+    """Check one mount point, print its report and return (status, perfdata).
+
+    *args* is the parsed argparse namespace holding the thresholds;
+    *prefixed* selects mount-prefixed perfdata labels (needed to keep labels
+    unique when more than one mount is checked).
+    """
+    import shlex  # local import: only needed to quote paths for the shell
+
+    unknown = (3, f"{_perf_label(mount, 'status', True)}=3")
+
+    # Validate mount point
+    if not is_btrfs_mount(mount):
+        print(color_text("  UNKNOWN: Mount point not found or not BTRFS", "red"))
+        return unknown
+
+    # The path comes from the command line, so quote it for the shell.
+    quoted = shlex.quote(mount)
+
+    # Test btrfs df
+    df_output = run_cmd(f"btrfs filesystem df {quoted}")
+    if not df_output or "Data" not in df_output:
+        print(color_text("  UNKNOWN: btrfs filesystem df failed", "red"))
+        return unknown
+
+    print(color_text("  BTRFS filesystem accessible", "green"))
+
+    data_pct = _usage_percent(df_output, "Data")
+    meta_pct = _usage_percent(df_output, "Metadata")
+    if data_pct is None or meta_pct is None:
+        # Never report an unparsed value as 0% (that would read as OK).
+        print(color_text("  UNKNOWN: could not parse btrfs filesystem df output", "red"))
+        return unknown
+
+    data_status = _threshold_status(data_pct, args.data_threshold, args.data_crit)
+    meta_status = _threshold_status(meta_pct, args.meta_threshold, args.meta_crit)
+    print(f"  Data: {color_text(f'{data_pct}%', _STATUS_COLOR[data_status])}")
+    print(f"  Metadata: {color_text(f'{meta_pct}%', _STATUS_COLOR[meta_status])}")
+
+    # Scrub status
+    scrub_errors = _count_scrub_errors(run_cmd(f"btrfs scrub status {quoted}"))
+    scrub_status = 2 if scrub_errors > args.scrub_errors else 0
+    print(f"  Scrub errors: {color_text(str(scrub_errors), _STATUS_COLOR[scrub_status])}")
+
+    # Nagios perfdata format: metric=value;warn;crit;min;max
+    perf = " ".join([
+        f"{_perf_label(mount, 'data_pct', prefixed)}={data_pct};{args.data_threshold};{args.data_crit}",
+        f"{_perf_label(mount, 'meta_pct', prefixed)}={meta_pct};{args.meta_threshold};{args.meta_crit}",
+        f"{_perf_label(mount, 'scrub_err', prefixed)}={scrub_errors};{args.scrub_errors}",
+    ])
+    return max(data_status, meta_status, scrub_status), perf
+
+
+def main():
+    """Command-line entry point: check BTRFS mounts and exit Nagios-style.
+
+    Mount points come from ``-m/--mount`` (comma-separated) and/or the
+    positional argument; with neither, every mounted BTRFS filesystem is
+    checked. Prints a colorized per-mount report followed by an uncolored
+    summary line with perfdata, then exits with 0 (OK), 1 (WARNING),
+    2 (CRITICAL) or 3 (UNKNOWN). ``--help`` and ``--list`` exit 0.
+    """
+    parser = argparse.ArgumentParser(
+        add_help=False,
+        description="BTRFS Filesystem Health Check",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s                           # Check all BTRFS mounts
+  %(prog)s --list                    # List available BTRFS mounts
+  %(prog)s /data                     # Check specific mount
+  %(prog)s -m /data,/var             # Check multiple mounts
+  %(prog)s -d 85 -D 95 -M 90 -C 98   # Custom thresholds
+        """
+    )
+
+    parser.add_argument('-h', '--help', action='store_true', help='Show this help message')
+    parser.add_argument('-l', '--list', action='store_true', help='List all mounted BTRFS filesystems')
+    # Own dest: sharing "mount" with the positional let one overwrite the other.
+    parser.add_argument('-m', '--mount', dest='mounts', metavar='MOUNT',
+                        help='Comma-separated BTRFS mount points to check')
+    parser.add_argument('-d', '--data-threshold', type=int, default=80, help='Data usage warning threshold %% (default: 80)')
+    parser.add_argument('-D', '--data-crit', type=int, default=90, help='Data usage critical threshold %% (default: 90)')
+    parser.add_argument('-M', '--meta-threshold', type=int, default=85, help='Metadata warning threshold %% (default: 85)')
+    parser.add_argument('-C', '--meta-crit', type=int, default=95, help='Metadata critical threshold %% (default: 95)')
+    parser.add_argument('-S', '--scrub-errors', type=int, default=0, help='Max allowed scrub errors (default: 0)')
+
+    parser.add_argument('mount', nargs='?', help='Single mount point to check')
+
+    args = parser.parse_args()
+
+    # Nagios: --help and --list exit 0 (OK)
+    if args.help:
+        parser.print_help()
+        sys.exit(0)
+
+    if args.list:
+        mounts = find_btrfs_mounts()
+        if mounts:
+            print(color_text("Available BTRFS mount points:", "cyan"))
+            for m in mounts:
+                print(f"  -> {m}")
+        else:
+            print(color_text("No mounted BTRFS filesystems found.", "red"))
+        sys.exit(0)
+
+    # Determine target mounts: merge -m and the positional, else auto-detect.
+    requested = [spec for spec in (args.mounts, args.mount) if spec is not None]
+    if requested:
+        names = [m.strip() for spec in requested for m in spec.split(',')]
+        # dict.fromkeys drops duplicates while keeping the given order.
+        target_mounts = list(dict.fromkeys(m for m in names if m))
+    else:
+        target_mounts = find_btrfs_mounts()
+
+    # Nagios: No mounts = UNKNOWN (3)
+    if not target_mounts:
+        print(color_text("UNKNOWN No BTRFS mount points found or specified|", "yellow"))
+        sys.exit(3)
+
+    global_status = 0
+    all_perfdata = []
+    # Bare labels would repeat for every mount; prefix them when checking several.
+    prefixed = len(target_mounts) > 1
+
+    # Nagios: Human-readable output only (colorful)
+    # NOTE(review): Nagios reads the first output line as the status text, but
+    # the plain status line is printed last here; order kept for compatibility.
+    print(color_text("Checking BTRFS filesystems:", "bold"))
+    print(f"Thresholds: Data(W>{args.data_threshold}/C>{args.data_crit}) "
+          f"Meta(W>{args.meta_threshold}/C>{args.meta_crit}) Scrub(>{args.scrub_errors})")
+    print("=" * 80)
+
+    for mount in target_mounts:
+        print(f"\n{color_text(mount, 'cyan')}")
+        mount_status, perf = _check_mount(mount, args, prefixed)
+        # Highest code wins, so UNKNOWN (3) takes precedence over CRITICAL (2).
+        global_status = max(global_status, mount_status)
+        all_perfdata.append(perf)
+
+    # Nagios: Single line FINAL STATUS + perfdata
+    perfdata = " ".join(all_perfdata)
+
+    # Colorful human output
+    print("\n" + "=" * 80)
+    print(color_text(f"FINAL STATUS: {_STATUS_TEXT[global_status]}", _STATUS_COLOR[global_status]))
+    print(f"| {perfdata}")
+
+    # Nagios: Clean single-line output (no colors for parsing)
+    print(f"\n{_STATUS_TEXT[global_status]} BTRFS check complete| {perfdata}")
+
+    # Nagios standard exit codes
+    sys.exit(global_status)
+
+# Run the check only when executed as a script, not when imported as a module.
+if __name__ == "__main__":
+    main()