Update adguard_log_parser.py: add --unblocked-only filter

2025-06-15 20:10:05 +02:00
parent 368e62ad97
commit d585dffb43
1 changed file with 27 additions and 11 deletions
--- a/adguard_log_parser.py
+++ b/adguard_log_parser.py
@@ -5,15 +5,26 @@ import glob
 import tldextract
 from collections import Counter

-def matches(record, ip, keywords):
+def matches(record, ip, keywords, unblocked_only):
    if record.get("IP") != ip:
        return False
    if not keywords:
-        return True
+        domain_match = True
+    else:
        domain = record.get("QH", "").lower()
-    return any(keyword.lower() in domain for keyword in keywords)
+        domain_match = any(keyword.lower() in domain for keyword in keywords)
+    if not domain_match:
+        return False
+    if unblocked_only:
+        # A missing or empty "Result" field means the query was not blocked.
+        result = record.get("Result")
+        # Only a non-empty dict rejects the record; None, {}, or a non-dict value is kept.
+        if result and isinstance(result, dict) and len(result) > 0:
+            # Any non-empty Result dict is assumed to mean "blocked", so drop the record.
+            return False
+    return True

-def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False):
+def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False):
    matched_count = 0
    open_func = gzip.open if filepath.endswith(".gz") else open

@@ -25,7 +36,7 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False):
                    continue
                try:
                    record = json.loads(line)
-                    if matches(record, ip, keywords):
+                    if matches(record, ip, keywords, unblocked_only):
                        matched_count += 1
                        if domain_counter is not None:
                            full_host = record.get("QH", "").lower()
@@ -61,7 +72,7 @@ AdGuard Home Log Analyzer
 =========================

 Usage:
-  python3 find_adguard_log.py <log_pattern(s)> <IP> [keyword1 keyword2 ...] [--top N] [--raw]
+  python3 find_adguard_log.py <log_pattern(s)> <IP> [keyword1 keyword2 ...] [--top N] [--raw] [--unblocked-only]

 Positional arguments:
  <log_pattern(s)>   One or more file patterns (e.g., querylog*.json or logs/*.gz)
@@ -71,6 +82,7 @@ Positional arguments:
 Options:
  --top N            Show top N most visited domains (default: off)
  --raw              Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com')
+  --unblocked-only   Show only queries that were NOT blocked by AdGuard
  --help, -h         Show this help message

 Features:
@@ -82,13 +94,14 @@ Features:
 Examples:
  python3 find_adguard_log.py logs/querylog*.json.gz 192.168.1.5
  python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google
-  python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100
+  python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only
  python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw
 """)

 def parse_arguments(argv):
    top_limit = None
    raw_mode = False
+    unblocked_only = False
    show_help = False
    args = []
    i = 0
@@ -106,14 +119,17 @@ def parse_arguments(argv):
        elif argv[i] == '--raw':
            raw_mode = True
            i += 1
+        elif argv[i] == '--unblocked-only':
+            unblocked_only = True
+            i += 1
        else:
            args.append(argv[i])
            i += 1
-    return args, top_limit, raw_mode, show_help
+    return args, top_limit, raw_mode, unblocked_only, show_help

 def main():
    raw_args = sys.argv[1:]
-    args, top_limit, raw_mode, show_help = parse_arguments(raw_args)
+    args, top_limit, raw_mode, unblocked_only, show_help = parse_arguments(raw_args)

    if show_help or len(args) < 2:
        print_help()
@@ -150,7 +166,7 @@ def main():
    domain_counter = Counter() if top_limit else None

    for log_file in sorted(log_files):
-        total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode)
+        total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only)

    print(f"\nTotal matched entries across all files: {total_matches}")