Update adguard_log_parser.py

This commit is contained in:
gru
2025-06-15 20:14:47 +02:00
parent d585dffb43
commit 226e3b8ca0

View File

@ -16,16 +16,14 @@ def matches(record, ip, keywords, unblocked_only):
if not domain_match:
return False
if unblocked_only:
# Consider "Result" empty or missing as unblocked
result = record.get("Result")
# If Result is None or empty dict, treat as unblocked
if result and isinstance(result, dict) and len(result) > 0:
# Could be blocked
return False
return True
def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False):
def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False, collect_raw=False):
matched_count = 0
matched_records = [] if collect_raw else None
open_func = gzip.open if filepath.endswith(".gz") else open
try:
@ -38,7 +36,9 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, un
record = json.loads(line)
if matches(record, ip, keywords, unblocked_only):
matched_count += 1
if domain_counter is not None:
if collect_raw:
matched_records.append(record)
elif domain_counter is not None:
full_host = record.get("QH", "").lower()
if full_host:
if raw_mode:
@ -57,7 +57,7 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, un
print(f"Error while processing file {filepath}: {e}")
print(f"Matched {matched_count} entries in {filepath}.\n")
return matched_count
return matched_count, matched_records
def print_top_domains(domain_counter, limit):
print(f"\nTop {limit} visited domains:")
@ -66,6 +66,14 @@ def print_top_domains(domain_counter, limit):
for i, (domain, count) in enumerate(domain_counter.most_common(limit), 1):
print(f"{i:<4} {count:<8} {domain}")
def print_raw_records(records):
    """Print every collected query-log record, one per line.

    Each output line shows the record's timestamp ("T"), client address
    ("IP"), and queried hostname ("QH"). A field that is absent from a
    record renders as an empty string.
    """
    print("\nRaw matched entries:")
    # Build each display line lazily, then emit them in order.
    formatted = (
        "{} | IP: {} | Query: {}".format(
            entry.get("T", ""),
            entry.get("IP", ""),
            entry.get("QH", ""),
        )
        for entry in records
    )
    for line in formatted:
        print(line)
def print_help():
print("""
AdGuard Home Log Analyzer
@ -81,7 +89,7 @@ Positional arguments:
Options:
--top N Show top N most visited domains (default: off)
--raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com')
--raw Show ALL matched entries (with timestamps and full domain), no domain consolidation, no top list
--unblocked-only Show only queries that were NOT blocked by AdGuard
--help, -h Show this help message
@ -96,6 +104,7 @@ Examples:
python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google
python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --raw --unblocked-only
""")
def parse_arguments(argv):
@ -138,7 +147,6 @@ def main():
ip = None
keywords = []
# Find IP in args (simple check for IPv4 format)
for i, arg in enumerate(args):
parts = arg.split('.')
if len(parts) == 4 and all(p.isdigit() for p in parts):
@ -163,14 +171,20 @@ def main():
sys.exit(1)
total_matches = 0
domain_counter = Counter() if top_limit else None
domain_counter = Counter() if (top_limit and not raw_mode) else None
collected_records = [] if raw_mode else None
for log_file in sorted(log_files):
total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only)
count, records = process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only, collect_raw=raw_mode)
total_matches += count
if raw_mode and records:
collected_records.extend(records)
print(f"\nTotal matched entries across all files: {total_matches}")
if top_limit and total_matches > 0:
if raw_mode:
print_raw_records(collected_records)
elif top_limit and total_matches > 0:
print_top_domains(domain_counter, top_limit)
if __name__ == "__main__":