diff --git a/adguard_log_parser.py b/adguard_log_parser.py index d06a54a..2bb8c04 100644 --- a/adguard_log_parser.py +++ b/adguard_log_parser.py @@ -5,15 +5,26 @@ import glob import tldextract from collections import Counter -def matches(record, ip, keywords): +def matches(record, ip, keywords, unblocked_only): if record.get("IP") != ip: return False if not keywords: - return True - domain = record.get("QH", "").lower() - return any(keyword.lower() in domain for keyword in keywords) + domain_match = True + else: + domain = record.get("QH", "").lower() + domain_match = any(keyword.lower() in domain for keyword in keywords) + if not domain_match: + return False + if unblocked_only: + # Consider "Result" empty or missing as unblocked + result = record.get("Result") + # If Result is None or empty dict, treat as unblocked + if result and isinstance(result, dict) and len(result) > 0: + # Could be blocked + return False + return True -def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False): +def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False): matched_count = 0 open_func = gzip.open if filepath.endswith(".gz") else open @@ -25,7 +36,7 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False): continue try: record = json.loads(line) - if matches(record, ip, keywords): + if matches(record, ip, keywords, unblocked_only): matched_count += 1 if domain_counter is not None: full_host = record.get("QH", "").lower() @@ -61,7 +72,7 @@ AdGuard Home Log Analyzer ========================= Usage: - python3 find_adguard_log.py [keyword1 keyword2 ...] [--top N] [--raw] + python3 find_adguard_log.py [keyword1 keyword2 ...] [--top N] [--raw] [--unblocked-only] Positional arguments: One or more file patterns (e.g., querylog*.json or logs/*.gz) @@ -71,6 +82,7 @@ Positional arguments: Options: --top N Show top N most visited domains (default: off) --raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com') + --unblocked-only Show only queries that were NOT blocked by AdGuard --help, -h Show this help message Features: @@ -82,13 +94,14 @@ Features: Examples: python3 find_adguard_log.py logs/querylog*.json.gz 192.168.1.5 python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google - python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 + python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw """) def parse_arguments(argv): top_limit = None raw_mode = False + unblocked_only = False show_help = False args = [] i = 0 @@ -106,14 +119,17 @@ def parse_arguments(argv): elif argv[i] == '--raw': raw_mode = True i += 1 + elif argv[i] == '--unblocked-only': + unblocked_only = True + i += 1 else: args.append(argv[i]) i += 1 - return args, top_limit, raw_mode, show_help + return args, top_limit, raw_mode, unblocked_only, show_help def main(): raw_args = sys.argv[1:] - args, top_limit, raw_mode, show_help = parse_arguments(raw_args) + args, top_limit, raw_mode, unblocked_only, show_help = parse_arguments(raw_args) if show_help or len(args) < 2: print_help() @@ -150,7 +166,7 @@ def main(): domain_counter = Counter() if top_limit else None for log_file in sorted(log_files): - total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode) + total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only) print(f"\nTotal matched entries across all files: {total_matches}")