diff --git a/adguard_log_parser.py b/adguard_log_parser.py index 2bb8c04..f99315d 100644 --- a/adguard_log_parser.py +++ b/adguard_log_parser.py @@ -16,16 +16,14 @@ def matches(record, ip, keywords, unblocked_only): if not domain_match: return False if unblocked_only: - # Consider "Result" empty or missing as unblocked result = record.get("Result") - # If Result is None or empty dict, treat as unblocked if result and isinstance(result, dict) and len(result) > 0: - # Could be blocked return False return True -def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False): +def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False, collect_raw=False): matched_count = 0 + matched_records = [] if collect_raw else None open_func = gzip.open if filepath.endswith(".gz") else open try: @@ -38,7 +36,9 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, un record = json.loads(line) if matches(record, ip, keywords, unblocked_only): matched_count += 1 - if domain_counter is not None: + if collect_raw: + matched_records.append(record) + elif domain_counter is not None: full_host = record.get("QH", "").lower() if full_host: if raw_mode: @@ -57,7 +57,7 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, un print(f"Error while processing file {filepath}: {e}") print(f"Matched {matched_count} entries in {filepath}.\n") - return matched_count + return matched_count, matched_records def print_top_domains(domain_counter, limit): print(f"\nTop {limit} visited domains:") @@ -66,6 +66,14 @@ def print_top_domains(domain_counter, limit): for i, (domain, count) in enumerate(domain_counter.most_common(limit), 1): print(f"{i:<4} {count:<8} {domain}") +def print_raw_records(records): + print("\nRaw matched entries:") + for rec in records: + t = rec.get("T", "") + ip = rec.get("IP", "") + qh = rec.get("QH", "") + print(f"{t} | IP: {ip} | Query: {qh}") + def print_help(): print(""" AdGuard Home Log Analyzer @@ -81,7 +89,7 @@ Positional arguments: Options: --top N Show top N most visited domains (default: off) - --raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com') + --raw Show ALL matched entries (with timestamps and full domain), no domain consolidation, no top list --unblocked-only Show only queries that were NOT blocked by AdGuard --help, -h Show this help message @@ -96,6 +104,7 @@ Examples: python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw + python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --raw --unblocked-only """) def parse_arguments(argv): @@ -138,7 +147,6 @@ def main(): ip = None keywords = [] - # Find IP in args (simple check for IPv4 format) for i, arg in enumerate(args): parts = arg.split('.') if len(parts) == 4 and all(p.isdigit() for p in parts): @@ -163,14 +171,20 @@ def main(): sys.exit(1) total_matches = 0 - domain_counter = Counter() if top_limit else None + domain_counter = Counter() if (top_limit and not raw_mode) else None + collected_records = [] if raw_mode else None for log_file in sorted(log_files): - total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only) + count, records = process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only, collect_raw=raw_mode) + total_matches += count + if raw_mode and records: + collected_records.extend(records) print(f"\nTotal matched entries across all files: {total_matches}") - if top_limit and total_matches > 0: + if raw_mode: + print_raw_records(collected_records) + elif top_limit and total_matches > 0: print_top_domains(domain_counter, top_limit) if __name__ == "__main__":