Update adguard_log_parser.py

This commit is contained in:
gru
2025-06-15 20:10:05 +02:00
parent 368e62ad97
commit d585dffb43

View File

@ -5,15 +5,26 @@ import glob
import tldextract import tldextract
from collections import Counter from collections import Counter
def matches(record, ip, keywords, unblocked_only=False):
    """Return True when a query-log record passes every active filter.

    Args:
        record: One decoded query-log entry (a dict parsed from a JSON line).
        ip: Client address; the record's "IP" field must equal it exactly.
        keywords: Iterable of substrings. When non-empty, at least one must
            occur (case-insensitively) in the record's "QH" host name. An
            empty or None value disables the keyword filter.
        unblocked_only: When true, reject records that AdGuard Home filtered.
            Defaults to False so three-argument callers keep working.

    Returns:
        bool: True if the record matches the client, the keyword filter and,
        if requested, the unblocked-only filter.
    """
    if record.get("IP") != ip:
        return False

    if keywords:
        # "QH" may be absent or null; treat both as an empty host name.
        domain = (record.get("QH") or "").lower()
        if not any(keyword.lower() in domain for keyword in keywords):
            return False

    if unblocked_only:
        result = record.get("Result")
        # A missing, empty or non-dict "Result" counts as unblocked.
        # A non-empty "Result" alone does not mean "blocked": allow-listed
        # queries also carry a reason and rules. Only "IsFiltered" marks a
        # query that was actually filtered.
        if isinstance(result, dict) and result.get("IsFiltered"):
            return False

    return True
def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False): def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False):
matched_count = 0 matched_count = 0
open_func = gzip.open if filepath.endswith(".gz") else open open_func = gzip.open if filepath.endswith(".gz") else open
@ -25,7 +36,7 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False):
continue continue
try: try:
record = json.loads(line) record = json.loads(line)
if matches(record, ip, keywords): if matches(record, ip, keywords, unblocked_only):
matched_count += 1 matched_count += 1
if domain_counter is not None: if domain_counter is not None:
full_host = record.get("QH", "").lower() full_host = record.get("QH", "").lower()
@ -61,7 +72,7 @@ AdGuard Home Log Analyzer
========================= =========================
Usage: Usage:
python3 find_adguard_log.py <log_pattern(s)> <IP> [keyword1 keyword2 ...] [--top N] [--raw] python3 find_adguard_log.py <log_pattern(s)> <IP> [keyword1 keyword2 ...] [--top N] [--raw] [--unblocked-only]
Positional arguments: Positional arguments:
<log_pattern(s)> One or more file patterns (e.g., querylog*.json or logs/*.gz) <log_pattern(s)> One or more file patterns (e.g., querylog*.json or logs/*.gz)
@ -71,6 +82,7 @@ Positional arguments:
Options: Options:
--top N Show top N most visited domains (default: off) --top N Show top N most visited domains (default: off)
--raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com') --raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com')
--unblocked-only Show only queries that were NOT blocked by AdGuard
--help, -h Show this help message --help, -h Show this help message
Features: Features:
@ -82,13 +94,14 @@ Features:
Examples: Examples:
python3 find_adguard_log.py logs/querylog*.json.gz 192.168.1.5 python3 find_adguard_log.py logs/querylog*.json.gz 192.168.1.5
python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google
python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw
""") """)
def parse_arguments(argv): def parse_arguments(argv):
top_limit = None top_limit = None
raw_mode = False raw_mode = False
unblocked_only = False
show_help = False show_help = False
args = [] args = []
i = 0 i = 0
@ -106,14 +119,17 @@ def parse_arguments(argv):
elif argv[i] == '--raw': elif argv[i] == '--raw':
raw_mode = True raw_mode = True
i += 1 i += 1
elif argv[i] == '--unblocked-only':
unblocked_only = True
i += 1
else: else:
args.append(argv[i]) args.append(argv[i])
i += 1 i += 1
return args, top_limit, raw_mode, show_help return args, top_limit, raw_mode, unblocked_only, show_help
def main(): def main():
raw_args = sys.argv[1:] raw_args = sys.argv[1:]
args, top_limit, raw_mode, show_help = parse_arguments(raw_args) args, top_limit, raw_mode, unblocked_only, show_help = parse_arguments(raw_args)
if show_help or len(args) < 2: if show_help or len(args) < 2:
print_help() print_help()
@ -150,7 +166,7 @@ def main():
domain_counter = Counter() if top_limit else None domain_counter = Counter() if top_limit else None
for log_file in sorted(log_files): for log_file in sorted(log_files):
total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode) total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only)
print(f"\nTotal matched entries across all files: {total_matches}") print(f"\nTotal matched entries across all files: {total_matches}")