Update adguard_log_parser.py
This commit is contained in:
@ -5,15 +5,26 @@ import glob
|
|||||||
import tldextract
|
import tldextract
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
def matches(record, ip, keywords, unblocked_only=False):
    """Return True when a query-log record passes every active filter.

    Args:
        record: Parsed JSON object (dict) for one query-log line.
        ip: Client address; the record's "IP" field must equal it exactly.
        keywords: Iterable of substrings. When non-empty, at least one must
            occur (case-insensitively) in the record's "QH" host name.
            An empty or None value disables keyword filtering.
        unblocked_only: When true, reject records whose "Result" object
            marks the query as filtered. Defaults to False so callers that
            pass only three arguments keep working.

    Returns:
        bool: True if the record matches, False otherwise.
    """
    if record.get("IP") != ip:
        return False

    if keywords:
        # "QH" may be absent or null; treat both as an empty host name
        # instead of failing on None.lower().
        domain = str(record.get("QH") or "").lower()
        if not any(keyword.lower() in domain for keyword in keywords):
            return False

    if unblocked_only:
        result = record.get("Result")
        # A missing, empty, or non-dict "Result" counts as unblocked.
        # NOTE(review): a non-empty "Result" alone is not proof of a block;
        # AdGuard Home appears to populate it for allow-listed and rewritten
        # queries too, with "IsFiltered" set only on real blocks -- confirm
        # against the log format of the AdGuard Home version in use.
        if isinstance(result, dict) and result.get("IsFiltered"):
            return False

    return True
|
||||||
|
|
||||||
def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False):
|
def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False):
|
||||||
matched_count = 0
|
matched_count = 0
|
||||||
open_func = gzip.open if filepath.endswith(".gz") else open
|
open_func = gzip.open if filepath.endswith(".gz") else open
|
||||||
|
|
||||||
@ -25,7 +36,7 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False):
|
|||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
record = json.loads(line)
|
record = json.loads(line)
|
||||||
if matches(record, ip, keywords):
|
if matches(record, ip, keywords, unblocked_only):
|
||||||
matched_count += 1
|
matched_count += 1
|
||||||
if domain_counter is not None:
|
if domain_counter is not None:
|
||||||
full_host = record.get("QH", "").lower()
|
full_host = record.get("QH", "").lower()
|
||||||
@ -61,7 +72,7 @@ AdGuard Home Log Analyzer
|
|||||||
=========================
|
=========================
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python3 find_adguard_log.py <log_pattern(s)> <IP> [keyword1 keyword2 ...] [--top N] [--raw]
|
python3 find_adguard_log.py <log_pattern(s)> <IP> [keyword1 keyword2 ...] [--top N] [--raw] [--unblocked-only]
|
||||||
|
|
||||||
Positional arguments:
|
Positional arguments:
|
||||||
<log_pattern(s)> One or more file patterns (e.g., querylog*.json or logs/*.gz)
|
<log_pattern(s)> One or more file patterns (e.g., querylog*.json or logs/*.gz)
|
||||||
@ -71,6 +82,7 @@ Positional arguments:
|
|||||||
Options:
|
Options:
|
||||||
--top N Show top N most visited domains (default: off)
|
--top N Show top N most visited domains (default: off)
|
||||||
--raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com')
|
--raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com')
|
||||||
|
--unblocked-only Show only queries that were NOT blocked by AdGuard
|
||||||
--help, -h Show this help message
|
--help, -h Show this help message
|
||||||
|
|
||||||
Features:
|
Features:
|
||||||
@ -82,13 +94,14 @@ Features:
|
|||||||
Examples:
|
Examples:
|
||||||
python3 find_adguard_log.py logs/querylog*.json.gz 192.168.1.5
|
python3 find_adguard_log.py logs/querylog*.json.gz 192.168.1.5
|
||||||
python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google
|
python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google
|
||||||
python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100
|
python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only
|
||||||
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw
|
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def parse_arguments(argv):
|
def parse_arguments(argv):
|
||||||
top_limit = None
|
top_limit = None
|
||||||
raw_mode = False
|
raw_mode = False
|
||||||
|
unblocked_only = False
|
||||||
show_help = False
|
show_help = False
|
||||||
args = []
|
args = []
|
||||||
i = 0
|
i = 0
|
||||||
@ -106,14 +119,17 @@ def parse_arguments(argv):
|
|||||||
elif argv[i] == '--raw':
|
elif argv[i] == '--raw':
|
||||||
raw_mode = True
|
raw_mode = True
|
||||||
i += 1
|
i += 1
|
||||||
|
elif argv[i] == '--unblocked-only':
|
||||||
|
unblocked_only = True
|
||||||
|
i += 1
|
||||||
else:
|
else:
|
||||||
args.append(argv[i])
|
args.append(argv[i])
|
||||||
i += 1
|
i += 1
|
||||||
return args, top_limit, raw_mode, show_help
|
return args, top_limit, raw_mode, unblocked_only, show_help
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
raw_args = sys.argv[1:]
|
raw_args = sys.argv[1:]
|
||||||
args, top_limit, raw_mode, show_help = parse_arguments(raw_args)
|
args, top_limit, raw_mode, unblocked_only, show_help = parse_arguments(raw_args)
|
||||||
|
|
||||||
if show_help or len(args) < 2:
|
if show_help or len(args) < 2:
|
||||||
print_help()
|
print_help()
|
||||||
@ -150,7 +166,7 @@ def main():
|
|||||||
domain_counter = Counter() if top_limit else None
|
domain_counter = Counter() if top_limit else None
|
||||||
|
|
||||||
for log_file in sorted(log_files):
|
for log_file in sorted(log_files):
|
||||||
total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode)
|
total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only)
|
||||||
|
|
||||||
print(f"\nTotal matched entries across all files: {total_matches}")
|
print(f"\nTotal matched entries across all files: {total_matches}")
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user