Update adguard_log_parser.py
This commit is contained in:
@ -16,16 +16,14 @@ def matches(record, ip, keywords, unblocked_only):
|
|||||||
if not domain_match:
|
if not domain_match:
|
||||||
return False
|
return False
|
||||||
if unblocked_only:
|
if unblocked_only:
|
||||||
# Consider "Result" empty or missing as unblocked
|
|
||||||
result = record.get("Result")
|
result = record.get("Result")
|
||||||
# If Result is None or empty dict, treat as unblocked
|
|
||||||
if result and isinstance(result, dict) and len(result) > 0:
|
if result and isinstance(result, dict) and len(result) > 0:
|
||||||
# Could be blocked
|
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False):
|
def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, unblocked_only=False, collect_raw=False):
|
||||||
matched_count = 0
|
matched_count = 0
|
||||||
|
matched_records = [] if collect_raw else None
|
||||||
open_func = gzip.open if filepath.endswith(".gz") else open
|
open_func = gzip.open if filepath.endswith(".gz") else open
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -38,7 +36,9 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, un
|
|||||||
record = json.loads(line)
|
record = json.loads(line)
|
||||||
if matches(record, ip, keywords, unblocked_only):
|
if matches(record, ip, keywords, unblocked_only):
|
||||||
matched_count += 1
|
matched_count += 1
|
||||||
if domain_counter is not None:
|
if collect_raw:
|
||||||
|
matched_records.append(record)
|
||||||
|
elif domain_counter is not None:
|
||||||
full_host = record.get("QH", "").lower()
|
full_host = record.get("QH", "").lower()
|
||||||
if full_host:
|
if full_host:
|
||||||
if raw_mode:
|
if raw_mode:
|
||||||
@ -57,7 +57,7 @@ def process_file(filepath, ip, keywords, domain_counter=None, raw_mode=False, un
|
|||||||
print(f"Error while processing file {filepath}: {e}")
|
print(f"Error while processing file {filepath}: {e}")
|
||||||
|
|
||||||
print(f"Matched {matched_count} entries in {filepath}.\n")
|
print(f"Matched {matched_count} entries in {filepath}.\n")
|
||||||
return matched_count
|
return matched_count, matched_records
|
||||||
|
|
||||||
def print_top_domains(domain_counter, limit):
|
def print_top_domains(domain_counter, limit):
|
||||||
print(f"\nTop {limit} visited domains:")
|
print(f"\nTop {limit} visited domains:")
|
||||||
@ -66,6 +66,14 @@ def print_top_domains(domain_counter, limit):
|
|||||||
for i, (domain, count) in enumerate(domain_counter.most_common(limit), 1):
|
for i, (domain, count) in enumerate(domain_counter.most_common(limit), 1):
|
||||||
print(f"{i:<4} {count:<8} {domain}")
|
print(f"{i:<4} {count:<8} {domain}")
|
||||||
|
|
||||||
|
def print_raw_records(records):
    """Print every matched query-log record, one line per record.

    Each record is a dict produced by ``json.loads`` on a log line. Only the
    "T", "IP" and "QH" keys are read; a key that is absent is rendered as an
    empty string so a sparse record never aborts the listing.

    Args:
        records: Iterable of record dicts. ``None`` is accepted and treated
            as "no records", because ``process_file`` hands back ``None``
            rather than a list when it was not asked to collect records.
    """
    print("\nRaw matched entries:")
    # `or ()` keeps the header but skips the loop for None / empty input.
    for rec in records or ():
        t = rec.get("T", "")  # presumably the entry timestamp (per --raw help text)
        ip = rec.get("IP", "")
        qh = rec.get("QH", "")  # queried host name
        print(f"{t} | IP: {ip} | Query: {qh}")
|
||||||
|
|
||||||
def print_help():
|
def print_help():
|
||||||
print("""
|
print("""
|
||||||
AdGuard Home Log Analyzer
|
AdGuard Home Log Analyzer
|
||||||
@ -81,7 +89,7 @@ Positional arguments:
|
|||||||
|
|
||||||
Options:
|
Options:
|
||||||
--top N Show top N most visited domains (default: off)
|
--top N Show top N most visited domains (default: off)
|
||||||
--raw Do NOT consolidate subdomains (e.g., keep 'api.apple.com' separate from 'itunes.apple.com')
|
--raw Show ALL matched entries (with timestamps and full domain), no domain consolidation, no top list
|
||||||
--unblocked-only Show only queries that were NOT blocked by AdGuard
|
--unblocked-only Show only queries that were NOT blocked by AdGuard
|
||||||
--help, -h Show this help message
|
--help, -h Show this help message
|
||||||
|
|
||||||
@ -96,6 +104,7 @@ Examples:
|
|||||||
python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google
|
python3 find_adguard_log.py logs/querylog*.json* 172.16.0.25 facebook google
|
||||||
python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only
|
python3 find_adguard_log.py querylog*.json.gz 192.168.1.8 --top 100 --unblocked-only
|
||||||
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw
|
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --top 50 --raw
|
||||||
|
python3 find_adguard_log.py *.json.gz 10.0.0.12 youtube --raw --unblocked-only
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def parse_arguments(argv):
|
def parse_arguments(argv):
|
||||||
@ -138,7 +147,6 @@ def main():
|
|||||||
ip = None
|
ip = None
|
||||||
keywords = []
|
keywords = []
|
||||||
|
|
||||||
# Find IP in args (simple check for IPv4 format)
|
|
||||||
for i, arg in enumerate(args):
|
for i, arg in enumerate(args):
|
||||||
parts = arg.split('.')
|
parts = arg.split('.')
|
||||||
if len(parts) == 4 and all(p.isdigit() for p in parts):
|
if len(parts) == 4 and all(p.isdigit() for p in parts):
|
||||||
@ -163,14 +171,20 @@ def main():
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
total_matches = 0
|
total_matches = 0
|
||||||
domain_counter = Counter() if top_limit else None
|
domain_counter = Counter() if (top_limit and not raw_mode) else None
|
||||||
|
collected_records = [] if raw_mode else None
|
||||||
|
|
||||||
for log_file in sorted(log_files):
|
for log_file in sorted(log_files):
|
||||||
total_matches += process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only)
|
count, records = process_file(log_file, ip, keywords, domain_counter, raw_mode, unblocked_only, collect_raw=raw_mode)
|
||||||
|
total_matches += count
|
||||||
|
if raw_mode and records:
|
||||||
|
collected_records.extend(records)
|
||||||
|
|
||||||
print(f"\nTotal matched entries across all files: {total_matches}")
|
print(f"\nTotal matched entries across all files: {total_matches}")
|
||||||
|
|
||||||
if top_limit and total_matches > 0:
|
if raw_mode:
|
||||||
|
print_raw_records(collected_records)
|
||||||
|
elif top_limit and total_matches > 0:
|
||||||
print_top_domains(domain_counter, top_limit)
|
print_top_domains(domain_counter, top_limit)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Reference in New Issue
Block a user