diff --git a/log_parser.py b/log_parser.py
index 984ac37..330d489 100644
--- a/log_parser.py
+++ b/log_parser.py
@@ -1,11 +1,13 @@
 import re
-from collections import defaultdict
-from datetime import datetime
 
 def parse_log_file(log_file_path):
-
+    """
+    Parse HAProxy syslog format and identify security threats.
+    Format: <134>Nov 3 09:18:35 haproxy[18]: IP:PORT [DATE:TIME] FRONTEND BACKEND STATUS BYTES ...
+    """
     parsed_entries = []
 
+    # Security threat patterns
     xss_patterns = [
         r'<\s*script\s*',
         r'javascript:',
@@ -16,8 +18,7 @@ def parse_log_file(log_file_path):
         r'<\s*input\s*[^>]*\s*value\s*=?',
         r'<\s*form\s*action\s*=?',
         r'<\s*svg\s*on\w+\s*=?',
-        r'script',
-        r'alert',
+        r'alert\s*\(',
         r'onerror',
         r'onload',
     ]
@@ -32,7 +33,6 @@ def parse_log_file(log_file_path):
         r'1\s*=\s*1',
         r'@@\w+',
         r'`1',
-        r'\|\|\s*chr\(',
     ]
 
     webshells_patterns = [
@@ -43,147 +43,93 @@ def parse_log_file(log_file_path):
         r'exec\s*\(',
         r'popen\s*\(',
         r'proc_open\s*\(',
-        r'pcntl_exec\s*\(',
-        r'\.php\?cmd=',
-        r'\.php\?id=',
-        r'backdoor|webshell|phpspy|c99|kacak|b374k|wsos|madspot|r57|c100|r57shell',
+        r'backdoor|webshell|phpspy|c99|kacak|b374k|wsos',
     ]
 
+    # Compile patterns
     xss_pattern = re.compile('|'.join(xss_patterns), re.IGNORECASE)
     sql_pattern = re.compile('|'.join(sql_patterns), re.IGNORECASE)
     webshell_pattern = re.compile('|'.join(webshells_patterns), re.IGNORECASE)
 
     try:
-        with open(log_file_path, 'r') as log_file:
+        with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as log_file:
             log_lines = log_file.readlines()
 
         for line in log_lines:
             if not line.strip():
                 continue
-
-            match = re.search(
-                r'(\w+\s+\d+\s\d+:\d+:\d+).*\s(\d+\.\d+\.\d+\.\d+).*"?\s*(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s+([^"\s]+)"?\s+(\d{3})',
-                line
-            )
-
-            if not match:
+
+            try:
+                # Extract syslog header
+                syslog_match = re.search(
+                    r'<\d+>(\w+\s+\d+\s+\d+:\d+:\d+).*haproxy\[\d+\]:\s+',
+                    line
+                )
+
+                if not syslog_match:
+                    continue
+
+                timestamp = syslog_match.group(1)
+
+                # Extract IP:PORT
+                ip_match = re.search(r'(\d+\.\d+\.\d+\.\d+):(\d+)', line)
+                if not ip_match:
+                    continue
+
+                ip_address = ip_match.group(1)
+
+                # Extract date/time in brackets
+                datetime_match = re.search(r'\[(\d{2}/\w+/\d{4}:\d{2}:\d{2}:\d{2})', line)
+                if datetime_match:
+                    timestamp = datetime_match.group(1)
+
+                # Extract frontend and backend
+                fe_be_match = re.search(r'\]\s+(\S+)\s+(\S+)\s+(\d+/\d+/\d+/\d+/\d+)\s+(\d{3})', line)
+                if not fe_be_match:
+                    continue
+
+                frontend = fe_be_match.group(1)
+                backend = fe_be_match.group(2)
+                status_code = fe_be_match.group(4)
+
+                # Extract HTTP method and URL
+                http_match = re.search(r'"(\w+)\s+([^\s]+)\s+HTTP', line)
+                if not http_match:
+                    continue
+
+                http_method = http_match.group(1)
+                requested_url = http_match.group(2)
+
+                # Detect threats
+                xss_alert = bool(xss_pattern.search(line))
+                sql_alert = bool(sql_pattern.search(line))
+                webshell_alert = bool(webshell_pattern.search(line))
+                put_method = http_method == 'PUT'
+                illegal_resource = status_code == '403'
+
+                parsed_entries.append({
+                    'timestamp': timestamp,
+                    'ip_address': ip_address,
+                    'http_method': http_method,
+                    'requested_url': requested_url,
+                    'status_code': status_code,
+                    'frontend': frontend,
+                    'backend': backend,
+                    'xss_alert': xss_alert,
+                    'sql_alert': sql_alert,
+                    'put_method': put_method,
+                    'illegal_resource': illegal_resource,
+                    'webshell_alert': webshell_alert,
+                })
+            except Exception as e:
+                print(f"Error parsing line: {e}")
                 continue
-
-            timestamp = match.group(1)
-            ip_address = match.group(2)
-            http_method = match.group(3)
-            requested_url = match.group(4)
-            status_code = int(match.group(5))
-
-            threats = []
-            threat_level = 'info'
-
-            if xss_pattern.search(line):
-                threats.append('XSS Attack')
-                threat_level = 'danger'
-
-            if sql_pattern.search(line):
-                threats.append('SQL Injection')
-                threat_level = 'danger'
-
-            if webshell_pattern.search(line):
-                threats.append('Webshell')
-                threat_level = 'danger'
-
-            if http_method == 'PUT':
-                threats.append('Remote Upload')
-                threat_level = 'warning'
-
-            if 'admin' in requested_url.lower() or 'config' in requested_url.lower():
-                if status_code == 403:
-                    threats.append('Unauthorized Access')
-                    threat_level = 'warning'
-
-            status_category = 'info'
-            if 200 <= status_code < 300:
-                status_category = 'success'
-            elif 300 <= status_code < 400:
-                status_category = 'secondary'
-            elif 400 <= status_code < 500:
-                status_category = 'warning'
-            elif status_code >= 500:
-                status_category = 'danger'
-
-            parsed_entries.append({
-                'timestamp': timestamp,
-                'ip_address': ip_address,
-                'http_method': http_method,
-                'requested_url': requested_url,
-                'status_code': status_code,
-                'status_category': status_category,
-                'threats': threats if threats else ['None'],
-                'threat_level': threat_level if threats else 'info',
-                'is_threat': bool(threats),
-            })
 
     except FileNotFoundError:
-        return [{'error': f'Log file not found: {log_file_path}'}]
+        print(f"Log file not found: {log_file_path}")
+        return []
     except Exception as e:
-        return [{'error': f'Error parsing log: {str(e)}'}]
+        print(f"Error reading log file: {e}")
+        return []
 
     return parsed_entries
-
-
-def get_log_statistics(parsed_entries):
-
-    stats = {
-        'total_requests': len(parsed_entries),
-        'threat_count': sum(1 for e in parsed_entries if e.get('is_threat')),
-        'status_codes': defaultdict(int),
-        'http_methods': defaultdict(int),
-        'top_ips': defaultdict(int),
-        'threat_types': defaultdict(int),
-    }
-
-    for entry in parsed_entries:
-        if 'error' in entry:
-            continue
-
-        stats['status_codes'][entry['status_code']] += 1
-        stats['http_methods'][entry['http_method']] += 1
-        stats['top_ips'][entry['ip_address']] += 1
-
-        for threat in entry.get('threats', []):
-            if threat != 'None':
-                stats['threat_types'][threat] += 1
-
-    stats['top_ips'] = sorted(
-        stats['top_ips'].items(),
-        key=lambda x: x[1],
-        reverse=True
-    )[:5]
-
-    stats['status_codes'] = dict(stats['status_codes'])
-    stats['http_methods'] = dict(stats['http_methods'])
-    stats['threat_types'] = dict(stats['threat_types'])
-
-    return stats
-
-
-def filter_logs(parsed_entries, filters=None):
-    if not filters:
-        return parsed_entries
-
-    filtered = parsed_entries
-
-    if 'status_code' in filters and filters['status_code']:
-        filtered = [e for e in filtered if e.get('status_code') == int(filters['status_code'])]
-
-    if 'threat_level' in filters and filters['threat_level']:
-        filtered = [e for e in filtered if e.get('threat_level') == filters['threat_level']]
-
-    if 'http_method' in filters and filters['http_method']:
-        filtered = [e for e in filtered if e.get('http_method') == filters['http_method']]
-
-    if 'ip_address' in filters and filters['ip_address']:
-        filtered = [e for e in filtered if e.get('ip_address') == filters['ip_address']]
-
-    if 'has_threat' in filters and filters['has_threat']:
-        filtered = [e for e in filtered if e.get('is_threat')]
-
-    return filtered
diff --git a/templates.tar.gz b/templates.tar.gz
deleted file mode 100644
index 5e73581..0000000
Binary files a/templates.tar.gz and /dev/null differ
diff --git a/templates/logs.html b/templates/logs.html
index 546f4ae..8c1b1f2 100644
--- a/templates/logs.html
+++ b/templates/logs.html
@@ -9,12 +9,11 @@
 {% block content %}
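
For reference, a minimal usage sketch of the new parse_log_file() (not part of the patch). The two sample lines are hypothetical HAProxy entries that only follow the format described in the docstring; the frontend/backend names, ports, and timer values are made up, and the second line assumes the XSS payload reaches the log unencoded.

from log_parser import parse_log_file
import tempfile

# Hypothetical HAProxy HTTP log lines in the documented layout:
# <PRI>MMM d HH:MM:SS haproxy[pid]: IP:PORT [accept date] frontend backend/server timers status bytes ... "METHOD URI HTTP/1.1"
sample_lines = [
    '<134>Nov  3 09:18:35 haproxy[18]: 192.168.1.50:41322 '
    '[03/Nov/2025:09:18:35.123] web-frontend web-backend/srv1 0/0/1/2/3 200 1024 '
    '- - ---- 1/1/0/0/0 0/0 "GET /index.html HTTP/1.1"\n',
    '<134>Nov  3 09:18:36 haproxy[18]: 203.0.113.7:52110 '
    '[03/Nov/2025:09:18:36.004] web-frontend web-backend/srv1 0/0/1/2/3 403 512 '
    '- - ---- 1/1/0/0/0 0/0 "GET /search?q=<script>alert(1)</script> HTTP/1.1"\n',
]

# Write the samples to a throwaway file and run the parser over it.
with tempfile.NamedTemporaryFile('w', suffix='.log', delete=False) as tmp:
    tmp.writelines(sample_lines)

for entry in parse_log_file(tmp.name):
    # The second entry should come back with xss_alert and illegal_resource set to True.
    print(entry['ip_address'], entry['status_code'], entry['requested_url'],
          entry['xss_alert'], entry['illegal_resource'])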