new options

This commit is contained in:
Mateusz Gruszczyński
2025-11-03 10:27:52 +01:00
parent 014dc76ff6
commit d01ca3512e
3 changed files with 79 additions and 134 deletions

View File

@@ -1,11 +1,13 @@
import re import re
from collections import defaultdict
from datetime import datetime
def parse_log_file(log_file_path): def parse_log_file(log_file_path):
"""
Parse HAProxy syslog format and identify security threats.
Format: <134>Nov 3 09:18:35 haproxy[18]: IP:PORT [DATE:TIME] FRONTEND BACKEND STATUS BYTES ...
"""
parsed_entries = [] parsed_entries = []
# Security threat patterns
xss_patterns = [ xss_patterns = [
r'<\s*script\s*', r'<\s*script\s*',
r'javascript:', r'javascript:',
@@ -16,8 +18,7 @@ def parse_log_file(log_file_path):
r'<\s*input\s*[^>]*\s*value\s*=?', r'<\s*input\s*[^>]*\s*value\s*=?',
r'<\s*form\s*action\s*=?', r'<\s*form\s*action\s*=?',
r'<\s*svg\s*on\w+\s*=?', r'<\s*svg\s*on\w+\s*=?',
r'script', r'alert\s*\(',
r'alert',
r'onerror', r'onerror',
r'onload', r'onload',
] ]
@@ -32,7 +33,6 @@ def parse_log_file(log_file_path):
r'1\s*=\s*1', r'1\s*=\s*1',
r'@@\w+', r'@@\w+',
r'`1', r'`1',
r'\|\|\s*chr\(',
] ]
webshells_patterns = [ webshells_patterns = [
@@ -43,71 +43,69 @@ def parse_log_file(log_file_path):
r'exec\s*\(', r'exec\s*\(',
r'popen\s*\(', r'popen\s*\(',
r'proc_open\s*\(', r'proc_open\s*\(',
r'pcntl_exec\s*\(', r'backdoor|webshell|phpspy|c99|kacak|b374k|wsos',
r'\.php\?cmd=',
r'\.php\?id=',
r'backdoor|webshell|phpspy|c99|kacak|b374k|wsos|madspot|r57|c100|r57shell',
] ]
# Compile patterns
xss_pattern = re.compile('|'.join(xss_patterns), re.IGNORECASE) xss_pattern = re.compile('|'.join(xss_patterns), re.IGNORECASE)
sql_pattern = re.compile('|'.join(sql_patterns), re.IGNORECASE) sql_pattern = re.compile('|'.join(sql_patterns), re.IGNORECASE)
webshell_pattern = re.compile('|'.join(webshells_patterns), re.IGNORECASE) webshell_pattern = re.compile('|'.join(webshells_patterns), re.IGNORECASE)
try: try:
with open(log_file_path, 'r') as log_file: with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as log_file:
log_lines = log_file.readlines() log_lines = log_file.readlines()
for line in log_lines: for line in log_lines:
if not line.strip(): if not line.strip():
continue continue
match = re.search( try:
r'(\w+\s+\d+\s\d+:\d+:\d+).*\s(\d+\.\d+\.\d+\.\d+).*"?\s*(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s+([^"\s]+)"?\s+(\d{3})', # Extract syslog header
syslog_match = re.search(
r'<\d+>(\w+\s+\d+\s+\d+:\d+:\d+).*haproxy\[\d+\]:\s+',
line line
) )
if not match: if not syslog_match:
continue continue
timestamp = match.group(1) timestamp = syslog_match.group(1)
ip_address = match.group(2)
http_method = match.group(3)
requested_url = match.group(4)
status_code = int(match.group(5))
threats = [] # Extract IP:PORT
threat_level = 'info' ip_match = re.search(r'(\d+\.\d+\.\d+\.\d+):(\d+)', line)
if not ip_match:
continue
if xss_pattern.search(line): ip_address = ip_match.group(1)
threats.append('XSS Attack')
threat_level = 'danger'
if sql_pattern.search(line): # Extract date/time in brackets
threats.append('SQL Injection') datetime_match = re.search(r'\[(\d{2}/\w+/\d{4}:\d{2}:\d{2}:\d{2})', line)
threat_level = 'danger' if datetime_match:
timestamp = datetime_match.group(1)
if webshell_pattern.search(line): # Extract frontend and backend
threats.append('Webshell') fe_be_match = re.search(r'\]\s+(\S+)\s+(\S+)\s+(\d+/\d+/\d+/\d+/\d+)\s+(\d{3})', line)
threat_level = 'danger' if not fe_be_match:
continue
if http_method == 'PUT': frontend = fe_be_match.group(1)
threats.append('Remote Upload') backend = fe_be_match.group(2)
threat_level = 'warning' status_code = fe_be_match.group(4)
if 'admin' in requested_url.lower() or 'config' in requested_url.lower(): # Extract HTTP method and URL
if status_code == 403: http_match = re.search(r'"(\w+)\s+([^\s]+)\s+HTTP', line)
threats.append('Unauthorized Access') if not http_match:
threat_level = 'warning' continue
status_category = 'info' http_method = http_match.group(1)
if 200 <= status_code < 300: requested_url = http_match.group(2)
status_category = 'success'
elif 300 <= status_code < 400: # Detect threats
status_category = 'secondary' xss_alert = bool(xss_pattern.search(line))
elif 400 <= status_code < 500: sql_alert = bool(sql_pattern.search(line))
status_category = 'warning' webshell_alert = bool(webshell_pattern.search(line))
elif status_code >= 500: put_method = http_method == 'PUT'
status_category = 'danger' illegal_resource = status_code == '403'
parsed_entries.append({ parsed_entries.append({
'timestamp': timestamp, 'timestamp': timestamp,
@@ -115,75 +113,23 @@ def parse_log_file(log_file_path):
'http_method': http_method, 'http_method': http_method,
'requested_url': requested_url, 'requested_url': requested_url,
'status_code': status_code, 'status_code': status_code,
'status_category': status_category, 'frontend': frontend,
'threats': threats if threats else ['None'], 'backend': backend,
'threat_level': threat_level if threats else 'info', 'xss_alert': xss_alert,
'is_threat': bool(threats), 'sql_alert': sql_alert,
'put_method': put_method,
'illegal_resource': illegal_resource,
'webshell_alert': webshell_alert,
}) })
except FileNotFoundError:
return [{'error': f'Log file not found: {log_file_path}'}]
except Exception as e: except Exception as e:
return [{'error': f'Error parsing log: {str(e)}'}] print(f"Error parsing line: {e}")
return parsed_entries
def get_log_statistics(parsed_entries):
stats = {
'total_requests': len(parsed_entries),
'threat_count': sum(1 for e in parsed_entries if e.get('is_threat')),
'status_codes': defaultdict(int),
'http_methods': defaultdict(int),
'top_ips': defaultdict(int),
'threat_types': defaultdict(int),
}
for entry in parsed_entries:
if 'error' in entry:
continue continue
stats['status_codes'][entry['status_code']] += 1 except FileNotFoundError:
stats['http_methods'][entry['http_method']] += 1 print(f"Log file not found: {log_file_path}")
stats['top_ips'][entry['ip_address']] += 1 return []
except Exception as e:
print(f"Error reading log file: {e}")
return []
for threat in entry.get('threats', []):
if threat != 'None':
stats['threat_types'][threat] += 1
stats['top_ips'] = sorted(
stats['top_ips'].items(),
key=lambda x: x[1],
reverse=True
)[:5]
stats['status_codes'] = dict(stats['status_codes'])
stats['http_methods'] = dict(stats['http_methods'])
stats['threat_types'] = dict(stats['threat_types'])
return stats
def filter_logs(parsed_entries, filters=None):
if not filters:
return parsed_entries return parsed_entries
filtered = parsed_entries
if 'status_code' in filters and filters['status_code']:
filtered = [e for e in filtered if e.get('status_code') == int(filters['status_code'])]
if 'threat_level' in filters and filters['threat_level']:
filtered = [e for e in filtered if e.get('threat_level') == filters['threat_level']]
if 'http_method' in filters and filters['http_method']:
filtered = [e for e in filtered if e.get('http_method') == filters['http_method']]
if 'ip_address' in filters and filters['ip_address']:
filtered = [e for e in filtered if e.get('ip_address') == filters['ip_address']]
if 'has_threat' in filters and filters['has_threat']:
filtered = [e for e in filtered if e.get('is_threat')]
return filtered

Binary file not shown.

View File

@@ -9,12 +9,11 @@
{% block content %} {% block content %}
<div class="card shadow-sm mb-4"> <div class="card shadow-sm mb-4">
<div class="card-header bg-info text-white"> <div class="card-header bg-primary text-white">
<h5 class="mb-0"><i class="bi bi-file-text me-2"></i>HAProxy Access Logs</h5> <h5 class="mb-0"><i class="bi bi-file-text me-2"></i>HAProxy Access Logs</h5>
</div> </div>
<div class="card-body"> <div class="card-body">
<!-- Filter Section (kompaktnie, jak było) -->
{% if logs %} {% if logs %}
<div class="row mb-3 g-2"> <div class="row mb-3 g-2">
<div class="col-auto"> <div class="col-auto">