From c28049d2cf5e05bfd503236a301f027eb049ca84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Gruszczy=C5=84ski?=
Date: Fri, 16 May 2025 11:34:01 +0200
Subject: [PATCH] first commit

---
Review notes: the patch body was re-flowed onto separate lines and revised
(see the comments in app.py and delete_user.php). The blob ids on the "index"
lines still refer to the original commit.

 .env.example     |   8 ++
 .gitignore       |   8 ++
 app.py           | 366 +++++++++++++++++++++++++++++++++++++++++++++++
 delete_user.php  |  45 ++++++
 requirements.txt |   7 ++
 5 files changed, 434 insertions(+)
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 app.py
 create mode 100644 delete_user.php
 create mode 100644 requirements.txt

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..38496cf
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,8 @@
+# Database connection
+DB_HOST=localhost
+DB_USER=drupal
+DB_PASSWORD=haslo
+DB_NAME=drupal6
+
+# Path to Drupal installation (used by delete_user.php)
+DRUPAL_PATH=/var/www/drupal
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..33cd283
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+*.log
+*.csv
+*.xlsx
+.env
+__pycache__/
+*.pyc
+user_cleanup_results_*
+venv
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..aeee37b
--- /dev/null
+++ b/app.py
@@ -0,0 +1,366 @@
+#!/usr/bin/env python3
+
+import argparse
+import mysql.connector
+import dns.resolver
+import datetime
+import csv
+import socket
+from dotenv import load_dotenv
+import os
+import sys
+from tqdm import tqdm
+import signal
+import logging
+import redis
+from tabulate import tabulate
+import xlsxwriter
+from collections import defaultdict
+import subprocess
+
+# Redis - database 5
+redis_client = redis.Redis(host='localhost', port=6379, db=5, decode_responses=True)
+
+# Disposable (temporary) e-mail domains
+TEMP_DOMAINS = {
+    "10minutemail.com", "tempmail.com", "tempmail.net", "tempmail.org",
+    "guerrillamail.com", "mailinator.com", "discard.email", "fakeinbox.com",
+    "trashmail.com", "getnada.com", "yopmail.com", "maildrop.cc", "sharklasers.com"
+}
+
+# Lifetime of a cached MX verdict in Redis, in seconds (3 days)
+MX_CACHE_TTL = 259200
+
+# Column headers shared by the CSV and Excel exports
+EXPORT_HEADERS = [
+    "UID", "Login", "E-mail", "Ostatnie logowanie", "Rejestracja",
+    "Punkty", "Nieaktywny", "E-mail OK", "Tymczasowy"
+]
+
+# Logging
+logging.basicConfig(
+    filename='user_cleanup.log',
+    level=logging.INFO,
+    format='%(asctime)s [%(levelname)s] %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+
+
+def get_users(db_config):
+    """Fetch every non-anonymous user with their points and node count.
+
+    db_config is passed as keyword arguments to mysql.connector.connect().
+    Returns a list of dicts keyed by the selected column names.
+    """
+    # NOTE(review): p.points is selected without aggregation while grouping by
+    # u.uid; if userpoints can hold several rows per user, the points value and
+    # post_count read here may be wrong -- confirm against the schema.
+    query = """
+        SELECT u.uid, u.name, u.mail, u.access, u.created, p.points, COUNT(n.nid) AS post_count
+        FROM users u
+        LEFT JOIN node n ON u.uid = n.uid
+        LEFT JOIN userpoints p ON u.uid = p.uid
+        WHERE u.uid > 0
+        GROUP BY u.uid
+    """
+    connection = mysql.connector.connect(**db_config)
+    try:
+        cursor = connection.cursor(dictionary=True)
+        try:
+            cursor.execute(query)
+            return cursor.fetchall()
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when the query fails.
+        connection.close()
+
+
+def _email_domain(email):
+    """Return the lower-cased domain part of *email*, or None if it has none."""
+    if not email or '@' not in email:
+        return None
+    domain = email.split('@')[1].strip().lower()
+    return domain or None
+
+
+def is_fake_email(email):
+    """Return True when the address cannot receive mail (no usable MX record).
+
+    Definitive DNS answers are cached in Redis under "mx:<domain>" for
+    MX_CACHE_TTL seconds. Addresses without a domain are reported as fake
+    without touching DNS or the cache.
+    """
+    domain = _email_domain(email)
+    if domain is None:
+        return True
+
+    cache_key = f"mx:{domain}"
+    cached = redis_client.get(cache_key)
+    if cached is not None:
+        return cached == "true"
+
+    try:
+        answers = dns.resolver.resolve(domain, 'MX', lifetime=5.0)
+        result = "false" if answers else "true"
+    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
+        # Definitive: the domain does not exist or publishes no MX record.
+        result = "true"
+    except Exception as exc:
+        # Timeouts and resolver failures may be transient: keep treating the
+        # address as fake for this run, but do not cache that verdict.
+        logging.warning(f"MX lookup failed for {domain}: {exc}")
+        return True
+
+    redis_client.set(cache_key, result, ex=MX_CACHE_TTL)
+    return result == "true"
+
+
+def is_temp_email(email):
+    """Return True when the address uses a domain listed in TEMP_DOMAINS."""
+    return _email_domain(email) in TEMP_DOMAINS
+
+
+def _format_date(timestamp, missing):
+    """Format a Unix timestamp as YYYY-MM-DD, or return *missing* when unset."""
+    if not timestamp:
+        return missing
+    return datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')
+
+
+def _report_row(u):
+    """Build the nine report columns for one analysed user dict."""
+    return [
+        u['uid'], u['name'], u['mail'],
+        _format_date(u['access'], 'nigdy'),
+        _format_date(u.get('created'), 'brak'),
+        u.get('points') or 0,  # LEFT JOIN yields None for users without points
+        'TAK' if u['inactive'] else 'NIE',
+        'TAK' if u['email_valid'] else 'NIE',
+        'TAK' if u['temp_email'] else 'NIE',
+    ]
+
+
+def export_to_csv(users):
+    """Write the analysed users to a timestamped CSV file in the working directory."""
+    now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M")
+    filename = f"user_cleanup_results_{now}.csv"
+    with open(filename, mode='w', newline='', encoding='utf-8') as f:
+        writer = csv.writer(f)
+        writer.writerow(EXPORT_HEADERS)
+        writer.writerows(_report_row(u) for u in users)
+    print(f"📁 CSV zapisany: {filename}")
+
+
+def export_to_excel(users):
+    """Write the analysed users to a timestamped .xlsx file (sheet "Wyniki")."""
+    now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M")
+    filename = f"user_cleanup_results_{now}.xlsx"
+    workbook = xlsxwriter.Workbook(filename)
+    sheet = workbook.add_worksheet("Wyniki")
+    for col, header in enumerate(EXPORT_HEADERS):
+        sheet.write(0, col, header)
+    for row_idx, u in enumerate(users, start=1):
+        for col, value in enumerate(_report_row(u)):
+            sheet.write(row_idx, col, value)
+    workbook.close()
+    print(f"📁 Excel zapisany: {filename}")
+
+
+def flush_redis_cache():
+    """Delete every cached MX verdict ("mx:*" keys) from Redis."""
+    keys = redis_client.keys("mx:*")
+    if keys:
+        redis_client.delete(*keys)
+    print(f"🧹 Redis MX cache wyczyszczony: {len(keys)} wpisów")
+
+
+def domain_report(users):
+    """Print how many of the given users belong to each e-mail domain."""
+    domains = defaultdict(int)
+    for u in users:
+        # Addresses without a domain are grouped instead of raising IndexError.
+        domains[_email_domain(u['mail']) or "(brak domeny)"] += 1
+    print("\n📊 Raport domen:")
+    for domain, count in sorted(domains.items(), key=lambda x: x[1], reverse=True):
+        print(f"- {domain}: {count} użytkowników")
+
+
+def delete_user_via_php(uid, drupal_path):
+    """Delete one user by running delete_user.php; return True on success.
+
+    uid is the Drupal user id, drupal_path the Drupal installation root.
+    Failures are logged to user_cleanup.log and reported as False.
+    """
+    # Resolve the helper next to this file so the tool works from any directory.
+    script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'delete_user.php')
+    try:
+        result = subprocess.run(
+            ['php', script, str(uid), drupal_path],
+            capture_output=True, text=True, check=True
+        )
+    except subprocess.CalledProcessError as e:
+        # delete_user.php reports its errors with echo, i.e. on stdout.
+        logging.error(f"Błąd PHP delete UID {uid}: {(e.stderr or e.stdout or '').strip()}")
+        return False
+    except OSError as e:
+        # The php binary is missing or cannot be executed.
+        logging.error(f"Błąd PHP delete UID {uid}: {e}")
+        return False
+    print(result.stdout.strip())
+    logging.info(f"PHP delete UID {uid}: {result.stdout.strip()}")
+    return True
+
+
+def main():
+    """Command-line entry point: analyse users, export a report, optionally delete.
+
+    Deletion runs only with --delete and is skipped by --dry-run or --validate.
+    """
+    signal.signal(signal.SIGINT, lambda s, f: sys.exit("\n🛑 Przerwano przez użytkownika."))
+    load_dotenv()
+
+    parser = argparse.ArgumentParser(
+        description="Drupal 6 user cleanup tool",
+        epilog="""
+    Przykłady użycia:
+
+    # Podgląd nieaktywnych użytkowników bez punktów
+    /root/user_manager/venv/bin/python3 app.py --days-inactive 730 --dry-run
+
+    # Usuń użytkowników z błędnymi e-mailami i nieaktywnych 2+ lata
+    /root/user_manager/venv/bin/python3 app.py --days-inactive 730 --delete
+
+    # Uwzględnij starych użytkowników, którzy logowali się ostatnio
+    /root/user_manager/venv/bin/python3 app.py --days-inactive 730 --veteran-year 2012 --recent-login-days 1095
+
+    # Tylko walidacja adresów e-mail
+    /root/user_manager/venv/bin/python3 app.py --validate
+
+    # Czyszczenie cache DNS w Redisie
+    /root/user_manager/venv/bin/python3 app.py --flush-cache
+    """,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+
+    parser.add_argument('--host')
+    parser.add_argument('--user')
+    parser.add_argument('--password')
+    parser.add_argument('--database')
+    parser.add_argument('--days-inactive', type=int)
+    parser.add_argument('--dry-run', action='store_true')
+    parser.add_argument('--delete', action='store_true')
+    parser.add_argument('--validate', action='store_true')
+    parser.add_argument('--flush-cache', action='store_true')
+    parser.add_argument('--export-excel', action='store_true')
+    parser.add_argument('--report-domains', action='store_true')
+    # Referenced by the --delete branch; falls back to DRUPAL_PATH from .env.
+    parser.add_argument('--drupal-path',
+                        help='Ścieżka do instalacji Drupala (domyślnie: zmienna DRUPAL_PATH)')
+
+    parser.add_argument('--veteran-year', type=int, default=2012,
+                        help='Minimalny rok rejestracji konta do uznania za stare (domyślnie: 2012)')
+    parser.add_argument('--recent-login-days', type=int, default=1095,
+                        help='Ile dni wstecz ostatnie logowanie czyni konto aktywnym (domyślnie: 1095)')
+
+    parser.add_argument('--show-table', action='store_true',
+                        help='Wyświetl tabelę z listą użytkowników do usunięcia')
+
+    args = parser.parse_args()
+
+    if args.flush_cache:
+        flush_redis_cache()
+        return
+
+    db_config = {
+        'host': args.host or os.getenv('DB_HOST'),
+        'user': args.user or os.getenv('DB_USER'),
+        'password': args.password or os.getenv('DB_PASSWORD'),
+        'database': args.database or os.getenv('DB_NAME')
+    }
+    drupal_path = args.drupal_path or os.getenv('DRUPAL_PATH')
+
+    users = get_users(db_config)
+    now_ts = int(datetime.datetime.now().timestamp())
+    # Loop-invariant: logins at or after this moment count as recent.
+    recent_login_threshold = now_ts - (args.recent_login_days * 86400)
+    final_candidates = []
+    inactive_count = 0
+    invalid_email_count = 0
+    temp_email_count = 0
+    skipped_with_points = 0
+    skipped_veterans = 0
+
+    for user in tqdm(users, desc="Analiza"):
+        if (user.get('points') or 0) > 0:
+            skipped_with_points += 1
+            continue
+
+        # Skip active "veterans": old accounts with a recent login
+        created_year = datetime.datetime.fromtimestamp(user['created']).year if user.get('created') else None
+        if created_year and created_year <= args.veteran_year:
+            if user['access'] and user['access'] >= recent_login_threshold:
+                skipped_veterans += 1
+                continue
+
+        last_access = user['access'] or 0
+        user['inactive'] = (args.days_inactive is not None) and ((now_ts - last_access) > args.days_inactive * 86400)
+        user['temp_email'] = is_temp_email(user['mail'])
+        user['email_valid'] = not is_fake_email(user['mail']) and not user['temp_email']
+
+        if user['inactive']:
+            inactive_count += 1
+        if not user['email_valid']:
+            invalid_email_count += 1
+        if user['temp_email']:
+            temp_email_count += 1
+
+        if args.validate or user['inactive'] or not user['email_valid']:
+            final_candidates.append(user)
+
+    # Redundant safety filter to exclude any with points
+    final_candidates = [u for u in final_candidates if (u.get('points') or 0) == 0]
+
+    if args.report_domains:
+        domain_report(final_candidates)
+
+    if args.show_table:
+        table_headers = ["UID", "Login", "E-mail", "Ostatnie log.", "Rejestracja",
+                         "Punkty", "Nieaktywny", "E-mail OK", "Tymczasowy"]
+        print(tabulate([_report_row(u) for u in final_candidates],
+                       headers=table_headers, tablefmt="fancy_grid"))
+
+    export_to_csv(final_candidates)
+    if args.export_excel:
+        export_to_excel(final_candidates)
+
+    print("\n📊 Podsumowanie:")
+    print(f"- Całkowita liczba użytkowników w bazie: {len(users)}")
+    print(f"- Pominięci z punktami: {skipped_with_points}")
+    print(f"- Nieaktywni (> {args.days_inactive} dni): {inactive_count}")
+    print(f"- Z niepoprawnym e-mailem (MX lub tymczasowy): {invalid_email_count}")
+    print(f"- Z tymczasowym e-mailem: {temp_email_count}")
+    print(f"- Kandydaci do usunięcia: {len(final_candidates)}")
+    print(f"- Pominięci jako aktywni weterani: {skipped_veterans}")
+
+    if args.delete:
+        if args.dry_run or args.validate:
+            # --dry-run is a preview and --validate lists every analysed user,
+            # so neither mode may delete anything.
+            print("ℹ️ Tryb --dry-run/--validate: pomijam usuwanie użytkowników.")
+            return
+        if not drupal_path:
+            print("❌ Brak parametru --drupal-path. Nie można usunąć użytkowników bez ścieżki do Drupala.")
+            sys.exit(1)
+        deleted = 0
+        for user in final_candidates:
+            if delete_user_via_php(user['uid'], drupal_path):
+                deleted += 1
+        failed = len(final_candidates) - deleted
+        print(f"✅ Usunięto {deleted} użytkowników przez delete_user.php")
+        if failed:
+            print(f"⚠️ Nie udało się usunąć {failed} użytkowników (szczegóły w user_cleanup.log)")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/delete_user.php b/delete_user.php
new file mode 100644
index 0000000..b79c692
--- /dev/null
+++ b/delete_user.php
@@ -0,0 +1,45 @@
+#!/usr/bin/env php
+<?php
+// NOTE(review): the opening tag and the usage check below were stripped from
+// the extracted patch and are reconstructed from the surviving fragments --
+// confirm them against the original file.
+if ($argc < 3) {
+    echo "Usage: php delete_user.php <uid> <drupal_path>\n";
+    exit(1);
+}
+
+$uid = (int) $argv[1];
+$drupal_path = rtrim($argv[2], '/');
+
+// Reject invalid ids before paying for a full Drupal bootstrap.
+if ($uid <= 0) {
+    echo "❌ UID musi być większy niż 0.\n";
+    exit(1);
+}
+
+if (!file_exists($drupal_path . '/includes/bootstrap.inc')) {
+    echo "❌ Nie znaleziono bootstrap.inc w: $drupal_path\n";
+    exit(1);
+}
+
+// Drupal 6 resolves includes and sites/ relative to the working directory,
+// so switch to the installation root before bootstrapping.
+chdir($drupal_path);
+
+define('DRUPAL_ROOT', getcwd());
+require_once DRUPAL_ROOT . '/includes/bootstrap.inc';
+drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
+
+$account = user_load($uid);
+if ($account) {
+    // Drupal 6 declares user_delete($edit, $uid); Drupal 7 takes only $uid.
+    if (defined('VERSION') && version_compare(VERSION, '7', '>=')) {
+        user_delete($uid);
+    } else {
+        user_delete(array(), $uid);
+    }
+    echo "✅ Użytkownik $uid został usunięty.\n";
+} else {
+    echo "⚠️ Użytkownik $uid nie istnieje.\n";
+}
+?>
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b26cd57
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+mysql-connector-python
+dnspython
+tabulate
+python-dotenv
+tqdm
+redis
+xlsxwriter