first commit
This commit is contained in:
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.env
|
||||||
|
config.py
|
||||||
|
venv
|
10
config.example.py
Normal file
10
config.example.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# config.py
# Example configuration template: copy this file to config.py and fill in
# real values. (config.py itself is listed in .gitignore, so credentials
# never land in version control.)

# MySQL connection settings for the Drupal 6 database.
DB_CONFIG = {
    'host': 'localhost',
    'database': 'drupal6_db',
    'user': 'drupal_user',
    'password': 'secure_password',
}

# Path to Drupal's sites/default/files directory on disk; all filepath
# values from the database are resolved relative to this.
FILES_BASE_PATH = '/ścieżka/do/drupala/sites/default/files/'
|
362
main.py
Normal file
362
main.py
Normal file
@ -0,0 +1,362 @@
|
|||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import pymysql
|
||||||
|
import hashlib
|
||||||
|
from PIL import Image
|
||||||
|
from config import DB_CONFIG, FILES_BASE_PATH
|
||||||
|
|
||||||
|
# Optimization thresholds: an image exceeding any of these limits is treated
# as a candidate for downscaling/re-encoding.
MAX_WIDTH = 1920
MAX_HEIGHT = 1080
MAX_FILESIZE_BYTES = 1 * 1024 * 1024  # 1MB
|
||||||
|
|
||||||
|
def get_connection():
    """Open a new MySQL connection using the settings from config.py.

    Returns a pymysql connection configured with utf8mb4 and a DictCursor,
    so result rows come back as dicts keyed by column name.
    """
    conn_params = dict(
        host=DB_CONFIG['host'],
        user=DB_CONFIG['user'],
        password=DB_CONFIG['password'],
        database=DB_CONFIG['database'],
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )
    return pymysql.connect(**conn_params)
|
||||||
|
|
||||||
|
def get_image_resolution(path):
    """Return the (width, height) of the image at *path*.

    Falls back to (0, 0) when the file cannot be opened or decoded as an
    image, so callers never have to handle exceptions.
    """
    try:
        with Image.open(path) as image:
            width, height = image.size
        return (width, height)
    except Exception:
        # Unreadable / non-image file — report a zero resolution.
        return (0, 0)
|
||||||
|
|
||||||
|
def hash_file(path):
    """Return the MD5 hex digest of the file at *path*, or None if the file
    cannot be read.

    MD5 is used here only as a content fingerprint for duplicate detection,
    not for anything security-sensitive.
    """
    hasher = hashlib.md5()
    try:
        with open(path, 'rb') as f:
            # Read in 8 KiB chunks so large images are never loaded into
            # memory all at once.
            for chunk in iter(lambda: f.read(8192), b""):
                hasher.update(chunk)
        return hasher.hexdigest()
    except OSError:
        # Narrowed from a bare `except Exception`: only I/O failures
        # (missing file, permissions, ...) are expected here; any other
        # exception is a real bug and should surface instead of being
        # silently converted to None.
        return None
|
||||||
|
|
||||||
|
def resize_image(path):
    """Shrink the image at *path* in place when it exceeds the configured
    resolution or file-size limits.

    The image is scaled proportionally to fit within MAX_WIDTH x MAX_HEIGHT
    and re-saved over the original file at 85% quality. Returns True when
    the file was rewritten, False when no change was needed or an error
    occurred.
    """
    try:
        with Image.open(path) as image:
            w, h = image.size
            fits_resolution = w <= MAX_WIDTH and h <= MAX_HEIGHT
            if fits_resolution and os.path.getsize(path) <= MAX_FILESIZE_BYTES:
                # Already within all limits — nothing to do.
                return False

            # Proportional scale factor fitting both dimensions, capped at 1
            # so an image that is only over the byte limit is re-encoded but
            # never upscaled.
            factor = min(MAX_WIDTH / w, MAX_HEIGHT / h, 1)
            target_size = (int(w * factor), int(h * factor))

            shrunk = image.resize(target_size, Image.LANCZOS)
            # Overwrite the original file; quality applies to lossy formats.
            shrunk.save(path, optimize=True, quality=85)
            return True
    except Exception as e:
        print(f"[ERROR] Nie udało się zmniejszyć {path}: {e}")
        return False
|
||||||
|
|
||||||
|
def fetch_used_filepaths(cursor):
    """Collect the unique file paths referenced anywhere in the database.

    Scans every table that has a 'filepath' column and gathers its values.

    Parameters:
        cursor: an open DictCursor on the Drupal database.

    Returns:
        set[str]: relative paths normalized to forward slashes.
    """
    # List every table in the configured database.
    cursor.execute("SHOW TABLES")
    tables = [row[f'Tables_in_{DB_CONFIG["database"]}'] for row in cursor.fetchall()]

    filepaths = set()

    for table in tables:
        # Only tables that actually have a 'filepath' column are relevant.
        cursor.execute(f"SHOW COLUMNS FROM `{table}` LIKE 'filepath'")
        if cursor.rowcount == 1:
            try:
                cursor.execute(f"SELECT filepath FROM `{table}`")
                for row in cursor.fetchall():
                    value = row['filepath']
                    # Skip NULL/empty values. Previously a NULL filepath
                    # raised AttributeError on .replace(), and the broad
                    # except below silently dropped the rest of the table's
                    # paths with only a warning.
                    if not value:
                        continue
                    filepaths.add(value.replace('\\', '/'))
            except Exception as e:
                # Warn but keep going — one unreadable table should not
                # abort the whole scan.
                print(f"[WARN] Nie udało się pobrać danych z tabeli '{table}': {e}")

    return filepaths
|
||||||
|
|
||||||
|
|
||||||
|
def file_exists_on_disk(base_path, rel_path):
    """Check whether *rel_path* (as stored in the DB) exists under *base_path*.

    Returns a tuple (exists, full_path) where full_path is the normalized
    candidate path that was actually checked — useful for debugging how the
    two parts were joined.
    """
    # Strip leading slashes/backslashes so os.path.join does not treat the
    # DB value as an absolute path and discard base_path.
    trimmed = rel_path.lstrip("/\\")
    candidate = os.path.normpath(os.path.join(base_path, trimmed))
    return os.path.isfile(candidate), candidate
|
||||||
|
|
||||||
|
def compare_files(
    debug=False,
    to_optimize=False,
    update_db=False,
    dry_run=False,
    extensions='jpg,jpeg,png,gif',
    exclude_folders=None,
    find_duplicates=False,
    show_deleted=False,
    top=20,
):
    """Audit the Drupal `files` table against the files on disk.

    For every image row in `files` (filtered by *extensions* and
    *exclude_folders*) this checks existence on disk, compares the stored
    filesize with the actual one, and optionally:
      - updates the DB filesize (update_db; dry_run only prints),
      - flags/resizes images over the configured limits (to_optimize),
      - groups byte-identical files by MD5 hash (find_duplicates),
      - lists on-disk files not referenced in the DB (show_deleted).

    Returns a stats dict with counters and the collected lists
    ('largest', 'highest_res', 'to_optimize', 'duplicates', 'deleted').
    """
    allowed_exts = tuple(f".{ext.strip().lower()}" for ext in extensions.split(','))
    exclude_folders = exclude_folders or []
    exclude_folders = [folder.rstrip('/\\') for folder in exclude_folders]

    connection = get_connection()
    cursor = connection.cursor()

    # Fetch the file records from the database.
    cursor.execute("SELECT fid, filepath, filesize FROM files")
    files = cursor.fetchall()

    # Set of file paths referenced anywhere in the system (used for
    # duplicate grouping and for detecting orphaned/deleted files).
    used_filepaths = fetch_used_filepaths(cursor)

    stats = {
        'total': 0,
        'missing': 0,
        'mismatched': 0,
        'saved_bytes_potential': 0,
        'largest': [],
        'highest_res': [],
        'to_optimize': [],
        'duplicates': [],
        'deleted': [],
    }

    images_info = []
    hash_map = {}

    # Process each file record from the database.
    for file in files:
        rel_path = file['filepath'].replace('\\', '/')
        # Skip records under an excluded folder.
        if any(rel_path.startswith(excl + '/') or rel_path == excl for excl in exclude_folders):
            if debug:
                print(f"[POMINIĘTO] {rel_path} (folder wykluczony)")
            continue

        if not rel_path.lower().endswith(allowed_exts):
            continue

        fid = file['fid']
        db_size = file['filesize']
        full_path = os.path.join(FILES_BASE_PATH, rel_path)

        stats['total'] += 1

        # Record referenced in the DB but missing on disk.
        if not os.path.isfile(full_path):
            stats['missing'] += 1
            if debug:
                print(f"[BRAK] {rel_path}")
                print(f" → {full_path}")
            continue

        actual_size = os.path.getsize(full_path)
        width, height = get_image_resolution(full_path)
        file_hash = hash_file(full_path)

        images_info.append({
            'fid': fid,
            'path': full_path,
            'rel_path': rel_path,
            'db_size': db_size,
            'actual_size': actual_size,
            'width': width,
            'height': height,
            'hash': file_hash,
        })

        # Stored filesize differs from the actual one on disk.
        if actual_size != db_size:
            stats['mismatched'] += 1
            diff = db_size - actual_size
            if diff > 0:
                stats['saved_bytes_potential'] += diff

            if debug:
                print(f"[ROZMIAR] {rel_path}")
                print(f" → DB: {db_size} B, Dysk: {actual_size} B ({(db_size - actual_size)/1024:.1f} KB różnicy)")

            if update_db and not dry_run:
                # Sync the DB record with the real size on disk.
                update_sql = "UPDATE files SET filesize = %s WHERE fid = %s"
                cursor.execute(update_sql, (actual_size, fid))
                connection.commit()
            elif update_db and dry_run:
                delta_kb = (actual_size - db_size) / 1024
                print(f"[DRY-RUN] {rel_path}")
                print(f" DB: {db_size} B | FS: {actual_size} B | Δ: {delta_kb:+.1f} KB")

        # Optimization (e.g. downscaling oversized images).
        if to_optimize:
            if (width > MAX_WIDTH or height > MAX_HEIGHT) or actual_size > MAX_FILESIZE_BYTES:
                stats['to_optimize'].append({
                    'path': full_path,
                    'size': actual_size,
                    'res': f"{width}x{height}"
                })
                if update_db and not dry_run:
                    resized = resize_image(full_path)
                    if resized:
                        new_size = os.path.getsize(full_path)
                        # Update the DB after the file changed on disk.
                        update_sql = "UPDATE files SET filesize = %s WHERE fid = %s"
                        cursor.execute(update_sql, (new_size, fid))
                        connection.commit()
                        if debug:
                            print(f"[ZMIENIONO] {rel_path}: zmniejszono do {new_size} B")

                elif update_db and dry_run:
                    print(f"[DRY-RUN] Zmniejszyłbym {rel_path}")

        # Map content hashes to file records for duplicate detection.
        if find_duplicates and file_hash:
            if file_hash not in hash_map:
                hash_map[file_hash] = []
            hash_map[file_hash].append({
                'fid': fid,
                'rel_path': rel_path,
                'path': full_path,
                'db_size': db_size,
                'actual_size': actual_size,
                'width': width,
                'height': height,
            })

    # Largest files by on-disk size.
    stats['largest'] = sorted(images_info, key=lambda x: x['actual_size'], reverse=True)[:top]

    # Highest resolution (by pixel count).
    stats['highest_res'] = sorted(images_info, key=lambda x: x['width'] * x['height'], reverse=True)[:top]

    # Duplicate grouping: any hash shared by more than one file.
    if find_duplicates:
        duplicates = []
        for file_hash, files_list in hash_map.items():
            if len(files_list) > 1:
                # Split the group into files referenced somewhere in the
                # system vs. orphaned copies.
                used = []
                orphaned = []
                for f in files_list:
                    if f['rel_path'] in used_filepaths:
                        used.append(f)
                    else:
                        orphaned.append(f)

                duplicates.append({
                    'hash': file_hash,
                    'used': used,
                    'orphaned': orphaned,
                })
        stats['duplicates'] = duplicates

    # Files present on disk but absent from the database (deleted rows?).
    if show_deleted:
        disk_files = []
        for root, _, files in os.walk(FILES_BASE_PATH):
            # Skip excluded folders.
            rel_root = os.path.relpath(root, FILES_BASE_PATH).replace('\\','/')
            if any(rel_root.startswith(excl) for excl in exclude_folders):
                continue

            for f in files:
                if not f.lower().endswith(allowed_exts):
                    continue
                full_path = os.path.join(root, f)
                rel_path = os.path.relpath(full_path, FILES_BASE_PATH).replace('\\', '/')
                if rel_path not in used_filepaths:
                    disk_files.append(rel_path)
        stats['deleted'] = disk_files

    connection.close()
    return stats
|
||||||
|
|
||||||
|
def print_summary(stats, to_optimize=False, find_duplicates=False, show_deleted=False, top=20):
|
||||||
|
print(f"\n📊 Podsumowanie analizy:")
|
||||||
|
print(f" 🔢 Liczba plików: {stats['total']}")
|
||||||
|
print(f" ❌ Brakujące pliki: {stats['missing']}")
|
||||||
|
print(f" 🛠 Rozmiary różniące się: {stats['mismatched']}")
|
||||||
|
print(f" 💾 Potencjalne oszczędności po zmniejszeniu: {stats['saved_bytes_potential'] / (1024**2):.2f} MB")
|
||||||
|
|
||||||
|
print(f"\n🖼 Największe obrazy (top {top}):")
|
||||||
|
for img in stats['largest']:
|
||||||
|
size_kb = img['actual_size'] / 1024
|
||||||
|
size_mb = size_kb / 1024
|
||||||
|
print(f" - {img['path']} ({size_kb:.1f} KB / {size_mb:.2f} MB)")
|
||||||
|
|
||||||
|
print(f"\n📐 Najwyższe rozdzielczości (top {top}):")
|
||||||
|
for img in stats['highest_res']:
|
||||||
|
print(f" - {img['path']} ({img['width']}x{img['height']})")
|
||||||
|
|
||||||
|
if to_optimize and stats['to_optimize']:
|
||||||
|
print(f"\n⚡ Pliki do optymalizacji ({len(stats['to_optimize'])}):")
|
||||||
|
for opt in stats['to_optimize']:
|
||||||
|
size_kb = opt['size'] / 1024
|
||||||
|
print(f" - {opt['path']} ({opt['res']}, {size_kb:.1f} KB)")
|
||||||
|
|
||||||
|
if find_duplicates and stats['duplicates']:
|
||||||
|
print(f"\n🔍 Duplikaty ({len(stats['duplicates'])} grup):")
|
||||||
|
for group in stats['duplicates']:
|
||||||
|
print(f" Hash: {group['hash']}")
|
||||||
|
if group['used']:
|
||||||
|
print(" ▶ Używane w systemie:")
|
||||||
|
for f in group['used']:
|
||||||
|
print(f" - {f['rel_path']}")
|
||||||
|
if group['orphaned']:
|
||||||
|
print(" ⚠️ Osierocone (nieużywane):")
|
||||||
|
for f in group['orphaned']:
|
||||||
|
print(f" - {f['rel_path']}")
|
||||||
|
|
||||||
|
if show_deleted and stats['deleted']:
|
||||||
|
print(f"\n❌ Pliki na dysku nieznalezione w bazie (usunięte?):")
|
||||||
|
for f in stats['deleted']:
|
||||||
|
print(f" - {f}")
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse arguments, run the audit, print the report."""
    cli = argparse.ArgumentParser(description="Analiza plików w Drupal 6")
    cli.add_argument('--debug', action='store_true', help='Tryb debugowania')
    cli.add_argument('--update-db', action='store_true', help='Aktualizuj rozmiary plików w bazie')
    cli.add_argument('--dry-run', action='store_true', help='Symuluj zmiany bez zapisywania')
    cli.add_argument('--extensions', default='jpg,jpeg,png,gif', help='Rozszerzenia plików do analizy')
    cli.add_argument('--exclude-folders', nargs='*', default=[], help='Foldery do wykluczenia (względem files/)')
    cli.add_argument('--find-duplicates', action='store_true', help='Znajdź i pokaż duplikaty')
    cli.add_argument('--show-deleted', action='store_true', help='Pokaż pliki na dysku usunięte z bazy')
    cli.add_argument('--optimize', action='store_true', help='Zmniejsz obrazy powyżej limitów')
    cli.add_argument('--top', type=int, default=20, help='Ilość największych i najwyższych do pokazania')

    opts = cli.parse_args()

    # Run the audit with the requested options.
    report = compare_files(
        debug=opts.debug,
        to_optimize=opts.optimize,
        update_db=opts.update_db,
        dry_run=opts.dry_run,
        extensions=opts.extensions,
        exclude_folders=opts.exclude_folders,
        find_duplicates=opts.find_duplicates,
        show_deleted=opts.show_deleted,
        top=opts.top,
    )

    # Print the human-readable summary, mirroring the same flags.
    print_summary(
        report,
        to_optimize=opts.optimize,
        find_duplicates=opts.find_duplicates,
        show_deleted=opts.show_deleted,
        top=opts.top,
    )


if __name__ == '__main__':
    main()
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
pymysql
|
||||||
|
Pillow
|
Reference in New Issue
Block a user