From 8337be646944f28be69c04ff1d811dca13ac835d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Gruszczy=C5=84ski?= Date: Mon, 4 Aug 2025 22:13:29 +0200 Subject: [PATCH] obsluga pdf --- app.py | 49 +++++++++++++++++++++++++++++++------ requirements.txt | 3 ++- static/js/receipt_upload.js | 4 ++- templates/list_share.html | 6 +++++ 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index 6b6785b..137e253 100644 --- a/app.py +++ b/app.py @@ -51,6 +51,8 @@ from functools import wraps from flask_talisman import Talisman from flask_session import Session from types import SimpleNamespace +from pdf2image import convert_from_bytes + # OCR import pytesseract @@ -95,7 +97,7 @@ talisman = Talisman( register_heif_opener() # pillow_heif dla HEIC SQLALCHEMY_ECHO = True -ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "webp", "heic"} +ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "webp", "heic", "pdf"} SYSTEM_PASSWORD = app.config.get("SYSTEM_PASSWORD") DEFAULT_ADMIN_USERNAME = app.config.get("DEFAULT_ADMIN_USERNAME") @@ -161,6 +163,7 @@ class User(UserMixin, db.Model): password_hash = db.Column(db.String(512), nullable=False) is_admin = db.Column(db.Boolean, default=False) + # Tabela pośrednia shopping_list_category = db.Table( "shopping_list_category", @@ -285,7 +288,7 @@ def set_authorized_cookie(response): AUTHORIZED_COOKIE_VALUE, max_age=max_age, secure=secure_flag, - httponly=True + httponly=True, ) return response @@ -370,6 +373,7 @@ def serve_css_lib(filename): response.headers.pop("Content-Disposition", None) return response + app.register_blueprint(static_bp) @@ -864,7 +868,7 @@ def get_total_expenses_grouped_by_category( if str(category_id) == "none": if not l.categories: data_map[key]["Bez kategorii"] += total_expense - continue + continue if not l.categories: data_map[key]["Bez kategorii"] += total_expense @@ -898,15 +902,36 @@ def get_total_expenses_grouped_by_category( return {"labels": labels, "datasets": datasets} +def save_pdf_as_webp(file, path): + try: + images = convert_from_bytes(file.read(), dpi=300) + if not images: + raise ValueError("Nie udało się przekonwertować PDF na obraz.") + + total_height = sum(img.height for img in images) + max_width = max(img.width for img in images) + combined = Image.new("RGB", (max_width, total_height), (255, 255, 255)) + + y_offset = 0 + for img in images: + combined.paste(img, (0, y_offset)) + y_offset += img.height + + combined.thumbnail((2000, 20000)) + new_path = path.rsplit(".", 1)[0] + ".webp" + combined.save(new_path, **WEBP_SAVE_PARAMS) + + except Exception as e: + raise ValueError(f"Błąd podczas przetwarzania PDF: {e}") + + ############# OCR ########################### def preprocess_image_for_tesseract(image): image = ImageOps.autocontrast(image) image = image.point(lambda x: 0 if x < 150 else 255) - image = image.resize( - (image.width * 2, image.height * 2), Image.BICUBIC - ) + image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC) return image @@ -1904,7 +1929,11 @@ def upload_receipt(list_id): file_path = os.path.join(app.config["UPLOAD_FOLDER"], webp_filename) try: - save_resized_image(file, file_path) + if file.filename.lower().endswith(".pdf"): + file.seek(0) + save_pdf_as_webp(file, file_path) + else: + save_resized_image(file, file_path) except ValueError as e: return receipt_error(str(e)) @@ -2815,7 +2844,11 @@ def healthcheck(): @app.route("/robots.txt") def robots_txt(): - content = "User-agent: *\nDisallow: /" if app.config.get("DISABLE_ROBOTS") else "User-agent: *\nAllow: /" + content = ( + "User-agent: *\nDisallow: /" + if app.config.get("DISABLE_ROBOTS") + else "User-agent: *\nAllow: /" + ) return content, 200, {"Content-Type": "text/plain"} diff --git a/requirements.txt b/requirements.txt index 600ed1b..3b583ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ pymysql # mysql cryptography # mysql8 flask-talisman # nagłówki bcrypt -Flask-Session \ No newline at end of file +Flask-Session +pdf2image \ No newline at end of file diff --git a/static/js/receipt_upload.js b/static/js/receipt_upload.js index af89725..29f8488 100644 --- a/static/js/receipt_upload.js +++ b/static/js/receipt_upload.js @@ -5,6 +5,7 @@ if (!window.receiptUploaderInitialized) { const form = document.getElementById("receiptForm"); const inputCamera = document.getElementById("cameraInput"); const inputGallery = document.getElementById("galleryInput"); + const inputPDF = document.getElementById("pdfInput"); const galleryBtn = document.getElementById("galleryBtn"); const galleryBtnText = document.getElementById("galleryBtnText"); const cameraBtn = document.getElementById("cameraBtn"); @@ -12,7 +13,7 @@ if (!window.receiptUploaderInitialized) { const progressBar = document.getElementById("progressBar"); const gallery = document.getElementById("receiptGallery"); - if (!form || !inputCamera || !inputGallery || !gallery) return; + if (!form || !gallery) return; const isDesktop = window.matchMedia("(pointer: fine)").matches; @@ -105,6 +106,7 @@ if (!window.receiptUploaderInitialized) { inputCamera?.addEventListener("change", () => handleFileUpload(inputCamera)); inputGallery?.addEventListener("change", () => handleFileUpload(inputGallery)); + inputPDF?.addEventListener("change", () => handleFileUpload(inputPDF)); }); window.receiptUploaderInitialized = true; diff --git a/templates/list_share.html b/templates/list_share.html index 668ebd7..f6f3541 100644 --- a/templates/list_share.html +++ b/templates/list_share.html @@ -188,6 +188,12 @@ + + +