obsluga pdf

This commit is contained in:
Mateusz Gruszczyński
2025-08-04 22:13:29 +02:00
parent 1cd4f62004
commit 8337be6469
4 changed files with 52 additions and 10 deletions

49
app.py
View File

@@ -51,6 +51,8 @@ from functools import wraps
from flask_talisman import Talisman
from flask_session import Session
from types import SimpleNamespace
from pdf2image import convert_from_bytes
# OCR
import pytesseract
@@ -95,7 +97,7 @@ talisman = Talisman(
register_heif_opener() # pillow_heif dla HEIC
SQLALCHEMY_ECHO = True
ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "webp", "heic"}
ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "webp", "heic", "pdf"}
SYSTEM_PASSWORD = app.config.get("SYSTEM_PASSWORD")
DEFAULT_ADMIN_USERNAME = app.config.get("DEFAULT_ADMIN_USERNAME")
@@ -161,6 +163,7 @@ class User(UserMixin, db.Model):
password_hash = db.Column(db.String(512), nullable=False)
is_admin = db.Column(db.Boolean, default=False)
# Tabela pośrednia
shopping_list_category = db.Table(
"shopping_list_category",
@@ -285,7 +288,7 @@ def set_authorized_cookie(response):
AUTHORIZED_COOKIE_VALUE,
max_age=max_age,
secure=secure_flag,
httponly=True
httponly=True,
)
return response
@@ -370,6 +373,7 @@ def serve_css_lib(filename):
response.headers.pop("Content-Disposition", None)
return response
app.register_blueprint(static_bp)
@@ -864,7 +868,7 @@ def get_total_expenses_grouped_by_category(
if str(category_id) == "none":
if not l.categories:
data_map[key]["Bez kategorii"] += total_expense
continue
continue
if not l.categories:
data_map[key]["Bez kategorii"] += total_expense
@@ -898,15 +902,36 @@ def get_total_expenses_grouped_by_category(
return {"labels": labels, "datasets": datasets}
def save_pdf_as_webp(file, path):
    """Render every page of a PDF into a single tall WebP image.

    The PDF bytes are rasterised at 300 DPI, the resulting pages are stacked
    top to bottom on a white canvas as wide as the widest page, the canvas is
    shrunk to fit within 2000x20000 pixels, and the result is saved with the
    extension of ``path`` replaced by ``.webp``.

    Args:
        file: Binary file-like object; it is read from its current position.
        path: Target path. Its extension (if any) is replaced by ``.webp``.

    Raises:
        ValueError: If the PDF yields no pages or any step of the conversion
            or save fails. The underlying exception is chained as the cause.
    """
    try:
        images = convert_from_bytes(file.read(), dpi=300)
        if not images:
            raise ValueError("Nie udało się przekonwertować PDF na obraz.")

        total_height = sum(img.height for img in images)
        max_width = max(img.width for img in images)

        # White fill so the area beside narrower pages stays white.
        combined = Image.new("RGB", (max_width, total_height), (255, 255, 255))
        y_offset = 0
        for img in images:
            combined.paste(img, (0, y_offset))
            y_offset += img.height

        combined.thumbnail((2000, 20000))

        # splitext only inspects the last path component, so a dot in a
        # directory name or a leading-dot file name cannot truncate the path.
        new_path = os.path.splitext(path)[0] + ".webp"
        combined.save(new_path, **WEBP_SAVE_PARAMS)
    except Exception as e:
        # Callers handle ValueError only; keep the original error as the cause
        # so the real traceback is not lost.
        raise ValueError(f"Błąd podczas przetwarzania PDF: {e}") from e
############# OCR ###########################
def preprocess_image_for_tesseract(image):
    """Prepare an image for OCR.

    Applies auto-contrast, maps every pixel value below 150 to 0 and every
    other value to 255, then doubles the width and height using bicubic
    resampling.

    Args:
        image: PIL image to process.

    Returns:
        The processed image.
    """
    image = ImageOps.autocontrast(image)
    image = image.point(lambda x: 0 if x < 150 else 255)
    # Upscale exactly once: the resize statement was present twice (old and
    # new formatting of the same line), which would enlarge the image 4x.
    image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
    return image
@@ -1904,7 +1929,11 @@ def upload_receipt(list_id):
file_path = os.path.join(app.config["UPLOAD_FOLDER"], webp_filename)
try:
save_resized_image(file, file_path)
if file.filename.lower().endswith(".pdf"):
file.seek(0)
save_pdf_as_webp(file, file_path)
else:
save_resized_image(file, file_path)
except ValueError as e:
return receipt_error(str(e))
@@ -2815,7 +2844,11 @@ def healthcheck():
@app.route("/robots.txt")
def robots_txt():
    """Serve robots.txt as plain text.

    Returns a "Disallow: /" rule for all user agents when the
    ``DISABLE_ROBOTS`` config value is truthy, otherwise an "Allow: /" rule.
    """
    if app.config.get("DISABLE_ROBOTS"):
        body = "User-agent: *\nDisallow: /"
    else:
        body = "User-agent: *\nAllow: /"
    return body, 200, {"Content-Type": "text/plain"}

View File

@@ -16,4 +16,5 @@ pymysql # mysql
cryptography # mysql8
flask-talisman # nagłówki
bcrypt
Flask-Session
Flask-Session
pdf2image

View File

@@ -5,6 +5,7 @@ if (!window.receiptUploaderInitialized) {
const form = document.getElementById("receiptForm");
const inputCamera = document.getElementById("cameraInput");
const inputGallery = document.getElementById("galleryInput");
const inputPDF = document.getElementById("pdfInput");
const galleryBtn = document.getElementById("galleryBtn");
const galleryBtnText = document.getElementById("galleryBtnText");
const cameraBtn = document.getElementById("cameraBtn");
@@ -12,7 +13,7 @@ if (!window.receiptUploaderInitialized) {
const progressBar = document.getElementById("progressBar");
const gallery = document.getElementById("receiptGallery");
if (!form || !inputCamera || !inputGallery || !gallery) return;
if (!form || !gallery) return;
const isDesktop = window.matchMedia("(pointer: fine)").matches;
@@ -105,6 +106,7 @@ if (!window.receiptUploaderInitialized) {
inputCamera?.addEventListener("change", () => handleFileUpload(inputCamera));
inputGallery?.addEventListener("change", () => handleFileUpload(inputGallery));
inputPDF?.addEventListener("change", () => handleFileUpload(inputPDF));
});
window.receiptUploaderInitialized = true;

View File

@@ -188,6 +188,12 @@
</label>
<input type="file" name="receipt" accept="image/*" class="d-none" id="galleryInput">
<label for="pdfInput" id="pdfBtn"
class="btn btn-outline-light w-100 py-2 mb-2 d-flex align-items-center justify-content-center gap-2">
📄 Dodaj PDF
</label>
<input type="file" name="receipt" accept="application/pdf" class="d-none" id="pdfInput">
<div id="progressContainer" class="progress progress-dark rounded-3 overflow-hidden shadow-sm"
style="height: 20px; display: none;">
<div id="progressBar" class="progress-bar bg-success fw-bold text-white text-center" role="progressbar"