obsługa PDF
This commit is contained in:
49
app.py
49
app.py
@@ -51,6 +51,8 @@ from functools import wraps
|
||||
from flask_talisman import Talisman
|
||||
from flask_session import Session
|
||||
from types import SimpleNamespace
|
||||
from pdf2image import convert_from_bytes
|
||||
|
||||
|
||||
# OCR
|
||||
import pytesseract
|
||||
@@ -95,7 +97,7 @@ talisman = Talisman(
|
||||
|
||||
register_heif_opener() # pillow_heif dla HEIC
|
||||
SQLALCHEMY_ECHO = True
|
||||
ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "webp", "heic"}
|
||||
ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "webp", "heic", "pdf"}
|
||||
|
||||
SYSTEM_PASSWORD = app.config.get("SYSTEM_PASSWORD")
|
||||
DEFAULT_ADMIN_USERNAME = app.config.get("DEFAULT_ADMIN_USERNAME")
|
||||
@@ -161,6 +163,7 @@ class User(UserMixin, db.Model):
|
||||
password_hash = db.Column(db.String(512), nullable=False)
|
||||
is_admin = db.Column(db.Boolean, default=False)
|
||||
|
||||
|
||||
# Tabela pośrednia
|
||||
shopping_list_category = db.Table(
|
||||
"shopping_list_category",
|
||||
@@ -285,7 +288,7 @@ def set_authorized_cookie(response):
|
||||
AUTHORIZED_COOKIE_VALUE,
|
||||
max_age=max_age,
|
||||
secure=secure_flag,
|
||||
httponly=True
|
||||
httponly=True,
|
||||
)
|
||||
return response
|
||||
|
||||
@@ -370,6 +373,7 @@ def serve_css_lib(filename):
|
||||
response.headers.pop("Content-Disposition", None)
|
||||
return response
|
||||
|
||||
|
||||
app.register_blueprint(static_bp)
|
||||
|
||||
|
||||
@@ -864,7 +868,7 @@ def get_total_expenses_grouped_by_category(
|
||||
if str(category_id) == "none":
|
||||
if not l.categories:
|
||||
data_map[key]["Bez kategorii"] += total_expense
|
||||
continue
|
||||
continue
|
||||
|
||||
if not l.categories:
|
||||
data_map[key]["Bez kategorii"] += total_expense
|
||||
@@ -898,15 +902,36 @@ def get_total_expenses_grouped_by_category(
|
||||
return {"labels": labels, "datasets": datasets}
|
||||
|
||||
|
||||
def save_pdf_as_webp(file, path):
    """Render every page of an uploaded PDF into one stacked WebP image.

    Pages are rasterised at 300 DPI, pasted top to bottom onto a white
    canvas as wide as the widest page, shrunk to fit within 2000x20000
    pixels, and saved with the extension of ``path`` replaced by ``.webp``.

    Args:
        file: Binary file-like object holding the PDF; it is read from its
            current position.
        path: Target path; its extension (if any) is replaced by ``.webp``.

    Raises:
        ValueError: If the conversion yields no pages or any processing
            step fails. The underlying exception is chained as the cause.
    """
    try:
        images = convert_from_bytes(file.read(), dpi=300)
        if not images:
            raise ValueError("Nie udało się przekonwertować PDF na obraz.")

        total_height = sum(img.height for img in images)
        max_width = max(img.width for img in images)
        combined = Image.new("RGB", (max_width, total_height), (255, 255, 255))

        # Stack the pages vertically, each aligned to the left edge.
        y_offset = 0
        for img in images:
            combined.paste(img, (0, y_offset))
            y_offset += img.height

        # thumbnail() resizes in place and keeps the aspect ratio.
        combined.thumbnail((2000, 20000))

        # splitext only inspects the final path component, so a dot in a
        # directory name cannot truncate the target path.
        new_path = os.path.splitext(path)[0] + ".webp"
        combined.save(new_path, **WEBP_SAVE_PARAMS)

    except Exception as e:
        # Callers catch ValueError; keep the message and chain the cause so
        # the original traceback is not lost.
        raise ValueError(f"Błąd podczas przetwarzania PDF: {e}") from e
|
||||
|
||||
|
||||
############# OCR ###########################
|
||||
|
||||
|
||||
def preprocess_image_for_tesseract(image):
    """Prepare an image for OCR: autocontrast, hard threshold, 2x upscale.

    Args:
        image: Image object to preprocess (presumably a PIL image, since it
            is passed to ``ImageOps.autocontrast``).

    Returns:
        The processed image, twice as wide and twice as high as the input.
    """
    image = ImageOps.autocontrast(image)
    # Hard threshold: values below 150 become 0, everything else 255.
    image = image.point(lambda x: 0 if x < 150 else 255)
    # Upscale exactly once; the resize statement was duplicated, which
    # would have quadrupled the dimensions instead of doubling them.
    image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
    return image
|
||||
|
||||
|
||||
@@ -1904,7 +1929,11 @@ def upload_receipt(list_id):
|
||||
file_path = os.path.join(app.config["UPLOAD_FOLDER"], webp_filename)
|
||||
|
||||
try:
|
||||
save_resized_image(file, file_path)
|
||||
if file.filename.lower().endswith(".pdf"):
|
||||
file.seek(0)
|
||||
save_pdf_as_webp(file, file_path)
|
||||
else:
|
||||
save_resized_image(file, file_path)
|
||||
except ValueError as e:
|
||||
return receipt_error(str(e))
|
||||
|
||||
@@ -2815,7 +2844,11 @@ def healthcheck():
|
||||
|
||||
@app.route("/robots.txt")
def robots_txt():
    """Serve robots.txt, disallowing all crawling when DISABLE_ROBOTS is set.

    Returns:
        A ``(body, status, headers)`` tuple with a plain-text body.
    """
    if app.config.get("DISABLE_ROBOTS"):
        body = "User-agent: *\nDisallow: /"
    else:
        body = "User-agent: *\nAllow: /"
    headers = {"Content-Type": "text/plain"}
    return body, 200, headers
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user