From a44a61c7184f0c5e2bf96ba07a994e5b4f2cd967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Gruszczy=C5=84ski?= Date: Tue, 22 Jul 2025 11:23:00 +0200 Subject: [PATCH] ocr usprawnienia --- app.py | 106 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/app.py b/app.py index ce92783..c2dd629 100644 --- a/app.py +++ b/app.py @@ -9,7 +9,7 @@ import psutil import secrets import hashlib import re - +import numpy as np from pillow_heif import register_heif_opener @@ -44,7 +44,7 @@ from flask_compress import Compress from flask_socketio import SocketIO, emit, join_room from werkzeug.security import generate_password_hash, check_password_hash from config import Config -from PIL import Image, ExifTags +from PIL import Image, ExifTags, ImageFilter, ImageOps from werkzeug.utils import secure_filename from werkzeug.middleware.proxy_fix import ProxyFix from sqlalchemy import func, extract @@ -54,6 +54,7 @@ from functools import wraps # OCR from collections import Counter import pytesseract +from pytesseract import Output app = Flask(__name__) @@ -295,8 +296,8 @@ def save_resized_image(file, path): image.info.clear() new_path = path.rsplit(".", 1)[0] + ".webp" - image.save(new_path, format="WEBP", quality=85, method=6) - + #image.save(new_path, format="WEBP", quality=85, method=6) + image.save(new_path, format="WEBP", quality=100, method=0) def redirect_with_flash( message: str, category: str = "info", endpoint: str = "main_page" @@ -343,43 +344,57 @@ def _receipt_error(message): ############# OCR ########################### - -def preprocess_image_for_tesseract(pil_image): - import cv2 - import numpy as np - from PIL import Image - - # Konwersja PIL.Image → NumPy grayscale - image = np.array(pil_image.convert("L")) - - # Zwiększenie skali dla lepszej czytelności OCR - image = cv2.resize(image, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_LINEAR) - - # Adaptacyjne progowanie (lepsze niż THRESH_BINARY przy nierównym tle) - image = cv2.adaptiveThreshold( - image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, blockSize=15, C=10 - ) - - # Konwersja z powrotem na PIL.Image (dla pytesseract) - return Image.fromarray(image) - +def preprocess_image_for_tesseract(image): + image = ImageOps.autocontrast(image) + image = image.point(lambda x: 0 if x < 160 else 255) # mocniejsza binarizacja + image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC) # większe powiększenie + return image def extract_total_tesseract(image): - - text = pytesseract.image_to_string(image, lang="pol", config="--psm 6") + text = pytesseract.image_to_string(image, lang="pol", config="--psm 4") lines = text.splitlines() candidates = [] + keyword_lines_debug = [] fuzzy_regex = re.compile(r"[\dOo][.,:;g9zZ][\d]{2}") + keyword_pattern = re.compile( + r""" + \b( + [5s]u[mn][aąo0]? | + razem | + zap[łl][aąo0]ty | + do\s+zap[łl][aąo0]ty | + kwota | + płatno[śćs] | + warto[śćs] | + total | + amount + )\b + """, + re.IGNORECASE | re.VERBOSE + ) + + for idx, line in enumerate(lines): + if keyword_pattern.search(line[:30]): + keyword_lines_debug.append((idx, line)) for line in lines: if not line.strip(): continue - matches = re.findall(r"\d{1,4}[.,]\d{2}", line) + matches = re.findall(r"\d{1,4}\s?[.,]\d{2}", line) for match in matches: try: - val = float(match.replace(",", ".")) + val = float(match.replace(" ", "").replace(",", ".")) + if 0.1 <= val <= 100000: + candidates.append((val, line)) + except: + continue + + spaced = re.findall(r"\d{1,4}\s\d{2}", line) + for match in spaced: + try: + val = float(match.replace(" ", ".")) if 0.1 <= val <= 100000: candidates.append((val, line)) except: @@ -399,24 +414,45 @@ def extract_total_tesseract(image): ) try: val = float(cleaned) - if 0.1 <= val <= 100: + if 0.1 <= val <= 100000: candidates.append((val, line)) except: continue preferred = [ - val + (val, line) for val, line in candidates - if re.search(r"sum[aąo]?|razem|zapłaty", line.lower()) + if keyword_pattern.search(line.lower()) ] if preferred: - max_val = round(max(preferred), 2) - return max_val, lines + max_val = max(preferred, key=lambda x: x[0])[0] + return round(max_val, 2), lines if candidates: - max_val = round(max([val for val, _ in candidates]), 2) - return max_val, lines + max_val = max([val for val, _ in candidates]) + return round(max_val, 2), lines + + data = pytesseract.image_to_data(image, lang="pol", config="--psm 4", output_type=Output.DICT) + font_candidates = [] + + for i in range(len(data["text"])): + word = data["text"][i].strip() + if not word: + continue + + if re.match(r"^\d{1,5}[.,\s]\d{2}$", word): + try: + val = float(word.replace(",", ".").replace(" ", ".")) + height = data["height"][i] + if 0.1 <= val <= 10000: + font_candidates.append((val, height, word)) + except: + continue + + if font_candidates: + best = max(font_candidates, key=lambda x: x[1]) + return round(best[0], 2), lines return 0.0, lines