ocr usprawnienia

This commit is contained in:
Mateusz Gruszczyński
2025-07-22 11:23:00 +02:00
parent aa865baf3b
commit a44a61c718

106
app.py
View File

@@ -9,7 +9,7 @@ import psutil
import secrets
import hashlib
import re
import numpy as np
from pillow_heif import register_heif_opener
@@ -44,7 +44,7 @@ from flask_compress import Compress
from flask_socketio import SocketIO, emit, join_room
from werkzeug.security import generate_password_hash, check_password_hash
from config import Config
from PIL import Image, ExifTags
from PIL import Image, ExifTags, ImageFilter, ImageOps
from werkzeug.utils import secure_filename
from werkzeug.middleware.proxy_fix import ProxyFix
from sqlalchemy import func, extract
@@ -54,6 +54,7 @@ from functools import wraps
# OCR
from collections import Counter
import pytesseract
from pytesseract import Output
app = Flask(__name__)
@@ -295,8 +296,8 @@ def save_resized_image(file, path):
image.info.clear()
new_path = path.rsplit(".", 1)[0] + ".webp"
image.save(new_path, format="WEBP", quality=85, method=6)
#image.save(new_path, format="WEBP", quality=85, method=6)
image.save(new_path, format="WEBP", quality=100, method=0)
def redirect_with_flash(
message: str, category: str = "info", endpoint: str = "main_page"
@@ -343,43 +344,57 @@ def _receipt_error(message):
############# OCR ###########################
def preprocess_image_for_tesseract(pil_image):
    """Prepare a PIL image for Tesseract OCR using OpenCV.

    The image is converted to grayscale, enlarged by a factor of two in
    both directions and binarized with an adaptive mean threshold.

    Args:
        pil_image: Source image; it must provide ``convert("L")``
            (a ``PIL.Image.Image``).

    Returns:
        A new ``PIL.Image.Image`` built from the thresholded array.
    """
    import cv2
    import numpy as np
    from PIL import Image

    # PIL.Image -> NumPy array holding the grayscale pixel values.
    gray = np.array(pil_image.convert("L"))

    # Upscale so that small print is easier for the OCR engine to read.
    enlarged = cv2.resize(
        gray, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_LINEAR
    )

    # Adaptive thresholding copes with uneven backgrounds better than a
    # single global THRESH_BINARY cut-off.
    binarized = cv2.adaptiveThreshold(
        enlarged,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        blockSize=15,
        C=10,
    )

    # Back to PIL.Image, which is what pytesseract is given.
    return Image.fromarray(binarized)
def preprocess_image_for_tesseract(image):
    """Prepare a PIL image for Tesseract OCR using Pillow only.

    Steps: reduce to a single grayscale channel, stretch the contrast,
    apply a hard black/white threshold at 160 and double the size with
    bicubic resampling.

    Args:
        image: Source ``PIL.Image.Image`` in any mode that
            ``Image.convert("L")`` accepts.

    Returns:
        A new mode-"L" ``PIL.Image.Image`` twice as wide and twice as high
        as the input.
    """
    # Work on one channel. Image.point() applies its function to every
    # band, so thresholding an RGB image yields up to eight colours rather
    # than black and white, and most ImageOps operators (autocontrast
    # included) only work on L and RGB images, so RGBA/P/CMYK input is
    # not supported there.
    if image.mode != "L":
        image = image.convert("L")

    image = ImageOps.autocontrast(image)

    # Strong binarization: anything darker than 160 becomes black.
    image = image.point(lambda x: 0 if x < 160 else 255)

    # Enlarge 2x so that small glyphs are easier for the OCR engine.
    image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
    return image
def extract_total_tesseract(image):
text = pytesseract.image_to_string(image, lang="pol", config="--psm 6")
text = pytesseract.image_to_string(image, lang="pol", config="--psm 4")
lines = text.splitlines()
candidates = []
keyword_lines_debug = []
fuzzy_regex = re.compile(r"[\dOo][.,:;g9zZ][\d]{2}")
keyword_pattern = re.compile(
r"""
\b(
[5s]u[mn][aąo0]? |
razem |
zap[łl][aąo0]ty |
do\s+zap[łl][aąo0]ty |
kwota |
płatno[śćs] |
warto[śćs] |
total |
amount
)\b
""",
re.IGNORECASE | re.VERBOSE
)
for idx, line in enumerate(lines):
if keyword_pattern.search(line[:30]):
keyword_lines_debug.append((idx, line))
for line in lines:
if not line.strip():
continue
matches = re.findall(r"\d{1,4}[.,]\d{2}", line)
matches = re.findall(r"\d{1,4}\s?[.,]\d{2}", line)
for match in matches:
try:
val = float(match.replace(",", "."))
val = float(match.replace(" ", "").replace(",", "."))
if 0.1 <= val <= 100000:
candidates.append((val, line))
except:
continue
spaced = re.findall(r"\d{1,4}\s\d{2}", line)
for match in spaced:
try:
val = float(match.replace(" ", "."))
if 0.1 <= val <= 100000:
candidates.append((val, line))
except:
@@ -399,24 +414,45 @@ def extract_total_tesseract(image):
)
try:
val = float(cleaned)
if 0.1 <= val <= 100:
if 0.1 <= val <= 100000:
candidates.append((val, line))
except:
continue
preferred = [
val
(val, line)
for val, line in candidates
if re.search(r"sum[aąo]?|razem|zapłaty", line.lower())
if keyword_pattern.search(line.lower())
]
if preferred:
max_val = round(max(preferred), 2)
return max_val, lines
max_val = max(preferred, key=lambda x: x[0])[0]
return round(max_val, 2), lines
if candidates:
max_val = round(max([val for val, _ in candidates]), 2)
return max_val, lines
max_val = max([val for val, _ in candidates])
return round(max_val, 2), lines
data = pytesseract.image_to_data(image, lang="pol", config="--psm 4", output_type=Output.DICT)
font_candidates = []
for i in range(len(data["text"])):
word = data["text"][i].strip()
if not word:
continue
if re.match(r"^\d{1,5}[.,\s]\d{2}$", word):
try:
val = float(word.replace(",", ".").replace(" ", "."))
height = data["height"][i]
if 0.1 <= val <= 10000:
font_candidates.append((val, height, word))
except:
continue
if font_candidates:
best = max(font_candidates, key=lambda x: x[1])
return round(best[0], 2), lines
return 0.0, lines