ocr usprawnienia
This commit is contained in:
106
app.py
106
app.py
@@ -9,7 +9,7 @@ import psutil
|
||||
import secrets
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pillow_heif import register_heif_opener
|
||||
|
||||
@@ -44,7 +44,7 @@ from flask_compress import Compress
|
||||
from flask_socketio import SocketIO, emit, join_room
|
||||
from werkzeug.security import generate_password_hash, check_password_hash
|
||||
from config import Config
|
||||
from PIL import Image, ExifTags
|
||||
from PIL import Image, ExifTags, ImageFilter, ImageOps
|
||||
from werkzeug.utils import secure_filename
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
from sqlalchemy import func, extract
|
||||
@@ -54,6 +54,7 @@ from functools import wraps
|
||||
# OCR
|
||||
from collections import Counter
|
||||
import pytesseract
|
||||
from pytesseract import Output
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
@@ -295,8 +296,8 @@ def save_resized_image(file, path):
|
||||
image.info.clear()
|
||||
|
||||
new_path = path.rsplit(".", 1)[0] + ".webp"
|
||||
image.save(new_path, format="WEBP", quality=85, method=6)
|
||||
|
||||
#image.save(new_path, format="WEBP", quality=85, method=6)
|
||||
image.save(new_path, format="WEBP", quality=100, method=0)
|
||||
|
||||
def redirect_with_flash(
|
||||
message: str, category: str = "info", endpoint: str = "main_page"
|
||||
@@ -343,43 +344,57 @@ def _receipt_error(message):
|
||||
|
||||
############# OCR ###########################
|
||||
|
||||
|
||||
def preprocess_image_for_tesseract(pil_image):
    """Prepare a PIL image for Tesseract using OpenCV.

    The image is converted to grayscale, enlarged by a factor of two in
    both dimensions and binarized with a mean-based adaptive threshold.

    Args:
        pil_image: Source ``PIL.Image.Image``.

    Returns:
        A new ``PIL.Image.Image`` built from the thresholded array.
    """
    import cv2
    import numpy as np
    from PIL import Image

    # PIL.Image -> grayscale NumPy array.
    gray = np.array(pil_image.convert("L"))

    # Upscale so that small print is easier for the OCR engine to read.
    upscaled = cv2.resize(
        gray,
        None,
        fx=2.0,
        fy=2.0,
        interpolation=cv2.INTER_LINEAR,
    )

    # Adaptive thresholding copes better with an uneven background than a
    # single global THRESH_BINARY cut-off.
    binarized = cv2.adaptiveThreshold(
        upscaled,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        blockSize=15,
        C=10,
    )

    # Back to PIL.Image, which is what pytesseract is given.
    return Image.fromarray(binarized)
|
||||
|
||||
def preprocess_image_for_tesseract(image):
    """Return a contrast-stretched, thresholded, 2x-enlarged copy of *image* for OCR.

    Args:
        image: Source ``PIL.Image.Image``; it is converted to mode "L"
            internally, so callers do not need to convert it beforehand.

    Returns:
        A new mode-"L" ``PIL.Image.Image`` twice the width and height of
        the input.
    """
    # Collapse to one luminance channel first. Without this, the threshold
    # below is applied to every band of an RGB image separately (which is not
    # a binarization), and ImageOps.autocontrast rejects modes other than
    # L/RGB (e.g. RGBA or P).
    image = ImageOps.autocontrast(image.convert("L"))

    # Hard threshold: levels below 160 become black, everything else white.
    image = image.point(lambda level: 0 if level < 160 else 255)

    # Enlarge 2x with bicubic resampling.
    return image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
|
||||
|
||||
def extract_total_tesseract(image):
|
||||
|
||||
text = pytesseract.image_to_string(image, lang="pol", config="--psm 6")
|
||||
text = pytesseract.image_to_string(image, lang="pol", config="--psm 4")
|
||||
lines = text.splitlines()
|
||||
candidates = []
|
||||
keyword_lines_debug = []
|
||||
|
||||
fuzzy_regex = re.compile(r"[\dOo][.,:;g9zZ][\d]{2}")
|
||||
keyword_pattern = re.compile(
|
||||
r"""
|
||||
\b(
|
||||
[5s]u[mn][aąo0]? |
|
||||
razem |
|
||||
zap[łl][aąo0]ty |
|
||||
do\s+zap[łl][aąo0]ty |
|
||||
kwota |
|
||||
płatno[śćs] |
|
||||
warto[śćs] |
|
||||
total |
|
||||
amount
|
||||
)\b
|
||||
""",
|
||||
re.IGNORECASE | re.VERBOSE
|
||||
)
|
||||
|
||||
for idx, line in enumerate(lines):
|
||||
if keyword_pattern.search(line[:30]):
|
||||
keyword_lines_debug.append((idx, line))
|
||||
|
||||
for line in lines:
|
||||
if not line.strip():
|
||||
continue
|
||||
|
||||
matches = re.findall(r"\d{1,4}[.,]\d{2}", line)
|
||||
matches = re.findall(r"\d{1,4}\s?[.,]\d{2}", line)
|
||||
for match in matches:
|
||||
try:
|
||||
val = float(match.replace(",", "."))
|
||||
val = float(match.replace(" ", "").replace(",", "."))
|
||||
if 0.1 <= val <= 100000:
|
||||
candidates.append((val, line))
|
||||
except:
|
||||
continue
|
||||
|
||||
spaced = re.findall(r"\d{1,4}\s\d{2}", line)
|
||||
for match in spaced:
|
||||
try:
|
||||
val = float(match.replace(" ", "."))
|
||||
if 0.1 <= val <= 100000:
|
||||
candidates.append((val, line))
|
||||
except:
|
||||
@@ -399,24 +414,45 @@ def extract_total_tesseract(image):
|
||||
)
|
||||
try:
|
||||
val = float(cleaned)
|
||||
if 0.1 <= val <= 100:
|
||||
if 0.1 <= val <= 100000:
|
||||
candidates.append((val, line))
|
||||
except:
|
||||
continue
|
||||
|
||||
preferred = [
|
||||
val
|
||||
(val, line)
|
||||
for val, line in candidates
|
||||
if re.search(r"sum[aąo]?|razem|zapłaty", line.lower())
|
||||
if keyword_pattern.search(line.lower())
|
||||
]
|
||||
|
||||
if preferred:
|
||||
max_val = round(max(preferred), 2)
|
||||
return max_val, lines
|
||||
max_val = max(preferred, key=lambda x: x[0])[0]
|
||||
return round(max_val, 2), lines
|
||||
|
||||
if candidates:
|
||||
max_val = round(max([val for val, _ in candidates]), 2)
|
||||
return max_val, lines
|
||||
max_val = max([val for val, _ in candidates])
|
||||
return round(max_val, 2), lines
|
||||
|
||||
data = pytesseract.image_to_data(image, lang="pol", config="--psm 4", output_type=Output.DICT)
|
||||
font_candidates = []
|
||||
|
||||
for i in range(len(data["text"])):
|
||||
word = data["text"][i].strip()
|
||||
if not word:
|
||||
continue
|
||||
|
||||
if re.match(r"^\d{1,5}[.,\s]\d{2}$", word):
|
||||
try:
|
||||
val = float(word.replace(",", ".").replace(" ", "."))
|
||||
height = data["height"][i]
|
||||
if 0.1 <= val <= 10000:
|
||||
font_candidates.append((val, height, word))
|
||||
except:
|
||||
continue
|
||||
|
||||
if font_candidates:
|
||||
best = max(font_candidates, key=lambda x: x[1])
|
||||
return round(best[0], 2), lines
|
||||
|
||||
return 0.0, lines
|
||||
|
||||
|
Reference in New Issue
Block a user