zmiany w szablonach i poprawki w OCR
This commit is contained in:
179
app.py
179
app.py
@@ -115,7 +115,7 @@ class ShoppingList(db.Model):
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
title = db.Column(db.String(150), nullable=False)
|
||||
created_at = db.Column(db.DateTime, default=datetime.utcnow)
|
||||
|
||||
|
||||
owner_id = db.Column(db.Integer, db.ForeignKey("user.id"))
|
||||
owner = db.relationship("User", backref="lists", foreign_keys=[owner_id])
|
||||
|
||||
@@ -135,7 +135,9 @@ class Item(db.Model):
|
||||
# added_at = db.Column(db.DateTime, default=datetime.utcnow)
|
||||
added_at = db.Column(db.DateTime, default=utcnow)
|
||||
added_by = db.Column(db.Integer, db.ForeignKey("user.id"), nullable=True)
|
||||
added_by_user = db.relationship("User", backref="added_items", lazy=True, foreign_keys=[added_by])
|
||||
added_by_user = db.relationship(
|
||||
"User", backref="added_items", lazy=True, foreign_keys=[added_by]
|
||||
)
|
||||
|
||||
purchased = db.Column(db.Boolean, default=False)
|
||||
purchased_at = db.Column(db.DateTime, nullable=True)
|
||||
@@ -393,22 +395,25 @@ def preprocess_image_for_tesseract(image):
|
||||
|
||||
|
||||
def extract_total_tesseract(image):
|
||||
import pytesseract
|
||||
from pytesseract import Output
|
||||
import re
|
||||
|
||||
text = pytesseract.image_to_string(image, lang="pol", config="--psm 4")
|
||||
lines = text.splitlines()
|
||||
candidates = []
|
||||
keyword_lines_debug = []
|
||||
|
||||
fuzzy_regex = re.compile(r"[\dOo][.,:;g9zZ][\d]{2}")
|
||||
keyword_pattern = re.compile(
|
||||
blacklist_keywords = re.compile(r"\b(ptu|vat|podatek|stawka)\b", re.IGNORECASE)
|
||||
|
||||
priority_keywords = re.compile(
|
||||
r"""
|
||||
\b(
|
||||
[5s]u[mn][aąo0]? |
|
||||
razem |
|
||||
zap[łl][aąo0]ty |
|
||||
do\s+zap[łl][aąo0]ty |
|
||||
razem\s*do\s*zap[łl][aąo0]ty |
|
||||
do\s*zap[łl][aąo0]ty |
|
||||
suma |
|
||||
kwota |
|
||||
płatno[śćs] |
|
||||
warto[śćs] |
|
||||
płatno[śćs] |
|
||||
total |
|
||||
amount
|
||||
)\b
|
||||
@@ -416,84 +421,71 @@ def extract_total_tesseract(image):
|
||||
re.IGNORECASE | re.VERBOSE,
|
||||
)
|
||||
|
||||
for idx, line in enumerate(lines):
|
||||
if keyword_pattern.search(line[:30]):
|
||||
keyword_lines_debug.append((idx, line))
|
||||
|
||||
for line in lines:
|
||||
if not line.strip():
|
||||
continue
|
||||
|
||||
matches = re.findall(r"\d{1,4}\s?[.,]\d{2}", line)
|
||||
if blacklist_keywords.search(line):
|
||||
continue
|
||||
|
||||
is_priority = priority_keywords.search(line)
|
||||
|
||||
matches = re.findall(r"\d{1,4}[.,]\d{2}", line)
|
||||
for match in matches:
|
||||
try:
|
||||
val = float(match.replace(" ", "").replace(",", "."))
|
||||
val = float(match.replace(",", "."))
|
||||
if 0.1 <= val <= 100000:
|
||||
candidates.append((val, line))
|
||||
candidates.append((val, line, is_priority is not None))
|
||||
except:
|
||||
continue
|
||||
|
||||
spaced = re.findall(r"\d{1,4}\s\d{2}", line)
|
||||
for match in spaced:
|
||||
try:
|
||||
val = float(match.replace(" ", "."))
|
||||
if 0.1 <= val <= 100000:
|
||||
candidates.append((val, line))
|
||||
except:
|
||||
continue
|
||||
# Tylko w liniach priorytetowych: sprawdzamy spaced fallback
|
||||
if is_priority:
|
||||
spaced = re.findall(r"\d{1,4}\s\d{2}", line)
|
||||
for match in spaced:
|
||||
try:
|
||||
val = float(match.replace(" ", "."))
|
||||
if 0.1 <= val <= 100000:
|
||||
candidates.append((val, line, True))
|
||||
except:
|
||||
continue
|
||||
|
||||
fuzzy_matches = fuzzy_regex.findall(line)
|
||||
for match in fuzzy_matches:
|
||||
cleaned = (
|
||||
match.replace("O", "0")
|
||||
.replace("o", "0")
|
||||
.replace(":", ".")
|
||||
.replace(";", ".")
|
||||
.replace(",", ".")
|
||||
.replace("g", "9")
|
||||
.replace("z", "9")
|
||||
.replace("Z", "9")
|
||||
)
|
||||
try:
|
||||
val = float(cleaned)
|
||||
if 0.1 <= val <= 100000:
|
||||
candidates.append((val, line))
|
||||
except:
|
||||
continue
|
||||
|
||||
preferred = [
|
||||
(val, line) for val, line in candidates if keyword_pattern.search(line.lower())
|
||||
]
|
||||
# Preferujemy linie priorytetowe
|
||||
preferred = [(val, line) for val, line, is_pref in candidates if is_pref]
|
||||
|
||||
if preferred:
|
||||
max_val = max(preferred, key=lambda x: x[0])[0]
|
||||
return round(max_val, 2), lines
|
||||
best_val = max(preferred, key=lambda x: x[0])[0]
|
||||
if best_val < 99999:
|
||||
return round(best_val, 2), lines
|
||||
|
||||
if candidates:
|
||||
max_val = max([val for val, _ in candidates])
|
||||
return round(max_val, 2), lines
|
||||
best_val = max(candidates, key=lambda x: x[0])[0]
|
||||
if best_val < 99999:
|
||||
return round(best_val, 2), lines
|
||||
|
||||
# Fallback: największy font + bold
|
||||
data = pytesseract.image_to_data(
|
||||
image, lang="pol", config="--psm 4", output_type=Output.DICT
|
||||
)
|
||||
font_candidates = []
|
||||
|
||||
font_candidates = []
|
||||
for i in range(len(data["text"])):
|
||||
word = data["text"][i].strip()
|
||||
if not word:
|
||||
if not word or not re.match(r"^\d{1,5}[.,\s]\d{2}$", word):
|
||||
continue
|
||||
|
||||
if re.match(r"^\d{1,5}[.,\s]\d{2}$", word):
|
||||
try:
|
||||
val = float(word.replace(",", ".").replace(" ", "."))
|
||||
height = data["height"][i]
|
||||
if 0.1 <= val <= 10000:
|
||||
font_candidates.append((val, height, word))
|
||||
except:
|
||||
continue
|
||||
try:
|
||||
val = float(word.replace(",", ".").replace(" ", "."))
|
||||
height = data["height"][i]
|
||||
conf = int(data.get("conf", ["0"] * len(data["text"]))[i])
|
||||
if 0.1 <= val <= 100000:
|
||||
font_candidates.append((val, height, conf))
|
||||
except:
|
||||
continue
|
||||
|
||||
if font_candidates:
|
||||
best = max(font_candidates, key=lambda x: x[1])
|
||||
# Preferuj najwyższy font z sensownym confidence
|
||||
best = max(font_candidates, key=lambda x: (x[1], x[2]))
|
||||
return round(best[0], 2), lines
|
||||
|
||||
return 0.0, lines
|
||||
@@ -964,15 +956,32 @@ def view_list(list_id):
|
||||
@app.route("/user_expenses")
|
||||
@login_required
|
||||
def user_expenses():
|
||||
# Lista wydatków użytkownika
|
||||
expenses = (
|
||||
start_date_str = request.args.get("start_date")
|
||||
end_date_str = request.args.get("end_date")
|
||||
start = None
|
||||
end = None
|
||||
|
||||
# Przygotowanie podstawowego zapytania o wydatki użytkownika
|
||||
expenses_query = (
|
||||
Expense.query.join(ShoppingList, Expense.list_id == ShoppingList.id)
|
||||
.options(joinedload(Expense.list))
|
||||
.filter(ShoppingList.owner_id == current_user.id)
|
||||
.order_by(Expense.added_at.desc())
|
||||
.all()
|
||||
)
|
||||
|
||||
# Filtrowanie po zakresie dat, jeśli podano
|
||||
if start_date_str and end_date_str:
|
||||
try:
|
||||
start = datetime.strptime(start_date_str, "%Y-%m-%d")
|
||||
end = datetime.strptime(end_date_str, "%Y-%m-%d") + timedelta(days=1)
|
||||
expenses_query = expenses_query.filter(
|
||||
Expense.added_at >= start, Expense.added_at < end
|
||||
)
|
||||
except ValueError:
|
||||
flash("Błędny zakres dat", "danger")
|
||||
|
||||
expenses = expenses_query.order_by(Expense.added_at.desc()).all()
|
||||
|
||||
# Tabela wydatków
|
||||
expense_table = [
|
||||
{
|
||||
"title": e.list.title if e.list else "Nieznana",
|
||||
@@ -982,34 +991,32 @@ def user_expenses():
|
||||
for e in expenses
|
||||
]
|
||||
|
||||
# Tylko listy z tych wydatków
|
||||
list_ids = {e.list_id for e in expenses}
|
||||
lists = (
|
||||
ShoppingList.query
|
||||
.filter(
|
||||
or_(
|
||||
ShoppingList.owner_id == current_user.id,
|
||||
ShoppingList.is_public == True
|
||||
)
|
||||
)
|
||||
ShoppingList.query.filter(ShoppingList.id.in_(list_ids))
|
||||
.order_by(ShoppingList.created_at.desc())
|
||||
.all()
|
||||
)
|
||||
|
||||
# Lista zsumowanych wydatków per lista (z uwzględnieniem filtra dat)
|
||||
lists_data = [
|
||||
{
|
||||
"id": l.id,
|
||||
"title": l.title,
|
||||
"created_at": l.created_at,
|
||||
"total_expense": sum(e.amount for e in l.expenses),
|
||||
"owner_username": l.owner.username if l.owner else "?"
|
||||
"total_expense": sum(
|
||||
e.amount
|
||||
for e in l.expenses
|
||||
if (not start or not end) or (e.added_at >= start and e.added_at < end)
|
||||
),
|
||||
"owner_username": l.owner.username if l.owner else "?",
|
||||
}
|
||||
for l in lists
|
||||
]
|
||||
|
||||
|
||||
return render_template(
|
||||
"user_expenses.html",
|
||||
expense_table=expense_table,
|
||||
lists_data=lists_data
|
||||
"user_expenses.html", expense_table=expense_table, lists_data=lists_data
|
||||
)
|
||||
|
||||
|
||||
@@ -1028,7 +1035,7 @@ def user_expenses_data():
|
||||
try:
|
||||
start = datetime.strptime(start_date, "%Y-%m-%d")
|
||||
end = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
|
||||
query = query.filter(Expense.timestamp >= start, Expense.timestamp < end)
|
||||
query = query.filter(Expense.added_at >= start, Expense.added_at < end)
|
||||
except ValueError:
|
||||
return jsonify({"error": "Błędne daty"}), 400
|
||||
|
||||
@@ -2332,7 +2339,6 @@ def handle_add_item(data):
|
||||
)
|
||||
|
||||
|
||||
|
||||
@socketio.on("check_item")
|
||||
def handle_check_item(data):
|
||||
# item = Item.query.get(data["item_id"])
|
||||
@@ -2420,7 +2426,6 @@ def handle_request_full_list(data):
|
||||
emit("full_list", {"items": items_data}, to=request.sid)
|
||||
|
||||
|
||||
|
||||
@socketio.on("update_note")
|
||||
def handle_update_note(data):
|
||||
item_id = data["item_id"]
|
||||
@@ -2490,16 +2495,6 @@ def handle_unmark_not_purchased(data):
|
||||
emit("item_unmarked_not_purchased", {"item_id": item.id}, to=str(item.list_id))
|
||||
|
||||
|
||||
""" @socketio.on('receipt_uploaded')
|
||||
def handle_receipt_uploaded(data):
|
||||
list_id = data['list_id']
|
||||
url = data['url']
|
||||
|
||||
emit('receipt_added', {
|
||||
'url': url
|
||||
}, to=str(list_id), include_self=False) """
|
||||
|
||||
|
||||
@app.cli.command("create_db")
|
||||
def create_db():
|
||||
db.create_all()
|
||||
|
Reference in New Issue
Block a user