ocr #3
11
Dockerfile
11
Dockerfile
@@ -4,6 +4,17 @@ FROM python:3.13-slim
|
||||
# Ustawiamy katalog roboczy
|
||||
WORKDIR /app
|
||||
|
||||
# Zależności systemowe do OCR, obrazów, tesseract i języka PL
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-pol \
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxrender1 \
|
||||
libxext6 \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Kopiujemy wymagania
|
||||
COPY requirements.txt requirements.txt
|
||||
|
||||
|
144
app.py
144
app.py
@@ -9,6 +9,9 @@ import psutil
|
||||
import secrets
|
||||
import hashlib
|
||||
|
||||
import re
|
||||
import tempfile
|
||||
|
||||
from pillow_heif import register_heif_opener
|
||||
|
||||
from datetime import datetime, timedelta, UTC, timezone
|
||||
@@ -49,6 +52,11 @@ from sqlalchemy import func, extract
|
||||
from collections import defaultdict, deque
|
||||
from functools import wraps
|
||||
|
||||
# OCR
|
||||
from collections import Counter
|
||||
import pytesseract
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
app.config.from_object(Config)
|
||||
register_heif_opener() # pillow_heif dla HEIC
|
||||
@@ -335,6 +343,89 @@ def _receipt_error(message):
|
||||
return redirect(request.referrer or url_for("main_page"))
|
||||
|
||||
|
||||
############# OCR ###########################
|
||||
|
||||
|
||||
def preprocess_image_for_tesseract(pil_image):
    """Prepare a receipt photo for Tesseract OCR.

    The image is converted to grayscale, upscaled by a factor of two and
    binarized with adaptive mean thresholding.

    Args:
        pil_image: A ``PIL.Image`` instance to preprocess.

    Returns:
        A new ``PIL.Image`` built from the thresholded pixel array.
    """
    # Imported lazily, as in the original, so the heavy dependencies are
    # only loaded when OCR is actually requested.
    from PIL import Image
    import numpy as np
    import cv2

    # PIL.Image -> NumPy array in single-channel ("L") grayscale.
    grayscale = np.array(pil_image.convert("L"))

    # Upscale to make small print more legible for the OCR engine.
    upscaled = cv2.resize(
        grayscale,
        None,
        fx=2.0,
        fy=2.0,
        interpolation=cv2.INTER_LINEAR,
    )

    # Adaptive thresholding copes better with uneven backgrounds than a
    # single global threshold would.
    binarized = cv2.adaptiveThreshold(
        upscaled,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        blockSize=15,
        C=10,
    )

    # pytesseract is handed a PIL.Image again.
    return Image.fromarray(binarized)
|
||||
|
||||
|
||||
def extract_total_tesseract(image):
    """Run Tesseract OCR on a receipt image and guess the total amount.

    Every non-blank OCR line is scanned for price-like tokens:

    * strict matches: 1-4 digits, ``.`` or ``,``, two digits
      (accepted when 0.1 <= value <= 100000);
    * fuzzy matches: a single leading digit (or ``O``/``o`` misread as
      zero), a separator-like character and two digits
      (accepted when 0.1 <= value <= 100).

    Candidates found on lines containing a "total" keyword
    (``suma``/``sumo``/``sum``, ``razem``, ``zapłaty``) take precedence;
    otherwise the largest candidate overall is used.

    Args:
        image: Image object passed straight to
            ``pytesseract.image_to_string``.

    Returns:
        tuple: ``(amount, lines)`` where ``amount`` is the detected value
        rounded to two decimals (``0.0`` when nothing was found) and
        ``lines`` is the list of raw OCR text lines.
    """
    text = pytesseract.image_to_string(image, lang="pol", config="--psm 6")
    lines = text.splitlines()
    candidates = []

    # Compile once instead of re-resolving the patterns for every line.
    amount_regex = re.compile(r"\d{1,4}[.,]\d{2}")
    fuzzy_regex = re.compile(r"[\dOo][.,:;g9zZ][\d]{2}")
    keyword_regex = re.compile(r"sum[aąo]?|razem|zapłaty")

    # Single-pass equivalent of the chained ``.replace()`` calls: none of
    # the replacement characters is itself a key, so order is irrelevant.
    # NOTE(review): ``g``/``z``/``Z`` (and ``9``) can only match in the
    # separator position of ``fuzzy_regex``; mapping them to the digit "9"
    # yields a four-digit number >= 900, which the ``<= 100`` check below
    # always rejects. If they were meant as misread decimal separators
    # they should map to "." instead - confirm the intent before changing.
    fuzzy_fixes = str.maketrans(
        {
            "O": "0",
            "o": "0",
            ":": ".",
            ";": ".",
            ",": ".",
            "g": "9",
            "z": "9",
            "Z": "9",
        }
    )

    for line in lines:
        if not line.strip():
            continue

        for match in amount_regex.findall(line):
            try:
                val = float(match.replace(",", "."))
            except ValueError:
                # Only a malformed number is expected here; a bare
                # ``except`` would also hide KeyboardInterrupt/SystemExit.
                continue
            if 0.1 <= val <= 100000:
                candidates.append((val, line))

        for match in fuzzy_regex.findall(line):
            try:
                val = float(match.translate(fuzzy_fixes))
            except ValueError:
                continue
            if 0.1 <= val <= 100:
                candidates.append((val, line))

    # Values on lines that mention a total keyword win over everything else.
    preferred = [
        val for val, line in candidates if keyword_regex.search(line.lower())
    ]
    if preferred:
        return round(max(preferred), 2), lines

    if candidates:
        return round(max(val for val, _ in candidates), 2), lines

    return 0.0, lines
|
||||
|
||||
|
||||
############# END OCR #######################
|
||||
|
||||
|
||||
# zabezpieczenie logowania do systemu - błędne hasła
|
||||
def is_ip_blocked(ip):
|
||||
now = time.time()
|
||||
@@ -1037,6 +1128,59 @@ def reorder_items():
|
||||
return jsonify(success=True)
|
||||
|
||||
|
||||
# OCR
|
||||
@app.route("/lists/<int:list_id>/analyze", methods=["POST"])
@login_required
def analyze_receipts_for_list(list_id):
    """Run OCR over every receipt attached to a shopping list.

    Only the owner of the list may trigger the analysis. Receipts whose
    file is missing from ``UPLOAD_FOLDER`` are skipped; receipts that fail
    during OCR are reported with an amount of ``0.0`` and no debug text.

    Args:
        list_id: Primary key of the ``ShoppingList`` to analyze.

    Returns:
        A JSON response ``{"results": [...], "total": <float>}`` where each
        result carries ``id``, ``filename``, ``amount`` and ``debug_text``,
        or ``({"error": ...}, 403)`` when the list does not exist or is not
        owned by the current user.
    """
    list_obj = db.session.get(ShoppingList, list_id)
    if not list_obj or list_obj.owner_id != current_user.id:
        return jsonify({"error": "Brak dostępu"}), 403

    receipt_objs = Receipt.query.filter_by(list_id=list_id).all()
    results = []
    total = 0.0

    for receipt in receipt_objs:
        filepath = os.path.join(app.config["UPLOAD_FOLDER"], receipt.filename)
        if not os.path.exists(filepath):
            continue

        try:
            # The original had identical branches for ".webp" and other
            # extensions, so a single code path is enough. The context
            # manager closes the underlying file handle; ``convert``
            # returns an independent, fully loaded copy.
            with Image.open(filepath) as source_image:
                raw_image = source_image.convert("RGB")

            image = preprocess_image_for_tesseract(raw_image)
            value, lines = extract_total_tesseract(image)
        except Exception:
            # Best effort: one unreadable receipt must not fail the whole
            # request, but the traceback should reach the application log.
            app.logger.exception("OCR error for %s", receipt.filename)
            value = 0.0
            lines = []

        results.append(
            {
                "id": receipt.id,
                "filename": receipt.filename,
                "amount": round(value, 2),
                "debug_text": lines,
            }
        )
        total += value

    return jsonify({"results": results, "total": round(total, 2)})
|
||||
|
||||
|
||||
@app.route("/admin")
|
||||
@login_required
|
||||
@admin_required
|
||||
|
11
config.py
11
config.py
@@ -10,6 +10,13 @@ class Config:
|
||||
DEFAULT_ADMIN_PASSWORD = os.environ.get("DEFAULT_ADMIN_PASSWORD", "admin123")
|
||||
UPLOAD_FOLDER = os.environ.get("UPLOAD_FOLDER", "uploads")
|
||||
AUTHORIZED_COOKIE_VALUE = os.environ.get("AUTHORIZED_COOKIE_VALUE", "cookievalue")
|
||||
AUTH_COOKIE_MAX_AGE = int(os.environ.get("AUTH_COOKIE_MAX_AGE", 86400))
|
||||
try:
|
||||
AUTH_COOKIE_MAX_AGE = int(os.environ.get("AUTH_COOKIE_MAX_AGE", "86400") or "86400")
|
||||
except ValueError:
|
||||
AUTH_COOKIE_MAX_AGE = 86400
|
||||
|
||||
HEALTHCHECK_TOKEN = os.environ.get("HEALTHCHECK_TOKEN", "alamapsaikota1234")
|
||||
SESSION_TIMEOUT_MINUTES = int(os.environ.get("SESSION_TIMEOUT_MINUTES", 10080))
|
||||
try:
|
||||
SESSION_TIMEOUT_MINUTES = int(os.environ.get("SESSION_TIMEOUT_MINUTES", "10080") or "10080")
|
||||
except ValueError:
|
||||
SESSION_TIMEOUT_MINUTES = 10080
|
||||
|
@@ -7,4 +7,7 @@ eventlet
|
||||
Werkzeug
|
||||
Pillow
|
||||
psutil
|
||||
pillow-heif
|
||||
pillow-heif
|
||||
|
||||
pytesseract
|
||||
opencv-python-headless
|
63
static/js/receipt_analysis.js
Normal file
63
static/js/receipt_analysis.js
Normal file
@@ -0,0 +1,63 @@
|
||||
// Once the DOM is ready, wire the "analyze" button (when the page has one)
// to start the OCR analysis for the current list.
document.addEventListener("DOMContentLoaded", function () {
    const button = document.getElementById("analyzeBtn");
    if (!button) {
        return;
    }

    // LIST_ID is read at click time, exactly as in the arrow-function form.
    button.addEventListener("click", function () {
        return analyzeReceipts(LIST_ID);
    });
});
|
||||
|
||||
/**
 * Request OCR analysis of all receipts of a list and render the results.
 *
 * Renders one editable amount input per receipt plus "add" buttons into
 * the #analysisResults element and stores the raw results on
 * window._ocr_results.
 *
 * @param {number|string} listId - Identifier of the list to analyze.
 * @returns {Promise<void>}
 */
async function analyzeReceipts(listId) {
    const resultsDiv = document.getElementById("analysisResults");
    resultsDiv.innerHTML = `<div class="text-info">⏳ Trwa analiza paragonów...</div>`;

    // File names come from the server response and end up inside innerHTML,
    // so they must be HTML-escaped before interpolation.
    const htmlEntities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    const start = performance.now(); // timing start

    try {
        const res = await fetch(`/lists/${listId}/analyze`, { method: "POST" });

        // Fail explicitly on HTTP errors (e.g. 403) instead of relying on a
        // TypeError from the missing "total" field further below.
        if (!res.ok) {
            throw new Error(`OCR analysis request failed with HTTP ${res.status}`);
        }

        const data = await res.json();

        const duration = ((performance.now() - start) / 1000).toFixed(2); // timing stop

        let html = `<p><b>📊 Łącznie wykryto:</b> ${data.total.toFixed(2)} PLN</p>`;
        html += `<p class="text-secondary"><small>⏱ Czas analizy OCR: ${duration} sek.</small></p>`;

        data.results.forEach((r, i) => {
            html += `
                <div class="mb-2">
                    <span class="text-light">${escapeHtml(r.filename)}</span>:
                    <input type="number" id="amount-${i}" value="${r.amount}" step="0.01" class="form-control d-inline-block bg-dark text-white border-light rounded" style="width: 120px;">
                    <button onclick="emitExpense(${i})" class="btn btn-sm btn-outline-success ms-2">➕ Dodaj</button>
                </div>`;
        });

        // A bulk button only makes sense with more than one receipt.
        if (data.results.length > 1) {
            html += `<button onclick="emitAllExpenses(${data.results.length})" class="btn btn-success mt-3">➕ Dodaj wszystkie</button>`;
        }

        resultsDiv.innerHTML = html;
        window._ocr_results = data.results;

    } catch (err) {
        resultsDiv.innerHTML = `<div class="text-danger">❌ Wystąpił błąd podczas analizy.</div>`;
        console.error(err);
    }
}
|
||||
|
||||
|
||||
/**
 * Emit the amount entered for the i-th OCR result as a new expense.
 *
 * The amount input is disabled after a successful emit; an input that is
 * already disabled (or missing) is skipped so the same receipt cannot be
 * added twice, e.g. by clicking again or via "add all" after a single add.
 *
 * @param {number} i - Index of the OCR result / amount input.
 */
function emitExpense(i) {
    const amountInput = document.getElementById(`amount-${i}`);
    if (!amountInput || amountInput.disabled) {
        return;
    }

    const val = parseFloat(amountInput.value);
    if (!isNaN(val) && val > 0) {
        socket.emit('add_expense', {
            list_id: LIST_ID,
            amount: val
        });
        amountInput.disabled = true;
    }
}
|
||||
|
||||
/**
 * Emit expenses for the first n OCR results, in index order.
 *
 * @param {number} n - Number of results to emit (indices 0 .. n-1).
 */
function emitAllExpenses(n) {
    let index = 0;
    while (index < n) {
        emitExpense(index);
        index += 1;
    }
}
|
@@ -106,6 +106,21 @@
|
||||
<div class="collapse" id="receiptSection">
|
||||
{% set receipt_pattern = 'list_' ~ list.id %}
|
||||
|
||||
{% if receipt_files %}
|
||||
<hr>
|
||||
<div class="mt-3 p-3 border border-secondary rounded bg-dark text-white" id="receiptAnalysisBlock">
|
||||
<h5>🧠 Analiza paragonów (OCR)</h5>
|
||||
<p class="text-small">System spróbuje automatycznie rozpoznać kwoty z dodanych paragonów.</p>
|
||||
|
||||
<button id="analyzeBtn" class="btn btn-outline-info mb-3">
|
||||
🔍 Zleć analizę OCR
|
||||
</button>
|
||||
|
||||
<div id="analysisResults" class="mt-2"></div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
<h5 class="mt-4">📸 Paragony dodane do tej listy</h5>
|
||||
|
||||
<div class="row g-3 mt-2" id="receiptGallery">
|
||||
@@ -192,7 +207,7 @@
|
||||
<script src="{{ url_for('static_bp.serve_js', filename='clickable_row.js') }}"></script>
|
||||
<script src="{{ url_for('static_bp.serve_js', filename='receipt_section.js') }}"></script>
|
||||
<script src="{{ url_for('static_bp.serve_js', filename='receipt_upload.js') }}"></script>
|
||||
|
||||
<script src="{{ url_for('static_bp.serve_js', filename='receipt_analysis.js') }}"></script>
|
||||
<script>
|
||||
setupList({{ list.id }}, '{{ current_user.username if current_user.is_authenticated else 'Gość' }}');
|
||||
</script>
|
||||
|
Reference in New Issue
Block a user