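# app.py
# NOTE: page-header residue from the hosting site ("Spaces: Sleeping") preceded
# this file in the captured copy; it is not part of the program.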
from flask import Flask, request, jsonify | |
from paddleocr import PaddleOCR | |
import base64 | |
import re | |
import tempfile | |
import os | |
import logging | |
from typing import Any, List, Dict | |
# Flask application object used by the request handlers in this module.
app = Flask(__name__)

# Log at INFO level and above; handlers below report through this module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def dataurl_to_imagefile(dataurl: str) -> str:
    """Decode a base64 image data URL into a temporary file and return its path.

    The file is created with ``delete=False``, so the caller owns it and is
    responsible for removing it once it is no longer needed.

    Args:
        dataurl: String of the form ``data:image/(png|jpeg|jpg);base64,<payload>``
            (scheme and subtype are matched case-insensitively).

    Returns:
        Path of the temporary file; its suffix is the lower-cased image subtype.

    Raises:
        ValueError: If the URL does not match the expected format, the payload
            is not valid base64, or the decoded payload is empty.
        IOError: If the temporary file cannot be written.
    """
    match = re.match(
        r"data:image/(png|jpeg|jpg);base64,(.*)",
        dataurl,
        re.DOTALL | re.IGNORECASE,
    )
    if not match:
        raise ValueError("Invalid data URL format. Expected: data:image/(png|jpeg|jpg);base64,...")

    ext = match.group(1).lower()
    # Line breaks/spaces inside the payload are harmless, so drop them before
    # strict validation; any other non-alphabet character is a client error.
    b64data = "".join(match.group(2).split())
    try:
        # validate=True rejects garbage instead of silently discarding it.
        img_data = base64.b64decode(b64data, validate=True)
    except ValueError as e:  # binascii.Error is a ValueError subclass
        raise ValueError(f"Base64 decode failed: {e}") from e
    if not img_data:
        # An empty file would only fail later with an obscure OCR error.
        raise ValueError("Base64 decode failed: image payload is empty")

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}")
    try:
        with tmp:  # closes the handle on success and on failure
            tmp.write(img_data)
    except OSError as e:
        # Best-effort cleanup of the partial file; the write error is what matters.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise IOError(f"Failed to write temp file: {e}") from e
    return tmp.name
def _to_jsonable(value: Any) -> Any:
    """Convert array-like values (anything exposing ``tolist()``) to plain Python."""
    tolist = getattr(value, "tolist", None)
    if callable(tolist):
        return tolist()
    if isinstance(value, list):
        return [_to_jsonable(item) for item in value]
    if isinstance(value, tuple):
        return tuple(_to_jsonable(item) for item in value)
    return value


def _item_at(seq: Any, index: int) -> Any:
    """Return ``seq[index]`` or ``None`` when ``seq`` is missing, short or unindexable."""
    if seq is None:
        return None
    try:
        return seq[index] if index < len(seq) else None
    except TypeError:
        return None


def _is_text_line(item: Any) -> bool:
    """Tell whether ``item`` is itself a ``[box, (text, conf)]`` line rather than a page."""
    return (
        len(item) >= 2
        and isinstance(item[1], (list, tuple))
        and len(item[1]) >= 2
        and isinstance(item[1][0], str)
    )


def _push_dict_line(line: Dict[str, Any], push) -> None:
    """Emit one line given as a dict with text/content, confidence and box/bbox keys."""
    push(
        line.get("text") or line.get("content"),
        line.get("confidence"),
        line.get("box") or line.get("bbox"),
    )


def _push_positional_line(line: Any, push) -> None:
    """Emit one line read positionally as ``[box, text, conf]``; skip unindexable items."""
    try:
        text = line[1] if len(line) > 1 else None
        conf = line[2] if len(line) > 2 else None
        box = line[0] if len(line) > 0 else None
    except (TypeError, IndexError, KeyError):
        return
    push(text, conf, box)


def _push_sequence_line(line: Any, push, stringify_unknown: bool) -> None:
    """Emit one list/tuple line in ``[box, (text, conf)]`` or ``[box, text, conf]`` form."""
    if len(line) >= 2 and isinstance(line[1], (list, tuple)) and len(line[1]) >= 2:
        push(line[1][0], line[1][1], line[0])
    elif len(line) >= 3 and isinstance(line[1], str):
        push(line[1], line[2], line[0])
    elif stringify_unknown:
        push(str(line), None, None)
    else:
        # Last resort: use the first string found anywhere in the line.
        found_text = next((el for el in line if isinstance(el, str)), None)
        push(found_text, None, line[0] if line else None)


def _push_page_lines(page: list, push, stringify_unknown: bool) -> None:
    """Emit every dict or list/tuple line of a page; other line types are skipped."""
    for line in page:
        if isinstance(line, dict):
            _push_dict_line(line, push)
        elif isinstance(line, (list, tuple)):
            _push_sequence_line(line, push, stringify_unknown)


def _push_block(block: Dict[str, Any], push) -> None:
    """Emit a dict that is either a parallel-array result or a single text entry."""
    texts = block.get("rec_texts")
    if isinstance(texts, (list, tuple)):
        # Parallel arrays: one text, score and polygon/box per recognized line.
        scores = block.get("rec_scores")
        boxes = block.get("rec_polys")
        if boxes is None:  # explicit None test: arrays have no unambiguous truth value
            boxes = block.get("rec_boxes")
        for index, text in enumerate(texts):
            push(text, _item_at(scores, index), _item_at(boxes, index))
    elif "text" in block:
        push(block.get("text"), block.get("confidence"), block.get("box"))
    else:
        push(str(block), None, None)


def parse_ocr_result(result: Any) -> List[Dict[str, Any]]:
    """Flatten an OCR result of varying shape into a list of text entries.

    The return value of PaddleOCR differs by version and call style, so the
    common shapes are handled as broadly as possible:

    * ``{"data" | "pages" | "results": [page, ...]}`` where a page is a dict
      with a ``"data"`` list, a list of lines, or a single result dict;
    * ``[page, ...]`` where a page is a list of lines or a result dict;
    * the flat single-page form ``[[box, (text, conf)], ...]``;
    * dicts carrying parallel ``rec_texts`` / ``rec_scores`` /
      ``rec_polys`` (or ``rec_boxes``) arrays;
    * a single ``{"text": ..., "confidence": ..., "box": ...}`` dict.

    ``None`` results and ``None`` pages (nothing detected) produce no
    entries. Anything unrecognized is stringified into one entry.

    Args:
        result: Raw value returned by the OCR call.

    Returns:
        A list of ``{"text": str, "confidence": float | None,
        "box": list | None}`` dicts. Confidence and box values that expose
        ``tolist()`` are converted to plain Python so they can be serialized.
    """
    out: List[Dict[str, Any]] = []

    def push(text, conf=None, box=None):
        # Entries without text carry no information for the caller.
        if text is None:
            return
        out.append({"text": text, "confidence": _to_jsonable(conf), "box": _to_jsonable(box)})

    if result is None:
        return out

    if isinstance(result, dict):
        pages = result.get("data") or result.get("pages") or result.get("results") or []
        if isinstance(pages, list) and pages:
            for page in pages:
                if page is None:
                    continue
                if isinstance(page, dict) and "data" in page:
                    for line in page["data"] or []:
                        if isinstance(line, dict):
                            _push_dict_line(line, push)
                        else:
                            _push_positional_line(line, push)
                elif isinstance(page, list):
                    _push_page_lines(page, push, stringify_unknown=True)
                elif isinstance(page, dict):
                    _push_block(page, push)
                else:
                    # Unknown page type: coerce to text.
                    push(str(page), None, None)
        else:
            # No page container: the dict itself is the result structure.
            _push_block(result, push)
        return out

    if isinstance(result, list):
        for page in result:
            if page is None:
                # Nothing detected on this page; do not emit the text "None".
                continue
            if isinstance(page, (list, tuple)) and _is_text_line(page):
                # Flat single-page result: this element is already a line.
                _push_sequence_line(page, push, stringify_unknown=False)
            elif isinstance(page, list):
                _push_page_lines(page, push, stringify_unknown=False)
            elif isinstance(page, dict):
                _push_block(page, push)
            else:
                push(str(page), None, None)
        return out

    # Unknown top-level type: stringify.
    push(str(result), None, None)
    return out
# NOTE(review): SOURCE shows no route registration for this view, which left it
# unreachable; "/ocr" is an assumed path - confirm against the real clients.
@app.route("/ocr", methods=["POST"])
def ocr_api():
    """Run OCR on a base64-encoded image supplied in a JSON request body.

    Request JSON fields:
        image: required ``data:image/(png|jpeg|jpg);base64,...`` string.
        use_doc_orientation_classify, use_doc_unwarping,
        use_textline_orientation: optional booleans, default ``False``.
        lang: optional string, default ``"japan"``.

    Returns:
        ``{"results": [...]}`` on success; otherwise ``{"error": "..."}`` with
        status 400 for invalid input or 500 for I/O and unexpected errors.
        The temporary image file is always removed before returning.
    """
    img_path = None
    try:
        # force=True parses the body whatever the Content-Type is; silent=True
        # yields None on malformed JSON so it is reported as 400 instead of
        # escaping into the generic 500 handler below.
        req_json = request.get_json(force=True, silent=True)
        if req_json is None:
            return jsonify({"error": "Content-Type must be application/json or body must be valid JSON"}), 400
        if not isinstance(req_json, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400
        logger.info("Received JSON keys: %s", list(req_json.keys()))

        dataurl = req_json.get("image")
        if not dataurl or not isinstance(dataurl, str):
            return jsonify({"error": "Missing or invalid 'image' field (expect data:image/...;base64,...)"}), 400

        def get_bool_param(key: str, default: bool = False) -> bool:
            # Strict validation: only real JSON booleans are accepted.
            val = req_json.get(key, default)
            if not isinstance(val, bool):
                raise ValueError(f"Parameter '{key}' must be boolean")
            return val

        use_doc_orientation_classify = get_bool_param("use_doc_orientation_classify", False)
        use_doc_unwarping = get_bool_param("use_doc_unwarping", False)
        use_textline_orientation = get_bool_param("use_textline_orientation", False)

        lang = req_json.get("lang", "japan")
        if not isinstance(lang, str):
            return jsonify({"error": "Parameter 'lang' must be a string"}), 400

        ocr_params = {
            "use_doc_orientation_classify": use_doc_orientation_classify,
            "use_doc_unwarping": use_doc_unwarping,
            "use_textline_orientation": use_textline_orientation,
            "lang": lang,
        }

        img_path = dataurl_to_imagefile(dataurl)

        # A new engine is built per request because its options come from the request.
        ocr = PaddleOCR(**ocr_params)

        # Call the method that was actually detected: predict(image) when it
        # exists, otherwise the classic ocr(image) entry point.
        if hasattr(ocr, "predict"):
            result = ocr.predict(img_path)
        elif hasattr(ocr, "ocr"):
            result = ocr.ocr(img_path)
        else:
            raise RuntimeError("No usable OCR method found on PaddleOCR instance")

        parsed = parse_ocr_result(result)
        return jsonify({"results": parsed})
    except ValueError as e:
        # Raised by parameter validation and data-URL decoding: client error.
        logger.error("ValueError: %s", e)
        return jsonify({"error": f"Value error: {e}"}), 400
    except IOError as e:
        logger.error("IOError: %s", e)
        return jsonify({"error": f"I/O error: {e}"}), 500
    except Exception as e:
        logger.exception("Unexpected error")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
    finally:
        # Remove the temporary image whatever happened above.
        if img_path and os.path.exists(img_path):
            try:
                os.remove(img_path)
            except Exception as e:
                logger.warning("Failed to delete temp file %s: %s", img_path, e)
if __name__ == "__main__":
    # Direct execution only: serve on every network interface, port 7860.
    app.run(host="0.0.0.0", port=7860)