# app.py
"""Flask service exposing PaddleOCR through a single JSON endpoint.

``POST /ocr`` accepts a JSON object with an ``image`` data URL (PNG/JPEG,
base64 encoded) plus optional OCR switches, runs PaddleOCR on the decoded
image and returns a flat list of ``{"text", "confidence", "box"}`` entries.
"""
import base64
import logging
import os
import re
import tempfile
from typing import Any, Dict, List, Optional, Tuple

from flask import Flask, request, jsonify
from paddleocr import PaddleOCR

app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# (text, confidence, box) triple extracted from one recognised line.
_Fields = Tuple[Any, Any, Any]


def dataurl_to_imagefile(dataurl: str) -> str:
    """Decode an image data URL into a temporary file and return its path.

    The caller owns the returned file and is responsible for deleting it.

    Args:
        dataurl: String of the form ``data:image/(png|jpeg|jpg);base64,<payload>``.

    Returns:
        Path of the temporary file; its suffix is the lower-cased image type.

    Raises:
        ValueError: The string is not a supported data URL, the payload is
            not valid base64, or the decoded payload is empty.
        IOError: The temporary file could not be written.
    """
    match = re.match(
        r"data:image/(png|jpeg|jpg);base64,(.*)",
        dataurl,
        re.DOTALL | re.IGNORECASE,
    )
    if not match:
        raise ValueError("Invalid data URL format. Expected: data:image/(png|jpeg|jpg);base64,...")
    ext = match.group(1).lower()
    b64data = match.group(2)

    try:
        img_data = base64.b64decode(b64data)
    except ValueError as e:
        # binascii.Error (bad padding / characters) is a ValueError subclass.
        raise ValueError(f"Base64 decode failed: {e}") from e
    if not img_data:
        # Reject early so the client gets a 400 instead of an OCR failure.
        raise ValueError("Image data is empty")

    # delete=False: the file must outlive this function so OCR can read it.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}")
    try:
        with tmp:
            tmp.write(img_data)
    except Exception as e:
        # Best-effort cleanup of the partially written file.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise IOError(f"Failed to write temp file: {e}") from e
    return tmp.name


def _to_jsonable(value: Any) -> Any:
    """Convert array-like values (anything exposing ``tolist()``) to plain Python.

    Lists and tuples are walked recursively; every other value is returned
    unchanged. This keeps ``jsonify`` from failing on array objects.
    """
    if isinstance(value, list):
        return [_to_jsonable(v) for v in value]
    if isinstance(value, tuple):
        return tuple(_to_jsonable(v) for v in value)
    tolist = getattr(value, "tolist", None)
    if callable(tolist):
        return tolist()
    return value


def _mapping_fields(item: Dict[str, Any]) -> _Fields:
    """Extract (text, confidence, box) from a dict-shaped line."""
    return (
        item.get("text") or item.get("content"),
        item.get("confidence"),
        item.get("box") or item.get("bbox"),
    )


def _sequence_fields(line: Any) -> Optional[_Fields]:
    """Extract fields from ``[box, (text, conf)]`` or ``[box, text, conf]``.

    Returns ``None`` when *line* (a list or tuple) matches neither layout.
    """
    if len(line) >= 2 and isinstance(line[1], (list, tuple)) and len(line[1]) >= 2:
        return line[1][0], line[1][1], line[0]
    if len(line) >= 3 and isinstance(line[1], str):
        return line[1], line[2], line[0]
    return None


def _is_text_line(item: Any) -> bool:
    """Return True when *item* is itself one line whose text slot holds a str.

    Used to tell a flat single-page result (a list of lines) apart from a
    list of pages: in a page, the second element is a line, never a string
    or a ``(str, conf)`` pair.
    """
    if not isinstance(item, (list, tuple)) or len(item) < 2:
        return False
    second = item[1]
    if isinstance(second, (list, tuple)):
        return len(second) >= 2 and isinstance(second[0], str)
    return len(item) >= 3 and isinstance(second, str)


def _item_at(seq: Any, index: int) -> Any:
    """Return ``seq[index]`` or ``None`` when *seq* is missing or too short."""
    if seq is None:
        return None
    try:
        return seq[index] if index < len(seq) else None
    except TypeError:
        return None


def _rec_fields(block: Dict[str, Any]) -> Optional[List[_Fields]]:
    """Expand a result block holding parallel ``rec_*`` arrays into line triples.

    Returns ``None`` when *block* has no list/tuple under ``rec_texts``.
    NOTE(review): the ``rec_texts`` / ``rec_scores`` / ``rec_polys`` /
    ``rec_boxes`` key names follow the PaddleOCR 3.x result format -- confirm
    against the installed version.
    """
    texts = block.get("rec_texts")
    if not isinstance(texts, (list, tuple)):
        return None
    scores = block.get("rec_scores")
    # Explicit ``is None`` test: ``or`` would call bool() on an array value.
    boxes = block.get("rec_polys")
    if boxes is None:
        boxes = block.get("rec_boxes")
    return [
        (text, _item_at(scores, i), _item_at(boxes, i))
        for i, text in enumerate(texts)
    ]


def parse_ocr_result(result: Any) -> List[Dict[str, Any]]:
    """Flatten a PaddleOCR return value into a list of line dicts.

    The shape of PaddleOCR's return value varies with the library version and
    the way it is called, so the common layouts are handled as broadly as
    possible:

    * a dict with ``data`` / ``pages`` / ``results`` holding pages,
    * a dict (or list of dicts) holding parallel ``rec_texts`` arrays,
    * a list of pages, each a list of lines,
    * a flat list of lines (single page),
    * lines shaped ``{"text"|"content", "confidence", "box"|"bbox"}``,
      ``[box, (text, conf)]`` or ``[box, text, conf]``.

    ``None`` results and ``None`` pages (no text found) contribute nothing.
    Anything unrecognised is stringified into the ``text`` field.

    Returns:
        List of ``{"text": ..., "confidence": ... | None, "box": ... | None}``.
    """
    out: List[Dict[str, Any]] = []

    def push(text: Any, conf: Any = None, box: Any = None) -> None:
        # Entries without text carry no information for the client.
        if text is None:
            return
        out.append(
            {"text": text, "confidence": _to_jsonable(conf), "box": _to_jsonable(box)}
        )

    if result is None:
        return out

    # --- dict-shaped result -------------------------------------------------
    if isinstance(result, dict):
        rec = _rec_fields(result)
        if rec is not None:
            for fields in rec:
                push(*fields)
            return out

        pages = result.get("data") or result.get("pages") or result.get("results") or []
        if isinstance(pages, list) and pages:
            for page in pages:
                if page is None:
                    continue
                if isinstance(page, dict) and "data" in page:
                    for line in page["data"]:
                        if isinstance(line, dict):
                            push(*_mapping_fields(line))
                            continue
                        # Generic positional fallback: [box, text, conf].
                        try:
                            text = line[1] if len(line) > 1 else None
                            conf = line[2] if len(line) > 2 else None
                            box = line[0] if len(line) > 0 else None
                        except (TypeError, IndexError, KeyError):
                            continue
                        push(text, conf, box)
                elif isinstance(page, list):
                    for line in page:
                        if isinstance(line, dict):
                            push(*_mapping_fields(line))
                        elif isinstance(line, (list, tuple)):
                            fields = _sequence_fields(line)
                            push(*(fields if fields is not None else (str(line), None, None)))
                else:
                    # Unknown page type: coerce to text.
                    push(str(page), None, None)
        elif "text" in result:
            # Dict without pages: maybe a single-result structure.
            push(result.get("text"), result.get("confidence"), result.get("box"))
        else:
            push(str(result), None, None)
        return out

    # --- list-shaped result -------------------------------------------------
    if isinstance(result, list):
        for page in result:
            if page is None:
                # No text detected on this page.
                continue
            if _is_text_line(page):
                # Flat single-page result: the element is a line, not a page.
                push(*_sequence_fields(page))
            elif isinstance(page, list):
                for line in page:
                    if isinstance(line, dict):
                        push(*_mapping_fields(line))
                    elif isinstance(line, (list, tuple)):
                        fields = _sequence_fields(line)
                        if fields is None:
                            # Last resort: use the first string found in the line.
                            first_str = next(
                                (el for el in line if isinstance(el, str)), None
                            )
                            fields = (first_str, None, line[0] if line else None)
                        push(*fields)
            elif isinstance(page, dict):
                rec = _rec_fields(page)
                if rec is not None:
                    for fields in rec:
                        push(*fields)
                elif "text" in page:
                    push(page.get("text"), page.get("confidence"), page.get("box"))
                else:
                    push(str(page), None, None)
            else:
                # Neither list nor dict: coerce to text.
                push(str(page), None, None)
        return out

    # --- unknown type -------------------------------------------------------
    push(str(result), None, None)
    return out


def _get_bool_param(req_json: Dict[str, Any], key: str, default: bool = False) -> bool:
    """Read a strictly boolean request parameter.

    Raises:
        ValueError: The value is present but is not a JSON boolean.
    """
    val = req_json.get(key, default)
    if not isinstance(val, bool):
        raise ValueError(f"Parameter '{key}' must be boolean")
    return val


@app.route("/ocr", methods=["POST"])
def ocr_api():
    """Run OCR on the image supplied in the JSON request body.

    Request fields:
        image: required data URL (``data:image/...;base64,...``).
        use_doc_orientation_classify, use_doc_unwarping,
        use_textline_orientation: optional booleans, default ``False``.
        lang: optional string, default ``"japan"``.

    Responses:
        200 ``{"results": [...]}`` on success.
        400 ``{"error": ...}`` for an unparsable body or invalid parameters.
        500 ``{"error": ...}`` for I/O or unexpected failures.

    The temporary image file is always removed before returning.
    """
    img_path = None
    try:
        # force=True parses the body regardless of Content-Type. Any parse
        # failure is the client's fault, so answer 400 rather than letting it
        # reach the generic 500 handler below.
        try:
            req_json = request.get_json(force=True)
        except Exception:
            return jsonify({"error": "Content-Type must be application/json or body must be valid JSON"}), 400

        if not isinstance(req_json, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        logger.info("Received JSON keys: %s", list(req_json.keys()))

        dataurl = req_json.get("image")
        if not dataurl or not isinstance(dataurl, str):
            return jsonify({"error": "Missing or invalid 'image' field (expect data:image/...;base64,...)"}), 400

        # Boolean params are validated strictly (ValueError -> 400 below).
        use_doc_orientation_classify = _get_bool_param(req_json, "use_doc_orientation_classify", False)
        use_doc_unwarping = _get_bool_param(req_json, "use_doc_unwarping", False)
        use_textline_orientation = _get_bool_param(req_json, "use_textline_orientation", False)

        lang = req_json.get("lang", "japan")
        if not isinstance(lang, str):
            return jsonify({"error": "Parameter 'lang' must be a string"}), 400

        ocr_params = {
            "use_doc_orientation_classify": use_doc_orientation_classify,
            "use_doc_unwarping": use_doc_unwarping,
            "use_textline_orientation": use_textline_orientation,
            "lang": lang,
        }

        img_path = dataurl_to_imagefile(dataurl)

        # NOTE(review): a new PaddleOCR instance is built on every request,
        # which is presumably expensive; consider caching per parameter set
        # once the instance's thread-safety has been confirmed.
        ocr = PaddleOCR(**ocr_params)

        # Pick whichever entry point this PaddleOCR build provides, and call
        # it with the image path only so no version-specific keyword
        # arguments are passed.
        if callable(getattr(ocr, "predict", None)):
            result = ocr.predict(img_path)
        elif callable(getattr(ocr, "ocr", None)):
            result = ocr.ocr(img_path)
        else:
            raise RuntimeError("No usable OCR method found on PaddleOCR instance")

        parsed = parse_ocr_result(result)
        return jsonify({"results": parsed})

    except ValueError as e:
        logger.error("ValueError: %s", e)
        return jsonify({"error": f"Value error: {e}"}), 400
    except IOError as e:
        logger.error("IOError: %s", e)
        return jsonify({"error": f"I/O error: {e}"}), 500
    except Exception as e:
        logger.exception("Unexpected error")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
    finally:
        if img_path and os.path.exists(img_path):
            try:
                os.remove(img_path)
            except Exception as e:
                logger.warning("Failed to delete temp file %s: %s", img_path, e)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)