Spaces:
Sleeping
Sleeping
File size: 9,528 Bytes
b52aac6 02cf497 39d7e0d b8d9d04 b52aac6 02cf497 b8d9d04 b52aac6 b8d9d04 b52aac6 02cf497 b52aac6 39d7e0d b8d9d04 b52aac6 b8d9d04 b52aac6 b8d9d04 b52aac6 02cf497 b8d9d04 02cf497 b52aac6 b8d9d04 b52aac6 39d7e0d b52aac6 b8d9d04 39d7e0d b8d9d04 b52aac6 b8d9d04 b52aac6 b8d9d04 02cf497 39d7e0d b8d9d04 39d7e0d b52aac6 39d7e0d 02cf497 b52aac6 08db83f b52aac6 39d7e0d b52aac6 02cf497 b8d9d04 b52aac6 b8d9d04 b52aac6 b8d9d04 02cf497 b52aac6 b8d9d04 b52aac6 02cf497 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
# app.py
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
import re
import tempfile
import os
import logging
from typing import Any, List, Dict
# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
# Configure root logging at INFO so the request/diagnostic messages below are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the request handler.
logger = logging.getLogger(__name__)
def dataurl_to_imagefile(dataurl: str) -> str:
    """Decode a base64 image data URL into a temporary file and return its path.

    The caller owns the returned file and is responsible for deleting it.

    Args:
        dataurl: A string of the form ``data:image/(png|jpeg|jpg);base64,...``.
            The MIME subtype is matched case-insensitively and whitespace
            inside the payload (e.g. line wrapping) is ignored.

    Returns:
        Path of a newly created temp file whose suffix matches the image type.

    Raises:
        ValueError: If the URL does not match the expected format, the payload
            is not valid base64, or the decoded image is empty.
        IOError: If the temporary file cannot be written.
    """
    match = re.match(
        r"data:image/(png|jpeg|jpg);base64,(.*)",
        dataurl,
        re.DOTALL | re.IGNORECASE,
    )
    if not match:
        raise ValueError("Invalid data URL format. Expected: data:image/(png|jpeg|jpg);base64,...")

    ext = match.group(1).lower()
    # Strip whitespace first so wrapped payloads still pass strict validation.
    b64data = "".join(match.group(2).split())
    try:
        # validate=True rejects stray characters instead of silently dropping
        # them, which would otherwise yield a corrupt or empty image.
        img_data = base64.b64decode(b64data, validate=True)
    except ValueError as e:  # binascii.Error is a ValueError subclass
        raise ValueError(f"Base64 decode failed: {e}") from e
    if not img_data:
        raise ValueError("Base64 decode failed: image data is empty")

    # delete=False: the file must outlive this function so the caller can read it.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}")
    try:
        with tmp:
            tmp.write(img_data)
    except Exception as e:
        # Best-effort cleanup; the write failure is the error worth reporting.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise IOError(f"Failed to write temp file: {e}") from e
    return tmp.name
def _to_plain(value: Any) -> Any:
    """Return ``value.tolist()`` when available (numpy-style), else ``value``.

    Keeps boxes and scores JSON-serializable without importing numpy here.
    """
    tolist = getattr(value, "tolist", None)
    return tolist() if callable(tolist) else value


def _item_at(seq: Any, index: int) -> Any:
    """Return ``seq[index]``, or ``None`` when ``seq`` is missing or too short."""
    if seq is None or index >= len(seq):
        return None
    return seq[index]


def _looks_like_box(value: Any) -> bool:
    """Heuristically tell a bounding box apart from a ``(text, conf)`` pair."""
    if isinstance(value, (list, tuple)):
        # A (text, conf) pair starts with a string; a box starts with a point.
        return not (value and isinstance(value[0], str))
    return callable(getattr(value, "tolist", None))


def _split_line(line: Any) -> Any:
    """Return ``(text, conf, box)`` for a recognized line layout, else ``None``.

    Recognized layouts: ``[box, (text, conf)]`` and ``[box, text, conf]``.
    """
    if len(line) >= 2:
        second = line[1]
        if (
            isinstance(second, (list, tuple))
            and len(second) >= 2
            and isinstance(second[0], str)
        ):
            return second[0], second[1], line[0]
        if len(line) >= 3 and isinstance(second, str):
            return second, line[2], line[0]
    return None


def parse_ocr_result(result: Any) -> List[Dict[str, Any]]:
    """Flatten a PaddleOCR result into a list of text entries.

    PaddleOCR's return value varies with the version and the way it is called,
    so this accepts the common shapes as broadly as possible:

    * a list of pages, each a list of ``[box, (text, conf)]`` or
      ``[box, text, conf]`` lines (or dict lines with ``text``/``content``);
    * a flat single-page list of such lines;
    * dicts carrying pages under ``data`` / ``pages`` / ``results``;
    * dicts with parallel ``rec_texts`` / ``rec_scores`` /
      ``rec_polys`` (or ``rec_boxes``) sequences, the layout documented for
      PaddleOCR 3.x ``predict()`` results;
    * a single dict with a ``text`` key.

    ``None`` (the whole result or an individual page) means "no text found"
    and contributes nothing. Anything unrecognized is stringified as a last
    resort.

    Args:
        result: The raw value returned by the OCR engine.

    Returns:
        A list of ``{"text": str, "confidence": float | None,
        "box": list | None}`` dicts. Values exposing ``tolist()`` are converted
        to plain Python objects so the output can be JSON-encoded.
    """
    out: List[Dict[str, Any]] = []

    def push(text, conf=None, box=None):
        # Entries without text carry no information; skip them.
        if text is None:
            return
        out.append({"text": text, "confidence": _to_plain(conf), "box": _to_plain(box)})

    def push_line(line):
        # One recognized text line, either dict-shaped or sequence-shaped.
        if isinstance(line, dict):
            box = line.get("box")
            if box is None:  # explicit None test: array-like boxes have no truth value
                box = line.get("bbox")
            push(line.get("text") or line.get("content"), line.get("confidence"), box)
        elif isinstance(line, (list, tuple)):
            fields = _split_line(line)
            if fields is None:
                # Last resort: take the first string found in the line.
                text = next((el for el in line if isinstance(el, str)), None)
                first = line[0] if line else None
                fields = (text, None, None if isinstance(first, str) else first)
            push(*fields)

    def push_block(block):
        # A dict holding one result: parallel arrays, a single entry, or unknown.
        if "rec_texts" in block:
            scores = block.get("rec_scores")
            boxes = block.get("rec_polys")
            if boxes is None:
                boxes = block.get("rec_boxes")
            for i, text in enumerate(block["rec_texts"] or []):
                push(text, _item_at(scores, i), _item_at(boxes, i))
        elif "text" in block:
            push(block.get("text"), block.get("confidence"), block.get("box"))
        else:
            push(str(block))

    def push_page(page):
        if page is None:
            # "No text detected" page; must not become the literal text "None".
            return
        flat = _split_line(page) if isinstance(page, (list, tuple)) else None
        if flat is not None and _looks_like_box(page[0]):
            # The "page" is itself a line: result was a flat single-page list.
            push(*flat)
        elif isinstance(page, list):
            for line in page:
                push_line(line)
        elif isinstance(page, dict):
            if "rec_texts" not in page and isinstance(page.get("data"), list):
                for line in page["data"]:
                    push_line(line)
            else:
                push_block(page)
        else:
            push(str(page))

    if result is None:
        return out

    if isinstance(result, dict):
        pages = None
        if "rec_texts" not in result:
            pages = result.get("data") or result.get("pages") or result.get("results")
        if isinstance(pages, list) and pages:
            for page in pages:
                push_page(page)
        else:
            push_block(result)
    elif isinstance(result, list):
        for page in result:
            push_page(page)
    else:
        # Unknown type: stringify so the caller still sees something.
        push(str(result))
    return out
@app.route("/ocr", methods=["POST"])
def ocr_api():
    """Handle ``POST /ocr``: run OCR on a base64 data-URL image from a JSON body.

    Request JSON fields:
        image: Required ``data:image/(png|jpeg|jpg);base64,...`` string.
        use_doc_orientation_classify, use_doc_unwarping,
        use_textline_orientation: Optional booleans, default ``False``.
        lang: Optional string, default ``"japan"``.

    Returns:
        ``{"results": [...]}`` on success; otherwise ``{"error": ...}`` with
        status 400 for invalid input or 500 for I/O and unexpected failures.
    """
    img_path = None
    try:
        # force=True parses the body regardless of Content-Type; silent=True
        # yields None on malformed JSON instead of raising, so the client gets
        # a 400 rather than falling through to the generic 500 handler.
        req_json = request.get_json(force=True, silent=True)
        if req_json is None:
            return jsonify({"error": "Content-Type must be application/json or body must be valid JSON"}), 400
        if not isinstance(req_json, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        logger.info("Received JSON keys: %s", list(req_json.keys()))

        dataurl = req_json.get("image")
        if not dataurl or not isinstance(dataurl, str):
            return jsonify({"error": "Missing or invalid 'image' field (expect data:image/...;base64,...)"}), 400

        def get_bool_param(key: str, default: bool = False) -> bool:
            # Strict validation: truthy non-bool values such as "false" or 1
            # are rejected rather than coerced.
            val = req_json.get(key, default)
            if not isinstance(val, bool):
                raise ValueError(f"Parameter '{key}' must be boolean")
            return val

        use_doc_orientation_classify = get_bool_param("use_doc_orientation_classify", False)
        use_doc_unwarping = get_bool_param("use_doc_unwarping", False)
        use_textline_orientation = get_bool_param("use_textline_orientation", False)

        lang = req_json.get("lang", "japan")
        if not isinstance(lang, str):
            return jsonify({"error": "Parameter 'lang' must be a string"}), 400

        ocr_params = {
            "use_doc_orientation_classify": use_doc_orientation_classify,
            "use_doc_unwarping": use_doc_unwarping,
            "use_textline_orientation": use_textline_orientation,
            "lang": lang,
        }

        img_path = dataurl_to_imagefile(dataurl)

        # NOTE(review): a new engine is built on every request; consider
        # reusing instances if construction cost becomes a problem.
        ocr = PaddleOCR(**ocr_params)

        # Call the method that matches the check. The previous code tested for
        # ``ocr`` but invoked ``predict`` with det/rec keyword arguments, which
        # fails whichever API is installed.
        if hasattr(ocr, "predict"):
            result = ocr.predict(img_path)
        elif hasattr(ocr, "ocr"):
            result = ocr.ocr(img_path)
        else:
            raise RuntimeError("No usable OCR method found on PaddleOCR instance")

        parsed = parse_ocr_result(result)
        return jsonify({"results": parsed})
    except ValueError as e:
        logger.error("ValueError: %s", e)
        return jsonify({"error": f"Value error: {e}"}), 400
    except IOError as e:
        logger.error("IOError: %s", e)
        return jsonify({"error": f"I/O error: {e}"}), 500
    except Exception as e:
        logger.exception("Unexpected error")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
    finally:
        # Always remove the temp image, whether OCR succeeded or failed.
        if img_path and os.path.exists(img_path):
            try:
                os.remove(img_path)
            except Exception as e:
                logger.warning("Failed to delete temp file %s: %s", img_path, e)
if __name__ == "__main__":
    # Listen on all interfaces. The port defaults to 7860 as before; the PORT
    # environment variable lets a deployment override it.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))
|