Spaces:

soiz1
/

paddle-ocr

Sleeping

App Files Files Community

paddle-ocr / app.py

soiz1

Update app.py

08db83f verified about 1 month ago

raw

history blame contribute delete

9.53 kB

	# app.py
	from flask import Flask, request, jsonify
	from paddleocr import PaddleOCR
	import base64
	import re
	import tempfile
	import os
	import logging
	from typing import Any, List, Dict

	# WSGI application object; the /ocr route handler in this file registers itself on it.
	app = Flask(__name__)
	# Configure the root logger so INFO-level (and above) records are emitted.
	logging.basicConfig(level=logging.INFO)
	# Module-scoped logger used for request diagnostics and error reporting.
	logger = logging.getLogger(__name__)

	def dataurl_to_imagefile(dataurl: str) -> str:
	    """Decode a base64 image data URL and write it to a temporary file.

	    Args:
	        dataurl: String of the form
	            ``data:image/<png|jpeg|jpg>;base64,<payload>``. The prefix is
	            matched case-insensitively and whitespace inside the payload
	            is ignored.

	    Returns:
	        Path of the newly created temporary file. The file is created with
	        ``delete=False``, so the caller is responsible for removing it.

	    Raises:
	        ValueError: If the string is not a supported data URL, or the
	            payload is empty or not valid base64.
	        IOError: If the temporary file cannot be written.
	    """
	    match = re.match(
	        r"data:image/(png|jpeg|jpg);base64,(.*)",
	        dataurl,
	        re.DOTALL | re.IGNORECASE,
	    )
	    if not match:
	        raise ValueError("Invalid data URL format. Expected: data:image/(png|jpeg|jpg);base64,...")

	    ext = match.group(1).lower()
	    # Strip line breaks/spaces first so that strict validation below only
	    # rejects genuinely invalid characters instead of silently dropping them.
	    b64data = "".join(match.group(2).split())
	    try:
	        # binascii.Error is a ValueError subclass, so this covers bad padding
	        # and non-alphabet characters alike.
	        img_data = base64.b64decode(b64data, validate=True)
	    except ValueError as e:
	        raise ValueError(f"Base64 decode failed: {e}") from e
	    if not img_data:
	        raise ValueError("Base64 decode failed: empty image payload")

	    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}")
	    try:
	        # The context manager closes the handle on success and on failure.
	        with tmp:
	            tmp.write(img_data)
	    except Exception as e:
	        # Best-effort cleanup: a failed unlink must not mask the write error.
	        try:
	            os.unlink(tmp.name)
	        except OSError:
	            pass
	        raise IOError(f"Failed to write temp file: {e}") from e
	    return tmp.name

	def _to_plain(value: Any) -> Any:
	    """Convert array-like values (anything exposing ``tolist``) to plain Python objects."""
	    tolist = getattr(value, "tolist", None)
	    if callable(tolist):
	        return tolist()
	    if isinstance(value, list):
	        return [_to_plain(item) for item in value]
	    if isinstance(value, tuple):
	        return tuple(_to_plain(item) for item in value)
	    return value


	def _item_at(seq: Any, index: int) -> Any:
	    """Return ``seq[index]``, or None when ``seq`` is missing, too short or not indexable."""
	    if seq is None:
	        return None
	    try:
	        return seq[index]
	    except (IndexError, KeyError, TypeError):
	        return None


	def _looks_like_line(item: Any) -> bool:
	    """Tell whether ``item`` is itself one ``[box, (text, conf)]`` / ``[box, text, conf]`` line."""
	    if not isinstance(item, (list, tuple)) or len(item) < 2:
	        return False
	    box, second = item[0], item[1]
	    if not (isinstance(box, (list, tuple)) or hasattr(box, "tolist")):
	        return False
	    if isinstance(second, str):
	        return len(item) >= 3
	    return (
	        isinstance(second, (list, tuple))
	        and len(second) >= 2
	        and isinstance(second[0], str)
	    )


	def _line_to_entry(line: Any):
	    """Return ``(text, confidence, box)`` for one line, or None for an unsupported shape."""
	    if isinstance(line, dict):
	        return (
	            line.get("text") or line.get("content"),
	            line.get("confidence"),
	            line.get("box") or line.get("bbox"),
	        )
	    if not isinstance(line, (list, tuple)) or not line:
	        return None
	    box = line[0]
	    # pattern: [box, (text, conf)]
	    if len(line) >= 2 and isinstance(line[1], (list, tuple)) and len(line[1]) >= 2:
	        return line[1][0], line[1][1], box
	    # pattern: [box, text, conf]
	    if len(line) >= 3 and isinstance(line[1], str):
	        return line[1], line[2], box
	    # last resort: use the first string found anywhere in the line
	    text = next((el for el in line if isinstance(el, str)), None)
	    return text, None, box


	def parse_ocr_result(result: Any) -> List[Dict[str, Any]]:
	    """Flatten a PaddleOCR result into a list of recognised text entries.

	    The shape of PaddleOCR's return value varies with the library version
	    and the method that was called, so the common layouts are handled as
	    broadly as possible:

	    * a list of pages, each page a list of ``[box, (text, conf)]`` or
	      ``[box, text, conf]`` lines, or of dicts with ``text``/``content``,
	      ``confidence`` and ``box``/``bbox`` keys;
	    * a flat list of such lines (a single page without the outer list);
	    * mappings carrying parallel ``rec_texts`` / ``rec_scores`` /
	      ``rec_polys`` (or ``rec_boxes``) sequences, which is the layout the
	      PaddleOCR 3.x ``predict()`` results are expected to have;
	    * a dict wrapping pages under ``data``, ``pages`` or ``results``;
	    * a single dict with a ``text`` key.

	    ``None`` (as a whole result or as a page, i.e. nothing was recognised)
	    contributes no entries. Anything else unrecognised is stringified.
	    Confidences and boxes exposing ``tolist`` are converted to plain Python
	    values so the output can be JSON-serialised.

	    Args:
	        result: Raw value returned by the OCR engine.

	    Returns:
	        A flat list of ``{"text": str, "confidence": float | None,
	        "box": list | None}`` dicts in encounter order.
	    """
	    out: List[Dict[str, Any]] = []

	    def push(text, conf=None, box=None):
	        # Entries without text carry no information for the caller.
	        if text is None:
	            return
	        out.append({"text": text, "confidence": _to_plain(conf), "box": _to_plain(box)})

	    def push_lines(lines):
	        for line in lines:
	            entry = _line_to_entry(line)
	            if entry is not None:
	                push(*entry)

	    def push_mapping(page):
	        texts = page.get("rec_texts")
	        if isinstance(texts, (list, tuple)):
	            # Parallel-sequence layout: one text/score/polygon per index.
	            scores = page.get("rec_scores")
	            boxes = page.get("rec_polys")
	            if boxes is None:
	                boxes = page.get("rec_boxes")
	            for index, text in enumerate(texts):
	                push(text, _item_at(scores, index), _item_at(boxes, index))
	        elif isinstance(page.get("data"), (list, tuple)):
	            push_lines(page["data"])
	        elif "text" in page:
	            push(page.get("text"), page.get("confidence"), page.get("box"))
	        else:
	            # Unknown mapping: keep its content visible rather than dropping it.
	            push(str(page), None, None)

	    def push_page(page):
	        if page is None:
	            # A None page means no text was detected on it.
	            return
	        if isinstance(page, dict):
	            push_mapping(page)
	        elif _looks_like_line(page):
	            # Flat single-page result: the "page" is really one line.
	            push(*_line_to_entry(page))
	        elif isinstance(page, (list, tuple)):
	            push_lines(page)
	        else:
	            push(str(page), None, None)

	    if result is None:
	        return out

	    if isinstance(result, dict):
	        pages = result.get("data") or result.get("pages") or result.get("results") or []
	        if isinstance(pages, list) and pages:
	            for page in pages:
	                push_page(page)
	        else:
	            push_mapping(result)
	        return out

	    if isinstance(result, (list, tuple)):
	        for page in result:
	            push_page(page)
	        return out

	    # Unknown top-level type: stringify so the caller still sees something.
	    push(str(result), None, None)
	    return out

	@app.route("/ocr", methods=["POST"])
	def ocr_api():
	    """Run OCR on a base64 data-URL image posted as JSON.

	    Request body (JSON object):
	        image: required string, ``data:image/<png|jpeg|jpg>;base64,...``.
	        use_doc_orientation_classify, use_doc_unwarping,
	        use_textline_orientation: optional booleans (default ``False``),
	            forwarded to the ``PaddleOCR`` constructor.
	        lang: optional string (default ``"japan"``), forwarded as well.

	    Returns:
	        ``{"results": [...]}`` with the parsed OCR entries on success, or
	        ``{"error": "..."}`` with status 400 for invalid input and 500 for
	        I/O or unexpected failures. The temporary image file is always
	        removed before returning.
	    """
	    img_path = None
	    try:
	        # force=True parses the body regardless of Content-Type; silent=True
	        # yields None on malformed JSON instead of raising BadRequest, which
	        # the generic handler below would otherwise report as a 500.
	        req_json = request.get_json(force=True, silent=True)
	        if req_json is None:
	            return jsonify({"error": "Content-Type must be application/json or body must be valid JSON"}), 400

	        if not isinstance(req_json, dict):
	            return jsonify({"error": "Request body must be a JSON object"}), 400

	        logger.info("Received JSON keys: %s", list(req_json.keys()))

	        dataurl = req_json.get("image")
	        if not dataurl or not isinstance(dataurl, str):
	            return jsonify({"error": "Missing or invalid 'image' field (expect data:image/...;base64,...)"}), 400

	        # boolean params: validate strictly (no truthy strings or integers)
	        def get_bool_param(key: str, default: bool = False) -> bool:
	            val = req_json.get(key, default)
	            if not isinstance(val, bool):
	                raise ValueError(f"Parameter '{key}' must be boolean")
	            return val

	        use_doc_orientation_classify = get_bool_param("use_doc_orientation_classify", False)
	        use_doc_unwarping = get_bool_param("use_doc_unwarping", False)
	        use_textline_orientation = get_bool_param("use_textline_orientation", False)

	        lang = req_json.get("lang", "japan")
	        if not isinstance(lang, str):
	            return jsonify({"error": "Parameter 'lang' must be a string"}), 400

	        ocr_params = {
	            "use_doc_orientation_classify": use_doc_orientation_classify,
	            "use_doc_unwarping": use_doc_unwarping,
	            "use_textline_orientation": use_textline_orientation,
	            "lang": lang,
	        }

	        img_path = dataurl_to_imagefile(dataurl)

	        # Create the PaddleOCR instance.
	        # Note: the constructor may accept different params depending on the
	        # installed version; only the ones validated above are passed.
	        ocr = PaddleOCR(**ocr_params)

	        # Call the method that actually exists on this instance, using only
	        # the image path so neither API receives keywords it does not know.
	        if hasattr(ocr, "predict"):
	            result = ocr.predict(img_path)
	        elif hasattr(ocr, "ocr"):
	            # classic API
	            result = ocr.ocr(img_path)
	        else:
	            raise RuntimeError("No usable OCR method found on PaddleOCR instance")

	        parsed = parse_ocr_result(result)

	        return jsonify({"results": parsed})

	    except ValueError as e:
	        # Raised by parameter validation and by data-URL decoding.
	        logger.error("ValueError: %s", e)
	        return jsonify({"error": f"Value error: {e}"}), 400
	    except IOError as e:
	        logger.error("IOError: %s", e)
	        return jsonify({"error": f"I/O error: {e}"}), 500
	    except Exception as e:
	        logger.exception("Unexpected error")
	        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
	    finally:
	        # Always clean up the temp image, whatever happened above.
	        if img_path and os.path.exists(img_path):
	            try:
	                os.remove(img_path)
	            except Exception as e:
	                logger.warning("Failed to delete temp file %s: %s", img_path, e)

	if __name__ == "__main__":
	    # Start Flask's built-in server only when this file is run as a script:
	    # bind to every interface on port 7860.
	    app.run(host="0.0.0.0", port=7860)