File size: 9,528 Bytes
b52aac6
02cf497
 
 
 
39d7e0d
 
b8d9d04
b52aac6
02cf497
 
b8d9d04
b52aac6
b8d9d04
b52aac6
 
02cf497
b52aac6
 
39d7e0d
b8d9d04
 
 
b52aac6
 
b8d9d04
b52aac6
 
b8d9d04
b52aac6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02cf497
 
 
b8d9d04
02cf497
b52aac6
b8d9d04
b52aac6
 
 
 
 
 
 
39d7e0d
b52aac6
 
 
 
b8d9d04
39d7e0d
b8d9d04
b52aac6
b8d9d04
b52aac6
 
b8d9d04
 
 
 
 
 
 
 
 
 
 
 
02cf497
39d7e0d
b8d9d04
 
 
 
39d7e0d
 
 
 
b52aac6
 
39d7e0d
02cf497
b52aac6
 
 
08db83f
b52aac6
 
 
 
 
39d7e0d
b52aac6
 
 
02cf497
b8d9d04
b52aac6
b8d9d04
 
b52aac6
b8d9d04
02cf497
b52aac6
b8d9d04
 
 
 
 
 
b52aac6
02cf497
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# app.py
import base64
import logging
import os
import re
import tempfile
from typing import Any, Dict, List, Optional, Tuple

from flask import Flask, request, jsonify
from paddleocr import PaddleOCR

# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
# Configure the root logger at INFO level, then create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def dataurl_to_imagefile(dataurl: str) -> str:
    """Decode a base64 image data URL into a temporary file and return its path.

    Only ``data:image/png``, ``data:image/jpeg`` and ``data:image/jpg`` URLs
    are accepted (matched case-insensitively). The file is created with
    ``delete=False``, so the caller is responsible for removing it.

    Args:
        dataurl: String of the form ``data:image/<ext>;base64,<payload>``.
            Whitespace and line breaks inside the payload are tolerated.

    Returns:
        Path of the temporary file; its suffix is the lower-cased image type.

    Raises:
        ValueError: If the URL does not match the expected format, the payload
            is empty, or the payload is not valid base64.
        IOError: If the decoded bytes cannot be written to the temporary file.
    """
    match = re.match(r"data:image/(png|jpeg|jpg);base64,(.*)", dataurl, re.DOTALL | re.IGNORECASE)
    if not match:
        raise ValueError("Invalid data URL format. Expected: data:image/(png|jpeg|jpg);base64,...")
    ext = match.group(1).lower()

    # Drop whitespace/line breaks first so strict validation can be applied:
    # without validate=True, b64decode silently discards any non-alphabet
    # character and would turn a garbage payload into a bogus image file.
    b64data = "".join(match.group(2).split())
    if not b64data:
        raise ValueError("Base64 decode failed: empty image payload")
    try:
        img_data = base64.b64decode(b64data, validate=True)
    except ValueError as e:  # binascii.Error is a ValueError subclass
        raise ValueError(f"Base64 decode failed: {e}") from e

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}")
    try:
        # The context manager closes the handle on both success and failure.
        with tmp:
            tmp.write(img_data)
    except Exception as e:
        # Best-effort cleanup; a failing unlink must not mask the write error.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise IOError(f"Failed to write temp file: {e}") from e
    return tmp.name

# One recognised text line as (text, confidence, box).
_OcrLine = Tuple[Any, Any, Any]


def _to_plain(value: Any) -> Any:
    """Convert array-like values (anything exposing ``tolist``) to plain Python objects."""
    tolist = getattr(value, "tolist", None)
    return tolist() if callable(tolist) else value


def _contains_text(value: Any) -> bool:
    """Return True if *value* is a string or a list/tuple directly holding a string."""
    if isinstance(value, str):
        return True
    return isinstance(value, (list, tuple)) and any(isinstance(el, str) for el in value)


def _match_line(line: Any) -> Optional[_OcrLine]:
    """Strictly match ``[box, (text, conf)]`` or ``[box, text, conf]``; None otherwise."""
    if not isinstance(line, (list, tuple)) or len(line) < 2 or _contains_text(line[0]):
        return None
    second = line[1]
    if isinstance(second, (list, tuple)) and len(second) >= 2 and isinstance(second[0], str):
        return second[0], second[1], line[0]
    if len(line) >= 3 and isinstance(second, str):
        return second, line[2], line[0]
    return None


def _parse_line(line: Any) -> Optional[_OcrLine]:
    """Best-effort extraction of (text, conf, box) from one line entry; None if unusable."""
    if isinstance(line, dict):
        return (
            line.get("text") or line.get("content"),
            line.get("confidence"),
            line.get("box") or line.get("bbox"),
        )
    if not isinstance(line, (list, tuple)):
        return None
    matched = _match_line(line)
    if matched is not None:
        return matched
    # Bare (text, conf) pair with no box.
    if len(line) == 2 and isinstance(line[0], str) and isinstance(line[1], (int, float)):
        return line[0], line[1], None
    # Last resort: take the first string found anywhere in the entry.
    text = next((el for el in line if isinstance(el, str)), None)
    if text is None:
        return None
    return text, None, (None if isinstance(line[0], str) else line[0])


def _rec_lines(page: Dict[str, Any]) -> List[_OcrLine]:
    """Zip the parallel ``rec_texts`` / ``rec_scores`` / ``rec_polys`` (or ``rec_boxes``) lists."""
    texts = page.get("rec_texts")
    if not isinstance(texts, (list, tuple)):
        return []
    scores = page.get("rec_scores")
    # Explicit None checks: these values may be arrays, whose truthiness is ambiguous.
    boxes = page.get("rec_polys")
    if boxes is None:
        boxes = page.get("rec_boxes")
    lines: List[_OcrLine] = []
    for idx, text in enumerate(texts):
        conf = scores[idx] if scores is not None and idx < len(scores) else None
        box = boxes[idx] if boxes is not None and idx < len(boxes) else None
        lines.append((text, conf, box))
    return lines


def parse_ocr_result(result: Any) -> List[Dict[str, Any]]:
    """Flatten an OCR result of varying shape into a list of text entries.

    The return value of PaddleOCR differs between versions and call styles, so
    the common shapes are handled as broadly as possible:

    * a list of pages, each a list of lines ``[box, (text, conf)]``,
      ``[box, text, conf]`` or dicts with ``text``/``content``,
      ``confidence`` and ``box``/``bbox`` keys;
    * a flat single page ``[[box, (text, conf)], ...]``;
    * dict pages with parallel ``rec_texts`` / ``rec_scores`` /
      ``rec_polys`` (or ``rec_boxes``) lists;
    * a dict wrapping pages under ``data``, ``pages`` or ``results``, or a
      single ``{"text": ...}`` block.

    ``None`` pages (and a ``None`` result) contribute no entries. Anything
    unrecognised is stringified into a text-only entry. Entries whose text is
    ``None`` are dropped. Array-like confidences and boxes are converted with
    ``tolist()`` so the output contains only plain Python objects.

    Args:
        result: Raw value returned by the OCR engine.

    Returns:
        A list of ``{"text": str, "confidence": float | None, "box": list | None}``.
    """
    out: List[Dict[str, Any]] = []

    def push(text, conf=None, box=None):
        if text is None:
            return
        out.append({"text": text, "confidence": _to_plain(conf), "box": _to_plain(box)})

    def push_lines(lines):
        for line in lines:
            fields = _parse_line(line)
            if fields is not None:
                push(*fields)

    def push_page(page):
        if page is None:
            # A page without any text must not become the string "None".
            return
        if isinstance(page, dict):
            if "rec_texts" in page:
                for fields in _rec_lines(page):
                    push(*fields)
            elif isinstance(page.get("data"), (list, tuple)):
                push_lines(page["data"])
            elif "text" in page or "content" in page:
                push(*_parse_line(page))
            else:
                push(str(page))
        elif isinstance(page, (list, tuple)):
            # A "page" that is itself one line means the result was a flat
            # single page; iterating it would emit box coordinates as text.
            flat = _match_line(page)
            if flat is not None:
                push(*flat)
            else:
                push_lines(page)
        else:
            push(str(page))

    if result is None:
        return out

    if isinstance(result, dict):
        pages = result.get("data") or result.get("pages") or result.get("results") or []
        if "rec_texts" not in result and isinstance(pages, list) and pages:
            for page in pages:
                push_page(page)
        else:
            # No page container: treat the dict itself as a single page/block.
            push_page(result)
        return out

    if isinstance(result, list):
        for page in result:
            push_page(page)
        return out

    # Unknown type: stringify.
    push(str(result))
    return out

@app.route("/ocr", methods=["POST"])
def ocr_api():
    """Run OCR on a base64 data-URL image posted as JSON.

    Request body (a JSON object):
        image: Required ``data:image/...;base64,...`` string.
        use_doc_orientation_classify, use_doc_unwarping,
        use_textline_orientation: Optional booleans, default ``False``.
        lang: Optional string, default ``"japan"``.

    Responses:
        200: ``{"results": [...]}`` as produced by ``parse_ocr_result``.
        400: ``{"error": ...}`` for a malformed body or invalid parameter.
        500: ``{"error": ...}`` for I/O failures or unexpected errors.

    The temporary image file is removed before returning, whatever the outcome.
    """
    img_path = None
    try:
        # force=True parses the body regardless of Content-Type; silent=True
        # turns a parse failure into None instead of an HTTP exception that
        # the generic handler below would report as a 500.
        req_json = request.get_json(force=True, silent=True)
        if req_json is None:
            return jsonify({"error": "Content-Type must be application/json or body must be valid JSON"}), 400

        if not isinstance(req_json, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        logger.info("Received JSON keys: %s", list(req_json.keys()))

        dataurl = req_json.get("image")
        if not dataurl or not isinstance(dataurl, str):
            return jsonify({"error": "Missing or invalid 'image' field (expect data:image/...;base64,...)"}), 400

        # Boolean params are validated strictly: only JSON true/false is
        # accepted; the ValueError is mapped to a 400 below.
        def get_bool_param(key: str, default: bool = False) -> bool:
            val = req_json.get(key, default)
            if not isinstance(val, bool):
                raise ValueError(f"Parameter '{key}' must be boolean")
            return val

        use_doc_orientation_classify = get_bool_param("use_doc_orientation_classify", False)
        use_doc_unwarping = get_bool_param("use_doc_unwarping", False)
        use_textline_orientation = get_bool_param("use_textline_orientation", False)

        lang = req_json.get("lang", "japan")
        if not isinstance(lang, str):
            return jsonify({"error": "Parameter 'lang' must be a string"}), 400

        ocr_params = {
            "use_doc_orientation_classify": use_doc_orientation_classify,
            "use_doc_unwarping": use_doc_unwarping,
            "use_textline_orientation": use_textline_orientation,
            "lang": lang,
        }

        img_path = dataurl_to_imagefile(dataurl)

        # A new PaddleOCR instance is built per request from the validated
        # parameters only.
        ocr = PaddleOCR(**ocr_params)

        # Call whichever entry point the instance actually exposes. Each branch
        # must invoke the method it tested for, with no extra keyword
        # arguments the method may not accept.
        if hasattr(ocr, "predict"):
            result = ocr.predict(img_path)
        elif hasattr(ocr, "ocr"):
            result = ocr.ocr(img_path)
        else:
            raise RuntimeError("No usable OCR method found on PaddleOCR instance")

        parsed = parse_ocr_result(result)

        return jsonify({"results": parsed})

    except ValueError as e:
        logger.error("ValueError: %s", e)
        return jsonify({"error": f"Value error: {e}"}), 400
    except IOError as e:
        logger.error("IOError: %s", e)
        return jsonify({"error": f"I/O error: {e}"}), 500
    except Exception as e:
        logger.exception("Unexpected error")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
    finally:
        # Always remove the temp image; a cleanup failure is only logged.
        if img_path and os.path.exists(img_path):
            try:
                os.remove(img_path)
            except Exception as e:
                logger.warning("Failed to delete temp file %s: %s", img_path, e)

# Start Flask's built-in server only when this file is executed as a script.
if __name__ == "__main__":
    # host 0.0.0.0 listens on all network interfaces; the port is fixed at 7860.
    app.run(host="0.0.0.0", port=7860)