guoshengjian committed on
Commit
565ceb9
·
1 Parent(s): f4b31d0
Files changed (4) hide show
  1. .pre-commit-config.yaml +54 -0
  2. app.py +916 -51
  3. icon/upload.png +3 -0
  4. requirements.txt +2 -3
.pre-commit-config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.6.0
4
+ hooks:
5
+ - id: check-added-large-files
6
+ args: ['--maxkb=11000']
7
+ - id: check-case-conflict
8
+ - id: check-merge-conflict
9
+ - id: check-symlinks
10
+ - id: detect-private-key
11
+ - id: end-of-file-fixer
12
+ - id: trailing-whitespace
13
+ files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
14
+ - repo: https://github.com/Lucas-C/pre-commit-hooks
15
+ rev: v1.5.1
16
+ hooks:
17
+ - id: remove-crlf
18
+ - id: remove-tabs
19
+ files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
20
+ - repo: local
21
+ hooks:
22
+ - id: clang-format
23
+ name: clang-format
24
+ description: Format files with ClangFormat
25
+ entry: bash .precommit/clang_format.hook -i
26
+ language: system
27
+ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
28
+
29
+ # For Python files
30
+ - repo: https://github.com/psf/black.git
31
+ rev: 24.4.2
32
+ hooks:
33
+ - id: black
34
+ files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
35
+
36
+ # Flake8
37
+ - repo: https://github.com/pycqa/flake8
38
+ rev: 7.0.0
39
+ hooks:
40
+ - id: flake8
41
+ args:
42
+ - --count
43
+ - --select=E9,F63,F7,F82,E721,F401
44
+ - --per-file-ignores=__init__.py:F401
45
+ - --show-source
46
+ - --statistics
47
+
48
+ # isort
49
+ - repo: https://github.com/pycqa/isort
50
+ rev: 5.12.0
51
+ hooks:
52
+ - id: isort
53
+ args:
54
+ - --profile=black
app.py CHANGED
@@ -1,59 +1,265 @@
 
1
  import base64
2
  import io
 
3
  import os
 
 
 
 
 
 
4
 
5
  import gradio as gr
6
  import requests
7
  from PIL import Image
8
 
9
- API_URL = "https://t7nd0cf3u89ck4bf.aistudio-hub.baidu.com/ocr"
10
- TOKEN = os.getenv("API_TOKEN", "")
11
 
 
12
 
13
- def inference(img):
14
- with io.BytesIO() as buffer:
15
- img.save(buffer, format="png")
16
- img_base64 = base64.b64encode(buffer.getvalue()).decode("ascii")
 
 
 
17
 
18
- headers = {
19
- "Authorization": f"token {TOKEN}",
20
- "Content-Type": "application/json",
21
- }
22
 
23
- response = requests.post(
24
- API_URL,
25
- json={
26
- "file": img_base64,
27
- "fileType": 1,
28
- "useDocOrientationClassify": False,
29
- "useDocUnwarping": False,
30
- "useTextlineOrientation": False,
31
- },
32
- headers=headers,
33
- timeout=1000,
34
- )
35
- response.raise_for_status()
 
 
 
 
36
 
37
- result = response.json()
38
- ocr_img_url = result["result"]["ocrResults"][0]["ocrImage"]
 
 
 
 
 
39
 
40
- response = requests.get(ocr_img_url, timeout=10)
41
- response.raise_for_status()
42
- ocr_img_base64 = Image.open(io.BytesIO(response.content))
 
 
 
 
 
43
 
44
- return ocr_img_base64, result["result"]["ocrResults"][0]["prunedResult"]
 
 
 
 
 
 
45
 
 
 
 
 
46
 
47
- title = "PP-OCRv5 Online Demo"
48
- description = """
49
- - PP-OCRv5 is the latest generation of the PP-OCR series model, designed to handle a wide range of scene and text types.
50
- - It supports five major text types: Simplified Chinese, Traditional Chinese, Pinyin annotation, English, and Japanese.
51
- - PP-OCRv5 has enhanced recognition capabilities for challenging use cases, including complex handwritten Chinese and English, vertical text, and rare characters.
52
- - To use it, simply upload your image, or click one of the examples to load them. Read more at the links below.
53
- - [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  """
55
 
56
- examples = [
57
  ["examples/ancient_demo.png"],
58
  ["examples/handwrite_ch_demo.png"],
59
  ["examples/handwrite_en_demo.png"],
@@ -63,19 +269,678 @@ examples = [
63
  ["examples/research.png"],
64
  ["examples/tech.png"],
65
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- css = """
68
- .output_image, .input_image {height: 40rem !important; width: 100% !important;}
69
- h1 {text-align: center !important;}
70
- """
71
 
72
- gr.Interface(
73
- inference,
74
- gr.Image(type="pil", label="Input Image"),
75
- [gr.Image(type="pil", label="Output Image"), gr.JSON(label="Output JSON", show_label=True)],
76
- title=title,
77
- description=description,
78
- examples=examples,
79
- cache_examples=False,
80
- css=css,
81
- ).launch(debug=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import atexit
2
  import base64
3
  import io
4
+ import json
5
  import os
6
+ import tempfile
7
+ import threading
8
+ import time
9
+ import uuid
10
+ import zipfile
11
+ from pathlib import Path
12
 
13
  import gradio as gr
14
  import requests
15
  from PIL import Image
16
 
17
+ API_URL = os.environ["API_URL"]
 
18
 
19
+ TOKEN = os.environ["API_TOKEN"]
20
 
21
+ TITLE = "PP-OCRv5 Online Demo"
22
+ DESCRIPTION = """
23
+ - PP-OCRv5 is the latest generation of the PP-OCR series model, designed to handle a wide range of scene and text types.
24
+ - It supports five major text types: Simplified Chinese, Traditional Chinese, Pinyin annotation, English, and Japanese.
25
+ - PP-OCRv5 has enhanced recognition capabilities for challenging use cases, including complex handwritten Chinese and English, vertical text, and rare characters.
26
+ - To use it, simply upload your image, or click one of the examples to load them. Read more at the links below.
27
+ """
28
 
29
+ TEMP_DIR = tempfile.TemporaryDirectory()
30
+ atexit.register(TEMP_DIR.cleanup)
 
 
31
 
32
+ paddle_theme = gr.themes.Soft(
33
+ font=["Roboto", "Open Sans", "Arial", "sans-serif"],
34
+ font_mono=["Fira Code", "monospace"],
35
+ )
36
+ MAX_NUM_PAGES = 10
37
+ TMP_DELETE_TIME = 900
38
+ THREAD_WAKEUP_TIME = 600
39
+ CSS = """
40
+ :root {
41
+ --sand-color: #FAF9F6;
42
+ --white: #ffffff;
43
+ --shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
44
+ --text-color: #F3F4F7;
45
+ --black:#000000;
46
+ --link-hover: #2b6cb0;
47
+ --content-width: 1200px;
48
+ }
49
 
50
+ body {
51
+ display: flex;
52
+ justify-content: center;
53
+ background-color: var(--sand-color);
54
+ color: var(--text-color);
55
+ font-family: Arial, sans-serif;
56
+ }
57
 
58
+ .upload-section {
59
+ width: 100%;
60
+ margin: 0 auto 30px;
61
+ padding: 20px;
62
+ background-color: var(--sand-color) !important;
63
+ border-radius: 8px;
64
+ box-shadow: var(--shadow);
65
+ }
66
 
67
+ .center-content {
68
+ display: flex;
69
+ flex-direction: column;
70
+ align-items: center;
71
+ text-align: center;
72
+ margin-bottom: 20px;
73
+ }
74
 
75
+ .header {
76
+ margin-bottom: 30px;
77
+ width: 100%;
78
+ }
79
 
80
+ .logo-container {
81
+ width: 100%;
82
+ margin-bottom: 20px;
83
+ }
84
+
85
+ .logo-img {
86
+ width: 100%;
87
+ max-width: var(--content-width);
88
+ margin: 0 auto;
89
+ display: block;
90
+ }
91
+
92
+ .nav-bar {
93
+ display: flex;
94
+ justify-content: center;
95
+ background-color: var(--white);
96
+ padding: 15px 0;
97
+ box-shadow: var(--shadow);
98
+ margin-bottom: 20px;
99
+ }
100
+
101
+ .nav-links {
102
+ display: flex;
103
+ gap: 30px;
104
+ width: 100%;
105
+ justify-content: center;
106
+ }
107
+
108
+ .nav-link {
109
+ color: var(--black);
110
+ text-decoration: none;
111
+ font-weight: bold;
112
+ font-size: 24px;
113
+ transition: color 0.2s;
114
+ }
115
+
116
+ .nav-link:hover {
117
+ color: var(--link-hover);
118
+ text-decoration: none;
119
+ }
120
+
121
+ button {
122
+ background-color: var(--text-color) !important;
123
+ color: var(--black) !important;
124
+ border: none !important;
125
+ border-radius: 4px;
126
+ padding: 8px 16px;
127
+ }
128
+
129
+ .file-download {
130
+ margin-top: 15px !important;
131
+ }
132
+ .loader {
133
+ border: 5px solid #f3f3f3;
134
+ border-top: 5px solid #3498db;
135
+ border-radius: 50%;
136
+ width: 50px;
137
+ height: 50px;
138
+ animation: spin 1s linear infinite;
139
+ margin: 20px auto;
140
+ }
141
+
142
+ @keyframes spin {
143
+ 0% { transform: rotate(0deg); }
144
+ 100% { transform: rotate(360deg); }
145
+ }
146
+
147
+ .loader-container {
148
+ text-align: center;
149
+ margin: 20px 0;
150
+ }
151
+ .loader-container-prepare {
152
+ text-align: left;
153
+ margin: 20px 0;
154
+ }
155
+ .bold-label .gr-radio {
156
+ margin-top: 8px;
157
+ background-color: var(--white);
158
+ padding: 10px;
159
+ border-radius: 4px;
160
+ }
161
+
162
+ .bold-label .gr-radio label {
163
+ font-size: 14px;
164
+ color: var(--black);
165
+ }
166
+
167
+ #analyze-btn {
168
+ background-color: #FF5722 !important;
169
+ color: white !important;
170
+ transition: all 0.3s ease !important;
171
+ box-shadow: 0 2px 5px rgba(0,0,0,0.2) !important;
172
+ position: fixed !important;
173
+ bottom: 1% !important;
174
+ left: 3% !important;
175
+ z-index: 1000 !important;
176
+ }
177
+
178
+
179
+ #unzip-btn {
180
+ background-color: #4CAF50 !important;
181
+ color: white !important;
182
+ transition: all 0.3s ease !important;
183
+ box-shadow: 0 2px 5px rgba(0,0,0,0.2) !important;
184
+ position: fixed !important;
185
+ bottom: 1% !important;
186
+ left: 18% !important;
187
+ z-index: 1000 !important;
188
+ }
189
+
190
+ #download_file {
191
+ position: fixed !important;
192
+ bottom: 1% !important;
193
+ left: 22% !important;
194
+ z-index: 1000 !important;
195
+ }
196
+
197
+ #analyze-btn:hover,#unzip-btn:hover{
198
+ transform: translateY(-3px) !important;
199
+ box-shadow: 0 4px 8px rgba(0,0,0,0.3) !important;
200
+ }
201
+
202
+ .square-pdf-btn {
203
+ width: 90% !important;
204
+ height: 3% !important;
205
+ padding: 0 !important;
206
+ display: flex !important;
207
+ flex-direction: column !important;
208
+ align-items: center !important;
209
+ justify-content: center !important;
210
+ gap: 8px !important;
211
+ }
212
+
213
+
214
+ .square-pdf-btn img {
215
+ width: 20% !important;
216
+ height: 1% !important;
217
+ margin: 0 !important;
218
+ }
219
+
220
+
221
+ .square-pdf-btn span {
222
+ font-size: 14px !important;
223
+ text-align: center !important;
224
+ }
225
+
226
+
227
+ .gradio-gallery-item:hover {
228
+ background-color: transparent !important;
229
+ filter: none !important;
230
+ transform: none !important;
231
+ }
232
+
233
+ .custom-markdown h3 {
234
+ font-size: 25px !important;
235
+ }
236
+
237
+ .tight-spacing {
238
+ margin-bottom: -20px !important;
239
+ }
240
+
241
+ .tight-spacing-as {
242
+ margin-top: 0px !important;
243
+ margin-bottom: 0px !important;
244
+ }
245
+
246
+ .left-margin-column {
247
+ margin-left: 5%;
248
+ }
249
+
250
+ .image-container img {
251
+ display: inline-block !important;
252
+ }
253
+
254
+ #markdown-title {
255
+ text-align: center;
256
+ }
257
+
258
+
259
+ }
260
  """
261
 
262
+ EXAMPLE_TEST = [
263
  ["examples/ancient_demo.png"],
264
  ["examples/handwrite_ch_demo.png"],
265
  ["examples/handwrite_en_demo.png"],
 
269
  ["examples/research.png"],
270
  ["examples/tech.png"],
271
  ]
272
+ DESC_DICT = {
273
+ "use_doc_orientation_classify": "Whether to use the document image orientation classification module. After use, you can correct distorted images, such as wrinkles, tilts, etc.",
274
+ "use_doc_unwarping": "Whether to use the document unwarping module. After use, you can correct distorted images, such as wrinkles, tilts, etc.",
275
+ "use_textline_orientation": "Whether to use the text line orientation classification module to support the distinction and correction of text lines of 0 degrees and 180 degrees.",
276
+ "text_det_limit_type": "[Short side] means to ensure that the shortest side of the image is not less than [Image side length limit for text detection], and [Long side] means to ensure that the longest side of the image is not greater than [Image side length limit for text detection].",
277
+ "text_det_limit_side_len_nb": "For the side length limit of the text detection input image, for large images with dense text, if you want more accurate recognition, you should choose a larger size. This parameter is used in conjunction with the [Image side length limit type for text detection]. Generally, the maximum [Long side] is suitable for scenes with large images and text, and the minimum [Short side] is suitable for document scenes with small and dense images.",
278
+ "text_det_thresh_nb": "In the output probability map, only pixels with scores greater than the threshold are considered text pixels, and the value range is 0~1.",
279
+ "text_det_box_thresh_nb": "When the average score of all pixels in the detection result border is greater than the threshold, the result will be considered as a text area, and the value range is 0 to 1. If missed detection occurs, this value can be appropriately lowered.",
280
+ "text_det_unclip_ratio_nb": "Use this method to expand the text area. The larger the value, the larger the expanded area.",
281
+ "text_rec_score_thresh_nb": "After text detection, the text box performs text recognition, and the text results with scores greater than the threshold will be retained. The value range is 0~1.",
282
+ }
283
+ tmp_time = {}
284
+ lock = threading.Lock()
285
 
 
 
 
 
286
 
287
+ def gen_tooltip_radio(desc_dict):
288
+ tooltip = {}
289
+ for key, desc in desc_dict.items():
290
+ suffixes = ["_rd", "_md"]
291
+ if key.endswith("_nb"):
292
+ suffix = "_nb"
293
+ suffixes = ["_nb", "_md"]
294
+ key = key[: -len(suffix)]
295
+ for suffix in suffixes:
296
+ tooltip[f"{key}{suffix}"] = desc
297
+ return tooltip
298
+
299
+
300
+ TOOLTIP_RADIO = gen_tooltip_radio(DESC_DICT)
301
+
302
+
303
+ def url_to_bytes(url, *, timeout=10):
304
+ resp = requests.get(url, timeout=timeout)
305
+ resp.raise_for_status()
306
+ return resp.content
307
+
308
+
309
+ def bytes_to_image(image_bytes):
310
+ return Image.open(io.BytesIO(image_bytes))
311
+
312
+
313
+ def process_file(
314
+ file_path,
315
+ image_input,
316
+ use_doc_orientation_classify,
317
+ use_doc_unwarping,
318
+ use_textline_orientation,
319
+ text_det_limit_type,
320
+ text_det_limit_side_len,
321
+ text_det_thresh,
322
+ text_det_box_thresh,
323
+ text_det_unclip_ratio,
324
+ text_rec_score_thresh,
325
+ ):
326
+ """Process uploaded file with API"""
327
+ try:
328
+ if not file_path and not image_input:
329
+ raise ValueError("Please upload a file first")
330
+ if file_path:
331
+ if Path(file_path).suffix == ".pdf":
332
+ file_type = "pdf"
333
+ else:
334
+ file_type = "image"
335
+ else:
336
+ file_path = image_input
337
+ file_type = "image"
338
+ # Read file content
339
+ with open(file_path, "rb") as f:
340
+ file_bytes = f.read()
341
+
342
+ # Call API for processing
343
+
344
+ file_data = base64.b64encode(file_bytes).decode("ascii")
345
+ headers = {
346
+ "Authorization": f"token {TOKEN}",
347
+ "Content-Type": "application/json",
348
+ }
349
+
350
+ response = requests.post(
351
+ API_URL,
352
+ json={
353
+ "file": file_data,
354
+ "fileType": 0 if file_type == "pdf" else 1,
355
+ "useDocOrientationClassify": use_doc_orientation_classify,
356
+ "useDocUnwarping": use_doc_unwarping,
357
+ "useTextlineOrientation": use_textline_orientation,
358
+ "textDetLimitType": text_det_limit_type,
359
+ "textTetLimitSideLen": text_det_limit_side_len,
360
+ "textDetThresh": text_det_thresh,
361
+ "textDetBoxThresh": text_det_box_thresh,
362
+ "textDetUnclipRatio": text_det_unclip_ratio,
363
+ "textRecScoreThresh": text_rec_score_thresh,
364
+ },
365
+ headers=headers,
366
+ timeout=1000,
367
+ )
368
+ try:
369
+ response.raise_for_status()
370
+ except requests.exceptions.RequestException as e:
371
+ raise RuntimeError("API request failed") from e
372
+ # Parse API response
373
+ result = response.json()
374
+ layout_results = result.get("result", {}).get("ocrResults", [])
375
+ overall_ocr_res_images = []
376
+ output_json = result.get("result", {})
377
+ input_images = []
378
+ input_images_gallery = []
379
+ for res in layout_results:
380
+ overall_ocr_res_images.append(url_to_bytes(res["ocrImage"]))
381
+ input_images.append(url_to_bytes(res["inputImage"]))
382
+ input_images_gallery.append(res["inputImage"])
383
+
384
+ return {
385
+ "original_file": file_path,
386
+ "file_type": file_type,
387
+ "overall_ocr_res_images": overall_ocr_res_images,
388
+ "output_json": output_json,
389
+ "input_images": input_images,
390
+ "input_images_gallery": input_images_gallery,
391
+ "api_response": result,
392
+ }
393
+
394
+ except requests.exceptions.RequestException as e:
395
+ raise gr.Error(f"API request failed: {str(e)}")
396
+ except Exception as e:
397
+ raise gr.Error(f"Error processing file: {str(e)}")
398
+
399
+
400
+ def export_full_results(results):
401
+ """Create ZIP file with all analysis results"""
402
+ try:
403
+ global tmp_time
404
+ if not results:
405
+ raise ValueError("No results to export")
406
+
407
+ filename = Path(results["original_file"]).stem + f"_{uuid.uuid4().hex}.zip"
408
+ zip_path = Path(TEMP_DIR.name, filename)
409
+
410
+ with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
411
+ for i, img_bytes in enumerate(results["overall_ocr_res_images"]):
412
+ zipf.writestr(f"overall_ocr_res_images/page_{i+1}.jpg", img_bytes)
413
+
414
+ zipf.writestr(
415
+ "output.json",
416
+ json.dumps(results["output_json"], indent=2, ensure_ascii=False),
417
+ )
418
+
419
+ # Add API response
420
+ api_response = results.get("api_response", {})
421
+ zipf.writestr(
422
+ "api_response.json",
423
+ json.dumps(api_response, indent=2, ensure_ascii=False),
424
+ )
425
+
426
+ for i, img_bytes in enumerate(results["input_images"]):
427
+ zipf.writestr(f"input_images/page_{i+1}.jpg", img_bytes)
428
+ with lock:
429
+ tmp_time[zip_path] = time.time()
430
+ return str(zip_path)
431
+
432
+ except Exception as e:
433
+ raise gr.Error(f"Error creating ZIP file: {str(e)}")
434
+
435
+
436
+ def on_file_change(file):
437
+ if file:
438
+ return gr.Textbox(
439
+ value=f"✅ Chosen file: {os.path.basename(file.name)}", visible=True
440
+ )
441
+ else:
442
+ return gr.Textbox()
443
+
444
+
445
+ def clear_file_selection():
446
+ return gr.File(value=None), gr.Textbox(value=None)
447
+
448
+
449
+ def clear_file_selection_examples(image_input):
450
+ text_name = "✅ Chosen file: " + os.path.basename(image_input)
451
+ return gr.File(value=None), gr.Textbox(value=text_name, visible=True)
452
+
453
+
454
+ def toggle_sections(choice):
455
+ return {
456
+ Module_Options: gr.Column(visible=(choice == "Module Options")),
457
+ Text_detection_Options: gr.Column(visible=(choice == "Text detection Options")),
458
+ }
459
+
460
+
461
+ # Interaction logic
462
+ def toggle_spinner():
463
+ return (
464
+ gr.Column(visible=True),
465
+ gr.Column(visible=False),
466
+ gr.File(visible=False),
467
+ gr.update(visible=False),
468
+ )
469
+
470
+
471
+ def hide_spinner():
472
+ return gr.Column(visible=False), gr.update(visible=True)
473
+
474
+
475
+ def update_display(results):
476
+ if not results:
477
+ return gr.skip()
478
+ assert len(results["overall_ocr_res_images"]) <= MAX_NUM_PAGES, len(
479
+ results["overall_ocr_res_images"]
480
+ )
481
+ assert len(results["input_images_gallery"]) <= MAX_NUM_PAGES, len(
482
+ results["input_images_gallery"]
483
+ )
484
+ gallery_list_imgs = []
485
+ for i in range(len(gallery_list)):
486
+ gallery_list_imgs.append(
487
+ gr.Gallery(
488
+ value=results["input_images_gallery"],
489
+ rows=len(results["input_images_gallery"]),
490
+ )
491
+ )
492
+ ocr_imgs = []
493
+ for img in results["overall_ocr_res_images"]:
494
+ ocr_imgs.append(gr.Image(value=bytes_to_image(img), visible=True))
495
+ for _ in range(len(results["overall_ocr_res_images"]), MAX_NUM_PAGES):
496
+ ocr_imgs.append(gr.Image(visible=False))
497
+
498
+ output_json = [gr.Markdown(value=results["output_json"], visible=True)]
499
+ return ocr_imgs + output_json + gallery_list_imgs
500
+
501
+
502
+ def update_image(evt: gr.SelectData):
503
+ update_images = []
504
+ for index in range(MAX_NUM_PAGES):
505
+ update_images.append(
506
+ gr.Image(visible=False) if index != evt.index else gr.Image(visible=True)
507
+ )
508
+ return update_images
509
+
510
+
511
+ def delete_file_periodically():
512
+ global tmp_time
513
+ while True:
514
+ current_time = time.time()
515
+ delete_tmp = []
516
+ for filename, strat_time in list(tmp_time.items()):
517
+ if (current_time - strat_time) >= TMP_DELETE_TIME:
518
+ if os.path.exists(filename):
519
+ os.remove(filename)
520
+ delete_tmp.append(filename)
521
+ for filename in delete_tmp:
522
+ with lock:
523
+ del tmp_time[filename]
524
+ time.sleep(THREAD_WAKEUP_TIME)
525
+
526
+
527
+ with gr.Blocks(css=CSS, title=TITLE, theme=paddle_theme) as demo:
528
+ results_state = gr.State()
529
+ gr.Markdown(
530
+ value=f"# PP-OCRv5 Online Demo",
531
+ elem_id="markdown-title",
532
+ )
533
+ gr.Markdown(value=DESCRIPTION)
534
+ gr.Markdown(
535
+ """
536
+ Since our inference server is deployed in mainland China, cross-border
537
+ network transmission may be slow, which could result in a suboptimal experience on Hugging Face.
538
+ We recommend visiting the [PaddlePaddle AI Studio Community](https://aistudio.baidu.com/community/app/91660/webUI?source=appCenter) to try the demo for a smoother experience.
539
+ """,
540
+ elem_classes=["tight-spacing-as"],
541
+ visible=True,
542
+ )
543
+ # Upload section
544
+ with gr.Row():
545
+ with gr.Column(scale=4):
546
+ file_input = gr.File(
547
+ label="Upload document",
548
+ file_types=[".pdf", ".jpg", ".jpeg", ".png"],
549
+ type="filepath",
550
+ visible=False,
551
+ )
552
+ file_select = gr.Textbox(label="Select File Path", visible=False)
553
+ image_input = gr.Image(
554
+ label="Image",
555
+ sources="upload",
556
+ type="filepath",
557
+ visible=False,
558
+ interactive=True,
559
+ placeholder="Click to upload image...",
560
+ )
561
+ pdf_btn = gr.Button(
562
+ "Click to upload file...",
563
+ variant="primary",
564
+ icon="icon/upload.png",
565
+ elem_classes=["square-pdf-btn"],
566
+ )
567
+ examples_image = gr.Examples(
568
+ fn=clear_file_selection_examples,
569
+ inputs=image_input,
570
+ outputs=[file_input, file_select],
571
+ examples_per_page=11,
572
+ examples=EXAMPLE_TEST,
573
+ run_on_click=True,
574
+ )
575
+
576
+ file_input.change(
577
+ fn=on_file_change, inputs=file_input, outputs=[file_select]
578
+ )
579
+ with gr.Column():
580
+ section_choice = gr.Dropdown(
581
+ choices=[
582
+ "Module Options",
583
+ "Text detection Options",
584
+ ],
585
+ value="Module Options",
586
+ label="Advance Options",
587
+ show_label=True,
588
+ container=True,
589
+ scale=0,
590
+ elem_classes=["tight-spacing"],
591
+ )
592
+ with gr.Column(
593
+ visible=True, elem_classes="left-margin-column"
594
+ ) as Module_Options:
595
+ use_doc_orientation_classify_md = gr.Markdown(
596
+ "### Using the document image orientation classification module",
597
+ elem_id="use_doc_orientation_classify_md",
598
+ )
599
+ use_doc_orientation_classify_rd = gr.Radio(
600
+ choices=[("yes", True), ("no", False)],
601
+ value=False,
602
+ interactive=True,
603
+ show_label=False,
604
+ elem_id="use_doc_orientation_classify_rd",
605
+ )
606
+ use_doc_unwarping_md = gr.Markdown(
607
+ "### Using the document unwarping module",
608
+ elem_id="use_doc_unwarping_md",
609
+ )
610
+ use_doc_unwarping_rd = gr.Radio(
611
+ choices=[("yes", True), ("no", False)],
612
+ value=False,
613
+ interactive=True,
614
+ show_label=False,
615
+ elem_id="use_doc_unwarping_rd",
616
+ )
617
+ use_textline_orientation_md = gr.Markdown(
618
+ "### Using the text line orientation classification module",
619
+ elem_id="use_textline_orientation_md",
620
+ )
621
+ use_textline_orientation_rd = gr.Radio(
622
+ choices=[("yes", True), ("no", False)],
623
+ value=False,
624
+ interactive=True,
625
+ show_label=False,
626
+ elem_id="use_textline_orientation_rd",
627
+ )
628
+ with gr.Column(
629
+ visible=False, elem_classes="left-margin-column"
630
+ ) as Text_detection_Options:
631
+ text_det_limit_type_md = gr.Markdown(
632
+ "### Image side length restriction type for text detection",
633
+ elem_id="text_det_limit_type_md",
634
+ )
635
+ text_det_limit_type_rd = gr.Radio(
636
+ choices=[("Short side", "min"), ("Long side", "max")],
637
+ value="min",
638
+ interactive=True,
639
+ show_label=False,
640
+ elem_id="text_det_limit_type_rd",
641
+ )
642
+ text_det_limit_side_len_md = gr.Markdown(
643
+ "### Layout region detection expansion coefficient",
644
+ elem_id="text_det_limit_side_len_md",
645
+ )
646
+ text_det_limit_side_len_nb = gr.Number(
647
+ value=736,
648
+ step=1,
649
+ minimum=0,
650
+ maximum=10000,
651
+ interactive=True,
652
+ show_label=False,
653
+ elem_id="text_det_limit_side_len_nb",
654
+ )
655
+ text_det_thresh_md = gr.Markdown(
656
+ "### Text detection pixel threshold",
657
+ elem_id="text_det_thresh_md",
658
+ )
659
+ text_det_thresh_nb = gr.Number(
660
+ value=0.30,
661
+ step=0.01,
662
+ minimum=0.00,
663
+ maximum=1.00,
664
+ interactive=True,
665
+ show_label=False,
666
+ elem_id="text_det_thresh_nb",
667
+ )
668
+ text_det_box_thresh_md = gr.Markdown(
669
+ "### Text detection box threshold",
670
+ elem_id="text_det_box_thresh_md",
671
+ )
672
+ text_det_box_thresh_nb = gr.Number(
673
+ value=0.60,
674
+ step=0.01,
675
+ minimum=0.00,
676
+ maximum=1.00,
677
+ interactive=True,
678
+ show_label=False,
679
+ elem_id="text_det_box_thresh_nb",
680
+ )
681
+ text_det_unclip_ratio_md = gr.Markdown(
682
+ "### Text detection unclip ratio",
683
+ elem_id="text_det_unclip_ratio_md",
684
+ )
685
+ text_det_unclip_ratio_nb = gr.Number(
686
+ value=1.5,
687
+ step=0.1,
688
+ minimum=0,
689
+ maximum=10.0,
690
+ interactive=True,
691
+ show_label=False,
692
+ elem_id="text_det_unclip_ratio_nb",
693
+ )
694
+
695
+ text_rec_score_thresh_md = gr.Markdown(
696
+ "### Text recognition score threshold",
697
+ elem_id="text_rec_score_thresh_md",
698
+ )
699
+ text_rec_score_thresh_nb = gr.Number(
700
+ value=0.00,
701
+ step=0.01,
702
+ minimum=0,
703
+ maximum=1.00,
704
+ interactive=True,
705
+ show_label=False,
706
+ elem_id="text_rec_score_thresh_nb",
707
+ )
708
+ with gr.Row():
709
+ process_btn = gr.Button(
710
+ "🚀 Parse Document", elem_id="analyze-btn", variant="primary"
711
+ )
712
+ download_all_btn = gr.Button(
713
+ "📦 Download Full Results (ZIP)",
714
+ elem_id="unzip-btn",
715
+ variant="primary",
716
+ )
717
+
718
+ # Results display section
719
+ with gr.Column(scale=7):
720
+ gr.Markdown("### Results", elem_classes="custom-markdown")
721
+ loading_spinner = gr.Column(
722
+ visible=False, elem_classes=["loader-container"]
723
+ )
724
+ with loading_spinner:
725
+ gr.HTML(
726
+ """
727
+ <div class="loader"></div>
728
+ <p>Processing, please wait...</p>
729
+ """
730
+ )
731
+ prepare_spinner = gr.Column(
732
+ visible=True, elem_classes=["loader-container-prepare"]
733
+ )
734
+ with prepare_spinner:
735
+ gr.HTML(
736
+ """
737
+ <div style="
738
+ max-width: 100%;
739
+ max-height: 100%;
740
+ margin: 24px 0 0 12px;
741
+ padding: 24px 32px;
742
+ border: 2px solid #A8C1E7;
743
+ border-radius: 12px;
744
+ background: #f8faff;
745
+ box-shadow: 0 2px 8px rgba(100,150,200,0.08);
746
+ font-size: 18px;
747
+ ">
748
+ <b>🚀 User Guide</b><br>
749
+ <b>Step 1:</b> Upload Your File<br>
750
+ Supported formats: JPG, PNG, PDF, JPEG<br>
751
+ <b>Step 2:</b> Click Analyze Document Button<br>
752
+ System will process automatically<br>
753
+ <b>Step 3:</b> Wait for Results<br>
754
+ Results will be displayed after processing<br>
755
+ <b>Step 4:</b> Download results zip<br>
756
+ Results zip will be displayed after processing<br><br>
757
+ <b>Attention:</b> Only the first 10 pages will be processed
758
+ </div>
759
+ """
760
+ )
761
+ download_file = gr.File(visible=False, label="Download File")
762
+ overall_ocr_res_images = []
763
+ output_json_list = []
764
+ gallery_list = []
765
+ with gr.Tabs(visible=False) as tabs:
766
+ with gr.Tab("OCR"):
767
+ with gr.Row():
768
+ with gr.Column(scale=2, min_width=1):
769
+ gallery_ocr_det = gr.Gallery(
770
+ show_label=False,
771
+ allow_preview=False,
772
+ preview=False,
773
+ columns=1,
774
+ min_width=10,
775
+ object_fit="contain",
776
+ )
777
+ gallery_list.append(gallery_ocr_det)
778
+ with gr.Column(scale=10):
779
+ for i in range(MAX_NUM_PAGES):
780
+ overall_ocr_res_images.append(
781
+ gr.Image(
782
+ label=f"OCR Image {i}",
783
+ show_label=True,
784
+ visible=False,
785
+ )
786
+ )
787
+ with gr.Tab("JSON"):
788
+ with gr.Row():
789
+ with gr.Column(scale=2, min_width=1):
790
+ gallery_json = gr.Gallery(
791
+ show_label=False,
792
+ allow_preview=False,
793
+ preview=False,
794
+ columns=1,
795
+ min_width=10,
796
+ object_fit="contain",
797
+ )
798
+ gallery_list.append(gallery_json)
799
+ with gr.Column(scale=10):
800
+ gr.HTML(
801
+ """
802
+ <style>
803
+ .line.svelte-19ir0ev svg {
804
+ width: 30px !important;
805
+ height: 30px !important;
806
+ min-width: 30px !important;
807
+ min-height: 30px !important;
808
+ padding: 0 !important;
809
+ font-size: 18px !important;
810
+ }
811
+ .line.svelte-19ir0ev span:contains('Object(') {
812
+ font-size: 12px;
813
+ }
814
+ </style>
815
+ """
816
+ )
817
+ output_json_list.append(
818
+ gr.JSON(
819
+ visible=False,
820
+ )
821
+ )
822
+ # # Navigation bar
823
+ with gr.Column(elem_classes=["nav-bar"]):
824
+ gr.HTML(
825
+ """
826
+ <div class="nav-links">
827
+ <a href="https://github.com/PaddlePaddle/PaddleOCR" class="nav-link" target="_blank">GitHub</a>
828
+ </div>
829
+ """
830
+ )
831
+
832
+ section_choice.change(
833
+ fn=toggle_sections,
834
+ inputs=section_choice,
835
+ outputs=[
836
+ Module_Options,
837
+ Text_detection_Options,
838
+ ],
839
+ )
840
+ pdf_btn.click(
841
+ fn=clear_file_selection, inputs=[], outputs=[file_input, file_select]
842
+ ).then(
843
+ None,
844
+ [],
845
+ [],
846
+ js="""
847
+ () => {
848
+ const fileInput = document.querySelector('input[type="file"]');
849
+ fileInput.value = '';
850
+ fileInput.click();
851
+ }
852
+ """,
853
+ )
854
+ process_btn.click(
855
+ toggle_spinner, outputs=[loading_spinner, prepare_spinner, download_file, tabs]
856
+ ).then(
857
+ process_file,
858
+ inputs=[
859
+ file_input,
860
+ image_input,
861
+ use_doc_orientation_classify_rd,
862
+ use_doc_unwarping_rd,
863
+ use_textline_orientation_rd,
864
+ text_det_limit_type_rd,
865
+ text_det_limit_side_len_nb,
866
+ text_det_thresh_nb,
867
+ text_det_box_thresh_nb,
868
+ text_det_unclip_ratio_nb,
869
+ text_rec_score_thresh_nb,
870
+ ],
871
+ outputs=[results_state],
872
+ ).then(
873
+ hide_spinner, outputs=[loading_spinner, tabs]
874
+ ).then(
875
+ update_display,
876
+ inputs=[results_state],
877
+ outputs=overall_ocr_res_images + output_json_list + gallery_list,
878
+ )
879
+ gallery_ocr_det.select(update_image, outputs=overall_ocr_res_images)
880
+
881
+ download_all_btn.click(
882
+ export_full_results, inputs=[results_state], outputs=[download_file]
883
+ ).success(lambda: gr.File(visible=True), outputs=[download_file])
884
+
885
+ demo.load(
886
+ fn=lambda: None,
887
+ inputs=[],
888
+ outputs=[],
889
+ js=f"""
890
+ () => {{
891
+ const tooltipTexts = {TOOLTIP_RADIO};
892
+ let tooltip = document.getElementById("custom-tooltip");
893
+ if (!tooltip) {{
894
+ tooltip = document.createElement("div");
895
+ tooltip.id = "custom-tooltip";
896
+ tooltip.style.position = "fixed";
897
+ tooltip.style.background = "rgba(0, 0, 0, 0.75)";
898
+ tooltip.style.color = "white";
899
+ tooltip.style.padding = "6px 10px";
900
+ tooltip.style.borderRadius = "4px";
901
+ tooltip.style.fontSize = "13px";
902
+ tooltip.style.maxWidth = "300px";
903
+ tooltip.style.zIndex = "10000";
904
+ tooltip.style.pointerEvents = "none";
905
+ tooltip.style.transition = "opacity 0.2s";
906
+ tooltip.style.opacity = "0";
907
+ tooltip.style.whiteSpace = "normal";
908
+ document.body.appendChild(tooltip);
909
+ }}
910
+ Object.keys(tooltipTexts).forEach(id => {{
911
+ const elem = document.getElementById(id);
912
+ if (!elem) return;
913
+ function showTooltip(e) {{
914
+ tooltip.style.opacity = "1";
915
+ tooltip.innerText = tooltipTexts[id];
916
+ let x = e.clientX + 10;
917
+ let y = e.clientY + 10;
918
+ if (x + tooltip.offsetWidth > window.innerWidth) {{
919
+ x = e.clientX - tooltip.offsetWidth - 10;
920
+ }}
921
+ if (y + tooltip.offsetHeight > window.innerHeight) {{
922
+ y = e.clientY - tooltip.offsetHeight - 10;
923
+ }}
924
+ tooltip.style.left = x + "px";
925
+ tooltip.style.top = y + "px";
926
+ }}
927
+
928
+ function hideTooltip() {{
929
+ tooltip.style.opacity = "0";
930
+ }}
931
+
932
+ elem.addEventListener("mousemove", showTooltip);
933
+ elem.addEventListener("mouseleave", hideTooltip);
934
+ }});
935
+ }}
936
+ """,
937
+ )
938
+
939
+
940
+ if __name__ == "__main__":
941
+ t = threading.Thread(target=delete_file_periodically)
942
+ t.start()
943
+ demo.launch(
944
+ server_name="0.0.0.0",
945
+ server_port=7860,
946
+ )
icon/upload.png ADDED

Git LFS Details

  • SHA256: bf7780dd26d21a5f09ef9c0f9c86ba992b2bf53fcd5f5618617cce33afec44b6
  • Pointer size: 130 Bytes
  • Size of remote file: 49.2 kB
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
- gradio==5.30.0
2
- pillow==9.5.0
3
- requests==2.31.0
 
1
+ pillow==9.5.0
2
+ requests==2.31.0