Spaces:
Running
Running
Ashwin V. Mohanan
committed on
Commit
·
b0bdc40
1
Parent(s):
ec94475
Add poppler to allow processing PDF and fix filetype filter
Browse files- Dockerfile +1 -0
- app/content/sidebar.md +5 -2
- app/tabs/submit.py +3 -3
- app/tabs/submit_functions.py +9 -2
Dockerfile
CHANGED
@@ -29,6 +29,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
|
|
29 |
libsm6 \
|
30 |
libxext6 \
|
31 |
libgl1 \
|
|
|
32 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
33 |
|
34 |
# Create a non-root user
|
|
|
29 |
libsm6 \
|
30 |
libxext6 \
|
31 |
libgl1 \
|
32 |
+
poppler \
|
33 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
34 |
|
35 |
# Create a non-root user
|
app/content/sidebar.md
CHANGED
@@ -5,8 +5,11 @@ handwritten observations in weather journals
|
|
5 |
|
6 |
### How It Works
|
7 |
|
8 |
-
1. **Upload
|
9 |
-
|
|
|
|
|
|
|
10 |
<!-- 3. **Export:** Choose your preferred format and download your results. -->
|
11 |
|
12 |
> Note: This demo application is for demonstration purposes only and is not intended for production use.
|
|
|
5 |
|
6 |
### How It Works
|
7 |
|
8 |
+
1. **Upload** tab: Either,
|
9 |
+
**1a.** select from the **examples** on the right sidebar, or
|
10 |
+
**1b.** submit your own `.pdf` or `.zarr.zip` file and click **Upload**.
|
11 |
+
2. **Digitize**: click on the blue button to digitize the data previewed above.
|
12 |
+
3. **Results** tab: View the digitized text generated by the system.
|
13 |
<!-- 3. **Export:** Choose your preferred format and download your results. -->
|
14 |
|
15 |
> Note: This demo application is for demonstration purposes only and is not intended for production use.
|
app/tabs/submit.py
CHANGED
@@ -50,12 +50,12 @@ with gr.Blocks() as submit:
|
|
50 |
|
51 |
upload_file = gr.File(
|
52 |
label="1b. Upload a .pdf or .zarr.zip file",
|
53 |
-
file_types=[".pdf", ".zarr.zip"],
|
54 |
)
|
55 |
|
56 |
# upload_file_true_path = gr.Textbox(visible=False)
|
57 |
|
58 |
-
upload_button = gr.Button(value="Upload", min_width=200)
|
59 |
|
60 |
with Modal(visible=False) as edit_table_fmt_modal:
|
61 |
with gr.Column():
|
@@ -88,7 +88,7 @@ with gr.Blocks() as submit:
|
|
88 |
)
|
89 |
|
90 |
with gr.Row():
|
91 |
-
run_button = gr.Button("Digitize", variant="primary", scale=0, min_width=200)
|
92 |
edit_table_fmt_button = gr.Button(
|
93 |
"Edit table format", variant="secondary", scale=0, min_width=200
|
94 |
)
|
|
|
50 |
|
51 |
upload_file = gr.File(
|
52 |
label="1b. Upload a .pdf or .zarr.zip file",
|
53 |
+
file_types=[".pdf", ".zarr.zip", ".zip"],
|
54 |
)
|
55 |
|
56 |
# upload_file_true_path = gr.Textbox(visible=False)
|
57 |
|
58 |
+
upload_button = gr.Button(value="1b. Upload", min_width=200)
|
59 |
|
60 |
with Modal(visible=False) as edit_table_fmt_modal:
|
61 |
with gr.Column():
|
|
|
88 |
)
|
89 |
|
90 |
with gr.Row():
|
91 |
+
run_button = gr.Button("2. Digitize", variant="primary", scale=0, min_width=200)
|
92 |
edit_table_fmt_button = gr.Button(
|
93 |
"Edit table format", variant="secondary", scale=0, min_width=200
|
94 |
)
|
app/tabs/submit_functions.py
CHANGED
@@ -228,7 +228,10 @@ def get_selected_example_image(
|
|
228 |
"""
|
229 |
orig_name = event.value["image"]["orig_name"]
|
230 |
# for name, details in PIPELINES.items():
|
231 |
-
|
|
|
|
|
|
|
232 |
|
233 |
station_tf = Path("table_formats", name).with_suffix(".toml")
|
234 |
|
@@ -272,7 +275,11 @@ def get_uploaded_image(
|
|
272 |
first_page: int, last_page: int, table_fmt_filename: str, filename: str
|
273 |
) -> tuple[list[NDArray], io.Book, str, str] | None:
|
274 |
|
275 |
-
|
|
|
|
|
|
|
|
|
276 |
station_tf = Path("table_formats", table_fmt_filename)
|
277 |
if not station_tf.exists():
|
278 |
station_tf = Path("table_formats", "bjuröklubb.toml")
|
|
|
228 |
"""
|
229 |
orig_name = event.value["image"]["orig_name"]
|
230 |
# for name, details in PIPELINES.items():
|
231 |
+
orig_path = Path(orig_name)
|
232 |
+
name = orig_path.name
|
233 |
+
for suffix in orig_path.suffixes[::-1]:
|
234 |
+
name = name.removesuffix(suffix)
|
235 |
|
236 |
station_tf = Path("table_formats", name).with_suffix(".toml")
|
237 |
|
|
|
275 |
first_page: int, last_page: int, table_fmt_filename: str, filename: str
|
276 |
) -> tuple[list[NDArray], io.Book, str, str] | None:
|
277 |
|
278 |
+
orig_path = Path(filename)
|
279 |
+
name = orig_path.name
|
280 |
+
for suffix in orig_path.suffixes[::-1]:
|
281 |
+
name = name.removesuffix(suffix)
|
282 |
+
|
283 |
station_tf = Path("table_formats", table_fmt_filename)
|
284 |
if not station_tf.exists():
|
285 |
station_tf = Path("table_formats", "bjuröklubb.toml")
|