Ashwin V. Mohanan committed on
Commit
b0bdc40
·
1 Parent(s): ec94475

Add poppler to allow processing PDF and fix filetype filter

Browse files
Dockerfile CHANGED
@@ -29,6 +29,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
29
  libsm6 \
30
  libxext6 \
31
  libgl1 \
 
32
  && apt-get clean && rm -rf /var/lib/apt/lists/*
33
 
34
  # Create a non-root user
 
29
  libsm6 \
30
  libxext6 \
31
  libgl1 \
32
+ poppler \
33
  && apt-get clean && rm -rf /var/lib/apt/lists/*
34
 
35
  # Create a non-root user
app/content/sidebar.md CHANGED
@@ -5,8 +5,11 @@ handwritten observations in weather journals
5
 
6
  ### How It Works
7
 
8
- 1. **Upload:** Select from examples on the right or submit your own image file (WIP!) to run through Dawsonia.
9
- 2. **Results:** View the digitized text generated by the system.
 
 
 
10
  <!-- 3. **Export:** Choose your preferred format and download your results. -->
11
 
12
  > Note: This demo application is for demonstration purposes only and is not intended for production use.
 
5
 
6
  ### How It Works
7
 
8
+ 1. **Upload** tab: Either,
9
+ **1a.** select from the **examples** on the right side bar, or
10
+ **1b.** submit your own PDF / Zarr.zip file and **upload**.
11
+ 2. **Digitize**: click on the blue button to digitize the data previewed above.
12
+ 3. **Results** tab: View the digitized text generated by the system.
13
  <!-- 3. **Export:** Choose your preferred format and download your results. -->
14
 
15
  > Note: This demo application is for demonstration purposes only and is not intended for production use.
app/tabs/submit.py CHANGED
@@ -50,12 +50,12 @@ with gr.Blocks() as submit:
50
 
51
  upload_file = gr.File(
52
  label="1b. Upload a .pdf or .zarr.zip file",
53
- file_types=[".pdf", ".zarr.zip"],
54
  )
55
 
56
  # upload_file_true_path = gr.Textbox(visible=False)
57
 
58
- upload_button = gr.Button(value="Upload", min_width=200)
59
 
60
  with Modal(visible=False) as edit_table_fmt_modal:
61
  with gr.Column():
@@ -88,7 +88,7 @@ with gr.Blocks() as submit:
88
  )
89
 
90
  with gr.Row():
91
- run_button = gr.Button("Digitize", variant="primary", scale=0, min_width=200)
92
  edit_table_fmt_button = gr.Button(
93
  "Edit table format", variant="secondary", scale=0, min_width=200
94
  )
 
50
 
51
  upload_file = gr.File(
52
  label="1b. Upload a .pdf or .zarr.zip file",
53
+ file_types=[".pdf", ".zarr.zip", ".zip"],
54
  )
55
 
56
  # upload_file_true_path = gr.Textbox(visible=False)
57
 
58
+ upload_button = gr.Button(value="1b. Upload", min_width=200)
59
 
60
  with Modal(visible=False) as edit_table_fmt_modal:
61
  with gr.Column():
 
88
  )
89
 
90
  with gr.Row():
91
+ run_button = gr.Button("2. Digitize", variant="primary", scale=0, min_width=200)
92
  edit_table_fmt_button = gr.Button(
93
  "Edit table format", variant="secondary", scale=0, min_width=200
94
  )
app/tabs/submit_functions.py CHANGED
@@ -228,7 +228,10 @@ def get_selected_example_image(
228
  """
229
  orig_name = event.value["image"]["orig_name"]
230
  # for name, details in PIPELINES.items():
231
- name, _ext = orig_name.split(".")
 
 
 
232
 
233
  station_tf = Path("table_formats", name).with_suffix(".toml")
234
 
@@ -272,7 +275,11 @@ def get_uploaded_image(
272
  first_page: int, last_page: int, table_fmt_filename: str, filename: str
273
  ) -> tuple[list[NDArray], io.Book, str, str] | None:
274
 
275
- name, _ext = filename.split(".")
 
 
 
 
276
  station_tf = Path("table_formats", table_fmt_filename)
277
  if not station_tf.exists():
278
  station_tf = Path("table_formats", "bjuröklubb.toml")
 
228
  """
229
  orig_name = event.value["image"]["orig_name"]
230
  # for name, details in PIPELINES.items():
231
+ orig_path = Path(orig_name)
232
+ name = orig_path.name
233
+ for suffix in orig_path.suffixes[::-1]:
234
+ name = name.removesuffix(suffix)
235
 
236
  station_tf = Path("table_formats", name).with_suffix(".toml")
237
 
 
275
  first_page: int, last_page: int, table_fmt_filename: str, filename: str
276
  ) -> tuple[list[NDArray], io.Book, str, str] | None:
277
 
278
+ orig_path = Path(filename)
279
+ name = orig_path.name
280
+ for suffix in orig_path.suffixes[::-1]:
281
+ name = name.removesuffix(suffix)
282
+
283
  station_tf = Path("table_formats", table_fmt_filename)
284
  if not station_tf.exists():
285
  station_tf = Path("table_formats", "bjuröklubb.toml")