Update app.py
app.py CHANGED
@@ -22,22 +22,6 @@ from transformers import (
 )
 from transformers.image_utils import load_image
 
-#theme:custom
-#custom_theme = gr.themes.Base(
-#    primary_hue="indigo",
-#    secondary_hue="violet",
-#    neutral_hue="gray"
-#).set(
-#    body_background_fill="#f7f5fa",
-#    body_text_color="#1f1f1f",
-#    input_background_fill="#ffffff",
-#    button_primary_background_fill="#8b5cf6",
-#    button_primary_text_color="#ffffff",
-#    button_secondary_background_fill="#e0d7f5",
-#    button_secondary_text_color="#1f1f1f",
-#    shadow_spread="sm"
-#)
-
 # Constants for text generation
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
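The block removed above is the commented-out custom Gradio theme; the Space continues to use the `bethecloud/storj_theme` string passed to `gr.Blocks`. For reference only, a minimal sketch of how that theme could be re-enabled, assuming a current Gradio version (`gr.themes.Base(...).set(...)` and `gr.Blocks(theme=...)` are standard Gradio APIs; the values are the ones from the deleted comments):

import gradio as gr

# Sketch only: rebuilds the custom theme that this commit deletes (it was
# already commented out in app.py) and passes it to gr.Blocks in place of
# the "bethecloud/storj_theme" string the Space actually uses.
custom_theme = gr.themes.Base(
    primary_hue="indigo",
    secondary_hue="violet",
    neutral_hue="gray",
).set(
    body_background_fill="#f7f5fa",
    body_text_color="#1f1f1f",
    input_background_fill="#ffffff",
    button_primary_background_fill="#8b5cf6",
    button_primary_text_color="#ffffff",
    button_secondary_background_fill="#e0d7f5",
    button_secondary_text_color="#1f1f1f",
    shadow_spread="sm",
)

with gr.Blocks(theme=custom_theme) as demo:
    gr.Markdown("Theme preview")  # placeholder content for the sketch

demo.launch()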
@@ -307,9 +291,10 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     with gr.Column(elem_classes="canvas-output"):
         gr.Markdown("## Output")
         output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2, show_copy_button=True)
-
+
         with gr.Accordion("(Result.md)", open=False):
             markdown_output = gr.Markdown(label="Formatted Result (Result.Md)")
+
     model_choice = gr.Radio(
         choices=["olmOCR-7B-0725", "Nanonets-OCR-s", "RolmOCR-7B",
                  "Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
@@ -322,7 +307,8 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     gr.Markdown("> [Qwen2-VL-OCR-2B](https://huggingface.co/prithivMLmods/Qwen2-VL-OCR-2B-Instruct): qwen2-vl-ocr-2b-instruct model is a fine-tuned version of qwen2-vl-2b-instruct, tailored for tasks that involve [messy] optical character recognition (ocr), image-to-text conversion, and math problem solving with latex formatting.")
     gr.Markdown("> [RolmOCR](https://huggingface.co/reducto/RolmOCR): rolmocr, high-quality, openly available approach to parsing pdfs and other complex documents optical character recognition. it is designed to handle a wide range of document types, including scanned documents, handwritten text, and complex layouts.")
     gr.Markdown("> [Aya-Vision](https://huggingface.co/CohereLabs/aya-vision-8b): cohere labs aya vision 8b is an open weights research release of an 8-billion parameter model with advanced capabilities optimized for a variety of vision-language use cases, including ocr, captioning, visual reasoning, summarization, question answering, code, and more.")
-
+
+    gr.Markdown("> ⚠️ Note: Models in this space may not perform well on video inference tasks.")
 
     image_submit.click(
         fn=generate_image,
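The last hunk ends inside the event wiring, so the full argument list of `image_submit.click` is not visible in this diff. Purely for orientation, a minimal sketch of how such a handler is typically wired in Gradio; only `model_choice`, `output`, and `markdown_output` appear above, while `image_query`, `image_upload`, and `max_new_tokens` are hypothetical placeholder names:

# Sketch only: the real inputs list in app.py is truncated in this diff.
image_submit.click(
    fn=generate_image,                  # generation function defined elsewhere in app.py
    inputs=[model_choice,               # radio button selecting the OCR model
            image_query,                # hypothetical text prompt component
            image_upload,               # hypothetical image input component
            max_new_tokens],            # hypothetical generation-length slider
    outputs=[output, markdown_output],  # raw stream textbox and formatted Markdown panel
)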