Spaces:

merterbak
/

Mistral-OCR

Running

App Files Files Community

merterbak committed on May 2

Commit

6f73570

verified ·

1 Parent(s): 9f1a5c3

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -19

app.py CHANGED Viewed

@@ -6,27 +6,33 @@ from mistralai import Mistral
 from PIL import Image
 from pathlib import Path
-api_key = os.environ.get("MISTRAL")
-client = Mistral(api_key=api_key)
-#config
 VALID_DOCUMENT_EXTENSIONS = {".pdf"}
-VALID_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png",}
-def upload_pdf(content, filename):
     uploaded_file = client.files.upload(
         file={"file_name": filename, "content": content},
         purpose="ocr",
     )
     signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
     return signed_url.url
-def process_ocr(document_source):
     return client.ocr.process(
         model="mistral-ocr-latest",
         document=document_source,
         include_image_base64=True
     )
-def do_ocr(input_type, url=None, file=None):
     document_source = None
     if input_type == "URL":
@@ -46,7 +52,7 @@ def do_ocr(input_type, url=None, file=None):
         if file_extension in VALID_DOCUMENT_EXTENSIONS:
             with open(file.name, "rb") as f:
                 content = f.read()
-            signed_url = upload_pdf(content, os.path.basename(file_name))
             document_source = {"type": "document_url", "document_url": signed_url}
         elif file_extension in VALID_IMAGE_EXTENSIONS:
             img = Image.open(file)
@@ -58,9 +64,13 @@ def do_ocr(input_type, url=None, file=None):
             return f"Error: Unsupported file type. Supported types: {', '.join(VALID_DOCUMENT_EXTENSIONS | VALID_IMAGE_EXTENSIONS)}", "", []
     else:
-        return "Invalid input type ", "", []
-    ocr_response = process_ocr(document_source)
     markdown_text = "\n\n".join(page.markdown for page in ocr_response.pages)
     extracted_text = markdown_text
     rendered_markdown = markdown_text
@@ -86,10 +96,10 @@ def do_ocr(input_type, url=None, file=None):
                 rendered_markdown += f"\n\n[Image Warning: No base64 data for {img.id}]"
     return extracted_text.strip(), rendered_markdown.strip(), images
 custom_css = """
-    body {font-family: body {font-family: 'Helvetica Neue', Helvetica;}
-    .gr-button {background-color: #4CAF50; color: white; border: none; padding: 10px 20px; border-radius: 5px;}
     .gr-button:hover {background-color: #45a049;}
     .gr-textbox {margin-bottom: 15px;}
     .example-button {background-color: #1E90FF; color: white; border: none; padding: 8px 15px; border-radius: 5px; margin: 5px;}
@@ -97,16 +107,24 @@ custom_css = """
     .tall-radio .gr-radio-item {padding: 15px 0; min-height: 50px; display: flex; align-items: center;}
     .tall-radio label {font-size: 16px;}
 """
 with gr.Blocks(
     title="Mistral OCR Demo",
     css=custom_css,
     theme=gr.themes.Soft()
 ) as demo:
     gr.Markdown("<h1 style='text-align: center; color: #333;'>Mistral OCR Demo</h1>")
-    gr.Markdown("<p style='text-align: center; color: #666;'>Extract text and images from PDFs or images using Mistral's latest OCR model. You can also see markdown live.</p>")
     with gr.Row():
         with gr.Column(scale=1):
             input_type = gr.Radio(
                 choices=["URL", "Upload file"],
                 label="Input Type",
@@ -115,7 +133,7 @@ with gr.Blocks(
             )
             url_input = gr.Textbox(
                 label="Document or Image URL",
-                placeholder="e.g., https://arxiv.org/pdf/2501.12948",
                 visible=True,
                 lines=1
             )
@@ -135,7 +153,9 @@ with gr.Blocks(
     def update_visibility(choice):
         return gr.update(visible=(choice == "URL")), gr.update(visible=(choice == "Upload file"))
     input_type.change(fn=update_visibility, inputs=input_type, outputs=[url_input, file_input])
     def set_url_and_type(url):
         return url, "URL"
@@ -150,9 +170,9 @@ with gr.Blocks(
     submit_btn.click(
         fn=do_ocr,
-        inputs=[input_type, url_input, file_input],
         outputs=[cleaned_output, markdown_output, image_output]
     )
 if __name__ == "__main__":
-    demo.launch()

 from PIL import Image
 from pathlib import Path
+# Config
 VALID_DOCUMENT_EXTENSIONS = {".pdf"}
+VALID_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png"}
+def upload_pdf(content, filename, api_key):
+    client = Mistral(api_key=api_key)
     uploaded_file = client.files.upload(
         file={"file_name": filename, "content": content},
         purpose="ocr",
     )
     signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
     return signed_url.url
+def process_ocr(document_source, api_key):
+    client = Mistral(api_key=api_key)
     return client.ocr.process(
         model="mistral-ocr-latest",
         document=document_source,
         include_image_base64=True
     )
+def do_ocr(input_type, url, file, api_key):
+    # Check UI-provided API key first, then fall back to environment variable
+    api_key = api_key.strip() if api_key and api_key.strip() else os.environ.get("MISTRAL")
+    if not api_key:
+        return "Please provide a valid Mistral API key via the input field or set the MISTRAL environment variable.", "", []
     document_source = None
     if input_type == "URL":
         if file_extension in VALID_DOCUMENT_EXTENSIONS:
             with open(file.name, "rb") as f:
                 content = f.read()
+            signed_url = upload_pdf(content, os.path.basename(file_name), api_key)
             document_source = {"type": "document_url", "document_url": signed_url}
         elif file_extension in VALID_IMAGE_EXTENSIONS:
             img = Image.open(file)
             return f"Error: Unsupported file type. Supported types: {', '.join(VALID_DOCUMENT_EXTENSIONS | VALID_IMAGE_EXTENSIONS)}", "", []
     else:
+        return "Invalid input type.", "", []
+    try:
+        ocr_response = process_ocr(document_source, api_key)
+    except Exception as e:
+        return f"Error processing OCR: {str(e)}", "", []
     markdown_text = "\n\n".join(page.markdown for page in ocr_response.pages)
     extracted_text = markdown_text
     rendered_markdown = markdown_text
                 rendered_markdown += f"\n\n[Image Warning: No base64 data for {img.id}]"
     return extracted_text.strip(), rendered_markdown.strip(), images
 custom_css = """
+    body {font-family: 'Helvetica Neue', Helvetica;}
+    .gr-button {background-color: #4CAF50; color: white; border: none; padding: 10px 20px; border-radius: 5px;}
     .gr-button:hover {background-color: #45a049;}
     .gr-textbox {margin-bottom: 15px;}
     .example-button {background-color: #1E90FF; color: white; border: none; padding: 8px 15px; border-radius: 5px; margin: 5px;}
     .tall-radio .gr-radio-item {padding: 15px 0; min-height: 50px; display: flex; align-items: center;}
     .tall-radio label {font-size: 16px;}
 """
 with gr.Blocks(
     title="Mistral OCR Demo",
     css=custom_css,
     theme=gr.themes.Soft()
 ) as demo:
     gr.Markdown("<h1 style='text-align: center; color: #333;'>Mistral OCR Demo</h1>")
+    gr.Markdown("<p style='text-align: center; color: #666;'>Extract text and images from PDFs or images using Mistral's latest OCR model. Visit <a href='https://console.mistral.ai/'>Mistral AI Console</a> to manage your API key. You can also see markdown live.</p>")
+    gr.Markdown("<p style='text-align: center; color: #666;'>Provide your Mistral API key below or set the MISTRAL environment variable.</p>")
     with gr.Row():
         with gr.Column(scale=1):
+            api_key_input = gr.Textbox(
+                label="Mistral API Key",
+                placeholder="Paste your Mistral API key here (or use MISTRAL env variable)",
+                type="password",
+                lines=1
+            )
             input_type = gr.Radio(
                 choices=["URL", "Upload file"],
                 label="Input Type",
             )
             url_input = gr.Textbox(
                 label="Document or Image URL",
+                placeholder="e.g., https://arxiv.org/pdf/2501.12948",
                 visible=True,
                 lines=1
             )
     def update_visibility(choice):
         return gr.update(visible=(choice == "URL")), gr.update(visible=(choice == "Upload file"))
     input_type.change(fn=update_visibility, inputs=input_type, outputs=[url_input, file_input])
     def set_url_and_type(url):
         return url, "URL"
     submit_btn.click(
         fn=do_ocr,
+        inputs=[input_type, url_input, file_input, api_key_input],
         outputs=[cleaned_output, markdown_output, image_output]
     )
 if __name__ == "__main__":
+    demo.launch()