merterbak committed on
Commit
6f73570
·
verified ·
1 Parent(s): 9f1a5c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -19
app.py CHANGED
@@ -6,27 +6,33 @@ from mistralai import Mistral
6
  from PIL import Image
7
  from pathlib import Path
8
 
9
- api_key = os.environ.get("MISTRAL")
10
- client = Mistral(api_key=api_key)
11
-
12
- #config
13
  VALID_DOCUMENT_EXTENSIONS = {".pdf"}
14
- VALID_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png",}
15
 
16
- def upload_pdf(content, filename):
 
17
  uploaded_file = client.files.upload(
18
  file={"file_name": filename, "content": content},
19
  purpose="ocr",
20
  )
21
  signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
22
  return signed_url.url
23
- def process_ocr(document_source):
 
 
24
  return client.ocr.process(
25
  model="mistral-ocr-latest",
26
  document=document_source,
27
  include_image_base64=True
28
  )
29
- def do_ocr(input_type, url=None, file=None):
 
 
 
 
 
 
30
  document_source = None
31
 
32
  if input_type == "URL":
@@ -46,7 +52,7 @@ def do_ocr(input_type, url=None, file=None):
46
  if file_extension in VALID_DOCUMENT_EXTENSIONS:
47
  with open(file.name, "rb") as f:
48
  content = f.read()
49
- signed_url = upload_pdf(content, os.path.basename(file_name))
50
  document_source = {"type": "document_url", "document_url": signed_url}
51
  elif file_extension in VALID_IMAGE_EXTENSIONS:
52
  img = Image.open(file)
@@ -58,9 +64,13 @@ def do_ocr(input_type, url=None, file=None):
58
  return f"Error: Unsupported file type. Supported types: {', '.join(VALID_DOCUMENT_EXTENSIONS | VALID_IMAGE_EXTENSIONS)}", "", []
59
 
60
  else:
61
- return "Invalid input type ", "", []
 
 
 
 
 
62
 
63
- ocr_response = process_ocr(document_source)
64
  markdown_text = "\n\n".join(page.markdown for page in ocr_response.pages)
65
  extracted_text = markdown_text
66
  rendered_markdown = markdown_text
@@ -86,10 +96,10 @@ def do_ocr(input_type, url=None, file=None):
86
  rendered_markdown += f"\n\n[Image Warning: No base64 data for {img.id}]"
87
 
88
  return extracted_text.strip(), rendered_markdown.strip(), images
89
-
90
  custom_css = """
91
- body {font-family: body {font-family: 'Helvetica Neue', Helvetica;}
92
- .gr-button {background-color: #4CAF50; color: white; border: none; padding: 10px 20px; border-radius: 5px;}
93
  .gr-button:hover {background-color: #45a049;}
94
  .gr-textbox {margin-bottom: 15px;}
95
  .example-button {background-color: #1E90FF; color: white; border: none; padding: 8px 15px; border-radius: 5px; margin: 5px;}
@@ -97,16 +107,24 @@ custom_css = """
97
  .tall-radio .gr-radio-item {padding: 15px 0; min-height: 50px; display: flex; align-items: center;}
98
  .tall-radio label {font-size: 16px;}
99
  """
 
100
  with gr.Blocks(
101
  title="Mistral OCR Demo",
102
  css=custom_css,
103
  theme=gr.themes.Soft()
104
  ) as demo:
105
  gr.Markdown("<h1 style='text-align: center; color: #333;'>Mistral OCR Demo</h1>")
106
- gr.Markdown("<p style='text-align: center; color: #666;'>Extract text and images from PDFs or images using Mistral's latest OCR model. You can also see markdown live.</p>")
 
107
 
108
  with gr.Row():
109
  with gr.Column(scale=1):
 
 
 
 
 
 
110
  input_type = gr.Radio(
111
  choices=["URL", "Upload file"],
112
  label="Input Type",
@@ -115,7 +133,7 @@ with gr.Blocks(
115
  )
116
  url_input = gr.Textbox(
117
  label="Document or Image URL",
118
- placeholder="e.g., https://arxiv.org/pdf/2501.12948",
119
  visible=True,
120
  lines=1
121
  )
@@ -135,7 +153,9 @@ with gr.Blocks(
135
 
136
  def update_visibility(choice):
137
  return gr.update(visible=(choice == "URL")), gr.update(visible=(choice == "Upload file"))
 
138
  input_type.change(fn=update_visibility, inputs=input_type, outputs=[url_input, file_input])
 
139
  def set_url_and_type(url):
140
  return url, "URL"
141
 
@@ -150,9 +170,9 @@ with gr.Blocks(
150
 
151
  submit_btn.click(
152
  fn=do_ocr,
153
- inputs=[input_type, url_input, file_input],
154
  outputs=[cleaned_output, markdown_output, image_output]
155
  )
156
-
157
  if __name__ == "__main__":
158
- demo.launch()
 
6
  from PIL import Image
7
  from pathlib import Path
8
 
9
+ # Config
 
 
 
10
  VALID_DOCUMENT_EXTENSIONS = {".pdf"}
11
+ VALID_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png"}
12
 
13
+ def upload_pdf(content, filename, api_key):
14
+ client = Mistral(api_key=api_key)
15
  uploaded_file = client.files.upload(
16
  file={"file_name": filename, "content": content},
17
  purpose="ocr",
18
  )
19
  signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
20
  return signed_url.url
21
+
22
+ def process_ocr(document_source, api_key):
23
+ client = Mistral(api_key=api_key)
24
  return client.ocr.process(
25
  model="mistral-ocr-latest",
26
  document=document_source,
27
  include_image_base64=True
28
  )
29
+
30
+ def do_ocr(input_type, url, file, api_key):
31
+ # Check UI-provided API key first, then fall back to environment variable
32
+ api_key = api_key.strip() if api_key and api_key.strip() else os.environ.get("MISTRAL")
33
+ if not api_key:
34
+ return "Please provide a valid Mistral API key via the input field or set the MISTRAL environment variable.", "", []
35
+
36
  document_source = None
37
 
38
  if input_type == "URL":
 
52
  if file_extension in VALID_DOCUMENT_EXTENSIONS:
53
  with open(file.name, "rb") as f:
54
  content = f.read()
55
+ signed_url = upload_pdf(content, os.path.basename(file_name), api_key)
56
  document_source = {"type": "document_url", "document_url": signed_url}
57
  elif file_extension in VALID_IMAGE_EXTENSIONS:
58
  img = Image.open(file)
 
64
  return f"Error: Unsupported file type. Supported types: {', '.join(VALID_DOCUMENT_EXTENSIONS | VALID_IMAGE_EXTENSIONS)}", "", []
65
 
66
  else:
67
+ return "Invalid input type.", "", []
68
+
69
+ try:
70
+ ocr_response = process_ocr(document_source, api_key)
71
+ except Exception as e:
72
+ return f"Error processing OCR: {str(e)}", "", []
73
 
 
74
  markdown_text = "\n\n".join(page.markdown for page in ocr_response.pages)
75
  extracted_text = markdown_text
76
  rendered_markdown = markdown_text
 
96
  rendered_markdown += f"\n\n[Image Warning: No base64 data for {img.id}]"
97
 
98
  return extracted_text.strip(), rendered_markdown.strip(), images
99
+
100
  custom_css = """
101
+ body {font-family: 'Helvetica Neue', Helvetica;}
102
+ .gr-button {background-color: #4CAF50; color: white; border: none; padding: 10px 20px; border-radius: 5px;}
103
  .gr-button:hover {background-color: #45a049;}
104
  .gr-textbox {margin-bottom: 15px;}
105
  .example-button {background-color: #1E90FF; color: white; border: none; padding: 8px 15px; border-radius: 5px; margin: 5px;}
 
107
  .tall-radio .gr-radio-item {padding: 15px 0; min-height: 50px; display: flex; align-items: center;}
108
  .tall-radio label {font-size: 16px;}
109
  """
110
+
111
  with gr.Blocks(
112
  title="Mistral OCR Demo",
113
  css=custom_css,
114
  theme=gr.themes.Soft()
115
  ) as demo:
116
  gr.Markdown("<h1 style='text-align: center; color: #333;'>Mistral OCR Demo</h1>")
117
+ gr.Markdown("<p style='text-align: center; color: #666;'>Extract text and images from PDFs or images using Mistral's latest OCR model. Visit <a href='https://console.mistral.ai/'>Mistral AI Console</a> to manage your API key. You can also see markdown live.</p>")
118
+ gr.Markdown("<p style='text-align: center; color: #666;'>Provide your Mistral API key below or set the MISTRAL environment variable.</p>")
119
 
120
  with gr.Row():
121
  with gr.Column(scale=1):
122
+ api_key_input = gr.Textbox(
123
+ label="Mistral API Key",
124
+ placeholder="Paste your Mistral API key here (or use MISTRAL env variable)",
125
+ type="password",
126
+ lines=1
127
+ )
128
  input_type = gr.Radio(
129
  choices=["URL", "Upload file"],
130
  label="Input Type",
 
133
  )
134
  url_input = gr.Textbox(
135
  label="Document or Image URL",
136
+ placeholder="e.g., https://arxiv.org/pdf/2501.12948",
137
  visible=True,
138
  lines=1
139
  )
 
153
 
154
  def update_visibility(choice):
155
  return gr.update(visible=(choice == "URL")), gr.update(visible=(choice == "Upload file"))
156
+
157
  input_type.change(fn=update_visibility, inputs=input_type, outputs=[url_input, file_input])
158
+
159
  def set_url_and_type(url):
160
  return url, "URL"
161
 
 
170
 
171
  submit_btn.click(
172
  fn=do_ocr,
173
+ inputs=[input_type, url_input, file_input, api_key_input],
174
  outputs=[cleaned_output, markdown_output, image_output]
175
  )
176
+
177
  if __name__ == "__main__":
178
+ demo.launch()