adil9858 committed on
Commit
34b887b
·
verified ·
1 Parent(s): 4ba60cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +360 -196
app.py CHANGED
@@ -6,48 +6,81 @@ import io
6
  import fitz # PyMuPDF
7
  import tempfile
8
  import os
 
9
 
10
- # --- HELPER FUNCTIONS ---
11
def convert_pdf_to_images(pdf_file):
    """Convert PDF to list of PIL Images.

    Args:
        pdf_file: Raw bytes of the PDF document.

    Returns:
        A list with one RGB PIL image per page, in page order.

    Raises:
        gr.Error: If the PDF cannot be written to disk, opened or rendered.
    """
    images = []
    tmp_file_path = None
    pdf_document = None
    try:
        # The document is opened by path, so spill the uploaded bytes to disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            # Record the path before writing so a failed write is cleaned up too.
            tmp_file_path = tmp_file.name
            tmp_file.write(pdf_file)

        pdf_document = fitz.open(tmp_file_path)

        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(img)
    except Exception as e:
        raise gr.Error(f"Error converting PDF: {e}") from e
    finally:
        # Clean up on the failure path as well; previously a bad PDF left
        # both the open document and the temp file behind.
        if pdf_document is not None:
            pdf_document.close()
        if tmp_file_path is not None and os.path.exists(tmp_file_path):
            os.unlink(tmp_file_path)
    return images
 
 
 
37
 
38
def image_to_base64(image):
    """Return the PNG encoding of a PIL image as a base64 text string."""
    png_stream = io.BytesIO()
    try:
        image.save(png_stream, format="PNG")
        encoded = base64.b64encode(png_stream.getvalue())
    finally:
        png_stream.close()
    return encoded.decode("utf-8")
43
 
44
  def generate_summary(extracted_texts, api_key):
45
  """Generate a comprehensive summary of all extracted texts"""
 
 
 
46
  try:
47
- client = OpenAI(
48
- base_url="https://openrouter.ai/api/v1",
49
- api_key=api_key
50
- )
51
 
52
  summary_prompt = f"""
53
  You are an expert document analyst. Below are the extracted contents from multiple pages of a document.
@@ -58,226 +91,357 @@ def generate_summary(extracted_texts, api_key):
58
  4. Presents the information in a clear, structured format
59
 
60
  Extracted contents from pages:
 
61
  {extracted_texts}
 
62
 
63
  Comprehensive Summary:
64
  """
65
-
66
  response = client.chat.completions.create(
67
- model="opengvlab/internvl3-14b:free",
68
  messages=[
69
  {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
70
  {"role": "user", "content": summary_prompt}
71
  ],
72
- max_tokens=2048
73
  )
74
 
75
  return response.choices[0].message.content
76
-
77
  except Exception as e:
78
- raise gr.Error(f"Error generating summary: {e}")
 
79
 
80
def analyze_document(api_key, user_prompt, uploaded_file):
    """Run the vision model over every page of an uploaded PDF or image.

    Args:
        api_key: OpenRouter API key entered by the user.
        user_prompt: Instruction sent to the model together with each page.
        uploaded_file: Uploaded file object; its ``name`` attribute is used
            as the path of the file on disk.

    Returns:
        Markdown text with one section per page and, when more than one page
        was analyzed, a final comprehensive summary section.

    Raises:
        gr.Error: If the key or file is missing, the API client cannot be
            created, or any page fails to analyze.
    """
    if not api_key:
        raise gr.Error("Please enter your OpenRouter API key")

    if uploaded_file is None:
        raise gr.Error("Please upload a document")

    file_ext = os.path.splitext(uploaded_file.name)[1].lower()

    # Handle PDF or image
    if file_ext == '.pdf':
        with open(uploaded_file.name, "rb") as f:
            pdf_data = f.read()
        images_to_analyze = convert_pdf_to_images(pdf_data)  # all pages
    else:
        # Image.open is lazy and keeps the file open; copy the pixels while
        # the handle is held so it can be closed deterministically.
        with Image.open(uploaded_file.name) as source_image:
            images_to_analyze = [source_image.copy()]

    # One client serves every page; it used to be rebuilt per iteration.
    try:
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key
        )
    except Exception as e:
        raise gr.Error(f"Error creating API client: {e}") from e

    # Process each image
    all_results = []
    extracted_texts = []

    for idx, image in enumerate(images_to_analyze, 1):
        try:
            image_base64 = image_to_base64(image)

            response = client.chat.completions.create(
                model="opengvlab/internvl3-14b:free",
                messages=[
                    {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
                    {"role": "user", "content": [
                        {"type": "text", "text": user_prompt},
                        {"type": "image_url", "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }}
                    ]}
                ],
                max_tokens=1024
            )

            result = response.choices[0].message.content
            extracted_texts.append(f"### Page {idx}\n{result}\n")
            all_results.append(f"## 📄 Page {idx} Results\n{result}\n---\n")

        except Exception as e:
            raise gr.Error(f"Error analyzing page {idx}: {e}") from e

    markdown_output = "\n".join(all_results)

    # A cross-page summary only makes sense for multi-page documents.
    if len(extracted_texts) > 1:
        summary = generate_summary("\n".join(extracted_texts), api_key)
        markdown_output += f"\n## 📝 Comprehensive Summary\n{summary}\n"

    return markdown_output
148
-
149
- # Custom CSS for dark theme with green text
150
# Stylesheet handed to gr.Blocks(css=...): defines green-on-black CSS custom
# properties under :root and applies them to the page body, to elements
# carrying the "markdown-output" class, and to every button.
custom_css = """
:root {
    --primary: #00ff00;
    --primary-50: #00ff0033;
    --primary-100: #00ff0066;
    --primary-200: #00ff0099;
    --primary-300: #00ff00cc;
    --secondary: #00cc00;
    --secondary-50: #00cc0033;
    --secondary-100: #00cc0066;
    --secondary-200: #00cc0099;
    --secondary-300: #00cc00cc;
    --color-background-primary: #000000;
    --color-background-secondary: #111111;
    --color-background-tertiary: #222222;
    --text-color: #00ff00;
    --block-background-fill: #111111;
    --block-border-color: #00aa00;
    --block-label-text-color: #00ff00;
    --block-title-text-color: #00ff00;
    --input-background-fill: #111111;
    --input-border-color: #00aa00;
    --input-text-color: #00ff00;
}

body {
    background-color: var(--color-background-primary) !important;
    color: var(--text-color) !important;
}

.markdown-output {
    padding: 20px;
    border-radius: 8px;
    background: var(--color-background-secondary);
    border: 1px solid var(--block-border-color);
    max-height: 600px;
    overflow-y: auto;
    color: var(--text-color) !important;
}

.markdown-output h1,
.markdown-output h2,
.markdown-output h3 {
    color: var(--primary) !important;
    border-bottom: 1px solid var(--primary-300);
}

.markdown-output a {
    color: var(--secondary) !important;
}

.markdown-output code {
    background-color: var(--color-background-tertiary);
    color: var(--secondary);
}

.markdown-output pre {
    background-color: var(--color-background-tertiary) !important;
    border: 1px solid var(--block-border-color);
}

.markdown-output ul,
.markdown-output ol {
    color: var(--text-color);
}

button {
    background: var(--primary) !important;
    color: black !important;
    font-weight: bold !important;
}

button:hover {
    background: var(--primary-300) !important;
}
"""
226
 
227
- # Create dark theme
228
# Default theme recoloured to green hues on black/near-black fills.
dark_green_theme = gr.themes.Default(
    primary_hue="green",
    secondary_hue="green",
    neutral_hue="green",
).set(
    background_fill_primary="#000000",
    background_fill_secondary="#111111",
    block_background_fill="#111111",
    border_color_accent="#00aa00",
    block_label_text_color="#00ff00",
    body_text_color="#00ff00",
    button_primary_text_color="#000000",
)

# --- GRADIO INTERFACE ---
# Emoji and the bullet below are restored from mis-decoded UTF-8 so the
# labels render as intended.
with gr.Blocks(
    title="DocSum - Document Summarizer",
    theme=dark_green_theme,
    css=custom_css
) as demo:
    gr.Markdown("# 🧾 DocSum")
    gr.Markdown("Document Summarizer Powered by VLM • Developed by [Koshur AI](https://koshurai.com)")

    with gr.Row():
        api_key = gr.Textbox(
            label="🔑 OpenRouter API Key",
            type="password",
            placeholder="Enter your OpenRouter API key"
        )
        user_prompt = gr.Textbox(
            label="📝 Enter Your Prompt",
            value="Extract all content structurally",
            placeholder="What would you like to extract?"
        )

    uploaded_file = gr.File(
        label="Upload Document (PDF/Image)",
        file_types=[".pdf", ".jpg", ".jpeg", ".png"]
    )

    submit_btn = gr.Button("🔍 Analyze Document", variant="primary")

    # Markdown output with custom class so the stylesheet can target it
    output = gr.Markdown(
        label="Analysis Results",
        elem_classes=["markdown-output"]
    )

    # One click runs the whole pipeline and fills the single output panel.
    submit_btn.click(
        fn=analyze_document,
        inputs=[api_key, user_prompt, uploaded_file],
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
6
  import fitz # PyMuPDF
7
  import tempfile
8
  import os
9
+ import shutil # Added for cleaning up temp dirs
10
 
11
+ # --- OPENAI CLIENT SETUP ---
12
+ # Use environment variable or textbox for API key for better security in deployed apps
13
+ # client = OpenAI(
14
+ # base_url="https://openrouter.ai/api/v1",
15
+ # api_key=os.getenv("OPENROUTER_API_KEY") # Recommended approach
16
+ # )
17
+ # For this example, we'll get the key from the input field
18
+
19
def get_openai_client(api_key):
    """Initialize and return an OpenAI client pointed at OpenRouter.

    Args:
        api_key: OpenRouter API key. Surrounding whitespace (common when a
            key is pasted) is stripped before use.

    Returns:
        An ``OpenAI`` client whose base URL is the OpenRouter v1 endpoint.

    Raises:
        ValueError: If the key is missing, empty, or only whitespace.
    """
    # A whitespace-only key is truthy, so test the stripped text: otherwise
    # it would be sent to the API and fail later with an opaque auth error.
    cleaned_key = api_key.strip() if isinstance(api_key, str) else api_key
    if not cleaned_key:
        raise ValueError("API key is required.")

    return OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=cleaned_key
    )
29
+
30
def convert_pdf_to_images(pdf_path):
    """Render every page of a PDF to a PIL image plus a PNG preview file.

    Args:
        pdf_path: Filesystem path of the PDF to render.

    Returns:
        A 4-tuple ``(images, temp_image_paths, num_pages, temp_dir)``:
        the RGB PIL images in page order, the paths of the PNG copies saved
        for previewing, the page count, and the temporary directory holding
        those PNGs (the caller is responsible for removing it).
        On any failure the error is printed, the temporary directory is
        removed, and ``([], [], 0, None)`` is returned instead of raising.
    """
    images = []
    temp_image_paths = []
    temp_dir = None
    pdf_document = None
    try:
        pdf_document = fitz.open(pdf_path)
        num_pages = len(pdf_document)

        # Create a temporary directory for the preview images
        temp_dir = tempfile.mkdtemp()

        for page_num in range(num_pages):
            page = pdf_document.load_page(page_num)
            # Render at a higher DPI for better clarity for the VLM
            pix = page.get_pixmap(dpi=300)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(img)

            # Save a PNG copy so the UI can preview the page from disk
            temp_img_path = os.path.join(temp_dir, f"page_{page_num+1}.png")
            img.save(temp_img_path, format="PNG")
            temp_image_paths.append(temp_img_path)

        return images, temp_image_paths, num_pages, temp_dir

    except Exception as e:
        print(f"Error converting PDF: {e}")
        # Drop any partially written previews
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)
        return [], [], 0, None
    finally:
        # Close the document on the failure path too; it used to stay open
        # whenever rendering or saving raised.
        if pdf_document is not None:
            pdf_document.close()
65
 
66
def image_to_base64(image):
    """Return the PNG encoding of a PIL image as a base64 text string.

    Images in any mode other than RGB (RGBA, palette, ...) are converted to
    RGB first; PNG is used because it is lossless.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

    png_stream = io.BytesIO()
    try:
        rgb_image.save(png_stream, format="PNG")
        encoded = base64.b64encode(png_stream.getvalue())
    finally:
        png_stream.close()
    return encoded.decode("utf-8")
76
 
77
  def generate_summary(extracted_texts, api_key):
78
  """Generate a comprehensive summary of all extracted texts"""
79
+ if not extracted_texts:
80
+ return "No content extracted to summarize."
81
+
82
  try:
83
+ client = get_openai_client(api_key)
 
 
 
84
 
85
  summary_prompt = f"""
86
  You are an expert document analyst. Below are the extracted contents from multiple pages of a document.
 
91
  4. Presents the information in a clear, structured format
92
 
93
  Extracted contents from pages:
94
+ ---
95
  {extracted_texts}
96
+ ---
97
 
98
  Comprehensive Summary:
99
  """
100
+
101
  response = client.chat.completions.create(
102
+ model="opengvlab/internvl3-14b:free", # Ensure this model is available via OpenRouter
103
  messages=[
104
  {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
105
  {"role": "user", "content": summary_prompt}
106
  ],
107
+ max_tokens=2048 # Adjust as needed
108
  )
109
 
110
  return response.choices[0].message.content
111
+
112
  except Exception as e:
113
+ print(f"Error generating summary: {e}")
114
+ return f"Error generating summary: {e}"
115
 
116
+ # --- Gradio App Functions ---
117
+
118
def process_upload(file_obj):
    """Handle a file upload: convert PDFs, prepare previews, build the state.

    Args:
        file_obj: The uploaded file, either an object exposing ``name`` (path
            on disk) and optionally ``orig_name``, a plain path string, or
            ``None`` when the upload was cleared.

    Returns:
        An 8-tuple, in the order the upload event's outputs are declared:
        ``(images_state, selected_pages, preview_paths, page_options, status,
        results, summary, temp_dir)``. ``images_state`` is
        ``(PIL images, preview file paths, temp directory)`` or ``None`` on
        failure; ``results`` and ``summary`` are always ``None`` so stale
        output is cleared; ``temp_dir`` is the preview directory or ``None``.
    """
    def _failure(message):
        # Every failure clears the UI and reports only a status message.
        return None, None, [], [], message, None, None, None

    if file_obj is None:
        return _failure("Please upload a document.")

    # Accept both file wrappers (``.name``) and bare path strings.
    file_path = file_obj if isinstance(file_obj, str) else file_obj.name
    # Prefer the original upload name when the wrapper exposes one.
    original_name = getattr(file_obj, "orig_name", None) or file_path
    file_type = original_name.split('.')[-1].lower()

    if file_type == "pdf":
        images, temp_paths, num_pages, temp_dir = convert_pdf_to_images(file_path)
        if not images:
            return _failure("Failed to convert PDF to images.")

        page_options = [f"Page {i}" for i in range(1, num_pages + 1)]
        # By default select all pages
        default_selection = page_options

        images_state = (images, temp_paths, temp_dir)

        status = f"PDF uploaded. {num_pages} pages detected. Select pages to analyze."
        return images_state, default_selection, temp_paths, page_options, status, None, None, temp_dir

    if file_type in ("jpg", "jpeg", "png"):
        # Bound before the try so the except branch can test it safely; it
        # used to be unbound there whenever Image.open itself failed.
        temp_dir = None
        try:
            # convert() returns a fully loaded copy, so the source file
            # handle can be closed as soon as the block exits.
            with Image.open(file_path) as source_image:
                img = source_image.convert('RGB')

            # Save to a temp file for the preview gallery
            temp_dir = tempfile.mkdtemp()
            temp_img_path = os.path.join(temp_dir, "uploaded_image.png")
            img.save(temp_img_path, format="PNG")

            images_state = ([img], [temp_img_path], temp_dir)

            status = "Image uploaded."
            # No page choices for a single image, only its preview
            return images_state, [], [temp_img_path], [], status, None, None, temp_dir

        except Exception as e:
            print(f"Error loading image: {e}")
            if temp_dir and os.path.exists(temp_dir):
                shutil.rmtree(temp_dir, ignore_errors=True)
            return _failure(f"Failed to load image: {e}")

    return _failure("Unsupported file type. Please upload JPG, PNG, or PDF.")
172
+
173
def analyze_document(api_key, user_prompt, images_state, selected_page_names):
    """Analyze the selected pages with the VLM and summarize them.

    This is a generator event handler. Every update, including early
    validation messages and the final result, is *yielded* as a 3-tuple
    ``(individual_results_markdown, summary_text, status_message)``. A value
    passed to ``return`` inside a generator is discarded by the caller, which
    is why nothing is returned.

    Args:
        api_key: OpenRouter API key.
        user_prompt: Instruction sent to the model with each page image.
        images_state: ``(PIL images, preview paths, temp directory)`` as
            stored after upload, or ``None``.
        selected_page_names: Selected labels of the form ``"Page N"``; empty
            for a single uploaded image.

    Yields:
        Progress updates while pages are processed, then one final tuple with
        the per-page markdown, the summary text and a completion status.
    """
    if not api_key:
        yield None, None, "Please enter your Open Router API Key."
        return

    if not images_state or not images_state[0]:
        yield None, None, "No document uploaded or converted."
        return

    all_pil_images = images_state[0]
    temp_dir = images_state[2]

    extracted_texts = []
    all_results = []

    if selected_page_names:
        selected_indices = [int(name.split(" ")[1]) - 1 for name in selected_page_names]
        # Reject negative indices too: "Page 0" would otherwise wrap around
        # and silently pick the last page.
        images_to_analyze = [
            (idx + 1, all_pil_images[idx])
            for idx in selected_indices
            if 0 <= idx < len(all_pil_images)
        ]
    elif len(all_pil_images) == 1:
        # A lone image has no page selector, so analyze it directly.
        images_to_analyze = [(1, all_pil_images[0])]
    else:
        # Multi-page document with every page deselected. This used to fall
        # through to the single-image branch and analyze page 1 anyway.
        images_to_analyze = []

    if not images_to_analyze:
        # Keep the preview files: the upload is still loaded and the user
        # can simply tick some pages and retry.
        yield None, None, "No pages selected for analysis."
        return

    try:
        client = get_openai_client(api_key)

        for page_num, image in images_to_analyze:
            yield None, None, f"Analyzing page {page_num}..."

            try:
                image_base64_data = image_to_base64(image)

                response = client.chat.completions.create(
                    model="opengvlab/internvl3-14b:free",
                    messages=[
                        {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
                        {"role": "user", "content": [
                            {"type": "text", "text": user_prompt},
                            {"type": "image_url", "image_url": {
                                "url": f"data:image/png;base64,{image_base64_data}"
                            }}
                        ]}
                    ],
                    max_tokens=1024
                )

                # An empty completion must not break the join below.
                result = response.choices[0].message.content or ""
                extracted_texts.append(f"=== Page {page_num} ===\n{result}\n")

                if len(images_to_analyze) > 1:
                    all_results.append(f"### 📄 Page {page_num} Result:")
                else:
                    all_results.append("### ✅ Analysis Result:")
                all_results.append(result)
                all_results.append("---")

            except Exception as e:
                error_msg = f"An error occurred analyzing page {page_num}: {e}"
                print(error_msg)
                all_results.append(f"### ❌ Error on Page {page_num}:")
                all_results.append(error_msg)
                all_results.append("---")
                # Don't stop, try other pages

        # Blank lines between parts: a "---" directly under a text line is
        # parsed as a setext heading instead of a horizontal rule.
        individual_results_markdown = "\n\n".join(all_results) if all_results else "No results generated."

        if len(images_to_analyze) > 1 and extracted_texts:
            yield individual_results_markdown, None, "Generating comprehensive summary..."
            summary_text = generate_summary("\n".join(extracted_texts), api_key)
            status_message = "Analysis complete. Summary generated."
        elif extracted_texts:  # Single page case
            summary_text = "Summary not generated for single page analysis. See analysis result above."
            status_message = "Analysis complete."
        else:
            summary_text = "No content extracted for summary."
            status_message = "Analysis complete, but no text extracted."

        # Clean up the temporary directory used for images
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

        yield individual_results_markdown, summary_text, status_message

    except Exception as e:
        # Clean up the temporary directory in case of error
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)

        error_msg = f"An unhandled error occurred during analysis: {e}"
        print(error_msg)
        yield None, None, error_msg
275
 
276
+
277
+ # Function to clean up temp dir when session ends or is closed
278
def clean_temp_dir(temp_dir):
    """Delete *temp_dir* and its contents; do nothing if it is unset or gone."""
    if not temp_dir:
        return
    if not os.path.exists(temp_dir):
        return
    print(f"Cleaning up temporary directory: {temp_dir}")
    shutil.rmtree(temp_dir)
282
+
283
+
284
+ # --- Gradio Interface Layout ---
285
+
286
+ # Custom CSS (simplified from Streamlit CSS)
287
# Stylesheet handed to gr.Blocks(css=...). It styles the page body and the
# .gradio-container wrapper, headings, the "subtitle" and "summary-box"
# classes, buttons, and the element with id "status_message_id".
css = """
body {
    font-family: 'Inter', sans-serif;
}
.gradio-container {
    max-width: 800px !important;
    margin: auto;
    padding: 20px;
    background-color: #f9fafb; /* Light gray background */
}
h1, h2, h3, h4 {
    color: #111827; /* Darker text for headers */
}
.subtitle {
    font-size: 1rem;
    color: #6b7280; /* Gray text for subtitle */
    margin-bottom: 2rem;
}
.summary-box {
    background-color: #e0f2fe; /* Light blue background */
    padding: 1.5rem;
    border-radius: 8px;
    margin-top: 1rem; /* Reduced margin-top */
    border: 1px solid #bfdbfe; /* Light blue border */
}
.summary-box p {
    margin: 0; /* Remove paragraph margin */
}
.file-upload-label .wrap {
    text-align: center !important;
}
.gr-button {
    margin-top: 1rem !important;
}
/* Style for the status message */
#status_message_id {
    margin-top: 1rem;
    font-weight: bold;
    color: #1f2937;
}
"""
328
 
329
with gr.Blocks(css=css, title="DocSum - Document Summarizer", theme=gr.themes.Soft()) as demo:

    # Per-session state: (list of PIL images, list of preview file paths,
    # temp directory path) for the current upload.
    images_state = gr.State(None)
    # Per-session state: temp directory of the current upload, kept so it
    # can be removed when the upload is replaced or cleared.
    current_temp_dir = gr.State(None)

    gr.HTML("""
    <div style="text-align: center;">
        <img src='https://raw.githubusercontent.com/KoshurAI/DocSum/main/blob.png' width='100'>
        <h1>DocSum</h1>
        <p class="subtitle">Document Summarizer Powered by VLM • Developed by <a href="https://koshurai.com" target="_blank">Koshur AI</a></p>
    </div>
    """)

    with gr.Row():
        user_prompt_input = gr.Textbox(
            label="📝 Enter Your Prompt",
            value="Extract all content structurally",
            lines=2,
            interactive=True,
            container=True,
            scale=2
        )
        api_key_input = gr.Textbox(
            label="🔒 OpenRouter API Key",
            type="password",
            interactive=True,
            container=True,
            scale=1,
            info="Your key is not stored."
            # Consider adding value=os.getenv("OPENROUTER_API_KEY", "") for easier local testing
        )

    file_upload = gr.File(
        label="Upload a document (JPG/PNG/PDF)",
        file_types=[".jpg", ".jpeg", ".png", ".pdf"],
        interactive=True
    )

    # Components for PDF page selection and preview (initially hidden).
    # The class is gr.CheckboxGroup; "gr.Checkboxgroup" does not exist.
    page_selector = gr.CheckboxGroup(
        label="Select PDF Pages to Analyze",
        choices=[],
        value=[],
        visible=False,
        interactive=True
    )
    preview_gallery = gr.Gallery(
        label="Selected Page Previews",
        visible=False,
        container=True,
        preview=True,
        columns=3,
        rows=1,
        object_fit="contain",
        height="auto"
    )

    status_message = gr.Markdown(elem_id="status_message_id")  # status updates

    analyze_button = gr.Button("🔍 Analyze Document")

    # Outputs
    individual_results_output = gr.Markdown(label="Page-by-Page Analysis Results")
    summary_output = gr.Markdown(label="Comprehensive Document Summary", elem_classes="summary-box")

    # --- Event Handling ---

    def _handle_upload(file_obj, previous_temp_dir):
        """Adapt process_upload's 8-tuple to the seven real output components."""
        # Previews of the replaced upload are no longer needed.
        clean_temp_dir(previous_temp_dir)

        (state, selection, previews, choices,
         status, results, summary, temp_dir) = process_upload(file_obj)
        if temp_dir is None and state:
            # The state tuple always carries the directory as its third item.
            temp_dir = state[2]

        return (
            state,
            # Choices are a property of the selector, not a component of
            # their own, so set choices/value/visibility in one update.
            gr.update(choices=choices, value=selection or [], visible=bool(choices)),
            gr.update(value=previews, visible=bool(previews)),
            status,
            results,
            summary,
            temp_dir,
        )

    def _selected_previews(selected_pages, state):
        """Return the preview paths matching the ticked "Page N" labels."""
        if not state or not state[1]:
            return []
        preview_paths = state[1]
        if not selected_pages and len(preview_paths) == 1:
            # Single image upload: there is nothing to select, keep showing it.
            return list(preview_paths)
        indices = [int(name.split(" ")[1]) - 1 for name in selected_pages]
        return [preview_paths[idx] for idx in indices if 0 <= idx < len(preview_paths)]

    def _selection_status(selected_pages):
        """Describe how many pages are ticked (the input is a list of labels)."""
        if selected_pages:
            return f"{len(selected_pages)} pages selected."
        return "No pages selected."

    # When a file is uploaded, process it (convert PDF, show previews, update state)
    file_upload.change(
        fn=_handle_upload,
        inputs=[file_upload, current_temp_dir],
        outputs=[images_state, page_selector, preview_gallery, status_message,
                 individual_results_output, summary_output, current_temp_dir],
        show_progress=True
    )

    # When page selection changes (for PDF), update the preview gallery,
    # then the status line.
    page_selector.change(
        fn=_selected_previews,
        inputs=[page_selector, images_state],
        outputs=[preview_gallery],
        show_progress=False
    ).then(
        fn=_selection_status,
        inputs=[page_selector],
        outputs=[status_message],
        show_progress=False
    )

    # When the Analyze button is clicked, run the analysis function
    analyze_button.click(
        fn=analyze_document,
        inputs=[api_key_input, user_prompt_input, images_state, page_selector],
        outputs=[individual_results_output, summary_output, status_message],
        show_progress=False  # progress is reported through status_message
    )

    # --- Footer ---
    gr.HTML("<footer style='text-align: center; margin-top: 3rem; color: #9ca3af; font-size: 0.875rem;'>© 2025 Koshur AI. All rights reserved.</footer>")

    # Temp-directory cleanup happens in _handle_upload, which receives the
    # session's current_temp_dir as an input. The former periodic demo.load
    # job read current_temp_dir.value, which is only the component's initial
    # value (None), so it never removed anything.
438
+ # The close event handler is tricky for cleanup; rely more on periodic check or OS cleanup.
439
 
440
+ # --- Launch App ---
441
if __name__ == "__main__":
    # analyze_document is a generator handler; Gradio 3.x refuses generator
    # handlers unless the queue is enabled (later versions enable it by
    # default, where this call is harmless).
    demo.queue()
    # share=True would create a public URL (useful for testing);
    # debug=True provides more detailed error messages.
    demo.launch(share=False, debug=True)

    # NOTE: code placed after launch() is not a reliable cleanup hook; it
    # never runs if the process is killed externally.