naman1102 committed on
Commit
f7505a2
·
1 Parent(s): 1939d2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -382
app.py CHANGED
@@ -39,10 +39,7 @@ client = InferenceClient(token=HF_TOKEN)
39
  # Constants
40
  # -------------------------
41
 
42
- SYSTEM = (
43
- "You are a parser-safe assistant.\n"
44
- "Output **ONLY** the JSON object requested—no extra words."
45
- )
46
 
47
  # -------------------------
48
  # Utility helpers
@@ -201,7 +198,6 @@ class BasicAgent:
201
  resp = self.llm.chat.completions.create(
202
  model="gpt-4.1",
203
  messages=[
204
- {"role": "system", "content": SYSTEM},
205
  {"role": "user", "content": prompt},
206
  ],
207
  temperature=0.3,
@@ -212,317 +208,51 @@ class BasicAgent:
212
  print(f"\nLLM Error: {str(e)}")
213
  raise
214
 
215
- def _safe_parse(self, raw: str) -> dict:
216
- """Fallback parser for when JSON parsing fails."""
217
- try:
218
- # Try to extract a dict-like structure
219
- match = re.search(r'\{.*\}', raw, re.DOTALL)
220
- if match:
221
- return ast.literal_eval(match.group(0))
222
- except:
223
- pass
224
- return {"needs_search": True, "search_query": ""}
225
-
226
- def _analyze_question(self, state: AgentState) -> AgentState:
227
- # Check for file attachments
228
- if state["file_url"]:
229
- file_type = self._detect_file_type(state["file_url"])
230
- if file_type == "video":
231
- state["current_step"] = "video"
232
- elif file_type == "image":
233
- state["current_step"] = "image"
234
- elif file_type in ["excel", "csv"]:
235
- state["current_step"] = "sheet"
236
- return state
237
-
238
- # Regular text question analysis
239
- prompt = (
240
- "Return ONLY valid JSON:\n"
241
- "{\"needs_search\": bool, \"search_query\": str}\n\n"
242
- f"Question: {state['question']}"
243
- )
244
- try:
245
- raw = self._call_llm(prompt)
246
- try:
247
- decision = json.loads(raw)
248
- except json.JSONDecodeError:
249
- print(f"JSON parse error, falling back to safe parse. Raw response: {raw}")
250
- decision = self._safe_parse(raw)
251
-
252
- state["needs_search"] = bool(decision.get("needs_search", False))
253
- state["search_query"] = decision.get("search_query", state["question"])
254
- except Exception as e:
255
- print(f"\nLLM Error in question analysis: {str(e)}")
256
- state["needs_search"] = True
257
- state["search_query"] = state["question"]
258
-
259
- state["current_step"] = "search" if state["needs_search"] else "answer"
260
- return state
261
-
262
- def _detect_file_type(self, url: str) -> str:
263
- """Detect file type from URL extension."""
264
- ext = url.split(".")[-1].lower()
265
- return {
266
- "mp4": "video",
267
- "jpg": "image",
268
- "jpeg": "image",
269
- "png": "image",
270
- "xlsx": "excel",
271
- "csv": "csv"
272
- }.get(ext, "unknown")
273
-
274
- def _image_node(self, state: AgentState) -> AgentState:
275
- """Handle image-based questions."""
276
- try:
277
- data = self._download_file(state["file_url"])
278
- answer = image_qa_bytes(data, "What is shown in this image?")
279
- state["history"].append({"step": "image", "output": answer})
280
- except Exception as e:
281
- state["logs"]["image_error"] = str(e)
282
- state["current_step"] = "answer"
283
- return state
284
-
285
- def _video_node(self, state: AgentState) -> AgentState:
286
- """Handle video-based questions."""
287
- try:
288
- data = self._download_file(state["file_url"])
289
- label = video_label_bytes(data)
290
- state["history"].append({"step": "video", "output": label})
291
- except Exception as e:
292
- state["logs"]["video_error"] = str(e)
293
- state["current_step"] = "answer"
294
- return state
295
-
296
- def _sheet_node(self, state: AgentState) -> AgentState:
297
- """Handle spreadsheet-based questions."""
298
- try:
299
- data = self._download_file(state["file_url"])
300
- answer = sheet_answer_bytes(data, state["file_url"])
301
- state["history"].append({"step": "sheet", "output": answer})
302
- except Exception as e:
303
- state["logs"]["sheet_error"] = str(e)
304
- state["current_step"] = "answer"
305
- return state
306
-
307
- def _perform_search(self, state: AgentState) -> AgentState:
308
- try:
309
- results = simple_search(state["search_query"], max_results=6)
310
- print("\nSearch Results:")
311
- for i, s in enumerate(results, 1):
312
- print(f"[{i}] {s[:120]}…")
313
-
314
- if not results:
315
- print("Warning: No search results found")
316
- state["needs_search"] = True
317
- else:
318
- state["needs_search"] = False
319
-
320
- state["history"].append({"step": "search", "results": results})
321
-
322
- except Exception as e:
323
- print(f"Search error: {str(e)}")
324
- state["needs_search"] = True
325
- state["history"].append({"step": "search", "error": str(e)})
326
-
327
- state["current_step"] = "answer"
328
- return state
329
-
330
- def _code_analysis_node(self, state: AgentState) -> AgentState:
331
- """Handle code analysis questions."""
332
- try:
333
- outputs = []
334
- for block in state["code_blocks"]:
335
- if block["language"].lower() == "python":
336
- result = run_python(block["code"]) # execute safely
337
- outputs.append(result)
338
- state["history"].append({"step": "code", "output": "\n".join(outputs)})
339
- except Exception as e:
340
- state["logs"]["code_error"] = str(e)
341
- state["current_step"] = "answer"
342
- return state
343
-
344
  def _generate_answer(self, state: AgentState) -> AgentState:
345
- # Collect all tool outputs with clear section headers
346
- materials = []
347
-
348
- # Add search results if any
349
- search_results = [h for h in state["history"] if h["step"] == "search"]
350
- if search_results:
351
- materials.append("=== Search Results ===")
352
- for result in search_results:
353
- for item in result.get("results", []):
354
- materials.append(item)
355
-
356
- # Add image analysis if any
357
- image_results = [h for h in state["history"] if h["step"] == "image"]
358
- if image_results:
359
- materials.append("=== Image Analysis ===")
360
- for result in image_results:
361
- materials.append(result.get("output", ""))
362
-
363
- # Add video analysis if any
364
- video_results = [h for h in state["history"] if h["step"] == "video"]
365
- if video_results:
366
- materials.append("=== Video Analysis ===")
367
- for result in video_results:
368
- materials.append(result.get("output", ""))
369
-
370
- # Add spreadsheet analysis if any
371
- sheet_results = [h for h in state["history"] if h["step"] == "sheet"]
372
- if sheet_results:
373
- materials.append("=== Spreadsheet Analysis ===")
374
- for result in sheet_results:
375
- materials.append(result.get("output", ""))
376
-
377
- # Join all materials with clear separation
378
- search_block = "\n\n".join(materials) if materials else "No materials available."
379
-
380
- # First attempt with full context
381
  prompt = f"""
382
- You are a helpful assistant. Your task is to answer the question using ONLY the materials provided.
383
- If you cannot find a direct answer, provide the most relevant information you can find.
384
 
385
  QUESTION:
386
  {state['question']}
387
 
388
- MATERIALS:
389
- {search_block}
390
-
391
  Return ONLY this exact JSON object:
392
  {{"ANSWER": "<answer text>"}}
393
  """
394
  try:
395
  raw = self._call_llm(prompt, 300)
396
- try:
397
- data = json.loads(raw)
398
- answer = data["ANSWER"]
399
- except (json.JSONDecodeError, KeyError):
400
- print("\nJSON parse error, trying direct prompt...")
401
- # If first attempt fails, try a more direct prompt
402
- direct_prompt = f"""
403
- Answer this question directly and concisely. Use the materials provided.
404
-
405
- QUESTION:
406
- {state['question']}
407
-
408
- MATERIALS:
409
- {search_block}
410
-
411
- Return ONLY this exact JSON object:
412
- {{"ANSWER": "<answer text>"}}
413
- """
414
- raw = self._call_llm(direct_prompt, 300)
415
- try:
416
- data = json.loads(raw)
417
- answer = data["ANSWER"]
418
- except (json.JSONDecodeError, KeyError):
419
- print("\nBoth attempts failed, using fallback answer...")
420
- if materials:
421
- # If we have materials but no answer, summarize what we know
422
- summary_prompt = f"""
423
- Summarize the key information from these materials in one sentence.
424
-
425
- MATERIALS:
426
- {search_block}
427
-
428
- Return ONLY this exact JSON object:
429
- {{"ANSWER": "<answer text>"}}
430
- """
431
- raw = self._call_llm(summary_prompt, 150)
432
- try:
433
- data = json.loads(raw)
434
- answer = data["ANSWER"]
435
- except (json.JSONDecodeError, KeyError):
436
- answer = "I cannot provide a definitive answer at this time."
437
- else:
438
- answer = "I cannot provide a definitive answer at this time."
439
-
440
  state["final_answer"] = answer
441
- state["current_step"] = "done"
442
-
443
  except Exception as e:
444
  print(f"\nLLM Error in answer generation: {str(e)}")
445
  state["final_answer"] = "I encountered an error while generating the answer."
446
- state["current_step"] = "done"
447
-
448
  return state
449
 
450
  def _build_workflow(self) -> Graph:
451
  sg = StateGraph(state_schema=AgentState)
452
-
453
- # Add nodes
454
- sg.add_node("analyze", self._analyze_question)
455
- sg.add_node("search", self._perform_search)
456
  sg.add_node("answer", self._generate_answer)
457
- sg.add_node("image", self._image_node)
458
- sg.add_node("video", self._video_node)
459
- sg.add_node("sheet", self._sheet_node)
460
- sg.add_node("code", self._code_analysis_node)
461
-
462
- # Add edges
463
- sg.add_edge("analyze", "search")
464
- sg.add_edge("analyze", "answer")
465
- sg.add_edge("search", "answer")
466
- sg.add_edge("image", "answer")
467
- sg.add_edge("video", "answer")
468
- sg.add_edge("sheet", "answer")
469
- sg.add_edge("code", "answer")
470
-
471
- def router(state: AgentState):
472
- return state["current_step"]
473
-
474
- sg.add_conditional_edges("analyze", router, {
475
- "search": "search",
476
- "answer": "answer",
477
- "image": "image",
478
- "video": "video",
479
- "sheet": "sheet",
480
- "code": "code"
481
- })
482
-
483
- sg.set_entry_point("analyze")
484
  sg.set_finish_point("answer")
485
  return sg.compile()
486
 
487
  def __call__(self, question: str, task_id: str = "unknown") -> str:
488
- # Parse question to get both text and file_url
489
- try:
490
- question_data = json.loads(question)
491
- state: AgentState = {
492
- "question": question_data.get("question", ""),
493
- "current_step": "analyze",
494
- "final_answer": "",
495
- "history": [],
496
- "needs_search": False,
497
- "search_query": "",
498
- "task_id": task_id,
499
- "logs": {},
500
- "file_url": question_data.get("file_url", ""),
501
- "code_blocks": question_data.get("code_blocks", [])
502
- }
503
- except (json.JSONDecodeError, KeyError) as e:
504
- print(f"Error parsing question data: {e}")
505
- state: AgentState = {
506
- "question": question,
507
- "current_step": "analyze",
508
- "final_answer": "",
509
- "history": [],
510
- "needs_search": False,
511
- "search_query": "",
512
- "task_id": task_id,
513
- "logs": {},
514
- "file_url": "",
515
- "code_blocks": []
516
- }
517
 
518
  final_state = self.workflow.invoke(state)
519
- return final_state["final_answer"] # Return the answer string directly, not JSON encoded
520
-
521
- def _download_file(self, url: str) -> bytes:
522
- """Download a file from a URL."""
523
- r = requests.get(url, timeout=30)
524
- r.raise_for_status()
525
- return r.content
526
 
527
  # ----------------------------------------------------------------------------------
528
  # Gradio Interface & Submission Routines
@@ -556,82 +286,46 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
556
  print(f"Error instantiating agent: {e}")
557
  return f"Error initializing agent: {e}", None
558
 
559
- # In the case of an app running as a hugging Face space, this link points toward your codebase
560
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
561
- print(f"Agent code location: {agent_code}")
562
-
563
- # 2. Fetch Questions with retry logic
564
  print(f"Fetching questions from: {questions_url}")
565
- max_retries = 3
566
- base_timeout = 30 # Increased from 15 to 30 seconds
567
-
568
- for attempt in range(max_retries):
569
- try:
570
- response = requests.get(
571
- questions_url,
572
- timeout=base_timeout * (attempt + 1), # Increase timeout with each retry
573
- headers={'User-Agent': 'Mozilla/5.0'} # Add user agent to avoid potential blocking
574
- )
575
- response.raise_for_status()
576
- questions_data = response.json()
577
- if not questions_data:
578
- print("Fetched questions list is empty.")
579
- return "Fetched questions list is empty or invalid format.", None
580
- print(f"Fetched {len(questions_data)} questions.")
581
- break # Success, exit retry loop
582
- except requests.exceptions.Timeout:
583
- if attempt < max_retries - 1:
584
- print(f"Timeout on attempt {attempt + 1}/{max_retries}. Retrying with longer timeout...")
585
- time.sleep(2 * (attempt + 1)) # Exponential backoff
586
- continue
587
- else:
588
- print("All retry attempts timed out.")
589
- return "Error: All attempts to fetch questions timed out. Please try again later.", None
590
- except requests.exceptions.RequestException as e:
591
- print(f"Error fetching questions: {e}")
592
- if attempt < max_retries - 1:
593
- print(f"Retrying... (attempt {attempt + 1}/{max_retries})")
594
- time.sleep(2 * (attempt + 1))
595
- continue
596
- return f"Error fetching questions after {max_retries} attempts: {e}", None
597
- except requests.exceptions.JSONDecodeError as e:
598
- print(f"Error decoding JSON response from questions endpoint: {e}")
599
- print(f"Response text: {response.text[:500]}")
600
- return f"Error decoding server response for questions: {e}", None
601
- except Exception as e:
602
- print(f"An unexpected error occurred fetching questions: {e}")
603
- return f"An unexpected error occurred fetching questions: {e}", None
604
 
605
- # 3. Run your Agent
606
  results_log = []
607
  answers_payload = []
608
- print(f"Running agent workflow on {len(questions_data)} questions...")
609
 
610
  for item in questions_data:
611
  task_id = item.get("task_id")
612
  if not task_id:
613
- print(f"Skipping item with missing task_id: {item}")
614
  continue
615
 
616
  try:
617
  print(f"\nProcessing question {task_id}...")
618
-
619
- # Pass the entire item as JSON string
620
- question_json = json.dumps(item)
621
- answer = agent(question_json, task_id)
622
 
623
  # Add to results
624
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
 
 
 
625
  results_log.append({
626
  "Task ID": task_id,
627
  "Question": item.get("question", ""),
628
  "Submitted Answer": answer
629
  })
630
 
631
- print(f"Completed question {task_id}")
632
-
633
  except Exception as e:
634
- print(f"Error running agent on task {task_id}: {e}")
635
  results_log.append({
636
  "Task ID": task_id,
637
  "Question": item.get("question", ""),
@@ -639,20 +333,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
639
  })
640
 
641
  if not answers_payload:
642
- print("Agent did not produce any answers to submit.")
643
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
644
 
645
- # 4. Prepare Submission
646
  submission_data = {
647
  "username": username.strip(),
648
- "agent_code": agent_code,
649
  "answers": answers_payload
650
  }
651
- status_update = f"Agent workflow finished. Submitting {len(answers_payload)} answers for user '{username}'..."
652
- print(status_update)
653
 
654
- # 5. Submit
655
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
656
  try:
657
  response = requests.post(submit_url, json=submission_data, timeout=60)
658
  response.raise_for_status()
@@ -664,36 +353,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
664
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
665
  f"Message: {result_data.get('message', 'No message received.')}"
666
  )
667
- print("Submission successful.")
668
- results_df = pd.DataFrame(results_log)
669
- return final_status, results_df
670
- except requests.exceptions.HTTPError as e:
671
- error_detail = f"Server responded with status {e.response.status_code}."
672
- try:
673
- error_json = e.response.json()
674
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
675
- except requests.exceptions.JSONDecodeError:
676
- error_detail += f" Response: {e.response.text[:500]}"
677
- status_message = f"Submission Failed: {error_detail}"
678
- print(status_message)
679
- results_df = pd.DataFrame(results_log)
680
- return status_message, results_df
681
- except requests.exceptions.Timeout:
682
- status_message = "Submission Failed: The request timed out."
683
- print(status_message)
684
- results_df = pd.DataFrame(results_log)
685
- return status_message, results_df
686
- except requests.exceptions.RequestException as e:
687
- status_message = f"Submission Failed: Network error - {e}"
688
- print(status_message)
689
- results_df = pd.DataFrame(results_log)
690
- return status_message, results_df
691
  except Exception as e:
692
- status_message = f"An unexpected error occurred during submission: {e}"
693
- print(status_message)
694
- results_df = pd.DataFrame(results_log)
695
- return status_message, results_df
696
-
697
 
698
  # --- Build Gradio Interface using Blocks ---
699
  with gr.Blocks() as demo:
 
39
  # Constants
40
  # -------------------------
41
 
42
+ # Remove SYSTEM constant as we're using JSON contract
 
 
 
43
 
44
  # -------------------------
45
  # Utility helpers
 
198
  resp = self.llm.chat.completions.create(
199
  model="gpt-4.1",
200
  messages=[
 
201
  {"role": "user", "content": prompt},
202
  ],
203
  temperature=0.3,
 
208
  print(f"\nLLM Error: {str(e)}")
209
  raise
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  def _generate_answer(self, state: AgentState) -> AgentState:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  prompt = f"""
213
+ Answer this question using the materials provided.
 
214
 
215
  QUESTION:
216
  {state['question']}
217
 
 
 
 
218
  Return ONLY this exact JSON object:
219
  {{"ANSWER": "<answer text>"}}
220
  """
221
  try:
222
  raw = self._call_llm(prompt, 300)
223
+ data = json.loads(raw)
224
+ answer = data["ANSWER"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  state["final_answer"] = answer
 
 
226
  except Exception as e:
227
  print(f"\nLLM Error in answer generation: {str(e)}")
228
  state["final_answer"] = "I encountered an error while generating the answer."
229
+
230
+ state["current_step"] = "done"
231
  return state
232
 
233
  def _build_workflow(self) -> Graph:
234
  sg = StateGraph(state_schema=AgentState)
 
 
 
 
235
  sg.add_node("answer", self._generate_answer)
236
+ sg.set_entry_point("answer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  sg.set_finish_point("answer")
238
  return sg.compile()
239
 
240
  def __call__(self, question: str, task_id: str = "unknown") -> str:
241
+ state: AgentState = {
242
+ "question": question,
243
+ "current_step": "answer",
244
+ "final_answer": "",
245
+ "history": [],
246
+ "needs_search": False,
247
+ "search_query": "",
248
+ "task_id": task_id,
249
+ "logs": {},
250
+ "file_url": "",
251
+ "code_blocks": []
252
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
  final_state = self.workflow.invoke(state)
255
+ return final_state["final_answer"]
 
 
 
 
 
 
256
 
257
  # ----------------------------------------------------------------------------------
258
  # Gradio Interface & Submission Routines
 
286
  print(f"Error instantiating agent: {e}")
287
  return f"Error initializing agent: {e}", None
288
 
289
+ # 2. Fetch Questions
 
 
 
 
290
  print(f"Fetching questions from: {questions_url}")
291
+ try:
292
+ response = requests.get(questions_url, timeout=30)
293
+ response.raise_for_status()
294
+ questions_data = response.json()
295
+ if not questions_data:
296
+ print("Fetched questions list is empty.")
297
+ return "Fetched questions list is empty or invalid format.", None
298
+ print(f"Fetched {len(questions_data)} questions.")
299
+ except Exception as e:
300
+ print(f"Error fetching questions: {e}")
301
+ return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
+ # 3. Run Agent and Collect Answers
304
  results_log = []
305
  answers_payload = []
 
306
 
307
  for item in questions_data:
308
  task_id = item.get("task_id")
309
  if not task_id:
 
310
  continue
311
 
312
  try:
313
  print(f"\nProcessing question {task_id}...")
314
+ answer = agent(item.get("question", ""), task_id)
 
 
 
315
 
316
  # Add to results
317
+ answers_payload.append({
318
+ "task_id": task_id,
319
+ "submitted_answer": answer # Plain string, not JSON encoded
320
+ })
321
  results_log.append({
322
  "Task ID": task_id,
323
  "Question": item.get("question", ""),
324
  "Submitted Answer": answer
325
  })
326
 
 
 
327
  except Exception as e:
328
+ print(f"Error processing task {task_id}: {e}")
329
  results_log.append({
330
  "Task ID": task_id,
331
  "Question": item.get("question", ""),
 
333
  })
334
 
335
  if not answers_payload:
336
+ return "No answers were generated.", pd.DataFrame(results_log)
 
337
 
338
+ # 4. Submit Answers
339
  submission_data = {
340
  "username": username.strip(),
341
+ "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
342
  "answers": answers_payload
343
  }
 
 
344
 
 
 
345
  try:
346
  response = requests.post(submit_url, json=submission_data, timeout=60)
347
  response.raise_for_status()
 
353
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
354
  f"Message: {result_data.get('message', 'No message received.')}"
355
  )
356
+ return final_status, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  except Exception as e:
358
+ return f"Submission Failed: {str(e)}", pd.DataFrame(results_log)
 
 
 
 
359
 
360
  # --- Build Gradio Interface using Blocks ---
361
  with gr.Blocks() as demo: