Final_Assignment_Template

Running

App Files Community

naman1102 committed 11 days ago

Commit

f7505a2

1 Parent(s): 1939d2d

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -382

app.py CHANGED Viewed

@@ -39,10 +39,7 @@ client = InferenceClient(token=HF_TOKEN)
 # Constants
 # -------------------------
-SYSTEM = (
-    "You are a parser-safe assistant.\n"
-    "Output **ONLY** the JSON object requested—no extra words."
-)
 # -------------------------
 # Utility helpers
@@ -201,7 +198,6 @@ class BasicAgent:
             resp = self.llm.chat.completions.create(
                 model="gpt-4.1",
                 messages=[
-                    {"role": "system", "content": SYSTEM},
                     {"role": "user", "content": prompt},
                 ],
                 temperature=0.3,
@@ -212,317 +208,51 @@ class BasicAgent:
             print(f"\nLLM Error: {str(e)}")
             raise
-    def _safe_parse(self, raw: str) -> dict:
-        """Fallback parser for when JSON parsing fails."""
-        try:
-            # Try to extract a dict-like structure
-            match = re.search(r'\{.*\}', raw, re.DOTALL)
-            if match:
-                return ast.literal_eval(match.group(0))
-        except:
-            pass
-        return {"needs_search": True, "search_query": ""}
-    def _analyze_question(self, state: AgentState) -> AgentState:
-        # Check for file attachments
-        if state["file_url"]:
-            file_type = self._detect_file_type(state["file_url"])
-            if file_type == "video":
-                state["current_step"] = "video"
-            elif file_type == "image":
-                state["current_step"] = "image"
-            elif file_type in ["excel", "csv"]:
-                state["current_step"] = "sheet"
-            return state
-        # Regular text question analysis
-        prompt = (
-            "Return ONLY valid JSON:\n"
-            "{\"needs_search\": bool, \"search_query\": str}\n\n"
-            f"Question: {state['question']}"
-        )
-        try:
-            raw = self._call_llm(prompt)
-            try:
-                decision = json.loads(raw)
-            except json.JSONDecodeError:
-                print(f"JSON parse error, falling back to safe parse. Raw response: {raw}")
-                decision = self._safe_parse(raw)
-            state["needs_search"] = bool(decision.get("needs_search", False))
-            state["search_query"] = decision.get("search_query", state["question"])
-        except Exception as e:
-            print(f"\nLLM Error in question analysis: {str(e)}")
-            state["needs_search"] = True
-            state["search_query"] = state["question"]
-        state["current_step"] = "search" if state["needs_search"] else "answer"
-        return state
-    def _detect_file_type(self, url: str) -> str:
-        """Detect file type from URL extension."""
-        ext = url.split(".")[-1].lower()
-        return {
-            "mp4": "video",
-            "jpg": "image",
-            "jpeg": "image",
-            "png": "image",
-            "xlsx": "excel",
-            "csv": "csv"
-        }.get(ext, "unknown")
-    def _image_node(self, state: AgentState) -> AgentState:
-        """Handle image-based questions."""
-        try:
-            data = self._download_file(state["file_url"])
-            answer = image_qa_bytes(data, "What is shown in this image?")
-            state["history"].append({"step": "image", "output": answer})
-        except Exception as e:
-            state["logs"]["image_error"] = str(e)
-        state["current_step"] = "answer"
-        return state
-    def _video_node(self, state: AgentState) -> AgentState:
-        """Handle video-based questions."""
-        try:
-            data = self._download_file(state["file_url"])
-            label = video_label_bytes(data)
-            state["history"].append({"step": "video", "output": label})
-        except Exception as e:
-            state["logs"]["video_error"] = str(e)
-        state["current_step"] = "answer"
-        return state
-    def _sheet_node(self, state: AgentState) -> AgentState:
-        """Handle spreadsheet-based questions."""
-        try:
-            data = self._download_file(state["file_url"])
-            answer = sheet_answer_bytes(data, state["file_url"])
-            state["history"].append({"step": "sheet", "output": answer})
-        except Exception as e:
-            state["logs"]["sheet_error"] = str(e)
-        state["current_step"] = "answer"
-        return state
-    def _perform_search(self, state: AgentState) -> AgentState:
-        try:
-            results = simple_search(state["search_query"], max_results=6)
-            print("\nSearch Results:")
-            for i, s in enumerate(results, 1):
-                print(f"[{i}] {s[:120]}…")
-            if not results:
-                print("Warning: No search results found")
-                state["needs_search"] = True
-            else:
-                state["needs_search"] = False
-            state["history"].append({"step": "search", "results": results})
-        except Exception as e:
-            print(f"Search error: {str(e)}")
-            state["needs_search"] = True
-            state["history"].append({"step": "search", "error": str(e)})
-        state["current_step"] = "answer"
-        return state
-    def _code_analysis_node(self, state: AgentState) -> AgentState:
-        """Handle code analysis questions."""
-        try:
-            outputs = []
-            for block in state["code_blocks"]:
-                if block["language"].lower() == "python":
-                    result = run_python(block["code"])   # execute safely
-                    outputs.append(result)
-            state["history"].append({"step": "code", "output": "\n".join(outputs)})
-        except Exception as e:
-            state["logs"]["code_error"] = str(e)
-        state["current_step"] = "answer"
-        return state
     def _generate_answer(self, state: AgentState) -> AgentState:
-        # Collect all tool outputs with clear section headers
-        materials = []
-        # Add search results if any
-        search_results = [h for h in state["history"] if h["step"] == "search"]
-        if search_results:
-            materials.append("=== Search Results ===")
-            for result in search_results:
-                for item in result.get("results", []):
-                    materials.append(item)
-        # Add image analysis if any
-        image_results = [h for h in state["history"] if h["step"] == "image"]
-        if image_results:
-            materials.append("=== Image Analysis ===")
-            for result in image_results:
-                materials.append(result.get("output", ""))
-        # Add video analysis if any
-        video_results = [h for h in state["history"] if h["step"] == "video"]
-        if video_results:
-            materials.append("=== Video Analysis ===")
-            for result in video_results:
-                materials.append(result.get("output", ""))
-        # Add spreadsheet analysis if any
-        sheet_results = [h for h in state["history"] if h["step"] == "sheet"]
-        if sheet_results:
-            materials.append("=== Spreadsheet Analysis ===")
-            for result in sheet_results:
-                materials.append(result.get("output", ""))
-        # Join all materials with clear separation
-        search_block = "\n\n".join(materials) if materials else "No materials available."
-        # First attempt with full context
         prompt = f"""
-You are a helpful assistant. Your task is to answer the question using ONLY the materials provided.
-If you cannot find a direct answer, provide the most relevant information you can find.
 QUESTION:
 {state['question']}
-MATERIALS:
-{search_block}
 Return ONLY this exact JSON object:
 {{"ANSWER": "<answer text>"}}
 """
         try:
             raw = self._call_llm(prompt, 300)
-            try:
-                data = json.loads(raw)
-                answer = data["ANSWER"]
-            except (json.JSONDecodeError, KeyError):
-                print("\nJSON parse error, trying direct prompt...")
-                # If first attempt fails, try a more direct prompt
-                direct_prompt = f"""
-Answer this question directly and concisely. Use the materials provided.
-QUESTION:
-{state['question']}
-MATERIALS:
-{search_block}
-Return ONLY this exact JSON object:
-{{"ANSWER": "<answer text>"}}
-"""
-                raw = self._call_llm(direct_prompt, 300)
-                try:
-                    data = json.loads(raw)
-                    answer = data["ANSWER"]
-                except (json.JSONDecodeError, KeyError):
-                    print("\nBoth attempts failed, using fallback answer...")
-                    if materials:
-                        # If we have materials but no answer, summarize what we know
-                        summary_prompt = f"""
-Summarize the key information from these materials in one sentence.
-MATERIALS:
-{search_block}
-Return ONLY this exact JSON object:
-{{"ANSWER": "<answer text>"}}
-"""
-                        raw = self._call_llm(summary_prompt, 150)
-                        try:
-                            data = json.loads(raw)
-                            answer = data["ANSWER"]
-                        except (json.JSONDecodeError, KeyError):
-                            answer = "I cannot provide a definitive answer at this time."
-                    else:
-                        answer = "I cannot provide a definitive answer at this time."
             state["final_answer"] = answer
-            state["current_step"] = "done"
         except Exception as e:
             print(f"\nLLM Error in answer generation: {str(e)}")
             state["final_answer"] = "I encountered an error while generating the answer."
-            state["current_step"] = "done"
         return state
     def _build_workflow(self) -> Graph:
         sg = StateGraph(state_schema=AgentState)
-        # Add nodes
-        sg.add_node("analyze", self._analyze_question)
-        sg.add_node("search", self._perform_search)
         sg.add_node("answer", self._generate_answer)
-        sg.add_node("image", self._image_node)
-        sg.add_node("video", self._video_node)
-        sg.add_node("sheet", self._sheet_node)
-        sg.add_node("code", self._code_analysis_node)
-        # Add edges
-        sg.add_edge("analyze", "search")
-        sg.add_edge("analyze", "answer")
-        sg.add_edge("search", "answer")
-        sg.add_edge("image", "answer")
-        sg.add_edge("video", "answer")
-        sg.add_edge("sheet", "answer")
-        sg.add_edge("code", "answer")
-        def router(state: AgentState):
-            return state["current_step"]
-        sg.add_conditional_edges("analyze", router, {
-            "search": "search",
-            "answer": "answer",
-            "image": "image",
-            "video": "video",
-            "sheet": "sheet",
-            "code": "code"
-        })
-        sg.set_entry_point("analyze")
         sg.set_finish_point("answer")
         return sg.compile()
     def __call__(self, question: str, task_id: str = "unknown") -> str:
-        # Parse question to get both text and file_url
-        try:
-            question_data = json.loads(question)
-            state: AgentState = {
-                "question": question_data.get("question", ""),
-                "current_step": "analyze",
-                "final_answer": "",
-                "history": [],
-                "needs_search": False,
-                "search_query": "",
-                "task_id": task_id,
-                "logs": {},
-                "file_url": question_data.get("file_url", ""),
-                "code_blocks": question_data.get("code_blocks", [])
-            }
-        except (json.JSONDecodeError, KeyError) as e:
-            print(f"Error parsing question data: {e}")
-            state: AgentState = {
-                "question": question,
-                "current_step": "analyze",
-                "final_answer": "",
-                "history": [],
-                "needs_search": False,
-                "search_query": "",
-                "task_id": task_id,
-                "logs": {},
-                "file_url": "",
-                "code_blocks": []
-            }
         final_state = self.workflow.invoke(state)
-        return final_state["final_answer"]  # Return the answer string directly, not JSON encoded
-    def _download_file(self, url: str) -> bytes:
-        """Download a file from a URL."""
-        r = requests.get(url, timeout=30)
-        r.raise_for_status()
-        return r.content
 # ----------------------------------------------------------------------------------
 # Gradio Interface & Submission Routines
@@ -556,82 +286,46 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code location: {agent_code}")
-    # 2. Fetch Questions with retry logic
     print(f"Fetching questions from: {questions_url}")
-    max_retries = 3
-    base_timeout = 30  # Increased from 15 to 30 seconds
-    for attempt in range(max_retries):
-        try:
-            response = requests.get(
-                questions_url,
-                timeout=base_timeout * (attempt + 1),  # Increase timeout with each retry
-                headers={'User-Agent': 'Mozilla/5.0'}  # Add user agent to avoid potential blocking
-            )
-            response.raise_for_status()
-            questions_data = response.json()
-            if not questions_data:
-                print("Fetched questions list is empty.")
-                return "Fetched questions list is empty or invalid format.", None
-            print(f"Fetched {len(questions_data)} questions.")
-            break  # Success, exit retry loop
-        except requests.exceptions.Timeout:
-            if attempt < max_retries - 1:
-                print(f"Timeout on attempt {attempt + 1}/{max_retries}. Retrying with longer timeout...")
-                time.sleep(2 * (attempt + 1))  # Exponential backoff
-                continue
-            else:
-                print("All retry attempts timed out.")
-                return "Error: All attempts to fetch questions timed out. Please try again later.", None
-        except requests.exceptions.RequestException as e:
-            print(f"Error fetching questions: {e}")
-            if attempt < max_retries - 1:
-                print(f"Retrying... (attempt {attempt + 1}/{max_retries})")
-                time.sleep(2 * (attempt + 1))
-                continue
-            return f"Error fetching questions after {max_retries} attempts: {e}", None
-        except requests.exceptions.JSONDecodeError as e:
-            print(f"Error decoding JSON response from questions endpoint: {e}")
-            print(f"Response text: {response.text[:500]}")
-            return f"Error decoding server response for questions: {e}", None
-        except Exception as e:
-            print(f"An unexpected error occurred fetching questions: {e}")
-            return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running agent workflow on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         if not task_id:
-            print(f"Skipping item with missing task_id: {item}")
             continue
         try:
             print(f"\nProcessing question {task_id}...")
-            # Pass the entire item as JSON string
-            question_json = json.dumps(item)
-            answer = agent(question_json, task_id)
             # Add to results
-            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
             results_log.append({
                 "Task ID": task_id,
                 "Question": item.get("question", ""),
                 "Submitted Answer": answer
             })
-            print(f"Completed question {task_id}")
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
             results_log.append({
                 "Task ID": task_id,
                 "Question": item.get("question", ""),
@@ -639,20 +333,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             })
     if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
     submission_data = {
         "username": username.strip(),
-        "agent_code": agent_code,
         "answers": answers_payload
     }
-    status_update = f"Agent workflow finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
@@ -664,36 +353,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:

 # Constants
 # -------------------------
+# Remove SYSTEM constant as we're using JSON contract
 # -------------------------
 # Utility helpers
             resp = self.llm.chat.completions.create(
                 model="gpt-4.1",
                 messages=[
                     {"role": "user", "content": prompt},
                 ],
                 temperature=0.3,
             print(f"\nLLM Error: {str(e)}")
             raise
     def _generate_answer(self, state: AgentState) -> AgentState:
         prompt = f"""
+Answer this question using the materials provided.
 QUESTION:
 {state['question']}
 Return ONLY this exact JSON object:
 {{"ANSWER": "<answer text>"}}
 """
         try:
             raw = self._call_llm(prompt, 300)
+            data = json.loads(raw)
+            answer = data["ANSWER"]
             state["final_answer"] = answer
         except Exception as e:
             print(f"\nLLM Error in answer generation: {str(e)}")
             state["final_answer"] = "I encountered an error while generating the answer."
+        state["current_step"] = "done"
         return state
     def _build_workflow(self) -> Graph:
         sg = StateGraph(state_schema=AgentState)
         sg.add_node("answer", self._generate_answer)
+        sg.set_entry_point("answer")
         sg.set_finish_point("answer")
         return sg.compile()
     def __call__(self, question: str, task_id: str = "unknown") -> str:
+        state: AgentState = {
+            "question": question,
+            "current_step": "answer",
+            "final_answer": "",
+            "history": [],
+            "needs_search": False,
+            "search_query": "",
+            "task_id": task_id,
+            "logs": {},
+            "file_url": "",
+            "code_blocks": []
+        }
         final_state = self.workflow.invoke(state)
+        return final_state["final_answer"]
 # ----------------------------------------------------------------------------------
 # Gradio Interface & Submission Routines
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=30)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except Exception as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    # 3. Run Agent and Collect Answers
     results_log = []
     answers_payload = []
     for item in questions_data:
         task_id = item.get("task_id")
         if not task_id:
             continue
         try:
             print(f"\nProcessing question {task_id}...")
+            answer = agent(item.get("question", ""), task_id)
             # Add to results
+            answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": answer  # Plain string, not JSON encoded
+            })
             results_log.append({
                 "Task ID": task_id,
                 "Question": item.get("question", ""),
                 "Submitted Answer": answer
             })
         except Exception as e:
+            print(f"Error processing task {task_id}: {e}")
             results_log.append({
                 "Task ID": task_id,
                 "Question": item.get("question", ""),
             })
     if not answers_payload:
+        return "No answers were generated.", pd.DataFrame(results_log)
+    # 4. Submit Answers
     submission_data = {
         "username": username.strip(),
+        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
         "answers": answers_payload
     }
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
+        return final_status, pd.DataFrame(results_log)
     except Exception as e:
+        return f"Submission Failed: {str(e)}", pd.DataFrame(results_log)
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo: