File size: 4,727 Bytes
f9a7c9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import requests
import json
from typing import List, Dict, Any

def fetch_questions(api_url: str = "https://agents-course-unit4-scoring.hf.space") -> List[Dict[str, Any]]:
    """Retrieve the full question set from the GAIA scoring service.

    Issues a GET to ``{api_url}/questions`` with a 15 second timeout and
    returns the decoded JSON body.

    This is a best-effort call: any exception raised while requesting or
    decoding (connection failure, non-2xx status, invalid JSON, ...) is
    printed and an empty list is returned instead of propagating.
    """
    try:
        endpoint = f"{api_url}/questions"
        resp = requests.get(endpoint, timeout=15)
        # Turn HTTP error statuses into exceptions so they hit the fallback.
        resp.raise_for_status()
        payload = resp.json()
        return payload
    except Exception as err:
        print(f"Error fetching questions: {err}")
        return []

def fetch_random_question(api_url: str = "https://agents-course-unit4-scoring.hf.space") -> Dict[str, Any]:
    """Retrieve one randomly chosen question from the GAIA scoring service.

    Issues a GET to ``{api_url}/random-question`` with a 15 second timeout
    and returns the decoded JSON body.

    This is a best-effort call: any exception raised while requesting or
    decoding is printed and an empty dict is returned instead of propagating.
    """
    try:
        endpoint = f"{api_url}/random-question"
        resp = requests.get(endpoint, timeout=15)
        # Turn HTTP error statuses into exceptions so they hit the fallback.
        resp.raise_for_status()
        payload = resp.json()
        return payload
    except Exception as err:
        print(f"Error fetching random question: {err}")
        return {}

def submit_answers(username: str, agent_code: str, answers: List[Dict[str, str]], 
                  api_url: str = "https://agents-course-unit4-scoring.hf.space") -> Dict[str, Any]:
    """Send a batch of answers to the GAIA scoring service.

    POSTs a JSON body with the keys ``username`` (whitespace-stripped),
    ``agent_code`` and ``answers`` to ``{api_url}/submit`` with a 60 second
    timeout and returns the decoded JSON response.

    Never raises: if building the payload, sending the request, or decoding
    the response fails, the error is printed and ``{"error": "<message>"}``
    is returned.
    """
    try:
        # Built inside the try so a bad username (e.g. None) is reported
        # through the same error dict as a network failure.
        payload = dict(
            username=username.strip(),
            agent_code=agent_code,
            answers=answers,
        )
        endpoint = f"{api_url}/submit"
        resp = requests.post(endpoint, json=payload, timeout=60)
        resp.raise_for_status()
        return resp.json()
    except Exception as err:
        print(f"Error submitting answers: {err}")
        return {"error": str(err)}

def format_gaia_answer(raw_answer: str) -> str:
    """Normalise an agent's raw answer for GAIA exact-match submission.

    Steps, in order:
      1. Surrounding whitespace is stripped.
      2. Leading boilerplate such as "FINAL ANSWER:" or "The answer is:" is
         removed. Matching is case-insensitive and repeated until no known
         prefix remains, so "answer: Final Answer: 42" becomes "42".
      3. Trailing '.', '!' and '?' characters (and whitespace between them)
         are removed, since they are not expected in the ground truth.

    Args:
        raw_answer: Text produced by the agent. ``None`` is tolerated and
            treated as an empty answer.

    Returns:
        The cleaned answer; an empty string if nothing is left.
    """
    if raw_answer is None:
        return ""

    # Stored lower-case; compared against the lower-cased head of the answer.
    prefixes_to_remove = (
        "final answer:",
        "answer:",
        "the answer is:",
        "the final answer is:",
    )

    answer = raw_answer.strip()

    # Keep peeling prefixes until a full pass finds none, so stacked
    # prefixes are handled regardless of the order they appear in.
    removed_one = True
    while removed_one:
        removed_one = False
        for prefix in prefixes_to_remove:
            # Compare only the slice of the same length so the cut index is
            # always valid for the original (non-lowered) string.
            if answer[:len(prefix)].lower() == prefix:
                answer = answer[len(prefix):].strip()
                removed_one = True
                break

    # Remove trailing punctuation that might not be in ground truth
    while answer and answer[-1] in '.!?':
        answer = answer[:-1].strip()

    return answer

def _keyword_starts_word(text: str, keyword: str) -> bool:
    """Return True if *keyword* occurs in *text* at the start of a word or number.

    An occurrence is rejected when the character just before it is of the
    same kind as the keyword's first character (a letter before a letter,
    a digit before a digit), i.e. when the keyword is buried inside a longer
    token such as "now" in "know".
    """
    first_is_alpha = keyword[0].isalpha()
    position = text.find(keyword)
    while position != -1:
        if position == 0:
            return True
        previous = text[position - 1]
        buried = previous.isalpha() if first_is_alpha else previous.isdigit()
        if not buried:
            return True
        position = text.find(keyword, position + 1)
    return False


def analyze_question_type(question: str) -> Dict[str, bool]:
    """Guess which capabilities a question needs from keyword hits.

    The question is lower-cased and scanned for a fixed keyword list per
    capability. A keyword only counts when it begins a word (or number), so
    "know" does not trigger "now" and "profile" does not trigger "file";
    suffixes are still accepted ("files", "calculated", "recently").
    This remains a heuristic: words that merely start with a keyword
    (e.g. "country" for "count") still match.

    Args:
        question: The question text.

    Returns:
        A dict with the boolean flags ``needs_web_search``,
        ``needs_file_processing``, ``needs_calculation``,
        ``needs_image_analysis`` and ``needs_text_processing``.
    """
    keyword_groups = {
        "needs_web_search": (
            "current", "recent", "latest", "today", "now", "2024", "2023",
        ),
        "needs_file_processing": ("file", "document"),
        "needs_calculation": (
            "calculate", "compute", "sum", "total", "average", "percentage",
            "multiply", "divide",
        ),
        "needs_image_analysis": (
            "image", "picture", "photo", "visual", "shown", "displayed",
        ),
        "needs_text_processing": (
            "extract", "find in", "search for", "list", "count",
        ),
    }

    question_lower = question.lower()

    return {
        capability: any(
            _keyword_starts_word(question_lower, keyword) for keyword in keywords
        )
        for capability, keywords in keyword_groups.items()
    }

def create_execution_plan(question: str, task_id: str = None) -> List[str]:
    """Build an ordered list of human-readable steps for answering a question.

    The plan always opens with a question-analysis step and closes with a
    synthesis step. In between, one step is added for each capability flagged
    by ``analyze_question_type``, in this fixed order: file processing, web
    search, image analysis, calculation, text processing. The file step is
    only included when a truthy ``task_id`` is supplied as well.
    """
    needs = analyze_question_type(question)

    # (include?, step text) pairs, listed in the order they appear in the plan.
    optional_steps = [
        (
            task_id and needs["needs_file_processing"],
            f"Download and process any files associated with task {task_id}",
        ),
        (needs["needs_web_search"], "Search the web for current/recent information"),
        (needs["needs_image_analysis"], "Analyze any images for visual information"),
        (needs["needs_calculation"], "Perform necessary calculations"),
        (needs["needs_text_processing"], "Process and extract specific information from text"),
    ]

    return [
        "Analyze the question to understand what information is needed",
        *(step_text for wanted, step_text in optional_steps if wanted),
        "Synthesize all information to provide the final answer",
    ]

def log_agent_step(step: str, result: str, step_number: int = None):
    """Print one agent execution step and a preview of its result.

    Args:
        step: Description of the step that was executed.
        result: Output of the step. Only the first 200 characters are
            printed, followed by "..." when it is longer. Non-string values
            are converted with ``str()`` before truncation.
        step_number: Optional index shown as a "Step N: " label. Any integer,
            including 0, produces a label; ``None`` omits it.
    """
    # Compare against None explicitly: a plain truthiness test would drop
    # the label for step 0.
    prefix = f"Step {step_number}: " if step_number is not None else ""

    # Slicing and len() below need a string; tolerate None/dicts/etc.
    text = result if isinstance(result, str) else str(result)
    suffix = "..." if len(text) > 200 else ""

    print(f"\n🤖 {prefix}{step}")
    print(f"📝 Result: {text[:200]}{suffix}")