File size: 7,040 Bytes
f9a7c9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
from langchain.tools import Tool
import requests
import os
from PIL import Image
import io
import base64
from langchain_community.tools import DuckDuckGoSearchRun
from typing import Optional
import json
import PyPDF2
import tempfile

# Shared DuckDuckGo search runner, created once when this module is imported
# and reused by web_search_tool_func for every query.
search_tool = DuckDuckGoSearchRun()

def web_search_tool_func(query: str) -> str:
    """Run *query* through the module-level DuckDuckGo search runner.

    Args:
        query: The search phrase to submit.

    Returns:
        Whatever ``search_tool.run`` returns for the query, or a
        ``"Web search failed: ..."`` message if the search raised.
    """
    try:
        return search_tool.run(query)
    except Exception as err:
        # Report the failure as text so the calling agent can keep going.
        return f"Web search failed: {err!s}"

# Tool wrapper that registers the web search function under the name "web_search".
web_search_tool = Tool(
    description="Searches the web for current information. Use this for factual questions, recent events, or when you need to find information not in your training data.",
    func=web_search_tool_func,
    name="web_search",
)

def file_download_tool_func(task_id: str) -> str:
    """Download the file attached to a GAIA task and describe or extract it.

    The file is fetched from the scoring API, written to a temporary file,
    and then handled according to the response's ``content-type`` header:

    * image: the temporary path is returned so the image analysis tool can
      open it (the file is intentionally left on disk);
    * pdf:   the path is handed to ``process_pdf_file`` and its result returned;
    * text:  the content is read back, the temporary file is removed, and
      the text is returned;
    * other: the temporary path and content type are reported.

    Args:
        task_id: Identifier of the GAIA task whose file should be fetched.

    Returns:
        A human-readable status/content string. Any exception is converted
        into a ``"Failed to download file for task ..."`` message.
    """
    try:
        api_url = "https://agents-course-unit4-scoring.hf.space"
        file_url = f"{api_url}/files/{task_id}"

        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        # delete=False: the file must outlive this `with` so later steps
        # (or another tool) can reopen it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as temp_file:
            temp_file.write(response.content)
            temp_path = temp_file.name

        # Dispatch on the server-declared content type.
        content_type = response.headers.get('content-type', '').lower()

        if 'image' in content_type:
            return f"Image file downloaded to {temp_path}. Use image_analysis_tool to analyze it."
        elif 'pdf' in content_type:
            return process_pdf_file(temp_path)
        elif 'text' in content_type:
            try:
                # errors="replace": a text file that is not valid UTF-8 is
                # still returned (with replacement characters) instead of
                # being misreported as a failed download.
                with open(temp_path, 'r', encoding='utf-8', errors='replace') as f:
                    content = f.read()
            finally:
                # Remove the temporary file even if reading failed, so a bad
                # file cannot leak on disk.
                os.unlink(temp_path)
            return f"Text file content:\n{content}"
        else:
            return f"File downloaded to {temp_path}. Content type: {content_type}"

    except Exception as e:
        return f"Failed to download file for task {task_id}: {str(e)}"

def process_pdf_file(file_path: str) -> str:
    """Extract the text of every page of a PDF, then delete the file.

    The file at ``file_path`` is treated as a disposable temporary file: it
    is removed whether or not extraction succeeds, so a corrupt or
    unreadable PDF does not leave a stray file behind.

    Args:
        file_path: Path of the PDF file to read and then remove.

    Returns:
        ``"PDF content extracted:\\n..."`` with a ``--- Page N ---`` header
        before each page's text, or ``"Failed to process PDF: ..."`` if
        reading or parsing raised.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Build one chunk per page and join once instead of repeatedly
            # growing a string. `or ""` is defensive: a missing extraction
            # result is treated as empty text.
            page_chunks = [
                f"\n--- Page {page_number} ---\n{page.extract_text() or ''}"
                for page_number, page in enumerate(pdf_reader.pages, start=1)
            ]
        return f"PDF content extracted:\n{''.join(page_chunks)}"
    except Exception as e:
        return f"Failed to process PDF: {str(e)}"
    finally:
        # Best-effort cleanup on both the success and the failure path. A
        # failed delete (e.g. the file never existed) must not replace the
        # result that is already being returned.
        try:
            os.unlink(file_path)
        except OSError:
            pass

# Tool wrapper that registers the GAIA file downloader under the name "file_download".
file_download_tool = Tool(
    description="Downloads and processes files associated with GAIA task IDs. Can handle images, PDFs, and text files.",
    func=file_download_tool_func,
    name="file_download",
)

def image_analysis_tool_func(image_path_or_description: str) -> str:
    """Report basic metadata for an image file, then delete the file.

    This is a placeholder for real vision-model analysis: it only reads the
    image's dimensions, mode and format.

    Args:
        image_path_or_description: Either a path to an existing image file,
            or free text describing an image. Anything that is not an
            existing path is echoed back in a notice.

    Returns:
        A description string with the image's size, mode and format; a
        notice that a vision model is required when the argument is not an
        existing path; or ``"Image analysis failed: ..."`` if opening or
        deleting the file raised.
    """
    try:
        if not os.path.exists(image_path_or_description):
            return f"Image analysis requested for: {image_path_or_description}. Note: Full image analysis requires a vision model integration."

        # Read the metadata while the file is open ...
        with Image.open(image_path_or_description) as img:
            width, height = img.size
            mode = img.mode
            format_info = img.format

        # ... and delete it only after the handle is closed. Removing a file
        # that is still open fails on Windows, which would turn a successful
        # analysis into an "Image analysis failed" result.
        # NOTE(review): this deletes whatever image path the caller passes,
        # not only files created by the download tool - confirm that is
        # acceptable for agent-supplied paths.
        os.unlink(image_path_or_description)

        return f"Image analyzed: {width}x{height} pixels, mode: {mode}, format: {format_info}. Note: This is a basic analysis. For detailed image content analysis, a vision model would be needed."
    except Exception as e:
        return f"Image analysis failed: {str(e)}"

# Tool wrapper that registers the image metadata function under the name "image_analysis".
image_analysis_tool = Tool(
    description="Analyzes images to extract information. Use this for questions involving visual content.",
    func=image_analysis_tool_func,
    name="image_analysis",
)

def calculator_tool_func(expression: str) -> str:
    """Evaluate a basic arithmetic expression and report the result.

    Only digits, ``+ - * / ( ) . =`` and spaces are accepted. A single
    trailing ``=`` (as in ``"2+2="``) is tolerated and dropped before
    evaluation.

    Args:
        expression: The arithmetic expression to evaluate.

    Returns:
        ``"Calculation result: <expr> = <value>"`` on success,
        ``"Invalid characters in expression: ..."`` when a disallowed
        character is present, or ``"Calculation failed for '...': ..."``
        when the expression is empty or evaluation raised.
    """
    try:
        # Character whitelist: no letters or underscores, so the expression
        # cannot reference names, attributes or call builtins.
        allowed_chars = set('0123456789+-*/().= ')
        if not all(c in allowed_chars for c in expression):
            return f"Invalid characters in expression: {expression}"

        # "=" is whitelisted, but a trailing one is a syntax error for eval;
        # accept the common "2+2=" form by dropping a single trailing "=".
        candidate = expression.strip()
        shown = expression
        if candidate.endswith('='):
            candidate = candidate[:-1].rstrip()
            shown = candidate

        if not candidate:
            return f"Calculation failed for '{expression}': empty expression"

        # SECURITY NOTE: eval() on agent-supplied input. The whitelist above
        # plus an empty builtins namespace prevents name access, but "**" is
        # still expressible, so a hostile expression can request a very
        # expensive computation. Consider an AST-based evaluator if this
        # tool is ever exposed to untrusted users.
        result = eval(candidate, {"__builtins__": {}}, {})
        return f"Calculation result: {shown} = {result}"
    except Exception as e:
        return f"Calculation failed for '{expression}': {str(e)}"

# Tool wrapper that registers the arithmetic evaluator under the name "calculator".
calculator_tool = Tool(
    description="Performs mathematical calculations. Use this for numerical computations and math problems.",
    func=calculator_tool_func,
    name="calculator",
)

def text_processor_tool_func(text: str, operation: str = "summarize") -> str:
    """Apply a simple text operation and return the result as a message.

    Supported operations:

    * ``"summarize"``: if the text has more than five sentences, keep the
      first two and the last two; otherwise return the text unchanged.
      Sentences are found by splitting on ``.``, so decimals and
      abbreviations are split as well.
    * ``"extract_numbers"``: list integers and decimals found in the text.
    * ``"extract_dates"``: list dates written as ``MM/DD/YYYY``,
      ``YYYY-MM-DD`` or ``Month DD, YYYY``.

    Args:
        text: The text to process.
        operation: Which operation to run; defaults to ``"summarize"``.

    Returns:
        A message containing the result, a notice listing the available
        operations when ``operation`` is unknown, or
        ``"Text processing failed: ..."`` if processing raised.
    """
    try:
        # Imported once here rather than in each branch; kept local so the
        # block does not rely on a module-level import.
        import re

        if operation == "summarize":
            # Drop empty fragments (e.g. the one after the final ".") and
            # surrounding whitespace so they neither count as sentences nor
            # leave doubled spaces or a dangling separator in the summary.
            sentences = [part.strip() for part in text.split('.')]
            sentences = [sentence for sentence in sentences if sentence]
            if len(sentences) > 5:
                summary = '. '.join(sentences[:2] + sentences[-2:]) + '.'
                return f"Text summary: {summary}"
            return f"Text (short enough to not need summarization): {text}"

        if operation == "extract_numbers":
            numbers = re.findall(r'\d+(?:\.\d+)?', text)
            return f"Numbers found in text: {numbers}"

        if operation == "extract_dates":
            # Results are grouped by pattern, in the order listed here, not
            # by position in the text.
            date_patterns = [
                r'\d{1,2}/\d{1,2}/\d{4}',  # MM/DD/YYYY
                r'\d{4}-\d{1,2}-\d{1,2}',  # YYYY-MM-DD
                r'\b\w+ \d{1,2}, \d{4}\b'  # Month DD, YYYY
            ]
            dates = [
                match
                for pattern in date_patterns
                for match in re.findall(pattern, text)
            ]
            return f"Dates found in text: {dates}"

        return f"Text processing operation '{operation}' not supported. Available: summarize, extract_numbers, extract_dates"

    except Exception as e:
        return f"Text processing failed: {str(e)}"

# Tool wrapper that registers the text processor under the name "text_processor".
# NOTE(review): the description tells the agent to pass the operation as a
# second parameter - confirm that Tool actually forwards a second argument to
# text_processor_tool_func; otherwise only the default "summarize" is reachable.
text_processor_tool = Tool(
    description="Processes text for various operations like summarization, number extraction, date extraction. Specify operation as second parameter.",
    func=text_processor_tool_func,
    name="text_processor",
)

# All tools defined in this module, collected in one list so callers can
# import a single name. Order is: web search, file download, image analysis,
# calculator, text processor.
agent_tools = [
    web_search_tool,
    file_download_tool,
    image_analysis_tool,
    calculator_tool,
    text_processor_tool
]