File size: 7,040 Bytes
f9a7c9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
from langchain.tools import Tool
import requests
import os
from PIL import Image
import io
import base64
from langchain_community.tools import DuckDuckGoSearchRun
from typing import Optional
import json
import PyPDF2
import tempfile
# Shared DuckDuckGo search runner. It is created once when this module is
# imported and reused by web_search_tool_func for every query.
search_tool = DuckDuckGoSearchRun()
def web_search_tool_func(query: str) -> str:
    """Search the web with DuckDuckGo and return the result text.

    Args:
        query: Free-text search query.

    Returns:
        Whatever ``search_tool.run`` returns for the query. If the query is
        blank or the search raises, a message starting with
        ``"Web search failed:"`` is returned instead of an exception.
    """
    # A blank query cannot yield useful results; answer immediately rather
    # than spending a network round-trip on it.
    if not query or not query.strip():
        return "Web search failed: empty query"
    try:
        return search_tool.run(query)
    except Exception as e:
        # Tool boundary: the agent expects a string back, never an exception.
        return f"Web search failed: {str(e)}"
# Exposes web_search_tool_func to the agent under the tool name "web_search".
web_search_tool = Tool(
    name="web_search",
    func=web_search_tool_func,
    description=(
        "Searches the web for current information. "
        "Use this for factual questions, recent events, "
        "or when you need to find information not in your training data."
    ),
)
def file_download_tool_func(task_id: str) -> str:
    """Download the file attached to a GAIA task and describe or extract it.

    The file is fetched from the scoring API's ``/files/{task_id}`` endpoint
    and written to a temporary file. The next step depends on the response's
    ``content-type`` header:

    * image: the temp file is kept and its path is returned so it can be
      handed to the image analysis tool;
    * pdf: the temp file is passed to ``process_pdf_file``;
    * text: the content is read back as UTF-8, the temp file is deleted and
      the text is returned;
    * anything else: the temp file is kept and its path and content type are
      returned.

    Args:
        task_id: GAIA task identifier. Surrounding whitespace is ignored.

    Returns:
        A human-readable result string. Failures (blank id, HTTP error,
        timeout, decode error, ...) are reported as a string starting with
        ``"Failed to download file"`` rather than raised.
    """
    from urllib.parse import quote

    task_id = (task_id or "").strip()
    if not task_id:
        # Without an id the request could only hit the bare /files/ endpoint.
        return "Failed to download file: no task ID provided"

    try:
        api_url = "https://agents-course-unit4-scoring.hf.space"
        # Percent-encode the id so characters such as "/", "?" or "#" cannot
        # steer the request to a different endpoint.
        safe_task_id = quote(task_id, safe="")
        file_url = f"{api_url}/files/{safe_task_id}"
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()

        # delete=False: the file has to outlive this ``with`` block because
        # it is re-opened below or handed to another tool by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as temp_file:
            temp_file.write(response.content)
            temp_path = temp_file.name

        if 'image' in content_type:
            return f"Image file downloaded to {temp_path}. Use image_analysis_tool to analyze it."
        if 'pdf' in content_type:
            return process_pdf_file(temp_path)
        if 'text' in content_type:
            try:
                with open(temp_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            finally:
                # Remove the temp file even when the UTF-8 read fails, so a
                # badly encoded download does not leave a stray file behind.
                os.unlink(temp_path)
            return f"Text file content:\n{content}"
        return f"File downloaded to {temp_path}. Content type: {content_type}"
    except Exception as e:
        # Tool boundary: report the failure to the agent as text.
        return f"Failed to download file for task {task_id}: {str(e)}"
def process_pdf_file(file_path: str) -> str:
    """Extract the text of every page of a PDF, then delete the file.

    The file is consumed by this call: it is removed whether or not
    extraction succeeds, so a failed parse does not leave the temporary
    download behind.

    Args:
        file_path: Path of the PDF file to read and remove.

    Returns:
        ``"PDF content extracted:"`` followed by each page's text under a
        ``--- Page N ---`` header (pages numbered from 1), or a message
        starting with ``"Failed to process PDF:"`` if the file cannot be
        opened or parsed.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # ``or ''`` guards against a page whose extraction yields None.
            sections = [
                f"\n--- Page {page_num} ---\n{page.extract_text() or ''}"
                for page_num, page in enumerate(pdf_reader.pages, start=1)
            ]
        return f"PDF content extracted:\n{''.join(sections)}"
    except Exception as e:
        return f"Failed to process PDF: {str(e)}"
    finally:
        # Best-effort cleanup: a file that is already gone or cannot be
        # removed must not replace the result computed above.
        try:
            os.unlink(file_path)
        except Exception:
            pass
# Exposes file_download_tool_func to the agent under the tool name "file_download".
file_download_tool = Tool(
    name="file_download",
    func=file_download_tool_func,
    description=(
        "Downloads and processes files associated with GAIA task IDs. "
        "Can handle images, PDFs, and text files."
    ),
)
def image_analysis_tool_func(image_path_or_description: str) -> str:
    """Report basic metadata for an image file (stand-in for a vision model).

    If the argument is an existing path, the file is opened with PIL, its
    dimensions, mode and format are reported, and the file is then deleted.
    Otherwise the argument is echoed back in a note saying that a vision
    model is required. Any error is returned as a string beginning with
    "Image analysis failed:".
    """
    try:
        # Guard clause: anything that is not an existing path is handled as a
        # free-text request.
        if not os.path.exists(image_path_or_description):
            return f"Image analysis requested for: {image_path_or_description}. Note: Full image analysis requires a vision model integration."

        with Image.open(image_path_or_description) as picture:
            px_wide, px_high = picture.size
            colour_mode = picture.mode
            file_format = picture.format

        # The image is treated as a temporary file: remove it once read.
        os.unlink(image_path_or_description)
        return f"Image analyzed: {px_wide}x{px_high} pixels, mode: {colour_mode}, format: {file_format}. Note: This is a basic analysis. For detailed image content analysis, a vision model would be needed."
    except Exception as err:
        return f"Image analysis failed: {str(err)}"
# Exposes image_analysis_tool_func to the agent under the tool name "image_analysis".
image_analysis_tool = Tool(
    name="image_analysis",
    func=image_analysis_tool_func,
    description=(
        "Analyzes images to extract information. "
        "Use this for questions involving visual content."
    ),
)
def calculator_tool_func(expression: str) -> str:
    """Evaluate a basic arithmetic expression and report the result.

    Only digits, ``+ - * / ( ) . =`` and spaces are accepted; any other
    character causes the expression to be rejected before evaluation.

    Args:
        expression: The arithmetic expression, e.g. ``"(1 + 2) * 3"``.

    Returns:
        ``"Calculation result: <expression> = <value>"`` on success,
        ``"Invalid characters in expression: ..."`` if the whitelist check
        fails, or ``"Calculation failed for '<expression>': <reason>"`` if the
        expression is blank or evaluation raises (syntax error, division by
        zero, ...).
    """
    try:
        # Character whitelist: no letters, quotes or underscores, so names,
        # attribute access and string literals cannot be written.
        allowed_chars = set('0123456789+-*/().= ')
        if not set(expression).issubset(allowed_chars):
            return f"Invalid characters in expression: {expression}"
        if not expression.strip():
            # Give a stable message instead of an interpreter-specific
            # syntax error for blank input.
            return f"Calculation failed for '{expression}': empty expression"
        # SECURITY: eval() on agent-supplied text. The whitelist above is the
        # main guard; builtins are also emptied as defence in depth.
        # NOTE(review): "**" is still allowed, so an input like 9**9**9**9
        # can consume unbounded CPU/memory. Consider an ast-based evaluator
        # with an exponent limit.
        result = eval(expression, {"__builtins__": {}}, {})
        return f"Calculation result: {expression} = {result}"
    except Exception as e:
        return f"Calculation failed for '{expression}': {str(e)}"
# Exposes calculator_tool_func to the agent under the tool name "calculator".
calculator_tool = Tool(
    name="calculator",
    func=calculator_tool_func,
    description=(
        "Performs mathematical calculations. "
        "Use this for numerical computations and math problems."
    ),
)
def text_processor_tool_func(text: str, operation: str = "summarize") -> str:
    """Run a simple text operation: summarize, extract numbers or extract dates.

    Args:
        text: The text to process.
        operation: One of ``"summarize"``, ``"extract_numbers"`` or
            ``"extract_dates"``. Matching ignores case and surrounding
            whitespace.

    Returns:
        A result string for the chosen operation:

        * summarize: if the text has more than five non-empty sentences
          (split on ``.``), the first two and last two joined into a summary;
          otherwise the text itself, labelled as short enough.
        * extract_numbers: the list of integer/decimal literals found.
        * extract_dates: the list of ``MM/DD/YYYY``, ``YYYY-MM-DD`` and
          ``Month DD, YYYY`` matches, grouped in that pattern order.

        An unknown operation yields a message listing the supported ones, and
        any unexpected error yields ``"Text processing failed: <reason>"``.
    """
    import re

    try:
        op = operation.strip().lower()

        if op == "summarize":
            # Strip each piece and drop empties so the summary has no doubled
            # spaces and the empty piece after the final "." is not counted
            # or emitted as a sentence.
            sentences = [part.strip() for part in text.split('.') if part.strip()]
            if len(sentences) > 5:
                summary = '. '.join(sentences[:2] + sentences[-2:]) + '.'
                return f"Text summary: {summary}"
            return f"Text (short enough to not need summarization): {text}"

        if op == "extract_numbers":
            numbers = re.findall(r'\d+(?:\.\d+)?', text)
            return f"Numbers found in text: {numbers}"

        if op == "extract_dates":
            # Simple date pattern matching
            date_patterns = (
                r'\d{1,2}/\d{1,2}/\d{4}',   # MM/DD/YYYY
                r'\d{4}-\d{1,2}-\d{1,2}',   # YYYY-MM-DD
                r'\b\w+ \d{1,2}, \d{4}\b',  # Month DD, YYYY
            )
            dates = [
                match
                for pattern in date_patterns
                for match in re.findall(pattern, text)
            ]
            return f"Dates found in text: {dates}"

        return f"Text processing operation '{operation}' not supported. Available: summarize, extract_numbers, extract_dates"
    except Exception as e:
        return f"Text processing failed: {str(e)}"
# Exposes text_processor_tool_func to the agent under the tool name "text_processor".
# NOTE(review): this wrapper is presumably a single-input tool, in which case an
# agent call can only supply `text` and `operation` always stays at its default.
# Confirm against the LangChain Tool documentation.
text_processor_tool = Tool(
    name="text_processor",
    func=text_processor_tool_func,
    description=(
        "Processes text for various operations like summarization, "
        "number extraction, date extraction. "
        "Specify operation as second parameter."
    ),
)
# All tool objects defined in this module, collected in one list so they can
# be imported together.
agent_tools = [
    web_search_tool,
    file_download_tool,
    image_analysis_tool,
    calculator_tool,
    text_processor_tool
]
|