Kai Jennissen
committed on
added tools
Browse files
agent.py
CHANGED
@@ -3,7 +3,7 @@ from smolagents import (
|
|
3 |
CodeAgent,
|
4 |
DuckDuckGoSearchTool,
|
5 |
VisitWebpageTool,
|
6 |
-
|
7 |
OpenAIServerModel,
|
8 |
WikipediaSearchTool,
|
9 |
)
|
@@ -79,12 +79,26 @@ if Text('Accept cookies?').exists():
|
|
79 |
```<end_code>
|
80 |
"""
|
81 |
|
82 |
-
add_sys_prompt = """\n\
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
|
90 |
def initialize_tracing(enabled=True, provider="langfuse"):
|
@@ -134,19 +148,24 @@ def get_agent():
|
|
134 |
description="A web agent that can search the web and visit webpages.",
|
135 |
verbosity_level=1,
|
136 |
)
|
|
|
137 |
mm_agent = CodeAgent(
|
138 |
tools=[
|
|
|
139 |
read_image,
|
140 |
transcribe_audio,
|
141 |
read_code,
|
142 |
run_video,
|
143 |
],
|
144 |
-
model=
|
|
|
|
|
145 |
max_steps=3,
|
146 |
name="Multimedia_Agent",
|
147 |
-
description="An agent that can
|
148 |
verbosity_level=1,
|
149 |
)
|
|
|
150 |
|
151 |
# Initialize the model
|
152 |
# vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
|
@@ -168,16 +187,15 @@ def get_agent():
|
|
168 |
# Import helium for the agent
|
169 |
# Create manager agent
|
170 |
manager_agent = CodeAgent(
|
171 |
-
tools=[
|
172 |
-
managed_agents=[
|
173 |
model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
|
174 |
max_steps=5,
|
175 |
planning_interval=10,
|
176 |
additional_authorized_imports=["pandas", "numpy"],
|
177 |
-
verbosity_level=
|
178 |
)
|
179 |
|
180 |
-
manager_agent.prompt_templates["system_prompt"] += add_sys_prompt
|
181 |
return manager_agent
|
182 |
|
183 |
|
|
|
3 |
CodeAgent,
|
4 |
DuckDuckGoSearchTool,
|
5 |
VisitWebpageTool,
|
6 |
+
InferenceClientModel,
|
7 |
OpenAIServerModel,
|
8 |
WikipediaSearchTool,
|
9 |
)
|
|
|
79 |
```<end_code>
|
80 |
"""
|
81 |
|
82 |
+
add_sys_prompt = """\n\nWhen processing tasks with files:
|
83 |
+
|
84 |
+
1. Use the fetch_task_files tool with the URL provided to you to download and process files
|
85 |
+
2. Depending on the file type returned, use the appropriate specialized tool:
|
86 |
+
- For images: Use the data_url returned with read_image tool
|
87 |
+
- For audio: Use the audio data with transcribe_audio tool
|
88 |
+
- For code files: Use read_code tool
|
89 |
+
- For videos: Use run_video tool
|
90 |
+
|
91 |
+
3. When handling different file types:
|
92 |
+
- Images: The fetch_task_files tool will return a data_url you can use directly with read_image
|
93 |
+
- Code: Do not execute code files, analyze them as text
|
94 |
+
- Tabular data (CSV, Excel): Use pandas to analyze the data
|
95 |
+
- Videos: Extract relevant information from visual frames and audio
|
96 |
+
|
97 |
+
4. Keep answers concise and to the point. The answer is likely as simple as one word.
|
98 |
+
5. Make sure you provide the answer in accordance with the instruction provided in the question.
|
99 |
+
6. Do not return the raw result of tool calls as your final answer.
|
100 |
+
7. Do not add any additional information, explanation, unnecessary words or symbols.
|
101 |
+
"""
|
102 |
|
103 |
|
104 |
def initialize_tracing(enabled=True, provider="langfuse"):
|
|
|
148 |
description="A web agent that can search the web and visit webpages.",
|
149 |
verbosity_level=1,
|
150 |
)
|
151 |
+
|
152 |
mm_agent = CodeAgent(
|
153 |
tools=[
|
154 |
+
fetch_task_files,
|
155 |
read_image,
|
156 |
transcribe_audio,
|
157 |
read_code,
|
158 |
run_video,
|
159 |
],
|
160 |
+
model=InferenceClientModel(
|
161 |
+
model_id="Qwen/Qwen2.5-VL-32B-Instruct", # provider="together"
|
162 |
+
),
|
163 |
max_steps=3,
|
164 |
name="Multimedia_Agent",
|
165 |
+
description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
|
166 |
verbosity_level=1,
|
167 |
)
|
168 |
+
mm_agent.prompt_templates["system_prompt"] += add_sys_prompt
|
169 |
|
170 |
# Initialize the model
|
171 |
# vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
|
|
|
187 |
# Import helium for the agent
|
188 |
# Create manager agent
|
189 |
manager_agent = CodeAgent(
|
190 |
+
tools=[],
|
191 |
+
managed_agents=[mm_agent, web_agent],
|
192 |
model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
|
193 |
max_steps=5,
|
194 |
planning_interval=10,
|
195 |
additional_authorized_imports=["pandas", "numpy"],
|
196 |
+
verbosity_level=2,
|
197 |
)
|
198 |
|
|
|
199 |
return manager_agent
|
200 |
|
201 |
|
app.py
CHANGED
@@ -29,11 +29,13 @@ class BasicAgent:
|
|
29 |
|
30 |
# If task_id is provided, we'll include context about possible files
|
31 |
if task_id:
|
|
|
|
|
|
|
32 |
# Add context about files to the question
|
33 |
context = f"""Task ID: {task_id}
|
34 |
|
35 |
-
If
|
36 |
-
Example: fetch_task_files(task_id="{task_id}")
|
37 |
|
38 |
Question: {question}"""
|
39 |
|
|
|
29 |
|
30 |
# If task_id is provided, we'll include context about possible files
|
31 |
if task_id:
|
32 |
+
# API base URL for constructing file URLs
|
33 |
+
api_base_url = "https://agents-course-unit4-scoring.hf.space"
|
34 |
+
|
35 |
# Add context about files to the question
|
36 |
context = f"""Task ID: {task_id}
|
37 |
|
38 |
+
IMPORTANT: If the question mentions an image, file, or other media, construct the file URL using: {api_base_url}/files/{task_id}
|
|
|
39 |
|
40 |
Question: {question}"""
|
41 |
|
tools.py
CHANGED
@@ -549,18 +549,18 @@ def process_binary(response, filename, content_type):
|
|
549 |
|
550 |
|
551 |
@tool
|
552 |
-
def fetch_task_files(
|
553 |
"""
|
554 |
-
Download files
|
555 |
|
556 |
Args:
|
557 |
-
|
558 |
|
559 |
Returns:
|
560 |
dict: A dictionary containing file information and data in appropriate format for the file type
|
561 |
"""
|
562 |
-
|
563 |
-
|
564 |
|
565 |
try:
|
566 |
response = requests.get(files_url, timeout=15)
|
@@ -572,7 +572,7 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
|
|
572 |
if "filename=" in filename:
|
573 |
filename = filename.split("filename=")[-1].strip('"')
|
574 |
else:
|
575 |
-
filename =
|
576 |
|
577 |
print(f"Received file: {filename}, type: {content_type}")
|
578 |
|
@@ -620,10 +620,10 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
|
|
620 |
return process_binary(response, filename, content_type)
|
621 |
|
622 |
except requests.exceptions.RequestException as e:
|
623 |
-
print(f"Error fetching
|
624 |
return {"error": f"Error fetching files: {e}"}
|
625 |
except Exception as e:
|
626 |
-
print(f"An unexpected error occurred fetching files
|
627 |
return {"error": f"An unexpected error occurred: {e}"}
|
628 |
|
629 |
|
@@ -652,21 +652,23 @@ def search_wikipedia(query: str) -> str:
|
|
652 |
|
653 |
|
654 |
if __name__ == "__main__":
|
655 |
-
# Simple test for fetch_task_files
|
656 |
-
|
657 |
-
|
658 |
-
"
|
659 |
-
"
|
|
|
660 |
]
|
661 |
-
|
|
|
662 |
print(
|
663 |
"=" * 20
|
664 |
+ " "
|
665 |
-
+ f"Testing fetch_task_files with
|
666 |
+ " "
|
667 |
+ "=" * 20
|
668 |
)
|
669 |
|
670 |
-
result = fetch_task_files(
|
671 |
print(f"File type: {result.get('file_type')}")
|
672 |
print(f"Filename: {result.get('filename')}")
|
|
|
549 |
|
550 |
|
551 |
@tool
|
552 |
+
def fetch_task_files(url: str) -> Dict[str, Any]:
|
553 |
"""
|
554 |
+
Download files from a given URL.
|
555 |
|
556 |
Args:
|
557 |
+
url (str): Direct URL to the file to download.
|
558 |
|
559 |
Returns:
|
560 |
dict: A dictionary containing file information and data in appropriate format for the file type
|
561 |
"""
|
562 |
+
files_url = url
|
563 |
+
print(f"Fetching file from: {files_url}")
|
564 |
|
565 |
try:
|
566 |
response = requests.get(files_url, timeout=15)
|
|
|
572 |
if "filename=" in filename:
|
573 |
filename = filename.split("filename=")[-1].strip('"')
|
574 |
else:
|
575 |
+
filename = "file.bin" # Default filename
|
576 |
|
577 |
print(f"Received file: {filename}, type: {content_type}")
|
578 |
|
|
|
620 |
return process_binary(response, filename, content_type)
|
621 |
|
622 |
except requests.exceptions.RequestException as e:
|
623 |
+
print(f"Error fetching url: {files_url} - {e}")
|
624 |
return {"error": f"Error fetching files: {e}"}
|
625 |
except Exception as e:
|
626 |
+
print(f"An unexpected error occurred fetching files from url: {files_url}- {e}")
|
627 |
return {"error": f"An unexpected error occurred: {e}"}
|
628 |
|
629 |
|
|
|
652 |
|
653 |
|
654 |
if __name__ == "__main__":
|
655 |
+
# Simple test for fetch_task_files with direct URLs
|
656 |
+
api_base = "https://agents-course-unit4-scoring.hf.space"
|
657 |
+
test_urls = [
|
658 |
+
f"{api_base}/files/cca530fc-4052-43b2-b130-b30968d8aa44",
|
659 |
+
f"{api_base}/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
660 |
+
f"{api_base}/files/7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
661 |
]
|
662 |
+
|
663 |
+
for url in test_urls:
|
664 |
print(
|
665 |
"=" * 20
|
666 |
+ " "
|
667 |
+
+ f"Testing fetch_task_files with URL: {url}"
|
668 |
+ " "
|
669 |
+ "=" * 20
|
670 |
)
|
671 |
|
672 |
+
result = fetch_task_files(url)
|
673 |
print(f"File type: {result.get('file_type')}")
|
674 |
print(f"Filename: {result.get('filename')}")
|