Kai Jennissen committed on
Commit
e74aed7
·
unverified ·
1 Parent(s): 8102d4b

added tools

Browse files
Files changed (3) hide show
  1. agent.py +31 -13
  2. app.py +4 -2
  3. tools.py +18 -16
agent.py CHANGED
@@ -3,7 +3,7 @@ from smolagents import (
3
  CodeAgent,
4
  DuckDuckGoSearchTool,
5
  VisitWebpageTool,
6
- # InferenceClientModel,
7
  OpenAIServerModel,
8
  WikipediaSearchTool,
9
  )
@@ -79,12 +79,26 @@ if Text('Accept cookies?').exists():
79
  ```<end_code>
80
  """
81
 
82
- add_sys_prompt = """\n\nIf a file_url is available or an url is given in question statement, then request and use the content to answer the question. \
83
- If a code file, such as .py file, is given, do not attempt to execute it but rather open it as a text file and analyze the content. \
84
- When a tabluar file, such as csv, tsv, xlsx, is given, read it using pandas.
85
-
86
- Make sure you provide the answer in accordance with the instruction provided in the question. Do not return the result of tool as a final_answer.
87
- Do Not add any additional information, explanation, unnecessary words or symbols. The answer is likely as simple as one word."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
 
90
  def initialize_tracing(enabled=True, provider="langfuse"):
@@ -134,19 +148,24 @@ def get_agent():
134
  description="A web agent that can search the web and visit webpages.",
135
  verbosity_level=1,
136
  )
 
137
  mm_agent = CodeAgent(
138
  tools=[
 
139
  read_image,
140
  transcribe_audio,
141
  read_code,
142
  run_video,
143
  ],
144
- model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
 
 
145
  max_steps=3,
146
  name="Multimedia_Agent",
147
- description="An agent that can answer questions about all types of images, videos and speech. Needs to be provided with a valid url or an image.",
148
  verbosity_level=1,
149
  )
 
150
 
151
  # Initialize the model
152
  # vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
@@ -168,16 +187,15 @@ def get_agent():
168
  # Import helium for the agent
169
  # Create manager agent
170
  manager_agent = CodeAgent(
171
- tools=[fetch_task_files],
172
- managed_agents=[web_agent, mm_agent],
173
  model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
174
  max_steps=5,
175
  planning_interval=10,
176
  additional_authorized_imports=["pandas", "numpy"],
177
- verbosity_level=1,
178
  )
179
 
180
- manager_agent.prompt_templates["system_prompt"] += add_sys_prompt
181
  return manager_agent
182
 
183
 
 
3
  CodeAgent,
4
  DuckDuckGoSearchTool,
5
  VisitWebpageTool,
6
+ InferenceClientModel,
7
  OpenAIServerModel,
8
  WikipediaSearchTool,
9
  )
 
79
  ```<end_code>
80
  """
81
 
82
+ add_sys_prompt = """\n\nWhen processing tasks with files:
83
+
84
+ 1. Use the fetch_task_files tool with the URL provided to you to download and process files
85
+ 2. Depending on the file type returned, use the appropriate specialized tool:
86
+ - For images: Use the data_url returned with read_image tool
87
+ - For audio: Use the audio data with transcribe_audio tool
88
+ - For code files: Use read_code tool
89
+ - For videos: Use run_video tool
90
+
91
+ 3. When handling different file types:
92
+ - Images: The fetch_task_files tool will return a data_url you can use directly with read_image
93
+ - Code: Do not execute code files, analyze them as text
94
+ - Tabular data (CSV, Excel): Use pandas to analyze the data
95
+ - Videos: Extract relevant information from visual frames and audio
96
+
97
+ 4. Keep answers concise and to the point. The answer is likely as simple as one word.
98
+ 5. Make sure you provide the answer in accordance with the instruction provided in the question.
99
+ 6. Do not return the raw result of tool calls as your final answer.
100
+ 7. Do not add any additional information, explanation, unnecessary words or symbols.
101
+ """
102
 
103
 
104
  def initialize_tracing(enabled=True, provider="langfuse"):
 
148
  description="A web agent that can search the web and visit webpages.",
149
  verbosity_level=1,
150
  )
151
+
152
  mm_agent = CodeAgent(
153
  tools=[
154
+ fetch_task_files,
155
  read_image,
156
  transcribe_audio,
157
  read_code,
158
  run_video,
159
  ],
160
+ model=InferenceClientModel(
161
+ model_id="Qwen/Qwen2.5-VL-32B-Instruct", # provider="together"
162
+ ),
163
  max_steps=3,
164
  name="Multimedia_Agent",
165
+ description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
166
  verbosity_level=1,
167
  )
168
+ mm_agent.prompt_templates["system_prompt"] += add_sys_prompt
169
 
170
  # Initialize the model
171
  # vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
 
187
  # Import helium for the agent
188
  # Create manager agent
189
  manager_agent = CodeAgent(
190
+ tools=[],
191
+ managed_agents=[mm_agent, web_agent],
192
  model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
193
  max_steps=5,
194
  planning_interval=10,
195
  additional_authorized_imports=["pandas", "numpy"],
196
+ verbosity_level=2,
197
  )
198
 
 
199
  return manager_agent
200
 
201
 
app.py CHANGED
@@ -29,11 +29,13 @@ class BasicAgent:
29
 
30
  # If task_id is provided, we'll include context about possible files
31
  if task_id:
 
 
 
32
  # Add context about files to the question
33
  context = f"""Task ID: {task_id}
34
 
35
- If you need files for this task, you can use the fetch_task_files tool with the task_id.
36
- Example: fetch_task_files(task_id="{task_id}")
37
 
38
  Question: {question}"""
39
 
 
29
 
30
  # If task_id is provided, we'll include context about possible files
31
  if task_id:
32
+ # API base URL for constructing file URLs
33
+ api_base_url = "https://agents-course-unit4-scoring.hf.space"
34
+
35
  # Add context about files to the question
36
  context = f"""Task ID: {task_id}
37
 
38
+ IMPORTANT: If the question mentions an image, file, or other media, construct the file URL using: {api_base_url}/files/{task_id}
 
39
 
40
  Question: {question}"""
41
 
tools.py CHANGED
@@ -549,18 +549,18 @@ def process_binary(response, filename, content_type):
549
 
550
 
551
  @tool
552
- def fetch_task_files(task_id: str) -> Dict[str, Any]:
553
  """
554
- Download files associated with a specific task from the API.
555
 
556
  Args:
557
- task_id (str): The Task-ID of the task to download files for.
558
 
559
  Returns:
560
  dict: A dictionary containing file information and data in appropriate format for the file type
561
  """
562
- api_base_url: str = "https://agents-course-unit4-scoring.hf.space"
563
- files_url = f"{api_base_url}/files/{task_id}"
564
 
565
  try:
566
  response = requests.get(files_url, timeout=15)
@@ -572,7 +572,7 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
572
  if "filename=" in filename:
573
  filename = filename.split("filename=")[-1].strip('"')
574
  else:
575
- filename = f"{task_id}.bin" # Default filename
576
 
577
  print(f"Received file: {filename}, type: {content_type}")
578
 
@@ -620,10 +620,10 @@ def fetch_task_files(task_id: str) -> Dict[str, Any]:
620
  return process_binary(response, filename, content_type)
621
 
622
  except requests.exceptions.RequestException as e:
623
- print(f"Error fetching files for task {task_id}: {e}")
624
  return {"error": f"Error fetching files: {e}"}
625
  except Exception as e:
626
- print(f"An unexpected error occurred fetching files for task {task_id}: {e}")
627
  return {"error": f"An unexpected error occurred: {e}"}
628
 
629
 
@@ -652,21 +652,23 @@ def search_wikipedia(query: str) -> str:
652
 
653
 
654
  if __name__ == "__main__":
655
- # Simple test for fetch_task_files
656
- task_ids = [
657
- "cca530fc-4052-43b2-b130-b30968d8aa44",
658
- "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
659
- "7bd855d8-463d-4ed5-93ca-5fe35145f733",
 
660
  ]
661
- for task_id in task_ids:
 
662
  print(
663
  "=" * 20
664
  + " "
665
- + f"Testing fetch_task_files with task_id: {task_id}"
666
  + " "
667
  + "=" * 20
668
  )
669
 
670
- result = fetch_task_files(task_id)
671
  print(f"File type: {result.get('file_type')}")
672
  print(f"Filename: {result.get('filename')}")
 
549
 
550
 
551
  @tool
552
+ def fetch_task_files(url: str) -> Dict[str, Any]:
553
  """
554
+ Download files from a given URL.
555
 
556
  Args:
557
+ url (str): Direct URL to the file to download.
558
 
559
  Returns:
560
  dict: A dictionary containing file information and data in appropriate format for the file type
561
  """
562
+ files_url = url
563
+ print(f"Fetching file from: {files_url}")
564
 
565
  try:
566
  response = requests.get(files_url, timeout=15)
 
572
  if "filename=" in filename:
573
  filename = filename.split("filename=")[-1].strip('"')
574
  else:
575
+ filename = "file.bin" # Default filename
576
 
577
  print(f"Received file: {filename}, type: {content_type}")
578
 
 
620
  return process_binary(response, filename, content_type)
621
 
622
  except requests.exceptions.RequestException as e:
623
+ print(f"Error fetching url: {files_url} - {e}")
624
  return {"error": f"Error fetching files: {e}"}
625
  except Exception as e:
626
+ print(f"An unexpected error occurred fetching files from url: {files_url}- {e}")
627
  return {"error": f"An unexpected error occurred: {e}"}
628
 
629
 
 
652
 
653
 
654
  if __name__ == "__main__":
655
+ # Simple test for fetch_task_files with direct URLs
656
+ api_base = "https://agents-course-unit4-scoring.hf.space"
657
+ test_urls = [
658
+ f"{api_base}/files/cca530fc-4052-43b2-b130-b30968d8aa44",
659
+ f"{api_base}/files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
660
+ f"{api_base}/files/7bd855d8-463d-4ed5-93ca-5fe35145f733",
661
  ]
662
+
663
+ for url in test_urls:
664
  print(
665
  "=" * 20
666
  + " "
667
+ + f"Testing fetch_task_files with URL: {url}"
668
  + " "
669
  + "=" * 20
670
  )
671
 
672
+ result = fetch_task_files(url)
673
  print(f"File type: {result.get('file_type')}")
674
  print(f"Filename: {result.get('filename')}")