Kai Jennissen committed on
Commit
096b983
·
unverified ·
1 Parent(s): e74aed7

updated tools description

Browse files
Files changed (3) hide show
  1. agent.py +2 -1
  2. app.py +1 -1
  3. tools.py +25 -3
agent.py CHANGED
@@ -161,6 +161,7 @@ def get_agent():
161
  model_id="Qwen/Qwen2.5-VL-32B-Instruct", # provider="together"
162
  ),
163
  max_steps=3,
 
164
  name="Multimedia_Agent",
165
  description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
166
  verbosity_level=1,
@@ -192,7 +193,7 @@ def get_agent():
192
  model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
193
  max_steps=5,
194
  planning_interval=10,
195
- additional_authorized_imports=["pandas", "numpy"],
196
  verbosity_level=2,
197
  )
198
 
 
161
  model_id="Qwen/Qwen2.5-VL-32B-Instruct", # provider="together"
162
  ),
163
  max_steps=3,
164
+ additional_authorized_imports=["pandas", "numpy", "openpyxl"],
165
  name="Multimedia_Agent",
166
  description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
167
  verbosity_level=1,
 
193
  model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
194
  max_steps=5,
195
  planning_interval=10,
196
+ additional_authorized_imports=["pandas", "numpy", "openpyxl"],
197
  verbosity_level=2,
198
  )
199
 
app.py CHANGED
@@ -109,7 +109,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
109
  results_log = []
110
  answers_payload = []
111
  print(f"Running agent on {len(questions_data)} questions...")
112
- for item in questions_data[3:4]:
113
  task_id = item.get("task_id")
114
  question_text = item.get("question")
115
  if not task_id or question_text is None:
 
109
  results_log = []
110
  answers_payload = []
111
  print(f"Running agent on {len(questions_data)} questions...")
112
+ for item in questions_data:
113
  task_id = item.get("task_id")
114
  question_text = item.get("question")
115
  if not task_id or question_text is None:
tools.py CHANGED
@@ -551,13 +551,35 @@ def process_binary(response, filename, content_type):
551
  @tool
552
  def fetch_task_files(url: str) -> Dict[str, Any]:
553
  """
554
- Download files from a given URL.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
 
556
  Args:
557
- url (str): Direct URL to the file to download.
 
558
 
559
  Returns:
560
- dict: A dictionary containing file information and data in appropriate format for the file type
561
  """
562
  files_url = url
563
  print(f"Fetching file from: {files_url}")
 
551
  @tool
552
  def fetch_task_files(url: str) -> Dict[str, Any]:
553
  """
554
+ Download and process files from a given URL. This tool detects the file type and returns the data in a suitable format for further processing.
555
+
556
+ For different file types, this tool returns:
557
+
558
+ - Images: Returns a data_url that can be directly used with the read_image tool
559
+ Example: result = fetch_task_files(url); then use read_image(question, result["data_url"])
560
+
561
+ - Audio: Returns audio data that can be used with the transcribe_audio tool
562
+ Example: result = fetch_task_files(url); then use transcribe_audio(result["data_url"], result["filename"])
563
+
564
+ - Video: Returns frame extractions and a video buffer for processing with run_video
565
+ Example: result = fetch_task_files(url); you can access frames via result["frame_urls"]
566
+
567
+ - Tabular data (Excel/CSV): Returns parsed data as records, column names, and dimensions
568
+ Example: result = fetch_task_files(url); then analyze data with result["data"] and result["columns"]
569
+
570
+ - Text/Code: Returns the content as text for analysis
571
+ Example: result = fetch_task_files(url); then access text via result["content"]
572
+
573
+ - PDFs & other files: Returns encoded file data for processing
574
+
575
+ All responses include metadata like file_type, filename, and content_type to help determine how to handle the file.
576
 
577
  Args:
578
+ url (str): Direct URL to the file to download. For task files, construct using the API base URL
579
+ and the task ID (e.g., "https://agents-course-unit4-scoring.hf.space/files/{task_id}")
580
 
581
  Returns:
582
+ dict: A dictionary with file data and metadata structured for the specific file type
583
  """
584
  files_url = url
585
  print(f"Fetching file from: {files_url}")