"""
GAIA Sample Tasks for Testing the AI Agent

This file contains sample tasks from the GAIA benchmark categories
to test the agent's capabilities across different skills.
"""
|
|
|
|
|
# Sample tasks, listed category by category. Each row below is a
# (category, difficulty, task prompt) triple that is expanded into a dict
# with the keys "category", "difficulty" and "task", in that order.
# The position of each row matters: rows are also addressed by index.
GAIA_SAMPLE_TASKS = [
    {"category": category, "difficulty": difficulty, "task": prompt}
    for category, difficulty, prompt in (
        # --- reasoning ---
        (
            "reasoning",
            "easy",
            "If a train travels at 60 miles per hour, how far will it travel in 2.5 hours?",
        ),
        (
            "reasoning",
            "medium",
            "A store is having a 30% off sale. If an item originally costs $85, what is the sale price? Additionally, if there's a 8% sales tax, what is the final price?",
        ),
        (
            "reasoning",
            "hard",
            "In a class of 30 students, 40% are boys. If 3 more girls join the class, what percentage of the class will be boys?",
        ),
        # --- web_search ---
        (
            "web_search",
            "easy",
            "What is the capital of Japan and what is its population?",
        ),
        (
            "web_search",
            "medium",
            "Who won the Nobel Prize in Physics in 2023? What was their contribution?",
        ),
        (
            "web_search",
            "hard",
            "Compare and contrast the climate policies of the United States and the European Union. What are the key differences in their approaches to reducing carbon emissions?",
        ),
        # --- multimodal ---
        (
            "multimodal",
            "easy",
            "Analyze this image URL and describe what you see: https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/800px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg",
        ),
        (
            "multimodal",
            "medium",
            "Look at this chart image and explain the trend: https://upload.wikimedia.org/wikipedia/commons/thumb/5/51/Global-surface-temperature.svg/1200px-Global-surface-temperature.svg.png",
        ),
        # --- tool_usage ---
        (
            "tool_usage",
            "easy",
            "Write a Python function to calculate the factorial of a number, then use it to find the factorial of 5.",
        ),
        (
            "tool_usage",
            "medium",
            "Create a Python script that fetches the current weather for New York City using a weather API and displays the temperature, humidity, and weather conditions.",
        ),
        (
            "tool_usage",
            "hard",
            "Write a Python script that analyzes a text file containing a list of numbers (one per line), calculates the mean, median, mode, and standard deviation, and creates a histogram visualization of the data.",
        ),
        # --- combined ---
        (
            "combined",
            "medium",
            "Research the top 3 electric vehicle manufacturers by market share. Create a Python script to visualize their market shares in a pie chart.",
        ),
        (
            "combined",
            "hard",
            "Find information about global coffee production by country for the last year. Write a Python script to create a bar chart showing the top 5 coffee-producing countries and their production volumes.",
        ),
    )
]
|
|
|
|
|
def get_tasks_by_category(category):
    """Return the sample tasks whose ``"category"`` equals *category*.

    Args:
        category: Value compared with ``==`` against each task's
            ``"category"`` entry (e.g. ``"reasoning"``).

    Returns:
        A new list holding the matching task dicts, in the order they
        appear in ``GAIA_SAMPLE_TASKS``. The list is empty when nothing
        matches. The dicts are the same objects stored in
        ``GAIA_SAMPLE_TASKS``, not copies.
    """
    matching = []
    for entry in GAIA_SAMPLE_TASKS:
        if entry["category"] == category:
            matching.append(entry)
    return matching
|
|
|
|
|
def get_tasks_by_difficulty(difficulty):
    """Return the sample tasks whose ``"difficulty"`` equals *difficulty*.

    Args:
        difficulty: Value compared with ``==`` against each task's
            ``"difficulty"`` entry (e.g. ``"easy"``).

    Returns:
        A new list holding the matching task dicts, in the order they
        appear in ``GAIA_SAMPLE_TASKS``. The list is empty when nothing
        matches. The dicts are the same objects stored in
        ``GAIA_SAMPLE_TASKS``, not copies.
    """

    def has_requested_difficulty(entry):
        return entry["difficulty"] == difficulty

    return list(filter(has_requested_difficulty, GAIA_SAMPLE_TASKS))
|
|
|
|
|
def get_all_task_queries():
    """Return the ``"task"`` value of every entry in ``GAIA_SAMPLE_TASKS``.

    Returns:
        A new list of the task prompts, one per entry, in list order.
    """
    queries = []
    for entry in GAIA_SAMPLE_TASKS:
        queries.append(entry["task"])
    return queries
|
|
|
|
|
def get_quick_test_tasks():
    """Return the prompts of a fixed five-task subset of ``GAIA_SAMPLE_TASKS``.

    The subset is chosen by position: indices 0, 3, 6, 9 and 11. With the
    list as defined in this file those are one task from each category
    (reasoning, web_search, multimodal, tool_usage, combined).

    Returns:
        A new list with the ``"task"`` value of each selected entry, in
        the order of the indices above.

    Raises:
        IndexError: If ``GAIA_SAMPLE_TASKS`` has fewer than 12 entries.
    """
    # NOTE(review): the positions are hard-coded, so inserting or removing
    # entries in GAIA_SAMPLE_TASKS silently changes which tasks are picked.
    # Index 9 is the *medium* tool_usage task, whereas 0, 3 and 6 are the
    # easy entry of their category -- confirm that this is intentional.
    selected_positions = (0, 3, 6, 9, 11)
    selected_entries = [GAIA_SAMPLE_TASKS[position] for position in selected_positions]
    return [entry["task"] for entry in selected_entries]
|
|