""" GAIA Sample Tasks for Testing the AI Agent This file contains sample tasks from the GAIA benchmark categories to test the agent's capabilities across different skills. """ # Sample GAIA tasks for testing the agent GAIA_SAMPLE_TASKS = [ # Reasoning tasks { "category": "reasoning", "difficulty": "easy", "task": "If a train travels at 60 miles per hour, how far will it travel in 2.5 hours?" }, { "category": "reasoning", "difficulty": "medium", "task": "A store is having a 30% off sale. If an item originally costs $85, what is the sale price? Additionally, if there's a 8% sales tax, what is the final price?" }, { "category": "reasoning", "difficulty": "hard", "task": "In a class of 30 students, 40% are boys. If 3 more girls join the class, what percentage of the class will be boys?" }, # Web search and information retrieval tasks { "category": "web_search", "difficulty": "easy", "task": "What is the capital of Japan and what is its population?" }, { "category": "web_search", "difficulty": "medium", "task": "Who won the Nobel Prize in Physics in 2023? What was their contribution?" }, { "category": "web_search", "difficulty": "hard", "task": "Compare and contrast the climate policies of the United States and the European Union. What are the key differences in their approaches to reducing carbon emissions?" }, # Multimodal understanding tasks (would require image input in a real scenario) { "category": "multimodal", "difficulty": "easy", "task": "Analyze this image URL and describe what you see: https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/800px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg" }, { "category": "multimodal", "difficulty": "medium", "task": "Look at this chart image and explain the trend: https://upload.wikimedia.org/wikipedia/commons/thumb/5/51/Global-surface-temperature.svg/1200px-Global-surface-temperature.svg.png" }, # Tool usage tasks { "category": "tool_usage", "difficulty": "easy", "task": "Write a Python function to calculate the factorial of a number, then use it to find the factorial of 5." }, { "category": "tool_usage", "difficulty": "medium", "task": "Create a Python script that fetches the current weather for New York City using a weather API and displays the temperature, humidity, and weather conditions." }, { "category": "tool_usage", "difficulty": "hard", "task": "Write a Python script that analyzes a text file containing a list of numbers (one per line), calculates the mean, median, mode, and standard deviation, and creates a histogram visualization of the data." }, # Combined skills tasks { "category": "combined", "difficulty": "medium", "task": "Research the top 3 electric vehicle manufacturers by market share. Create a Python script to visualize their market shares in a pie chart." }, { "category": "combined", "difficulty": "hard", "task": "Find information about global coffee production by country for the last year. Write a Python script to create a bar chart showing the top 5 coffee-producing countries and their production volumes." } ] # Function to get tasks by category def get_tasks_by_category(category): return [task for task in GAIA_SAMPLE_TASKS if task["category"] == category] # Function to get tasks by difficulty def get_tasks_by_difficulty(difficulty): return [task for task in GAIA_SAMPLE_TASKS if task["difficulty"] == difficulty] # Function to get all task queries as a list def get_all_task_queries(): return [task["task"] for task in GAIA_SAMPLE_TASKS] # Function to get a subset of tasks for quick testing def get_quick_test_tasks(): # One task from each category and difficulty level quick_test_tasks = [ GAIA_SAMPLE_TASKS[0], # reasoning, easy GAIA_SAMPLE_TASKS[3], # web_search, easy GAIA_SAMPLE_TASKS[6], # multimodal, easy GAIA_SAMPLE_TASKS[9], # tool_usage, medium GAIA_SAMPLE_TASKS[11] # combined, medium ] return [task["task"] for task in quick_test_tasks]