"""
Configuration module for the GAIA agent.
This module centralizes all configuration settings for the agent, including:
- API endpoints
- Model selection
- Tool parameters
- Memory settings
- Logging configuration
- Web browsing settings
- Multimodal processing parameters
It uses python-dotenv to load environment variables for sensitive credentials.
"""

import os
import logging
from typing import Any, Dict

try:
    from dotenv import load_dotenv
    load_dotenv(verbose=False)
except ImportError:
    def load_dotenv(*args, **kwargs):
        pass  # No-op implementation when dotenv is not available

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY", "")
SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")
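
# Illustrative .env layout for the credentials above (placeholder values only;
# the actual keys are supplied via the environment and are not part of this file):
#   OPENAI_API_KEY=sk-...
#   SERPER_API_KEY=...
#   PERPLEXITY_API_KEY=pplx-...
#   SUPABASE_KEY=...
#   HF_TOKEN=hf_...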

# Supabase Configuration (partially hardcoded)
SUPABASE_URL = "https://tjamxhvvtnypbadvrkjq.supabase.co"
SUPABASE_PROJECT_ID = "tjamxhvvtnypbadvrkjq"
SUPABASE_PROJECT_NAME = "HF_GAIA_Assessment_AGT"

OPENAI_API_BASE = "https://api.openai.com/v1"
SERPER_API_URL = "https://google.serper.dev/search"
PERPLEXITY_API_URL = "https://api.perplexity.ai"
HUGGINGFACE_API_URL = "https://api-inference.huggingface.co/models"

# Model Configuration (hardcoded with updated models)
DEFAULT_MODEL = "gpt-4o" # Updated from gpt-4-turbo
EMBEDDING_MODEL = "text-embedding-3-large" # Updated from text-embedding-ada-002
FALLBACK_MODEL = "gpt-3.5-turbo"
VISION_MODEL = "gpt-4o" # Updated from gpt-4-vision-preview
REASONING_MODEL = "gpt-4o" # Updated from gpt-4-turbo
MODEL_TEMPERATURE = 0.1
MODEL_MAX_TOKENS = 4096

# Tool Parameters (hardcoded)
WEB_SEARCH_RESULT_COUNT = 5
WEB_SEARCH_TIMEOUT = 10
ARXIV_MAX_RESULTS = 3
DUCKDUCKGO_TIMEOUT = 10
DUCKDUCKGO_MAX_RESULTS = 5
PERPLEXITY_TIMEOUT = 30
PERPLEXITY_MODEL = "sonar-reasoning"
WEB_SCRAPING_TIMEOUT = 15
WEB_SCRAPING_MAX_LINKS = 3
WEB_SCRAPING_MAX_CONTENT_LENGTH = 10000

# Memory Settings (hardcoded)
MEMORY_ENABLED = True
MEMORY_TABLE_NAME = "gaia_memory"
MEMORY_TTL = 86400 # 24 hours in seconds
MEMORY_CACHE_SIZE = 100
MEMORY_SIMILARITY_THRESHOLD = 0.85
MEMORY_MAX_ENTRIES = 1000

# Agent Settings (hardcoded)
MAX_ITERATIONS = 10
VERBOSE = False
TIMEOUT = 300 # 5 minutes in seconds
MAX_RETRIES = 3
RETRY_DELAY = 2 # seconds
PARALLEL_TOOL_EXECUTION = False
DEFAULT_CHECKPOINT_PATH = "checkpoints/gaia_agent_state.json"

# Logging Configuration (hardcoded)
LOG_LEVEL = "INFO"
LOG_FILE = "gaia_agent.log"
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Multimodal Settings (hardcoded)
IMAGE_MAX_SIZE = 1024 # pixels
IMAGE_FORMAT = "JPEG"
VISION_DETAIL_LEVEL = "high" # low, medium, high

# Web Browsing Settings (hardcoded)
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
BROWSER_TIMEOUT = 30
BROWSER_HEADLESS = True
BROWSER_IMAGES_ENABLED = True


def get_logging_config() -> Dict[str, Any]:
    """
    Returns a dictionary with logging configuration.

    Returns:
        Dict[str, Any]: Logging configuration dictionary
    """
    return {
        "level": getattr(logging, LOG_LEVEL.upper(), logging.INFO),
        "format": LOG_FORMAT,
        "filename": LOG_FILE if LOG_FILE else None,
    }
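

# Illustrative helper (not part of the original module): shows how the mapping
# returned by get_logging_config() can be applied with the standard library,
# since its keys happen to match logging.basicConfig() parameter names.
def configure_logging() -> None:
    """Apply the logging configuration via logging.basicConfig (sketch only)."""
    logging.basicConfig(**get_logging_config())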


def get_model_config() -> Dict[str, Any]:
    """
    Returns a dictionary with model configuration.

    Returns:
        Dict[str, Any]: Model configuration dictionary
    """
    return {
        "default_model": DEFAULT_MODEL,
        "embedding_model": EMBEDDING_MODEL,
        "fallback_model": FALLBACK_MODEL,
        "vision_model": VISION_MODEL,
        "reasoning_model": REASONING_MODEL,
        "temperature": MODEL_TEMPERATURE,
        "max_tokens": MODEL_MAX_TOKENS,
    }
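
# Illustrative usage (not part of the original module), assuming the OpenAI
# Python SDK >= 1.0 is the consumer of this configuration:
#
#   from openai import OpenAI
#   client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE)
#   cfg = get_model_config()
#   response = client.chat.completions.create(
#       model=cfg["default_model"],
#       temperature=cfg["temperature"],
#       max_tokens=cfg["max_tokens"],
#       messages=[{"role": "user", "content": "Hello"}],
#   )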


def get_tool_config() -> Dict[str, Dict[str, Any]]:
    """
    Returns a dictionary with tool configurations.

    Returns:
        Dict[str, Dict[str, Any]]: Tool configuration dictionary
    """
    return {
        "web_search": {
            "result_count": WEB_SEARCH_RESULT_COUNT,
            "timeout": WEB_SEARCH_TIMEOUT,
        },
        "arxiv": {
            "max_results": ARXIV_MAX_RESULTS,
        },
        "duckduckgo": {
            "timeout": DUCKDUCKGO_TIMEOUT,
            "max_results": DUCKDUCKGO_MAX_RESULTS,
        },
        "perplexity": {
            "timeout": PERPLEXITY_TIMEOUT,
            "model": PERPLEXITY_MODEL,
            "api_url": PERPLEXITY_API_URL,
        },
        "web_scraping": {
            "timeout": WEB_SCRAPING_TIMEOUT,
            "max_links": WEB_SCRAPING_MAX_LINKS,
            "max_content_length": WEB_SCRAPING_MAX_CONTENT_LENGTH,
        },
    }
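
# Illustrative web-search call (not part of the original module), assuming the
# `requests` package and Serper's POST interface (verify the header and payload
# names against the current Serper documentation):
#
#   import requests
#   search_cfg = get_tool_config()["web_search"]
#   response = requests.post(
#       SERPER_API_URL,
#       headers={"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"},
#       json={"q": "example query", "num": search_cfg["result_count"]},
#       timeout=search_cfg["timeout"],
#   )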


def get_memory_config() -> Dict[str, Any]:
    """
    Returns a dictionary with memory configuration.

    Returns:
        Dict[str, Any]: Memory configuration dictionary
    """
    return {
        "enabled": MEMORY_ENABLED,
        "table_name": MEMORY_TABLE_NAME,
        "ttl": MEMORY_TTL,
        "cache_size": MEMORY_CACHE_SIZE,
        "similarity_threshold": MEMORY_SIMILARITY_THRESHOLD,
        "max_entries": MEMORY_MAX_ENTRIES,
    }


def get_agent_config() -> Dict[str, Any]:
    """
    Returns a dictionary with agent configuration.

    Returns:
        Dict[str, Any]: Agent configuration dictionary
    """
    return {
        "max_iterations": MAX_ITERATIONS,
        "verbose": VERBOSE,
        "timeout": TIMEOUT,
        "max_retries": MAX_RETRIES,
        "retry_delay": RETRY_DELAY,
        "parallel_tool_execution": PARALLEL_TOOL_EXECUTION,
        "checkpoint_path": DEFAULT_CHECKPOINT_PATH,
    }
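

# Illustrative smoke test (not part of the original module): print the derived
# configuration dictionaries for quick inspection.
if __name__ == "__main__":
    import json

    for name, cfg in {
        "logging": get_logging_config(),
        "model": get_model_config(),
        "tools": get_tool_config(),
        "memory": get_memory_config(),
        "agent": get_agent_config(),
    }.items():
        print(name, json.dumps(cfg, indent=2, default=str))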