""" Configuration module for the GAIA agent. This module centralizes all configuration settings for the agent, including: - API endpoints - Model selection - Tool parameters - Memory settings - Logging configuration - Web browsing settings - Multimodal processing parameters It uses python-dotenv to load environment variables for sensitive credentials. """ import os import logging from typing import Dict, Any, Optional, List, Union, Tuple try: from dotenv import load_dotenv load_dotenv(verbose=False) except ImportError: def load_dotenv(*args, **kwargs): pass # No-op implementation when dotenv is not available OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") SERPER_API_KEY = os.getenv("SERPER_API_KEY", "") PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY", "") SUPABASE_KEY = os.getenv("SUPABASE_KEY", "") HF_TOKEN = os.getenv("HF_TOKEN", "") # Supabase Configuration (partially hardcoded) SUPABASE_URL = "https://tjamxhvvtnypbadvrkjq.supabase.co" SUPABASE_PROJECT_ID = "tjamxhvvtnypbadvrkjq" SUPABASE_PROJECT_NAME = "HF_GAIA_Assessment_AGT" OPENAI_API_BASE = "https://api.openai.com/v1" SERPER_API_URL = "https://google.serper.dev/search" PERPLEXITY_API_URL = "https://api.perplexity.ai" HUGGINGFACE_API_URL = "https://api-inference.huggingface.co/models" # Model Configuration (hardcoded with updated models) DEFAULT_MODEL = "gpt-4o" # Updated from gpt-4-turbo EMBEDDING_MODEL = "text-embedding-3-large" # Updated from text-embedding-ada-002 FALLBACK_MODEL = "gpt-3.5-turbo" VISION_MODEL = "gpt-4o" # Updated from gpt-4-vision-preview REASONING_MODEL = "gpt-4o" # Updated from gpt-4-turbo MODEL_TEMPERATURE = 0.1 MODEL_MAX_TOKENS = 4096 # Tool Parameters (hardcoded) WEB_SEARCH_RESULT_COUNT = 5 WEB_SEARCH_TIMEOUT = 10 ARXIV_MAX_RESULTS = 3 DUCKDUCKGO_TIMEOUT = 10 DUCKDUCKGO_MAX_RESULTS = 5 PERPLEXITY_TIMEOUT = 30 PERPLEXITY_MODEL = "sonar-reasoning" WEB_SCRAPING_TIMEOUT = 15 WEB_SCRAPING_MAX_LINKS = 3 WEB_SCRAPING_MAX_CONTENT_LENGTH = 10000 # Memory Settings (hardcoded) MEMORY_ENABLED = True MEMORY_TABLE_NAME = "gaia_memory" MEMORY_TTL = 86400 # 24 hours in seconds MEMORY_CACHE_SIZE = 100 MEMORY_SIMILARITY_THRESHOLD = 0.85 MEMORY_MAX_ENTRIES = 1000 # Agent Settings (hardcoded) MAX_ITERATIONS = 10 VERBOSE = False TIMEOUT = 300 # 5 minutes in seconds MAX_RETRIES = 3 RETRY_DELAY = 2 # seconds PARALLEL_TOOL_EXECUTION = False DEFAULT_CHECKPOINT_PATH = "checkpoints/gaia_agent_state.json" # Logging Configuration (hardcoded) LOG_LEVEL = "INFO" LOG_FILE = "gaia_agent.log" LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" # Multimodal Settings (hardcoded) IMAGE_MAX_SIZE = 1024 # pixels IMAGE_FORMAT = "JPEG" VISION_DETAIL_LEVEL = "high" # low, medium, high # Web Browsing Settings (hardcoded) USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" BROWSER_TIMEOUT = 30 BROWSER_HEADLESS = True BROWSER_IMAGES_ENABLED = True def get_logging_config() -> Dict[str, Any]: """ Returns a dictionary with logging configuration. Returns: Dict[str, Any]: Logging configuration dictionary """ return { "level": getattr(logging, LOG_LEVEL.upper(), logging.INFO), "format": LOG_FORMAT, "filename": LOG_FILE if LOG_FILE else None, } def get_model_config() -> Dict[str, Any]: """ Returns a dictionary with model configuration. 
Returns: Dict[str, Any]: Model configuration dictionary """ return { "default_model": DEFAULT_MODEL, "embedding_model": EMBEDDING_MODEL, "fallback_model": FALLBACK_MODEL, "vision_model": VISION_MODEL, "reasoning_model": REASONING_MODEL, "temperature": MODEL_TEMPERATURE, "max_tokens": MODEL_MAX_TOKENS, } def get_tool_config() -> Dict[str, Dict[str, Any]]: """ Returns a dictionary with tool configurations. Returns: Dict[str, Dict[str, Any]]: Tool configuration dictionary """ return { "web_search": { "result_count": WEB_SEARCH_RESULT_COUNT, "timeout": WEB_SEARCH_TIMEOUT, }, "arxiv": { "max_results": ARXIV_MAX_RESULTS, }, "duckduckgo": { "timeout": DUCKDUCKGO_TIMEOUT, "max_results": DUCKDUCKGO_MAX_RESULTS, }, "perplexity": { "timeout": PERPLEXITY_TIMEOUT, "model": PERPLEXITY_MODEL, "api_url": PERPLEXITY_API_URL, }, "web_scraping": { "timeout": WEB_SCRAPING_TIMEOUT, "max_links": WEB_SCRAPING_MAX_LINKS, "max_content_length": WEB_SCRAPING_MAX_CONTENT_LENGTH, }, } def get_memory_config() -> Dict[str, Any]: """ Returns a dictionary with memory configuration. Returns: Dict[str, Any]: Memory configuration dictionary """ return { "enabled": MEMORY_ENABLED, "table_name": MEMORY_TABLE_NAME, "ttl": MEMORY_TTL, "cache_size": MEMORY_CACHE_SIZE, "similarity_threshold": MEMORY_SIMILARITY_THRESHOLD, "max_entries": MEMORY_MAX_ENTRIES, } def get_agent_config() -> Dict[str, Any]: """ Returns a dictionary with agent configuration. Returns: Dict[str, Any]: Agent configuration dictionary """ return { "max_iterations": MAX_ITERATIONS, "verbose": VERBOSE, "timeout": TIMEOUT, "max_retries": MAX_RETRIES, "retry_delay": RETRY_DELAY, "parallel_tool_execution": PARALLEL_TOOL_EXECUTION, "checkpoint_path": DEFAULT_CHECKPOINT_PATH, }
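

# A minimal usage sketch, run only when this module is executed directly.
# It assumes the dictionary returned by get_logging_config() maps directly
# onto logging.basicConfig() keyword arguments (level, format, filename),
# and simply echoes the main configuration groups as a quick sanity check;
# it is illustrative and not part of the module's public API.
if __name__ == "__main__":
    logging.basicConfig(**get_logging_config())
    logger = logging.getLogger(__name__)
    logger.info("Model config: %s", get_model_config())
    logger.info("Tool config: %s", get_tool_config())
    logger.info("Memory config: %s", get_memory_config())
    logger.info("Agent config: %s", get_agent_config())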