# Final_Assignment_GAIAAgent
#
# Sleeping
#
# File size: 5,683 Bytes

"""
Configuration module for the GAIA agent.

This module centralizes all configuration settings for the agent, including:
- API endpoints
- Model selection
- Tool parameters
- Memory settings
- Logging configuration
- Web browsing settings
- Multimodal processing parameters

When python-dotenv is installed, it is used to load environment variables for
sensitive credentials; when it is not installed, only the existing process
environment is read.
"""

import os
import logging
from typing import Dict, Any, Optional, List, Union, Tuple

# Optional dependency: when python-dotenv is importable, call load_dotenv() at
# import time so the os.getenv() lookups that follow see its effect
# (presumably values from a .env file -- see the python-dotenv docs).
try:
    from dotenv import load_dotenv
    load_dotenv(verbose=False)
except ImportError:
    # python-dotenv is not installed: define a stand-in so the name
    # `load_dotenv` still exists in this module. It is not called here, so
    # only the real process environment is consulted.
    def load_dotenv(*args, **kwargs):
        pass  # No-op implementation when dotenv is not available

# Credentials, read from the environment at import time.
# Each one falls back to an empty string (not None) when the variable is
# unset, so callers should test for truthiness rather than `is None`.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY", "")
SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Supabase Configuration (partially hardcoded)
# Only SUPABASE_KEY comes from the environment; the project URL, ID and name
# below are fixed literals.
# NOTE: the subdomain of SUPABASE_URL repeats SUPABASE_PROJECT_ID, so the two
# values must be changed together.
SUPABASE_URL = "https://tjamxhvvtnypbadvrkjq.supabase.co"
SUPABASE_PROJECT_ID = "tjamxhvvtnypbadvrkjq"
SUPABASE_PROJECT_NAME = "HF_GAIA_Assessment_AGT"

# API Endpoints (hardcoded)
# Of these, only PERPLEXITY_API_URL is surfaced through get_tool_config();
# the others are exposed solely as module-level constants in this file.
OPENAI_API_BASE = "https://api.openai.com/v1"
SERPER_API_URL = "https://google.serper.dev/search"
PERPLEXITY_API_URL = "https://api.perplexity.ai"
HUGGINGFACE_API_URL = "https://api-inference.huggingface.co/models"

# Model Configuration (hardcoded with updated models)
# All values in this section are returned by get_model_config().
DEFAULT_MODEL = "gpt-4o"  # Updated from gpt-4-turbo
EMBEDDING_MODEL = "text-embedding-3-large"  # Updated from text-embedding-ada-002
FALLBACK_MODEL = "gpt-3.5-turbo"
VISION_MODEL = "gpt-4o"  # Updated from gpt-4-vision-preview
REASONING_MODEL = "gpt-4o"  # Updated from gpt-4-turbo
MODEL_TEMPERATURE = 0.1  # exposed as "temperature"
MODEL_MAX_TOKENS = 4096  # exposed as "max_tokens"

# Tool Parameters (hardcoded)
# Every value in this section is returned by get_tool_config(), grouped under
# one sub-dictionary per tool.
# NOTE(review): the *_TIMEOUT units are not stated here -- presumably seconds;
# confirm against the code that consumes them.
WEB_SEARCH_RESULT_COUNT = 5
WEB_SEARCH_TIMEOUT = 10
ARXIV_MAX_RESULTS = 3
DUCKDUCKGO_TIMEOUT = 10
DUCKDUCKGO_MAX_RESULTS = 5
PERPLEXITY_TIMEOUT = 30
PERPLEXITY_MODEL = "sonar-reasoning"
WEB_SCRAPING_TIMEOUT = 15
WEB_SCRAPING_MAX_LINKS = 3
# NOTE(review): unit of the content-length cap (characters vs. bytes) is not
# visible in this file -- confirm with the scraping tool.
WEB_SCRAPING_MAX_CONTENT_LENGTH = 10000

# Memory Settings (hardcoded)
# Every value in this section is returned by get_memory_config().
MEMORY_ENABLED = True
MEMORY_TABLE_NAME = "gaia_memory"
MEMORY_TTL = 86400  # 24 hours in seconds
MEMORY_CACHE_SIZE = 100
MEMORY_SIMILARITY_THRESHOLD = 0.85
MEMORY_MAX_ENTRIES = 1000

# Agent Settings (hardcoded)
# Every value in this section is returned by get_agent_config().
MAX_ITERATIONS = 10
VERBOSE = False
TIMEOUT = 300  # 5 minutes in seconds
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
PARALLEL_TOOL_EXECUTION = False
# Relative path; what it is resolved against is decided by the consumer.
DEFAULT_CHECKPOINT_PATH = "checkpoints/gaia_agent_state.json"

# Logging Configuration (hardcoded)
# Consumed by get_logging_config(): LOG_LEVEL is upper-cased and looked up as
# an attribute of the logging module (falling back to logging.INFO when no
# such attribute exists), and a falsy LOG_FILE is reported as None.
LOG_LEVEL = "INFO"
LOG_FILE = "gaia_agent.log"
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Multimodal Settings (hardcoded)
# These values are not returned by any get_*_config() helper visible in this
# file; they are available only as module-level constants.
IMAGE_MAX_SIZE = 1024  # pixels
IMAGE_FORMAT = "JPEG"
# NOTE(review): OpenAI's image-input `detail` option is documented as
# low / high / auto; "medium" in the list below may not be accepted -- confirm.
VISION_DETAIL_LEVEL = "high"  # low, medium, high

# Web Browsing Settings (hardcoded)
# These values are not returned by any get_*_config() helper visible in this
# file; they are available only as module-level constants.
# The User-Agent string identifies as desktop Chrome 91 on 64-bit Windows 10.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
BROWSER_TIMEOUT = 30  # NOTE(review): unit not stated -- presumably seconds
BROWSER_HEADLESS = True
BROWSER_IMAGES_ENABLED = True

def get_logging_config() -> Dict[str, Any]:
    """
    Builds the logging settings from the module-level LOG_* constants.

    The keys ("level", "format", "filename") match keyword names accepted by
    ``logging.basicConfig``.

    Returns:
        Dict[str, Any]: Mapping with the numeric log level, the record format
        string, and the log file name (None when LOG_FILE is empty).
    """
    # Translate the level name into the logging module's constant; names the
    # module does not define fall back to INFO.
    resolved_level = getattr(logging, LOG_LEVEL.upper(), logging.INFO)

    # A falsy LOG_FILE (e.g. an empty string) is normalised to None.
    log_filename = LOG_FILE or None

    return dict(level=resolved_level, format=LOG_FORMAT, filename=log_filename)

def get_model_config() -> Dict[str, Any]:
    """
    Collects the model-related constants into a single dictionary.

    Returns:
        Dict[str, Any]: Mapping with the default, embedding, fallback, vision
        and reasoning model names plus the sampling temperature and the
        maximum token count.
    """
    model_settings: Dict[str, Any] = dict(
        default_model=DEFAULT_MODEL,
        embedding_model=EMBEDDING_MODEL,
        fallback_model=FALLBACK_MODEL,
        vision_model=VISION_MODEL,
        reasoning_model=REASONING_MODEL,
        temperature=MODEL_TEMPERATURE,
        max_tokens=MODEL_MAX_TOKENS,
    )
    return model_settings

def get_tool_config() -> Dict[str, Dict[str, Any]]:
    """
    Assembles the per-tool settings, one sub-dictionary per tool.

    Returns:
        Dict[str, Dict[str, Any]]: Mapping from tool name ("web_search",
        "arxiv", "duckduckgo", "perplexity", "web_scraping") to that tool's
        settings.
    """
    web_search_settings: Dict[str, Any] = {
        "result_count": WEB_SEARCH_RESULT_COUNT,
        "timeout": WEB_SEARCH_TIMEOUT,
    }
    arxiv_settings: Dict[str, Any] = {"max_results": ARXIV_MAX_RESULTS}
    duckduckgo_settings: Dict[str, Any] = {
        "timeout": DUCKDUCKGO_TIMEOUT,
        "max_results": DUCKDUCKGO_MAX_RESULTS,
    }
    perplexity_settings: Dict[str, Any] = {
        "timeout": PERPLEXITY_TIMEOUT,
        "model": PERPLEXITY_MODEL,
        "api_url": PERPLEXITY_API_URL,
    }
    web_scraping_settings: Dict[str, Any] = {
        "timeout": WEB_SCRAPING_TIMEOUT,
        "max_links": WEB_SCRAPING_MAX_LINKS,
        "max_content_length": WEB_SCRAPING_MAX_CONTENT_LENGTH,
    }

    # Compose the outer mapping in the same tool order as before.
    return dict(
        web_search=web_search_settings,
        arxiv=arxiv_settings,
        duckduckgo=duckduckgo_settings,
        perplexity=perplexity_settings,
        web_scraping=web_scraping_settings,
    )

def get_memory_config() -> Dict[str, Any]:
    """
    Gathers the MEMORY_* constants into a dictionary.

    Returns:
        Dict[str, Any]: Mapping with the keys "enabled", "table_name", "ttl",
        "cache_size", "similarity_threshold" and "max_entries".
    """
    memory_settings: Dict[str, Any] = {}
    memory_settings["enabled"] = MEMORY_ENABLED
    memory_settings["table_name"] = MEMORY_TABLE_NAME
    memory_settings["ttl"] = MEMORY_TTL
    memory_settings["cache_size"] = MEMORY_CACHE_SIZE
    memory_settings["similarity_threshold"] = MEMORY_SIMILARITY_THRESHOLD
    memory_settings["max_entries"] = MEMORY_MAX_ENTRIES
    return memory_settings

def get_agent_config() -> Dict[str, Any]:
    """
    Packs the agent-level constants into a dictionary.

    Returns:
        Dict[str, Any]: Mapping with the iteration limit, verbosity flag,
        overall timeout, retry count and delay, the parallel tool execution
        flag, and the checkpoint path.
    """
    # Key/value pairs listed in the order they appear in the resulting dict.
    agent_entries = (
        ("max_iterations", MAX_ITERATIONS),
        ("verbose", VERBOSE),
        ("timeout", TIMEOUT),
        ("max_retries", MAX_RETRIES),
        ("retry_delay", RETRY_DELAY),
        ("parallel_tool_execution", PARALLEL_TOOL_EXECUTION),
        ("checkpoint_path", DEFAULT_CHECKPOINT_PATH),
    )
    return dict(agent_entries)