|
""" |
|
Configuration module for the GAIA agent.

This module centralizes all configuration settings for the agent, including:

- API endpoints
- Model selection
- Tool parameters
- Memory settings
- Logging configuration
- Web browsing settings
- Multimodal processing parameters

Sensitive credentials are read from environment variables. When python-dotenv
is installed it is used to load them first; when it is not installed, only the
variables already present in the process environment are used.
|
""" |
|
|
|
import os |
|
import logging |
|
from typing import Dict, Any, Optional, List, Union, Tuple |
|
|
|
# python-dotenv is an optional dependency.  When it is importable, load it
# immediately so the os.getenv() lookups below can see its values.  When it is
# missing, bind a do-nothing ``load_dotenv`` so the name still exists.
try:
    from dotenv import load_dotenv

    load_dotenv(verbose=False)
except ImportError:

    def load_dotenv(*args, **kwargs):
        """No-op stand-in used when python-dotenv is not installed."""
        return None
|
|
|
# Credentials come from the environment; a missing variable yields an empty
# string rather than None, so every key below is always a str.
OPENAI_API_KEY: str = os.environ.get("OPENAI_API_KEY", "")
SERPER_API_KEY: str = os.environ.get("SERPER_API_KEY", "")
PERPLEXITY_API_KEY: str = os.environ.get("PERPLEXITY_API_KEY", "")
SUPABASE_KEY: str = os.environ.get("SUPABASE_KEY", "")
HF_TOKEN: str = os.environ.get("HF_TOKEN", "")
|
|
|
|
|
# Supabase project coordinates (hard-coded; the matching key is SUPABASE_KEY,
# which is read from the environment).
# NOTE(review): the URL host embeds the same project id as SUPABASE_PROJECT_ID;
# keep the two in sync if either changes.
SUPABASE_URL: str = "https://tjamxhvvtnypbadvrkjq.supabase.co"
SUPABASE_PROJECT_ID: str = "tjamxhvvtnypbadvrkjq"
SUPABASE_PROJECT_NAME: str = "HF_GAIA_Assessment_AGT"
|
|
|
# Base URLs of the external HTTP services.
OPENAI_API_BASE: str = "https://api.openai.com/v1"
SERPER_API_URL: str = "https://google.serper.dev/search"
PERPLEXITY_API_URL: str = "https://api.perplexity.ai"
HUGGINGFACE_API_URL: str = "https://api-inference.huggingface.co/models"
|
|
|
|
|
# Model selection.  These values are bundled by get_model_config().
DEFAULT_MODEL: str = "gpt-4o"
EMBEDDING_MODEL: str = "text-embedding-3-large"
FALLBACK_MODEL: str = "gpt-3.5-turbo"
VISION_MODEL: str = "gpt-4o"
REASONING_MODEL: str = "gpt-4o"
MODEL_TEMPERATURE: float = 0.1
MODEL_MAX_TOKENS: int = 4096
|
|
|
|
|
# Tool parameters.  These values are bundled by get_tool_config().
# NOTE(review): the *_TIMEOUT values are presumably seconds — confirm against
# the tool implementations that consume them.
WEB_SEARCH_RESULT_COUNT: int = 5
WEB_SEARCH_TIMEOUT: int = 10
ARXIV_MAX_RESULTS: int = 3
DUCKDUCKGO_TIMEOUT: int = 10
DUCKDUCKGO_MAX_RESULTS: int = 5
PERPLEXITY_TIMEOUT: int = 30
PERPLEXITY_MODEL: str = "sonar-reasoning"
WEB_SCRAPING_TIMEOUT: int = 15
WEB_SCRAPING_MAX_LINKS: int = 3
WEB_SCRAPING_MAX_CONTENT_LENGTH: int = 10000
|
|
|
|
|
# Memory settings.  These values are bundled by get_memory_config().
# NOTE(review): MEMORY_TTL is presumably in seconds (86400 s = 24 h) — confirm
# against the memory backend.
MEMORY_ENABLED: bool = True
MEMORY_TABLE_NAME: str = "gaia_memory"
MEMORY_TTL: int = 86400
MEMORY_CACHE_SIZE: int = 100
MEMORY_SIMILARITY_THRESHOLD: float = 0.85
MEMORY_MAX_ENTRIES: int = 1000
|
|
|
|
|
# Agent run-loop settings.  These values are bundled by get_agent_config().
# NOTE(review): TIMEOUT and RETRY_DELAY are presumably seconds — confirm with
# the agent loop that reads them.
MAX_ITERATIONS: int = 10
VERBOSE: bool = False
TIMEOUT: int = 300
MAX_RETRIES: int = 3
RETRY_DELAY: int = 2
PARALLEL_TOOL_EXECUTION: bool = False
DEFAULT_CHECKPOINT_PATH: str = "checkpoints/gaia_agent_state.json"
|
|
|
|
|
# Logging configuration.  These values are consumed by get_logging_config(),
# which upper-cases LOG_LEVEL and looks it up on the ``logging`` module, and
# turns an empty LOG_FILE into None.
LOG_LEVEL: str = "INFO"
LOG_FILE: str = "gaia_agent.log"
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
|
|
|
# Multimodal processing parameters.
# NOTE(review): IMAGE_MAX_SIZE is presumably a pixel dimension — confirm with
# the image-handling code that reads it.
IMAGE_MAX_SIZE: int = 1024
IMAGE_FORMAT: str = "JPEG"
VISION_DETAIL_LEVEL: str = "high"
|
|
|
|
|
# Web browsing settings.
# The user-agent literal is split across lines purely for readability; the
# adjacent string pieces are concatenated into one unchanged value.
USER_AGENT: str = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
# NOTE(review): BROWSER_TIMEOUT is presumably in seconds — confirm.
BROWSER_TIMEOUT: int = 30
BROWSER_HEADLESS: bool = True
BROWSER_IMAGES_ENABLED: bool = True
|
|
|
def get_logging_config() -> Dict[str, Any]:
    """
    Build the logging settings from the module-level LOG_* constants.

    ``LOG_LEVEL`` is upper-cased and looked up as an attribute of the
    ``logging`` module.  If the name is unknown, or resolves to something that
    is not an integer level, ``logging.INFO`` is used instead.

    Returns:
        Dict[str, Any]: Mapping with three keys:
            "level": the resolved integer logging level,
            "format": ``LOG_FORMAT`` unchanged,
            "filename": ``LOG_FILE``, or None when ``LOG_FILE`` is empty.
    """
    level = getattr(logging, LOG_LEVEL.upper(), logging.INFO)
    # A plain getattr() also matches upper-case attributes that are not
    # levels (e.g. logging.BASIC_FORMAT is a str); never return those as the
    # "level" value.
    if not isinstance(level, int):
        level = logging.INFO

    return {
        "level": level,
        "format": LOG_FORMAT,
        "filename": LOG_FILE if LOG_FILE else None,
    }
|
|
|
def get_model_config() -> Dict[str, Any]:
    """
    Collect the model-related module constants into one mapping.

    Returns:
        Dict[str, Any]: The model names under "default_model",
        "embedding_model", "fallback_model", "vision_model" and
        "reasoning_model", plus the "temperature" and "max_tokens" values.
    """
    model_settings: Dict[str, Any] = dict(
        default_model=DEFAULT_MODEL,
        embedding_model=EMBEDDING_MODEL,
        fallback_model=FALLBACK_MODEL,
        vision_model=VISION_MODEL,
        reasoning_model=REASONING_MODEL,
        temperature=MODEL_TEMPERATURE,
        max_tokens=MODEL_MAX_TOKENS,
    )
    return model_settings
|
|
|
def get_tool_config() -> Dict[str, Dict[str, Any]]:
    """
    Assemble the per-tool settings from the module-level tool constants.

    Returns:
        Dict[str, Dict[str, Any]]: One nested mapping per tool, keyed by
        "web_search", "arxiv", "duckduckgo", "perplexity" and "web_scraping".
    """
    # Each tool's settings are built separately, then combined below.
    web_search_settings: Dict[str, Any] = {
        "result_count": WEB_SEARCH_RESULT_COUNT,
        "timeout": WEB_SEARCH_TIMEOUT,
    }
    arxiv_settings: Dict[str, Any] = {"max_results": ARXIV_MAX_RESULTS}
    duckduckgo_settings: Dict[str, Any] = {
        "timeout": DUCKDUCKGO_TIMEOUT,
        "max_results": DUCKDUCKGO_MAX_RESULTS,
    }
    perplexity_settings: Dict[str, Any] = {
        "timeout": PERPLEXITY_TIMEOUT,
        "model": PERPLEXITY_MODEL,
        "api_url": PERPLEXITY_API_URL,
    }
    web_scraping_settings: Dict[str, Any] = {
        "timeout": WEB_SCRAPING_TIMEOUT,
        "max_links": WEB_SCRAPING_MAX_LINKS,
        "max_content_length": WEB_SCRAPING_MAX_CONTENT_LENGTH,
    }

    return {
        "web_search": web_search_settings,
        "arxiv": arxiv_settings,
        "duckduckgo": duckduckgo_settings,
        "perplexity": perplexity_settings,
        "web_scraping": web_scraping_settings,
    }
|
|
|
def get_memory_config() -> Dict[str, Any]:
    """
    Collect the memory-related module constants into one mapping.

    Returns:
        Dict[str, Any]: Values under the keys "enabled", "table_name", "ttl",
        "cache_size", "similarity_threshold" and "max_entries".
    """
    memory_settings: Dict[str, Any] = {}
    memory_settings["enabled"] = MEMORY_ENABLED
    memory_settings["table_name"] = MEMORY_TABLE_NAME
    memory_settings["ttl"] = MEMORY_TTL
    memory_settings["cache_size"] = MEMORY_CACHE_SIZE
    memory_settings["similarity_threshold"] = MEMORY_SIMILARITY_THRESHOLD
    memory_settings["max_entries"] = MEMORY_MAX_ENTRIES
    return memory_settings
|
|
|
def get_agent_config() -> Dict[str, Any]:
    """
    Collect the agent run-loop module constants into one mapping.

    Returns:
        Dict[str, Any]: Values under the keys "max_iterations", "verbose",
        "timeout", "max_retries", "retry_delay", "parallel_tool_execution"
        and "checkpoint_path".
    """
    agent_settings: Dict[str, Any] = dict(
        max_iterations=MAX_ITERATIONS,
        verbose=VERBOSE,
        timeout=TIMEOUT,
        max_retries=MAX_RETRIES,
        retry_delay=RETRY_DELAY,
        parallel_tool_execution=PARALLEL_TOOL_EXECUTION,
        checkpoint_path=DEFAULT_CHECKPOINT_PATH,
    )
    return agent_settings