"""
GAIA Environment Configuration Validator

This module provides functionality to validate environment configuration for
the GAIA system. It ensures all required environment variables are set and
have valid values.

Usage:
    from src.gaia.utils.config_validator import validate_config

    validation_result = validate_config()
    if validation_result['valid']:
        print("Configuration is valid!")
    else:
        print(f"Configuration errors: {validation_result['errors']}")
"""

import os
import logging
from typing import Dict, List, Any, Tuple, Optional, Callable
import re
from pathlib import Path

# Configure logging
logger = logging.getLogger("gaia.config_validator")
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
# Attach the handler only when the logger has none yet: executing this module
# body a second time (reload, import under a second name) would otherwise add
# another handler and every record would be emitted more than once.
if not logger.handlers:
    logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Values shared by several validators below.
_ALLOWED_MODELS = ("gpt-4o", "gpt-4", "gpt-3.5-turbo", "text-davinci-003")
_BOOLEAN_STRINGS = ("true", "false", "1", "0", "yes", "no")
# Used with fullmatch() so that a trailing newline is rejected as well.
_TABLE_NAME_RE = re.compile(r"[a-zA-Z0-9_]+")


def _int_in_range(value: str, low: int, high: Optional[int] = None) -> bool:
    """Return True if *value* is a digit string with low <= int(value) <= high.

    When *high* is None only the lower bound is checked.
    """
    if not value.isdigit():
        return False
    number = int(value)
    return number >= low and (high is None or number <= high)


def _is_boolean_string(value: str) -> bool:
    """Return True if *value* is one of the accepted boolean spellings."""
    return value.lower() in _BOOLEAN_STRINGS


def _is_allowed_model(value: str) -> bool:
    """Return True if *value* is one of the supported model names."""
    return value in _ALLOWED_MODELS


# Define required environment variables
# NOTE: the length checks use ">=" so that they agree with the
# "at least N characters" wording of the accompanying error messages.
REQUIRED_ENV_VARS = [
    {
        "name": "OPENAI_API_KEY",
        "description": "OpenAI API key for model access",
        "validator": lambda x: x.startswith("sk-") and len(x) >= 20,
        "error_message": "Must begin with 'sk-' and be at least 20 characters long"
    },
    {
        "name": "SUPABASE_URL",
        "description": "URL for your Supabase instance",
        "validator": lambda x: x.startswith("https://") and ".supabase.co" in x,
        "error_message": "Must be a valid Supabase URL (https://xxx.supabase.co)"
    },
    {
        "name": "SUPABASE_KEY",
        "description": "Service role API key for Supabase access",
        "validator": lambda x: len(x) >= 20,
        "error_message": "Must be at least 20 characters long"
    },
    {
        "name": "SERPER_API_KEY",
        "description": "API key for Serper search service",
        "validator": lambda x: len(x) >= 5,
        "error_message": "Must be at least 5 characters long"
    }
]

# Define optional environment variables with default values
OPTIONAL_ENV_VARS = [
    {
        "name": "PERPLEXITY_API_KEY",
        "description": "API key for Perplexity AI search",
        "default": "",
        "validator": lambda x: x == "" or len(x) >= 5,
        "error_message": "If provided, must be at least 5 characters long"
    },
    {
        "name": "HF_TOKEN",
        "description": "Hugging Face API token",
        "default": "",
        "validator": lambda x: x == "" or len(x) >= 5,
        "error_message": "If provided, must be at least 5 characters long"
    },
    {
        "name": "DEFAULT_MODEL",
        "description": "Default OpenAI model to use",
        "default": "gpt-4o",
        "validator": _is_allowed_model,
        "error_message": "Must be one of: gpt-4o, gpt-4, gpt-3.5-turbo, text-davinci-003"
    },
    {
        "name": "EMBEDDING_MODEL",
        "description": "OpenAI embedding model to use",
        "default": "text-embedding-3-large",
        "validator": lambda x: "embedding" in x.lower(),
        "error_message": "Must be a valid embedding model"
    },
    {
        "name": "FALLBACK_MODEL",
        "description": "Fallback model if default is unavailable",
        "default": "gpt-3.5-turbo",
        "validator": _is_allowed_model,
        "error_message": "Must be one of: gpt-4o, gpt-4, gpt-3.5-turbo, text-davinci-003"
    },
    {
        "name": "MEMORY_ENABLED",
        "description": "Enable/disable memory functionality",
        "default": "true",
        "validator": _is_boolean_string,
        "error_message": "Must be a boolean value (true/false, 1/0, yes/no)"
    },
    {
        "name": "MEMORY_TABLE_NAME",
        "description": "Supabase table for memory storage",
        "default": "gaia_memory",
        "validator": lambda x: _TABLE_NAME_RE.fullmatch(x) is not None,
        "error_message": "Must contain only letters, numbers, and underscores"
    },
    {
        "name": "DUCKDUCKGO_TIMEOUT",
        "description": "Timeout for DuckDuckGo searches in seconds",
        "default": "30",
        "validator": lambda x: _int_in_range(x, 1, 60),
        "error_message": "Must be a number between 1 and 60"
    },
    {
        "name": "DUCKDUCKGO_MAX_RESULTS",
        "description": "Maximum number of results from DuckDuckGo",
        "default": "5",
        "validator": lambda x: _int_in_range(x, 1, 10),
        "error_message": "Must be a number between 1 and 10"
    },
    {
        "name": "OPENAI_API_BASE",
        "description": "Custom API base URL for OpenAI (optional)",
        "default": "https://api.openai.com/v1",
        "validator": lambda x: x.startswith("https://"),
        "error_message": "Must be a valid HTTPS URL"
    },
    {
        "name": "MODEL_TEMPERATURE",
        "description": "Temperature for model responses (0.0-1.0)",
        "default": "0.7",
        # float() raises ValueError on non-numeric input; validate_env_var
        # converts that into a validation failure.
        "validator": lambda x: 0 <= float(x) <= 1,
        "error_message": "Must be a number between 0 and 1"
    },
    {
        "name": "MODEL_MAX_TOKENS",
        "description": "Maximum tokens in model responses",
        "default": "4000",
        "validator": lambda x: _int_in_range(x, 1, 16000),
        "error_message": "Must be a number between 1 and 16000"
    },
    {
        "name": "WEB_SEARCH_RESULT_COUNT",
        "description": "Number of results to return from web searches",
        "default": "3",
        "validator": lambda x: _int_in_range(x, 1, 10),
        "error_message": "Must be a number between 1 and 10"
    },
    {
        "name": "MEMORY_TTL",
        "description": "Time-to-live for memory entries in seconds",
        "default": "604800",  # 7 days
        "validator": lambda x: _int_in_range(x, 1),
        "error_message": "Must be a positive number"
    },
    {
        "name": "MEMORY_CACHE_SIZE",
        "description": "Size of memory cache",
        "default": "100",
        "validator": lambda x: _int_in_range(x, 1),
        "error_message": "Must be a positive number"
    },
    {
        "name": "MAX_ITERATIONS",
        "description": "Maximum iterations for agent execution",
        "default": "10",
        "validator": lambda x: _int_in_range(x, 1, 50),
        "error_message": "Must be a number between 1 and 50"
    },
    {
        "name": "VERBOSE",
        "description": "Enable verbose logging",
        "default": "false",
        "validator": _is_boolean_string,
        "error_message": "Must be a boolean value (true/false, 1/0, yes/no)"
    },
    {
        "name": "LOG_LEVEL",
        "description": "Logging level (DEBUG, INFO, WARNING, ERROR)",
        "default": "INFO",
        "validator": lambda x: x.upper() in ["DEBUG", "INFO", "WARNING", "ERROR"],
        "error_message": "Must be one of: DEBUG, INFO, WARNING, ERROR"
    }
]


def validate_env_var(
    var_name: str,
    validator: Callable[[str], Any],
    value: str,
    error_message: str,
) -> Tuple[bool, Optional[str]]:
    """
    Validate an environment variable value using the provided validator function.

    Args:
        var_name: The name of the environment variable
        validator: A function that takes a value and returns a truthy result
            when the value is acceptable
        value: The value to validate
        error_message: The error message to return if validation fails

    Returns:
        Tuple of (valid, error_message), where valid is a boolean and
        error_message is None if the value is valid, or a string prefixed with
        the variable name if it's invalid. If the validator itself raises, the
        value is reported as invalid and the exception text is appended.
    """
    try:
        accepted = bool(validator(value))
    except Exception as e:
        # Validators are arbitrary callables (e.g. float() on free text), so
        # any exception they raise is treated as "value is invalid".
        return False, f"{var_name}: {error_message} (Error: {str(e)})"

    if accepted:
        return True, None
    return False, f"{var_name}: {error_message}"


def validate_config() -> Dict[str, Any]:
    """
    Validate all environment variables against their validators.

    Returns:
        Dictionary containing:
        - 'valid': Boolean indicating whether no errors were found
        - 'errors': List of error messages for missing or invalid variables
        - 'warnings': List of warning messages (defaults in use, identical
          default and fallback model)
        - 'config': Dictionary of all configuration values (with defaults
          applied); missing required variables are not included
    """
    errors: List[str] = []
    warnings: List[str] = []
    config: Dict[str, str] = {}

    logger.info("Validating environment configuration...")

    # Validate required environment variables
    for var in REQUIRED_ENV_VARS:
        var_name = var["name"]
        var_value = os.environ.get(var_name)

        # An empty string is treated the same as an unset variable.
        if not var_value:
            errors.append(f"{var_name}: Missing required environment variable")
            continue

        valid, error = validate_env_var(
            var_name, var["validator"], var_value, var["error_message"]
        )
        if not valid:
            errors.append(error)

        config[var_name] = var_value

    # Validate optional environment variables
    for var in OPTIONAL_ENV_VARS:
        var_name = var["name"]
        var_value = os.environ.get(var_name, var["default"])

        # Only warn when a non-empty default is actually being applied.
        if var_name not in os.environ and var["default"]:
            warnings.append(f"{var_name}: Using default value: {var['default']}")

        valid, error = validate_env_var(
            var_name, var["validator"], var_value, var["error_message"]
        )
        if not valid:
            errors.append(error)

        config[var_name] = var_value

    # Additional cross-validations
    # e.g., check if FALLBACK_MODEL is different from DEFAULT_MODEL
    if config.get("FALLBACK_MODEL") == config.get("DEFAULT_MODEL"):
        warnings.append("FALLBACK_MODEL is the same as DEFAULT_MODEL. Consider using a different model for fallback.")

    result = {
        "valid": not errors,
        "errors": errors,
        "warnings": warnings,
        "config": config
    }

    if result["valid"]:
        logger.info("✅ Environment configuration is valid!")
    else:
        logger.error("❌ Environment configuration has %d errors!", len(errors))
        for error in errors:
            logger.error(" - %s", error)

    if warnings:
        logger.warning("⚠️ Environment configuration has %d warnings:", len(warnings))
        for warning in warnings:
            logger.warning(" - %s", warning)

    return result


def generate_env_example() -> str:
    """
    Generate a .env.example file template containing all required and optional variables.

    The file is written to the current working directory and overwrites any
    existing .env.example.

    Returns:
        The path to the generated .env.example file
    """
    env_example_path = Path(".") / ".env.example"

    lines = [
        "# GAIA System Environment Variables\n",
        "# Copy this file to .env and fill in the values\n\n",
        "# === REQUIRED ENVIRONMENT VARIABLES ===\n\n",
    ]
    for var in REQUIRED_ENV_VARS:
        lines.append(f"# {var['description']}\n")
        lines.append(f"# {var['error_message']}\n")
        lines.append(f"{var['name']}=\n\n")

    lines.append("# === OPTIONAL ENVIRONMENT VARIABLES ===\n\n")
    for var in OPTIONAL_ENV_VARS:
        lines.append(f"# {var['description']}\n")
        lines.append(f"# Default: {var['default']}\n")
        lines.append(f"# {var['error_message']}\n")
        lines.append(f"{var['name']}={var['default']}\n\n")

    # Explicit encoding keeps the output independent of the platform default.
    with open(env_example_path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    logger.info(f"Generated environment example file at {env_example_path}")
    return str(env_example_path)


def initialize_env_from_example() -> None:
    """
    Initialize a .env file from the .env.example if it doesn't exist.

    An existing .env in the current working directory is left untouched. If
    .env.example is missing it is generated first.
    """
    env_path = Path(".") / ".env"
    env_example_path = Path(".") / ".env.example"

    if env_path.exists():
        logger.info(f".env file already exists at {env_path}")
        return

    if not env_example_path.exists():
        generate_env_example()

    with open(env_example_path, "r", encoding="utf-8") as example_file:
        example_content = example_file.read()

    with open(env_path, "w", encoding="utf-8") as env_file:
        env_file.write(example_content)

    logger.info(f"Initialized .env file from .env.example at {env_path}")
    logger.info("Please edit the .env file and fill in the required values")


def get_documentation() -> Dict[str, List[Dict[str, str]]]:
    """
    Get documentation for all environment variables.

    Returns:
        Dictionary with two keys, 'required' and 'optional', each holding a
        list of per-variable dictionaries ('name', 'description', 'validation',
        and for optional variables also 'default')
    """
    required_vars = [
        {
            "name": var["name"],
            "description": var["description"],
            "validation": var["error_message"]
        }
        for var in REQUIRED_ENV_VARS
    ]

    optional_vars = [
        {
            "name": var["name"],
            "description": var["description"],
            "default": var["default"],
            "validation": var["error_message"]
        }
        for var in OPTIONAL_ENV_VARS
    ]

    return {
        "required": required_vars,
        "optional": optional_vars
    }


if __name__ == "__main__":
    # When run as a script, validate the configuration and generate example files
    validation_result = validate_config()

    # Generate .env.example file
    generate_env_example()

    # Print validation summary
    if validation_result["valid"]:
        print("\n✅ Configuration is valid!")
    else:
        print("\n❌ Configuration validation failed:")
        for error in validation_result["errors"]:
            print(f" - {error}")

    if validation_result["warnings"]:
        print("\n⚠️ Configuration warnings:")
        for warning in validation_result["warnings"]:
            print(f" - {warning}")