|
""" |
|
GAIA Environment Configuration Validator |
|
|
|
This module provides functionality to validate environment configuration for the GAIA system. |
|
It ensures all required environment variables are set and have valid values. |
|
|
|
Usage: |
|
from src.gaia.utils.config_validator import validate_config |
|
validation_result = validate_config() |
|
if validation_result['valid']: |
|
print("Configuration is valid!") |
|
else: |
|
print(f"Configuration errors: {validation_result['errors']}") |
|
""" |
|
|
|
import os |
|
import logging |
|
from typing import Dict, List, Any, Tuple, Optional |
|
import re |
|
from pathlib import Path |
|
|
|
|
|
# Module-level logger used by every function in this file.
logger = logging.getLogger("gaia.config_validator")

handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)

# logging.getLogger() hands back the same logger object on every call, so
# re-executing this module (for example through importlib.reload) would stack
# another handler and emit each record once per handler. Attach only when the
# logger has no handler yet.
if not logger.handlers:
    logger.addHandler(handler)

logger.setLevel(logging.INFO)
|
|
|
|
|
# Environment variables that must be present and non-empty.
# Each entry carries:
#   name          - the environment variable name
#   description   - human-readable purpose (written into .env.example)
#   validator     - callable taking the string value and returning a truthy
#                   result when the value is acceptable
#   error_message - text reported when the validator rejects the value
#
# Length bounds are inclusive (>=) so that the validators accept exactly what
# the error messages promise ("at least N characters").
REQUIRED_ENV_VARS = [
    {
        "name": "OPENAI_API_KEY",
        "description": "OpenAI API key for model access",
        "validator": lambda x: x.startswith("sk-") and len(x) >= 20,
        "error_message": "Must begin with 'sk-' and be at least 20 characters long",
    },
    {
        "name": "SUPABASE_URL",
        "description": "URL for your Supabase instance",
        "validator": lambda x: x.startswith("https://") and ".supabase.co" in x,
        "error_message": "Must be a valid Supabase URL (https://xxx.supabase.co)",
    },
    {
        "name": "SUPABASE_KEY",
        "description": "Service role API key for Supabase access",
        "validator": lambda x: len(x) >= 20,
        "error_message": "Must be at least 20 characters long",
    },
    {
        "name": "SERPER_API_KEY",
        "description": "API key for Serper search service",
        "validator": lambda x: len(x) >= 5,
        "error_message": "Must be at least 5 characters long",
    },
]
|
|
|
|
|
# Model names accepted by both DEFAULT_MODEL and FALLBACK_MODEL; kept in one
# place so the two validators cannot drift apart.
_SUPPORTED_CHAT_MODELS = ("gpt-4o", "gpt-4", "gpt-3.5-turbo", "text-davinci-003")

# Environment variables that may be omitted.
# Each entry carries the same keys as a required entry plus:
#   default - string value used when the variable is absent from the environment
#
# Validators receive the raw string (or the default) and may raise, e.g.
# float() on a non-numeric value.
OPTIONAL_ENV_VARS = [
    {
        "name": "PERPLEXITY_API_KEY",
        "description": "API key for Perplexity AI search",
        "default": "",
        # Inclusive bound: the message promises "at least 5 characters".
        "validator": lambda x: x == "" or len(x) >= 5,
        "error_message": "If provided, must be at least 5 characters long",
    },
    {
        "name": "HF_TOKEN",
        "description": "Hugging Face API token",
        "default": "",
        # Inclusive bound: the message promises "at least 5 characters".
        "validator": lambda x: x == "" or len(x) >= 5,
        "error_message": "If provided, must be at least 5 characters long",
    },
    {
        "name": "DEFAULT_MODEL",
        "description": "Default OpenAI model to use",
        "default": "gpt-4o",
        "validator": lambda x: x in _SUPPORTED_CHAT_MODELS,
        "error_message": "Must be one of: gpt-4o, gpt-4, gpt-3.5-turbo, text-davinci-003",
    },
    {
        "name": "EMBEDDING_MODEL",
        "description": "OpenAI embedding model to use",
        "default": "text-embedding-3-large",
        "validator": lambda x: "embedding" in x.lower(),
        "error_message": "Must be a valid embedding model",
    },
    {
        "name": "FALLBACK_MODEL",
        "description": "Fallback model if default is unavailable",
        "default": "gpt-3.5-turbo",
        "validator": lambda x: x in _SUPPORTED_CHAT_MODELS,
        "error_message": "Must be one of: gpt-4o, gpt-4, gpt-3.5-turbo, text-davinci-003",
    },
    {
        "name": "MEMORY_ENABLED",
        "description": "Enable/disable memory functionality",
        "default": "true",
        "validator": lambda x: x.lower() in ["true", "false", "1", "0", "yes", "no"],
        "error_message": "Must be a boolean value (true/false, 1/0, yes/no)",
    },
    {
        "name": "MEMORY_TABLE_NAME",
        "description": "Supabase table for memory storage",
        "default": "gaia_memory",
        # fullmatch instead of match + '$': '$' also matches just before a
        # trailing newline, which would let "name\n" through. The explicit
        # comparison makes the validator return a real bool.
        "validator": lambda x: re.fullmatch(r'[a-zA-Z0-9_]+', x) is not None,
        "error_message": "Must contain only letters, numbers, and underscores",
    },
    {
        "name": "DUCKDUCKGO_TIMEOUT",
        "description": "Timeout for DuckDuckGo searches in seconds",
        "default": "30",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 60,
        "error_message": "Must be a number between 1 and 60",
    },
    {
        "name": "DUCKDUCKGO_MAX_RESULTS",
        "description": "Maximum number of results from DuckDuckGo",
        "default": "5",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 10,
        "error_message": "Must be a number between 1 and 10",
    },
    {
        "name": "OPENAI_API_BASE",
        "description": "Custom API base URL for OpenAI (optional)",
        "default": "https://api.openai.com/v1",
        "validator": lambda x: x.startswith("https://"),
        "error_message": "Must be a valid HTTPS URL",
    },
    {
        "name": "MODEL_TEMPERATURE",
        "description": "Temperature for model responses (0.0-1.0)",
        "default": "0.7",
        # float() raises ValueError on non-numeric input; the caller is
        # expected to treat a raising validator as a failed validation.
        "validator": lambda x: 0 <= float(x) <= 1,
        "error_message": "Must be a number between 0 and 1",
    },
    {
        "name": "MODEL_MAX_TOKENS",
        "description": "Maximum tokens in model responses",
        "default": "4000",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 16000,
        "error_message": "Must be a number between 1 and 16000",
    },
    {
        "name": "WEB_SEARCH_RESULT_COUNT",
        "description": "Number of results to return from web searches",
        "default": "3",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 10,
        "error_message": "Must be a number between 1 and 10",
    },
    {
        "name": "MEMORY_TTL",
        "description": "Time-to-live for memory entries in seconds",
        "default": "604800",
        "validator": lambda x: x.isdigit() and int(x) > 0,
        "error_message": "Must be a positive number",
    },
    {
        "name": "MEMORY_CACHE_SIZE",
        "description": "Size of memory cache",
        "default": "100",
        "validator": lambda x: x.isdigit() and int(x) > 0,
        "error_message": "Must be a positive number",
    },
    {
        "name": "MAX_ITERATIONS",
        "description": "Maximum iterations for agent execution",
        "default": "10",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 50,
        "error_message": "Must be a number between 1 and 50",
    },
    {
        "name": "VERBOSE",
        "description": "Enable verbose logging",
        "default": "false",
        "validator": lambda x: x.lower() in ["true", "false", "1", "0", "yes", "no"],
        "error_message": "Must be a boolean value (true/false, 1/0, yes/no)",
    },
    {
        "name": "LOG_LEVEL",
        "description": "Logging level (DEBUG, INFO, WARNING, ERROR)",
        "default": "INFO",
        "validator": lambda x: x.upper() in ["DEBUG", "INFO", "WARNING", "ERROR"],
        "error_message": "Must be one of: DEBUG, INFO, WARNING, ERROR",
    },
]
|
|
|
def validate_env_var(var_name: str, validator, value: str, error_message: str) -> Tuple[bool, Optional[str]]:
    """
    Run a single validator against a value and report the outcome.

    Args:
        var_name: Name of the environment variable, used as the message prefix
        validator: Callable receiving the value; a truthy result means "valid"
        value: The value to check
        error_message: Text to report when the value is rejected

    Returns:
        (True, None) when the validator accepts the value; otherwise
        (False, message) where message is "<var_name>: <error_message>",
        with the exception text appended when the validator raised.
    """
    try:
        # Truthiness is evaluated inside the try so that any exception,
        # whether from the call or from the bool conversion, is reported
        # as a validation failure rather than propagated.
        accepted = bool(validator(value))
    except Exception as e:
        return False, f"{var_name}: {error_message} (Error: {str(e)})"

    if not accepted:
        return False, f"{var_name}: {error_message}"
    return True, None
|
|
|
def validate_config() -> Dict[str, Any]:
    """
    Check every known environment variable and summarise the outcome.

    Required variables must be present and non-empty; a missing one is
    reported as an error and left out of the returned config. Optional
    variables fall back to their declared default when absent from the
    environment. Values that fail validation are reported as errors but are
    still placed in the returned config.

    Returns:
        Dictionary containing:
        - 'valid': True when no error was recorded (a failing optional
          variable counts as an error too)
        - 'errors': List of error messages for missing or invalid variables
        - 'warnings': List of warnings (defaults in use, identical
          default/fallback model)
        - 'config': Dictionary of the collected values (with defaults applied)
    """
    problems: List[str] = []
    notices: List[str] = []
    resolved: Dict[str, Any] = {}

    logger.info("Validating environment configuration...")

    # Required variables: absent or empty values are errors and are skipped.
    for spec in REQUIRED_ENV_VARS:
        name = spec["name"]
        raw = os.environ.get(name)

        if raw:
            ok, message = validate_env_var(name, spec["validator"], raw, spec["error_message"])
            if not ok:
                problems.append(message)
            resolved[name] = raw
        else:
            problems.append(f"{name}: Missing required environment variable")

    # Optional variables: use the default when the variable is not set.
    for spec in OPTIONAL_ENV_VARS:
        name = spec["name"]
        fallback = spec["default"]
        raw = os.environ.get(name, fallback)

        # Only mention the default when there is a non-empty one in play.
        if fallback and name not in os.environ:
            notices.append(f"{name}: Using default value: {fallback}")

        ok, message = validate_env_var(name, spec["validator"], raw, spec["error_message"])
        if not ok:
            problems.append(message)

        resolved[name] = raw

    # A fallback identical to the primary model offers no real fallback.
    if resolved.get("DEFAULT_MODEL") == resolved.get("FALLBACK_MODEL"):
        notices.append("FALLBACK_MODEL is the same as DEFAULT_MODEL. Consider using a different model for fallback.")

    is_valid = not problems
    result = {
        "valid": is_valid,
        "errors": problems,
        "warnings": notices,
        "config": resolved
    }

    if not is_valid:
        logger.error(f"❌ Environment configuration has {len(problems)} errors!")
        for problem in problems:
            logger.error(f" - {problem}")
    else:
        logger.info("✅ Environment configuration is valid!")

    if notices:
        logger.warning(f"⚠️ Environment configuration has {len(notices)} warnings:")
        for notice in notices:
            logger.warning(f" - {notice}")

    return result
|
|
|
def generate_env_example() -> str:
    """
    Write a .env.example template into the current working directory.

    The template lists every required variable with an empty value and every
    optional variable pre-filled with its default, each preceded by its
    description and validation hint as comments. An existing file is
    overwritten.

    Returns:
        The path to the generated .env.example file
    """
    target = Path(".") / ".env.example"

    with open(target, "w") as out:
        lines = [
            "# GAIA System Environment Variables\n",
            "# Copy this file to .env and fill in the values\n\n",
            "# === REQUIRED ENVIRONMENT VARIABLES ===\n\n",
        ]

        for spec in REQUIRED_ENV_VARS:
            lines.append(f"# {spec['description']}\n")
            lines.append(f"# {spec['error_message']}\n")
            lines.append(f"{spec['name']}=\n\n")

        lines.append("# === OPTIONAL ENVIRONMENT VARIABLES ===\n\n")

        for spec in OPTIONAL_ENV_VARS:
            lines.append(f"# {spec['description']}\n")
            lines.append(f"# Default: {spec['default']}\n")
            lines.append(f"# {spec['error_message']}\n")
            lines.append(f"{spec['name']}={spec['default']}\n\n")

        out.writelines(lines)

    logger.info(f"Generated environment example file at {target}")
    return str(target)
|
|
|
def initialize_env_from_example() -> None:
    """
    Create a .env file in the current directory by copying .env.example.

    Does nothing (beyond logging) when .env already exists. When the
    .env.example template is missing it is generated first.
    """
    dotenv_file = Path(".") / ".env"
    template_file = Path(".") / ".env.example"

    # Never overwrite an existing .env.
    if dotenv_file.exists():
        logger.info(f".env file already exists at {dotenv_file}")
        return

    if not template_file.exists():
        generate_env_example()

    template_text = template_file.read_text()
    dotenv_file.write_text(template_text)

    logger.info(f"Initialized .env file from .env.example at {dotenv_file}")
    logger.info("Please edit the .env file and fill in the required values")
|
|
|
def get_documentation() -> Dict[str, List[Dict[str, str]]]:
    """
    Describe every known environment variable.

    Returns:
        Dictionary with two lists, 'required' and 'optional'. Each item holds
        the variable's 'name', 'description' and 'validation' hint (its error
        message); optional items additionally hold their 'default'.
    """
    required_docs = [
        {
            "name": spec["name"],
            "description": spec["description"],
            "validation": spec["error_message"],
        }
        for spec in REQUIRED_ENV_VARS
    ]

    optional_docs = [
        {
            "name": spec["name"],
            "description": spec["description"],
            "default": spec["default"],
            "validation": spec["error_message"],
        }
        for spec in OPTIONAL_ENV_VARS
    ]

    return {"required": required_docs, "optional": optional_docs}
|
|
|
if __name__ == "__main__":
    # Script entry point: validate the current environment, refresh the
    # .env.example template, then print a human-readable summary.
    outcome = validate_config()

    generate_env_example()

    if not outcome["valid"]:
        print("\n❌ Configuration validation failed:")
        for problem in outcome["errors"]:
            print(f" - {problem}")
    else:
        print("\n✅ Configuration is valid!")

    notices = outcome["warnings"]
    if notices:
        print("\n⚠️ Configuration warnings:")
        for notice in notices:
            print(f" - {notice}")