"""
GAIA Environment Configuration Validator

This module provides functionality to validate environment configuration for the GAIA system.
It ensures all required environment variables are set and have valid values.

Usage:
    from src.gaia.utils.config_validator import validate_config
    validation_result = validate_config()
    if validation_result['valid']:
        print("Configuration is valid!")
    else:
        print(f"Configuration errors: {validation_result['errors']}")
"""

import os
import logging
from typing import Dict, List, Any, Tuple, Optional
import re
from pathlib import Path

# Configure logging
logger = logging.getLogger("gaia.config_validator")
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
# logging.getLogger returns the same logger object for the same name, so if
# this module body runs again (importlib.reload, or an import under a second
# package path) an unconditional addHandler would stack handlers and every
# record would be emitted once per handler. Attach only when none is present.
if not logger.handlers:
    logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Define required environment variables.
# Each entry carries:
#   name          - the environment variable to read
#   description   - human-readable purpose (used in generated docs/templates)
#   validator     - callable taking the string value, truthy when acceptable
#   error_message - text reported when the validator rejects the value
# Length checks use ">=" so they agree with the "at least N characters"
# wording of the corresponding error messages.
REQUIRED_ENV_VARS = [
    {
        "name": "OPENAI_API_KEY",
        "description": "OpenAI API key for model access",
        "validator": lambda x: x.startswith("sk-") and len(x) >= 20,
        "error_message": "Must begin with 'sk-' and be at least 20 characters long",
    },
    {
        "name": "SUPABASE_URL",
        "description": "URL for your Supabase instance",
        "validator": lambda x: x.startswith("https://") and ".supabase.co" in x,
        "error_message": "Must be a valid Supabase URL (https://xxx.supabase.co)",
    },
    {
        "name": "SUPABASE_KEY",
        "description": "Service role API key for Supabase access",
        "validator": lambda x: len(x) >= 20,
        "error_message": "Must be at least 20 characters long",
    },
    {
        "name": "SERPER_API_KEY",
        "description": "API key for Serper search service",
        "validator": lambda x: len(x) >= 5,
        "error_message": "Must be at least 5 characters long",
    },
]

# Define optional environment variables with default values.
# Each entry has the same keys as a required entry plus "default", the string
# used when the variable is absent from the environment.

# Value sets shared by several entries below, kept in one place so the
# entries that use them cannot drift apart.
_CHAT_MODEL_CHOICES = ("gpt-4o", "gpt-4", "gpt-3.5-turbo", "text-davinci-003")
_BOOLEAN_CHOICES = ("true", "false", "1", "0", "yes", "no")
_LOG_LEVEL_CHOICES = ("DEBUG", "INFO", "WARNING", "ERROR")

OPTIONAL_ENV_VARS = [
    {
        "name": "PERPLEXITY_API_KEY",
        "description": "API key for Perplexity AI search",
        "default": "",
        # ">=" matches the "at least 5 characters" wording of the message.
        "validator": lambda x: x == "" or len(x) >= 5,
        "error_message": "If provided, must be at least 5 characters long",
    },
    {
        "name": "HF_TOKEN",
        "description": "Hugging Face API token",
        "default": "",
        "validator": lambda x: x == "" or len(x) >= 5,
        "error_message": "If provided, must be at least 5 characters long",
    },
    {
        "name": "DEFAULT_MODEL",
        "description": "Default OpenAI model to use",
        "default": "gpt-4o",
        "validator": lambda x: x in _CHAT_MODEL_CHOICES,
        "error_message": "Must be one of: gpt-4o, gpt-4, gpt-3.5-turbo, text-davinci-003",
    },
    {
        "name": "EMBEDDING_MODEL",
        "description": "OpenAI embedding model to use",
        "default": "text-embedding-3-large",
        "validator": lambda x: "embedding" in x.lower(),
        "error_message": "Must be a valid embedding model",
    },
    {
        "name": "FALLBACK_MODEL",
        "description": "Fallback model if default is unavailable",
        "default": "gpt-3.5-turbo",
        "validator": lambda x: x in _CHAT_MODEL_CHOICES,
        "error_message": "Must be one of: gpt-4o, gpt-4, gpt-3.5-turbo, text-davinci-003",
    },
    {
        "name": "MEMORY_ENABLED",
        "description": "Enable/disable memory functionality",
        "default": "true",
        "validator": lambda x: x.lower() in _BOOLEAN_CHOICES,
        "error_message": "Must be a boolean value (true/false, 1/0, yes/no)",
    },
    {
        "name": "MEMORY_TABLE_NAME",
        "description": "Supabase table for memory storage",
        "default": "gaia_memory",
        # fullmatch rather than match + "$": "$" also matches just before a
        # trailing newline, which would let "name\n" through.
        "validator": lambda x: re.fullmatch(r'[a-zA-Z0-9_]+', x) is not None,
        "error_message": "Must contain only letters, numbers, and underscores",
    },
    {
        "name": "DUCKDUCKGO_TIMEOUT",
        "description": "Timeout for DuckDuckGo searches in seconds",
        "default": "30",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 60,
        "error_message": "Must be a number between 1 and 60",
    },
    {
        "name": "DUCKDUCKGO_MAX_RESULTS",
        "description": "Maximum number of results from DuckDuckGo",
        "default": "5",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 10,
        "error_message": "Must be a number between 1 and 10",
    },
    {
        "name": "OPENAI_API_BASE",
        "description": "Custom API base URL for OpenAI (optional)",
        "default": "https://api.openai.com/v1",
        "validator": lambda x: x.startswith("https://"),
        "error_message": "Must be a valid HTTPS URL",
    },
    {
        "name": "MODEL_TEMPERATURE",
        "description": "Temperature for model responses (0.0-1.0)",
        "default": "0.7",
        # float() raises ValueError on non-numeric text; validate_env_var
        # catches that and reports it as a validation failure.
        "validator": lambda x: 0 <= float(x) <= 1,
        "error_message": "Must be a number between 0 and 1",
    },
    {
        "name": "MODEL_MAX_TOKENS",
        "description": "Maximum tokens in model responses",
        "default": "4000",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 16000,
        "error_message": "Must be a number between 1 and 16000",
    },
    {
        "name": "WEB_SEARCH_RESULT_COUNT",
        "description": "Number of results to return from web searches",
        "default": "3",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 10,
        "error_message": "Must be a number between 1 and 10",
    },
    {
        "name": "MEMORY_TTL",
        "description": "Time-to-live for memory entries in seconds",
        "default": "604800",  # 7 days
        "validator": lambda x: x.isdigit() and int(x) > 0,
        "error_message": "Must be a positive number",
    },
    {
        "name": "MEMORY_CACHE_SIZE",
        "description": "Size of memory cache",
        "default": "100",
        "validator": lambda x: x.isdigit() and int(x) > 0,
        "error_message": "Must be a positive number",
    },
    {
        "name": "MAX_ITERATIONS",
        "description": "Maximum iterations for agent execution",
        "default": "10",
        "validator": lambda x: x.isdigit() and 1 <= int(x) <= 50,
        "error_message": "Must be a number between 1 and 50",
    },
    {
        "name": "VERBOSE",
        "description": "Enable verbose logging",
        "default": "false",
        "validator": lambda x: x.lower() in _BOOLEAN_CHOICES,
        "error_message": "Must be a boolean value (true/false, 1/0, yes/no)",
    },
    {
        "name": "LOG_LEVEL",
        "description": "Logging level (DEBUG, INFO, WARNING, ERROR)",
        "default": "INFO",
        "validator": lambda x: x.upper() in _LOG_LEVEL_CHOICES,
        "error_message": "Must be one of: DEBUG, INFO, WARNING, ERROR",
    },
]

def validate_env_var(var_name: str, validator, value: str, error_message: str) -> Tuple[bool, Optional[str]]:
    """
    Run one validator against one value and report the outcome.

    Args:
        var_name: Name of the environment variable, used as the message prefix
        validator: Callable that receives the value; a truthy result means valid
        value: The value to check
        error_message: Text to report when the value is rejected

    Returns:
        (True, None) when the validator accepts the value. Otherwise
        (False, "<var_name>: <error_message>"); if the validator raised, the
        exception text is appended as " (Error: ...)".
    """
    try:
        accepted = bool(validator(value))
    except Exception as exc:
        # Validators are arbitrary callables (e.g. float()/int() conversions),
        # so any exception they raise is treated as a failed validation
        # rather than propagated to the caller.
        return False, f"{var_name}: {error_message} (Error: {str(exc)})"

    if not accepted:
        return False, f"{var_name}: {error_message}"
    return True, None

def validate_config() -> Dict[str, Any]:
    """
    Check every known environment variable and collect the outcome.

    Required variables that are unset or empty are reported as missing and
    left out of the returned config. Optional variables fall back to their
    declared default when absent from the environment; a warning is recorded
    for each non-empty default that gets applied.

    Returns:
        Dictionary containing:
        - 'valid': True when no errors were recorded for any variable,
          required or optional
        - 'errors': List of error messages for missing or invalid variables
        - 'warnings': List of warning messages (defaults applied, fallback
          model identical to default model)
        - 'config': Dictionary of the values that were checked, keyed by
          variable name (with defaults applied for optional variables)
    """
    problems = []
    notices = []
    settings = {}

    logger.info("Validating environment configuration...")

    # Required variables: an unset or empty value is an error in itself and
    # the variable never reaches its validator.
    for spec in REQUIRED_ENV_VARS:
        key = spec["name"]
        raw = os.environ.get(key)

        if raw:
            ok, message = validate_env_var(key, spec["validator"], raw, spec["error_message"])
            if not ok:
                problems.append(message)
            # The value is recorded even when it failed validation.
            settings[key] = raw
        else:
            problems.append(f"{key}: Missing required environment variable")

    # Optional variables: validate whatever is in effect, default included.
    for spec in OPTIONAL_ENV_VARS:
        key = spec["name"]
        fallback = spec["default"]
        raw = os.environ.get(key, fallback)

        # Only non-empty defaults are worth a warning.
        if fallback and key not in os.environ:
            notices.append(f"{key}: Using default value: {fallback}")

        ok, message = validate_env_var(key, spec["validator"], raw, spec["error_message"])
        if not ok:
            problems.append(message)

        settings[key] = raw

    # Cross-variable check: a fallback identical to the primary model gives
    # no real fallback.
    if settings.get("FALLBACK_MODEL") == settings.get("DEFAULT_MODEL"):
        notices.append("FALLBACK_MODEL is the same as DEFAULT_MODEL. Consider using a different model for fallback.")

    is_valid = not problems

    if is_valid:
        logger.info("✅ Environment configuration is valid!")
    else:
        logger.error(f"❌ Environment configuration has {len(problems)} errors!")
        for problem in problems:
            logger.error(f"  - {problem}")

    if notices:
        logger.warning(f"⚠️ Environment configuration has {len(notices)} warnings:")
        for notice in notices:
            logger.warning(f"  - {notice}")

    return {
        "valid": is_valid,
        "errors": problems,
        "warnings": notices,
        "config": settings,
    }

def generate_env_example() -> str:
    """
    Generate a .env.example file template containing all required and optional variables.

    The template is written to ".env.example" in the current working
    directory, replacing any existing file. Required variables are emitted
    with an empty value; optional variables are emitted with their default.

    Returns:
        The path to the generated .env.example file
    """
    env_example_path = Path(".") / ".env.example"

    # Build the whole template in memory first. Opening the file truncates
    # it, so assembling the content up front means a malformed entry (e.g. a
    # missing key) fails before the existing file is touched.
    lines = [
        "# GAIA System Environment Variables\n",
        "# Copy this file to .env and fill in the values\n\n",
        "# === REQUIRED ENVIRONMENT VARIABLES ===\n\n",
    ]
    for var in REQUIRED_ENV_VARS:
        lines.append(f"# {var['description']}\n")
        lines.append(f"# {var['error_message']}\n")
        lines.append(f"{var['name']}=\n\n")

    lines.append("# === OPTIONAL ENVIRONMENT VARIABLES ===\n\n")
    for var in OPTIONAL_ENV_VARS:
        lines.append(f"# {var['description']}\n")
        lines.append(f"# Default: {var['default']}\n")
        lines.append(f"# {var['error_message']}\n")
        lines.append(f"{var['name']}={var['default']}\n\n")

    # Explicit UTF-8 so the output does not depend on the platform's locale
    # encoding.
    with open(env_example_path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    logger.info(f"Generated environment example file at {env_example_path}")
    return str(env_example_path)

def initialize_env_from_example() -> None:
    """
    Initialize a .env file from the .env.example if it doesn't exist.

    Both files are looked up in the current working directory. An existing
    .env is never overwritten. If .env.example is missing it is generated
    first via generate_env_example().
    """
    env_path = Path(".") / ".env"
    env_example_path = Path(".") / ".env.example"

    if env_path.exists():
        logger.info(f".env file already exists at {env_path}")
        return

    if not env_example_path.exists():
        generate_env_example()

    # Copy raw bytes instead of decoding and re-encoding as text: the copy
    # then cannot fail with UnicodeDecodeError when the template's encoding
    # differs from the platform's locale encoding, and the content is
    # reproduced exactly.
    env_path.write_bytes(env_example_path.read_bytes())

    logger.info(f"Initialized .env file from .env.example at {env_path}")
    logger.info("Please edit the .env file and fill in the required values")

def get_documentation() -> Dict[str, List[Dict[str, str]]]:
    """
    Summarize every known environment variable for documentation purposes.

    Returns:
        Dictionary with two keys, "required" and "optional". Each maps to a
        list with one entry per variable giving its "name", "description" and
        "validation" (the variable's error message); optional entries also
        carry "default".
    """
    return {
        "required": [
            {
                "name": spec["name"],
                "description": spec["description"],
                "validation": spec["error_message"],
            }
            for spec in REQUIRED_ENV_VARS
        ],
        "optional": [
            {
                "name": spec["name"],
                "description": spec["description"],
                "default": spec["default"],
                "validation": spec["error_message"],
            }
            for spec in OPTIONAL_ENV_VARS
        ],
    }

if __name__ == "__main__":
    # When run as a script: validate the configuration, (re)generate the
    # .env.example template, print a summary, and report the result through
    # the process exit status.
    validation_result = validate_config()

    # Generate .env.example file
    generate_env_example()

    # Print validation summary
    if validation_result["valid"]:
        print("\n✅ Configuration is valid!")
    else:
        print("\n❌ Configuration validation failed:")
        for error in validation_result["errors"]:
            print(f"  - {error}")

    if validation_result["warnings"]:
        print("\n⚠️ Configuration warnings:")
        for warning in validation_result["warnings"]:
            print(f"  - {warning}")

    # Exit non-zero on failure so shell scripts and CI jobs invoking this
    # module can detect an invalid configuration.
    if not validation_result["valid"]:
        raise SystemExit(1)