Spaces:

jayashree
/

TatTwamAI

Sleeping

File size: 6,747 Bytes

"""
Models module for Personal Coach CrewAI Application
Handles all AI model loading and management
"""

from typing import TYPE_CHECKING, Optional, Dict, Any
import torch

# Version info
__version__ = "1.0.0"

# Lazy imports
if TYPE_CHECKING:
    from .mistral_model import MistralModel, MistralConfig, MistralPromptFormatter
    from .tiny_gpt2_model import TinyGPT2Model

# Public API
__all__ = [
    # Main model classes
    "MistralModel",
    "MistralConfig", 
    "MistralPromptFormatter",
    "TinyGPT2Model",

    # Model management
    "load_model",
    "get_model_info",
    "clear_model_cache",

    # Constants
    "AVAILABLE_MODELS",
    "MODEL_REQUIREMENTS",
    "DEFAULT_MODEL_CONFIG"
]

# Available models
AVAILABLE_MODELS = {
    "mistral-7b-instruct": {
        "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
        "type": "instruction-following",
        "size": "7B",
        "context_length": 32768,
        "languages": ["multilingual"]
    },
    "mistral-7b": {
        "model_id": "mistralai/Mistral-7B-v0.1",
        "type": "base",
        "size": "7B", 
        "context_length": 32768,
        "languages": ["multilingual"]
    },
    "tiny-gpt2": {
        "model_id": "sshleifer/tiny-gpt2",
        "type": "tiny",
        "size": "small",
        "context_length": 256,
        "languages": ["en"]
    }
}

# Model requirements
MODEL_REQUIREMENTS = {
    "mistral-7b-instruct": {
        "ram": "16GB",
        "vram": "8GB (GPU) or 16GB (CPU)",
        "disk": "15GB",
        "compute": "GPU recommended"
    },
    "tiny-gpt2": {
        "ram": "≤1GB",
        "vram": "CPU only",
        "disk": "<1GB",
        "compute": "CPU"
    }
}

# Default configuration: Set to CPU/float32
DEFAULT_MODEL_CONFIG = {
    "max_length": 256,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 50,
    "do_sample": True,
    "num_return_sequences": 1,
    "device": "cpu",
    "torch_dtype": torch.float32,
    "load_in_8bit": False,
    "cache_dir": ".cache/models"
}

# Model instance cache
_model_cache: Dict[str, Any] = {}

def load_model(model_name: str = "tiny-gpt2", config: Optional[Dict[str, Any]] = None):
    """
    Load a model with caching support

    Args:
        model_name: Name of the model to load
        config: Optional configuration override
        
    Returns:
        Model instance
    """
    # Check cache first
    cache_key = f"{model_name}_{str(config)}"
    if cache_key in _model_cache:
        return _model_cache[cache_key]

    # Import here to avoid circular imports
    if model_name == "tiny-gpt2":
        from .tiny_gpt2_model import TinyGPT2Model
        # No config needed for TinyGPT2, ignore config for now
        model = TinyGPT2Model()
    elif model_name in ["mistral-7b-instruct", "mistral-7b"]:
        from .mistral_model import MistralModel, MistralConfig
        model_info = AVAILABLE_MODELS.get(model_name)
        if not model_info:
            raise ValueError(f"Unknown model: {model_name}")
        model_config = DEFAULT_MODEL_CONFIG.copy()
        if config:
            model_config.update(config)
        mistral_config = MistralConfig(
            model_id=model_info["model_id"],
            **model_config
        )
        model = MistralModel(mistral_config)
    else:
        raise ValueError(f"Unknown model: {model_name}")

    # Cache it
    _model_cache[cache_key] = model
    return model

def get_model_info(model_name: str) -> Optional[Dict[str, Any]]:
    """
    Get information about a model

    Args:
        model_name: Name of the model

    Returns:
        Model information dictionary or None
    """
    info = AVAILABLE_MODELS.get(model_name)
    if info:
        # Add requirements
        requirements = MODEL_REQUIREMENTS.get(model_name, {})
        info = info.copy() # avoid mutating global dict!
        info["requirements"] = requirements

        # Add loading status
        cache_keys = [k for k in _model_cache.keys() if k.startswith(model_name)]
        info["is_loaded"] = len(cache_keys) > 0

    return info

def clear_model_cache(model_name: Optional[str] = None):
    """
    Clear model cache to free memory

    Args:
        model_name: Specific model to clear, or None for all
    """
    global _model_cache

    if model_name:
        # Clear specific model
        keys_to_remove = [k for k in _model_cache.keys() if k.startswith(model_name)]
        for key in keys_to_remove:
            del _model_cache[key]
    else:
        # Clear all
        _model_cache.clear()

    # Force garbage collection
    import gc
    gc.collect()

    # Clear GPU cache if using CUDA
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Utility functions
def estimate_memory_usage(model_name: str) -> Dict[str, Any]:
    """
    Estimate memory usage for a model

    Args:
        model_name: Name of the model

    Returns:
        Memory estimation dictionary
    """
    model_info = AVAILABLE_MODELS.get(model_name)
    if not model_info:
        return {}

    size = model_info.get("size", "7B")
    if size.endswith("B"):
        size_gb = float(size.replace("B", ""))  # e.g. "7B"
    elif size == "small":
        size_gb = 0.02  # Arbitrary tiny model size in GB
    else:
        size_gb = 0.1  # catchall

    estimates = {
        "model_size_gb": size_gb,
        "fp32_memory_gb": size_gb * 4,  # 4 bytes per parameter
        "fp16_memory_gb": size_gb * 2,  # 2 bytes per parameter
        "int8_memory_gb": size_gb,      # 1 byte per parameter
        "recommended_ram_gb": size_gb * 2.5,
        "recommended_vram_gb": size_gb * 1.5
    }

    return estimates

def get_device_info() -> Dict[str, Any]:
    """Get information about available compute devices"""
    info = {
        "cuda_available": torch.cuda.is_available(),
        "device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0,
        "current_device": torch.cuda.current_device() if torch.cuda.is_available() else None,
        "device_name": torch.cuda.get_device_name() if torch.cuda.is_available() else "CPU"
    }

    if torch.cuda.is_available():
        info["gpu_memory"] = {
            "allocated": torch.cuda.memory_allocated() / 1024**3,  # GB
            "reserved": torch.cuda.memory_reserved() / 1024**3,    # GB
            "total": torch.cuda.get_device_properties(0).total_memory / 1024**3  # GB
        }

    return info

# Module initialization
import os
if os.getenv("DEBUG_MODE", "false").lower() == "true":
    print(f"Models module v{__version__} initialized")
    device_info = get_device_info()
    print(f"Device: {device_info['device_name']}")
    if device_info['cuda_available']:
        print(f"GPU Memory: {device_info['gpu_memory']['total']:.1f}GB")