import os
import torch
from dataclasses import dataclass
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, WhisperFeatureExtractor, AutoTokenizer
from typing import Dict, Any


@dataclass
class ModelConfig:
    """Configuration for Whisper models"""
    model_id: str
    display_name: str


class ModelConfigs:
    """Available model configurations"""
    SMALL = ModelConfig(
        model_id="nineninesix/kyrgyz-whisper-small",
        display_name="Small"
    )
    MEDIUM = ModelConfig(
        model_id="nineninesix/kyrgyz-whisper-medium",
        display_name="Medium"
    )

    @classmethod
    def get_all_configs(cls) -> Dict[str, ModelConfig]:
        """Get all available model configurations"""
        return {
            "Small": cls.SMALL,
            "Medium": cls.MEDIUM
        }


class InitModels:
    """Initialize and manage Whisper models for Kyrgyz speech recognition"""

    def __init__(self):
        self.token = os.getenv('HF_TOKEN')
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        self.models: Dict[str, Any] = {}
        self.pipelines: Dict[str, Any] = {}

    def initialize_model(self, model_config: ModelConfig) -> None:
        """Initialize a specific model and its pipeline"""
        model_id = model_config.model_id

        # Load model (keep on CPU for ZeroGPU compatibility)
        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            torch_dtype=self.torch_dtype,
            low_cpu_mem_usage=True,
            use_safetensors=True,
            token=self.token
        )

        # Load feature extractor
        feature_extractor = WhisperFeatureExtractor.from_pretrained(
            model_id,
            token=self.token
        )

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            trust_remote_code=True,
            language="kyrgyz",
            task="transcribe",
            token=self.token
        )

        # Create pipeline (device will be set during inference)
        pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            torch_dtype=self.torch_dtype,
            device=-1  # CPU, will move to GPU in decorated function
        )

        # Store model components
        self.models[model_config.display_name] = {
            "model": model,
            "tokenizer": tokenizer,
            "feature_extractor": feature_extractor
        }
        self.pipelines[model_config.display_name] = pipe

    def initialize_all_models(self) -> None:
        """Initialize all available models"""
        configs = ModelConfigs.get_all_configs()
        for name, config in configs.items():
            print(f"Initializing {name} model: {config.model_id}")
            self.initialize_model(config)

    def get_pipeline(self, model_name: str) -> Any:
        """Get pipeline for a specific model"""
        return self.pipelines.get(model_name)

    def get_tokenizer(self, model_name: str) -> Any:
        """Get tokenizer for a specific model"""
        return self.models.get(model_name, {}).get("tokenizer")

    def get_model(self, model_name: str) -> Any:
        """Get model for a specific model name"""
        return self.models.get(model_name, {}).get("model")