"""Download and load language-model artifacts (classifier, scalers, vectorizers).

Artifacts are fetched from a Hugging Face Hub repo into MODEL_DIR and then
unpickled / parsed by :func:`load_model`.
"""

import json
import logging
import pickle
import shutil
from pathlib import Path

import torch
from huggingface_hub import snapshot_download
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV

from config import Config

REPO_ID = Config.REPO_ID_LANG
# MODEL_DIR is None when LANG_MODEL is unset; every entry point guards on this.
MODEL_DIR = Path(Config.LANG_MODEL) if Config.LANG_MODEL else None
HF_TOKEN = Config.HF_TOKEN
ENGLISH_SUBDIR = "English_model"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Every one of these must be present for a directory to count as a usable model.
REQUIRED_FILES = (
    "classifier.pkl",
    "scaler.pkl",
    "word_vectorizer.pkl",
    "char_vectorizer.pkl",
    "feature_names.json",
    "metadata.json",
)


def _patch_legacy_logistic_model(model):
    """Backfill attributes expected by newer sklearn versions.

    Older pickled LogisticRegression / LogisticRegressionCV objects may lack
    the ``multi_class`` attribute that newer sklearn code paths read; default
    it to ``"auto"`` so the unpickled model keeps working.
    """
    if isinstance(model, (LogisticRegression, LogisticRegressionCV)) and not hasattr(
        model, "multi_class"
    ):
        model.multi_class = "auto"
    return model


def _has_required_artifacts(model_dir: Path) -> bool:
    """Return True if *model_dir* is a directory containing every required artifact."""
    # Path.is_dir() is already False for nonexistent paths, so no separate
    # exists() check is needed.
    if not model_dir.is_dir():
        return False
    return all((model_dir / filename).exists() for filename in REQUIRED_FILES)


def _resolve_artifact_dir(base_dir: Path) -> Path | None:
    """Return the first directory under *base_dir* holding all artifacts, or None.

    Artifacts may live directly in *base_dir* or inside its ENGLISH_SUBDIR
    subdirectory, depending on how the snapshot was copied.
    """
    for candidate in (base_dir, base_dir / ENGLISH_SUBDIR):
        if _has_required_artifacts(candidate):
            return candidate
    return None


def warmup():
    """Ensure model artifacts exist locally, downloading them if necessary.

    Raises:
        ValueError: if LANG_MODEL is not configured.
    """
    logging.info("Warming up model...")
    if MODEL_DIR is None:
        raise ValueError("LANG_MODEL is not configured")
    if _resolve_artifact_dir(MODEL_DIR):
        logging.info("Model artifacts already exist, skipping download.")
        return
    download_model_repo()


def download_model_repo():
    """Download the model repo snapshot from the Hub into MODEL_DIR.

    No-op when the artifacts are already present. Copies from the snapshot's
    ENGLISH_SUBDIR when that layout is used, otherwise from the snapshot root.

    Raises:
        ValueError: if LANG_MODEL or the repo id is not configured.
    """
    if MODEL_DIR is None:
        raise ValueError("LANG_MODEL is not configured")
    if not REPO_ID:
        raise ValueError("English_model repo id is not configured")
    if _resolve_artifact_dir(MODEL_DIR):
        logging.info("Model artifacts already exist, skipping download.")
        return
    snapshot_path = Path(snapshot_download(repo_id=REPO_ID, token=HF_TOKEN))
    # Build the candidate subdir path once instead of twice.
    english_dir = snapshot_path / ENGLISH_SUBDIR
    source_dir = english_dir if english_dir.is_dir() else snapshot_path
    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    shutil.copytree(source_dir, MODEL_DIR, dirs_exist_ok=True)


def _load_pickle(path: Path):
    """Unpickle *path*.

    NOTE(review): pickle.load executes arbitrary code on untrusted data; this
    is only safe because the artifacts come from our own configured repo.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


def _load_json(path: Path):
    """Parse *path* as UTF-8 JSON."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def load_model():
    """Load all model artifacts, downloading them first if missing.

    Returns:
        Tuple of (classifier, scaler, word_vectorizer, char_vectorizer,
        feature_names, metadata) — same order as before.

    Raises:
        ValueError: if LANG_MODEL is not configured.
        FileNotFoundError: if artifacts are still missing after a download attempt.
    """
    if MODEL_DIR is None:
        raise ValueError("LANG_MODEL is not configured")
    artifact_dir = _resolve_artifact_dir(MODEL_DIR)
    if artifact_dir is None:
        logging.info("Model artifacts missing in %s, downloading now.", MODEL_DIR)
        download_model_repo()
        artifact_dir = _resolve_artifact_dir(MODEL_DIR)
    if artifact_dir is None:
        raise FileNotFoundError(
            f"Required model artifacts not found in {MODEL_DIR}. Expected files: {', '.join(REQUIRED_FILES)}"
        )
    loaded_classifier = _patch_legacy_logistic_model(
        _load_pickle(artifact_dir / "classifier.pkl")
    )
    loaded_scaler = _load_pickle(artifact_dir / "scaler.pkl")
    loaded_word_vectorizer = _load_pickle(artifact_dir / "word_vectorizer.pkl")
    loaded_char_vectorizer = _load_pickle(artifact_dir / "char_vectorizer.pkl")
    loaded_features = _load_json(artifact_dir / "feature_names.json")
    loaded_metadata = _load_json(artifact_dir / "metadata.json")
    return (
        loaded_classifier,
        loaded_scaler,
        loaded_word_vectorizer,
        loaded_char_vectorizer,
        loaded_features,
        loaded_metadata,
    )