from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import logging
import torch

from ..utils.cache import cache_qa_result
from ..utils.enhanced_models import enhanced_model_manager

# Check GPU availability
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
    logging.info(f"GPU detected: {gpu_name} ({gpu_memory:.1f}GB) - Using GPU for QA model")
else:
    logging.warning("No GPU detected - Using CPU for QA model (this will be slower)")


# Initialize model and tokenizer
def get_qa_model():
    try:
        logging.info("Loading QA model and tokenizer...")
        model = AutoModelForSeq2SeqLM.from_pretrained("TheGod-2003/legal_QA_model")
        tokenizer = AutoTokenizer.from_pretrained("TheGod-2003/legal_QA_model", use_fast=False)

        # Move model to GPU if available
        if torch.cuda.is_available():
            model = model.to("cuda")
            logging.info("QA model moved to GPU successfully")
        else:
            logging.info("QA model loaded on CPU")

        return model, tokenizer
    except Exception as e:
        logging.error(f"Error initializing QA model: {str(e)}")
        raise


# Load legal QA model
try:
    qa_model, qa_tokenizer = get_qa_model()
    device_str = "GPU" if torch.cuda.is_available() else "CPU"
    logging.info(f"QA model loaded successfully on {device_str}")
except Exception as e:
    logging.error(f"Failed to load QA model: {str(e)}")
    qa_model = None
    qa_tokenizer = None


def get_top_n_chunks(question, context, n=3):
    # Split context into chunks, handling both paragraph and sentence-level splits
    chunks = []

    # First split by paragraphs
    paragraphs = context.split('\n\n')

    for para in paragraphs:
        # Then split by sentences if the paragraph is too long (more than 100 words)
        if len(para.split()) > 100:
            sentences = para.split('. ')
            chunks.extend(sentences)
        else:
            chunks.append(para)

    # Remove empty chunks
    chunks = [chunk for chunk in chunks if chunk.strip()]

    # If we have very few chunks, return the whole context
    if len(chunks) <= n:
        return context

    # Calculate relevance scores via TF-IDF cosine similarity against the question
    vectorizer = TfidfVectorizer().fit(chunks + [question])
    scores = vectorizer.transform([question]) @ vectorizer.transform(chunks).T
    top_indices = np.argsort(scores.toarray()[0])[-n:][::-1]

    # Combine top chunks with proper spacing
    return " ".join([chunks[i] for i in top_indices])


@cache_qa_result
def answer_question(question, context):
    result = enhanced_model_manager.answer_question_enhanced(question, context)
    return {
        'answer': result['answer'],
        'score': result.get('confidence', 0.0),
        'start': 0,
        'end': 0
    }
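

# --- Usage sketch (illustrative only, not part of the module's API) ---
# A minimal example of how the helpers above might be exercised, assuming the
# package is importable (e.g. run with `python -m <package>.<module>` so the
# relative imports resolve). The sample question and context strings are
# hypothetical and exist purely to show the chunk-selection + QA flow.
if __name__ == "__main__":
    sample_context = (
        "A contract requires an offer, acceptance, and consideration.\n\n"
        "Consideration is something of value exchanged between the parties.\n\n"
        "A contract may be void if it lacks consideration or was signed under duress."
    )
    sample_question = "What makes a contract void?"

    # Narrow the context to the most relevant chunks before answering.
    focused_context = get_top_n_chunks(sample_question, sample_context, n=2)
    print("Focused context:", focused_context)

    # answer_question delegates to enhanced_model_manager and caches its result.
    print(answer_question(sample_question, focused_context))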