import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Load model and tokenizer
model_name = "baidu/ERNIE-4.5-0.3B-PT"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float32,
).to(device)
embedding_layer = model.get_input_embeddings()

# Get a sentence embedding by averaging the input-layer token embeddings
def get_sentence_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", add_special_tokens=True).to(device)
    with torch.no_grad():
        embeddings = embedding_layer(inputs["input_ids"])  # (1, seq_len, hidden_size)
    sentence_embedding = embeddings.mean(dim=1)  # (1, hidden_size)
    return sentence_embedding

# Show the token list and token IDs for a sentence
def tokenize_sentence(sentence):
    tokens = tokenizer.tokenize(sentence)
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    return list(zip(tokens, token_ids))

# PCA plot of two sentence embeddings
def plot_embeddings(sentence1, sentence2):
    emb1 = get_sentence_embedding(sentence1).cpu().numpy()
    emb2 = get_sentence_embedding(sentence2).cpu().numpy()
    embeddings = np.vstack([emb1, emb2])  # Shape: (2, hidden_size)

    # PCA to reduce to 2D
    pca = PCA(n_components=2)
    reduced = pca.fit_transform(embeddings)

    # Plot
    fig, ax = plt.subplots()
    ax.scatter(reduced[:, 0], reduced[:, 1], color=["red", "blue"])
    ax.annotate("Sentence 1", (reduced[0, 0], reduced[0, 1]), color="red")
    ax.annotate("Sentence 2", (reduced[1, 0], reduced[1, 1]), color="blue")
    ax.set_title("2D PCA of Sentence Embeddings")
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    ax.grid(True)
    return fig

# Main function that produces all outputs for the interface
def analyze_sentences(sentence1, sentence2):
    # Cosine similarity
    emb1 = get_sentence_embedding(sentence1)
    emb2 = get_sentence_embedding(sentence2)
    similarity = F.cosine_similarity(emb1, emb2).item()

    # Token info
    tokens1 = tokenize_sentence(sentence1)
    tokens2 = tokenize_sentence(sentence2)

    # Plot
    fig = plot_embeddings(sentence1, sentence2)

    return f"Similarity: {similarity:.4f}", tokens1, tokens2, fig

# Build Gradio interface
demo = gr.Interface(
    fn=analyze_sentences,
    inputs=[
        gr.Textbox(label="Sentence 1", placeholder="I love cats."),
        gr.Textbox(label="Sentence 2", placeholder="I love dogs."),
    ],
    outputs=[
        gr.Textbox(label="Cosine Similarity Score"),
        gr.Dataframe(headers=["Token", "Token ID"], label="Sentence 1 Tokens"),
        gr.Dataframe(headers=["Token", "Token ID"], label="Sentence 2 Tokens"),
        gr.Plot(label="2D PCA Plot of Embeddings"),
    ],
    title="ERNIE 4.5 Embedding Visualization",
    description="Compare two sentences using ERNIE 4.5-0.3B's embedding layer. Outputs cosine similarity, token info, and a PCA plot.",
)

if __name__ == "__main__":
    demo.launch()
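
# Optional smoke test (a sketch, not part of the app): uncomment to exercise the
# pipeline directly without launching the Gradio UI. It assumes the model weights
# have been downloaded successfully and reuses analyze_sentences() defined above;
# the output file name "embedding_pca.png" is an arbitrary example.
#
#     text, toks1, toks2, fig = analyze_sentences("I love cats.", "I love dogs.")
#     print(text)                      # cosine similarity string, e.g. "Similarity: ..."
#     print(toks1, toks2, sep="\n")    # (token, token_id) pairs for each sentence
#     fig.savefig("embedding_pca.png") # save the PCA scatter plot to disk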