import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# ✅ Smaller DeepSeek model that runs on CPU
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Load tokenizer & model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")

# Create a text generation pipeline
chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)

def chat_with_ai(prompt):
    response = chatbot(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
    return response[0]["generated_text"]

# Gradio interface
ui = gr.Interface(
    fn=chat_with_ai,
    inputs=gr.Textbox(label="Ask DeepSeek something..."),
    outputs="text",
    title="🤖 DeepSeek-R1 (Distilled Version)",
    description="Chat with the smaller DeepSeek-R1 that runs even without a GPU!"
)

ui.launch()
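
# Note: by default the text-generation pipeline echoes the prompt back as part
# of "generated_text", and DeepSeek-R1 distill models typically emit their
# chain-of-thought in <think>...</think> tags before the answer. A minimal
# variant of chat_with_ai (a sketch, not the only way) that drops the echoed
# prompt uses the pipeline's return_full_text parameter:
#
# def chat_with_ai(prompt):
#     response = chatbot(
#         prompt,
#         max_new_tokens=200,
#         do_sample=True,
#         temperature=0.7,
#         return_full_text=False,  # return only newly generated tokens
#     )
#     return response[0]["generated_text"]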