"""Gradio demo: generate Manim Python code with a quantized Code Llama 7B model.

Runs entirely on CPU via llama-cpp-python. The 4-bit GGUF weights are
downloaded automatically from the Hugging Face Hub on first run and cached
locally afterwards.
"""

import gradio as gr
from llama_cpp import Llama

# Hugging Face Hub repository that hosts the quantized GGUF weights.
MODEL_PATH = "TheBloke/CodeLlama-7B-Instruct-GGUF"

# Load the LLM once at startup (the first run downloads the weights, so it
# takes a while; subsequent runs load from the local HF cache).
llm = Llama.from_pretrained(
    repo_id=MODEL_PATH,
    filename="codellama-7b-instruct.Q4_K_M.gguf",  # 4-bit quantized variant
    n_ctx=2048,
    verbose=True,
)


def build_prompt(user_prompt: str) -> str:
    """Wrap the user's request in the Code Llama instruction ([INST]) format."""
    return f"[INST] Write Python code using the Manim library: {user_prompt} [/INST]"


def generate_code(prompt: str) -> str:
    """Generate Manim code for *prompt* and return the raw completion text.

    BUG FIX: the original passed ``stop=[""]`` — an empty stop string matches
    at offset 0 and can truncate the completion immediately. Stop on the
    model's end-of-sequence marker instead.
    """
    result = llm(
        build_prompt(prompt),
        max_tokens=512,
        temperature=0.7,
        stop=["</s>"],  # Code Llama's EOS token, not an empty string
    )
    # llama-cpp-python returns an OpenAI-style completion dict.
    return result["choices"][0]["text"]


# Build and launch the web UI at import time, matching the original script's
# module-level behavior.
gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(label="Prompt", placeholder="e.g., Animate a bouncing ball in Manim"),
    outputs=gr.Textbox(label="Generated Python Code"),
    title="🐍 Manim Code Generator - Code Llama 7B",
    description="Powered by llama-cpp and Code Llama 7B (Quantized). Runs on CPU!",
).launch()