import gradio as gr
import ollama

# The model name must exactly match what was pulled from Hugging Face.
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'


def _history_to_messages(history):
    """Convert Gradio chat history into Ollama-style message dicts.

    Handles both history shapes Gradio may pass, depending on version and
    ChatInterface configuration:
      * list of (user_msg, assistant_msg) pairs (classic "tuples" format)
      * list of {'role': ..., 'content': ...} dicts ("messages" format)

    Returns a list of {'role', 'content'} dicts in conversation order.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Already OpenAI/Ollama-style; copy only the fields we need.
            messages.append({'role': turn['role'], 'content': turn['content']})
        else:
            user_msg, assistant_msg = turn
            # Either side of a pair may be None/empty (e.g. a pending turn).
            if user_msg:
                messages.append({'role': 'user', 'content': user_msg})
            if assistant_msg:
                messages.append({'role': 'assistant', 'content': assistant_msg})
    return messages


def predict(prompt, history):
    """Generate a reply from the Ollama model for the Gradio chat UI.

    Fix: the original discarded `history`, so the model only ever saw the
    latest prompt and had no conversational memory. The full history is now
    forwarded, followed by the new user prompt.

    Args:
        prompt: The user's latest message.
        history: Prior turns as supplied by gr.ChatInterface.

    Returns:
        The assistant's reply text.
    """
    messages = _history_to_messages(history)
    messages.append({'role': 'user', 'content': prompt})
    response = ollama.chat(model=MODEL_NAME, messages=messages)
    return response['message']['content']


# Set up the Gradio chat interface.
iface = gr.ChatInterface(
    fn=predict,
    title="Gemma-3 QAT GGUF Chat",
    description=f"Chat with the {MODEL_NAME} model via Ollama.",
)

# Bind to all interfaces so the UI is reachable from other machines/containers.
iface.launch(server_name="0.0.0.0", server_port=7860)