# ollama-gguf-cpu / app_simpleOK.py
import gradio as gr
import ollama
# The model name must exactly match what was pulled from Hugging Face
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
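# A minimal sketch of the setup step (an assumption; it is not shown in this
# file): Ollama can pull GGUF weights directly from Hugging Face by repo tag,
# which is what makes the hf.co model name above resolvable. Run once before
# starting the app, assuming the Ollama CLI is installed and the daemon is up:
#
#   ollama pull hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M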
def predict(prompt, history):
    # The history is not used in this simple example, but is required by the
    # ChatInterface signature (a multi-turn sketch follows below)
    response = ollama.chat(
        model=MODEL_NAME,
        messages=[{'role': 'user', 'content': prompt}]
    )
    return response['message']['content']
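# A hedged sketch, not part of the original app: if multi-turn context were
# desired, the default (tuple-style) ChatInterface history arrives as
# (user, assistant) pairs, which can be replayed into Ollama's messages list
# before the new prompt. The name `predict_with_history` is hypothetical.
def predict_with_history(prompt, history):
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({'role': 'user', 'content': user_msg})
        messages.append({'role': 'assistant', 'content': assistant_msg})
    messages.append({'role': 'user', 'content': prompt})
    response = ollama.chat(model=MODEL_NAME, messages=messages)
    return response['message']['content']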
# Set up the Gradio chat interface
iface = gr.ChatInterface(
    fn=predict,
    title="Gemma-3 QAT GGUF Chat",
    description=f"Chat with the {MODEL_NAME} model via Ollama."
)
# Launch the interface, binding to all network interfaces on port 7860
# (the port Hugging Face Spaces expects)
iface.launch(server_name="0.0.0.0", server_port=7860)