import gradio as gr
import ollama

# The model name must exactly match what was pulled from Hugging Face.
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'


def _history_to_messages(history):
    """Convert Gradio chat history into Ollama-style message dicts.

    Handles both history shapes Gradio may pass, depending on version and
    ChatInterface configuration:
      * list of (user_msg, assistant_msg) pairs (classic "tuples" format)
      * list of {'role': ..., 'content': ...} dicts ("messages" format)

    Returns a list of {'role', 'content'} dicts in conversation order.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Already OpenAI/Ollama-style; copy only the fields we need.
            messages.append({'role': turn['role'], 'content': turn['content']})
        else:
            user_msg, assistant_msg = turn
            # Either side of a pair may be None/empty (e.g. a pending turn).
            if user_msg:
                messages.append({'role': 'user', 'content': user_msg})
            if assistant_msg:
                messages.append({'role': 'assistant', 'content': assistant_msg})
    return messages


def predict(prompt, history):
    """Generate a reply from the Ollama model for the Gradio chat UI.

    Fix: the original discarded `history`, so the model only ever saw the
    latest prompt and had no conversational memory. The full history is now
    forwarded, followed by the new user prompt.

    Args:
        prompt: The user's latest message.
        history: Prior turns as supplied by gr.ChatInterface.

    Returns:
        The assistant's reply text.
    """
    messages = _history_to_messages(history)
    messages.append({'role': 'user', 'content': prompt})
    response = ollama.chat(model=MODEL_NAME, messages=messages)
    return response['message']['content']


# Set up the Gradio chat interface.
iface = gr.ChatInterface(
    fn=predict,
    title="Gemma-3 QAT GGUF Chat",
    description=f"Chat with the {MODEL_NAME} model via Ollama.",
)

# Bind to all interfaces so the UI is reachable from other machines/containers.
iface.launch(server_name="0.0.0.0", server_port=7860)