import requests
import gradio as gr
import ollama

# Report where the Space is running from (country code from ipinfo.io).
check_ipinfo = requests.get("https://ipinfo.io").json()['country']
print("Run-Location-As: ", check_ipinfo)
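# Note: the lookup above assumes outbound network access. A more defensive
# variant (a sketch, not part of the original app) would time out and fall
# back instead of raising on restricted networks:
#
#     try:
#         check_ipinfo = requests.get("https://ipinfo.io", timeout=5).json().get('country', 'unknown')
#     except requests.RequestException:
#         check_ipinfo = 'unknown'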
# List of available models for selection.
# IMPORTANT: These names must correspond to models that have already been
# pulled into Ollama (see run.sh).
AVAILABLE_MODELS = [
    'smollm2:360m-instruct-q5_K_M',
    'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M',  # OK speed with CPU
    'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M',        # Slow.
    'hf.co/bartowski/Qwen_Qwen3-1.7B-GGUF:Q5_K_M',
    'gemma3n:e2b-it-q4_K_M',
    'granite3.3:2b'
]

# --- Failed to run ---
# 'hf.co/ggml-org/SmolLM3-3B-GGUF:Q4_K_M',
# 'hf.co/bartowski/nvidia_OpenReasoning-Nemotron-1.5B-GGUF:Q5_K_M',
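# A quick availability check like the following (a sketch, not part of the
# original app; it assumes the dict-style response of the ollama Python client,
# where each entry exposes a 'name' or 'model' field) can confirm that run.sh
# actually pulled every entry in AVAILABLE_MODELS:
#
#     installed = {m.get('name') or m.get('model') for m in ollama.list().get('models', [])}
#     missing = [name for name in AVAILABLE_MODELS if name not in installed]
#     if missing:
#         print("Warning - not present in Ollama yet:", missing)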
# Default system prompt
DEFAULT_SYSTEM_PROMPT = "Answer everything in a simple, smart, relevant and accurate style. No chit-chat!"
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown("## Small Language Model (SLM) running on CPU")
    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")

    # Model selection
    with gr.Row():
        selected_model = gr.Radio(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],  # Default to the first model in the list
            label="Select Model",
            info="Choose the LLM model to chat with.",
            interactive=True
        )

    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        type='messages',
        layout="bubble"
    )
    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            container=False
        )

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )
    # Function to toggle the interactivity of the system prompt textbox
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )
    # --- Core Chat Logic ---
    # This function is the heart of the application.
    def respond(history, system_prompt, stream_output, current_selected_model):
        """
        Handles an entire chat turn: takes the history, prepends the system
        prompt, calls the Ollama API with the selected model, and streams the
        response back to the chatbot.
        """
        # Disable Qwen3 "thinking" mode by appending its /no_think soft switch
        # to the system prompt.
        if "qwen3" in current_selected_model.lower():
            system_prompt = system_prompt + " /no_think"

        # The 'history' variable from Gradio contains the entire conversation.
        # Prepend the system prompt to it to form the final payload.
        messages = [{"role": "system", "content": system_prompt}] + history

        # Add a placeholder for the assistant's response to the UI history.
        # This creates the space where the response will be displayed.
        history.append({"role": "assistant", "content": ""})

        if stream_output:
            # Stream the response from the Ollama API using the selected model.
            response_stream = ollama.chat(
                model=current_selected_model,
                messages=messages,
                stream=True
            )
            # Update the placeholder with each new chunk and yield the history
            # back to the chatbot for a real-time effect.
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                    yield history
        else:
            # Non-streaming: wait for the complete response, then show it at once.
            response = ollama.chat(
                model=current_selected_model,
                messages=messages
            )
            history[-1]['content'] = response['message']['content']
            yield history
    # This function handles the user's submission.
    def user_submit(history, user_message):
        """
        Adds the user's message to the chat history and clears the input box.
        This prepares the state for the main 'respond' function.
        """
        return history + [{"role": "user", "content": user_message}], ""

    # Gradio Event Wiring
    msg.submit(
        user_submit,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False
    ).then(
        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model],
        outputs=[chatbot]
    )
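    # The two-step wiring above is the usual Gradio pattern for streamed chat:
    # user_submit runs outside the queue so the user's message shows up in the
    # chatbot immediately, then respond (a generator) streams the assistant's
    # reply into the placeholder message it appends to the history.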
# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
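# Note: the app assumes an Ollama server is already running and reachable at
# its default localhost:11434 endpoint (on this Space, presumably set up by
# run.sh alongside the model pulls).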
""" | |
#--------------------------------------------------------------- | |
# v20250625, OK run with CPU, Gemma 3 4b it qat gguf, history support. | |
import gradio as gr | |
import ollama | |
# The model name must exactly match what was pulled from Hugging Face | |
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M' | |
# Default System Prompt | |
DEFAULT_SYSTEM_PROMPT = "You must response in zh-TW. Answer everything in simple, smart, relevant and accurate style. No chatty!" | |
# --- Gradio Interface --- | |
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo: | |
gr.Markdown(f"## LLM GGUF Chat with `{MODEL_NAME}`") | |
gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.") | |
# Use the modern 'messages' type for the Chatbot component | |
chatbot = gr.Chatbot( | |
label="Conversation", | |
height=500, | |
type='messages', | |
layout="bubble" | |
) | |
with gr.Row(): | |
msg = gr.Textbox( | |
show_label=False, | |
placeholder="Type your message here and press Enter...", | |
lines=1, | |
scale=4, | |
container=False | |
) | |
with gr.Accordion("Advanced Options", open=False): | |
with gr.Row(): | |
stream_checkbox = gr.Checkbox( | |
label="Stream Output", | |
value=True, | |
info="Enable to see the response generate in real-time." | |
) | |
use_custom_prompt_checkbox = gr.Checkbox( | |
label="Use Custom System Prompt", | |
value=False, | |
info="Check this box to provide your own system prompt below." | |
) | |
system_prompt_textbox = gr.Textbox( | |
label="System Prompt", | |
value=DEFAULT_SYSTEM_PROMPT, | |
lines=3, | |
placeholder="Enter a system prompt to guide the model's behavior...", | |
interactive=False | |
) | |
# Function to toggle the interactivity of the system prompt textbox | |
def toggle_system_prompt(use_custom): | |
return gr.update(interactive=use_custom) | |
use_custom_prompt_checkbox.change( | |
fn=toggle_system_prompt, | |
inputs=use_custom_prompt_checkbox, | |
outputs=system_prompt_textbox, | |
queue=False | |
) | |
# --- Core Chat Logic --- | |
# This function is the heart of the application. | |
def respond(history, system_prompt, stream_output): | |
#This is the single function that handles the entire chat process. | |
#It takes the history, prepends the system prompt, calls the Ollama API, | |
#and streams the response back to the chatbot. | |
# --- FINAL FIX: Construct the API payload correctly --- | |
# The 'history' variable from Gradio contains the entire conversation. | |
# We prepend the system prompt to this history to form the final payload. | |
messages = [{"role": "system", "content": system_prompt}] + history | |
# Add a placeholder for the assistant's response to the UI history. | |
# This creates the space where the streamed response will be displayed. | |
history.append({"role": "assistant", "content": ""}) | |
# Stream the response from the Ollama API | |
response_stream = ollama.chat( | |
model=MODEL_NAME, | |
messages=messages, | |
stream=True | |
) | |
# Iterate through the stream, updating the placeholder with each new chunk. | |
for chunk in response_stream: | |
if chunk['message']['content']: | |
history[-1]['content'] += chunk['message']['content'] | |
# Yield the updated history to the chatbot for a real-time effect. | |
yield history | |
# This function handles the user's submission. | |
def user_submit(history, user_message): | |
#Adds the user's message to the chat history and clears the input box. | |
#This prepares the state for the main 'respond' function. | |
return history + [{"role": "user", "content": user_message}], "" | |
# Gradio Event Wiring | |
msg.submit( | |
user_submit, | |
inputs=[chatbot, msg], | |
outputs=[chatbot, msg], | |
queue=False | |
).then( | |
respond, | |
inputs=[chatbot, system_prompt_textbox, stream_checkbox], | |
outputs=[chatbot] | |
) | |
# Launch the Gradio interface | |
demo.launch(server_name="0.0.0.0", server_port=7860) | |
#--------------------------------------------------------------- | |
""" | |
""" | |
#--------------------------------------------------------------- | |
# Backup, OK: history, user sys prompt, cpu.: | |
#--------------------------------------------------------------- | |
import gradio as gr | |
import ollama | |
# The model name must exactly match what was pulled from Hugging Face | |
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M' | |
# Default System Prompt | |
DEFAULT_SYSTEM_PROMPT = "You must response in zh-TW. Answer everything in simple, smart, relevant and accurate style. No chatty!" | |
# --- Gradio Interface --- | |
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo: | |
gr.Markdown(f"## LLM GGUF Chat with `{MODEL_NAME}`") | |
gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.") | |
# Use the modern 'messages' type for the Chatbot component | |
chatbot = gr.Chatbot( | |
label="Conversation", | |
height=500, | |
type='messages', | |
layout="bubble" | |
) | |
with gr.Row(): | |
msg = gr.Textbox( | |
show_label=False, | |
placeholder="Type your message here and press Enter...", | |
lines=1, | |
scale=4, | |
container=False | |
) | |
with gr.Accordion("Advanced Options", open=False): | |
with gr.Row(): | |
stream_checkbox = gr.Checkbox( | |
label="Stream Output", | |
value=True, | |
info="Enable to see the response generate in real-time." | |
) | |
use_custom_prompt_checkbox = gr.Checkbox( | |
label="Use Custom System Prompt", | |
value=False, | |
info="Check this box to provide your own system prompt below." | |
) | |
system_prompt_textbox = gr.Textbox( | |
label="System Prompt", | |
value=DEFAULT_SYSTEM_PROMPT, | |
lines=3, | |
placeholder="Enter a system prompt to guide the model's behavior...", | |
interactive=False | |
) | |
# Function to toggle the interactivity of the system prompt textbox | |
def toggle_system_prompt(use_custom): | |
return gr.update(interactive=use_custom) | |
use_custom_prompt_checkbox.change( | |
fn=toggle_system_prompt, | |
inputs=use_custom_prompt_checkbox, | |
outputs=system_prompt_textbox, | |
queue=False | |
) | |
# --- Core Chat Logic --- | |
# This function is the heart of the application. | |
def respond(history, system_prompt, stream_output): | |
#This is the single function that handles the entire chat process. | |
#It takes the history, prepends the system prompt, calls the Ollama API, | |
#and streams the response back to the chatbot. | |
# --- FINAL FIX: Construct the API payload correctly --- | |
# The 'history' variable from Gradio contains the entire conversation. | |
# We prepend the system prompt to this history to form the final payload. | |
messages = [{"role": "system", "content": system_prompt}] + history | |
# Add a placeholder for the assistant's response to the UI history. | |
# This creates the space where the streamed response will be displayed. | |
history.append({"role": "assistant", "content": ""}) | |
# Stream the response from the Ollama API | |
response_stream = ollama.chat( | |
model=MODEL_NAME, | |
messages=messages, | |
stream=True | |
) | |
# Iterate through the stream, updating the placeholder with each new chunk. | |
for chunk in response_stream: | |
if chunk['message']['content']: | |
history[-1]['content'] += chunk['message']['content'] | |
# Yield the updated history to the chatbot for a real-time effect. | |
yield history | |
# This function handles the user's submission. | |
def user_submit(history, user_message): | |
#Adds the user's message to the chat history and clears the input box. | |
#This prepares the state for the main 'respond' function. | |
return history + [{"role": "user", "content": user_message}], "" | |
# Gradio Event Wiring | |
msg.submit( | |
user_submit, | |
inputs=[chatbot, msg], | |
outputs=[chatbot, msg], | |
queue=False | |
).then( | |
respond, | |
inputs=[chatbot, system_prompt_textbox, stream_checkbox], | |
outputs=[chatbot] | |
) | |
# Launch the Gradio interface | |
demo.launch(server_name="0.0.0.0", server_port=7860) | |
""" |