import requests

check_ipinfo = requests.get("https://ipinfo.io", timeout=10).json()['country']
print("Run-Location-As:", check_ipinfo)
import gradio as gr
import ollama
# List of available models for selection.
# IMPORTANT: These display names must map to models that have already been
# pulled into the local Ollama instance (e.g., by run.sh).
MODEL_ID_MAP = {
    "(阿里千問)Qwen3-4B-Instruct-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
    "(IBM)Granite3.3-2B": 'granite3.3:2b',
    "(Meta)Llama3.2-3B-Instruct": 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M',  # OK speed with CPU
    #"(阿里千問)Qwen3-4B-Thinking-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
    "(Google)Gemma3n-e2b-it": 'gemma3n:e2b-it-q4_K_M',
    #"(Tencent)混元-1.8B-Instruct": 'hf.co/bartowski/tencent_Hunyuan-1.8B-Instruct-GGUF:Q4_K_M',
    #"(Tencent)混元-4B-Instruct": 'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M',
    "(HuggingFace)SmolLM2-360M": 'smollm2:360m-instruct-q5_K_M'
}
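
# Optional helper, a minimal sketch only: ensure_models_pulled is a
# hypothetical function not in the original app (which relies on run.sh to
# pull models). ollama.pull() is idempotent, so re-running it is safe,
# though the first pull of each model can be slow on a CPU-only Space.
def ensure_models_pulled():
    for model_id in MODEL_ID_MAP.values():
        ollama.pull(model_id)  # no-op if the model is already cached locally

# ensure_models_pulled()  # uncomment to pull at startup instead of in run.sh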
# Default system prompt. The Chinese lines say: answer Chinese queries in
# Standard Traditional Chinese, and English queries in English.
DEFAULT_SYSTEM_PROMPT = """Answer everything in a simple, smart, relevant and accurate style. Do not be chatty! Besides, please:
1. 如果查詢是以中文輸入,使用標準繁體中文回答
2. 如果查詢是以英文輸入,使用英文回答"""
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown("## HH demo: LLM/SLM chatbot running on CPU only.")
    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
    # Model Selection
    with gr.Row():
        selected_model_label = gr.Radio(
            choices=list(MODEL_ID_MAP.keys()),
            value=list(MODEL_ID_MAP.keys())[0],  # Default to the first display name
            label="Select Model",
            info="Choose the LLM model to chat with.",
            interactive=True
        )
    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        type='messages',
        layout="bubble"
    )
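    # Note: type='messages' makes the chat history a list of OpenAI-style
    # {"role": ..., "content": ...} dicts, which is the same format
    # ollama.chat() accepts, so the history can be passed through unchanged.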
    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            container=False
        )
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        # --- System Prompt Options ---
        SYSTEM_PROMPT_OPTIONS = {
            "AI Henry": DEFAULT_SYSTEM_PROMPT,
            # The next two keys mean "Answer in Traditional Chinese" and
            # "Answer in Simplified Chinese"; the prompts demand that language
            # no matter what.
            "繁體中文回答": "無論如何,必須使用標準繁體中文回答. Answer everything in a simple, smart, relevant and accurate style. Do not be chatty!",
            "简体中文回答": "无论如何,必须使用标准简体中文回答. Answer everything in a simple, smart, relevant and accurate style. Do not be chatty!",
            "English Chat": "You must reply in English. Answer everything in a simple, smart, relevant and accurate style. Do not be chatty!",
            "Friendly & Conversational": "Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.",
            "Professional & Formal": "Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts.",
            "Elon Musk style": "You must chat in the style of Elon Musk!",
            "Good Coder": """
You are a highly capable coding assistant specialized in software development, algorithms, and debugging. Your responses must be accurate, efficient, and tailored to the user's request. Always follow these principles:
1. Use clear, well-commented code.
2. Prioritize readability and best practices.
3. When asked to explain, provide concise, step-by-step reasoning.
4. When asked to generate code, include input/output examples if relevant.
5. If the user provides buggy code, identify the issue and suggest a fix.
6. If multiple solutions exist, briefly compare them and recommend the best.
7. Always respect the specified programming language, libraries, and constraints.
8. Never make assumptions beyond the user's instructions unless explicitly asked.
9. If the task is ambiguous, ask clarifying questions before proceeding.
10. Avoid unnecessary boilerplate unless requested.
11. Use only open-source and free resources, libraries, and APIs. Do not suggest or rely on paid, proprietary, or license-restricted tools unless explicitly requested.
12. Your code must run on Colab or Kaggle.
13. Always include inline comments to aid the user's learning.
14. Always provide installation and operation steps.
You support multiple languages including Python, JavaScript, TypeScript, C++, Java, Go, Rust, and Bash. You can also assist with frameworks like React, Node.js, Django, Flask, and more.
Your goal is to help the user write better code, faster, and deepen their understanding of programming concepts.
""",
            "Test1(Auto TC/EN)": "Always detect the user's input language and respond in that same language. Do not translate unless explicitly requested. Answer everything in a simple, smart, relevant and accurate style. Do not be chatty!",
            # The Chinese lines say: answer Chinese queries in Standard
            # Traditional Chinese following official document conventions, cite
            # the rules relied upon, and answer English queries in English.
            "Simulate Tencent Auto TC/EN": """Answer everything in a simple, smart, relevant and accurate style. Do not be chatty! Besides, please:
1. 如果查詢是以中文輸入,使用標準繁體中文回答,符合官方文書規範
2. 要提供引用規則依据
3. 如果查詢是以英文輸入,使用英文回答"""
        }
        system_prompt_selector = gr.Radio(
            label="Choose a System Prompt Style",
            choices=list(SYSTEM_PROMPT_OPTIONS.keys()),
            value="AI Henry",
            interactive=True
        )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )
    # Toggle the interactivity of the system prompt textbox.
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )

    # Update the textbox when the prompt style changes, unless the user is
    # editing a custom prompt.
    def update_prompt_text(selected_key, use_custom):
        if not use_custom:
            return gr.update(value=SYSTEM_PROMPT_OPTIONS[selected_key])
        else:
            return gr.update()

    system_prompt_selector.change(
        fn=update_prompt_text,
        inputs=[system_prompt_selector, use_custom_prompt_checkbox],
        outputs=system_prompt_textbox,
        queue=False
    )
    # --- Core Chat Logic ---
    # This function is the heart of the application.
    def respond(history, system_prompt, stream_output, selected_model_name, selected_prompt_key, use_custom_prompt):
        """
        Handles the entire chat process: prepends the system prompt to the
        conversation history, calls the Ollama API with the selected model,
        and streams (or returns in one piece) the response to the chatbot.
        """
        # Use the selected predefined prompt unless the custom checkbox is ticked.
        if not use_custom_prompt:
            system_prompt = SYSTEM_PROMPT_OPTIONS[selected_prompt_key]
        current_selected_model = MODEL_ID_MAP[selected_model_name]
        # Disable Qwen3 "thinking" mode for the small variants.
        if any(substring in current_selected_model.lower() for substring in ["qwen3-0.6b", "qwen3-1.7b"]):
            system_prompt = system_prompt + " /no_think"
        # The 'history' variable from Gradio contains the entire conversation;
        # prepend the system prompt to form the final payload.
        messages = [{"role": "system", "content": system_prompt}] + history
        # Add a placeholder for the assistant's response to the UI history.
        # This creates the space where the response will be displayed.
        history.append({"role": "assistant", "content": ""})
        if stream_output:
            # Stream the response from the Ollama API, updating the
            # placeholder with each new chunk for a real-time effect.
            response_stream = ollama.chat(
                model=current_selected_model,
                messages=messages,
                stream=True
            )
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                    yield history
        else:
            # Non-streaming: fetch the whole response, then show it at once,
            # so the "Stream Output" checkbox actually takes effect.
            response = ollama.chat(
                model=current_selected_model,
                messages=messages,
                stream=False
            )
            history[-1]['content'] = response['message']['content']
            yield history
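    # For reference, the messages payload sent to ollama.chat looks like this
    # (illustrative values only, not from a real session):
    #   [{"role": "system", "content": "Answer everything in ..."},
    #    {"role": "user", "content": "What is 2+2?"},
    #    {"role": "assistant", "content": "4."},
    #    {"role": "user", "content": "And 3+3?"}]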
    # Handle the user's submission.
    def user_submit(history, user_message):
        """
        Adds the user's message to the chat history and clears the input box.
        This prepares the state for the main 'respond' function.
        """
        return history + [{"role": "user", "content": user_message}], ""
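    # Example with hypothetical values: user_submit([], "Hi") returns
    # ([{"role": "user", "content": "Hi"}], ""); the first element is the
    # history with the new user turn, the empty string clears the textbox.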
    # Gradio event wiring
    msg.submit(
        user_submit,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False
    ).then(
        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model_label, system_prompt_selector, use_custom_prompt_checkbox],
        outputs=[chatbot]
    )
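    # The .then() chain runs the two steps in order: user_submit appends the
    # user's turn and clears the textbox immediately (queue=False), then
    # respond generates the assistant's reply into the same chatbot component.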

# Launch the Gradio interface, listening on all interfaces on port 7860
# (the port Hugging Face Spaces expects).
demo.launch(server_name="0.0.0.0", server_port=7860)