import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
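
# Hugging Face repo and the quantized GGUF file to serve. Q4_K_M is a common
# CPU-friendly quantization; the same repo usually hosts other .gguf variants
# if a different size/quality trade-off is needed.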
MODEL_NAME_OR_PATH = "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF"
MODEL_FILE = "DeepSeek-R1-0528-Qwen3-8B-Q4_K_M.gguf"
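
# Fetch the GGUF into the working directory on first run; later runs reuse the local copy.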
if not os.path.exists(MODEL_FILE):
    print(f"Downloading {MODEL_FILE} from {MODEL_NAME_OR_PATH}...")
    try:
        hf_hub_download(
            repo_id=MODEL_NAME_OR_PATH,
            filename=MODEL_FILE,
            local_dir=".",
            local_dir_use_symlinks=False  # deprecated (no-op) in recent huggingface_hub; kept for older versions
        )
        print("Download complete.")
    except Exception as e:
        print(f"Error downloading model: {e}")
        print("Please ensure the MODEL_FILE name is correct and available in the repository.")
        exit()
else:
    print(f"Model file {MODEL_FILE} already exists.")
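
# Load the model for CPU-only inference: n_ctx caps the context at 2048 tokens,
# n_threads=None lets llama.cpp choose a thread count, and n_gpu_layers=0 keeps
# every layer on the CPU (raise it only if llama-cpp-python was built with GPU support).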
try:
    print("Loading model...")
    llm = Llama(
        model_path=MODEL_FILE,
        n_ctx=2048,
        n_threads=None,
        n_gpu_layers=0
    )
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading Llama model: {e}")
    print("Ensure llama-cpp-python is installed correctly and the model file is valid.")
    exit()


def predict(message, history):
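    """Chat callback for gr.ChatInterface.

    `history` is assumed to arrive in the default tuple-style format, a list of
    [user_message, assistant_message] pairs, and is converted into the
    OpenAI-style message list that llama-cpp-python expects.
    """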
    history_llama_format = []
    for human, ai in history:
        history_llama_format.append({"role": "user", "content": human})
        history_llama_format.append({"role": "assistant", "content": ai})
    history_llama_format.append({"role": "user", "content": message})
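
    # Prefer create_chat_completion, which lets llama-cpp-python format the
    # messages with the chat template stored in the GGUF (or its configured
    # chat_format). The hand-built prompt below is only a fallback.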
    try:
        response = llm.create_chat_completion(
            messages=history_llama_format,
        )
        assistant_response = response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error during model inference: {e}")

        # Fallback: build a generic ChatML-style prompt by hand. This may not
        # match the model's own chat template exactly, but gives a best-effort
        # answer if create_chat_completion fails.
        prompt = ""
        for entry in history_llama_format:
            if entry["role"] == "user":
                prompt += f"<|im_start|>user\n{entry['content']}<|im_end|>\n"
            elif entry["role"] == "assistant":
                prompt += f"<|im_start|>assistant\n{entry['content']}<|im_end|>\n"
        prompt += "<|im_start|>assistant\n"

        try:
            output = llm(
                prompt,
                max_tokens=256,
                stop=["<|im_end|>", "<|im_start|>user"],
                echo=False
            )
            assistant_response = output['choices'][0]['text'].strip()
        except Exception as e_fallback:
            print(f"Error during fallback model inference: {e_fallback}")
            assistant_response = "Sorry, I encountered an error during fallback."

    return assistant_response
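

# Gradio UI: ChatInterface wires predict() into a chat widget with a few example prompts.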
iface = gr.ChatInterface(
    fn=predict,
    title="Unsloth DeepSeek-Qwen3-8B GGUF Chat",
    description="Chat with the unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF model.",
    examples=[
        ["Hello, how are you?"],
        ["What is the capital of France?"],
        ["Write a short story about a friendly robot."]
    ],
    chatbot=gr.Chatbot(height=600)
)


if __name__ == "__main__":
    print("Launching Gradio interface...")
    iface.launch()
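
# Tip: iface.launch(share=True) creates a temporary public URL, useful when the
# app runs on a remote machine.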