# WizardCoder-Python-7B coding-assistant chat demo (Gradio + ctransformers).
import gradio as gr
from ctransformers import AutoModelForCausalLM
import time

# Load the 4-bit K-quant GGUF build of WizardCoder-Python-7B via ctransformers.
# stream=True makes llm(prompt) return a token generator instead of a full string.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardCoder-Python-7B-V1.0-GGUF",
    model_file="wizardcoder-python-7b-v1.0.Q4_K_M.gguf",
    model_type="llama",  # WizardCoder-Python is a Llama-family architecture
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    stream=True,
)
def generate_response(message, history):
    """Stream a chat reply, yielding the updated history for gr.Chatbot.

    Args:
        message: The user's new message text.
        history: List of [user, assistant] message pairs from the Chatbot.

    Yields:
        The history list, with the assistant's last reply growing as tokens
        stream in from the model.
    """
    # Rebuild the whole conversation as one flat prompt. join() avoids the
    # quadratic cost of repeated string concatenation in a loop; the resulting
    # string is byte-identical to the original += version.
    turns = [f"<user>: {user}\n<assistant>: {bot}\n" for user, bot in history]
    turns.append(f"<user>: {message}\n<assistant>:")
    prompt = "".join(turns)

    # Append a placeholder pair that we fill in token by token below.
    history.append([message, ""])
    response = ""
    for chunk in llm(prompt):  # stream=True -> llm(...) yields token chunks
        response += chunk
        history[-1][1] = response
        time.sleep(0.01)  # small pause to smooth the streaming UI update rate
        yield history
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask coding questions...", label="Your Message")
    clear = gr.Button("Clear")
    # Stream the reply into the chatbot, then empty the textbox so the same
    # message cannot be accidentally re-submitted (previously it stayed put).
    msg.submit(generate_response, [msg, chatbot], chatbot).then(
        lambda: "", None, msg
    )
    clear.click(lambda: [], None, chatbot)

demo.launch()