Spaces:
Runtime error
Runtime error
File size: 2,476 Bytes
3dd2b2b d34bc05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import transformers
import torch
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"
pipeline = transformers.pipeline(
"text-generation",
model=model_id,
model_kwargs={
"torch_dtype": torch.float16,
"quantization_config": {"load_in_4bit": True},
"low_cpu_mem_usage": True,
},
)
messages = [
{"role" : "system",
"content": "You are an interviewer testing the user whether he can be a good manager or not. When the user says hi there!, i want you to begin"},
{"role" : "user",
"content": """hi there!"""},
]
prompt = pipeline.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
terminators = [
pipeline.tokenizer.eos_token_id,
pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
outputs = pipeline(
prompt,
max_new_tokens=256,
eos_token_id=terminators,
do_sample=True,
temperature=0.6,
top_p=0.9,
)
print(outputs[0]["generated_text"][len(prompt):])
import gradio as gr
messages = [{"role" : "system",
"content": "You are an interviewer testing the user whether he can be a good manager or not. When the user says hi there!, i want you to begin"},
{"role" : "user",
"content": """hi there!"""},]
def add_text(history, text):
global messages #message[list] is defined globally
history = history + [(text,'')]
messages = messages + [{"role":'user', 'content': text}]
return history, ''
def generate(history):
global messages
prompt = pipeline.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
terminators = [
pipeline.tokenizer.eos_token_id,
pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
outputs = pipeline(
prompt,
max_new_tokens=256,
eos_token_id=terminators,
do_sample=True,
temperature=0.6,
top_p=0.9,
)
response_msg = outputs[0]["generated_text"][len(prompt):]
for char in response_msg:
history[-1][1] += char
yield history
pass
with gr.Blocks() as demo:
chatbot = gr.Chatbot(value=[], elem_id="chatbot")
with gr.Row():
txt = gr.Textbox(
show_label=False,
placeholder="Enter text and press enter",
)
txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
generate, inputs =[chatbot,],outputs = chatbot,)
demo.queue()
demo.launch(debug=True)
|