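"""Gradio streaming chat demo for a local Llama3-style model.

Loads the tokenizer and model with transformers, streams generated tokens
through TextIteratorStreamer from a background thread, and serves a simple
chat UI built with gr.Blocks.
"""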
import torch
import gradio as gr
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# device = "cuda"  # the device to load the model onto
device = "cpu"  # the device to load the model onto

bot_avatar = "shuaikang/dl_logo_rect.png"  # path to the chatbot avatar image
user_avatar = "shuaikang/user_avatar.jpg"  # path to the user avatar image

# model_path = "sethuiyer/Medichat-Llama3-8B"  # path to the downloaded model
model_path = "johnsnowlabs/JSL-MedMX-7X"
# model_path = "aaditya/Llama3-OpenBioLLM-8B"
# Global conversation history. Llama3 supports a system prompt, so one is set by default.
system_message = {
    "role": "system",
    "content": "You are a helpful assistant trained by MetaAI! But you are running with DataLearnerAI Code."
}
llama3_chat_history = [system_message]
# Globals that will hold the loaded model components
tokenizer = None
streamer = None
model = None
terminators = None

def init_model():
    """Initialize the model by loading it from the local path."""
    global tokenizer, model, streamer, terminators
    tokenizer = AutoTokenizer.from_pretrained(
        model_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map=device,
        trust_remote_code=True
    )
    # "<|eot_id|>" is Llama3's end-of-turn token; stopping on it (in addition
    # to eos_token_id) ends generation at the close of the assistant's turn
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    # Stream decoded tokens as they are generated, skipping the prompt itself
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
    )

with gr.Blocks() as demo:
    # Step 1: load the model
    init_model()

    # Step 2: build the Gradio chatbot UI and its controls
    chatbot = gr.Chatbot(
        height=900,
        avatar_images=(user_avatar, bot_avatar)
    )
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    # Clear the stored conversation history, keeping the default system prompt
    # (resetting to an empty list would silently drop it for later turns)
    def clear_history():
        global llama3_chat_history
        llama3_chat_history = [system_message]

    # Produce the streamed reply
    def respond(message, chat_history):
        global llama3_chat_history, tokenizer, model, streamer
        llama3_chat_history.append({"role": "user", "content": message})
        # Format the conversation with Llama3's built-in chat template
        history_str = tokenizer.apply_chat_template(
            llama3_chat_history,
            tokenize=False,
            add_generation_prompt=True
        )
        # Tokenize the formatted history and move it to the target device
        inputs = tokenizer(history_str, return_tensors='pt').to(device)
        chat_history.append([message, ""])
        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=4096,
            num_beams=1,
            do_sample=True,
            top_p=0.8,
            temperature=0.3,
            eos_token_id=terminators
        )
        # Run generation in a background thread so the streamed output can be
        # consumed here as it arrives
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()
        for new_text in streamer:
            chat_history[-1][1] += new_text
            yield "", chat_history
        llama3_chat_history.append(
            {"role": "assistant", "content": chat_history[-1][1]}
        )

    # Clicking the clear button also wipes the stored history
    clear.click(clear_history)
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
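
# Usage sketch (assumes this file is saved as app.py, the usual Space entry
# point): run `python app.py`, then open http://localhost:7860 in a browser.
# server_name="0.0.0.0" binds all interfaces, which is the typical setup
# inside a Hugging Face Space container.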