import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import psutil
# Download the GGUF model
model_name = "jackangel/LLama_3_Instruct_SPFx_Docs_Unsloth"
# Q4_K_M is the 4-bit quant used in this example; other quantization levels
# are available in the model repo if preferred
model_file = "Llama_3_Instruct_SPFx_Docs_Unsloth.Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)
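# Optional sanity check (a minimal sketch): hf_hub_download returns a path
# inside the local Hugging Face cache, so confirming the file exists before
# loading gives a clearer failure than a cryptic Llama() error.
import os
assert os.path.isfile(model_path), f"GGUF file not found at {model_path}"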
# Use one fewer thread than the number of physical cores, with a floor of one
# (psutil.cpu_count can return None when the count is undetermined, which
# would make the bare subtraction raise a TypeError)
physical_cores = psutil.cpu_count(logical=False)
cpu_count: int = max(physical_cores - 1, 1) if physical_cores else 1
# Instantiate the model from the downloaded file
llm = Llama(
    model_path=model_path,
    n_ctx=1024,           # Context length to use
    n_threads=cpu_count,  # Number of CPU threads to use
    n_gpu_layers=0        # Number of model layers to offload to GPU
)
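# Note on n_gpu_layers (standard llama-cpp-python behaviour): -1 offloads
# every layer when a GPU-enabled build is installed, while 0 keeps inference
# entirely on the CPU, which suits a CPU-only Space.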
# Generation kwargs
generation_kwargs = {
    "max_tokens": 1000,  # Cap on the number of generated tokens
    "stop": ["</s>"],    # Stop generating at this sequence
    "temperature": 0.2,
    "echo": False,       # Don't repeat the prompt in the output
    "top_k": 20,
    "top_p": 0.7
}
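# Quick smoke test (uncomment to try the model outside the UI): calling llm()
# returns an OpenAI-style completion dict, so the generated text sits at
# result["choices"][0]["text"].
# result = llm("INSTRUCTION: You are a helpful assistant\nINPUT: Hello\nOUTPUT:", **generation_kwargs)
# print(result["choices"][0]["text"].strip())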
def chatbot(message, history):
    # Replay earlier turns so the model sees the conversation so far
    # (history arrives as (user, assistant) pairs from gr.ChatInterface)
    past_turns = ""
    for human, assistant in history:
        past_turns += "USER: " + human + "\nASSISTANT:" + assistant + "\n\n"
    # Append the new message in the fine-tune's instruction format
    prompt = "INSTRUCTION: You are a helpful assistant\nINPUT: " + message + "\nOUTPUT:"
    result = llm(past_turns + prompt, **generation_kwargs)
    return result["choices"][0]["text"].strip()
app = gr.ChatInterface(chatbot)
app.launch()
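# launch() defaults are fine on Spaces; to serve on a local network instead,
# standard Gradio parameters such as server_name and server_port apply, e.g.
# app.launch(server_name="0.0.0.0", server_port=7860)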