import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import psutil
# Download the GGUF model
model_name = "jackangel/LLama_3_Instruct_SPFx_Docs_Unsloth"
# Q4_K_M is the 4-bit quant used in this example; other quantization levels
# are available in the model repo if preferred
model_file = "Llama_3_Instruct_SPFx_Docs_Unsloth.Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)
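# Optional sanity check (a minimal sketch): hf_hub_download returns a path
# inside the local Hugging Face cache, so confirming the file exists before
# loading gives a clearer failure than a cryptic Llama() error.
import os
assert os.path.isfile(model_path), f"GGUF file not found at {model_path}"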
# Use one fewer thread than the number of physical cores, with a floor of one
# (psutil.cpu_count can return None when the count is undetermined, which
# would make the bare subtraction raise a TypeError)
physical_cores = psutil.cpu_count(logical=False)
cpu_count: int = max(physical_cores - 1, 1) if physical_cores else 1
# Instantiate the model from the downloaded file
llm = Llama(
    model_path=model_path,
    n_ctx=1024,           # Context length to use
    n_threads=cpu_count,  # Number of CPU threads to use
    n_gpu_layers=0        # Number of model layers to offload to GPU
)
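# Note on n_gpu_layers (standard llama-cpp-python behaviour): -1 offloads
# every layer when a GPU-enabled build is installed, while 0 keeps inference
# entirely on the CPU, which suits a CPU-only Space.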
# Generation kwargs
generation_kwargs = {
    "max_tokens": 1000,  # Cap on the number of generated tokens
    "stop": ["</s>"],    # Stop generating at this sequence
    "temperature": 0.2,
    "echo": False,       # Don't repeat the prompt in the output
    "top_k": 20,
    "top_p": 0.7
}
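# Quick smoke test (uncomment to try the model outside the UI): calling llm()
# returns an OpenAI-style completion dict, so the generated text sits at
# result["choices"][0]["text"].
# result = llm("INSTRUCTION: You are a helpful assistant\nINPUT: Hello\nOUTPUT:", **generation_kwargs)
# print(result["choices"][0]["text"].strip())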
def chatbot(message, history):
    # Replay earlier turns so the model sees the conversation so far
    # (history arrives as (user, assistant) pairs from gr.ChatInterface)
    past_turns = ""
    for human, assistant in history:
        past_turns += "USER: " + human + "\nASSISTANT:" + assistant + "\n\n"
    # Append the new message in the fine-tune's instruction format
    prompt = "INSTRUCTION: You are a helpful assistant\nINPUT: " + message + "\nOUTPUT:"
    result = llm(past_turns + prompt, **generation_kwargs)
    return result["choices"][0]["text"].strip()
app = gr.ChatInterface(chatbot)
app.launch()
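# launch() defaults are fine on Spaces; to serve on a local network instead,
# standard Gradio parameters such as server_name and server_port apply, e.g.
# app.launch(server_name="0.0.0.0", server_port=7860)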