import streamlit as st
from llama_cpp import Llama
import os
def main():
    # Direct download URL for the GGUF weights (kept for reference; the app
    # itself expects the file to already be present at model_path)
    direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
    model_path = "model/mistral-7b-v0.1.Q4_K_M.gguf"

    # Check that the model file exists before trying to load it
    if not os.path.exists(model_path):
        st.error(f"Model file {model_path} not found! Please ensure the model is included in the Docker image.")
        return
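    # Not part of the original app: a minimal sketch of a download fallback
    # using only the standard library (assumes network access and roughly
    # 4 GB of free disk space). If enabled, it would replace the error/return
    # above by fetching the weights from direct_url when they are missing:
    #
    #     if not os.path.exists(model_path):
    #         from urllib.request import urlretrieve
    #         os.makedirs(os.path.dirname(model_path), exist_ok=True)
    #         with st.spinner("Downloading model weights..."):
    #             urlretrieve(direct_url, model_path)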
    # Load the model once; st.cache_resource keeps the Llama instance alive
    # across Streamlit reruns instead of reloading the weights on every click
    @st.cache_resource
    def load_model():
        return Llama(
            model_path=model_path,
            n_ctx=4096,
            n_gpu_layers=0,  # CPU only
            verbose=False,
        )

    llm = load_model()

    def process_query(query: str) -> str:
        MAX_ATTEMPTS = 5
        for attempt in range(MAX_ATTEMPTS):
            try:
                response = llm(
                    query,
                    max_tokens=1024,
                    temperature=0.4,
                    top_p=0.95,
                    echo=False,
                    stop=["Question:", "\n\n"]  # stop at the next question or blank line
                )
                answer = response['choices'][0]['text'].strip()

                # Retry if the response is empty or too short
                if not answer or len(answer) < 2:
                    print(f"Got empty or too short response: '{answer}'. Retrying...")
                    continue
                return answer
            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {str(e)}")
                continue
        return "I apologize, but after multiple attempts, I was unable to generate a satisfactory response. Please try rephrasing your question."

    # Streamlit UI
    st.title("llama_cpp GGUF Model Inference")
    user_input = st.text_input("Enter your prompt:")

    if st.button("Generate"):
        if user_input:
            with st.spinner("Generating response..."):
                output = process_query(user_input)
            st.success("Response generated!")
            st.write(output)
        else:
            st.error("Please enter a prompt.")
if __name__ == "__main__":
    main()
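# To run locally (assumed usage, not stated in the file):
#   pip install streamlit llama-cpp-python
#   streamlit run app.py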