# sport-chatbot-docker / app_no_docker.py
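"""Streamlit app that downloads a quantized GGUF model and serves prompt
completions locally through llama-cpp-python.

Run with: streamlit run app_no_docker.py
"""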
import streamlit as st
from llama_cpp import Llama
import requests
import os
from tqdm import tqdm
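# 4-bit (Q4_K_M) GGUF quantization of Mistral 7B v0.1 from TheBloke, and the
# local path the file is saved to.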
direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
model_path = "model/mistral-7b-v0.1.Q4_K_M.gguf"
def main():
    def download_file_with_progress(url: str, filename: str):
        """Download a file with a progress bar using requests."""
        response = requests.get(url, stream=True)
        total_size = int(response.headers.get('content-length', 0))

        with open(filename, 'wb') as file, tqdm(
            desc=f"Downloading {filename}",
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as progress_bar:
            for data in response.iter_content(chunk_size=1024):
                size = file.write(data)
                progress_bar.update(size)
    # Download the model (cached so the download only runs once per session)
    @st.cache_resource
    def download_model():
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        download_file_with_progress(direct_url, model_path)
    # Ensure the model is downloaded
    if not os.path.exists(model_path):
        st.info("Model file not found. Downloading...")
        download_model()

    if not os.path.exists(model_path):
        st.error(f"Model file {model_path} not found after download!")
        return
    # Load the model
    llm = Llama(
        model_path=model_path,
        n_ctx=4096,
        n_gpu_layers=0,  # CPU only
        verbose=False,
    )
    def process_query(query: str) -> str:
        MAX_ATTEMPTS = 5
        for attempt in range(MAX_ATTEMPTS):
            try:
                response = llm(
                    query,
                    max_tokens=1024,
                    temperature=0.4,
                    top_p=0.95,
                    echo=False,
                    stop=["Question:", "\n\n"]
                )
                answer = response['choices'][0]['text'].strip()

                # Check if response is empty or too short
                if not answer or len(answer) < 2:
                    print(f"Got empty or too short response: '{answer}'. Retrying...")
                    continue
                else:
                    return answer
            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {str(e)}")
                continue

        return "I apologize, but after multiple attempts, I was unable to generate a satisfactory response. Please try rephrasing your question."
    # Streamlit UI
    st.title("llama_cpp GGUF Model Inference")

    user_input = st.text_input("Enter your prompt:")

    if st.button("Generate"):
        if user_input:
            with st.spinner("Generating response..."):
                output = process_query(user_input)
            st.success("Response generated!")
            st.write(output)
        else:
            st.error("Please enter a prompt.")
if __name__ == "__main__":
    main()