# sport-chatbot-docker / app_no_docker.py
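"""Streamlit app that downloads a quantized GGUF model and serves prompt
completions locally through llama-cpp-python.

Run with: streamlit run app_no_docker.py
"""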
import streamlit as st
from llama_cpp import Llama
import requests
import os
from tqdm import tqdm
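# 4-bit (Q4_K_M) GGUF quantization of Mistral 7B v0.1 from TheBloke, and the
# local path the file is saved to.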
direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
model_path = "model/mistral-7b-v0.1.Q4_K_M.gguf"
def main():
    def download_file_with_progress(url: str, filename: str):
        """Download a file with a progress bar using requests."""
        response = requests.get(url, stream=True)
        total_size = int(response.headers.get('content-length', 0))

        with open(filename, 'wb') as file, tqdm(
            desc=f"Downloading {filename}",
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as progress_bar:
            for data in response.iter_content(chunk_size=1024):
                size = file.write(data)
                progress_bar.update(size)
    # Download the model (cached so the download only runs once per session)
    @st.cache_resource
    def download_model():
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        download_file_with_progress(direct_url, model_path)
    # Ensure the model is downloaded
    if not os.path.exists(model_path):
        st.info("Model file not found. Downloading...")
        download_model()

    if not os.path.exists(model_path):
        st.error(f"Model file {model_path} not found after download!")
        return
    # Load the model
    llm = Llama(
        model_path=model_path,
        n_ctx=4096,
        n_gpu_layers=0,  # CPU only
        verbose=False,
    )
    def process_query(query: str) -> str:
        MAX_ATTEMPTS = 5
        for attempt in range(MAX_ATTEMPTS):
            try:
                response = llm(
                    query,
                    max_tokens=1024,
                    temperature=0.4,
                    top_p=0.95,
                    echo=False,
                    stop=["Question:", "\n\n"]
                )
                answer = response['choices'][0]['text'].strip()

                # Check if response is empty or too short
                if not answer or len(answer) < 2:
                    print(f"Got empty or too short response: '{answer}'. Retrying...")
                    continue
                else:
                    return answer
            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {str(e)}")
                continue

        return "I apologize, but after multiple attempts, I was unable to generate a satisfactory response. Please try rephrasing your question."
    # Streamlit UI
    st.title("llama_cpp GGUF Model Inference")

    user_input = st.text_input("Enter your prompt:")

    if st.button("Generate"):
        if user_input:
            with st.spinner("Generating response..."):
                output = process_query(user_input)
            st.success("Response generated!")
            st.write(output)
        else:
            st.error("Please enter a prompt.")
if __name__ == "__main__":
    main()