import streamlit as st
from llama_cpp import Llama
import os


def main():
    # Direct download URL for the GGUF file (kept for reference; the app expects
    # the model to already exist at model_path, e.g. baked into the Docker image).
    direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
    model_path = "model/mistral-7b-v0.1.Q4_K_M.gguf"

    # Check if the model file exists
    if not os.path.exists(model_path):
        st.error(f"Model file {model_path} not found! Please ensure the model is included in the Docker image.")
        return

    # Load the model once and cache it across Streamlit reruns
    @st.cache_resource
    def load_model():
        return Llama(
            model_path=model_path,
            n_ctx=4096,
            n_gpu_layers=0,  # CPU only
            verbose=False,
        )

    llm = load_model()

    def process_query(query: str) -> str:
        MAX_ATTEMPTS = 5
        for attempt in range(MAX_ATTEMPTS):
            try:
                response = llm(
                    query,
                    max_tokens=1024,
                    temperature=0.4,
                    top_p=0.95,
                    echo=False,
                    stop=["Question:", "\n\n"],
                )
                answer = response['choices'][0]['text'].strip()

                # Retry if the response is empty or too short
                if not answer or len(answer) < 2:
                    print(f"Got empty or too short response: '{answer}'. Retrying...")
                    continue
                else:
                    return answer
            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {str(e)}")
                continue

        return "I apologize, but after multiple attempts, I was unable to generate a satisfactory response. Please try rephrasing your question."

    # Streamlit UI
    st.title("llama_cpp GGUF Model Inference")
    user_input = st.text_input("Enter your prompt:")

    if st.button("Generate"):
        if user_input:
            with st.spinner("Generating response..."):
                output = process_query(user_input)
            st.success("Response generated!")
            st.write(output)
        else:
            st.error("Please enter a prompt.")


if __name__ == "__main__":
    main()
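The app only checks that the GGUF file exists and errors out otherwise, and the `direct_url` variable is never used. Below is a minimal sketch of one way that URL could be used to fetch the model before building the Docker image or starting the app; the `download_model.py` script and its helper name are hypothetical and not part of the original app.

    # download_model.py - hypothetical helper (not part of the original app).
    # Fetches the GGUF file from the direct_url defined in the app so that
    # model_path exists before the Streamlit app starts.
    import os
    import urllib.request

    DIRECT_URL = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
    MODEL_PATH = "model/mistral-7b-v0.1.Q4_K_M.gguf"


    def download_model() -> None:
        """Download the GGUF model to the path the Streamlit app expects."""
        if os.path.exists(MODEL_PATH):
            print(f"{MODEL_PATH} already exists, skipping download.")
            return
        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
        print(f"Downloading model to {MODEL_PATH} (several GB, this may take a while)...")
        urllib.request.urlretrieve(DIRECT_URL, MODEL_PATH)
        print("Download complete.")


    if __name__ == "__main__":
        download_model()

Running `python download_model.py` once (locally or in a Dockerfile build step) would place the file where the app's existence check looks for it.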