import gradio as gr
from PyPDF2 import PdfReader

# LangChain components
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate

# Hugging Face Transformers
from transformers import pipeline


# ---------------- Load LLM ----------------
def load_llm():
    """Build the text2text-generation pipeline used to answer questions.

    Returns:
        The Hugging Face pipeline for ``google/flan-t5-base``, or ``None``
        when the model cannot be loaded. The failure is printed rather than
        raised so the app can still start and report the problem to the user.
    """
    model_name = "google/flan-t5-base"
    try:
        # No `temperature` here: sampling is never enabled for this pipeline,
        # and under greedy decoding transformers ignores temperature and only
        # emits a warning about the unused flag.
        pipe = pipeline(
            "text2text-generation",
            model=model_name,
            max_length=512,
        )
    except Exception as e:
        # Startup boundary: any load failure (missing weights, no network,
        # backend errors) degrades to "no model" instead of crashing.
        print(f"⚠️ Failed to load model: {e}")
        return None

    print(f"✅ Successfully loaded model: {model_name}")
    return pipe


# Loaded once at import time and read by ask_question on every call.
# load_llm returns None on failure, so this may be None; ask_question
# checks for that before using it.
llm = load_llm()


# ---------------- Process PDF ----------------
def process_pdf(pdf_files):
    """Extract text from the given PDFs and index it in a FAISS vector store.

    Args:
        pdf_files: Iterable of PDF sources, each passed to ``PdfReader``.
            ``None`` is treated as an empty collection.

    Returns:
        A FAISS vector store built from the chunked text, or ``None`` when
        no text could be extracted from any page.
    """
    page_texts = []
    for pdf in pdf_files or []:
        reader = PdfReader(pdf)
        for page in reader.pages:
            extracted = page.extract_text()
            # Pages without a text layer yield nothing useful; skip them.
            if extracted:
                page_texts.append(extracted)

    text = "\n".join(page_texts)
    if not text.strip():
        return None

    # CharacterTextSplitter only splits on its separator, which defaults to
    # a blank line ("\n\n"). The text here is joined with single newlines,
    # so the default would often leave the whole document as one oversized
    # chunk. Splitting on "\n" lets chunk_size actually take effect.
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=100,
    )
    texts = splitter.split_text(text)

    # Embeddings & vector store
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(texts, embeddings)


# ---------------- Ask Questions ----------------
def ask_question(pdf_files, question):
    """Answer a question using the text of the uploaded PDFs.

    The PDFs are indexed with ``process_pdf`` on every call, the four chunks
    most similar to the question are retrieved, and the module-level ``llm``
    pipeline generates an answer from them.

    Args:
        pdf_files: Uploaded PDF sources, forwarded to ``process_pdf``.
        question: The user's question; surrounding whitespace is ignored.

    Returns:
        The generated answer, or a human-readable message (prefixed with
        "⚠️" for input and runtime problems) explaining why no answer could
        be produced. Errors are returned as text rather than raised so the
        UI always has something to display.
    """
    # Guard clauses: validate inputs before doing any expensive work.
    if not pdf_files:
        return "⚠️ Please upload at least one PDF file."

    question = (question or "").strip()
    if not question:
        return "⚠️ Please enter a question."

    if llm is None:
        return "⚠️ Language model failed to load. Please try again later."

    try:
        db = process_pdf(pdf_files)
        if db is None:
            return "⚠️ No text found in the uploaded PDF(s)."

        # Top-4 similarity search over the indexed chunks.
        docs = db.similarity_search(question, k=4)

        # Combine retrieved context and collapse runs of whitespace.
        context = "\n".join(doc.page_content for doc in docs)
        context = " ".join(context.split())

        prompt = (
            "Based on the following information, answer the question "
            "clearly and concisely.\n\n"
            f"Information:\n{context}\n\n"
            f"Question: {question}\n\n"
            "Answer:"
        )

        # Greedy decoding; temperature is not passed because it is ignored
        # (and warned about) when do_sample is False.
        result = llm(
            prompt,
            max_length=300,
            num_return_sequences=1,
            do_sample=False,
        )
        response = result[0]["generated_text"].strip()

        # Drop only a leading "Answer:" label; later occurrences are part of
        # the answer text and must be kept.
        label = "Answer:"
        if response.startswith(label):
            response = response[len(label):].strip()

        # Short answers ("Paris", "1998", "Yes") are legitimate, so only an
        # empty generation triggers the fallback message.
        if not response:
            return "I couldn't find a clear answer to your question in the provided documents. Please try rephrasing your question or check if the relevant information is in the uploaded PDFs."

        return response

    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing.
        return f"⚠️ Error: {str(e)}"


# ---------------- Gradio UI ----------------
with gr.Blocks() as demo:
    # Page header.
    gr.Markdown("## 📚 PDF Question Answering System")
    gr.Markdown("Upload PDF files and ask questions about their content.")

    # First row: PDF uploader in one column, question box and submit
    # button in the other.
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(
                label="Upload PDF Files",
                file_types=[".pdf"],
                file_count="multiple",
            )
        with gr.Column():
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="What would you like to know about the document?",
                lines=2,
            )
            submit_btn = gr.Button("Ask Question", variant="primary")

    # Second row: read-only box that displays the answer.
    with gr.Row():
        output = gr.Textbox(label="Answer", lines=4, interactive=False)

    # Sample questions offered for the question box (one row per example).
    gr.Examples(
        examples=[
            [sample]
            for sample in (
                "What is the main topic of this document?",
                "Can you summarize the key points?",
                "What are the main findings or conclusions?",
                "Who are the authors and what are their credentials?",
            )
        ],
        inputs=question_input,
        label="Example Questions",
    )

    # The button click and pressing Enter in the question box both run the
    # same handler with the same inputs and output.
    for trigger in (submit_btn.click, question_input.submit):
        trigger(ask_question, inputs=[pdf_input, question_input], outputs=output)

demo.launch()