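"""PDF Question Answering demo.

Upload one or more PDFs, split their text into chunks, index the chunks in a
FAISS vector store using sentence-transformers embeddings, and answer
questions with FLAN-T5 through a Gradio interface.

Suggested install (package names inferred from the imports below; the
original pins no versions):
    pip install gradio PyPDF2 langchain langchain-community faiss-cpu \
        sentence-transformers transformers
"""
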
import gradio as gr
from PyPDF2 import PdfReader

# LangChain components
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Hugging Face Transformers
from transformers import pipeline


# ---------------- Load LLM ----------------
def load_llm():
    try:
        # FLAN-T5 is instruction-tuned, so it follows QA-style prompts well
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            max_length=512
        )
        print("✅ Successfully loaded model: google/flan-t5-base")
        return pipe
    except Exception as e:
        print(f"⚠️ Failed to load model: {e}")
        return None


llm = load_llm()
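
# flan-t5-base is small enough to run on a CPU; a larger checkpoint such as
# "google/flan-t5-large" generally answers better at the cost of memory and latency.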


# ---------------- Process PDF ----------------
def process_pdf(pdf_files):
    text = ""
    for pdf in pdf_files:
        reader = PdfReader(pdf)
        for page in reader.pages:
            extracted = page.extract_text()
            if extracted:  # extract_text() yields empty text for image-only pages
                text += extracted + "\n"

    if not text.strip():
        return None

    # Split text into overlapping chunks (CharacterTextSplitter splits on "\n\n"
    # by default, so individual chunks can exceed chunk_size)
    splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    texts = splitter.split_text(text)

    # Embeddings & vector store
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.from_texts(texts, embeddings)

    return db
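

# Optional: ask_question() below rebuilds the FAISS index for every question.
# A minimal cache keyed by the uploaded file paths (a sketch, not part of the
# original script) avoids recomputing embeddings on repeated questions:
_db_cache = {}

def get_db_cached(pdf_files):
    # Gradio passes uploads as file paths or objects exposing a .name path
    key = tuple(sorted(getattr(f, "name", str(f)) for f in pdf_files))
    if key not in _db_cache:
        db = process_pdf(pdf_files)
        if db is None:  # don't cache failed/empty extractions
            return None
        _db_cache[key] = db
    return _db_cache[key]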


# ---------------- Ask Questions ----------------
def ask_question(pdf_files, question):
    try:
        if not pdf_files:
            return "⚠️ Please upload at least one PDF file."
        
        if not llm:
            return "⚠️ Language model failed to load. Please try again later."
            
        db = process_pdf(pdf_files)
        if not db:
            return "⚠️ No text found in the uploaded PDF(s)."

        retriever = db.as_retriever(search_kwargs={"k": 4})  # fetch top-4 chunks
        docs = retriever.invoke(question)

        # Combine retrieved context
        context = "\n".join([doc.page_content for doc in docs])
        
        # Clean up context to remove excessive whitespace
        context = " ".join(context.split())

        # Prompt that grounds the model's answer in the retrieved context
        prompt = f"""Based on the following information, answer the question clearly and concisely.

Information:
{context}

Question: {question}

Answer:"""

        # Generate response
        result = llm(
            prompt,
            max_length=300,
            num_return_sequences=1,
            do_sample=False  # greedy decoding: deterministic, focused answers
        )
        
        response = result[0]['generated_text'].strip()
        
        # Clean up the response
        if response.startswith("Answer:"):
            response = response.replace("Answer:", "").strip()
        
        # If response is empty or just repeats the prompt, provide fallback
        if not response or len(response) < 10:
            return "I couldn't find a clear answer to your question in the provided documents. Please try rephrasing your question or check if the relevant information is in the uploaded PDFs."
        
        return response

    except Exception as e:
        return f"⚠️ Error: {str(e)}"


# ---------------- Gradio UI ----------------
with gr.Blocks() as demo:
    gr.Markdown("## 📚 PDF Question Answering System")
    gr.Markdown("Upload PDF files and ask questions about their content.")
    
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(
                label="Upload PDF Files",
                file_types=[".pdf"],
                file_count="multiple"
            )
        with gr.Column():
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="What would you like to know about the document?",
                lines=2
            )
            submit_btn = gr.Button("Ask Question", variant="primary")
    
    with gr.Row():
        output = gr.Textbox(
            label="Answer",
            lines=4,
            interactive=False
        )
    
    # Examples
    gr.Examples(
        examples=[
            ["What is the main topic of this document?"],
            ["Can you summarize the key points?"],
            ["What are the main findings or conclusions?"],
            ["Who are the authors and what are their credentials?"]
        ],
        inputs=question_input,
        label="Example Questions"
    )
    
    # Handle both button click and enter key
    submit_btn.click(ask_question, inputs=[pdf_input, question_input], outputs=output)
    question_input.submit(ask_question, inputs=[pdf_input, question_input], outputs=output)

demo.launch()
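
# Run with `python app.py` (the filename here is an assumption) and open the
# local URL Gradio prints; demo.launch(share=True) would also create a public link.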