# app.py — RAG-powered document analyzer (Gradio + LangChain + local FLAN-T5)
from transformers import pipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
import gradio as gr
# Load local HF model pipeline
# Text2text generation pipeline backing the QA chain (FLAN-T5 base, runs locally).
# NOTE(review): model is downloaded at import time — first launch needs network access.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
# Load documents
def load_docs(file):
    """Load the uploaded file as a list of LangChain documents.

    A ``.pdf`` suffix routes through PyPDFLoader; every other file is
    read as plain text via TextLoader.
    """
    path = file.name
    doc_loader = PyPDFLoader(path) if path.endswith(".pdf") else TextLoader(path)
    return doc_loader.load()
# Split text into chunks
def split_docs(documents):
    """Split documents into overlapping ~1000-char chunks for embedding."""
    chunker = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return chunker.split_documents(documents)
# Embed documents and store in FAISS
def embed_docs(splits):
    """Embed the chunks with MiniLM and index them in an in-memory FAISS store."""
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(splits, embedder)
# QA chain using local pipeline
def qa_chain(vectorstore, query):
    """Answer *query* from the most similar chunks in *vectorstore*.

    Wraps the module-level ``qa_pipeline`` as a LangChain LLM, runs a
    "stuff" QA chain over the top similarity-search hits, and returns
    the generated answer string.
    """
    # Imported here (as in the original) to keep the heavy LLM wrapper
    # out of module import time.
    from langchain.llms import HuggingFacePipeline

    local_llm = HuggingFacePipeline(pipeline=qa_pipeline)
    stuff_chain = load_qa_chain(local_llm, chain_type="stuff")
    context_docs = vectorstore.similarity_search(query)
    return stuff_chain.run(input_documents=context_docs, question=query)
# Gradio interface
def analyze(file, question):
    """Gradio handler: answer *question* about the uploaded *file*.

    Pipeline: load the document, split it into chunks, embed/index the
    chunks in FAISS, then run retrieval + QA over the index.

    Gradio passes ``None`` when no file is uploaded and may pass an empty
    question, so both cases return a friendly message instead of raising.
    """
    if file is None:
        return "Please upload a document first."
    if not question or not question.strip():
        return "Please enter a question about the document."
    docs = load_docs(file)
    splits = split_docs(docs)
    vectorstore = embed_docs(splits)
    return qa_chain(vectorstore, question)
# Build the Gradio UI: a file upload plus a question box, text answer out.
# Fix: the title's leading emoji was mojibake ("πŸ“„" — UTF-8 bytes of 📄
# mis-decoded as cp1252); restored to the intended 📄 character.
interface = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="Upload Document"),
        gr.Textbox(label="Enter Your Question"),
    ],
    outputs="text",
    title="📄 RAG-Powered Document Analyzer",
    description="Upload a document and ask questions about its content!",
)

if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link in addition
    # to the local server.
    interface.launch(share=True)