"""RAG-powered document analyzer.

Loads a PDF or plain-text file, splits it into overlapping chunks, indexes the
chunks in a FAISS vector store, and answers questions with a local FLAN-T5 model
behind a Gradio interface.
"""

from transformers import pipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
import gradio as gr

# Local text-to-text model used to generate answers (no API key required).
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")


def load_docs(file):
    """Load an uploaded file with the loader that matches its extension."""
    # Depending on the Gradio version, gr.File passes either a file path (str)
    # or a tempfile-like object, so handle both.
    path = file if isinstance(file, str) else file.name
    if path.endswith(".pdf"):
        loader = PyPDFLoader(path)
    else:
        loader = TextLoader(path)
    return loader.load()


def split_docs(documents):
    """Split documents into overlapping chunks that fit the model's context window."""
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(documents)


def embed_docs(splits):
    """Embed the chunks and index them in an in-memory FAISS vector store."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(splits, embeddings)


def qa_chain(vectorstore, query):
    """Retrieve the chunks most similar to the query and generate an answer."""
    llm = HuggingFacePipeline(pipeline=qa_pipeline)
    # "stuff" packs all retrieved chunks into a single prompt for the model.
    chain = load_qa_chain(llm, chain_type="stuff")
    docs = vectorstore.similarity_search(query)
    return chain.run(input_documents=docs, question=query)


def analyze(file, question):
    """Full RAG pipeline: load, split, embed, then answer the question."""
    docs = load_docs(file)
    splits = split_docs(docs)
    vectorstore = embed_docs(splits)
    return qa_chain(vectorstore, question)


interface = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="Upload Document"),
        gr.Textbox(label="Enter Your Question"),
    ],
    outputs="text",
    title="RAG-Powered Document Analyzer",
    description="Upload a document and ask questions about its content!",
)

if __name__ == "__main__":
    # share=True serves a temporary public link alongside the local URL.
    interface.launch(share=True)
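
# Assumed dependencies (an assumption, not pinned by this script; the exact set, and
# whether the imports above live in `langchain` or `langchain_community`, depends on
# your LangChain version):
#
#   pip install langchain langchain-community transformers sentence-transformers \
#       faiss-cpu pypdf gradio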