"""RAG-powered document Q&A app: upload a PDF/text file and ask questions.

Pipeline: load document -> split into overlapping chunks -> embed into a
FAISS index -> retrieve relevant chunks -> answer with a local FLAN-T5 model.
"""

from transformers import pipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import CharacterTextSplitter
# NOTE(review): langchain.vectorstores / langchain.embeddings / langchain.llms
# are deprecated re-export shims; the canonical paths (matching the
# document_loaders import the file already used) are in langchain_community.
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
import gradio as gr

# Local HF text2text pipeline used for answer generation (loaded once at startup).
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")


def load_docs(file):
    """Load an uploaded file into LangChain documents (PDF or plain text).

    Args:
        file: a Gradio file object exposing ``.name`` (the temp-file path).

    Returns:
        list of LangChain ``Document`` objects.
    """
    # Case-insensitive extension check so "report.PDF" is also parsed as PDF.
    if file.name.lower().endswith(".pdf"):
        loader = PyPDFLoader(file.name)
    else:
        loader = TextLoader(file.name)
    return loader.load()


def split_docs(documents):
    """Split documents into ~1000-char chunks with 200-char overlap for retrieval."""
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(documents)


def embed_docs(splits):
    """Embed chunks with MiniLM sentence embeddings and index them in FAISS."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(splits, embeddings)


def qa_chain(vectorstore, query):
    """Answer ``query`` using the chunks most similar to it in ``vectorstore``.

    Retrieves the top matching chunks, stuffs them into the prompt
    ("stuff" chain type), and generates an answer with the local pipeline.
    """
    llm = HuggingFacePipeline(pipeline=qa_pipeline)
    chain = load_qa_chain(llm, chain_type="stuff")
    docs = vectorstore.similarity_search(query)
    return chain.run(input_documents=docs, question=query)


# Cache of FAISS indexes keyed by uploaded-file path so that repeated
# questions about the same document don't re-load and re-embed it each time.
_vectorstore_cache = {}


def analyze(file, question):
    """Gradio handler: build (or reuse) an index for ``file`` and answer ``question``."""
    # Gradio passes None when no file was uploaded; guard instead of crashing.
    if file is None:
        return "Please upload a document first."
    if not question or not question.strip():
        return "Please enter a question."
    vectorstore = _vectorstore_cache.get(file.name)
    if vectorstore is None:
        vectorstore = embed_docs(split_docs(load_docs(file)))
        _vectorstore_cache[file.name] = vectorstore
    return qa_chain(vectorstore, question)


interface = gr.Interface(
    fn=analyze,
    inputs=[
        gr.File(label="Upload Document"),
        gr.Textbox(label="Enter Your Question"),
    ],
    outputs="text",
    title="📄 RAG-Powered Document Analyzer",
    description="Upload a document and ask questions about its content!",
)

if __name__ == "__main__":
    interface.launch(share=True)