File size: 1,911 Bytes
79d40ce 180432e 79d40ce 180432e 79d40ce 86a19ae 79d40ce 4769565 79d40ce 180432e 79d40ce 4769565 79d40ce 180432e 79d40ce 180432e 4769565 180432e 79d40ce 180432e 79d40ce 180432e 79d40ce 86a19ae 79d40ce 180432e 79d40ce 180432e 79d40ce 180432e 79d40ce 180432e 86a19ae 180432e 4769565 79d40ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from transformers import pipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
import gradio as gr
# Build the Hugging Face text2text-generation pipeline once, at import time,
# so that every question reuses the same model object.
qa_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)
# Load documents
def load_docs(file):
    """Load an uploaded document with the loader matching its extension.

    Args:
        file: The document to read. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object that exposes its path as ``.name``.

    Returns:
        The result of the chosen loader's ``load()`` call.

    Raises:
        ValueError: If ``file`` is ``None``, i.e. nothing was provided.
    """
    import os

    if file is None:
        raise ValueError("No document was provided.")

    # Paths are used as-is; anything else is expected to carry its path in
    # ``.name``. ``Path.name`` is only the basename, so path-like objects must
    # not go through the ``.name`` branch.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # Compare case-insensitively so that "REPORT.PDF" is still treated as a PDF.
    if path.lower().endswith(".pdf"):
        loader = PyPDFLoader(path)
    else:
        loader = TextLoader(path)
    return loader.load()
# Split text into chunks
def split_docs(documents):
    """Split ``documents`` into overlapping chunks.

    Uses a ``CharacterTextSplitter`` configured with ``chunk_size=1000`` and
    ``chunk_overlap=200``.

    Args:
        documents: The loaded documents to split.

    Returns:
        The value returned by the splitter's ``split_documents``.
    """
    chunker = CharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
    chunks = chunker.split_documents(documents)
    return chunks
# Embed documents and store in FAISS
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
_embeddings = None


def _get_embeddings():
    """Return the shared embedding model, creating it on first use."""
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
    return _embeddings


def embed_docs(splits):
    """Embed document chunks and index them in a FAISS vector store.

    The embedding model is created once and reused across calls instead of
    being rebuilt for every document.

    Args:
        splits: The document chunks to embed.

    Returns:
        The vector store built by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``splits`` is empty, e.g. the document had no
            extractable text.
    """
    # Fail early with a clear message; an empty list would otherwise only
    # fail deeper inside the vector store construction.
    if not splits:
        raise ValueError("No text could be extracted from the document.")
    return FAISS.from_documents(splits, _get_embeddings())
# QA chain using local pipeline
def qa_chain(vectorstore, query):
    """Answer ``query`` using the chunks in ``vectorstore`` most similar to it.

    Wraps the module-level ``qa_pipeline`` as a LangChain LLM, retrieves the
    matching chunks with ``similarity_search`` and runs a "stuff"
    question-answering chain over them.

    Args:
        vectorstore: A vector store exposing ``similarity_search(query)``.
        query: The question to answer.

    Returns:
        The result of running the chain on the retrieved chunks and the query.
    """
    # Imported from langchain_community, the same package this file already
    # uses for its document loaders, rather than the legacy ``langchain.llms``
    # path.
    from langchain_community.llms import HuggingFacePipeline

    llm = HuggingFacePipeline(pipeline=qa_pipeline)
    chain = load_qa_chain(llm, chain_type="stuff")
    docs = vectorstore.similarity_search(query)
    return chain.run(input_documents=docs, question=query)
# Gradio interface
def analyze(file, question):
    """Gradio callback: answer ``question`` about the uploaded ``file``.

    Runs the pipeline end to end: load the document, split it into chunks,
    embed the chunks into a vector store, then run the QA chain.

    Args:
        file: The uploaded document, or ``None`` if nothing was uploaded.
        question: The question text entered by the user.

    Returns:
        The answer produced by ``qa_chain``.

    Raises:
        gr.Error: If no document was uploaded or the question is blank.
    """
    # Validate up front so the user sees an actionable message instead of a
    # failure from deep inside the loading or retrieval code.
    if file is None:
        raise gr.Error("Please upload a document first.")
    if not question or not question.strip():
        raise gr.Error("Please enter a question.")

    docs = load_docs(file)
    splits = split_docs(docs)
    vectorstore = embed_docs(splits)
    return qa_chain(vectorstore, question)
# Input widgets, listed in the order `analyze` receives its arguments.
_document_input = gr.File(label="Upload Document")
_question_input = gr.Textbox(label="Enter Your Question")

# The web UI: one file upload and one text box in, plain text out.
interface = gr.Interface(
    fn=analyze,
    inputs=[_document_input, _question_input],
    outputs="text",
    title="📄 RAG-Powered Document Analyzer",
    description="Upload a document and ask questions about its content!",
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch(share=True)
|