File size: 1,911 Bytes
79d40ce
180432e
 
79d40ce
180432e
 
79d40ce
86a19ae
79d40ce
 
4769565
79d40ce
180432e
79d40ce
 
 
 
 
4769565
79d40ce
180432e
 
 
 
79d40ce
180432e
4769565
180432e
 
79d40ce
180432e
79d40ce
 
180432e
 
79d40ce
86a19ae
79d40ce
180432e
79d40ce
 
 
 
 
180432e
 
 
 
79d40ce
 
180432e
 
79d40ce
 
180432e
86a19ae
180432e
4769565
 
79d40ce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from transformers import pipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
import gradio as gr

# Load local HF model pipeline.
# flan-t5-base via the text2text-generation task; created once at module
# import so every query reuses the same loaded weights.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")

# Load documents
def load_docs(file):
    """Load the uploaded file, picking a loader by its extension.

    PDF files go through PyPDFLoader; everything else is treated as
    plain text. Returns the list of loaded documents.
    """
    is_pdf = file.name.endswith(".pdf")
    loader = PyPDFLoader(file.name) if is_pdf else TextLoader(file.name)
    return loader.load()

# Split text into chunks
def split_docs(documents):
    """Break *documents* into 1000-char chunks with 200-char overlap."""
    chunker = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = chunker.split_documents(documents)
    return chunks

# Embed documents and store in FAISS
def embed_docs(splits):
    """Embed the chunks with MiniLM and index them in an in-memory FAISS store."""
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(splits, embedder)

# QA chain using local pipeline
def qa_chain(vectorstore, query):
    """Answer *query* by stuffing the most similar chunks into the local LLM.

    Wraps the module-level `qa_pipeline` in a LangChain LLM, retrieves the
    chunks nearest to the query, and runs a "stuff" QA chain over them.
    """
    from langchain.llms import HuggingFacePipeline

    relevant = vectorstore.similarity_search(query)
    local_llm = HuggingFacePipeline(pipeline=qa_pipeline)
    stuff_chain = load_qa_chain(local_llm, chain_type="stuff")
    return stuff_chain.run(input_documents=relevant, question=query)

# Gradio interface
def analyze(file, question):
    """Run the full RAG pipeline for one uploaded file and question.

    Loads the document, chunks and embeds it, then answers the question
    against the resulting vector store. Returns the model's answer, or a
    short hint string when input is missing — Gradio passes None for an
    empty File component, which previously crashed in load_docs with an
    AttributeError on `file.name`.
    """
    if file is None:
        return "Please upload a document."
    if not question or not question.strip():
        return "Please enter a question."
    docs = load_docs(file)
    splits = split_docs(docs)
    vectorstore = embed_docs(splits)
    return qa_chain(vectorstore, question)

# Gradio UI wiring: a file upload plus a question box, answered as plain text.
_ui_inputs = [
    gr.File(label="Upload Document"),
    gr.Textbox(label="Enter Your Question"),
]

interface = gr.Interface(
    fn=analyze,
    inputs=_ui_inputs,
    outputs="text",
    title="📄 RAG-Powered Document Analyzer",
    description="Upload a document and ask questions about its content!",
)

# Launch only when run as a script; share=True exposes a public Gradio link.
if __name__ == "__main__":
    interface.launch(share=True)