File size: 1,323 Bytes
d49d46a
 
 
 
 
 
9dad4ce
 
d49d46a
 
 
 
 
ab134ca
d49d46a
 
ab134ca
d49d46a
 
877c0e7
d49d46a
 
 
 
 
 
 
 
 
fdf4adf
d49d46a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from smolagents import Tool
from pypdf import PdfReader
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub


class PDFQATool(Tool):
    """Agent tool that answers a question from the contents of one PDF file.

    Every call loads the PDF, embeds the loaded documents, builds an
    in-memory FAISS index over them, and runs a "stuff" RetrievalQA chain
    against a hosted LLM. Nothing is cached between calls.
    """

    name = "pdf_qa"
    description = "Answer questions based on uploaded PDF files."
    inputs = {
        "pdf_path": {"type": "string", "description": "Path to the uploaded PDF file."},
        "query": {"type": "string", "description": "User question about the PDF."}
    }
    output_type = "string"

    def forward(self, pdf_path: str, query: str) -> str:
        """Answer ``query`` using text retrieved from the PDF at ``pdf_path``.

        Args:
            pdf_path: Path to the PDF file to load.
            query: Question to answer from the PDF's contents.

        Returns:
            The answer text produced by the QA chain, or an explanatory
            message when the PDF yields no text to search.
        """
        # Drop documents without any text: they add nothing to retrieval, and
        # building a FAISS index from an empty list fails with an opaque
        # IndexError instead of a useful message.
        documents = [
            doc for doc in PyPDFLoader(pdf_path).load()
            if doc.page_content.strip()
        ]
        if not documents:
            return f"No extractable text was found in '{pdf_path}'."

        # Create embeddings and index the documents for similarity search.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        db = FAISS.from_documents(documents, embeddings)

        # Only the 3 best-matching documents are passed to the LLM as context.
        retriever = db.as_retriever(search_kwargs={"k": 3})

        # NOTE(review): presumably needs a Hugging Face API token in the
        # environment -- confirm against the deployment configuration.
        llm = HuggingFaceHub(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", task="text-generation")

        qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

        # Chain.run() is deprecated; invoke() returns a dict whose "result"
        # key holds the answer text for RetrievalQA.
        response = qa_chain.invoke({"query": query})
        return response["result"]