GraphiqueAcademia / pdf_qa_tool.py
AxDutta's picture
Update pdf_qa_tool.py
877c0e7 verified
from smolagents import Tool
from pypdf import PdfReader
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
class PDFQATool(Tool):
    """Tool that answers a natural-language question from the text of a PDF.

    Every call loads the PDF, embeds its pages into an in-memory FAISS
    index, retrieves the pages most similar to the question and hands them
    to a hosted LLM through a "stuff" RetrievalQA chain.
    """

    name = "pdf_qa"
    description = "Answer questions based on uploaded PDF files."
    inputs = {
        "pdf_path": {"type": "string", "description": "Path to the uploaded PDF file."},
        "query": {"type": "string", "description": "User question about the PDF."},
    }
    output_type = "string"

    def forward(self, pdf_path: str, query: str) -> str:
        """Answer ``query`` using the content of the PDF at ``pdf_path``.

        Args:
            pdf_path: Location of the PDF, passed unchanged to
                ``PyPDFLoader``.
            query: The question to answer from the document.

        Returns:
            The answer text produced by the QA chain, or an explanatory
            message when the PDF contains no extractable text.
        """
        loader = PyPDFLoader(pdf_path)

        # Pages without any text (e.g. scanned images) add nothing to the
        # index, and an empty document list makes FAISS fail with an opaque
        # IndexError, so drop blank pages and bail out early if none remain.
        documents = [doc for doc in loader.load() if doc.page_content.strip()]
        if not documents:
            return (
                "No extractable text was found in the PDF, "
                "so the question cannot be answered."
            )

        # Create embeddings and index the remaining pages.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        db = FAISS.from_documents(documents, embeddings)
        # Only the 3 most similar pages are passed to the LLM as context.
        retriever = db.as_retriever(search_kwargs={"k": 3})

        llm = HuggingFaceHub(
            repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
            task="text-generation",
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )

        # `Chain.run` is deprecated; `invoke` returns a dict whose "result"
        # key holds the same answer string that `run` used to return.
        result = qa_chain.invoke({"query": query})
        return result["result"]