|
from smolagents import Tool |
|
from pypdf import PdfReader |
|
from langchain_community.document_loaders import PyPDFLoader |
|
from langchain.chains import RetrievalQA |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_community.llms import HuggingFaceHub |
|
|
|
|
|
class PDFQATool(Tool):
    """Tool that answers a natural-language question from the text of a PDF.

    Each call loads the PDF, embeds its pages into an in-memory FAISS index,
    retrieves the pages most similar to the question and passes them to a
    hosted text-generation model through a "stuff" RetrievalQA chain.
    """

    name = "pdf_qa"
    description = "Answer questions based on uploaded PDF files."
    inputs = {
        "pdf_path": {"type": "string", "description": "Path to the uploaded PDF file."},
        "query": {"type": "string", "description": "User question about the PDF."},
    }
    output_type = "string"

    def forward(self, pdf_path: str, query: str) -> str:
        """Answer ``query`` using the content of the PDF at ``pdf_path``.

        Args:
            pdf_path: Path to the PDF file to read.
            query: Question to answer from the PDF content.

        Returns:
            The answer text produced by the QA chain, or an explanatory
            message when the PDF contains no extractable text.
        """
        # Drop pages without text: they add nothing to retrieval, and an
        # empty document list cannot be turned into a FAISS index at all.
        pages = [
            page
            for page in PyPDFLoader(pdf_path).load()
            if page.page_content and page.page_content.strip()
        ]
        if not pages:
            return (
                "No extractable text was found in the PDF, "
                "so the question cannot be answered."
            )

        # The embedding model does not depend on the PDF or the question, so
        # build it once per tool instance instead of reloading it every call.
        embeddings = getattr(self, "_embeddings", None)
        if embeddings is None:
            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2"
            )
            self._embeddings = embeddings

        # The index is rebuilt per call because it depends on the given PDF.
        index = FAISS.from_documents(pages, embeddings)
        retriever = index.as_retriever(search_kwargs={"k": 3})

        llm = HuggingFaceHub(
            repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
            task="text-generation",
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )

        # ``Chain.run`` is deprecated; ``invoke`` returns a dict whose
        # "result" key holds the generated answer.
        response = qa_chain.invoke({"query": query})
        return response["result"]
|
|