Spaces:
Running
Running
""" | |
RAG answer chain for FormPilot.
Call `get_answer(question: str)` to obtain an `(answer, sources)` tuple.
""" | |
from pathlib import Path | |
from qdrant_client import QdrantClient | |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI | |
from langchain_community.vectorstores import Qdrant | |
from langchain.chains import RetrievalQA | |
from langchain.prompts import PromptTemplate | |
from langchain import hub | |
# ---------- static vector store ----------
# Module-level singletons: constructed once at import time and reused by
# every call into this module.

# Embedding model handed to the vector store.
_EMB = OpenAIEmbeddings(model="text-embedding-3-small")

# Qdrant client opened against the relative "qdrant_data" path.
_QCLIENT = QdrantClient(path="qdrant_data")

# LangChain wrapper over the "formpilot_docs" collection; document text is
# read from the "text" payload key.
_VSTORE = Qdrant(
    embeddings=_EMB,
    client=_QCLIENT,
    content_payload_key="text",
    collection_name="formpilot_docs",
)
# ---------- prompt ----------
# Behavioural instructions for the model: stay grounded in the retrieved
# context, admit when the answer is missing, and cite sources.
_SYSTEM = """You are FormPilot, an AI paralegal assistant.
Answer the user's question ONLY with information grounded in the context.
If the answer is not in the context, say "I don't know.".
Always cite sources like (I‑485instr.pdf:page‑X)."""

# The instructions have to be part of the template text itself; otherwise
# _SYSTEM is never sent to the model and the grounding/citation rules are
# silently ignored. _SYSTEM contains no curly braces, so prepending it does
# not introduce extra template variables beyond {context} and {question}.
prompt = PromptTemplate.from_template(
    _SYSTEM
    + "\n\n{context}\n\nQuestion: {question}\nAnswer: (with citations):"
)
# ---------- QA chain ----------
# Chat model that writes the final answer (temperature 0).
_LLM = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

# Retriever over the vector store, asking for k=4 documents per query.
_RETRIEVER = _VSTORE.as_retriever(search_kwargs={"k": 4})

# "stuff" chain: the retrieved documents are passed to the LLM through the
# custom prompt, and the documents are returned alongside the answer so
# callers can build citations from them.
_CHAIN = RetrievalQA.from_chain_type(
    llm=_LLM,
    retriever=_RETRIEVER,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)
def get_answer(question: str):
    """Answer *question* with the RAG chain and report the sources used.

    Args:
        question: The user's natural-language question.

    Returns:
        A ``(answer, sources)`` tuple. ``answer`` is the chain's ``"result"``
        string and ``sources`` is the set of non-empty ``"source"`` metadata
        values found on the retrieved documents. ``("I don't know.", set())``
        is returned when the question is blank or when retrieval yields no
        document with non-blank text.
    """
    fallback = "I don't know."

    # A blank question has nothing to retrieve against; skip the embedding
    # and LLM round-trips entirely.
    if not question or not question.strip():
        return fallback, set()

    # `invoke` replaces the deprecated `Chain.__call__`; RetrievalQA reads
    # the question from the "query" input key.
    result = _CHAIN.invoke({"query": question})
    docs = result["source_documents"]

    # No context (or only blank pages): do not trust whatever the model said.
    if not any((d.page_content or "").strip() for d in docs):
        return fallback, set()

    # Build citations only from docs that actually carry a source value.
    sources = ((d.metadata or {}).get("source") for d in docs)
    citations = {src for src in sources if src}
    return result["result"], citations