"""RAG answer chain for FormPilot.

Call `get_answer(question: str)` to obtain (answer, sources).
"""

from __future__ import annotations

from pathlib import Path

from qdrant_client import QdrantClient
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Qdrant
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import hub

# ---------- static vector store ----------
# Built once at import time and shared by every call to get_answer().
_EMB = OpenAIEmbeddings(model="text-embedding-3-small")
_QCLIENT = QdrantClient(path="qdrant_data")
_VSTORE = Qdrant(
    client=_QCLIENT,
    collection_name="formpilot_docs",
    embeddings=_EMB,
    content_payload_key="text",
)

# ---------- prompt ----------
_SYSTEM = (
    "You are FormPilot, an AI paralegal assistant. "
    "Answer the user's question ONLY with information grounded in the context. "
    'If the answer is not in the context, say "I don\'t know.". '
    "Always cite sources like (I‑485instr.pdf:page‑X)."
)

# The system instructions are prepended to the template so the model actually
# receives the grounding and citation rules; previously _SYSTEM was defined but
# never reached the LLM. _SYSTEM contains no braces, so it adds no template
# variables beyond {context} and {question}.
prompt = PromptTemplate.from_template(
    _SYSTEM + "\n\n{context}\n\nQuestion: {question}\nAnswer: (with citations):"
)

# ---------- QA chain ----------
_CHAIN = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=_VSTORE.as_retriever(search_kwargs={"k": 4}),
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

# Answer returned whenever there is nothing to ground a response on.
_FALLBACK_ANSWER = "I don't know."


def get_answer(question: str) -> tuple[str, set[str]]:
    """Answer *question* using the retrieval QA chain.

    Args:
        question: The user's question.

    Returns:
        A ``(answer, citations)`` pair. ``citations`` is the set of non-empty
        ``source`` metadata values of the retrieved documents. When the
        question is blank, no documents are retrieved, or every retrieved
        document has blank content, the pair is ``("I don't know.", set())``.
    """
    # Skip the retrieval and LLM round-trip entirely for a blank question.
    if not question or not question.strip():
        return _FALLBACK_ANSWER, set()

    # invoke() replaces the deprecated Chain.__call__; "query" is the
    # RetrievalQA input key.
    result = _CHAIN.invoke({"query": question})
    answer = result["result"]
    docs = result["source_documents"]

    # If no context (or only blank pages) came back, bail out early rather
    # than returning an answer that has nothing to cite.
    if not docs or all(not (d.page_content or "").strip() for d in docs):
        return _FALLBACK_ANSWER, set()

    # Build citations only from docs that actually carry a non-empty source.
    citations = {
        source
        for d in docs
        if (source := d.metadata.get("source"))
    }
    return answer, citations