Spaces:

afulara
/

formpilot-demo

Running

App Files Files Community

formpilot-demo / rag /qa_chain.py

afulara

Auto‑deploy from GitHub

5ee4946 verified 3 months ago

raw

history blame contribute delete

1.88 kB

	"""
	RAG answer chain for FormPilot.
	Call `get_answer(question: str)` to obtain `(answer, sources)`: the answer text
	and the set of `source` values taken from the retrieved documents' metadata.
	"""
	from pathlib import Path
	from qdrant_client import QdrantClient
	from langchain_openai import OpenAIEmbeddings, ChatOpenAI
	from langchain_community.vectorstores import Qdrant
	from langchain.chains import RetrievalQA
	from langchain.prompts import PromptTemplate
	from langchain import hub

	# ---------- static vector store ----------
	# Built once at import time and shared by every call to get_answer().
	# Embedding model used by the vector store.
	_EMB = OpenAIEmbeddings(model="text-embedding-3-small")
	# Qdrant client opened on the "qdrant_data" path.
	# NOTE(review): the path is relative, so presumably it is resolved against the
	# process working directory rather than this file's location - confirm.
	_QCLIENT = QdrantClient(path="qdrant_data")
	# LangChain vector-store wrapper over the "formpilot_docs" collection;
	# content_payload_key names the payload field used as the document text.
	_VSTORE = Qdrant(
	client=_QCLIENT,
	collection_name="formpilot_docs",
	embeddings=_EMB,
	content_payload_key="text",
	)

	# ---------- prompt ----------
	# Behavioural instructions for the model: answer only from the retrieved
	# context, fall back to "I don't know.", and always cite sources.
	_SYSTEM = """You are FormPilot, an AI paralegal assistant.
	Answer the user's question ONLY with information grounded in the context.
	If the answer is not in the context, say "I don't know.".
	Always cite sources like (I‑485instr.pdf:page‑X)."""
	# The instructions have to be part of the template itself: nothing else hands
	# _SYSTEM to the model, so without the prefix they would never be sent.
	# NOTE: the template uses f-string style placeholders, so any literal brace
	# added to _SYSTEM later must be doubled ("{{" / "}}").
	prompt = PromptTemplate.from_template(
	    _SYSTEM
	    + "\n\n{context}\n\nQuestion: {question}\nAnswer: (with citations):"
	)

	# ---------- QA chain ----------
	# Retrieval + answer chain, built once at import time and used by get_answer().
	#   * llm: gpt-3.5-turbo with temperature 0.
	#   * chain_type "stuff": per the LangChain docs this puts all retrieved
	#     documents into a single prompt - TODO confirm for the installed version.
	#   * retriever: asks the vector store for k=4 documents per question.
	#   * chain_type_kwargs: supplies the module-level `prompt` template.
	#   * return_source_documents=True: the result also carries the retrieved
	#     documents under "source_documents", which get_answer() reads.
	_CHAIN = RetrievalQA.from_chain_type(
	llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
	chain_type="stuff",
	retriever=_VSTORE.as_retriever(search_kwargs={"k": 4}),
	chain_type_kwargs={"prompt": prompt},
	return_source_documents=True,
	)

	def get_answer(question: str) -> "tuple[str, set[str]]":
	    """Answer *question* from the indexed documents and list the sources used.

	    Args:
	        question: The user's question in natural language.

	    Returns:
	        An ``(answer, sources)`` pair. ``answer`` is the text produced by the
	        QA chain; ``sources`` is the set of non-empty ``"source"`` metadata
	        values of the retrieved documents. ``("I don't know.", set())`` is
	        returned when the question is blank or when retrieval yields no
	        document with non-blank content.
	    """
	    # A blank question cannot be answered; skip the embedding and LLM calls.
	    if not question or not question.strip():
	        return "I don't know.", set()

	    # Chain.__call__ is deprecated; invoke() with the chain's "query" input
	    # key is the supported entry point.
	    result = _CHAIN.invoke({"query": question})
	    docs = result["source_documents"]

	    # No context (or only blank pages): there was nothing to ground the
	    # answer on, so discard whatever the model produced.
	    # NOTE: the chain has already called the LLM at this point; avoiding that
	    # cost would require retrieving separately before invoking the chain.
	    if not any((d.page_content or "").strip() for d in docs):
	        return "I don't know.", set()

	    # Build citations only from docs that actually have a non-empty source.
	    citations = {
	        d.metadata["source"]
	        for d in docs
	        if d.metadata.get("source")
	    }

	    return result["result"], citations