# NOTE(review): the three lines below were a Hugging Face Spaces status
# banner ("Spaces: Sleeping") captured during extraction — not source code.
import os
from pathlib import Path

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Project root: one level up from the directory containing this file.
BASE_DIR = Path(__file__).resolve().parent.parent
# Location of the persisted Chroma vectorstore.
DB_DIR = BASE_DIR / "db"
def build_general_qa_chain(model_name=None):
    """Build a RetrievalQA chain over the persisted course-material vectorstore.

    Builds (or reuses) the Chroma DB at ``DB_DIR``, wires it to a retriever
    and a chat model, and returns a chain that also surfaces the source
    documents used for each answer.

    Args:
        model_name: Optional chat-model name passed to ChatOpenAI.
            Defaults to "gpt-4o-mini" when None/falsy.

    Returns:
        A RetrievalQA chain with ``return_source_documents=True``.
    """
    # First-run bootstrap: if the vectorstore directory is missing, run the
    # ingestion pipeline. Imported lazily so normal startup skips the cost
    # of loading those scripts.
    if not DB_DIR.exists():
        print("📦 No DB found. Building vectorstore...")
        from scripts import load_documents, chunk_and_embed, setup_vectorstore

        load_documents.main()
        chunk_and_embed.main()
        setup_vectorstore.main()

    # Must match the embedding model used at ingestion time, otherwise
    # similarity search against the persisted vectors is meaningless.
    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = Chroma(persist_directory=str(DB_DIR), embedding_function=embedding)

    template = """Use the following context to answer the question.
If the answer isn't found in the context, use your general knowledge but say so.
Do not answer questions that are completely irrelevant to the main points of the context.
Always cite your sources at the end with 'Source: <filename>' when using course materials.
Context: {context}
Question: {question}
Helpful Answer:"""
    QA_PROMPT = PromptTemplate(
        template=template,
        input_variables=["context", "question"]
    )

    # `model=` is the current langchain_openai kwarg (`model_name=` is a
    # legacy alias). temperature=0.0 keeps answers deterministic.
    llm = ChatOpenAI(model=model_name or "gpt-4o-mini", temperature=0.0)

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        # k=4: stuff the top-4 chunks into the prompt context.
        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
        chain_type_kwargs={"prompt": QA_PROMPT},
        return_source_documents=True
    )
    return qa_chain