import os
from pathlib import Path
from typing import Optional

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Two directory levels above this file.
BASE_DIR = Path(__file__).resolve().parent.parent
# Directory passed to Chroma as its persistence location.
DB_DIR = BASE_DIR / "db"

# Prompt handed to the QA chain; "{context}" and "{question}" are the two
# input variables declared on the PromptTemplate below.
_QA_TEMPLATE = """Use the following context to answer the question.
If the answer isn't found in the context, use your general knowledge but say so.
Do not answer questions that are completely irrelevant to the main points of the context.
Always cite your sources at the end with 'Source: ' when using course materials.

Context: {context}

Question: {question}
Helpful Answer:"""


def _vectorstore_is_populated(db_dir: Path) -> bool:
    """Return True when *db_dir* is a directory containing at least one entry."""
    return db_dir.is_dir() and any(db_dir.iterdir())


def build_general_qa_chain(model_name: Optional[str] = None) -> RetrievalQA:
    """Build a RetrievalQA chain over the Chroma store persisted in ``DB_DIR``.

    When ``DB_DIR`` is missing or empty, the ``load_documents``,
    ``chunk_and_embed`` and ``setup_vectorstore`` scripts are run first, in
    that order.

    Args:
        model_name: Chat model name passed to ``ChatOpenAI``. Falls back to
            ``"gpt-4o-mini"`` when ``None`` or empty.

    Returns:
        A ``RetrievalQA`` chain created with ``return_source_documents=True``
        whose retriever is configured with ``search_kwargs={"k": 4}``.
    """
    # An existing but empty directory (e.g. left behind by an interrupted
    # build) is treated the same as a missing one, so the chain is never
    # silently built on top of an empty directory.
    if not _vectorstore_is_populated(DB_DIR):
        print("📦 No DB found. Building vectorstore...")
        # Imported lazily so the build scripts are only loaded when a build
        # is actually required.
        from scripts import load_documents, chunk_and_embed, setup_vectorstore

        load_documents.main()
        chunk_and_embed.main()
        setup_vectorstore.main()

    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = Chroma(
        persist_directory=str(DB_DIR),
        embedding_function=embedding,
    )

    qa_prompt = PromptTemplate(
        template=_QA_TEMPLATE,
        input_variables=["context", "question"],
    )

    llm = ChatOpenAI(model_name=model_name or "gpt-4o-mini", temperature=0.0)

    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
        chain_type_kwargs={"prompt": qa_prompt},
        return_source_documents=True,
    )