import os from pathlib import Path import re # Disable Chroma telemetry (optional) os.environ["CHROMA_TELEMETRY_ENABLED"] = "false" # Check if DB exists, else build DB_DIR = Path(__file__).parent / "db" if not DB_DIR.exists() or not any(DB_DIR.iterdir()): print("š¦ No DB found. Building vectorstore...") import scripts.load_documents import scripts.chunk_and_embed import scripts.setup_vectorstore else: print("ā DB found. Skipping build.") import gradio as gr from scripts.router_chain import build_router_chain OPENAI_KEY = os.getenv("OPENAI_API_KEY", None) MODEL_NAME = os.getenv("OPENAI_MODEL", "gpt-4o-mini") if not OPENAI_KEY: print("WARNING: OPENAI_API_KEY not set. The app may fail at runtime.") # Build the router once (keeps vectorstore & models in memory) router = build_router_chain(model_name=MODEL_NAME) def chat_fn(message, history): if not message: return history, "" # call router result = router.invoke({"input": message}) # RetrievalQA returns dict with 'result' key (and maybe 'source_documents') answer = result.get("result") if isinstance(result, dict) else str(result) # append sources if present sources = None if isinstance(result, dict) and "source_documents" in result and result["source_documents"]: try: sources = list({str(d.metadata.get("source", "unknown")) for d in result["source_documents"]}) except Exception: sources = None if sources: answer = f"{answer}\n\nš Sources: {', '.join(sources)}" def format_answer(answer): # Wrap LaTeX formulas in a span so MathJax can render them answer = re.sub(r"\$\$(.+?)\$\$", r'$$\1$$', answer) return f"