"""FastAPI route that (re)builds a per-user FAISS vector store from PDFs."""

import logging
import os

from dotenv import load_dotenv
from fastapi import APIRouter, HTTPException
from fastapi.concurrency import run_in_threadpool
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from pydantic import BaseModel, EmailStr

# Load variables from a local .env file (if present) into the process
# environment at import time, before any request is served.
load_dotenv()

router = APIRouter()
logger = logging.getLogger("pdf_ingestion")


class IngestRequest(BaseModel):
    """Request body for ``POST /ingest-pdfs``."""

    # Declared as ``int`` so the value interpolated into the on-disk paths
    # below can never contain path separators.
    user_id: int


def _build_vector_store(pdf_dir: str, vector_dir: str) -> int:
    """Load, chunk, embed and persist every PDF found in *pdf_dir*.

    This is deliberately a plain synchronous function: all of its steps are
    blocking, so the route runs it in a worker thread.

    Args:
        pdf_dir: Directory handed to ``PyPDFDirectoryLoader``.
        vector_dir: Directory the FAISS index is saved to (created if missing).

    Returns:
        The number of chunks written to the index, or ``0`` when the loader
        produced nothing to index (in which case *vector_dir* is untouched).
    """
    documents = PyPDFDirectoryLoader(pdf_dir).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)

    # With zero chunks there is nothing to embed or index; bail out before
    # calling the embedding backend so the caller can report it clearly
    # instead of surfacing an opaque failure from the index build.
    if not chunks:
        return 0

    vector_store = FAISS.from_documents(chunks, OpenAIEmbeddings())
    os.makedirs(vector_dir, exist_ok=True)
    vector_store.save_local(vector_dir)
    return len(chunks)


@router.post("/ingest-pdfs")
async def ingest_pdfs(request: IngestRequest):
    """Rebuild the vector store for ``request.user_id`` from that user's PDFs.

    PDFs are read from ``./pdfs/user_<id>`` and the resulting index is saved
    to ``./vector_store/user_<id>``.

    Returns:
        A dict with a human-readable ``message`` and ``documents_ingested``,
        the number of text chunks stored in the index.

    Raises:
        HTTPException: 404 when the user's PDF directory does not exist or
            yields no indexable content; 500 when loading, embedding or
            saving fails for any other reason.
    """
    user_id = request.user_id
    user_pdf_path = f"./pdfs/user_{user_id}"
    user_vector_path = f"./vector_store/user_{user_id}"

    # isdir (not exists): a regular file at this path is not a PDF directory.
    if not os.path.isdir(user_pdf_path):
        raise HTTPException(
            status_code=404,
            detail=f"No PDF directory found for user {user_id}",
        )

    logger.info("📥 Loading PDFs for user %s from %s", user_id, user_pdf_path)
    try:
        # The pipeline is blocking end to end; running it inline in this
        # coroutine would stall the event loop for every other request.
        chunk_count = await run_in_threadpool(
            _build_vector_store, user_pdf_path, user_vector_path
        )
    except Exception as e:
        # Route-level boundary: record the full traceback, hide internals
        # from the client, and keep the original error chained as the cause.
        logger.exception("❌ PDF ingestion failed: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Failed to ingest PDFs and build vector store.",
        ) from e

    # Raised outside the try block so it is not converted into a 500.
    if chunk_count == 0:
        raise HTTPException(
            status_code=404,
            detail=f"No readable PDF content found for user {user_id}",
        )

    logger.info("✅ Re-ingested and saved vector store for user %s", user_id)
    return {
        "message": f"Vector store updated for user {user_id}",
        "documents_ingested": chunk_count,
    }