import os
import logging
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
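
# Pull environment variables (e.g. OPENAI_API_KEY used by OpenAIEmbeddings) from a
# local .env file; the exact contents of that file are assumed, not shown here.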
load_dotenv()
router = APIRouter()
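# Note: this APIRouter is expected to be mounted from the main FastAPI app
# (e.g. app.include_router(router)); the name of that app module is an assumption.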
logger = logging.getLogger("pdf_ingestion")


class IngestRequest(BaseModel):
    user_id: int


@router.post("/ingest-pdfs")
async def ingest_pdfs(request: IngestRequest):
    """Rebuild the FAISS vector store for a single user from their PDF directory."""
    user_id = request.user_id
    user_pdf_path = f"./pdfs/user_{user_id}"
    user_vector_path = f"./vector_store/user_{user_id}"

    if not os.path.exists(user_pdf_path):
        raise HTTPException(status_code=404, detail=f"No PDF directory found for user {user_id}")

    try:
        logger.info(f"📥 Loading PDFs for user {user_id} from {user_pdf_path}")

        # Load every PDF in the user's directory and split it into overlapping chunks.
        loader = PyPDFDirectoryLoader(user_pdf_path)
        documents = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        split_docs = splitter.split_documents(documents)

        # Embed the chunks and persist the per-user FAISS index to disk.
        embeddings = OpenAIEmbeddings()
        vector_store = FAISS.from_documents(split_docs, embeddings)
        os.makedirs(user_vector_path, exist_ok=True)
        vector_store.save_local(user_vector_path)

        logger.info(f"✅ Re-ingested and saved vector store for user {user_id}")
        return {"message": f"Vector store updated for user {user_id}", "documents_ingested": len(split_docs)}
    except Exception as e:
        logger.error(f"❌ PDF ingestion failed: {e}")
        raise HTTPException(status_code=500, detail="Failed to ingest PDFs and build vector store.")
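

# Illustrative sketch (an assumption, not part of the original route): one way a
# companion query endpoint could reload the index saved above. The request shape
# and the `k` value are hypothetical; `allow_dangerous_deserialization=True` is
# required by newer langchain_community releases when loading a pickled FAISS store.
#
# class QueryRequest(BaseModel):
#     user_id: int
#     question: str
#
# @router.post("/query")
# async def query_pdfs(request: QueryRequest):
#     user_vector_path = f"./vector_store/user_{request.user_id}"
#     if not os.path.exists(user_vector_path):
#         raise HTTPException(status_code=404, detail=f"No vector store found for user {request.user_id}")
#     vector_store = FAISS.load_local(
#         user_vector_path,
#         OpenAIEmbeddings(),
#         allow_dangerous_deserialization=True,
#     )
#     docs = vector_store.similarity_search(request.question, k=4)
#     return {"chunks": [doc.page_content for doc in docs]}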