Spaces:

Manjesh501
/

hackrx

Running

App Files Files Community

hackrx / main.py

Manjesh501

Initial commit with all project files

e159fb9 8 days ago

raw

history blame contribute delete

9.39 kB

	from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	import asyncio
	import time
	from contextlib import asynccontextmanager
	from typing import Dict, Any

	from config import settings
	from models.schemas import QueryRequest, QueryResponse, ErrorResponse, LegacyQueryRequest
	from services.query_service import QueryService

	# Global service instance.
	# Starts as None; ``lifespan`` assigns a QueryService to it during startup,
	# and ``get_query_service`` / ``health_check`` read it afterwards.
	query_service = None

	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Run startup and shutdown logic around the application's lifetime.

	    Startup creates a ``QueryService``, publishes it through the module-level
	    ``query_service`` global and awaits its ``initialize()``. If initialization
	    raises, the error is printed and re-raised. After the ``yield`` (shutdown)
	    only a message is printed.
	    """
	    global query_service

	    # ---- startup ----
	    print("🚀 Starting HackRx 6.0 Intelligent Query-Retrieval System...")
	    service = QueryService()
	    # Publish the instance before initializing it, so the global is already
	    # set while ``initialize()`` runs.
	    query_service = service

	    try:
	        await service.initialize()
	        print("✅ All services initialized successfully")
	    except Exception as e:
	        print(f"❌ Failed to initialize services: {e}")
	        raise

	    yield

	    # ---- shutdown ----
	    print("🔄 Shutting down services...")

	# FastAPI application object; name and version come from ``settings`` and
	# startup/shutdown handling is delegated to ``lifespan``.
	app = FastAPI(
	    lifespan=lifespan,
	    title=settings.PROJECT_NAME,
	    version=settings.VERSION,
	    description="LLM-Powered Intelligent Query-Retrieval System for HackRx 6.0",
	)

	# Add CORS middleware.
	# Every origin, method and header is allowed, so credentials must stay
	# disabled: a wildcard origin may not be combined with credentialed requests,
	# and enabling both would let any website call this API with the user's
	# cookies. To support credentials, list the trusted origins explicitly
	# instead of using "*".
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	def get_query_service() -> QueryService:
	    """Provide the shared query service to endpoints (FastAPI dependency).

	    Returns:
	        The object currently stored in the module-level ``query_service``.

	    Raises:
	        HTTPException: 503 when ``query_service`` is still ``None``.
	    """
	    if query_service is not None:
	        return query_service
	    raise HTTPException(status_code=503, detail="Service not initialized")

	@app.get("/")
	async def root():
	    """Return static API information: name, version, endpoints and request formats."""
	    endpoints = {
	        "main": "api/v1/hackrx/run",
	        "legacy": "hackrx/run",
	        "detailed": "api/v1/hackrx/run/detailed",
	        "health": "health",
	        "docs": "docs",
	    }
	    # Example payload shapes for the two request styles the API accepts.
	    standard_format = {
	        "documents": "string (URL)",
	        "questions": ["array", "of", "strings"],
	    }
	    legacy_format = {
	        "query": "string",
	        "document_url": "string (URL) - Optional, uses local docs if not provided",
	    }
	    return {
	        "message": "HackRx 6.0 - Intelligent Query-Retrieval System",
	        "version": settings.VERSION,
	        "status": "running",
	        "endpoints": endpoints,
	        "request_formats": {
	            "standard": standard_format,
	            "legacy": legacy_format,
	        },
	    }

	@app.get("/health")
	async def health_check():
	    """Health check endpoint with Pinecone status.

	    Returns:
	        A dict with:
	        - ``status``: "healthy" when every service flag is truthy, otherwise
	          "degraded";
	        - ``timestamp``: the current ``time.time()`` value;
	        - ``services``: readiness flags for the query, embedding, LLM and
	          Pinecone services (all ``False`` while no query service exists);
	        - ``pinecone_stats``: index statistics, only when the Pinecone service
	          is initialized and the statistics call succeeds;
	        - ``pinecone_stats_error``: the error text when that call fails. In
	          that case ``status`` is "degraded" rather than the request failing.
	    """
	    # Reading a module-level name needs no ``global`` statement; take one
	    # snapshot so every check below looks at the same object.
	    service = query_service

	    services_status = {
	        "query_service": service is not None,
	        "embedding_service": service.embedding_service.is_initialized if service else False,
	        "llm_service": service.llm_service.is_initialized if service else False,
	        "pinecone_service": service.embedding_service.pinecone_service.is_initialized if service else False,
	    }

	    health_status = {
	        "status": "healthy" if all(services_status.values()) else "degraded",
	        "timestamp": time.time(),
	        "services": services_status,
	    }

	    # Add Pinecone stats if available. A failure here must not turn the
	    # health probe itself into an error response, so it is reported in-band.
	    if services_status["pinecone_service"]:
	        pinecone = service.embedding_service.pinecone_service
	        try:
	            health_status["pinecone_stats"] = await pinecone.get_index_stats()
	        except Exception as exc:
	            health_status["status"] = "degraded"
	            health_status["pinecone_stats_error"] = str(exc)

	    return health_status

	@app.post(f"{settings.API_V1_PREFIX}/hackrx/run", response_model=QueryResponse)
	async def process_query(
	    request: QueryRequest,
	    service: QueryService = Depends(get_query_service),
	):
	    """
	    Main endpoint for processing document queries.

	    The request must carry a document reference and at least one question.
	    It is then handed to the query service with ``include_detailed=False``
	    and the service's result is returned unchanged.

	    Errors:
	    - 400 when ``documents`` or ``questions`` is empty
	    - 500 for any other failure, with the exception text in the detail
	    """
	    try:
	        # Reject incomplete requests before doing any work.
	        missing_detail = None
	        if not request.documents:
	            missing_detail = "Document URL is required"
	        elif not request.questions:
	            missing_detail = "At least one question is required"
	        if missing_detail is not None:
	            raise HTTPException(status_code=400, detail=missing_detail)

	        return await service.process_query(request, include_detailed=False)

	    except HTTPException:
	        # Deliberate HTTP errors keep their own status code.
	        raise
	    except Exception as e:
	        raise HTTPException(
	            status_code=500,
	            detail=f"Internal server error: {str(e)}"
	        )

	def _find_local_document() -> str:
	    """Return the path of the first supported document in ``<cwd>/docs``.

	    Supported extensions are ``.pdf``, ``.docx`` and ``.txt`` (case-insensitive).

	    Raises:
	        HTTPException: 400 when the docs folder is missing or is not a
	            directory, or when it contains no supported document.
	    """
	    import os  # local import: ``os`` is not imported at module level

	    docs_folder = os.path.join(os.getcwd(), "docs")
	    if not os.path.isdir(docs_folder):
	        raise HTTPException(
	            status_code=400,
	            detail="No document URL provided and docs folder not found"
	        )

	    # os.listdir returns entries in arbitrary order; sort them so the same
	    # document is chosen on every call and on every machine.
	    for filename in sorted(os.listdir(docs_folder)):
	        if filename.lower().endswith(('.pdf', '.docx', '.txt')):
	            document_source = os.path.join(docs_folder, filename)
	            print(f"Using local document: {document_source}")
	            return document_source

	    raise HTTPException(
	        status_code=400,
	        detail="No document URL provided and no documents found in docs folder"
	    )


	@app.post("/hackrx/run", response_model=QueryResponse)
	async def process_query_legacy(
	    request: LegacyQueryRequest,
	    service: QueryService = Depends(get_query_service)
	):
	    """
	    Backward-compatible endpoint for legacy request format

	    Accepts requests in the format:
	    {
	    "query": "Your question here",
	    "document_url": "https://..." // Optional - uses local docs if not provided
	    }

	    And converts them to the standard format internally.

	    When ``document_url`` is empty, the first supported file (alphabetical
	    order) in the ``docs`` folder of the current working directory is used.

	    Errors:
	    - 400 when no document URL is given and no local document can be found
	    - 500 for any other failure, with the exception text in the detail
	    """
	    try:
	        # Fall back to a local document only when the request names none.
	        document_source = request.document_url or _find_local_document()

	        # Convert legacy format to standard format
	        standard_request = QueryRequest(
	            documents=document_source,
	            questions=[request.query]
	        )

	        # Process using the standard service method
	        return await service.process_query(standard_request, include_detailed=False)

	    except HTTPException:
	        # Deliberate HTTP errors (such as the 400s above) keep their status.
	        raise
	    except Exception as e:
	        raise HTTPException(
	            status_code=500,
	            detail=f"Internal server error: {str(e)}"
	        ) from e

	@app.post(f"{settings.API_V1_PREFIX}/hackrx/run/detailed", response_model=QueryResponse)
	async def process_query_detailed(
	    request: QueryRequest,
	    service: QueryService = Depends(get_query_service)
	):
	    """
	    Extended endpoint that requests detailed responses
	    (``include_detailed=True``), intended to include:
	    - Confidence scores
	    - Reasoning explanations
	    - Relevant document clauses
	    - Token usage statistics

	    Errors:
	    - 400 when ``documents`` or ``questions`` is empty
	    - 500 for any other failure, with the exception text in the detail
	    """
	    try:
	        # Same request validation as the main /hackrx/run endpoint.
	        if not request.documents:
	            raise HTTPException(status_code=400, detail="Document URL is required")

	        if not request.questions:
	            raise HTTPException(status_code=400, detail="At least one question is required")

	        return await service.process_query(request, include_detailed=True)

	    except HTTPException:
	        # Keep deliberate HTTP errors intact instead of rewrapping them as 500.
	        raise
	    except Exception as e:
	        raise HTTPException(
	            status_code=500,
	            detail=f"Internal server error: {str(e)}"
	        ) from e

	@app.post(f"{settings.API_V1_PREFIX}/analyze-document")
	async def analyze_document(
	    document_url: str,
	    service: QueryService = Depends(get_query_service)
	):
	    """
	    Analyze document structure and extract metadata

	    Returns whatever ``service.analyze_document_structure(document_url)``
	    produces.

	    Errors:
	    - an ``HTTPException`` raised by the service is passed through unchanged
	    - 500 for any other failure, with the exception text in the detail
	    """
	    try:
	        return await service.analyze_document_structure(document_url)

	    except HTTPException:
	        # Keep deliberate HTTP errors intact instead of rewrapping them as 500.
	        raise
	    except Exception as e:
	        raise HTTPException(
	            status_code=500,
	            detail=f"Document analysis failed: {str(e)}"
	        ) from e

	@app.post(f"{settings.API_V1_PREFIX}/summarize-document")
	async def summarize_document(
	    document_url: str,
	    max_length: int = 500,
	    service: QueryService = Depends(get_query_service)
	):
	    """
	    Generate a summary of the document

	    Calls ``service.get_document_summary(document_url, max_length)`` and
	    returns the result under the ``summary`` key.

	    Errors:
	    - an ``HTTPException`` raised by the service is passed through unchanged
	    - 500 for any other failure, with the exception text in the detail
	    """
	    try:
	        summary = await service.get_document_summary(document_url, max_length)
	        return {"summary": summary}

	    except HTTPException:
	        # Keep deliberate HTTP errors intact instead of rewrapping them as 500.
	        raise
	    except Exception as e:
	        raise HTTPException(
	            status_code=500,
	            detail=f"Document summarization failed: {str(e)}"
	        ) from e

	@app.exception_handler(Exception)
	async def global_exception_handler(request, exc):
	    """Build a JSON 500 response for an exception routed to this handler.

	    The body is an ``ErrorResponse`` carrying a fixed error title, the
	    exception text and the URL of the request being served.
	    """
	    error_body = ErrorResponse(
	        error="Internal Server Error",
	        message=str(exc),
	        details={"path": str(request.url)},
	    )
	    return JSONResponse(status_code=500, content=error_body.dict())

	if __name__ == "__main__":
	    # Script entry point: serve the app with uvicorn on all interfaces.
	    import uvicorn
	    import os

	    # Port comes from the PORT environment variable, defaulting to 8000.
	    port = int(os.getenv("PORT", 8000))

	    # Startup banner with the most useful local URLs.
	    print(f"🚀 Starting {settings.PROJECT_NAME}")
	    print(f"📖 API Documentation: http://localhost:{port}/docs")
	    print(f"🔗 Main Endpoint: http://localhost:{port}{settings.API_V1_PREFIX}/hackrx/run")

	    uvicorn.run("main:app", host="0.0.0.0", port=port, log_level="info")