# ResearchMate / app.py
# (Hugging Face file-viewer residue: "Ananthakr1shnan's picture", "Updated files", commit f48599d)
# ==== Hugging Face Spaces Environment Setup (from main.py) ====
import os
import sys
import json
import asyncio
from typing import Dict, List, Optional, Any
from datetime import datetime
from pathlib import Path
from contextlib import asynccontextmanager
# Import centralized settings
from src.settings import settings
# Set up environment variables for Hugging Face Spaces compatibility
def setup_environment():
    """Point data, cache and temp locations at /tmp and create those directories.

    Every variable listed below is written into ``os.environ`` unconditionally
    (existing values are overwritten), ``/tmp/cache`` is put at the front of
    ``sys.path``, and each required directory is created and chmod-ed to 0o777.
    A directory that cannot be created or chmod-ed only produces a printed
    warning; the function never raises for that reason.
    """
    overrides = {
        'DATA_DIR': '/tmp/researchmate/data',
        'LOGS_DIR': '/tmp/researchmate/logs',
        'CHROMA_DIR': '/tmp/researchmate/chroma_persist',
        'UPLOADS_DIR': '/tmp/researchmate/uploads',
        'CHROMA_DB_DIR': '/tmp/researchmate/chroma_db',
        'CONFIG_DIR': '/tmp/researchmate/config',
        'TEMP_DIR': '/tmp/researchmate/tmp',
        'CHROMA_PERSIST_DIR': '/tmp/researchmate/chroma_persist',
        'MPLCONFIGDIR': '/tmp/matplotlib',
        'TRANSFORMERS_CACHE': '/tmp/transformers',
        'HF_HOME': '/tmp/huggingface',
        'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
        'HF_DATASETS_CACHE': '/tmp/datasets',
        'HUGGINGFACE_HUB_CACHE': '/tmp/huggingface_hub',
        'XDG_CACHE_HOME': '/tmp/cache',
        'PYTORCH_KERNEL_CACHE_PATH': '/tmp/cache',
        'TORCH_HOME': '/tmp/cache',
        'NLTK_DATA': '/tmp/cache/nltk_data',
        'TOKENIZERS_PARALLELISM': 'false',
        'HOME': '/tmp/cache',
        'TMPDIR': '/tmp/researchmate/tmp',
        'HF_DATASETS_OFFLINE': '1',
        'HF_HUB_OFFLINE': '0',
    }
    # Mapping.update assigns key by key, exactly like an explicit loop.
    os.environ.update(overrides)

    sys.path.insert(0, '/tmp/cache')

    required_dirs = (
        '/tmp/researchmate/data',
        '/tmp/researchmate/logs',
        '/tmp/researchmate/chroma_persist',
        '/tmp/researchmate/uploads',
        '/tmp/researchmate/chroma_db',
        '/tmp/researchmate/config',
        '/tmp/researchmate/tmp',
        '/tmp/matplotlib',
        '/tmp/transformers',
        '/tmp/huggingface',
        '/tmp/sentence_transformers',
        '/tmp/datasets',
        '/tmp/huggingface_hub',
        '/tmp/cache',
        '/tmp/cache/nltk_data',
    )
    for dir_name in required_dirs:
        try:
            target = Path(dir_name)
            target.mkdir(parents=True, exist_ok=True)
            target.chmod(0o777)
        except Exception as exc:
            # Best effort: report and move on to the next directory.
            print(f"⚠ Warning: Could not create directory {dir_name}: {exc}")
# Executed at import time, ahead of the FastAPI and src.components imports
# that follow, so the environment variables are in place when those load.
setup_environment()
# Add the project root (the directory containing this file) to the Python path
sys.path.append(str(Path(__file__).parent))
from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Request, Depends
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, Field
import uvicorn
# Import settings and ResearchMate components
from src.components.research_assistant import ResearchMate
from src.components.citation_network import CitationNetworkAnalyzer
from src.components.auth import AuthManager
# Initialize only essential components at startup (fast components only)
auth_manager = AuthManager()
# NOTE(review): with auto_error=False FastAPI's HTTPBearer is documented to
# return None instead of raising when the header is absent — confirm against
# the installed FastAPI version.
security = HTTPBearer(auto_error=False)
# ResearchMate and Citation Analyzer start as None and are filled in by
# initialize_research_mate() (will be done during loading screen)
research_mate = None
citation_analyzer = None
# Global initialization flags:
#   research_mate_initialized  - set True once both components were built
#   initialization_in_progress - True while initialize_research_mate() runs
research_mate_initialized = False
initialization_in_progress = False
async def initialize_research_mate():
    """Initialize ResearchMate and Citation Analyzer in the background.

    Both constructors are run in a worker thread so the event loop stays
    responsive. On success the module globals ``research_mate`` and
    ``citation_analyzer`` are set and ``research_mate_initialized`` becomes
    True. On any failure the error is printed, both globals are reset to None
    and the flag stays False. ``initialization_in_progress`` is True for the
    duration of the call.

    The call is a no-op when an initialization is already running or has
    already completed, so it is safe to trigger it more than once.
    """
    global research_mate, citation_analyzer, research_mate_initialized, initialization_in_progress
    # No await happens between this check and the flag assignment, so two
    # coroutines on the same loop cannot both pass it.
    if initialization_in_progress or research_mate_initialized:
        return
    initialization_in_progress = True
    print("🚀 Starting ResearchMate background initialization...")
    try:
        # Run initialization in thread pool to avoid blocking
        import concurrent.futures
        # get_running_loop() is the documented way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()
        with concurrent.futures.ThreadPoolExecutor() as executor:
            print("📊 Initializing Citation Network Analyzer...")
            citation_analyzer = await loop.run_in_executor(executor, CitationNetworkAnalyzer)
            print("✅ Citation Network Analyzer initialized!")
            print("🧠 Initializing ResearchMate core...")
            research_mate = await loop.run_in_executor(executor, ResearchMate)
            print("✅ ResearchMate core initialized!")
        research_mate_initialized = True
        print("🎉 All components initialized successfully!")
    except Exception as e:
        print(f"❌ Failed to initialize components: {e}")
        print("⚠️ Server will start but some features may not work")
        # Leave no half-initialized state behind.
        research_mate = None
        citation_analyzer = None
        research_mate_initialized = False
    finally:
        initialization_in_progress = False
# Pydantic models for API
class SearchQuery(BaseModel):
    # Request body accepted by the /api/search and /api/search-fast routes.
    # query: required search string.
    query: str = Field(..., description="Search query")
    # max_results: defaults to 10; ge/le restrict it to the range 1..50.
    max_results: int = Field(default=10, ge=1, le=50, description="Maximum number of results")
class QuestionQuery(BaseModel):
    # Request body accepted by /api/ask and /api/projects/{project_id}/ask.
    # question: required question text.
    question: str = Field(..., description="Research question")
class ProjectCreate(BaseModel):
    # Request body accepted by POST /api/projects; all three fields are required.
    name: str = Field(..., description="Project name")
    research_question: str = Field(..., description="Research question")
    # keywords: list of strings passed through to research_mate.create_project.
    keywords: List[str] = Field(..., description="Keywords")
class ProjectQuery(BaseModel):
    # Project id plus a question about that project; both fields are required.
    # NOTE(review): no route in the visible part of this file uses this model.
    project_id: str = Field(..., description="Project ID")
    question: str = Field(..., description="Question about the project")
class TrendQuery(BaseModel):
    # Request body accepted by the /api/trends, /api/trends/temporal and
    # /api/trends/gaps routes.
    # topic: required topic string.
    topic: str = Field(..., description="Research topic")
# Authentication models
class LoginRequest(BaseModel):
    # Request body accepted by POST /api/auth/login; both fields are required
    # and are forwarded to auth_manager.authenticate_user.
    username: str = Field(..., description="Username")
    password: str = Field(..., description="Password")
class RegisterRequest(BaseModel):
    # Request body accepted by POST /api/auth/register; all fields are required
    # and are forwarded to auth_manager.create_user.
    username: str = Field(..., description="Username")
    # email is typed as a plain str, so no e-mail format validation happens here.
    email: str = Field(..., description="Email address")
    password: str = Field(..., description="Password")
# Authentication dependency for API endpoints
async def get_current_user_dependency(request: Request, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Resolve the authenticated user for an API request.

    The bearer token from the Authorization header is tried first; when that
    yields no user, the ``authToken`` cookie is tried.

    Returns:
        Whatever ``auth_manager.verify_token`` returned for the accepted token.

    Raises:
        HTTPException: 401 when neither source produces a user.
    """
    # Authorization header takes precedence over the cookie.
    authenticated = auth_manager.verify_token(credentials.credentials) if credentials else None

    if not authenticated:
        cookie_token = request.cookies.get('authToken')
        if cookie_token:
            authenticated = auth_manager.verify_token(cookie_token)

    if authenticated:
        return authenticated
    raise HTTPException(status_code=401, detail="Authentication required")
# Authentication for web pages (checks both header and cookie)
async def get_current_user_web(request: Request):
    """Get current user for web page requests (checks both Authorization header and cookies).

    Unlike the API dependency this never raises for a missing or invalid
    login; it returns a falsy value so page handlers can redirect instead.

    Returns:
        The result of ``auth_manager.verify_token`` for the first token that
        yields a user, otherwise a falsy value (``None`` when no token exists).
    """
    user = None
    # First try Authorization header
    try:
        credentials = await security(request)
        if credentials:
            user = auth_manager.verify_token(credentials.credentials)
    except Exception:
        # Best effort: a failing header check falls through to the cookie.
        # Only Exception is caught so task cancellation and interpreter
        # shutdown signals still propagate.
        user = None
    # If no user from header, try cookie
    if not user:
        token = request.cookies.get('authToken')
        if token:
            user = auth_manager.verify_token(token)
    return user
# Background task to clean up expired sessions
async def cleanup_expired_sessions():
    """Periodically ask the auth manager to drop expired sessions.

    Loops forever: one cleanup pass, then a 30-minute sleep. A failing pass
    is printed and does not end the loop; the loop stops only when the task
    running it is cancelled.
    """
    pause_seconds = 30 * 60  # Run cleanup every 30 minutes
    while True:
        try:
            removed = auth_manager.cleanup_expired_sessions()
            if removed > 0:
                print(f"Cleaned up {removed} expired sessions")
        except Exception as exc:
            print(f"Error cleaning up sessions: {exc}")
        await asyncio.sleep(pause_seconds)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: start background work, tear it down on shutdown.

    On startup two tasks are created: the ResearchMate initialization
    (non-blocking, so the server can serve the loading page meanwhile) and the
    periodic session cleanup. On shutdown any task still running is cancelled
    and awaited.
    """
    # Keep references to both tasks: the event loop holds only weak references,
    # so an unreferenced task may be garbage-collected before it finishes.
    init_task = asyncio.create_task(initialize_research_mate())
    cleanup_task = asyncio.create_task(cleanup_expired_sessions())
    try:
        yield
    finally:
        for task in (init_task, cleanup_task):
            if not task.done():
                task.cancel()
        # return_exceptions=True turns the resulting CancelledError (or any
        # error a task ended with) into a plain result instead of re-raising.
        await asyncio.gather(init_task, cleanup_task, return_exceptions=True)
# Initialize FastAPI app with lifespan
# Application object; startup/shutdown work is delegated to the lifespan
# context manager defined above, debug mode comes from settings.
app = FastAPI(
    title="ResearchMate API",
    description="AI Research Assistant powered by Groq Llama 3.3 70B",
    version="1.0.0",
    debug=settings.server.debug,
    lifespan=lifespan
)
# Add CORS middleware; origins, methods and headers all come from settings.
# NOTE(review): allow_credentials=True combined with a wildcard origin is
# rejected by browsers — confirm settings.security.cors_origins lists
# explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.security.cors_origins,
    allow_credentials=True,
    allow_methods=settings.security.cors_methods,
    allow_headers=settings.security.cors_headers,
)
# Mount static files with cache control for development
# Resolve the static directory from settings and create it if missing, so the
# mount further down does not point at a non-existent path.
static_dir = Path(settings.get_static_dir())
static_dir.mkdir(parents=True, exist_ok=True)
# Custom static files class to add no-cache headers for development
class NoCacheStaticFiles(StaticFiles):
    """StaticFiles variant whose file responses tell clients not to cache.

    Intended for development, where stale CSS/JS in the browser cache is a
    nuisance. The override always builds a fresh ``FileResponse``.
    """

    def file_response(self, full_path, stat_result, scope, status_code=200):
        """Build the response for one static file with no-cache headers.

        Args:
            full_path: Path of the file to serve.
            stat_result: ``os.stat`` result for that file.
            scope: ASGI scope of the request (unused here).
            status_code: HTTP status for the response. Accepted so the
                override stays call-compatible with the base class method,
                which Starlette may call with an explicit status code.

        Returns:
            A ``FileResponse`` carrying Cache-Control, Pragma and Expires
            headers that disable caching.
        """
        response = FileResponse(
            path=full_path,
            status_code=status_code,
            stat_result=stat_result
        )
        # Add no-cache headers for development
        response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
        response.headers["Pragma"] = "no-cache"
        response.headers["Expires"] = "0"
        return response
# Serve the static directory under /static through the no-cache subclass.
app.mount("/static", NoCacheStaticFiles(directory=str(static_dir)), name="static")
# Templates: resolve the directory from settings, create it if missing, and
# build the Jinja2 renderer used by every HTML route below.
templates_dir = Path(settings.get_templates_dir())
templates_dir.mkdir(parents=True, exist_ok=True)
templates = Jinja2Templates(directory=str(templates_dir))
# Loading page route
@app.get("/loading", response_class=HTMLResponse)
async def loading_page(request: Request):
    """Render loading.html, the page other routes redirect to while initialization is pending."""
    context = {"request": request}
    return templates.TemplateResponse("loading.html", context)
# Authentication routes
@app.post("/api/auth/register")
async def register(request: RegisterRequest):
    """Create a user account through the auth manager.

    Returns:
        A success payload when ``auth_manager.create_user`` reports success.

    Raises:
        HTTPException: 400 carrying the auth manager's error otherwise.
    """
    outcome = auth_manager.create_user(request.username, request.email, request.password)
    if not outcome["success"]:
        raise HTTPException(status_code=400, detail=outcome["error"])
    return {"success": True, "message": "Account created successfully"}
@app.post("/api/auth/login")
async def login(request: LoginRequest):
    """Authenticate a user and hand back the session token.

    Returns:
        ``success``, ``token``, ``user_id`` and ``username`` taken from the
        auth manager's result.

    Raises:
        HTTPException: 401 carrying the auth manager's error when
            authentication fails.
    """
    outcome = auth_manager.authenticate_user(request.username, request.password)
    if not outcome["success"]:
        raise HTTPException(status_code=401, detail=outcome["error"])
    payload = {"success": True}
    # Copy exactly the fields the client needs, in the documented order.
    for field in ("token", "user_id", "username"):
        payload[field] = outcome[field]
    return payload
@app.get("/login", response_class=HTMLResponse)
async def login_page(request: Request):
    """Render login.html, or redirect to /loading while initialization is pending."""
    # The flag is only read here, so no ``global`` declaration is required.
    if research_mate_initialized:
        return templates.TemplateResponse("login.html", {"request": request})
    return RedirectResponse(url="/loading", status_code=302)
@app.post("/api/auth/logout")
async def logout(request: Request):
    """Invalidate the caller's session (if any) and clear the auth cookie.

    Always answers with a success payload, whether or not a user was
    identified from the request.
    """
    # Identify the caller so their server-side session can be invalidated.
    active_user = await get_current_user_web(request)
    if active_user:
        auth_manager.logout_user(active_user['user_id'])

    body = {"success": True, "message": "Logged out successfully"}
    reply = JSONResponse(body)
    reply.delete_cookie("authToken", path="/")
    return reply
# Web interface routes (protected)
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render index.html for a signed-in user.

    Redirects to /loading while initialization is pending, and to /login when
    no user can be identified from the request.
    """
    # Initialization is checked before authentication.
    if not research_mate_initialized:
        return RedirectResponse(url="/loading", status_code=302)

    user = await get_current_user_web(request)
    if user:
        return templates.TemplateResponse("index.html", {"request": request, "user": user})
    return RedirectResponse(url="/login", status_code=302)
@app.get("/search", response_class=HTMLResponse)
async def search_page(request: Request):
    """Render search.html for a signed-in user.

    Redirects to /loading while initialization is pending, and to /login when
    no user can be identified from the request.
    """
    if not research_mate_initialized:
        return RedirectResponse(url="/loading", status_code=302)

    user = await get_current_user_web(request)
    if user:
        return templates.TemplateResponse("search.html", {"request": request, "user": user})
    return RedirectResponse(url="/login", status_code=302)
@app.get("/projects", response_class=HTMLResponse)
async def projects_page(request: Request):
    """Render projects.html for a signed-in user, otherwise redirect to /login."""
    user = await get_current_user_web(request)
    if user:
        context = {"request": request, "user": user}
        return templates.TemplateResponse("projects.html", context)
    return RedirectResponse(url="/login", status_code=302)
@app.get("/trends", response_class=HTMLResponse)
async def trends_page(request: Request):
    """Render trends.html for a signed-in user, otherwise redirect to /login."""
    user = await get_current_user_web(request)
    if user:
        context = {"request": request, "user": user}
        return templates.TemplateResponse("trends.html", context)
    return RedirectResponse(url="/login", status_code=302)
@app.get("/upload", response_class=HTMLResponse)
async def upload_page(request: Request):
    """Render upload.html for a signed-in user, otherwise redirect to /login."""
    user = await get_current_user_web(request)
    if user:
        context = {"request": request, "user": user}
        return templates.TemplateResponse("upload.html", context)
    return RedirectResponse(url="/login", status_code=302)
@app.get("/citation", response_class=HTMLResponse)
async def citation_page(request: Request):
    """Render citation.html with the citation network summary.

    When the citation analyzer has not been created yet, an empty summary is
    rendered instead.

    Raises:
        HTTPException: 500 carrying the error text when building the summary
            or rendering the template fails.
    """
    # NOTE(review): unlike the other HTML pages, this one performs no login
    # check — confirm that is intentional.
    try:
        if citation_analyzer is not None:
            summary = citation_analyzer.get_network_summary()
        else:
            # Analyzer not initialized yet: show the empty state.
            summary = {"total_papers": 0, "total_citations": 0, "networks": []}
        context = {"request": request, "summary": summary}
        return templates.TemplateResponse("citation.html", context)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
@app.get("/test-search", response_class=HTMLResponse)
async def test_search_page(request: Request):
    """Simple test page for debugging search.

    Serves the contents of ``test_search.html``, looked up relative to the
    process working directory.

    Raises:
        HTTPException: 404 when the file does not exist.
    """
    try:
        # Explicit encoding so the page reads the same on every platform
        # instead of depending on the locale default.
        with open("test_search.html", "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        # A missing debug page is "not found", not a server error.
        raise HTTPException(status_code=404, detail="test_search.html not found") from None
    return HTMLResponse(content=content)
# Health check endpoint for Azure
@app.get("/health")
async def health_check():
    """Health check endpoint for Azure and other platforms.

    Returns a fixed ``"ok"`` status and the current local time in ISO format;
    it does not inspect any component.
    """
    checked_at = datetime.now()
    return {"status": "ok", "timestamp": checked_at.isoformat()}
# API endpoints
@app.post("/api/search")
async def search_papers(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
    """Search for papers and feed the hits into the citation analyzer.

    Returns:
        The result dict from ``research_mate.search``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            search reports failure, 500 for any unexpected error.
    """
    try:
        if research_mate is None:
            raise HTTPException(status_code=503, detail="ResearchMate not initialized")
        rm = research_mate
        result = rm.search(query.query, query.max_results)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
        papers = result.get("papers", [])
        if papers and citation_analyzer is not None:  # Only add papers if citation analyzer is ready
            citation_analyzer.add_papers(papers)
        return result
    except HTTPException:
        # Deliberate 4xx/503 responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/ask")
async def ask_question(question: QuestionQuery, current_user: dict = Depends(get_current_user_dependency)):
    """Answer a research question through ``research_mate.ask``.

    Returns:
        The result dict from ``research_mate.ask``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            call reports failure, 500 for any unexpected error.
    """
    try:
        if research_mate is None:
            raise HTTPException(status_code=503, detail="ResearchMate not initialized")
        rm = research_mate
        result = rm.ask(question.question)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Question failed"))
        return result
    except HTTPException:
        # Deliberate 4xx/503 responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/upload")
async def upload_pdf(file: UploadFile = File(...), current_user: dict = Depends(get_current_user_dependency)):
    """Store an uploaded PDF temporarily, process it, and return the result.

    The file is written into the upload directory from settings, passed to
    ``research_mate.upload_pdf`` by path, and removed again whether or not
    processing succeeded.

    Returns:
        The result dict from ``research_mate.upload_pdf``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            file name does not end in ``.pdf`` or processing reports failure,
            500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    # The file name is client-controlled: keep only its final component so it
    # cannot point outside the upload directory, and tolerate a missing name.
    safe_name = Path(file.filename or "").name
    if not safe_name.endswith('.pdf'):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")
    upload_dir = Path(settings.get_upload_dir())
    file_path = upload_dir / safe_name
    try:
        # Save uploaded file to the upload directory
        upload_dir.mkdir(parents=True, exist_ok=True)
        content = await file.read()
        with open(file_path, "wb") as buffer:
            buffer.write(content)
        # Process PDF
        result = research_mate.upload_pdf(str(file_path))
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "PDF analysis failed"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Clean up file on every path, including failures during processing.
        if file_path.exists():
            file_path.unlink()
@app.post("/api/projects")
async def create_project(project: ProjectCreate, current_user: dict = Depends(get_current_user_dependency)):
    """Create a research project owned by the authenticated user.

    Returns:
        The result dict from ``research_mate.create_project``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when
            creation reports failure, 500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        user_id = current_user.get("user_id")
        result = research_mate.create_project(project.name, project.research_question, project.keywords, user_id)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Project creation failed"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects")
async def list_projects(current_user: dict = Depends(get_current_user_dependency)):
    """List the authenticated user's projects.

    Returns:
        The result dict from ``research_mate.list_projects``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            listing reports failure, 500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        user_id = current_user.get("user_id")
        result = research_mate.list_projects(user_id)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Failed to list projects"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects/{project_id}")
async def get_project(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
    """Fetch one project for the authenticated user.

    Returns:
        The result dict from ``research_mate.get_project``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 404 when the
            lookup reports failure, 500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        user_id = current_user.get("user_id")
        result = research_mate.get_project(project_id, user_id)
        if not result.get("success"):
            raise HTTPException(status_code=404, detail=result.get("error", "Project not found"))
        return result
    except HTTPException:
        # Without this clause the 404 above would reach clients as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/search")
async def search_project_literature(project_id: str, max_papers: int = 10, current_user: dict = Depends(get_current_user_dependency)):
    """Run a literature search for one of the authenticated user's projects.

    Args:
        project_id: Project identifier from the URL path.
        max_papers: Query parameter passed through to the search; defaults to 10.

    Returns:
        The result dict from ``research_mate.search_project_literature``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            search reports failure, 500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        user_id = current_user.get("user_id")
        result = research_mate.search_project_literature(project_id, max_papers, user_id)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Literature search failed"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/analyze")
async def analyze_project(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
    """Analyze one of the authenticated user's projects.

    Returns:
        The result dict from ``research_mate.analyze_project``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            analysis reports failure, 500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        user_id = current_user.get("user_id")
        result = research_mate.analyze_project(project_id, user_id)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Project analysis failed"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/review")
async def generate_review(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
    """Generate a review for one of the authenticated user's projects.

    Returns:
        The result dict from ``research_mate.generate_review``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when
            generation reports failure, 500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        user_id = current_user.get("user_id")
        result = research_mate.generate_review(project_id, user_id)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Review generation failed"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/ask")
async def ask_project_question(project_id: str, question: QuestionQuery):
    """Ask a question about a specific project.

    Returns:
        The result dict from ``research_mate.ask_project_question``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            call reports failure, 500 for any unexpected error.
    """
    # NOTE(review): unlike the sibling /api/projects/{project_id}/... routes,
    # this one declares no authentication dependency and passes no user id —
    # confirm whether that is intentional.
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        result = research_mate.ask_project_question(project_id, question.question)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Project question failed"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/trends")
async def get_trends(trend: TrendQuery):
    """Analyze research trends for a topic.

    Returns:
        The result dict from ``research_mate.analyze_trends``.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            result carries a truthy ``error`` entry, 500 for any unexpected
            error.
    """
    # NOTE(review): this route declares no authentication dependency —
    # confirm whether that is intentional.
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        result = research_mate.analyze_trends(trend.topic)
        if result.get("error"):
            raise HTTPException(status_code=400, detail=result.get("error", "Trend analysis failed"))
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/trends/temporal")
async def get_temporal_trends(trend: TrendQuery):
    """Get temporal trend analysis.

    Fetches up to 50 papers for the topic via ``research_mate.search_papers``
    and hands them to ``research_mate.trend_monitor.analyze_temporal_trends``.

    Returns:
        A dict with the topic, the analysis result and the number of papers
        analyzed.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 404 when no
            papers are found, 400 when the analysis result carries a truthy
            ``error`` entry, 500 for any unexpected error.
    """
    # NOTE(review): this route declares no authentication dependency —
    # confirm whether that is intentional.
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        # Get papers for analysis
        papers = research_mate.search_papers(trend.topic, 50)
        if not papers:
            raise HTTPException(status_code=404, detail="No papers found for temporal analysis")
        # Use advanced trend monitor
        result = research_mate.trend_monitor.analyze_temporal_trends(papers)
        if result.get("error"):
            raise HTTPException(status_code=400, detail=result.get("error"))
        return {
            "topic": trend.topic,
            "temporal_analysis": result,
            "papers_analyzed": len(papers)
        }
    except HTTPException:
        # Without this clause the 404/400 above would reach clients as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/trends/gaps")
async def detect_research_gaps(trend: TrendQuery):
    """Detect research gaps for a topic.

    Fetches up to 50 papers for the topic via ``research_mate.search_papers``
    and hands them to ``research_mate.trend_monitor.detect_research_gaps``.

    Returns:
        A dict with the topic, the gap analysis result and the number of
        papers analyzed.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 404 when no
            papers are found, 400 when the analysis result carries a truthy
            ``error`` entry, 500 for any unexpected error.
    """
    # NOTE(review): this route declares no authentication dependency —
    # confirm whether that is intentional.
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        # Get papers for gap analysis
        papers = research_mate.search_papers(trend.topic, 50)
        if not papers:
            raise HTTPException(status_code=404, detail="No papers found for gap analysis")
        # Use advanced trend monitor
        result = research_mate.trend_monitor.detect_research_gaps(papers)
        if result.get("error"):
            raise HTTPException(status_code=400, detail=result.get("error"))
        return {
            "topic": trend.topic,
            "gap_analysis": result,
            "papers_analyzed": len(papers)
        }
    except HTTPException:
        # Without this clause the 404/400 above would reach clients as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/status")
async def get_status(current_user: dict = Depends(get_current_user_dependency)):
    """Report system status in the structure the frontend expects.

    A successful status from ``research_mate.get_status`` is reduced to
    ``success``, ``statistics`` (with a fallback when absent) and
    ``components``; an unsuccessful one is returned unchanged.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 500 for any
            error while obtaining the status.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        raw_status = research_mate.get_status()
        if not raw_status.get('success'):
            return raw_status

        # Ensure proper structure for frontend
        fallback_statistics = {
            'rag_documents': 0,
            'system_version': '1.0.0',
            'status_check_time': datetime.now().isoformat()
        }
        statistics = raw_status.get('statistics', fallback_statistics)
        components = raw_status.get('components', {})
        return {
            'success': True,
            'statistics': statistics,
            'components': components
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
# Initialization status endpoint
@app.get("/api/init-status")
async def get_init_status():
    """Check if ResearchMate is initialized.

    Returns:
        A dict with both initialization flags, the current local time in ISO
        format and a ``status`` of ``"ready"``, ``"initializing"`` or
        ``"not_started"``.
    """
    # Both flags are only read, so no ``global`` declaration is required.
    ready = research_mate_initialized
    busy = initialization_in_progress
    status = "ready" if ready else ("initializing" if busy else "not_started")
    return {
        "initialized": ready,
        "in_progress": busy,
        "timestamp": datetime.now().isoformat(),
        "status": status
    }
# Fast search endpoint that initializes on first call
@app.post("/api/search-fast")
async def search_papers_fast(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
    """Fast search that shows initialization progress.

    While ResearchMate is not available yet, an "initializing" payload is
    returned immediately instead of an error. Otherwise the search runs and
    the hits are fed into the citation analyzer when one exists.

    Returns:
        Either the initializing payload or the result dict from
        ``research_mate.search``.

    Raises:
        HTTPException: 400 when the search reports failure, 500 for any
            unexpected error.
    """
    try:
        if research_mate is None:
            # Return immediate response indicating initialization
            return {
                "initializing": True,
                "message": "ResearchMate is initializing (this may take 30-60 seconds)...",
                "query": query.query,
                "estimated_time": "30-60 seconds"
            }
        # Use existing search
        result = research_mate.search(query.query, query.max_results)
        if not result.get("success"):
            raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
        papers = result.get("papers", [])
        if papers and citation_analyzer is not None:
            citation_analyzer.add_papers(papers)
        return result
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/user/status")
async def get_user_status(current_user: dict = Depends(get_current_user_dependency)):
    """Get current user's status and statistics.

    Returns:
        A dict with the user's id and name, project statistics (total
        projects, total papers across projects, projects whose ``status`` is
        ``'active'``) and the current local time in ISO format.

    Raises:
        HTTPException: 503 when ResearchMate is not initialized, 400 when the
            project listing reports failure, 500 for any unexpected error.
    """
    if research_mate is None:
        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
    try:
        user_id = current_user.get("user_id")
        # Get user's projects
        projects_result = research_mate.list_projects(user_id)
        if not projects_result.get("success"):
            raise HTTPException(status_code=400, detail="Failed to get user projects")
        user_projects = projects_result.get("projects", [])
        total_papers = sum(len(p.get('papers', [])) for p in user_projects)
        active_projects = sum(1 for p in user_projects if p.get('status') == 'active')
        return {
            "success": True,
            "user_id": user_id,
            "username": current_user.get("username"),
            "statistics": {
                "total_projects": len(user_projects),
                "total_papers": total_papers,
                "active_projects": active_projects
            },
            "last_updated": datetime.now().isoformat()
        }
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Trigger initialization endpoint (for testing)
# Strong references to initialization tasks started by the endpoint below.
# The event loop keeps only weak references to tasks, so an unreferenced task
# may be garbage-collected before it finishes.
_manual_init_tasks = set()


@app.post("/api/trigger-init")
async def trigger_initialization():
    """Manually trigger ResearchMate initialization.

    Starts the background initialization unless one is already running or has
    already completed.

    Returns:
        A dict whose ``message`` says which of the three cases applied.
    """
    if initialization_in_progress:
        return {"message": "Initialization already in progress"}
    if research_mate_initialized:
        return {"message": "Already initialized"}
    task = asyncio.create_task(initialize_research_mate())
    _manual_init_tasks.add(task)
    # Drop the reference once the task is finished.
    task.add_done_callback(_manual_init_tasks.discard)
    return {"message": "Initialization triggered"}
# Legacy health check endpoint
@app.get("/api/health")
async def api_health_check():
    """Legacy health check endpoint.

    Returns a fixed ``"ok"`` status and the current local time in ISO format.
    """
    checked_at = datetime.now()
    return {"status": "ok", "timestamp": checked_at.isoformat()}
# Update the existing FastAPI app to use lifespan
# NOTE(review): the app above is already constructed with lifespan=lifespan,
# so this assignment looks redundant — confirm before removing.
app.router.lifespan_context = lifespan
# Startup event to ensure initialization begins immediately after server starts
@app.on_event("startup")
async def startup_event():
    """Ensure initialization starts on startup.

    Waits one second, then starts the background initialization unless it is
    already running or finished.
    """
    # NOTE(review): FastAPI documents that startup/shutdown event handlers are
    # not called when a ``lifespan`` is supplied, and this app is built with
    # one — confirm whether this handler ever runs.
    print("🌟 Server started, ensuring ResearchMate initialization begins...")
    # Give the server a moment to fully start, then trigger initialization
    await asyncio.sleep(1)
    if initialization_in_progress or research_mate_initialized:
        return
    asyncio.create_task(initialize_research_mate())
# Local development: run with `python app.py`
if __name__ == "__main__":
    # Local development entry point: `python app.py` serves the app with
    # uvicorn, using settings for host, port, log level and reload, and
    # falling back to 0.0.0.0:7860 when host or port is unset/falsy.
    import uvicorn

    host = settings.server.host or "0.0.0.0"
    port = settings.server.port or 7860
    print(f"\nStarting ResearchMate locally at http://{host}:{port}\n")

    run_options = {
        "host": host,
        "port": port,
        "log_level": settings.server.log_level,
        "reload": settings.server.reload,
    }
    uvicorn.run("app:app", **run_options)