import os
import warnings
import logging
import time
from datetime import datetime

from fastapi import FastAPI, Request, HTTPException, Depends, Header
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from pdf_parser import parse_pdf_from_url_multithreaded as parse_pdf_from_url, parse_pdf_from_file_multithreaded as parse_pdf_from_file
from embedder import build_pinecone_index, preload_model
from retriever import retrieve_chunks
from llm import query_gemini

import uvicorn

# Set up cache directory for HuggingFace models
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir

# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_LOGGING_LEVEL'] = 'ERROR'
os.environ['TF_ENABLE_DEPRECATION_WARNINGS'] = '0'
warnings.filterwarnings('ignore', category=DeprecationWarning, module='tensorflow')
logging.getLogger('tensorflow').setLevel(logging.ERROR)

app = FastAPI(title="HackRx Insurance Policy Assistant", version="1.0.0")

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Preload the model at startup
@app.on_event("startup")
async def startup_event():
    print("Starting up HackRx Insurance Policy Assistant...")
    print("Preloading sentence transformer model...")
    preload_model()
    print("Model preloading completed. API is ready to serve requests!")

@app.get("/")
async def root():
    return {"message": "HackRx Insurance Policy Assistant API is running!"}

@app.get("/health")
async def health_check():
    return {"status": "healthy", "message": "API is ready to process requests"}

class QueryRequest(BaseModel):
    documents: str
    questions: list[str]

class LocalQueryRequest(BaseModel):
    document_path: str
    questions: list[str]

def verify_token(authorization: str = Header(None)):
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Invalid authorization header")
    token = authorization.replace("Bearer ", "")
    # For demo purposes, accept any token. In production, validate against a database
    if not token:
        raise HTTPException(status_code=401, detail="Invalid token")
    return token

@app.post("/api/v1/hackrx/run")
async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
    start_time = time.time()
    timing_data = {}
    try:
        print(f"\n=== INPUT JSON ===")
        print(f"Documents: {request.documents}")
        print(f"Questions: {request.questions}")
        print(f"==================\n")

        print(f"Processing {len(request.questions)} questions...")

        # Time PDF parsing
        pdf_start = time.time()
        text_chunks = parse_pdf_from_url(request.documents)
        pdf_time = time.time() - pdf_start
        timing_data['pdf_parsing'] = round(pdf_time, 2)

        print(f"Extracted {len(text_chunks)} text chunks from PDF")

        # Time Pinecone index building/upsert
        index_start = time.time()
        pinecone_index = build_pinecone_index(text_chunks)
        index_time = time.time() - index_start
        timing_data['pinecone_index_building'] = round(index_time, 2)
        texts = text_chunks  # for retrieve_chunks

        # Time chunk retrieval for all questions
        retrieval_start = time.time()
        all_chunks = set()
        for i, question in enumerate(request.questions):
            question_start = time.time()
            top_chunks = retrieve_chunks(pinecone_index, texts, question)
            question_time = time.time() - question_start
            all_chunks.update(top_chunks)
        retrieval_time = time.time() - retrieval_start
        timing_data['chunk_retrieval'] = round(retrieval_time, 2)

        print(f"Retrieved {len(all_chunks)} unique chunks")

        # Time LLM processing
        llm_start = time.time()
        print(f"Processing all {len(request.questions)} questions in batch...")
        response = query_gemini(request.questions, list(all_chunks))
        llm_time = time.time() - llm_start
        timing_data['llm_processing'] = round(llm_time, 2)

        # Time response processing
        response_start = time.time()
        # Extract answers from the JSON response
        if isinstance(response, dict) and "answers" in response:
            answers = response["answers"]
            while len(answers) < len(request.questions):
                answers.append("Not Found")
            answers = answers[:len(request.questions)]
        else:
            answers = [response] if isinstance(response, str) else []
            while len(answers) < len(request.questions):
                answers.append("Not Found")
            answers = answers[:len(request.questions)]

        response_time = time.time() - response_start
        timing_data['response_processing'] = round(response_time, 2)

        print(f"Generated {len(answers)} answers")

        # Calculate total time
        total_time = time.time() - start_time
        timing_data['total_time'] = round(total_time, 2)

        print(f"\n=== TIMING BREAKDOWN ===")
        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
        print(f"Pinecone Index Building: {timing_data['pinecone_index_building']}s")
        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
        print(f"LLM Processing: {timing_data['llm_processing']}s")
        print(f"Response Processing: {timing_data['response_processing']}s")
        print(f"TOTAL TIME: {timing_data['total_time']}s")
        print(f"=======================\n")

        result = {"answers": answers}

        print(f"=== OUTPUT JSON ===")
        print(f"{result}")
        print(f"==================\n")

        return result

    except Exception as e:
        total_time = time.time() - start_time
        print(f"Error after {total_time:.2f} seconds: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

@app.post("/api/v1/hackrx/local")
async def run_local_query(request: LocalQueryRequest):
    start_time = time.time()
    timing_data = {}
    try:
        print(f"\n=== INPUT JSON ===")
        print(f"Document Path: {request.document_path}")
        print(f"Questions: {request.questions}")
        print(f"==================\n")

        print(f"Processing local document: {request.document_path}")
        print(f"Processing {len(request.questions)} questions...")

        # Time local PDF parsing
        pdf_start = time.time()
        text_chunks = parse_pdf_from_file(request.document_path)
        pdf_time = time.time() - pdf_start
        timing_data['pdf_parsing'] = round(pdf_time, 2)

        print(f"Extracted {len(text_chunks)} text chunks from local PDF")

        # Time Pinecone index building/upsert
        index_start = time.time()
        pinecone_index = build_pinecone_index(text_chunks)
        index_time = time.time() - index_start
        timing_data['pinecone_index_building'] = round(index_time, 2)
        texts = text_chunks

        # Time chunk retrieval for all questions
        retrieval_start = time.time()
        all_chunks = set()
        for i, question in enumerate(request.questions):
            question_start = time.time()
            top_chunks = retrieve_chunks(pinecone_index, texts, question)
            question_time = time.time() - question_start
            all_chunks.update(top_chunks)
        retrieval_time = time.time() - retrieval_start
        timing_data['chunk_retrieval'] = round(retrieval_time, 2)

        print(f"Retrieved {len(all_chunks)} unique chunks")

        # Time LLM processing
        llm_start = time.time()
        print(f"Processing all {len(request.questions)} questions in batch...")
        response = query_gemini(request.questions, list(all_chunks))
        llm_time = time.time() - llm_start
        timing_data['llm_processing'] = round(llm_time, 2)

        # Time response processing
        response_start = time.time()
        if isinstance(response, dict) and "answers" in response:
            answers = response["answers"]
            while len(answers) < len(request.questions):
                answers.append("Not Found")
            answers = answers[:len(request.questions)]
        else:
            answers = [response] if isinstance(response, str) else []
            while len(answers) < len(request.questions):
                answers.append("Not Found")
            answers = answers[:len(request.questions)]

        response_time = time.time() - response_start
        timing_data['response_processing'] = round(response_time, 2)

        print(f"Generated {len(answers)} answers")

        total_time = time.time() - start_time
        timing_data['total_time'] = round(total_time, 2)

        print(f"\n=== TIMING BREAKDOWN ===")
        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
        print(f"Pinecone Index Building: {timing_data['pinecone_index_building']}s")
        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
        print(f"LLM Processing: {timing_data['llm_processing']}s")
        print(f"Response Processing: {timing_data['response_processing']}s")
        print(f"TOTAL TIME: {timing_data['total_time']}s")
        print(f"=======================\n")

        result = {"answers": answers}

        print(f"=== OUTPUT JSON ===")
        print(f"{result}")
        print(f"==================\n")

        return result
    except Exception as e:
        total_time = time.time() - start_time
        print(f"Error after {total_time:.2f} seconds: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run("app:app", host="0.0.0.0", port=port)