"""Handlers for bearer-token verification and AI-likelihood analysis of text and uploaded files."""

import asyncio
import logging
import secrets
from io import BytesIO

from fastapi import Depends, HTTPException, UploadFile, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

from config import Config

from .inferencer import analyze_text_with_sentences, classify_text
from .preprocess import parse_docx, parse_pdf, parse_txt

security = HTTPBearer()

# Limits shared by every handler in this module.
_MAX_TEXT_CHARS = 50000
_MIN_TEXT_WORDS = 10

# Maps an upload's declared content type to the parser that extracts its text.
_PARSERS_BY_CONTENT_TYPE = {
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
    "application/pdf": parse_pdf,
    "text/plain": parse_txt,
}


# def build_bias_summary(ai_likelihood: float) -> dict[str, object]:
#     """Convert an AI likelihood score into a human-readable bias summary."""
#     if ai_likelihood > 50:
#         overall_bias = "AI"
#         bias_statement = f"The text is biased toward AI-generated writing ({ai_likelihood}% AI likelihood)."
#     elif ai_likelihood < 50:
#         overall_bias = "Human"
#         bias_statement = f"The text is biased toward human writing ({100 - ai_likelihood}% human likelihood)."
#     else:
#         overall_bias = "Balanced"
#         bias_statement = "The text is balanced between AI and human writing."
#     return {
#         "overall_bias": overall_bias,
#         "bias_statement": bias_statement,
#     }


def _validate_text(text: str) -> str:
    """Strip *text* and enforce the minimum word count and maximum length.

    Returns:
        The stripped text.

    Raises:
        HTTPException: 400 if the stripped text is empty or has fewer than
            10 whitespace-separated words; 413 if it is longer than 50,000
            characters.
    """
    text = text.strip()
    if not text or len(text.split()) < _MIN_TEXT_WORDS:
        raise HTTPException(
            status_code=400, detail="Text must contain at least 10 words"
        )
    if len(text) > _MAX_TEXT_CHARS:
        raise HTTPException(
            status_code=413, detail="Text must be less than 50,000 characters"
        )
    return text


def _clean_file_text(file_contents: str) -> str:
    """Flatten newlines and tabs to spaces, strip, and reject empty results.

    Raises:
        HTTPException: 400 if nothing but whitespace remains.
    """
    cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
    if not cleaned_text:
        raise HTTPException(
            status_code=400,
            detail="The uploaded file is empty or only contains whitespace.",
        )
    return cleaned_text


def _too_long_payload() -> dict:
    """Build the payload returned when extracted file text exceeds the limit.

    NOTE(review): the file handlers *return* this dict instead of raising a
    413 HTTPException, so as written the handler returns normally. That
    return shape is kept for backward compatibility with existing callers.
    """
    return {
        "status_code": 413,
        "detail": "Text must be less than 50,000 characters",
    }


# Verify Bearer token from Authorization header
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Check the bearer token against ``Config.SECRET_TOKEN``.

    Returns:
        The presented token when it matches.

    Raises:
        HTTPException: 403 if the token does not match the configured secret
            (including when the configured secret is not a string).
    """
    token = credentials.credentials
    expected_token = Config.SECRET_TOKEN
    # Compare as bytes with a constant-time routine so the check does not leak
    # how many leading characters matched; encoding first avoids the TypeError
    # compare_digest raises for non-ASCII str arguments.
    token_matches = isinstance(expected_token, str) and secrets.compare_digest(
        token.encode("utf-8"), expected_token.encode("utf-8")
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN, detail="Invalid or expired token"
        )
    return token


# Classify plain text input
async def handle_text_analysis(text: str):
    """Validate *text* and classify it in a worker thread.

    Returns:
        A dict with ``result``, ``perplexity`` (rounded to 2 decimals) and
        ``ai_likelihood`` as produced by ``classify_text``.

    Raises:
        HTTPException: 400 for fewer than 10 words, 413 for more than
            50,000 characters.
    """
    text = _validate_text(text)
    # classify_text is run off the event loop thread.
    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
    # bias_summary = build_bias_summary(ai_likelihood)
    return {
        "result": label,
        "perplexity": round(perplexity, 2),
        "ai_likelihood": ai_likelihood,
    }


# Extract text from uploaded files (.docx, .pdf, .txt)
async def extract_file_contents(file: UploadFile) -> str:
    """Read the upload and parse it according to its declared content type.

    Raises:
        HTTPException: 415 if the content type is not .docx, .pdf or .txt.
    """
    content = await file.read()
    parser = _PARSERS_BY_CONTENT_TYPE.get(file.content_type)
    if parser is None:
        raise HTTPException(
            status_code=415,
            detail="Invalid file type. Only .docx, .pdf and .txt are allowed.",
        )
    return parser(BytesIO(content))


# Classify text from uploaded file
async def handle_file_upload(file: UploadFile):
    """Extract text from an uploaded file and classify it.

    Returns:
        A dict with the raw extracted ``content`` plus ``result``,
        ``perplexity`` and ``ai_likelihood``; or the 413 payload dict when the
        extracted text is longer than 50,000 characters.

    Raises:
        HTTPException: 415 for unsupported file types, 400 for empty files,
            500 for any unexpected failure while processing.
    """
    try:
        file_contents = await extract_file_contents(file)
        logging.info("Extracted text length: %d characters", len(file_contents))
        if len(file_contents) > _MAX_TEXT_CHARS:
            return _too_long_payload()
        cleaned_text = _clean_file_text(file_contents)
        label, perplexity, ai_likelihood = await asyncio.to_thread(
            classify_text, cleaned_text
        )
        return {
            "content": file_contents,
            "result": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": ai_likelihood,
        }
    except HTTPException:
        # Deliberate client errors (400/415) must reach the client unchanged
        # rather than being rewritten as a 500 by the generic handler below.
        raise
    except Exception as exc:
        logging.exception("Error processing file: %s", exc)
        raise HTTPException(
            status_code=500, detail="Error processing the file"
        ) from exc


async def handle_sentence_level_analysis(text: str):
    """Validate *text* and run sentence-level analysis in a worker thread.

    Returns:
        Whatever ``analyze_text_with_sentences`` returns for the stripped text.

    Raises:
        HTTPException: 400 for fewer than 10 words, 413 for more than
            50,000 characters.
    """
    text = _validate_text(text)
    return await asyncio.to_thread(analyze_text_with_sentences, text)


# Analyze each sentence from uploaded file
async def handle_file_sentence(file: UploadFile):
    """Extract text from an uploaded file and analyze it sentence by sentence.

    Returns:
        A dict with the raw extracted ``content`` merged with the
        sentence-level analysis result; or the 413 payload dict when the
        extracted text is longer than 50,000 characters.

    Raises:
        HTTPException: 415 for unsupported file types, 400 for empty files or
            fewer than 10 words, 500 for any unexpected failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > _MAX_TEXT_CHARS:
            return _too_long_payload()
        cleaned_text = _clean_file_text(file_contents)
        result = await handle_sentence_level_analysis(cleaned_text)
        return {"content": file_contents, **result}
    except HTTPException:
        raise
    except Exception as exc:
        logging.exception("Error processing file: %s", exc)
        raise HTTPException(
            status_code=500, detail="Error processing the file"
        ) from exc


def classify(text: str):
    """Synchronously classify *text*; a direct pass-through to ``classify_text``."""
    return classify_text(text)