"""RunPod serverless worker that converts a base64-encoded PDF to Markdown text."""

import base64

import fitz  # PyMuPDF
import runpod


def _extract_text(pdf_data):
    """Return ``(text, page_count)`` for the PDF bytes in *pdf_data*.

    Each page's text is preceded by a ``--- Page N ---`` separator, with
    N counted from 1.
    """
    # The context manager closes the document even when a page fails to
    # extract. The page count is read inside the block because PyMuPDF
    # refuses access to a document once it has been closed.
    with fitz.open(stream=pdf_data, filetype="pdf") as doc:
        parts = []
        for page_num, page in enumerate(doc, start=1):
            parts.append(f"\n\n--- Page {page_num} ---\n\n")
            parts.append(page.get_text())
        return "".join(parts), len(doc)


def handler(job):
    """Simple PDF to text handler for testing.

    Args:
        job: Job dict. ``job["input"]`` is read for ``pdf_base64``
            (base64-encoded PDF data) and, optionally, ``filename``
            (defaults to ``"document.pdf"``).

    Returns:
        On success, a dict with ``markdown``, ``filename``,
        ``status`` (``"success"``) and ``pages``.
        On failure, a dict with ``error`` and ``status`` (``"failed"``).
        Exceptions are reported through the returned dict rather than raised.
    """
    try:
        # A missing or null "input" is treated like an empty one so the
        # caller gets the explicit "No PDF data provided" message.
        job_input = job.get("input") or {}

        # Get PDF data from base64
        pdf_base64 = job_input.get("pdf_base64")
        filename = job_input.get("filename", "document.pdf")

        if not pdf_base64:
            return {"error": "No PDF data provided", "status": "failed"}

        # Decode base64 PDF
        pdf_data = base64.b64decode(pdf_base64)

        # Extract text using PyMuPDF
        text_content, page_count = _extract_text(pdf_data)

        # Convert to simple markdown
        markdown_content = (
            f"# {filename}\n\n"
            "*Extracted using PyMuPDF (simplified version)*\n\n"
            f"{text_content}"
        )

        return {
            "markdown": markdown_content,
            "filename": filename,
            "status": "success",
            "pages": page_count,
        }

    except Exception as e:
        # Top-level worker boundary: report the failure in the job result
        # instead of letting the exception propagate.
        return {
            "error": str(e),
            "status": "failed",
        }


# RunPod serverless entrypoint
runpod.serverless.start({"handler": handler})