File size: 1,424 Bytes
4112422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import runpod
import base64
import fitz  # PyMuPDF

def handler(job):
    """Convert a base64-encoded PDF into simple markdown text.

    Args:
        job: Job payload mapping. Its ``"input"`` entry is read for
            ``"pdf_base64"`` (the base64-encoded PDF bytes, required) and
            ``"filename"`` (optional, defaults to ``"document.pdf"``).

    Returns:
        On success, a dict with ``"markdown"`` (title line, extraction
        note, then each page's text under a ``--- Page N ---`` marker),
        ``"filename"``, ``"status": "success"`` and ``"pages"`` (the page
        count). On any failure, a dict with ``"error"`` and
        ``"status": "failed"``; exceptions are never propagated to the
        caller.
    """
    try:
        # A missing or null "input" is reported the same way as a missing PDF
        # instead of surfacing a raw KeyError/AttributeError message.
        job_input = job.get("input") or {}

        pdf_base64 = job_input.get("pdf_base64")
        filename = job_input.get("filename", "document.pdf")

        if not pdf_base64:
            return {"error": "No PDF data provided", "status": "failed"}

        pdf_data = base64.b64decode(pdf_base64)

        # The context manager closes the document even if extraction raises.
        with fitz.open(stream=pdf_data, filetype="pdf") as doc:
            # The page count must be read while the document is still open;
            # querying a closed PyMuPDF document raises ValueError.
            page_count = len(doc)
            page_sections = [
                f"\n\n--- Page {page_num} ---\n\n{page.get_text()}"
                for page_num, page in enumerate(doc, start=1)
            ]

        markdown_content = "".join(
            [
                f"# {filename}\n\n",
                "*Extracted using PyMuPDF (simplified version)*\n\n",
                *page_sections,
            ]
        )

        return {
            "markdown": markdown_content,
            "filename": filename,
            "status": "success",
            "pages": page_count,
        }

    except Exception as e:
        # Handler boundary: report the failure in the response payload rather
        # than letting the exception escape.
        return {
            "error": str(e),
            "status": "failed",
        }

# RunPod serverless entrypoint: hand the job handler to the RunPod worker.
_worker_config = {"handler": handler}
runpod.serverless.start(_worker_config)