File size: 1,217 Bytes
1f680d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from flask import Flask, request, jsonify
import os, tempfile, requests
from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models

# Flask application object; routes below are registered on it.
app = Flask(__name__)

# Everything below runs once at import time, so the task object is shared by
# every request handled in this process.
# NOTE(review): the config path is relative, so it is resolved against the
# process's current working directory — confirm the service is always
# started from the repository root.
config = load_config("configs/layout_detection.yaml")
# NOTE(review): presumably this instantiates the models named in the config
# and may be slow — confirm against pdf_extract_kit.
tasks = initialize_tasks_and_models(config)
# Only the "layout_detection" entry is used by this service.
layout_task = tasks["layout_detection"]

@app.route("/extract")
def extract():
    """Download a PDF, run layout detection on it and list the PNG outputs.

    Query parameters:
        pdf_url: URL of the PDF to download and analyse (required).

    Returns:
        * 200 with a JSON list of ``{"caption": ..., "url": ...}`` objects,
          one per ``.png`` file found in the output directory.
        * 400 with ``{"error": "Missing pdf_url"}`` when the parameter is
          absent or empty.
        * 500 with ``{"error": <message>}`` when the download or the layout
          detection raises.
    """
    pdf_url = request.args.get("pdf_url")
    if not pdf_url:
        return jsonify({"error": "Missing pdf_url"}), 400

    # NOTE(review): pdf_url is caller-controlled and fetched server-side
    # without any host/scheme validation (SSRF risk) — restrict it if this
    # endpoint is reachable by untrusted clients.
    tmp_pdf_path = None
    try:
        # Stream the download so a large PDF is never held fully in memory,
        # and bound the wait so a stalled remote server cannot hang a worker.
        with requests.get(pdf_url, stream=True, timeout=(10, 60)) as r:
            # Fail early on 4xx/5xx instead of feeding an error page to the
            # PDF parser.
            r.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                tmp_pdf_path = tmp_file.name
                for chunk in r.iter_content(chunk_size=65536):
                    tmp_file.write(chunk)

        # NOTE(review): this directory is shared by all requests and never
        # cleared, so the listing below can include PNGs produced by earlier
        # (or concurrent) requests — consider a per-request directory once
        # the route that serves "/files/..." is known.
        output_dir = "/tmp/layout_result"
        os.makedirs(output_dir, exist_ok=True)

        layout_task.predict_pdfs(tmp_pdf_path, output_dir)

        # Sorted so the response order is stable; os.listdir order is arbitrary.
        results = [
            {
                "caption": f"Figure extracted from {fname}",
                "url": f"/files/{fname}",
            }
            for fname in sorted(os.listdir(output_dir))
            if fname.endswith(".png")
        ]
        return jsonify(results)

    except Exception as e:
        # Top-level boundary: record the traceback server-side, then report
        # the failure to the client in the same shape as before.
        app.logger.exception("Layout extraction failed for %s", pdf_url)
        return jsonify({"error": str(e)}), 500

    finally:
        # The temp file is created with delete=False so it survives being
        # closed; remove it here so requests do not accumulate PDFs on disk.
        if tmp_pdf_path is not None:
            try:
                os.remove(tmp_pdf_path)
            except OSError:
                # Best-effort cleanup; never mask the real response.
                pass