import os
import tempfile
from urllib.parse import urlparse

import requests
from flask import Flask, request, jsonify
from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models
# Flask application object; the route handlers below register against it.
app = Flask(__name__)

# Load the layout-detection configuration and build the task objects once at
# import time, so every request handled by this process reuses the same
# `layout_task` instead of re-initializing it per call.
config = load_config("configs/layout_detection.yaml")
tasks = initialize_tasks_and_models(config)
layout_task = tasks["layout_detection"]
# Per-socket-operation timeout (seconds) for fetching the remote PDF, so a
# stalled or unresponsive host cannot block a worker indefinitely.
_DOWNLOAD_TIMEOUT_SECONDS = 30


@app.route("/extract")
def extract():
    """Download a PDF, run layout detection on it, and list the PNG outputs.

    Query parameters:
        pdf_url: Absolute http(s) URL of the PDF to process (required).

    Returns:
        On success, a JSON array with one ``{"caption", "url"}`` object per
        ``.png`` file found in the output directory, sorted by file name.
        On failure, a JSON object ``{"error": <message>}`` with status 400
        (missing or non-http(s) ``pdf_url``) or 500 (download or processing
        error).
    """
    pdf_url = request.args.get("pdf_url")
    if not pdf_url:
        return jsonify({"error": "Missing pdf_url"}), 400

    # pdf_url is caller-controlled: accept only absolute http(s) URLs.
    try:
        parsed = urlparse(pdf_url)
    except ValueError:
        parsed = None
    if parsed is None or parsed.scheme not in ("http", "https") or not parsed.netloc:
        return jsonify({"error": "pdf_url must be an absolute http(s) URL"}), 400
    # NOTE(review): this endpoint still fetches arbitrary hosts on behalf of
    # the caller (SSRF exposure, e.g. internal or metadata addresses).
    # Consider an allow-list or a private-address check before exposing it.

    tmp_pdf_path = None
    try:
        # Stream the body to disk instead of buffering the whole PDF in memory,
        # and fail fast on HTTP errors so an error page is never treated as a PDF.
        with requests.get(
            pdf_url, stream=True, timeout=_DOWNLOAD_TIMEOUT_SECONDS
        ) as response:
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                tmp_pdf_path = tmp_file.name
                for chunk in response.iter_content(chunk_size=65536):
                    if chunk:
                        tmp_file.write(chunk)

        output_dir = "/tmp/layout_result"
        os.makedirs(output_dir, exist_ok=True)

        layout_task.predict_pdfs(tmp_pdf_path, output_dir)

        # NOTE(review): output_dir is shared by every request, so this listing
        # also includes PNGs left behind by earlier requests — confirm whether
        # a per-request directory is wanted (the "/files/" URLs would need to
        # follow).
        results = [
            {
                "caption": f"Figure extracted from {fname}",
                "url": f"/files/{fname}",
            }
            # Sorted so the response order is deterministic; os.listdir order
            # is arbitrary.
            for fname in sorted(os.listdir(output_dir))
            if fname.endswith(".png")
        ]
        return jsonify(results)

    except Exception as e:
        # Top-level request boundary: keep the original 500 + message contract,
        # but record the traceback so failures are diagnosable.
        app.logger.exception("Layout extraction failed for %s", pdf_url)
        return jsonify({"error": str(e)}), 500

    finally:
        # The temp file is created with delete=False, so it must be removed
        # explicitly or every request leaks one PDF on disk.
        if tmp_pdf_path is not None:
            try:
                os.remove(tmp_pdf_path)
            except OSError:
                app.logger.warning("Could not remove temp file %s", tmp_pdf_path)