"""Flask service exposing layout detection over a remotely hosted PDF.

``GET /extract?pdf_url=<url>`` downloads the PDF, runs the configured
``layout_detection`` task on it and returns a JSON list describing every
``.png`` file found in the output directory afterwards.
"""

import contextlib
import os
import tempfile
from urllib.parse import urlparse

import requests
from flask import Flask, request, jsonify

from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models

# Directory handed to the layout task and scanned for ``.png`` results.
# NOTE(review): this directory is shared by every request, so PNGs left over
# from earlier (or concurrent) requests are also listed in the response.
# It is kept as-is because whatever serves ``/files/<name>`` is not visible
# here and presumably reads from this path -- confirm before isolating it.
OUTPUT_DIR = "/tmp/layout_result"

# (connect, read) timeouts in seconds, so that an unresponsive host cannot
# block a worker forever.
DOWNLOAD_TIMEOUT = (5, 60)
DOWNLOAD_CHUNK_SIZE = 64 * 1024
ALLOWED_SCHEMES = ("http", "https")

app = Flask(__name__)

# The config and models are loaded once at import time and reused by
# every request.
config = load_config("configs/layout_detection.yaml")
tasks = initialize_tasks_and_models(config)
layout_task = tasks["layout_detection"]


def _download_to(url, fileobj):
    """Stream the body of *url* into the open binary file object *fileobj*.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
    """
    with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as response:
        # Fail early instead of writing an HTML error page into the "PDF".
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
            fileobj.write(chunk)


def _collect_figures(output_dir):
    """Return one ``{"caption", "url"}`` dict per ``.png`` in *output_dir*."""
    # Sorted so the response order is deterministic (os.listdir order is not).
    return [
        {
            "caption": f"Figure extracted from {fname}",
            "url": f"/files/{fname}",
        }
        for fname in sorted(os.listdir(output_dir))
        if fname.endswith(".png")
    ]


@app.route("/extract")
def extract():
    """Download the PDF named by ``pdf_url`` and run layout detection on it.

    Query parameters:
        pdf_url: http(s) URL of the PDF to process (required).

    Returns:
        200 with a JSON list of ``{"caption": ..., "url": ...}`` entries,
        400 with ``{"error": ...}`` when ``pdf_url`` is missing or is not an
        http(s) URL, or 500 with ``{"error": ...}`` when downloading or
        processing fails.
    """
    pdf_url = request.args.get("pdf_url")
    if not pdf_url:
        return jsonify({"error": "Missing pdf_url"}), 400

    # SECURITY: pdf_url is untrusted input. Restricting the scheme rejects
    # non-HTTP schemes up front, but the server will still fetch any
    # reachable http(s) host, including internal ones (SSRF). Add a host
    # allow-list if this endpoint is exposed publicly.
    if urlparse(pdf_url).scheme.lower() not in ALLOWED_SCHEMES:
        return jsonify({"error": "Invalid pdf_url"}), 400

    tmp_pdf_path = None
    try:
        # delete=False because the file is closed (and therefore flushed)
        # before the layout task reopens it by path; it is removed in the
        # ``finally`` clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_pdf_path = tmp_file.name
            _download_to(pdf_url, tmp_file)

        os.makedirs(OUTPUT_DIR, exist_ok=True)
        layout_task.predict_pdfs(tmp_pdf_path, OUTPUT_DIR)

        return jsonify(_collect_figures(OUTPUT_DIR))
    except Exception as e:
        # Top-level request boundary: log the traceback, report the failure.
        app.logger.exception("Layout extraction failed for %s", pdf_url)
        return jsonify({"error": str(e)}), 500
    finally:
        # Always remove the downloaded PDF so temp files do not accumulate.
        if tmp_pdf_path is not None:
            with contextlib.suppress(OSError):
                os.remove(tmp_pdf_path)