#!/usr/bin/env python3
"""Smoke-test the PDF-to-Markdown conversion API.

Uploads a PDF to ``/api/convert``, polls ``/api/status/<task_id>`` until the
task is reported as completed or failed, then downloads the Markdown result
and prints a short preview.

Usage:
    script.py [PDF_PATH]

When run as a script the test targets the Hugging Face Space deployment;
``test_pdf_conversion`` itself defaults to a local server.
"""

import os
import sys
import time

import requests


def test_pdf_conversion(
    pdf_path,
    api_base_url="http://localhost:7860",
    *,
    timeout=30,
    poll_interval=1,
    max_wait=600,
):
    """Run one upload -> poll -> download cycle against the conversion API.

    Progress and errors are reported with ``print``; nothing is returned.
    On success the Markdown is written to ``output_<task_id>.md`` in the
    current working directory.

    Args:
        pdf_path: Path of the PDF file to upload.
        api_base_url: Base URL of the API, without a trailing slash.
        timeout: Per-request timeout in seconds passed to ``requests``.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Maximum number of seconds to keep polling before giving
            up. ``None`` polls until the task completes or fails.

    Raises:
        OSError: If ``pdf_path`` cannot be opened or the output file cannot
            be written.
        requests.RequestException: Network errors (including timeouts) are
            not caught here and propagate to the caller.
    """
    # 1. Upload PDF
    print(f"Uploading PDF: {pdf_path}")

    with open(pdf_path, 'rb') as f:
        # basename() honours the platform's path separator, unlike split('/').
        files = {'file': (os.path.basename(pdf_path), f, 'application/pdf')}
        response = requests.post(
            f"{api_base_url}/api/convert", files=files, timeout=timeout
        )

    if response.status_code != 200:
        print(f"Upload failed: {response.status_code}")
        print(response.text)
        return

    result = response.json()
    task_id = result['task_id']
    print(f"Task ID: {task_id}")
    print(f"Status: {result['status']}")

    # 2. Check status
    print("\nChecking conversion status...")

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if max_wait is None else time.monotonic() + max_wait

    while True:
        response = requests.get(
            f"{api_base_url}/api/status/{task_id}", timeout=timeout
        )
        if response.status_code != 200:
            print(f"Status check failed: {response.status_code}")
            break

        status = response.json()
        state = status['status']
        print(f"Status: {state}")

        if state == 'completed':
            download_url = status['download_url']
            print(f"Download URL: {download_url}")

            # 3. Download result
            response = requests.get(
                f"{api_base_url}{download_url}", timeout=timeout
            )
            if response.status_code == 200:
                output_file = f"output_{task_id}.md"
                # Explicit UTF-8 so non-ASCII Markdown is written the same
                # way regardless of the platform's default encoding.
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(response.text)
                print(f"\nMarkdown saved to: {output_file}")
                print("\nContent preview:")
                print(response.text[:500])
            else:
                # Previously this case ended the loop without any message.
                print(f"Download failed: {response.status_code}")
            break

        if state == 'failed':
            print(f"Conversion failed: {status.get('error', 'Unknown error')}")
            break

        if deadline is not None and time.monotonic() >= deadline:
            print(f"Timed out after {max_wait}s waiting for task {task_id}")
            break

        time.sleep(poll_interval)


if __name__ == "__main__":
    if len(sys.argv) > 1:
        pdf_path = sys.argv[1]
    else:
        pdf_path = "/Users/marcos/Documents/projects/pdf2md/batch-files/test-simple.pdf"

    # Test on HF Space
    print("Testing on Hugging Face Space...")
    test_pdf_conversion(pdf_path, "https://marcosremar2-mineru2.hf.space")