File size: 2,794 Bytes
c6a18bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# import io
import os

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from starlette.responses import FileResponse
from starlette.middleware.cors import CORSMiddleware

# from PIL import Image
from pdftoword import convertPDFtoWORD

# from model import inference


# Application instance that the route decorators in this module attach to.
app = FastAPI()

# Local development frontend origin. It is not referenced by the middleware
# call below, which currently accepts requests from every origin.
origins = ["http://localhost:3000"]

# NOTE(review): FastAPI's CORS documentation says wildcards should not be
# combined with allow_credentials=True -- consider passing `origins` (or an
# explicit production list) as allow_origins once the frontend URL is fixed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # any origin
    allow_credentials=True,
    allow_methods=["*"],  # any HTTP method
    allow_headers=["*"],  # any request header
)


@app.post("/upload")
async def extract_table_data(image: UploadFile = File(...)):
    """Placeholder for the table-OCR endpoint, which is currently disabled.

    Args:
        image: Uploaded image. It is still required so the request shape does
            not change, but it is not read while OCR is disabled.

    Returns:
        A fixed message stating that table OCR is disabled.
    """
    # The emoji used to be mojibake (UTF-8 bytes decoded with the wrong
    # codec); it is restored to the intended character. The f-prefix was
    # dropped because the string has no placeholders.
    return "table ocr is disabled 😔"
    # Disabled implementation, kept as a reference for re-enabling OCR.
    # It needs the commented-out `io`, `PIL.Image` and `model.inference` imports.
    # try:
    #     # Read image data
    #     image_data = await image.read()

    #     # Open image in memory
    #     image = Image.open(io.BytesIO(image_data))
    #     rgb_img = image.convert("RGB")
    #     rgb_img.save('output.jpg')
    #     image = Image.open('output.jpg')

    #     table_fram= inference(image)
    #     if table_fram.empty:
    #         return "<h2 style=\"color: darkslategrey;\">💡 the image has no tables 💡</h2>"

    #     return table_fram.to_html(escape=True,border=1,index=False).replace('\n', '')

    # except Exception as e:
    #     # Handle and log exceptions appropriately
    #     print(f"Error processing image: {e}")
    #     raise HTTPException(status_code=500, detail="Internal server error")



@app.post("/convert")
async def convert_pdf(docxFile: UploadFile = File(...)):
    """Convert an uploaded PDF to a Word document and return it as a download.

    Args:
        docxFile: The uploaded PDF. The form field name is kept unchanged for
            existing clients even though the payload is a PDF.

    Returns:
        A FileResponse carrying the converted .docx on success, or a
        JSONResponse with status 500 and an "error" message when saving or
        converting the file fails.

    Raises:
        HTTPException: 415 when the upload's content type is not
            application/pdf.
    """
    uploaded_file = docxFile

    # Validate outside the try block: raised inside it, the HTTPException was
    # caught by `except Exception` and reported to the client as a 500.
    # content_type can be missing, so fall back to an empty string.
    content_type = uploaded_file.content_type or ""
    if not content_type.startswith("application/pdf"):
        raise HTTPException(415, detail="Unsupported file format. Please upload a PDF file.")

    # The filename is client-controlled: keep only its final path component
    # so values like "../../x" or "C:\\x" cannot escape the uploads directory.
    safe_name = os.path.basename((uploaded_file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"
    pdf_file_path = os.path.join("uploads", safe_name)

    try:
        # Create uploads directory if it doesn't exist
        os.makedirs("uploads", exist_ok=True)

        try:
            # Save the uploaded file (async read avoids blocking on the spooled file)
            with open(pdf_file_path, "wb") as file_object:
                file_object.write(await uploaded_file.read())

            # Process the PDF
            docx_path = convertPDFtoWORD(pdf_file_path)
        finally:
            # Remove the uploaded PDF even when saving or conversion fails,
            # so failed requests do not leave files behind.
            if os.path.exists(pdf_file_path):
                os.unlink(pdf_file_path)

        # NOTE(review): the generated .docx is not deleted after it is sent;
        # consider a background cleanup task if disk usage matters.
        return FileResponse(
            docx_path,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            filename="converted_document.docx",
        )

    except FileNotFoundError:
        # Handle case where conversion fails (e.g., missing converter)
        return JSONResponse({"error": "Conversion failed. Please check the converter or file."}, status_code=500)
    except Exception as e:
        # Catch any unexpected errors
        return JSONResponse({"error": f"An unexpected error occurred: {str(e)}"}, status_code=500)