File size: 2,758 Bytes
3474fe8
 
 
 
 
 
 
 
 
 
 
 
 
c6a18bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf0f2e0
 
 
 
 
 
 
a3691b9
 
 
cf0f2e0
 
 
 
 
 
 
 
 
 
 
c6a18bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import io
import os

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from starlette.responses import FileResponse
from starlette.middleware.cors import CORSMiddleware

from PIL import Image
from pdftoword import convertPDFtoWORD

from model import inference


# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# Origin of the local development frontend; replace with your frontend origin URL.
# NOTE(review): this list is not referenced by the middleware configuration
# below, which allows every origin instead -- confirm which one is intended.
origins = ["http://localhost:3000"]

# Permissive CORS policy: any origin, any method, any header.
# NOTE(review): the FastAPI CORS documentation says wildcard values should not
# be combined with allow_credentials=True -- confirm this policy before
# exposing the service beyond development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _load_rgb_image(raw_bytes):
    """Decode uploaded bytes into an RGB PIL image, or raise HTTP 400."""
    try:
        return Image.open(io.BytesIO(raw_bytes)).convert("RGB")
    except OSError as exc:
        # Pillow reports unidentifiable and truncated images as OSError
        # subclasses; those are problems with the upload, not with the server.
        print(f"Rejected upload, not a readable image: {exc}")
        raise HTTPException(
            status_code=400, detail="Uploaded file is not a valid image"
        ) from exc


@app.post("/upload")
async def extract_table_data(image: UploadFile = File(...)):
    """Run table extraction on an uploaded image and return the table as HTML.

    Args:
        image: The uploaded image file (multipart form field ``image``).

    Returns:
        A single-line HTML table string rendered from the object returned by
        ``inference`` (presumably a pandas DataFrame, given the ``.empty`` and
        ``.to_html`` usage), or a short HTML notice when that object is empty.

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image,
            500 when anything else fails while processing it.
    """
    try:
        picture = _load_rgb_image(await image.read())

        table_frame = inference(picture)
        if table_frame.empty:
            return "<h2 style=\"color: darkslategrey;\">💡 the image has no tables 💡</h2>"

        # Newlines are stripped so the markup is returned as one line.
        return table_frame.to_html(escape=True, border=1, index=False).replace("\n", "")

    except HTTPException:
        # Keep the deliberate 400 from the decoder; HTTPException is itself an
        # Exception and would otherwise be rewritten into a 500 below.
        raise
    except Exception as exc:
        print(f"Error processing image: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error") from exc



@app.post("/convert")
async def convert_pdf(docxFile: UploadFile = File(...)):
    """Convert an uploaded PDF to a Word document and return it as a download.

    Args:
        docxFile: The uploaded PDF (multipart form field ``docxFile``).

    Returns:
        A ``FileResponse`` streaming the file at the path returned by
        ``convertPDFtoWORD``, or a ``JSONResponse`` with status 500 and an
        ``error`` message when saving or converting fails.

    Raises:
        HTTPException: 415 when the upload's content type is not
            ``application/pdf``.
    """
    uploaded_file = docxFile

    # Validate before entering the try block: HTTPException is an Exception,
    # so raising it inside would be caught by the generic handler below and
    # reach the client as a 500 instead of a 415.
    # A missing content type is treated the same as a wrong one.
    content_type = uploaded_file.content_type or ""
    if not content_type.startswith("application/pdf"):
        raise HTTPException(415, detail="Unsupported file format. Please upload a PDF file.")

    try:
        # Create uploads directory if it doesn't exist
        os.makedirs("uploads", exist_ok=True)

        # The filename is client-controlled: keep only its final component so
        # values such as "../../x" or an absolute path cannot escape uploads/.
        safe_name = os.path.basename(uploaded_file.filename or "")
        if safe_name in ("", ".", ".."):
            safe_name = "upload.pdf"
        pdf_file_path = os.path.join("uploads", safe_name)

        # Save the uploaded file
        with open(pdf_file_path, "wb") as file_object:
            file_object.write(await uploaded_file.read())

        try:
            # Process the PDF
            docx_path = convertPDFtoWORD(pdf_file_path)
        finally:
            # Remove the uploaded PDF whether or not the conversion succeeded,
            # so failed conversions do not leave files behind.
            if os.path.exists(pdf_file_path):
                os.unlink(pdf_file_path)

        return FileResponse(
            docx_path,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            filename="converted_document.docx",
        )

    except FileNotFoundError:
        # Handle case where conversion fails (e.g., missing converter)
        return JSONResponse({"error": "Conversion failed. Please check the converter or file."}, status_code=500)
    except Exception as e:
        # Catch any unexpected errors
        return JSONResponse({"error": f"An unexpected error occurred: {str(e)}"}, status_code=500)