# Source: Hugging Face Space ahmedzein/tableocr (status: Sleeping) - file size: 2,794 bytes, commit c6a18bd

# import io
import os

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from starlette.responses import FileResponse
from starlette.middleware.cors import CORSMiddleware

# From PIL import Image
from pdftoword import convertPDFtoWORD

# from model import inference


# ASGI application instance; the route decorators in this module attach to it.
app = FastAPI()

# Placeholder list of frontend origins.
# NOTE(review): this list is not passed to the middleware below, which uses
# a wildcard instead - confirm which of the two is intended.
origins = ["http://localhost:3000"]  # Replace with your frontend origin URL

# CORS policy: every origin, HTTP method and request header is accepted.
# NOTE(review): the FastAPI CORS documentation advises against combining
# wildcard values with allow_credentials=True - confirm whether credentialed
# cross-origin requests are actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.post("/upload")
async def extract_table_data(image: UploadFile = File(...)):
    """Answer an image upload with a notice that table OCR is switched off.

    The ``image`` upload is required by the signature but is never read;
    the handler always returns the same fixed message.
    """
    disabled_notice = "table ocr is disabled 😔"
    return disabled_notice
    # try:
    #     # Read image data
    #     image_data = await image.read()

    #     # Open image in memory
    #     image = Image.open(io.BytesIO(image_data))
    #     rgb_img = image.convert("RGB")
    #     rgb_img.save('output.jpg')
    #     image = Image.open('output.jpg')

    #     table_fram= inference(image)
    #     if table_fram.empty:
    #         return "<h2 style=\"color: darkslategrey;\">💡 the image has no tables 💡</h2>"

    #     return table_fram.to_html(escape=True,border=1,index=False).replace('\n', '')

    # except Exception as e:
    #     # Handle and log exceptions appropriately
    #     print(f"Error processing image: {e}")
    #     raise HTTPException(status_code=500, detail="Internal server error")



@app.post("/convert")
async def convert_pdf(docxFile: UploadFile = File(...)):
    """Convert an uploaded PDF into a Word document and return it as a download.

    Args:
        docxFile: The uploaded PDF file (the form field name is kept as-is so
            existing clients continue to work).

    Returns:
        A ``FileResponse`` serving the converted ``.docx`` on success, or a
        ``JSONResponse`` with status 500 and an ``error`` message when saving
        or converting the file fails.

    Raises:
        HTTPException: 415 when the upload's content type is missing or is
            not ``application/pdf``.
    """
    import uuid  # local import: only used to name the temporary upload

    uploaded_file = docxFile

    # Validate before entering the try block: raised inside it, the 415 was
    # caught by the generic ``except Exception`` and reported as a 500.
    # ``content_type`` can be None when the client omits the header.
    content_type = uploaded_file.content_type or ""
    if not content_type.startswith("application/pdf"):
        raise HTTPException(415, detail="Unsupported file format. Please upload a PDF file.")

    try:
        # Create uploads directory if it doesn't exist
        os.makedirs("uploads", exist_ok=True)

        # The client-supplied filename is untrusted (it may contain path
        # separators or "..") and may collide with a concurrent upload, so
        # store the file under a server-generated unique name instead.
        pdf_file_path = os.path.join("uploads", f"{uuid.uuid4().hex}.pdf")

        try:
            # Use the async read so the event loop is not blocked on file I/O.
            pdf_bytes = await uploaded_file.read()
            with open(pdf_file_path, "wb") as file_object:
                file_object.write(pdf_bytes)

            # Process the PDF
            docx_path = convertPDFtoWORD(pdf_file_path)
        finally:
            # Remove the temporary PDF whether or not the conversion succeeded.
            if os.path.exists(pdf_file_path):
                os.unlink(pdf_file_path)

        # NOTE(review): the generated .docx is not removed here - confirm
        # whether convertPDFtoWORD or another process cleans it up.
        return FileResponse(
            docx_path,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            filename="converted_document.docx",
        )

    except FileNotFoundError:
        # Handle case where conversion fails (e.g., missing converter)
        return JSONResponse({"error": "Conversion failed. Please check the converter or file."}, status_code=500)
    except Exception as e:
        # Catch any unexpected errors
        return JSONResponse({"error": f"An unexpected error occurred: {str(e)}"}, status_code=500)