tableocr / main.py
ahmedzein's picture
Upload 7 files
c6a18bd verified
raw
history blame
2.79 kB
# import io
import os
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from starlette.responses import FileResponse
from starlette.middleware.cors import CORSMiddleware
# from PIL import Image
from pdftoword import convertPDFtoWORD
# from model import inference
# ASGI application object; the route decorators in this module attach to it.
app = FastAPI()

# Intended frontend origin -- replace with your frontend origin URL.
# NOTE(review): this list is not passed to the middleware below, which
# currently accepts every origin.
origins = ["http://localhost:3000"]

# Wide-open CORS policy: any origin, any method, any header.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard values with allow_credentials=True -- confirm whether credentials
# are needed, or switch allow_origins to the explicit `origins` list.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
@app.post("/upload")
async def extract_table_data(image: UploadFile = File(...)):
    """Placeholder for the table-OCR endpoint.

    The OCR pipeline is currently switched off, so the uploaded image is
    accepted but never read and a fixed notice is returned.

    Args:
        image: The uploaded image file (currently unused).

    Returns:
        A fixed string telling the client that table OCR is disabled.
    """
    return "table ocr is disabled 😔"

    # Disabled implementation, kept for reference. Re-enabling it also
    # requires the commented-out imports at the top of this module
    # (`io`, `PIL.Image`, `model.inference`).
    #
    # try:
    #     # Read image data
    #     image_data = await image.read()
    #     # Open image in memory
    #     image = Image.open(io.BytesIO(image_data))
    #     rgb_img = image.convert("RGB")
    #     rgb_img.save('output.jpg')
    #     image = Image.open('output.jpg')
    #     table_fram = inference(image)
    #     if table_fram.empty:
    #         return "<h2 style=\"color: darkslategrey;\">💡 the image has no tables 💡</h2>"
    #     return table_fram.to_html(escape=True, border=1, index=False).replace('\n', '')
    # except Exception as e:
    #     # Handle and log exceptions appropriately
    #     print(f"Error processing image: {e}")
    #     raise HTTPException(status_code=500, detail="Internal server error")
@app.post("/convert")
async def convert_pdf(docxFile: UploadFile = File(...)):
    """Convert an uploaded PDF into a Word document and return it.

    The upload is written to the ``uploads`` directory, passed to
    ``convertPDFtoWORD`` and removed again once conversion has finished,
    whether it succeeded or not.

    Args:
        docxFile: The uploaded file. Despite the name (kept because it is the
            form field clients send), it must be a PDF.

    Returns:
        A ``FileResponse`` streaming the converted ``.docx`` on success, or a
        ``JSONResponse`` with status 500 and an ``error`` message when saving
        or converting the file fails.

    Raises:
        HTTPException: 415 when the upload's content type is not
            ``application/pdf``.
    """
    # Validate outside the try block: raised inside it, the 415 would be
    # caught by the generic handler below and reported as a 500.
    content_type = docxFile.content_type or ""
    if not content_type.startswith("application/pdf"):
        raise HTTPException(415, detail="Unsupported file format. Please upload a PDF file.")

    # The filename is client-controlled; keep only its final path component so
    # it cannot escape the uploads directory (e.g. "../../etc/passwd").
    safe_name = os.path.basename((docxFile.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"
    pdf_file_path = os.path.join("uploads", safe_name)

    try:
        # Create uploads directory if it doesn't exist
        os.makedirs("uploads", exist_ok=True)

        # Save the uploaded file; UploadFile.read() is awaitable, so the
        # handler does not make a blocking read call on the event loop.
        with open(pdf_file_path, "wb") as file_object:
            file_object.write(await docxFile.read())

        # Process the PDF
        docx_path = convertPDFtoWORD(pdf_file_path)

        # NOTE(review): the generated .docx is not deleted by this handler.
        return FileResponse(
            docx_path,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            filename="converted_document.docx",
        )
    except FileNotFoundError:
        # Handle case where conversion fails (e.g., missing converter)
        return JSONResponse(
            {"error": "Conversion failed. Please check the converter or file."},
            status_code=500,
        )
    except Exception as e:
        # Catch any unexpected errors
        return JSONResponse(
            {"error": f"An unexpected error occurred: {str(e)}"},
            status_code=500,
        )
    finally:
        # Remove the uploaded PDF on every path so failed conversions do not
        # leave files behind.
        if os.path.exists(pdf_file_path):
            os.unlink(pdf_file_path)