Spaces:

ahmedzein
/

tableocr

Sleeping

App Files Files Community

tableocr / main.py

ahmedzein

Upload 7 files

c6a18bd verified about 1 year ago

raw

history blame

2.79 kB

	# import io
	import os

	from fastapi import FastAPI, File, HTTPException, UploadFile
	from fastapi.responses import JSONResponse
	from starlette.responses import FileResponse
	from starlette.middleware.cors import CORSMiddleware

	# from PIL import Image
	from pdftoword import convertPDFtoWORD

	# from model import inference


	# Application object; the route decorators further down register their
	# handlers on it.
	app = FastAPI()

	# Intended frontend origin.
	# NOTE(review): this list is not passed to the middleware below, which
	# uses a wildcard instead -- confirm whether it should be wired in.
	origins = ["http://localhost:3000"] # Replace with your frontend origin URL

	# Cross-origin configuration for every route registered on `app`.
	app.add_middleware(
	CORSMiddleware,
	allow_origins=["*"], # Allows all origins
	# NOTE(review): credentials are enabled together with wildcard origins --
	# confirm against the CORSMiddleware documentation that this combination
	# behaves as intended.
	allow_credentials=True,
	allow_methods=["*"], # Allows all methods
	allow_headers=["*"], # Allows all headers
	)


	@app.post("/upload")
	async def extract_table_data(image: UploadFile = File(...)):
	    """Stub for the table-OCR upload endpoint.

	    The uploaded ``image`` is accepted but never read: every request is
	    answered with the same fixed notice string.
	    """
	    disabled_notice = "table ocr is disabled 😔"
	    return disabled_notice

	# Former implementation of the endpoint body, kept for reference only.
	# Re-enabling it also needs the commented-out ``io``, ``PIL.Image`` and
	# ``model.inference`` imports at the top of the file.
	#
	# try:
	#     # Read image data
	#     image_data = await image.read()
	#
	#     # Open image in memory
	#     image = Image.open(io.BytesIO(image_data))
	#     rgb_img = image.convert("RGB")
	#     rgb_img.save('output.jpg')
	#     image = Image.open('output.jpg')
	#
	#     table_fram= inference(image)
	#     if table_fram.empty:
	#         return "<h2 style=\"color: darkslategrey;\">💡 the image has no tables 💡</h2>"
	#
	#     return table_fram.to_html(escape=True,border=1,index=False).replace('\n', '')
	#
	# except Exception as e:
	#     # Handle and log exceptions appropriately
	#     print(f"Error processing image: {e}")
	#     raise HTTPException(status_code=500, detail="Internal server error")



	@app.post("/convert")
	async def convert_pdf(docxFile: UploadFile = File(...)):
	    """Convert an uploaded PDF into a Word document and send it back.

	    Args:
	        docxFile: The uploaded file (multipart field ``docxFile``). Despite
	            the field name, its content type must be ``application/pdf``.

	    Returns:
	        A ``FileResponse`` serving the path returned by ``convertPDFtoWORD``
	        under the download name ``converted_document.docx``, or a
	        ``JSONResponse`` with status 500 and an ``"error"`` key when saving
	        or converting the file fails.

	    Raises:
	        HTTPException: 415 when the upload is not declared as a PDF.
	    """
	    uploaded_file = docxFile

	    # Validate before entering the try block: raised inside it, the 415 would
	    # be caught by the generic ``except Exception`` handler and reach the
	    # client as a 500. ``content_type`` may be missing, so default to "".
	    declared_type = uploaded_file.content_type or ""
	    if not declared_type.startswith("application/pdf"):
	        raise HTTPException(415, detail="Unsupported file format. Please upload a PDF file.")

	    # The filename is client-controlled: keep only its final path component
	    # so it cannot escape the uploads directory ("../x", "/abs/path", ...).
	    raw_name = (uploaded_file.filename or "").replace("\\", "/")
	    safe_name = os.path.basename(raw_name)
	    if safe_name in ("", ".", ".."):
	        safe_name = "upload.pdf"

	    try:
	        # Create uploads directory if it doesn't exist
	        os.makedirs("uploads", exist_ok=True)
	        pdf_file_path = os.path.join("uploads", safe_name)

	        try:
	            # Save the uploaded file; the async read avoids a blocking
	            # file read inside the coroutine.
	            with open(pdf_file_path, "wb") as file_object:
	                file_object.write(await uploaded_file.read())

	            # Process the PDF
	            docx_path = convertPDFtoWORD(pdf_file_path)
	        finally:
	            # Remove the uploaded PDF whether or not the conversion succeeded,
	            # so failed conversions do not leave files behind.
	            if os.path.exists(pdf_file_path):
	                os.unlink(pdf_file_path)

	        return FileResponse(
	            docx_path,
	            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	            filename="converted_document.docx",
	        )

	    except FileNotFoundError:
	        # Handle case where conversion fails (e.g., missing converter)
	        return JSONResponse({"error": "Conversion failed. Please check the converter or file."}, status_code=500)
	    except Exception as e:
	        # Catch any unexpected errors
	        return JSONResponse({"error": f"An unexpected error occurred: {str(e)}"}, status_code=500)