|
import platform |
|
# `os` is needed here, before the file's main import block further down runs.
# This setup precedes those imports, presumably so TRANSFORMERS_CACHE is set
# before the model code is loaded -- TODO confirm.
import os

# On Windows, point the Transformers cache at a folder inside the user's
# local application-data directory and create that folder on first launch.
if platform.system() == "Windows":
    # Backslash is escaped explicitly; "\L" is an invalid escape sequence.
    print("Windows detected. Assigning cache directory to Transformers in AppData\\Local.")
    local_app_data = os.getenv("LOCALAPPDATA")
    if local_app_data is None:
        # os.path.join(None, ...) would raise TypeError; skip the assignment
        # instead of crashing at import time.
        print("LOCALAPPDATA is not set. Transformers cache directory left unchanged.")
    else:
        transformers_cache_directory = os.path.join(local_app_data, "transformers_cache")
        if not os.path.exists(transformers_cache_directory):
            try:
                os.mkdir(transformers_cache_directory)
                print(f"First launch. Directory '{transformers_cache_directory}' created successfully.")
            except OSError as e:
                # Best effort: report the failure and still export the variable below.
                print(f"Error creating directory '{transformers_cache_directory}': {e}")
        else:
            print(f"Directory '{transformers_cache_directory}' already exists.")
        os.environ["TRANSFORMERS_CACHE"] = transformers_cache_directory
        print("Environment variable assigned.")
        # Keep the module namespace free of setup-only names.
        del transformers_cache_directory
    del local_app_data
else:
    print("Windows not detected. Assignment of Transformers cache directory not necessary.")
|
|
|
import io |
|
import os |
|
|
|
from fastapi import FastAPI, File, HTTPException, UploadFile |
|
from fastapi.responses import JSONResponse |
|
from starlette.responses import FileResponse |
|
from starlette.middleware.cors import CORSMiddleware |
|
|
|
from PIL import Image |
|
from pdftoword import convertPDFtoWORD |
|
|
|
from model import inference |
|
|
|
|
|
# Origin list for a front end served from localhost:3000.
# NOTE(review): this list is not passed to the middleware below, which uses a
# wildcard instead. Wildcard origins combined with allow_credentials=True is
# discouraged by the FastAPI CORS documentation -- confirm whether
# `allow_origins=origins` was intended before tightening it.
origins = ["http://localhost:3000"]

# Application object that the route decorators in this file register against.
app = FastAPI()

# Cross-origin policy: any origin, method and header, with credentials allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
|
|
|
|
|
@app.post("/upload")
async def extract_table_data(image: UploadFile = File(...)):
    """Run table extraction on an uploaded image and return the table as HTML.

    The upload is decoded, converted to RGB, re-encoded as JPEG and passed to
    ``inference``. The result is presumably a pandas DataFrame (it is used
    through ``.empty`` and ``.to_html``) -- TODO confirm.

    Args:
        image: The uploaded image file from the multipart form.

    Returns:
        A single-line HTML table string, or a fixed HTML notice when the
        inference result is empty.

    Raises:
        HTTPException: 500 for any failure while decoding or processing.
    """
    try:
        image_data = await image.read()

        # Use distinct local names rather than rebinding the `image` parameter.
        uploaded_image = Image.open(io.BytesIO(image_data))
        rgb_img = uploaded_image.convert("RGB")

        # Keep the JPEG round-trip the model has always received, but do it in
        # memory. Writing a shared 'output.jpg' in the working directory let
        # concurrent requests overwrite each other's image.
        # NOTE(review): the decoded image no longer carries a filename --
        # confirm `inference` does not rely on it.
        jpeg_buffer = io.BytesIO()
        rgb_img.save(jpeg_buffer, format="JPEG")
        jpeg_buffer.seek(0)
        model_input = Image.open(jpeg_buffer)

        # NOTE(review): `inference` runs synchronously inside this coroutine.
        table_fram = inference(model_input)
        if table_fram.empty:
            return "<h2 style=\"color: darkslategrey;\">π‘ the image has no tables π‘</h2>"

        # Newlines are stripped so the table is returned as one line.
        return table_fram.to_html(escape=True, border=1, index=False).replace('\n', '')

    except Exception as e:
        # Endpoint boundary: log the cause, expose only a generic error.
        print(f"Error processing image: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
|
|
|
|
|
|
|
@app.post("/convert")
async def convert_pdf(docxFile: UploadFile = File(...)):
    """Convert an uploaded PDF to a Word document and return it as a download.

    The upload is stored under ``uploads/``, passed to ``convertPDFtoWORD``,
    and removed again whether or not the conversion succeeds.

    Args:
        docxFile: The uploaded file; its content type must start with
            ``application/pdf``.

    Returns:
        A ``FileResponse`` serving the converted document as
        ``converted_document.docx``, or a ``JSONResponse`` with status 500
        and an ``error`` message when conversion fails.

    Raises:
        HTTPException: 415 when the upload is not declared as a PDF.
    """
    uploaded_file = docxFile

    # Validate outside the try block: HTTPException derives from Exception, so
    # raising it inside would be caught below and turned into a 500.
    # A missing content type is treated as unsupported.
    declared_type = uploaded_file.content_type or ""
    if not declared_type.startswith("application/pdf"):
        raise HTTPException(415, detail="Unsupported file format. Please upload a PDF file.")

    # The filename is client-controlled: keep only its last path component so
    # it cannot escape the uploads directory. The random prefix stops
    # concurrent uploads with the same name from clobbering each other.
    safe_name = os.path.basename((uploaded_file.filename or "").replace("\\", "/"))
    if not safe_name:
        safe_name = "upload.pdf"
    pdf_file_path = os.path.join("uploads", f"{os.urandom(8).hex()}_{safe_name}")

    try:
        os.makedirs("uploads", exist_ok=True)

        try:
            with open(pdf_file_path, "wb") as file_object:
                file_object.write(await uploaded_file.read())

            docx_path = convertPDFtoWORD(pdf_file_path)
        finally:
            # Remove the stored upload even when writing or conversion fails.
            if os.path.exists(pdf_file_path):
                os.unlink(pdf_file_path)

        # NOTE(review): the generated document at `docx_path` is never deleted
        # here -- confirm whether cleanup happens elsewhere.
        return FileResponse(docx_path, media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", filename="converted_document.docx")

    except FileNotFoundError:
        return JSONResponse({"error": "Conversion failed. Please check the converter or file."}, status_code=500)
    except Exception as e:
        return JSONResponse({"error": f"An unexpected error occurred: {str(e)}"}, status_code=500)