OCT-to-Markdown / app11.py
Vishal Sharma
changed for russian and malay support
be6eecc
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import pytesseract
from PIL import Image
import os
# ASGI application object; the upload/download routes and the static-file
# mount further down in this module are all registered on it.
app = FastAPI()
# Working directories, relative to the process's current directory:
# UPLOAD_DIR holds the uploaded images, OUTPUT_DIR the generated Markdown.
UPLOAD_DIR, OUTPUT_DIR = "uploads", "outputs"

# exist_ok=True keeps start-up from failing when the directories already exist.
for _directory in (UPLOAD_DIR, OUTPUT_DIR):
    os.makedirs(_directory, exist_ok=True)
def _safe_upload_name(raw_name):
    """Reduce a client-supplied filename to a bare file name.

    Args:
        raw_name: The filename as sent by the client; may be ``None``, empty,
            or contain directory components.

    Returns:
        The final path component of ``raw_name``, or ``""`` when nothing
        usable remains (missing name, embedded NUL, ``.`` or ``..``).
    """
    if not raw_name or "\x00" in raw_name:
        return ""
    # Treat backslashes as separators too, so Windows-style paths are cut
    # down to their last component regardless of the host platform.
    candidate = os.path.basename(raw_name.replace("\\", "/"))
    if candidate in {".", ".."}:
        return ""
    return candidate


@app.post("/upload/")
async def upload_image(file: UploadFile = File(...)):
    """Store an uploaded image, OCR it, and write the text to a Markdown file.

    The upload is saved under ``UPLOAD_DIR``; the recognised text is written
    to ``OUTPUT_DIR`` as ``<name-without-extension>.md``.

    Args:
        file: The uploaded image.

    Returns:
        ``{"download_url": "/download/<name>.md"}`` on success, or
        ``{"error": <message>}`` when the filename is unusable or when OCR or
        writing the Markdown file raises.
    """
    # The filename comes from the client, so strip any directory components
    # before building paths; otherwise "../x" could write outside UPLOAD_DIR.
    safe_name = _safe_upload_name(file.filename)
    if not safe_name:
        return {"error": "Invalid filename"}

    file_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(file_path, "wb") as f:
        f.write(await file.read())

    try:
        # Perform OCR; the context manager closes the image's file handle
        # once the text has been extracted.
        with Image.open(file_path) as image:
            text = pytesseract.image_to_string(image)

        # Save as Markdown
        markdown_path = os.path.join(OUTPUT_DIR, f"{os.path.splitext(safe_name)[0]}.md")
        with open(markdown_path, "w", encoding="utf-8") as md_file:
            md_file.write(text)
    except Exception as e:
        # Failures are reported in the response body, not raised.
        return {"error": str(e)}

    return {"download_url": f"/download/{os.path.basename(markdown_path)}"}
@app.get("/download/{filename}")
async def download_file(filename: str):
    """Return a generated Markdown file from ``OUTPUT_DIR`` as a download.

    Args:
        filename: Name of the file inside ``OUTPUT_DIR``. Must be a bare file
            name with no directory components.

    Returns:
        A ``FileResponse`` with media type ``text/markdown`` when the file
        exists, otherwise ``{"error": "File not found"}``.
    """
    # Accept only a plain file name: no separators (either slash style) and
    # no "." / ".." entries, so the lookup cannot leave OUTPUT_DIR.
    is_bare_name = (
        filename not in {"", ".", ".."}
        and os.path.basename(filename.replace("\\", "/")) == filename
    )
    if is_bare_name:
        file_path = os.path.join(OUTPUT_DIR, filename)
        # isfile() rather than exists(): a directory must not be handed to
        # FileResponse.
        if os.path.isfile(file_path):
            return FileResponse(file_path, media_type='text/markdown', filename=filename)
    # NOTE(review): this is returned as an ordinary JSON body; the status
    # code is left at the framework default, as before.
    return {"error": "File not found"}
# Expose the contents of the "static" directory (HTML pages and assets) at
# the site root. This mount is registered after the API routes above.
app.mount(
    "/",
    StaticFiles(directory="static", html=True),
    name="static",
)