File size: 2,139 Bytes
6e8141b
 
 
 
 
 
 
 
 
be6eecc
6e8141b
 
 
 
 
be6eecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e8141b
 
 
 
 
 
 
 
 
be6eecc
 
 
 
 
6e8141b
be6eecc
6e8141b
 
be6eecc
 
6e8141b
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import pytesseract
from PIL import Image
import os

# Application object that the route decorators and the static mount attach to.
app = FastAPI()

# Working directories: raw uploads land in one, generated Markdown in the other.
# Both are created eagerly so the request handlers can write without checking.
UPLOAD_DIR, OUTPUT_DIR = "uploads", "outputs"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# OCR language codes mapped to their human-readable names.
SUPPORTED_LANGUAGES = dict(eng="English", rus="Russian", msa="Malay")

def detect_language(text):
    """Return the language code langdetect reports for *text*.

    Falls back to the string "unknown" whenever detection raises.
    """
    import langdetect

    # Fix the detector seed so repeated calls give consistent results.
    langdetect.DetectorFactory.seed = 0
    try:
        return langdetect.detect(text)
    except Exception:
        return "unknown"

def _safe_filename(raw_name):
    """Reduce a client-supplied filename to a bare name, or return None if unusable."""
    if not raw_name:
        return None
    # Treat backslashes as separators too, so Windows-style client paths are
    # stripped regardless of the host OS.
    candidate = os.path.basename(raw_name.replace("\\", "/"))
    if candidate in ("", ".", "..") or "\x00" in candidate:
        return None
    return candidate


@app.post("/upload/")
async def upload_image(file: UploadFile = File(...)):
    """Store an uploaded image, OCR it, and write the text out as Markdown.

    The upload is saved under UPLOAD_DIR, OCR is run with every language in
    SUPPORTED_LANGUAGES, and the result is written to OUTPUT_DIR as
    ``<upload stem>.md`` with a heading naming the detected language.

    Args:
        file: The uploaded image. Its filename is client-controlled, so only
            the final path component is used for anything touching the disk.

    Returns:
        ``{"download_url": "/download/<name>.md"}`` on success, or
        ``{"error": <message>}`` if the filename is unusable or OCR/writing fails.
    """
    safe_name = _safe_filename(file.filename)
    if safe_name is None:
        return {"error": "Invalid filename"}

    file_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(file_path, "wb") as f:
        f.write(await file.read())

    markdown_name = f"{os.path.splitext(safe_name)[0]}.md"
    markdown_path = os.path.join(OUTPUT_DIR, markdown_name)

    try:
        # Perform OCR; the context manager releases the image file handle
        # even when Tesseract fails.
        with Image.open(file_path) as image:
            text = pytesseract.image_to_string(
                image, lang="+".join(SUPPORTED_LANGUAGES)
            )

        # Detect language of the extracted text
        detected_lang = detect_language(text)

        # Save OCR result as Markdown
        with open(markdown_path, "w", encoding="utf-8") as md_file:
            md_file.write(f"# Detected Language: {detected_lang}\n\n{text}")

    except Exception as e:
        return {"error": str(e)}

    return {"download_url": f"/download/{markdown_name}"}

@app.get("/download/{filename}")
async def download_file(filename: str):
    """Serve a generated Markdown file from OUTPUT_DIR.

    Args:
        filename: Bare name of a file inside OUTPUT_DIR. Anything containing a
            path separator, or naming the directory itself or its parent, is
            treated as not found so the lookup cannot leave OUTPUT_DIR.

    Returns:
        A FileResponse with media type ``text/markdown`` when the file exists,
        otherwise ``{"error": "File not found"}``.
    """
    # Only a plain file name is acceptable; basename() differs from the input
    # as soon as the name carries any directory component.
    if filename in ("", ".", "..") or os.path.basename(filename) != filename:
        return {"error": "File not found"}

    file_path = os.path.join(OUTPUT_DIR, filename)
    # isfile() rather than exists(): a directory must not reach FileResponse.
    if os.path.isfile(file_path):
        return FileResponse(file_path, media_type='text/markdown', filename=filename)
    return {"error": "File not found"}

# Serve static files (HTML and assets) from the "static" directory at the site root.
# NOTE(review): this mount sits after the route definitions above; a mount at "/"
# presumably has to stay last so it does not shadow them — confirm before reordering.
app.mount("/", StaticFiles(directory="static", html=True), name="static")