File size: 837 Bytes
668bf5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pytesseract
from PIL import Image
import os
from concurrent.futures import ProcessPoolExecutor

def ocr_frame(path):
    """Run Tesseract OCR on a single frame image.

    Args:
        path (str): Path to the image file to read.

    Returns:
        dict: ``{"frame": <base file name>, "text": <stripped OCR text>}``.
    """
    fname = os.path.basename(path)
    # Open as a context manager so the underlying file handle is released
    # even if OCR raises before the image data has been fully loaded.
    with Image.open(path) as img:
        text = pytesseract.image_to_string(img, lang='kor+eng')
    return {"frame": fname, "text": text.strip()}


def ocr_all_frames(frame_dir):
    """Run OCR on every ``.jpg`` frame in a directory using a process pool.

    Only entries whose name ends with the lowercase suffix ``.jpg`` are
    processed. Paths are sorted lexicographically and the results are
    returned in that same order.

    Args:
        frame_dir (str): Directory containing the frame images (``.jpg``).

    Returns:
        List[Dict[str, str]]: One ``{'frame': file name, 'text': OCR result}``
        entry per frame; an empty list when no ``.jpg`` files are found.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``frame_dir`` cannot be
            listed (e.g. it does not exist).
    """
    img_paths = sorted(
        os.path.join(frame_dir, fname)
        for fname in os.listdir(frame_dir)
        if fname.endswith(".jpg")
    )

    # Nothing to OCR: skip setting up a process pool entirely.
    if not img_paths:
        return []

    with ProcessPoolExecutor() as executor:
        # executor.map yields results in the order of img_paths.
        return list(executor.map(ocr_frame, img_paths))