Spaces:
No application file
No application file
import pytesseract | |
from PIL import Image | |
import os | |
from concurrent.futures import ProcessPoolExecutor | |
def ocr_frame(path):
    """Run OCR on a single frame image and return its text.

    Args:
        path (str): Path to the image file to read.

    Returns:
        dict: ``{"frame": <base file name of path>, "text": <OCR output
        with leading/trailing whitespace stripped>}``.
    """
    fname = os.path.basename(path)
    # Open through a context manager so the underlying file handle is
    # released as soon as OCR finishes rather than whenever the image
    # object happens to be garbage-collected.
    with Image.open(path) as img:
        # 'kor+eng' asks Tesseract to use both Korean and English data.
        text = pytesseract.image_to_string(img, lang='kor+eng')
    return {"frame": fname, "text": text.strip()}
def ocr_all_frames(frame_dir):
    """Run OCR over every ``.jpg`` frame in a directory using a process pool.

    Args:
        frame_dir (str): Path to the folder holding the frame images (.jpg).

    Returns:
        List[Dict[str, str]]: ``[{'frame': file name, 'text': OCR result}, ...]``,
        one entry per image, in sorted path order. Empty if the folder
        contains no ``.jpg`` files.
    """
    img_paths = sorted(
        os.path.join(frame_dir, fname)
        for fname in os.listdir(frame_dir)
        if fname.endswith(".jpg")
    )
    # Nothing to OCR: return early instead of setting up a process pool.
    if not img_paths:
        return []
    # executor.map yields results in input order, so the output follows
    # the sorted paths even though frames are processed in parallel.
    with ProcessPoolExecutor() as executor:
        return list(executor.map(ocr_frame, img_paths))