Spaces:
No application file
No application file
File size: 837 Bytes
668bf5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import pytesseract
from PIL import Image
import os
from concurrent.futures import ProcessPoolExecutor
def ocr_frame(path):
    """Run Tesseract OCR on a single frame image.

    Args:
        path (str): Path to the frame image file.

    Returns:
        Dict[str, str]: ``{"frame": <file name>, "text": <OCR text>}``. The
        file name is the basename of ``path``; the text has leading and
        trailing whitespace stripped.
    """
    fname = os.path.basename(path)
    # Open through a context manager so the image's file handle is released
    # deterministically, even if the OCR call raises.
    with Image.open(path) as img:
        # 'kor+eng' asks Tesseract to use both the Korean and English models.
        text = pytesseract.image_to_string(img, lang='kor+eng')
    return {"frame": fname, "text": text.strip()}
def ocr_all_frames(frame_dir):
    """Run OCR on every ``.jpg`` frame in a folder using a process pool.

    Args:
        frame_dir (str): Path to the folder holding the frame images (.jpg).

    Returns:
        List[Dict[str, str]]: ``[{'frame': file name, 'text': OCR result}, ...]``
        in sorted-path order. Empty when the folder contains no ``.jpg``
        files.
    """
    img_paths = sorted(
        os.path.join(frame_dir, fname)
        for fname in os.listdir(frame_dir)
        # Case-sensitive match: only names ending in lowercase ".jpg".
        if fname.endswith(".jpg")
    )
    if not img_paths:
        # Nothing to OCR, so avoid setting up a process pool at all.
        return []
    with ProcessPoolExecutor() as executor:
        # executor.map yields results in the same order as img_paths.
        return list(executor.map(ocr_frame, img_paths))
|