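# NOTE(review): the following lines look like web file-viewer page residue
# captured along with the source, not program code — confirm and drop if so.
# changgyu's picture
# Upload 19 files
# 668bf5d verified
# raw
# history blame contribute delete
# 837 Bytes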
import pytesseract
from PIL import Image
import os
from concurrent.futures import ProcessPoolExecutor
def ocr_frame(path):
    """Run Tesseract OCR on a single image file.

    Args:
        path (str): Path to the image file to read.

    Returns:
        dict: ``{"frame": <base file name of path>, "text": <OCR output with
        leading/trailing whitespace stripped>}``.
    """
    fname = os.path.basename(path)
    # Open through a context manager so the image's file handle is released
    # deterministically, even when OCR raises.
    with Image.open(path) as img:
        # lang='kor+eng' is passed straight to Tesseract (Korean + English).
        text = pytesseract.image_to_string(img, lang='kor+eng')
    return {"frame": fname, "text": text.strip()}
def ocr_all_frames(frame_dir):
    """OCR every ``.jpg`` frame in a folder using a pool of worker processes.

    Args:
        frame_dir (str): Path of the folder where the frame images (.jpg)
            are stored.

    Returns:
        List[Dict[str, str]]: ``[{'frame': file name, 'text': OCR result}, ...]``
        in sorted-path order. Empty when the folder holds no ``.jpg`` files.

    Note:
        Work is dispatched through ``ProcessPoolExecutor``; on platforms that
        start workers by spawning, the calling script presumably needs the
        usual ``if __name__ == "__main__":`` guard — verify against the caller.
    """
    # Sorted so results come back in a stable, name-based order
    # (executor.map yields results in input order).
    img_paths = sorted(
        os.path.join(frame_dir, fname)
        for fname in os.listdir(frame_dir)
        if fname.endswith(".jpg")
    )

    # Nothing to OCR: skip creating a process pool altogether.
    if not img_paths:
        return []

    with ProcessPoolExecutor() as executor:
        return list(executor.map(ocr_frame, img_paths))