Spaces:
Sleeping
Sleeping
import io
import pickle
import re

from PIL import Image
from transformers import pipeline
def clean_text(text: str) -> str:
    """Strip markup tags from *text* and normalize its whitespace.

    Every ``<...>`` span (at least one character between the angle brackets)
    is removed, leading and trailing whitespace is trimmed, and each remaining
    run of whitespace is collapsed to a single space.

    Args:
        text: The raw string to clean.

    Returns:
        The tag-free, whitespace-normalized string.
    """
    # A distinct local name avoids shadowing the function itself.
    without_tags = re.sub(r'<[^>]+>', '', text)
    return re.sub(r'\s+', ' ', without_tags.strip())
# Built once at import time so every extract_text() call reuses the same
# pipeline object instead of constructing a new one per request.
pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
def extract_text(binary_image: bytes) -> str:
    """Run the OCR pipeline on an encoded image and return the cleaned text.

    Args:
        binary_image: Raw bytes of an image file, in any format Pillow can
            open.

    Returns:
        The ``generated_text`` of the first pipeline result, passed through
        ``clean_text``.
    """
    # The context manager closes the Pillow image as soon as inference has
    # finished, rather than leaving it to the garbage collector.
    with Image.open(io.BytesIO(binary_image)) as image:
        result = pipe(image)
    return clean_text(result[0]['generated_text'])
# Manual smoke test (uncomment to run against a local sample image):
# print(extract_text(open("pictures/users/2.jpg", "rb").read()))
print("OCR pipeline loaded successfully!")