| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
| from PIL import Image | |
| import cv2 | |
# Identifier of the pretrained TrOCR checkpoint shared by the processor and
# the model below.
MODEL_NAME = "microsoft/trocr-large-handwritten"

# Both objects are created once, when this module is imported, and are then
# reused by every call that reads these module-level names.
processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
def text(image_cv):
    """Recognize text in a sequence of OpenCV images and return it as one string.

    Each item of ``image_cv`` is converted from BGR to RGB, wrapped as a PIL
    image, passed through the module-level ``processor`` and ``model``, and
    decoded to a string. Every space inside a single prediction is removed,
    and one space is appended after each prediction, so predictions appear in
    input order and a non-empty result ends with a trailing space.

    Args:
        image_cv: Iterable of images accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2RGB`` (presumably 3-channel BGR NumPy arrays,
            one crop per word -- TODO confirm against the caller).

    Returns:
        The concatenated recognized text, or ``""`` when ``image_cv`` yields
        no images.
    """
    pieces = []
    for bgr_image in image_cv:
        # Reorder channels BGR -> RGB before wrapping the array as a PIL image.
        rgb_array = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_array)

        pixel_values = processor(pil_image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)

        # Only one image is submitted per call, so take the first decoded entry.
        recognized = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        # Drop spaces inside the prediction, then terminate it with one space.
        pieces.append(recognized.replace(" ", "") + " ")

    # Join once at the end instead of growing a string inside the loop.
    return "".join(pieces)