|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
|
from PIL import Image |
|
import cv2 |
|
import os |
|
import torch |
|
|
|
|
|
# Hugging Face model identifier passed to ``from_pretrained`` when the
# processor and model are loaded.
MODEL_NAME = "microsoft/trocr-large-handwritten"

# Module-level cache for the loaded processor and model. Both stay ``None``
# until initialize_model() populates them, so importing this module does not
# load any weights.
processor = None
model = None
|
|
|
def initialize_model():
    """Load the TrOCR processor and model into the module globals on first use.

    Does nothing when both ``processor`` and ``model`` are already set.
    Otherwise loads both from ``MODEL_NAME`` and, when CUDA is available,
    moves the model to the GPU.

    Raises:
        Exception: Any error raised while loading or moving the model is
            printed and then re-raised unchanged.
    """
    global processor, model

    if processor is not None and model is not None:
        return

    try:
        loaded_processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
        loaded_model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
        if torch.cuda.is_available():
            loaded_model = loaded_model.to('cuda')
    except Exception as e:
        print(f"Error initializing model: {str(e)}")
        raise

    # Publish the globals only after every step succeeded. Assigning them
    # one by one would leave a half-initialised state behind on failure
    # (for example a CPU-resident model after a failed move to CUDA) that
    # later calls would treat as ready.
    processor = loaded_processor
    model = loaded_model
|
|
|
def text(image_cv):
    """Recognise text in a collection of OpenCV images and join the results.

    Each image is converted to RGB, wrapped in a PIL image, and passed
    through the processor and ``model.generate``. Spaces inside each decoded
    string are removed, and the per-image strings are joined with a single
    space.

    Args:
        image_cv: Iterable of images accepted by ``cv2.cvtColor``.
            Two-dimensional arrays are converted with ``COLOR_GRAY2RGB``;
            everything else is converted with ``COLOR_BGR2RGB`` (presumably
            BGR crops produced by OpenCV -- confirm against the caller).

    Returns:
        str: The recognised strings joined by spaces with surrounding
        whitespace stripped. Returns ``""`` when the input is empty, when
        every image fails, or when any error occurs outside the per-image
        loop (including model initialisation). Errors are printed, never
        raised.
    """
    try:
        images = list(image_cv)
        if not images:
            # Nothing to recognise, so avoid loading the model at all.
            return ""

        initialize_model()

        pieces = []
        for img in images:
            try:
                # COLOR_BGR2RGB rejects single-channel input, so grayscale
                # images need their own conversion code.
                if getattr(img, "ndim", 3) == 2:
                    img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                else:
                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(img_rgb)

                pixel_values = processor(image, return_tensors="pt").pixel_values
                # Follow the model's actual device rather than re-checking
                # CUDA availability, which can disagree with where the
                # model really lives.
                pixel_values = pixel_values.to(model.device)

                generated_ids = model.generate(pixel_values)
                generated_text = processor.batch_decode(
                    generated_ids, skip_special_tokens=True
                )[0]
                pieces.append(generated_text.replace(" ", ""))
            except Exception as e:
                # Best effort: report the failing image and keep going.
                print(f"Error processing image: {str(e)}")
                continue

        return " ".join(pieces).strip()
    except Exception as e:
        print(f"Error in text function: {str(e)}")
        return ""
|
|
|
|