# Source: Hugging Face file view, author yamanavijayavardhan, commit 26f855a ("update_new_new"), 1.76 kB.
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import cv2
import os
import torch
# Module-level model state, shared by every call in this module.
# Hugging Face model identifier passed to both from_pretrained() calls.
MODEL_NAME = "microsoft/trocr-large-handwritten"
# Both start as None and are populated on first use by initialize_model().
processor = None
model = None
def initialize_model():
    """Load the TrOCR processor and model into the module-level globals.

    Does nothing when both ``processor`` and ``model`` are already set.
    Otherwise both are loaded from ``MODEL_NAME`` and the model is moved to
    the GPU when ``torch.cuda.is_available()`` reports one.

    The globals are assigned only after every step has succeeded. A failure
    part-way through (for example while moving the model to the GPU) therefore
    leaves them untouched, and the next call retries from scratch instead of
    silently reusing a model that never reached its intended device.

    Raises:
        Exception: Whatever loading or the device transfer raised; the error
            is printed and then re-raised unchanged.
    """
    global processor, model
    if processor is not None and model is not None:
        return

    try:
        loaded_processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
        loaded_model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
        if torch.cuda.is_available():
            loaded_model = loaded_model.to('cuda')
    except Exception as e:
        print(f"Error initializing model: {str(e)}")
        raise

    # Publish both objects together so callers never observe a partial setup.
    processor, model = loaded_processor, loaded_model
def text(image_cv):
    """Run TrOCR on each image and return the recognized tokens as one string.

    Args:
        image_cv: Iterable of images that ``cv2.cvtColor`` can convert with
            ``cv2.COLOR_BGR2RGB`` (the code treats each one as a BGR array).

    Returns:
        The decoded text of every image, with the spaces inside each image's
        text removed, joined by single spaces and stripped of surrounding
        whitespace. An image that fails is reported with ``print`` and
        skipped. If model initialization or anything outside the per-image
        step fails, the error is printed and ``""`` is returned.
    """
    try:
        # Initialize model if not already done
        initialize_model()

        tokens = []
        for img_bgr in image_cv:
            try:
                # Convert BGR to RGB
                img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(img_rgb)

                # Process image
                pixel_values = processor(image, return_tensors="pt").pixel_values
                # Follow the model's actual device rather than re-checking
                # CUDA availability, so inputs and weights always match.
                pixel_values = pixel_values.to(model.device)

                generated_ids = model.generate(pixel_values)
                generated_text = processor.batch_decode(
                    generated_ids, skip_special_tokens=True
                )[0]
                # Each image contributes one space-free token.
                tokens.append(generated_text.replace(" ", ""))
            except Exception as e:
                # Best effort: one bad image must not discard the others.
                print(f"Error processing image: {str(e)}")
                continue

        return " ".join(tokens).strip()
    except Exception as e:
        print(f"Error in text function: {str(e)}")
        return ""