"""Handwritten-text recognition built on the TrOCR model.

Importing this module loads the TrOCR processor and model once and reports
progress through ``notification_queue``. If loading fails, ``processor`` and
``model`` are set to ``None`` and ``text`` returns an empty string.
"""
import os
import sys

import cv2
import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Add the parent of this file's directory to the import path before importing
# ``main`` (presumably that is where ``main`` lives -- confirm project layout).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from main import notification_queue  # noqa: E402

MODEL_NAME = "microsoft/trocr-large-handwritten"


def _notify(kind, message):
    """Queue a ``{"type": kind, "message": message}`` notification."""
    notification_queue.put({"type": kind, "message": message})


# Initialize model and processor at module load time
try:
    _notify("info", "Initializing TrOCR model...")
    processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
    model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
    if torch.cuda.is_available():
        model = model.to('cuda')
    _notify("success", "TrOCR model initialized successfully")
except Exception as e:
    # Loading is best-effort: report the failure and leave the module
    # importable; ``text`` checks for the ``None`` placeholders.
    error_msg = str(e)
    _notify("error", f"Error initializing TrOCR model: {error_msg}")
    processor = None
    model = None


def text(image_cv):
    """Recognize handwritten text in one image or a sequence of images.

    Args:
        image_cv: A single image, or a list/tuple of images. Each image is
            converted with ``cv2.COLOR_BGR2RGB``, so it is treated as a BGR
            array (presumably as produced by OpenCV -- confirm with callers).

    Returns:
        The decoded text of every image, in order. Spaces inside each
        image's decoded text are removed and one space is appended after
        each image's text (so a non-empty result ends with a space).
        Returns ``""`` when the model is not initialized or when any step
        raises; in both cases an error notification is queued.
    """
    try:
        # Check if model is initialized
        if processor is None or model is None:
            _notify("error", "TrOCR model not initialized properly")
            return ""

        if isinstance(image_cv, (list, tuple)):
            images = list(image_cv)
        else:
            images = [image_cv]

        _notify("info", f"Processing {len(images)} image(s)...")

        pieces = []
        for frame in images:
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(rgb)
            pixel_values = processor(pil_image, return_tensors="pt").pixel_values
            # The model may have been moved to the GPU at load time; the
            # inputs must live on the same device or generation fails.
            pixel_values = pixel_values.to(model.device)
            generated_ids = model.generate(pixel_values)
            decoded = processor.batch_decode(
                generated_ids, skip_special_tokens=True
            )[0]
            pieces.append(decoded.replace(" ", ""))

        # One trailing space after every image's text, as callers receive today.
        return "".join(f"{piece} " for piece in pieces)
    except Exception as e:
        # Boundary handler: surface the failure to the UI queue instead of
        # propagating, and keep the string return contract.
        error_msg = str(e)
        _notify("error", f"Error in text function: {error_msg}")
        return ""