from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import cv2
import os
import torch
import sys

# Make the project root importable so sibling modules (utils, all_models) resolve.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils import notification_queue, log_print
from all_models import models


def text(image_cv):
    """Extract text from one or more OpenCV (BGR) images using the TrOCR model.

    Parameters
    ----------
    image_cv : numpy.ndarray or list[numpy.ndarray]
        A single BGR image or a list of them. ``None`` entries are skipped
        with a warning; a failure on one image does not abort the batch.

    Returns
    -------
    str
        The per-image recognized strings (each with its internal spaces
        removed) joined by single spaces, or ``""`` on a top-level failure.
    """
    try:
        # Acquire the shared model instance from the singleton registry.
        model, processor = models.get_trocr_model()
    except Exception as e:
        # Acquisition failed: report and bail out WITHOUT releasing a
        # reference we never obtained. (Previously the release ran in a
        # finally that also covered acquisition, unbalancing the refcount.)
        error_msg = f"Error in text function: {str(e)}"
        log_print(error_msg, "ERROR")
        notification_queue.put({
            "type": "error",
            "message": error_msg
        })
        return ""

    try:
        if not isinstance(image_cv, list):
            image_cv = [image_cv]

        pieces = []  # per-image results; joined once at the end (avoids quadratic concat)
        total_images = len(image_cv)
        log_print(f"Processing {total_images} image(s) for text extraction")

        for i, img in enumerate(image_cv):
            try:
                log_print(f"Processing image {i+1}/{total_images}")

                # Validate image
                if img is None:
                    log_print(f"Skipping image {i+1} - Image is None", "WARNING")
                    continue

                # TrOCR expects RGB input; OpenCV delivers BGR.
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(img_rgb)

                # Preprocess into model input tensors.
                pixel_values = processor(image, return_tensors="pt").pixel_values
                if torch.cuda.is_available():
                    pixel_values = pixel_values.to(models.device)

                # Generate and decode the recognized text.
                generated_ids = model.generate(pixel_values)
                generated_text = processor.batch_decode(
                    generated_ids, skip_special_tokens=True
                )[0]

                # Deliberate cleanup: collapse all intra-string spaces so each
                # image contributes one space-free token to the result.
                cleaned_text = generated_text.replace(" ", "")
                pieces.append(cleaned_text)

                log_print(f"Successfully extracted text from image {i+1}: {cleaned_text}")

                # Release CUDA memory between images to keep peak usage low.
                if torch.cuda.is_available():
                    del pixel_values
                    del generated_ids
                    torch.cuda.empty_cache()

            except Exception as e:
                # Best-effort batch: log the failure and move on.
                log_print(f"Error processing image {i+1}: {str(e)}", "ERROR")
                continue

        # Equivalent to the original trailing-space build + strip().
        return " ".join(pieces)

    except Exception as e:
        error_msg = f"Error in text function: {str(e)}"
        log_print(error_msg, "ERROR")
        notification_queue.put({
            "type": "error",
            "message": error_msg
        })
        return ""
    finally:
        # Release the model reference acquired above — runs on success and failure.
        models.release_trocr_model()