from typing import Optional

from transformers import AutoTokenizer
from transformers.processing_utils import ProcessorMixin

from .image_processing_m2_encoder import M2EncoderImageProcessor


class M2EncoderProcessor(ProcessorMixin):
    """Joint processor bundling an ``M2EncoderImageProcessor`` and a
    GLM Chinese tokenizer behind the standard ``ProcessorMixin`` interface.

    Calling the processor tokenizes ``text`` and/or preprocesses ``images``
    and merges both results into a single encoding dict.
    """

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "M2EncoderImageProcessor"
    # Slow/fast tokenizer class names; no fast tokenizer is available.
    tokenizer_class = ("GLMChineseTokenizer", None)

    def __init__(self, image_processor, tokenizer):
        # NOTE(review): deliberately does NOT call ProcessorMixin.__init__,
        # presumably to skip its attribute-class validation for the custom
        # GLMChineseTokenizer — confirm before "fixing" this to super().
        self.image_processor = image_processor
        self.tokenizer = tokenizer

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Load both sub-processors from ``pretrained_model_name_or_path``.

        ``trust_remote_code`` (default ``True``) is popped from ``kwargs`` and
        forwarded only to the tokenizer, since the tokenizer class ships as
        remote code; the remaining ``kwargs`` go to both loaders.
        """
        trust_remote_code = kwargs.pop("trust_remote_code", True)
        image_processor = M2EncoderImageProcessor.from_pretrained(
            pretrained_model_name_or_path, **kwargs
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
        return cls(image_processor=image_processor, tokenizer=tokenizer)

    def __call__(
        self,
        text=None,
        images=None,
        padding="max_length",
        truncation=True,
        max_length: Optional[int] = 52,
        return_tensors=None,
        **kwargs,
    ):
        """Encode ``text`` and/or ``images`` into one merged dict.

        Args:
            text: Optional text input forwarded to the tokenizer (padded to
                ``max_length`` by default, with special-tokens mask included).
            images: Optional image input forwarded to the image processor.
            padding / truncation / max_length: Tokenizer controls; the
                defaults pin sequences to exactly 52 tokens.
            return_tensors: Framework tensor format passed to both components.
            **kwargs: Extra arguments forwarded to BOTH the tokenizer and the
                image processor.

        Returns:
            A dict combining tokenizer and image-processor outputs; empty if
            both ``text`` and ``images`` are ``None``.
        """
        encoding = {}
        if text is not None:
            encoding.update(
                self.tokenizer(
                    text,
                    padding=padding,
                    truncation=truncation,
                    max_length=max_length,
                    return_special_tokens_mask=True,
                    return_tensors=return_tensors,
                    **kwargs,
                )
            )
        if images is not None:
            encoding.update(
                self.image_processor(images, return_tensors=return_tensors, **kwargs)
            )
        return encoding