Spaces:

point9
/

Deberta

Sleeping

File size: 2,255 Bytes

7f02e38

# models/deberta_model.py

import torch.nn as nn
from transformers import DebertaModel
from config import DROPOUT_RATE, DEBERTA_MODEL_NAME # Import DEBERTA_MODEL_NAME

class DebertaMultiOutputModel(nn.Module):
    """
    DeBERTa-based model for multi-output classification.

    A pre-trained DeBERTa encoder is shared by several independent linear
    classification heads, one per label column. Each head receives the same
    sentence-level feature vector: the encoder's final hidden state at the
    first token position, followed by dropout.

    Note:
        Unlike ``BertModel``, Hugging Face's ``DebertaModel`` has no built-in
        pooler and its output carries no ``pooler_output`` field, so the
        sentence representation is taken from the last hidden state directly.
    """
    # Statically set tokenizer name for easy access in main.py
    tokenizer_name = DEBERTA_MODEL_NAME

    def __init__(self, num_labels):
        """
        Initializes the DebertaMultiOutputModel.

        Args:
            num_labels (list): A list where each element is the number of classes
                                for a corresponding label column. One linear
                                head is created per element, in the same order.
        """
        super().__init__()
        # Load the pre-trained DeBERTa encoder used as the text backbone.
        self.deberta = DebertaModel.from_pretrained(DEBERTA_MODEL_NAME)
        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout layer for regularization

        # Create classification heads for each label column.
        # Each head maps the encoder's hidden size to the number of classes for that label.
        hidden_size = self.deberta.config.hidden_size
        self.classifiers = nn.ModuleList([
            nn.Linear(hidden_size, n_classes) for n_classes in num_labels
        ])

    def forward(self, input_ids, attention_mask):
        """
        Performs the forward pass of the model.

        Args:
            input_ids (torch.Tensor): Tensor of token IDs.
            attention_mask (torch.Tensor): Tensor indicating attention.

        Returns:
            list: A list of logit tensors, one for each classification head,
                  in the same order as ``num_labels`` passed to ``__init__``.
        """
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)

        # DebertaModel returns no `pooler_output` (accessing it raises
        # AttributeError). Element 0 of the output is the last hidden state,
        # whether the backbone returns a ModelOutput or a plain tuple; per the
        # Transformers docs it is (batch, seq_len, hidden_size).
        last_hidden_state = outputs[0]

        # Use the first token's hidden state (the [CLS] position for the
        # standard DeBERTa tokenizer) as the sentence-level representation.
        sentence_repr = last_hidden_state[:, 0]

        # Apply dropout
        sentence_repr = self.dropout(sentence_repr)

        # Pass the shared representation through each classification head.
        return [classifier(sentence_repr) for classifier in self.classifiers]