# models/deberta_model.py

import torch.nn as nn
from transformers import DebertaModel

from config import DROPOUT_RATE, DEBERTA_MODEL_NAME


class DebertaMultiOutputModel(nn.Module):
    """
    DeBERTa-based model for multi-output classification.

    Similar structure to the BERT model, using a pre-trained DeBERTa model
    as the backbone for text feature extraction.
    """

    # Statically set tokenizer name for easy access in main.py
    tokenizer_name = DEBERTA_MODEL_NAME

    def __init__(self, num_labels):
        """
        Initializes the DebertaMultiOutputModel.

        Args:
            num_labels (list): A list where each element is the number of
                classes for a corresponding label column.
        """
        super(DebertaMultiOutputModel, self).__init__()
        # Load the pre-trained DeBERTa model.
        # Note: unlike BERT, DebertaModel does not expose a 'pooler_output',
        # so the forward pass pools the [CLS] token's hidden state instead.
        self.deberta = DebertaModel.from_pretrained(DEBERTA_MODEL_NAME)
        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout layer for regularization

        # Create classification heads for each label column.
        # Each head maps DeBERTa's hidden size to the number of classes
        # for that label.
        self.classifiers = nn.ModuleList([
            nn.Linear(self.deberta.config.hidden_size, n_classes)
            for n_classes in num_labels
        ])

    def forward(self, input_ids, attention_mask):
        """
        Performs the forward pass of the model.

        Args:
            input_ids (torch.Tensor): Tensor of token IDs.
            attention_mask (torch.Tensor): Tensor indicating attention.

        Returns:
            list: A list of logit tensors, one for each classification head.
        """
        # Pass input_ids and attention_mask through DeBERTa.
        # DebertaModel returns a BaseModelOutput without a pooler_output,
        # so take the hidden state of the first ([CLS]) token as the pooled
        # sequence representation.
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]

        # Apply dropout for regularization.
        pooled_output = self.dropout(pooled_output)

        # Pass the pooled output through each classification head.
        return [classifier(pooled_output) for classifier in self.classifiers]
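
# A minimal smoke-test sketch, not part of the original module. It assumes
# config's DEBERTA_MODEL_NAME points at a standard DeBERTa checkpoint
# (e.g. "microsoft/deberta-base") whose tokenizer AutoTokenizer can load;
# the label sizes [3, 5] are illustrative only.
if __name__ == "__main__":
    import torch
    from transformers import AutoTokenizer

    # Two hypothetical label columns with 3 and 5 classes respectively.
    model = DebertaMultiOutputModel(num_labels=[3, 5])
    tokenizer = AutoTokenizer.from_pretrained(DebertaMultiOutputModel.tokenizer_name)

    batch = tokenizer(
        ["an example sentence"], return_tensors="pt", padding=True, truncation=True
    )
    with torch.no_grad():
        logits = model(batch["input_ids"], batch["attention_mask"])

    # Expect one logit tensor per head: shapes (1, 3) and (1, 5).
    for i, head_logits in enumerate(logits):
        print(f"head {i}: {tuple(head_logits.shape)}")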