| from transformers import PretrainedConfig | |
class BilmaConfig(PretrainedConfig):
    """Configuration object for the "bilma" model type.

    Args:
        weights: Name of a pretrained checkpoint (currently only ``"US"``),
            or ``None`` to describe a custom architecture through the
            hyperparameter arguments below.
        include_top: Stored as-is on the config. Must be falsy when
            ``add_head`` or ``pooling`` is given.
        add_head: Optional head specification; only accepted when
            ``include_top`` is falsy. Stored as-is on the config.
        pooling: Optional pooling mode, one of ``"mean"``, ``"cls"`` or
            ``"max"``; only accepted when ``include_top`` is falsy.
        num_attention_heads: Used only when ``weights`` is ``None``.
        num_hidden_layers: Used only when ``weights`` is ``None``.
        seq_max_length: Used only when ``weights`` is ``None``.
        hidden_size: Used only when ``weights`` is ``None``.
        vocab_size: Used only when ``weights`` is ``None``.
        hidden_dropout_prob: Used only when ``weights`` is ``None``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.

    Raises:
        ValueError: If ``weights`` is neither ``None`` nor a known
            checkpoint name, if ``add_head`` or ``pooling`` is combined with
            a truthy ``include_top``, or if ``pooling`` is not a supported
            mode.
    """

    model_type = "bilma"

    def __init__(
        self,
        weights="US",
        include_top=True,
        add_head=None,
        pooling=None,
        num_attention_heads: int = 4,
        num_hidden_layers: int = 2,
        seq_max_length: int = 280,
        hidden_size: int = 512,
        vocab_size: int = 29025,
        hidden_dropout_prob: float = 0.1,
        **kwargs,
    ):
        countries = ["US"]
        poolings = ["mean", "cls", "max"]

        # ``None`` means "no pretrained checkpoint". Rejecting it here (as the
        # previous version did) made the custom-hyperparameter path below
        # unreachable, so every caller-supplied size was silently discarded.
        if weights is not None and weights not in countries:
            raise ValueError(
                f"`weights` must be None or one of {countries}, got {weights}."
            )
        if add_head is not None and include_top:
            raise ValueError("To add a head, 'include_top' must be False")
        if pooling is not None:
            if include_top:
                raise ValueError("To specify a pooling, 'include_top' must be False")
            if pooling not in poolings:
                raise ValueError(
                    f"`pooling` must be one of {poolings}, got {pooling}."
                )

        if weights is not None:
            # A named checkpoint pins the architecture: the hyperparameter
            # arguments are deliberately overridden with fixed values.
            # NOTE(review): presumably these match the released checkpoint's
            # architecture -- confirm against the published weights.
            num_attention_heads = 4
            num_hidden_layers = 2
            seq_max_length = 280
            hidden_size = 512
            vocab_size = 29025
            hidden_dropout_prob = 0.1

        self.weights = weights
        self.include_top = include_top
        self.add_head = add_head
        self.pooling = pooling
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.seq_max_length = seq_max_length
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.hidden_dropout_prob = hidden_dropout_prob
        # Attributes are set before delegating, matching the original order.
        super().__init__(**kwargs)