# configuration_i3.py
from transformers import PretrainedConfig
class I3Config(PretrainedConfig):
    """Configuration object for the ``i3`` model type.

    Each hyperparameter passed to the constructor is stored as an instance
    attribute of the same name. Any additional keyword arguments are handed
    to ``PretrainedConfig.__init__`` unchanged.

    NOTE(review): the per-argument meanings given in ``__init__`` are
    inferred from the argument names only; confirm them against the model
    implementation that consumes this config.
    """

    # Unique name identifying this model type.
    model_type = "i3"

    def __init__(
        self,
        vocab_size=4466,
        d_model=512,
        n_layers=12,
        n_heads=16,
        max_seq_len=256,
        rank=128,
        d_state=64,
        **kwargs,
    ):
        """Initialise the base config, then record the i3 hyperparameters.

        Args:
            vocab_size: Stored as ``self.vocab_size`` (default 4466);
                presumably the tokenizer vocabulary size.
            d_model: Stored as ``self.d_model`` (default 512); presumably
                the hidden width of the model.
            n_layers: Stored as ``self.n_layers`` (default 12); presumably
                the number of stacked layers.
            n_heads: Stored as ``self.n_heads`` (default 16); presumably
                the number of attention heads.
            max_seq_len: Stored as ``self.max_seq_len`` (default 256);
                presumably the maximum supported sequence length.
            rank: Stored as ``self.rank`` (default 128); meaning is not
                visible in this file -- TODO confirm.
            d_state: Stored as ``self.d_state`` (default 64); meaning is
                not visible in this file -- TODO confirm.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
        """
        # The base class consumes the generic keyword arguments first.
        super().__init__(**kwargs)

        # Insertion order mirrors the signature, so the attributes are set
        # in the same sequence as before.
        hyperparameters = {
            "vocab_size": vocab_size,
            "d_model": d_model,
            "n_layers": n_layers,
            "n_heads": n_heads,
            "max_seq_len": max_seq_len,
            "rank": rank,
            "d_state": d_state,
        }
        for attr_name, attr_value in hyperparameters.items():
            setattr(self, attr_name, attr_value)