# OrcaleSeek / model_config.py
# Uploaded by prelington: "Create model_config.py" (commit 806b578, verified)
from dataclasses import dataclass
from transformers import PretrainedConfig
@dataclass
class OrcaleSeekConfig:
    """Plain-dataclass holder for the OrcaleSeek model hyperparameters.

    Every annotated attribute below is a dataclass field and can be
    overridden through the generated ``__init__``.  Use
    :meth:`to_hf_config` to turn an instance into a ``transformers``
    ``PretrainedConfig``.

    NOTE(review): the field names and defaults look like the GPT-2
    configuration layout; how the model code consumes each value is not
    visible from this file -- confirm against the model implementation.
    """

    model_type: str = "orcaleseek"
    vocab_size: int = 50257
    n_embd: int = 768
    n_layer: int = 12
    n_head: int = 12
    n_inner: int = 3072
    activation_function: str = "gelu_new"
    resid_pdrop: float = 0.1
    embd_pdrop: float = 0.1
    attn_pdrop: float = 0.1
    layer_norm_epsilon: float = 1e-5
    initializer_range: float = 0.02
    scale_attn_weights: bool = True
    use_cache: bool = True
    bos_token_id: int = 50256
    eos_token_id: int = 50256
    # Unannotated on purpose: this is a class-level attribute, NOT a
    # dataclass field, so it is absent from ``__init__``, ``repr`` and the
    # instance ``__dict__``.  ``_hf_kwargs`` adds it back explicitly.
    architectures = ["OrcaleSeekForCausalLM"]

    def _hf_kwargs(self) -> dict:
        """Return the keyword arguments forwarded to ``PretrainedConfig``.

        Contains every instance attribute plus ``architectures``.  The
        latter normally lives only on the class, so reading the instance
        ``__dict__`` alone would silently drop it from the converted config.
        """
        kwargs = dict(vars(self))
        archs = self.architectures
        # Copy the list so the resulting config never shares (and can never
        # mutate) the class-level default.
        kwargs["architectures"] = list(archs) if archs is not None else None
        return kwargs

    def to_hf_config(self) -> "PretrainedConfig":
        """Build a ``PretrainedConfig`` carrying all values of this config.

        Returns:
            A new ``PretrainedConfig`` constructed from the dataclass field
            values and the ``architectures`` list.
        """
        return PretrainedConfig(**self._hf_kwargs())