from dataclasses import dataclass
from typing import ClassVar, List

from transformers import PretrainedConfig


@dataclass
class OrcaleSeekConfig:
    """Default hyperparameters for the ``"orcaleseek"`` model type.

    Every annotated attribute is a dataclass field and can be overridden
    through the generated ``__init__``.  Use :meth:`to_hf_config` to obtain
    a ``transformers.PretrainedConfig`` carrying the same values.
    """

    model_type: str = "orcaleseek"
    vocab_size: int = 50257
    n_embd: int = 768
    n_layer: int = 12
    n_head: int = 12
    n_inner: int = 3072
    activation_function: str = "gelu_new"
    resid_pdrop: float = 0.1
    embd_pdrop: float = 0.1
    attn_pdrop: float = 0.1
    layer_norm_epsilon: float = 1e-5
    initializer_range: float = 0.02
    scale_attn_weights: bool = True
    use_cache: bool = True
    bos_token_id: int = 50256
    eos_token_id: int = 50256

    # Class-level attribute, not a dataclass field: it is not an __init__
    # parameter and is not stored in an instance's __dict__.  ClassVar makes
    # that explicit; to_hf_config() forwards it by hand.
    architectures: ClassVar[List[str]] = ["OrcaleSeekForCausalLM"]

    def to_hf_config(self) -> PretrainedConfig:
        """Build a ``PretrainedConfig`` from this object's attributes.

        All instance attributes are forwarded as keyword arguments.
        ``architectures`` lives on the class rather than the instance, so it
        is added explicitly; otherwise the resulting config would be built
        without it.

        Returns:
            A new ``PretrainedConfig`` populated from this instance.
        """
        kwargs = dict(vars(self))
        # Respect a per-instance override if one was assigned; otherwise pass
        # a copy so the shared class-level list is never aliased into (and
        # possibly mutated through) the returned config.
        kwargs.setdefault("architectures", list(self.architectures))
        return PretrainedConfig(**kwargs)