# Mini-Theya-Frozer (FrozerYTB architecture)
Mini-Theya-Frozer is an ultra-compact Small Language Model (SLM) designed by Finisha. Built on the proprietary FrozerYTB architecture, the model prioritizes knowledge density and the texture of language while keeping the parameter count minimal.

## 🚀 Capabilities & Use Cases
Despite its small size, the FrozerYTB architecture delivers surprising performance in constrained environments:

"This model is not here to be polite or conventional. It is here to be true. If you are looking for pre-chewed text, move along. Here, we work on pure structure."
## ⚡ Inference Example ❄️
import torch
from transformers import PretrainedConfig, PreTrainedModel, PreTrainedTokenizerFast
import safetensors.torch  # for loading .safetensors weight files
from huggingface_hub import hf_hub_download

# Temporarily patch PreTrainedModel._can_set_experts_implementation to prevent a FileNotFoundError:
# the method tries to open and read the source file of the class, which fails in Colab notebooks.
original_can_set_experts_implementation = PreTrainedModel._can_set_experts_implementation
PreTrainedModel._can_set_experts_implementation = lambda cls: False

try:
    # 1. CONFIGURATION AND ARCHITECTURE (unchanged)
    class FrozerConfig(PretrainedConfig):
        model_type = "frozerytb"

        def __init__(self, vocab_size=13588, embed_dim=256, n_layers=6, n_heads=8, **kwargs):
            super().__init__(**kwargs)
            self.vocab_size = vocab_size
            self.embed_dim = embed_dim
            self.n_layers = n_layers
            self.n_heads = n_heads

    class FrozerYTBModel(PreTrainedModel):
        config_class = FrozerConfig

        def __init__(self, config):
            super().__init__(config)
            self._auto_class = None  # keep this line, it may help with other AutoModel cases
            self.embedding = torch.nn.Embedding(config.vocab_size, config.embed_dim)
            self.encoder_layer = torch.nn.TransformerEncoderLayer(
                d_model=config.embed_dim, nhead=config.n_heads, batch_first=True, norm_first=True
            )
            self.transformer = torch.nn.TransformerEncoder(self.encoder_layer, num_layers=config.n_layers)
            self.lm_head = torch.nn.Linear(config.embed_dim, config.vocab_size)
            # self.init_weights() is intentionally not called: the weights are loaded manually below

        def generate_causal_mask(self, sz):
            # Upper-triangular mask: position i cannot attend to positions > i
            return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)

        def forward(self, input_ids, attention_mask=None, **kwargs):
            seq_len = input_ids.size(1)
            x = self.embedding(input_ids)
            mask = self.generate_causal_mask(seq_len).to(input_ids.device)
            x = self.transformer(x, mask=mask)
            return self.lm_head(x)

    # 2. SAFE LOADING
    repo_id = "Finisha-F-scratch/Mini-Theya-Frozer"

    print("Loading the tokenizer...")
    tokenizer = PreTrainedTokenizerFast.from_pretrained(repo_id)

    print("Initializing Mini-Theya-Frozer (11.2M)...")
    # Load the config first, then the model, WITHOUT going through auto-registration
    config = FrozerConfig.from_pretrained(repo_id)
    model = FrozerYTBModel(config)

    # Load the weights manually to avoid the file error
    weights_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
    state_dict = safetensors.torch.load_file(weights_path, device="cpu")

    # Remap the keys of the loaded state_dict to match the model's module names
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('layers.'):
            # Prepend 'transformer.' to the keys of the encoder layers
            new_key = 'transformer.' + k
        else:
            # Keep the keys for the embedding and lm_head as they are
            new_key = k
        new_state_dict[new_key] = v

    # Load the state_dict; strict=False ignores the missing keys of the 'encoder_layer'
    # instance, which only serves as a blueprint for the transformer's internal layers.
    model.load_state_dict(new_state_dict, strict=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device).eval()

    # 3. GENERATION
    def generate_response(prompt, max_new_tokens=100, temperature=0.7):
        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
        for _ in range(max_new_tokens):
            with torch.no_grad():
                outputs = model(input_ids)
            # Sample the next token from the temperature-scaled distribution
            next_token_logits = outputs[:, -1, :] / temperature
            probs = torch.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            input_ids = torch.cat((input_ids, next_token), dim=1)
            if next_token.item() == tokenizer.eos_token_id:
                break
        return tokenizer.decode(input_ids[0], skip_special_tokens=True)

    # 4. THE TEST
    prompt_test = "Qui es-tu ?"
    print("-" * 30)
    print(f"Question: {prompt_test}")
    print(f"Answer: {generate_response(prompt_test)}")
    print("-" * 30)
    print("done")
finally:
    # Restore the original method after execution to avoid side effects on other models
    PreTrainedModel._can_set_experts_implementation = original_can_set_experts_implementation
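If you want to double-check what was loaded, a quick sanity check is to count the parameters and run a second prompt at a lower sampling temperature. This is a hypothetical usage sketch that assumes the script above has just been executed in the same session:

```python
# Assumes the script above has already run: model, device and
# generate_response are still in scope.
n_params = sum(p.numel() for p in model.parameters())
print(f"Parameters: {n_params / 1e6:.1f}M")  # should be close to the 11.2M mentioned above

# A lower temperature makes the sampling less random for the same model
print(generate_response("Qui es-tu ?", max_new_tokens=60, temperature=0.4))
```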