import os

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import GRPOConfig, GRPOTrainer
import wandb
# Authenticate with Weights & Biases; keep the API key out of the source and
# let wandb pick it up from the WANDB_API_KEY environment variable instead.
wandb.login()
# Load the SmolTLDR summarization dataset.
dataset = load_dataset("mlabonne/smoltldr")
print(dataset)
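# GRPOTrainer reads prompts from a "prompt" column, which this dataset
# already provides, so no preprocessing step is needed before training.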
# Force-disable FlashAttention before the model is loaded.
os.environ["FLASH_ATTENTION_FORCE_DISABLED"] = "1"
model_id = "HuggingFaceTB/SmolLM-135M-Instruct"

# Load the base model and its tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
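# Note: the trainer below is not handed this tokenizer; when no
# processing_class is passed, GRPOTrainer loads one from the model itself,
# so the tokenizer here is only needed for manual inspection or later
# generation.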
# Wrap the base model with LoRA adapters so only the low-rank matrices train.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    target_modules="all-linear",
)
model = get_peft_model(model, lora_config)
# print_trainable_parameters() prints its own summary and returns None,
# so it must not be wrapped in another print().
model.print_trainable_parameters()
# Reward function: penalize the absolute distance from the target length.
# Note that len(completion) counts characters, not tokens.
ideal_length = 50

def reward_len(completions, **kwargs):
    return [-abs(ideal_length - len(completion)) for completion in completions]
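# Illustrative sanity check (not executed during training): a 50-character
# completion scores the maximum reward of 0, a 10-character one scores -40.
# >>> reward_len(["x" * 50, "x" * 10])
# [0, -40]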
training_args = GRPOConfig(
    output_dir="GRPO",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    max_prompt_length=512,
    max_completion_length=96,
    num_generations=8,
    num_train_epochs=1,
    report_to=["wandb"],
    remove_unused_columns=False,
    logging_steps=1,
    bf16=False,
    fp16=True,  # mixed precision; requires a GPU
    optim="adamw_torch_fused",  # NOT "adamw_8bit"
)
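# Consistency note: TRL requires the global generation batch
# (num_processes × per_device_train_batch_size) to be divisible by
# num_generations; on a single GPU, 1 × 8 is divisible by 8, so each
# prompt's group of 8 completions fills exactly one batch.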
trainer = GRPOTrainer(
    model=model,
    reward_funcs=[reward_len],
    args=training_args,
    train_dataset=dataset["train"],
)
# Start the W&B run and train the model.
wandb.init(project="GRPO")
trainer.train()
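# Afterwards the trained LoRA adapter can be saved and the W&B run closed
# (the output path is illustrative):
# trainer.save_model("GRPO/final")
# wandb.finish()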