import torch
import transformers
import bitsandbytes
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
new_model = "sedataskan/mistral8x7B-finetuned"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=False,
    torch_dtype=torch.float16,
    device_map="auto",
)
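# NOTE: bitsandbytes and prepare_model_for_kbit_training are imported but go
# unused because load_in_4bit=False. A minimal sketch of the 4-bit path,
# assuming the standard transformers BitsAndBytesConfig API:
# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, quantization_config=bnb_config, device_map="auto"
# )
# model = prepare_model_for_kbit_training(model)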
tokenizer.pad_token = "!"  # arbitrary pad token; tokenizer.pad_token = tokenizer.eos_token is the more common choice
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1

config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["w1", "w2", "w3"],  # only train the expert (MoE feed-forward) layers
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
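# lora_alpha / r gives the LoRA scaling factor (2.0 here). "w1"/"w2"/"w3" are
# the per-expert feed-forward projections inside Mixtral's MoE blocks; the
# attention projections (e.g. "q_proj", "v_proj") are deliberately left frozen.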
model = get_peft_model(model, config)

def print_trainable_parameters(m):
    trainable_params = sum(p.numel() for p in m.parameters() if p.requires_grad)
    all_params = sum(p.numel() for p in m.parameters())
    print(f"trainable params: {trainable_params} || all params: {all_params} || trainable%: {100 * trainable_params / all_params}")

print_trainable_parameters(model)
# split="train" yields a Dataset rather than a DatasetDict, which is what Trainer expects
train_data = load_dataset("oscar-corpus/OSCAR-2201", "tr", split="train")
print("Dataset", train_data)
def generate_prompt(user_query):  # prompt format follows the official Mixtral Hugging Face page
    sys_msg = "Take a look at the following instructions and try to follow them."
    p = "<s> [INST]" + sys_msg + "\n" + user_query["instruction"] + "[/INST]" + user_query["output"] + "</s>"
    return p
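# For example, generate_prompt({"instruction": "Say hi", "output": "Hi!"}) returns:
# "<s> [INST]Take a look at the following instructions and try to follow them.\nSay hi[/INST]Hi!</s>"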
max_len = 1024

def tokenize(prompt):
    # generate_prompt() already ends the prompt with "</s>" (Mixtral's EOS),
    # so appending tokenizer.eos_token here would duplicate it
    return tokenizer(
        prompt,
        truncation=True,
        max_length=max_len,
        padding="max_length",
    )
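# tokenize() returns "input_ids" and "attention_mask"; the labels needed for the
# causal-LM loss are added later by DataCollatorForLanguageModeling(mlm=False),
# which copies input_ids into labels (the model shifts them internally).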
train_data = train_data.shuffle().map(
    lambda x: tokenize(generate_prompt(x)),
    remove_columns=["instruction", "output"],
)
trainer = Trainer(
    model=model,
    train_dataset=train_data,
    args=TrainingArguments(
        output_dir="./results",  # required by most transformers releases; adjust as needed
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=6,
        learning_rate=1e-4,
        logging_steps=2,
        optim="adamw_torch",
        save_strategy="epoch",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
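# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
# = 1 * 4 = 4 sequences per optimizer step (per device).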
model.config.use_cache = False  # disable the KV cache during training; re-enable for inference

# Train model
trainer.train()
# Save trained model
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)
# Push them to the HF Hub
trainer.model.push_to_hub(new_model, use_temp_dir=False, token="")
tokenizer.push_to_hub(new_model, use_temp_dir=False, token="")
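# The token argument is left blank above; supply a Hugging Face access token
# with write permission (or authenticate once via `huggingface-cli login`).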
# Example inference (commented out):
# # Format prompt
# message = [
#     "Türkiye'nin başkenti neresidir?"  # "What is the capital of Turkey?"
# ]
# tokenizer = AutoTokenizer.from_pretrained(new_model)
# # move the encoded inputs onto the model's device before generating
# prompt = tokenizer(message, return_tensors="pt", padding=True).to(trainer.model.device)
# # Generate output
# output = trainer.model.generate(
#     input_ids=prompt.input_ids,
#     attention_mask=prompt.attention_mask,
#     max_length=128,
#     do_sample=True,
#     top_p=0.95,
#     top_k=60,
#     num_return_sequences=1,
# )
# # Print output
# print(tokenizer.batch_decode(output, skip_special_tokens=True))
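# PeftModel (imported above) comes into play when reloading the saved adapter
# in a fresh session, e.g. (a sketch, assuming the standard PEFT API):
# base = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
# model = PeftModel.from_pretrained(base, new_model)  # attach the saved LoRA weights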