import torch
import transformers
import bitsandbytes
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel

model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
new_model = "sedataskan/mistral8x7B-finetuned"

# Load the tokenizer and the base model in half precision, sharded across available GPUs
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=False,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer.pad_token = "!"  # Mixtral ships without a pad token; "!" is used as a stand-in

# LoRA hyperparameters
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1

config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["w1", "w2", "w3"],  # only train the "expert" (MoE feed-forward) layers
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)

def print_trainable_parameters(m):
    trainable_params = sum(p.numel() for p in m.parameters() if p.requires_grad)
    all_params = sum(p.numel() for p in m.parameters())
    print(
        f"trainable params: {trainable_params} || all params: {all_params} || "
        f"trainable%: {100 * trainable_params / all_params}"
    )

print_trainable_parameters(model)

# Load the training split; note that generate_prompt below expects "instruction" and "output" fields
train_data = load_dataset("oscar-corpus/OSCAR-2201", "tr", split="train")
print("Dataset", train_data)

def generate_prompt(user_query):
    # The prompt format is taken from the official Mixtral Hugging Face model card
    sys_msg = "Take a look at the following instructions and try to follow them."
    return "[INST] " + sys_msg + "\n" + user_query["instruction"] + " [/INST] " + user_query["output"]

max_len = 1024

def tokenize(prompt):
    return tokenizer(
        prompt + tokenizer.eos_token,
        truncation=True,
        max_length=max_len,
        padding="max_length",
    )

train_data = train_data.shuffle().map(
    lambda x: tokenize(generate_prompt(x)),
    remove_columns=["instruction", "output"],
)

trainer = Trainer(
    model=model,
    train_dataset=train_data,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=6,
        learning_rate=1e-4,
        logging_steps=2,
        optim="adamw_torch",
        save_strategy="epoch",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # disable the KV cache while training

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# Push them to the HF Hub
trainer.model.push_to_hub(new_model, use_temp_dir=False, token="")
tokenizer.push_to_hub(new_model, use_temp_dir=False, token="")

# # Format prompt
# message = [
#     "Türkiye'nin başkenti neresidir?"
# ]
# tokenizer = AutoTokenizer.from_pretrained(new_model)
# prompt = tokenizer(message, return_tensors="pt", padding=True)

# # Generate output
# output = trainer.model.generate(
#     input_ids=prompt.input_ids,
#     attention_mask=prompt.attention_mask,
#     max_length=128,
#     do_sample=True,
#     top_p=0.95,
#     top_k=60,
#     num_return_sequences=1,
# )

# # Print output
# print(tokenizer.batch_decode(output, skip_special_tokens=True))
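
# --- Optional: reload the saved adapter for inference ---
# A minimal sketch (not part of the original listing) showing how the LoRA adapter
# saved above can be re-attached to a freshly loaded base model with
# PeftModel.from_pretrained, instead of reusing trainer.model as in the
# commented-out block. It assumes the adapter and tokenizer are available under
# `new_model` (locally via save_pretrained or on the Hub via push_to_hub);
# the generation settings are illustrative, not tuned.
base = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
inference_model = PeftModel.from_pretrained(base, new_model)  # attach the trained LoRA weights
inference_model.eval()

inference_tokenizer = AutoTokenizer.from_pretrained(new_model)
inputs = inference_tokenizer(
    "[INST] Türkiye'nin başkenti neresidir? [/INST]", return_tensors="pt"
).to(base.device)

with torch.no_grad():
    generated = inference_model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=True,
        top_p=0.95,
    )
print(inference_tokenizer.decode(generated[0], skip_special_tokens=True))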