OUTPUT=./output_codellama_7b_lora_bf16
ZERO_STAGE=3
mkdir -p $OUTPUT
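# Optional: a minimal sketch of the dataset layout assumed by --data_path local/jsonfile.
# The DeepSpeed-Chat local JSON loader is expected to read prompt/chosen pairs from
# data/train.json and data/eval.json under the application directory; check
# raw_datasets.py in your checkout for the exact location and field names, and point
# DATA_DIR there. Skip this block if your dataset already exists.
DATA_DIR=./data   # hypothetical location; adjust to match your checkout
if [ ! -f $DATA_DIR/train.json ]; then
   mkdir -p $DATA_DIR
   cat > $DATA_DIR/train.json <<'EOF'
[
  {"prompt": "Human: Write a Python function that reverses a string.\nAssistant:",
   "chosen": " def reverse(s):\n    return s[::-1]"}
]
EOF
   cp $DATA_DIR/train.json $DATA_DIR/eval.json
fi

# The launch below assumes the DeepSpeed-Chat supervised fine-tuning entry point
# (main.py): ZeRO stage 3 shards parameters, gradients, and optimizer states across
# GPUs, --dtype bf16 trains in bfloat16, and --lora_dim 32 with
# --lora_module_name "layers." injects rank-32 LoRA adapters into the linear modules
# whose names contain "layers.". --data_split "10,0,0" routes all of the data to this
# SFT step. The effective global batch size is
# per_device_train_batch_size * gradient_accumulation_steps * #GPUs,
# e.g. 8 * 32 * 4 = 1024 sequences per optimizer step on a 4-GPU node.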
deepspeed main.py \
   --data_path local/jsonfile \
   --data_split "10,0,0" \
   --data_output_path /scratch/huijaean/garbage \
   --model_name_or_path meta-llama/CodeLlama-7b-Instruct-hf \
   --per_device_train_batch_size 8 \
   --per_device_eval_batch_size 8 \
   --max_seq_len 512 \
   --learning_rate 5e-5 \
   --num_train_epochs 1 \
   --gradient_accumulation_steps 32 \
   --lr_scheduler_type cosine \
   --num_warmup_steps 0 \
   --seed 1234 \
   --gradient_checkpointing \
   --dtype bf16 \
   --zero_stage $ZERO_STAGE \
   --deepspeed \
   --lora_dim 32 \
   --lora_module_name "layers." \
   --output_dir $OUTPUT \
   | tee $OUTPUT/training.log
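
# After training, the checkpoint in $OUTPUT is assumed to be a standard Hugging Face
# model directory (DeepSpeed-Chat merges the LoRA weights back into the base model
# before saving); a lightweight, assumed smoke test that the saved config and
# tokenizer load:
python - "$OUTPUT" <<'EOF'
import sys
from transformers import AutoConfig, AutoTokenizer

path = sys.argv[1]
config = AutoConfig.from_pretrained(path)        # reads config.json from the output dir
tokenizer = AutoTokenizer.from_pretrained(path)  # reads the saved tokenizer files
print(f"checkpoint at {path}: model_type={config.model_type}, vocab_size={config.vocab_size}")
EOF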