# platma-retrain/src/config.yaml
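# Config in Hugging Face AutoTrain Advanced format: supervised fine-tuning
# (task: llm-sft) of microsoft/Phi-3.5-mini-instruct with a LoRA adapter.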
task: llm-sft
base_model: microsoft/Phi-3.5-mini-instruct
# base_model: TheBloke/WizardCoder-33B-V1.1-AWQ
project_name: llama
log: tensorboard
backend: local
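# Data: the train split of the Platma/platma-ai dataset on the Hub; the raw
# "text" column is consumed as-is (chat_template is null, so no chat
# formatting is applied).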
data:
  path: Platma/platma-ai
  train_split: train
  valid_split: null
  chat_template: null
  column_mapping:
    text_column: text
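# Training hyperparameters. Effective batch size is
# batch_size * gradient_accumulation = 2 * 8 = 16 sequences of up to
# block_size (1024) tokens per optimizer step.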
params:
  block_size: 1024
  lr: 1e-4
  warmup_ratio: 0.1
  weight_decay: 0.01
  epochs: 1
  batch_size: 2
  gradient_accumulation: 8
  mixed_precision: fp16
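  # LoRA via PEFT rather than full fine-tuning: rank-16 adapters with
  # alpha 32 (scaling factor alpha/r = 32/16 = 2) and dropout 0.05,
  # attached to all linear layers (target_modules: all-linear).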
  peft: true
  quantization: null
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  unsloth: false
  optimizer: paged_adamw_8bit
  target_modules: all-linear
  padding: right
  scheduler: cosine
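# Hub upload: the token is expected to be supplied via the HF_ACCESS_TOKEN
# environment variable at launch time; keep the raw token out of the file.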
hub:
  username: Platma
  token: ${HF_ACCESS_TOKEN}
  push_to_hub: true
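# To launch a run with this file (assuming the autotrain-advanced package
# is installed and the token variable is exported):
#   export HF_ACCESS_TOKEN=<your token>
#   autotrain --config src/config.yaml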