mtasic85 committed
Commit 1c9b116 · 1 Parent(s): 056e2c6

micro_batch_size: 4

Files changed (1):
scripts/pretrain-core-model.yaml (+4 −4)
scripts/pretrain-core-model.yaml CHANGED

@@ -67,9 +67,9 @@ train:
   # global_batch_size: 256

   # Number of samples per data-parallel rank (type: int, default: 4)
-  # micro_batch_size: 4
+  micro_batch_size: 4
   # micro_batch_size: 2
-  micro_batch_size: 1
+  # micro_batch_size: 1

   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 200
@@ -115,11 +115,11 @@ eval:
   # Optimizer-related arguments

 optimizer:
-  class_path: torch.optim.AdamW
+  # class_path: torch.optim.AdamW
   # class_path: torchao.prototype.low_bit_optim.AdamW8bit
   # class_path: torchao.prototype.low_bit_optim.AdamW4bit
   # class_path: bitsandbytes.optim.AdamW8bit
-  # class_path: bitsandbytes.optim.PagedAdamW8bit
+  class_path: bitsandbytes.optim.PagedAdamW8bit
   init_args:
     # (type: float, default: 0.001)
     lr: 1e-4
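In effect, this commit raises the micro-batch back to 4 samples per data-parallel rank (cutting gradient-accumulation steps by 4x for any fixed global batch size) and swaps full-precision torch.optim.AdamW for bitsandbytes' PagedAdamW8bit, which stores optimizer state in 8-bit and pages it between GPU and CPU memory to absorb memory spikes. Below is a minimal sketch of what the new optimizer block resolves to at runtime; the Linear model and training step are placeholders for illustration, not this repo's code, and it assumes bitsandbytes is installed with a CUDA device available:

import torch
import bitsandbytes as bnb

# Placeholder model; the real config instantiates the network separately
# and wires the optimizer up via the YAML class_path / init_args.
model = torch.nn.Linear(1024, 1024).cuda()

# Equivalent of:
#   optimizer:
#     class_path: bitsandbytes.optim.PagedAdamW8bit
#     init_args:
#       lr: 1e-4
optimizer = bnb.optim.PagedAdamW8bit(model.parameters(), lr=1e-4)

# One illustrative step with micro_batch_size=4 samples on this rank.
x = torch.randn(4, 1024, device="cuda")
loss = model(x).pow(2).mean()  # dummy loss for the sketch
loss.backward()
optimizer.step()
optimizer.zero_grad()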