mtasic85 committed on
Commit
1511fb2
·
1 Parent(s): 9105382

pretrain core 0

Browse files
Files changed (1) hide show
  1. scripts/pretrain-core-model-0.yaml +19 -19
scripts/pretrain-core-model-0.yaml CHANGED
@@ -110,29 +110,29 @@ eval:
110
 
111
  # Optimizer-related arguments
112
 
113
- # optimizer:
114
- # # class_path: torch.optim.AdamW
115
- # class_path: torchao.prototype.low_bit_optim.AdamW8bit
116
- # # class_path: torchao.prototype.low_bit_optim.AdamW4bit
117
- # # class_path: bitsandbytes.optim.AdamW8bit
118
- # # class_path: bitsandbytes.optim.PagedAdamW8bit
119
- # init_args:
120
- # # (type: float, default: 0.001)
121
- # lr: 1e-4
122
- # # (type: float, default: 0.01)
123
- # weight_decay: 0.01
124
- # # (type: tuple, default: (0.9,0.999))
125
- # betas:
126
- # - 0.9
127
- # - 0.99
128
-
129
  optimizer:
130
- class_path: dolphinflow.DolphinFlow
 
 
 
 
131
  init_args:
 
132
  lr: 1e-4
 
133
  weight_decay: 0.01
134
- momentum: 0.9
135
- nesterov: true
 
 
 
 
 
 
 
 
 
 
136
 
137
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
138
  devices: auto
 
110
 
111
  # Optimizer-related arguments
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  optimizer:
114
+ class_path: torch.optim.AdamW
115
+ # class_path: torchao.prototype.low_bit_optim.AdamW8bit
116
+ # class_path: torchao.prototype.low_bit_optim.AdamW4bit
117
+ # class_path: bitsandbytes.optim.AdamW8bit
118
+ # class_path: bitsandbytes.optim.PagedAdamW8bit
119
  init_args:
120
+ # (type: float, default: 0.001)
121
  lr: 1e-4
122
+ # (type: float, default: 0.01)
123
  weight_decay: 0.01
124
+ # (type: tuple, default: (0.9,0.999))
125
+ betas:
126
+ - 0.9
127
+ - 0.999
128
+
129
+ # optimizer:
130
+ # class_path: dolphinflow.DolphinFlow
131
+ # init_args:
132
+ # lr: 1e-4
133
+ # weight_decay: 0.01
134
+ # momentum: 0.9
135
+ # nesterov: true
136
 
137
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
138
  devices: auto