{
  "best_global_step": 280,
  "best_metric": 0.198628231883049,
  "best_model_checkpoint": "./adalora_weather_model/checkpoint-280",
  "epoch": 2.8637532133676094,
  "eval_steps": 20,
  "global_step": 280,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10282776349614396,
      "grad_norm": 3.2930359840393066,
      "learning_rate": 6.75e-05,
      "loss": 16.7516,
      "mean_token_accuracy": 0.5331788018345833,
      "num_tokens": 171254.0,
      "step": 10
    },
    {
      "epoch": 0.20565552699228792,
      "grad_norm": 5.34633731842041,
      "learning_rate": 0.0001425,
      "loss": 14.2345,
      "mean_token_accuracy": 0.5723872803151607,
      "num_tokens": 342816.0,
      "step": 20
    },
    {
      "epoch": 0.20565552699228792,
      "eval_loss": 1.4267879724502563,
      "eval_mean_token_accuracy": 0.6206505249708126,
      "eval_num_tokens": 342816.0,
      "eval_runtime": 103.2151,
      "eval_samples_per_second": 1.889,
      "eval_steps_per_second": 1.889,
      "step": 20
    },
    {
      "epoch": 0.30848329048843187,
      "grad_norm": 8.451922416687012,
      "learning_rate": 0.00021749999999999997,
      "loss": 8.4633,
      "mean_token_accuracy": 0.7056376278400421,
      "num_tokens": 514067.0,
      "step": 30
    },
    {
      "epoch": 0.41131105398457585,
      "grad_norm": 3.2435312271118164,
      "learning_rate": 0.00029249999999999995,
      "loss": 3.6174,
      "mean_token_accuracy": 0.8711350880563259,
      "num_tokens": 685570.0,
      "step": 40
    },
    {
      "epoch": 0.41131105398457585,
      "eval_loss": 0.34826213121414185,
      "eval_mean_token_accuracy": 0.8939384683584556,
      "eval_num_tokens": 685570.0,
      "eval_runtime": 103.283,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 40
    },
    {
      "epoch": 0.5141388174807198,
      "grad_norm": 3.4071648120880127,
      "learning_rate": 0.0002995163544683256,
      "loss": 2.9181,
      "mean_token_accuracy": 0.8942699111998081,
      "num_tokens": 856740.0,
      "step": 50
    },
    {
      "epoch": 0.6169665809768637,
      "grad_norm": 2.6412267684936523,
      "learning_rate": 0.00029784849709745616,
      "loss": 2.638,
      "mean_token_accuracy": 0.9004527874290943,
      "num_tokens": 1028000.0,
      "step": 60
    },
    {
      "epoch": 0.6169665809768637,
      "eval_loss": 0.29210129380226135,
      "eval_mean_token_accuracy": 0.9071287249907469,
      "eval_num_tokens": 1028000.0,
      "eval_runtime": 103.2512,
      "eval_samples_per_second": 1.889,
      "eval_steps_per_second": 1.889,
      "step": 60
    },
    {
      "epoch": 0.7197943444730077,
      "grad_norm": 10.460367202758789,
      "learning_rate": 0.0002950037303267096,
      "loss": 2.2428,
      "mean_token_accuracy": 0.9117808744311333,
      "num_tokens": 1199650.0,
      "step": 70
    },
    {
      "epoch": 0.8226221079691517,
      "grad_norm": 5.442368984222412,
      "learning_rate": 0.0002910046991800035,
      "loss": 2.0227,
      "mean_token_accuracy": 0.9168093383312226,
      "num_tokens": 1370524.0,
      "step": 80
    },
    {
      "epoch": 0.8226221079691517,
      "eval_loss": 0.2537098526954651,
      "eval_mean_token_accuracy": 0.9172265719144772,
      "eval_num_tokens": 1370524.0,
      "eval_runtime": 103.2432,
      "eval_samples_per_second": 1.889,
      "eval_steps_per_second": 1.889,
      "step": 80
    },
    {
      "epoch": 0.9254498714652957,
      "grad_norm": 2.3143043518066406,
      "learning_rate": 0.00028588323690176954,
      "loss": 1.9486,
      "mean_token_accuracy": 0.9203169830143452,
      "num_tokens": 1542159.0,
      "step": 90
    },
    {
      "epoch": 1.0205655526992288,
      "grad_norm": 2.387840986251831,
      "learning_rate": 0.0002796801115567139,
      "loss": 1.7171,
      "mean_token_accuracy": 0.9238405316262632,
      "num_tokens": 1700574.0,
      "step": 100
    },
    {
      "epoch": 1.0205655526992288,
      "eval_loss": 0.237007275223732,
      "eval_mean_token_accuracy": 0.9215406671548501,
      "eval_num_tokens": 1700574.0,
      "eval_runtime": 102.9604,
      "eval_samples_per_second": 1.894,
      "eval_steps_per_second": 1.894,
      "step": 100
    },
    {
      "epoch": 1.1233933161953726,
      "grad_norm": 2.3169972896575928,
      "learning_rate": 0.0002724447015062708,
      "loss": 1.7776,
      "mean_token_accuracy": 0.925829317420721,
      "num_tokens": 1871783.0,
      "step": 110
    },
    {
      "epoch": 1.2262210796915167,
      "grad_norm": 2.366626262664795,
      "learning_rate": 0.0002642346023450357,
      "loss": 1.7638,
      "mean_token_accuracy": 0.9251113034784794,
      "num_tokens": 2043203.0,
      "step": 120
    },
    {
      "epoch": 1.2262210796915167,
      "eval_loss": 0.2297067493200302,
      "eval_mean_token_accuracy": 0.9240646191132375,
      "eval_num_tokens": 2043203.0,
      "eval_runtime": 103.0662,
      "eval_samples_per_second": 1.892,
      "eval_steps_per_second": 1.892,
      "step": 120
    },
    {
      "epoch": 1.3290488431876606,
      "grad_norm": 2.324875593185425,
      "learning_rate": 0.0002551151684260553,
      "loss": 1.7129,
      "mean_token_accuracy": 0.9276402719318867,
      "num_tokens": 2214867.0,
      "step": 130
    },
    {
      "epoch": 1.4318766066838047,
      "grad_norm": 2.4916014671325684,
      "learning_rate": 0.0002451589926245468,
      "loss": 1.6328,
      "mean_token_accuracy": 0.9298155799508094,
      "num_tokens": 2385981.0,
      "step": 140
    },
    {
      "epoch": 1.4318766066838047,
      "eval_loss": 0.22466857731342316,
      "eval_mean_token_accuracy": 0.9257748848352677,
      "eval_num_tokens": 2385981.0,
      "eval_runtime": 103.2959,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 140
    },
    {
      "epoch": 1.5347043701799485,
      "grad_norm": 2.331782341003418,
      "learning_rate": 0.00023444532848124715,
      "loss": 1.6382,
      "mean_token_accuracy": 0.9296720393002034,
      "num_tokens": 2557432.0,
      "step": 150
    },
    {
      "epoch": 1.6375321336760926,
      "grad_norm": 2.2701163291931152,
      "learning_rate": 0.00022305945932527308,
      "loss": 1.6396,
      "mean_token_accuracy": 0.9298155024647713,
      "num_tokens": 2729083.0,
      "step": 160
    },
    {
      "epoch": 1.6375321336760926,
      "eval_loss": 0.21823178231716156,
      "eval_mean_token_accuracy": 0.92809411745805,
      "eval_num_tokens": 2729083.0,
      "eval_runtime": 103.3054,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 160
    },
    {
      "epoch": 1.7403598971722365,
      "grad_norm": 2.184347629547119,
      "learning_rate": 0.0002110920193984228,
      "loss": 1.667,
      "mean_token_accuracy": 0.928074149042368,
      "num_tokens": 2900445.0,
      "step": 170
    },
    {
      "epoch": 1.8431876606683804,
      "grad_norm": 2.0277137756347656,
      "learning_rate": 0.00019863827238493308,
      "loss": 1.5743,
      "mean_token_accuracy": 0.9325967490673065,
      "num_tokens": 3072258.0,
      "step": 180
    },
    {
      "epoch": 1.8431876606683804,
      "eval_loss": 0.2095421850681305,
      "eval_mean_token_accuracy": 0.9297601647866078,
      "eval_num_tokens": 3072258.0,
      "eval_runtime": 103.4026,
      "eval_samples_per_second": 1.886,
      "eval_steps_per_second": 1.886,
      "step": 180
    },
    {
      "epoch": 1.9460154241645244,
      "grad_norm": 2.0166707038879395,
      "learning_rate": 0.00018579735308976727,
      "loss": 1.5818,
      "mean_token_accuracy": 0.9324821837246418,
      "num_tokens": 3242706.0,
      "step": 190
    },
    {
      "epoch": 2.0411311053984575,
      "grad_norm": 2.155334949493408,
      "learning_rate": 0.00017267147830185608,
      "loss": 1.4363,
      "mean_token_accuracy": 0.9325833642804945,
      "num_tokens": 3401061.0,
      "step": 200
    },
    {
      "epoch": 2.0411311053984575,
      "eval_loss": 0.2087530493736267,
      "eval_mean_token_accuracy": 0.9303424829091781,
      "eval_num_tokens": 3401061.0,
      "eval_runtime": 103.3012,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 200
    },
    {
      "epoch": 2.1439588688946016,
      "grad_norm": 2.2415578365325928,
      "learning_rate": 0.00015936513312400936,
      "loss": 1.4133,
      "mean_token_accuracy": 0.9386202253401279,
      "num_tokens": 3572271.0,
      "step": 210
    },
    {
      "epoch": 2.2467866323907453,
      "grad_norm": 2.0938873291015625,
      "learning_rate": 0.0001459842392465063,
      "loss": 1.4341,
      "mean_token_accuracy": 0.9366819895803928,
      "num_tokens": 3743063.0,
      "step": 220
    },
    {
      "epoch": 2.2467866323907453,
      "eval_loss": 0.20562225580215454,
      "eval_mean_token_accuracy": 0.9320944171685439,
      "eval_num_tokens": 3743063.0,
      "eval_runtime": 103.287,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 220
    },
    {
      "epoch": 2.3496143958868894,
      "grad_norm": 2.0981836318969727,
      "learning_rate": 0.00013263531178510647,
      "loss": 1.3837,
      "mean_token_accuracy": 0.9389841854572296,
      "num_tokens": 3914486.0,
      "step": 230
    },
    {
      "epoch": 2.4524421593830334,
      "grad_norm": 2.3699076175689697,
      "learning_rate": 0.00011942461139525123,
      "loss": 1.4155,
      "mean_token_accuracy": 0.9380967736244201,
      "num_tokens": 4085959.0,
      "step": 240
    },
    {
      "epoch": 2.4524421593830334,
      "eval_loss": 0.2033875733613968,
      "eval_mean_token_accuracy": 0.9327177851628035,
      "eval_num_tokens": 4085959.0,
      "eval_runtime": 103.2681,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 240
    },
    {
      "epoch": 2.5552699228791775,
      "grad_norm": 2.4609436988830566,
      "learning_rate": 0.00010645729841183066,
      "loss": 1.4136,
      "mean_token_accuracy": 0.9391755022108554,
      "num_tokens": 4257059.0,
      "step": 250
    },
    {
      "epoch": 2.658097686375321,
      "grad_norm": 2.2968101501464844,
      "learning_rate": 9.383659574776544e-05,
      "loss": 1.3925,
      "mean_token_accuracy": 0.9393771559000015,
      "num_tokens": 4428749.0,
      "step": 260
    },
    {
      "epoch": 2.658097686375321,
      "eval_loss": 0.2008339911699295,
      "eval_mean_token_accuracy": 0.9332964753493285,
      "eval_num_tokens": 4428749.0,
      "eval_runtime": 103.126,
      "eval_samples_per_second": 1.891,
      "eval_steps_per_second": 1.891,
      "step": 260
    },
    {
      "epoch": 2.7609254498714653,
      "grad_norm": 2.1535301208496094,
      "learning_rate": 8.166296721493059e-05,
      "loss": 1.4319,
      "mean_token_accuracy": 0.9369394682347775,
      "num_tokens": 4600183.0,
      "step": 270
    },
    {
      "epoch": 2.8637532133676094,
      "grad_norm": 2.2570505142211914,
      "learning_rate": 7.003331780818343e-05,
      "loss": 1.3249,
      "mean_token_accuracy": 0.9410863481462002,
      "num_tokens": 4771910.0,
      "step": 280
    },
    {
      "epoch": 2.8637532133676094,
      "eval_loss": 0.198628231883049,
      "eval_mean_token_accuracy": 0.9339628476362962,
      "eval_num_tokens": 4771910.0,
      "eval_runtime": 103.3095,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 280
    }
  ],
  "logging_steps": 10,
  "max_steps": 392,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 40,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.2200684192580608e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}