|
{ |
|
"best_metric": 0.8514851485148515, |
|
"best_model_checkpoint": "finetune_results/omarmomen/structformer_s1_final_with_pos/sst2/checkpoint-4000", |
|
"epoch": 9.47867298578199, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.7972440719604492, |
|
"eval_f1": 0.7919191919191918, |
|
"eval_loss": 0.44957396388053894, |
|
"eval_mcc": 0.5949711545663119, |
|
"eval_runtime": 0.9864, |
|
"eval_samples_per_second": 515.006, |
|
"eval_steps_per_second": 64.883, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.407582938388626e-05, |
|
"loss": 0.4711, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.8110235929489136, |
|
"eval_f1": 0.8195488721804512, |
|
"eval_loss": 0.4547339081764221, |
|
"eval_mcc": 0.6254766003729282, |
|
"eval_runtime": 0.9908, |
|
"eval_samples_per_second": 512.701, |
|
"eval_steps_per_second": 64.592, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.815165876777251e-05, |
|
"loss": 0.2815, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy": 0.8149606585502625, |
|
"eval_f1": 0.798283261802575, |
|
"eval_loss": 0.5317888259887695, |
|
"eval_mcc": 0.637652660463876, |
|
"eval_runtime": 0.9915, |
|
"eval_samples_per_second": 512.337, |
|
"eval_steps_per_second": 64.546, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.222748815165877e-05, |
|
"loss": 0.1564, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.836614191532135, |
|
"eval_f1": 0.8400770712909442, |
|
"eval_loss": 0.5775780081748962, |
|
"eval_mcc": 0.6741723640208644, |
|
"eval_runtime": 0.9911, |
|
"eval_samples_per_second": 512.56, |
|
"eval_steps_per_second": 64.574, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.6303317535545023e-05, |
|
"loss": 0.0854, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_accuracy": 0.8267716765403748, |
|
"eval_f1": 0.8333333333333333, |
|
"eval_loss": 0.6006239056587219, |
|
"eval_mcc": 0.6561201797627784, |
|
"eval_runtime": 0.9911, |
|
"eval_samples_per_second": 512.571, |
|
"eval_steps_per_second": 64.576, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_accuracy": 0.834645688533783, |
|
"eval_f1": 0.8333333333333334, |
|
"eval_loss": 0.593051552772522, |
|
"eval_mcc": 0.6692967103749653, |
|
"eval_runtime": 0.9927, |
|
"eval_samples_per_second": 511.722, |
|
"eval_steps_per_second": 64.469, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 2.037914691943128e-05, |
|
"loss": 0.0488, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_accuracy": 0.8425197005271912, |
|
"eval_f1": 0.846743295019157, |
|
"eval_loss": 0.7115373611450195, |
|
"eval_mcc": 0.6864752706187329, |
|
"eval_runtime": 0.9914, |
|
"eval_samples_per_second": 512.395, |
|
"eval_steps_per_second": 64.554, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.4454976303317535e-05, |
|
"loss": 0.0314, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_accuracy": 0.8484252095222473, |
|
"eval_f1": 0.8487229862475442, |
|
"eval_loss": 0.8149858713150024, |
|
"eval_mcc": 0.6969083992056365, |
|
"eval_runtime": 0.9906, |
|
"eval_samples_per_second": 512.795, |
|
"eval_steps_per_second": 64.604, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 8.530805687203793e-06, |
|
"loss": 0.0174, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"eval_accuracy": 0.834645688533783, |
|
"eval_f1": 0.8346456692913385, |
|
"eval_loss": 0.8690735101699829, |
|
"eval_mcc": 0.6693172130512284, |
|
"eval_runtime": 0.9909, |
|
"eval_samples_per_second": 512.668, |
|
"eval_steps_per_second": 64.588, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.6066350710900472e-06, |
|
"loss": 0.0107, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"eval_accuracy": 0.8523622155189514, |
|
"eval_f1": 0.8514851485148515, |
|
"eval_loss": 0.9377376437187195, |
|
"eval_mcc": 0.7047207175597211, |
|
"eval_runtime": 0.99, |
|
"eval_samples_per_second": 513.131, |
|
"eval_steps_per_second": 64.646, |
|
"step": 4000 |
|
} |
|
], |
|
"max_steps": 4220, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.998323026461491e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|