{
  "best_metric": 78.2193331519916,
  "best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_large_model/checkpoint-2760",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 3450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "step": 690,
      "train_exact_match": 56.043956043956044,
      "train_f1": 73.10238632897224,
      "train_runtime": 28.967,
      "train_samples_per_second": 43.567,
      "train_steps_per_second": 1.588
    },
    {
      "epoch": 1.0,
      "grad_norm": 34.386253356933594,
      "learning_rate": 1e-05,
      "loss": 3.1538,
      "step": 690
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 53.28125,
      "eval_f1": 68.90784509260168,
      "eval_runtime": 92.5664,
      "eval_samples_per_second": 43.774,
      "eval_steps_per_second": 1.566,
      "step": 690
    },
    {
      "epoch": 2.0,
      "step": 1380,
      "train_exact_match": 71.22877122877122,
      "train_f1": 84.717686864544,
      "train_runtime": 29.1704,
      "train_samples_per_second": 43.64,
      "train_steps_per_second": 1.577
    },
    {
      "epoch": 2.0,
      "grad_norm": 118.70819854736328,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.105,
      "step": 1380
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 63.1875,
      "eval_f1": 77.04679698006159,
      "eval_runtime": 92.8422,
      "eval_samples_per_second": 43.644,
      "eval_steps_per_second": 1.562,
      "step": 1380
    },
    {
      "epoch": 3.0,
      "step": 2070,
      "train_exact_match": 76.62337662337663,
      "train_f1": 88.54201062497397,
      "train_runtime": 29.7625,
      "train_samples_per_second": 43.511,
      "train_steps_per_second": 1.579
    },
    {
      "epoch": 3.0,
      "grad_norm": 41.994937896728516,
      "learning_rate": 5e-06,
      "loss": 0.7535,
      "step": 2070
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 64.125,
      "eval_f1": 77.84371511436862,
      "eval_runtime": 92.7563,
      "eval_samples_per_second": 43.684,
      "eval_steps_per_second": 1.563,
      "step": 2070
    },
    {
      "epoch": 4.0,
      "step": 2760,
      "train_exact_match": 82.01798201798202,
      "train_f1": 91.71025821891249,
      "train_runtime": 29.7394,
      "train_samples_per_second": 43.545,
      "train_steps_per_second": 1.58
    },
    {
      "epoch": 4.0,
      "grad_norm": 416.8191223144531,
      "learning_rate": 2.5e-06,
      "loss": 0.5495,
      "step": 2760
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 65.03125,
      "eval_f1": 78.2193331519916,
      "eval_runtime": 92.5461,
      "eval_samples_per_second": 43.784,
      "eval_steps_per_second": 1.567,
      "step": 2760
    },
    {
      "epoch": 5.0,
      "step": 3450,
      "train_exact_match": 83.71628371628371,
      "train_f1": 92.89072366821784,
      "train_runtime": 28.6071,
      "train_samples_per_second": 43.311,
      "train_steps_per_second": 1.573
    },
    {
      "epoch": 5.0,
      "grad_norm": 5.402435779571533,
      "learning_rate": 0.0,
      "loss": 0.418,
      "step": 3450
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 64.875,
      "eval_f1": 78.00354149557015,
      "eval_runtime": 92.9043,
      "eval_samples_per_second": 43.615,
      "eval_steps_per_second": 1.561,
      "step": 3450
    },
    {
      "epoch": 5.0,
      "step": 3450,
      "total_flos": 6.719078999672064e+16,
      "train_loss": 1.1959592426687047,
      "train_runtime": 6180.983,
      "train_samples_per_second": 15.607,
      "train_steps_per_second": 0.558
    }
  ],
  "logging_steps": 500,
  "max_steps": 3450,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 6.719078999672064e+16,
  "train_batch_size": 28,
  "trial_name": null,
  "trial_params": null
}