{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 471, "global_step": 4710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 23.178159713745117, "learning_rate": 3.004181408813123e-06, "loss": 3.3296, "step": 471 }, { "epoch": 0.1, "eval_nli-pairs_loss": 1.8879033327102661, "eval_nli-pairs_runtime": 14.5841, "eval_nli-pairs_samples_per_second": 466.81, "eval_nli-pairs_steps_per_second": 29.21, "step": 471 }, { "epoch": 0.1, "eval_scitail-pairs-pos_loss": 1.3438984155654907, "eval_scitail-pairs-pos_runtime": 3.3055, "eval_scitail-pairs-pos_samples_per_second": 394.499, "eval_scitail-pairs-pos_steps_per_second": 24.807, "step": 471 }, { "epoch": 0.1, "eval_qnli-contrastive_loss": 2.2597947120666504, "eval_qnli-contrastive_runtime": 15.4075, "eval_qnli-contrastive_samples_per_second": 354.567, "eval_qnli-contrastive_steps_per_second": 22.197, "step": 471 }, { "epoch": 0.2, "grad_norm": 20.00649070739746, "learning_rate": 6.021228690897395e-06, "loss": 1.8704, "step": 942 }, { "epoch": 0.2, "eval_nli-pairs_loss": 0.9545981884002686, "eval_nli-pairs_runtime": 14.5243, "eval_nli-pairs_samples_per_second": 468.731, "eval_nli-pairs_steps_per_second": 29.33, "step": 942 }, { "epoch": 0.2, "eval_scitail-pairs-pos_loss": 0.5628724098205566, "eval_scitail-pairs-pos_runtime": 3.3029, "eval_scitail-pairs-pos_samples_per_second": 394.801, "eval_scitail-pairs-pos_steps_per_second": 24.826, "step": 942 }, { "epoch": 0.2, "eval_qnli-contrastive_loss": 1.840173602104187, "eval_qnli-contrastive_runtime": 15.4134, "eval_qnli-contrastive_samples_per_second": 354.433, "eval_qnli-contrastive_steps_per_second": 22.189, "step": 942 }, { "epoch": 0.3, "grad_norm": 14.465508460998535, "learning_rate": 9.051141846252816e-06, "loss": 1.2621, "step": 1413 }, { "epoch": 0.3, "eval_nli-pairs_loss": 0.715168297290802, "eval_nli-pairs_runtime": 14.4626, "eval_nli-pairs_samples_per_second": 470.731, "eval_nli-pairs_steps_per_second": 29.455, "step": 1413 }, { "epoch": 0.3, "eval_scitail-pairs-pos_loss": 0.45529162883758545, "eval_scitail-pairs-pos_runtime": 3.3513, "eval_scitail-pairs-pos_samples_per_second": 389.098, "eval_scitail-pairs-pos_steps_per_second": 24.468, "step": 1413 }, { "epoch": 0.3, "eval_qnli-contrastive_loss": 1.388743281364441, "eval_qnli-contrastive_runtime": 15.4261, "eval_qnli-contrastive_samples_per_second": 354.139, "eval_qnli-contrastive_steps_per_second": 22.17, "step": 1413 }, { "epoch": 0.4, "grad_norm": 164.2409210205078, "learning_rate": 1.2081055001608235e-05, "loss": 1.2512, "step": 1884 }, { "epoch": 0.4, "eval_nli-pairs_loss": 0.5274420976638794, "eval_nli-pairs_runtime": 14.4658, "eval_nli-pairs_samples_per_second": 470.628, "eval_nli-pairs_steps_per_second": 29.449, "step": 1884 }, { "epoch": 0.4, "eval_scitail-pairs-pos_loss": 0.3621281683444977, "eval_scitail-pairs-pos_runtime": 3.3054, "eval_scitail-pairs-pos_samples_per_second": 394.502, "eval_scitail-pairs-pos_steps_per_second": 24.808, "step": 1884 }, { "epoch": 0.4, "eval_qnli-contrastive_loss": 0.8418154120445251, "eval_qnli-contrastive_runtime": 15.4336, "eval_qnli-contrastive_samples_per_second": 353.967, "eval_qnli-contrastive_steps_per_second": 22.159, "step": 1884 }, { "epoch": 0.5, "grad_norm": 1.1174694299697876, "learning_rate": 1.5110968156963654e-05, "loss": 1.1724, "step": 2355 }, { "epoch": 0.5, "eval_nli-pairs_loss": 0.49269717931747437, "eval_nli-pairs_runtime": 14.6969, "eval_nli-pairs_samples_per_second": 463.228, "eval_nli-pairs_steps_per_second": 28.986, "step": 2355 }, { "epoch": 0.5, "eval_scitail-pairs-pos_loss": 0.39243820309638977, "eval_scitail-pairs-pos_runtime": 3.3462, "eval_scitail-pairs-pos_samples_per_second": 389.699, "eval_scitail-pairs-pos_steps_per_second": 24.506, "step": 2355 }, { "epoch": 0.5, "eval_qnli-contrastive_loss": 0.14236953854560852, "eval_qnli-contrastive_runtime": 15.7375, "eval_qnli-contrastive_samples_per_second": 347.133, "eval_qnli-contrastive_steps_per_second": 21.732, "step": 2355 }, { "epoch": 0.6, "grad_norm": 8.20367431640625, "learning_rate": 1.8140881312319075e-05, "loss": 0.9036, "step": 2826 }, { "epoch": 0.6, "eval_nli-pairs_loss": 0.46205422282218933, "eval_nli-pairs_runtime": 14.6645, "eval_nli-pairs_samples_per_second": 464.249, "eval_nli-pairs_steps_per_second": 29.05, "step": 2826 }, { "epoch": 0.6, "eval_scitail-pairs-pos_loss": 0.37769660353660583, "eval_scitail-pairs-pos_runtime": 3.3324, "eval_scitail-pairs-pos_samples_per_second": 391.314, "eval_scitail-pairs-pos_steps_per_second": 24.607, "step": 2826 }, { "epoch": 0.6, "eval_qnli-contrastive_loss": 0.3408704996109009, "eval_qnli-contrastive_runtime": 15.4886, "eval_qnli-contrastive_samples_per_second": 352.711, "eval_qnli-contrastive_steps_per_second": 22.081, "step": 2826 }, { "epoch": 0.7, "grad_norm": 13.231554985046387, "learning_rate": 1.995898723197675e-05, "loss": 1.0374, "step": 3297 }, { "epoch": 0.7, "eval_nli-pairs_loss": 0.41105732321739197, "eval_nli-pairs_runtime": 14.6153, "eval_nli-pairs_samples_per_second": 465.813, "eval_nli-pairs_steps_per_second": 29.148, "step": 3297 }, { "epoch": 0.7, "eval_scitail-pairs-pos_loss": 0.3417491614818573, "eval_scitail-pairs-pos_runtime": 3.3206, "eval_scitail-pairs-pos_samples_per_second": 392.697, "eval_scitail-pairs-pos_steps_per_second": 24.694, "step": 3297 }, { "epoch": 0.7, "eval_qnli-contrastive_loss": 0.21254216134548187, "eval_qnli-contrastive_runtime": 15.5347, "eval_qnli-contrastive_samples_per_second": 351.664, "eval_qnli-contrastive_steps_per_second": 22.015, "step": 3297 }, { "epoch": 0.8, "grad_norm": 23.010765075683594, "learning_rate": 1.9476312452068522e-05, "loss": 0.9259, "step": 3768 }, { "epoch": 0.8, "eval_nli-pairs_loss": 0.3852880597114563, "eval_nli-pairs_runtime": 14.5431, "eval_nli-pairs_samples_per_second": 468.125, "eval_nli-pairs_steps_per_second": 29.292, "step": 3768 }, { "epoch": 0.8, "eval_scitail-pairs-pos_loss": 0.2818955183029175, "eval_scitail-pairs-pos_runtime": 3.3663, "eval_scitail-pairs-pos_samples_per_second": 387.364, "eval_scitail-pairs-pos_steps_per_second": 24.359, "step": 3768 }, { "epoch": 0.8, "eval_qnli-contrastive_loss": 0.16461187601089478, "eval_qnli-contrastive_runtime": 15.6023, "eval_qnli-contrastive_samples_per_second": 350.141, "eval_qnli-contrastive_steps_per_second": 21.92, "step": 3768 }, { "epoch": 0.9, "grad_norm": 4.332469940185547, "learning_rate": 1.8475083492522773e-05, "loss": 0.8709, "step": 4239 }, { "epoch": 0.9, "eval_nli-pairs_loss": 0.37486234307289124, "eval_nli-pairs_runtime": 14.7406, "eval_nli-pairs_samples_per_second": 461.852, "eval_nli-pairs_steps_per_second": 28.9, "step": 4239 }, { "epoch": 0.9, "eval_scitail-pairs-pos_loss": 0.29122474789619446, "eval_scitail-pairs-pos_runtime": 3.5504, "eval_scitail-pairs-pos_samples_per_second": 367.283, "eval_scitail-pairs-pos_steps_per_second": 23.096, "step": 4239 }, { "epoch": 0.9, "eval_qnli-contrastive_loss": 0.11566311866044998, "eval_qnli-contrastive_runtime": 15.6925, "eval_qnli-contrastive_samples_per_second": 348.129, "eval_qnli-contrastive_steps_per_second": 21.794, "step": 4239 }, { "epoch": 1.0, "grad_norm": 26.054088592529297, "learning_rate": 1.701008869684049e-05, "loss": 0.8686, "step": 4710 }, { "epoch": 1.0, "eval_nli-pairs_loss": 0.36355406045913696, "eval_nli-pairs_runtime": 14.5214, "eval_nli-pairs_samples_per_second": 468.824, "eval_nli-pairs_steps_per_second": 29.336, "step": 4710 }, { "epoch": 1.0, "eval_scitail-pairs-pos_loss": 0.3108903765678406, "eval_scitail-pairs-pos_runtime": 3.3842, "eval_scitail-pairs-pos_samples_per_second": 385.319, "eval_scitail-pairs-pos_steps_per_second": 24.23, "step": 4710 }, { "epoch": 1.0, "eval_qnli-contrastive_loss": 0.09614822268486023, "eval_qnli-contrastive_runtime": 15.7192, "eval_qnli-contrastive_samples_per_second": 347.537, "eval_qnli-contrastive_steps_per_second": 21.757, "step": 4710 } ], "logging_steps": 471, "max_steps": 9420, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4710, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 28, "trial_name": null, "trial_params": null }