{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8,
"eval_steps": 32,
"global_step": 576,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003125,
"grad_norm": 3.1757984161376953,
"learning_rate": 3.125e-07,
"loss": 0.7374,
"step": 1
},
{
"epoch": 0.00625,
"grad_norm": 3.137390375137329,
"learning_rate": 6.25e-07,
"loss": 0.5723,
"step": 2
},
{
"epoch": 0.009375,
"grad_norm": 2.765856981277466,
"learning_rate": 9.375000000000001e-07,
"loss": 0.551,
"step": 3
},
{
"epoch": 0.0125,
"grad_norm": 3.468062162399292,
"learning_rate": 1.25e-06,
"loss": 0.7379,
"step": 4
},
{
"epoch": 0.015625,
"grad_norm": 2.6695668697357178,
"learning_rate": 1.5625e-06,
"loss": 0.5271,
"step": 5
},
{
"epoch": 0.01875,
"grad_norm": 2.7720863819122314,
"learning_rate": 1.8750000000000003e-06,
"loss": 0.5858,
"step": 6
},
{
"epoch": 0.021875,
"grad_norm": 3.0211267471313477,
"learning_rate": 2.1875000000000002e-06,
"loss": 0.6562,
"step": 7
},
{
"epoch": 0.025,
"grad_norm": 3.641108989715576,
"learning_rate": 2.5e-06,
"loss": 0.8228,
"step": 8
},
{
"epoch": 0.028125,
"grad_norm": 3.9061200618743896,
"learning_rate": 2.8125e-06,
"loss": 0.9988,
"step": 9
},
{
"epoch": 0.03125,
"grad_norm": 2.642423391342163,
"learning_rate": 3.125e-06,
"loss": 0.5582,
"step": 10
},
{
"epoch": 0.034375,
"grad_norm": 3.6546943187713623,
"learning_rate": 3.4375e-06,
"loss": 0.8546,
"step": 11
},
{
"epoch": 0.0375,
"grad_norm": 2.5504300594329834,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.4235,
"step": 12
},
{
"epoch": 0.040625,
"grad_norm": 2.845123529434204,
"learning_rate": 4.0625000000000005e-06,
"loss": 0.6418,
"step": 13
},
{
"epoch": 0.04375,
"grad_norm": 2.8562164306640625,
"learning_rate": 4.3750000000000005e-06,
"loss": 0.6577,
"step": 14
},
{
"epoch": 0.046875,
"grad_norm": 3.4033620357513428,
"learning_rate": 4.6875000000000004e-06,
"loss": 0.8333,
"step": 15
},
{
"epoch": 0.05,
"grad_norm": 2.148242473602295,
"learning_rate": 5e-06,
"loss": 0.4082,
"step": 16
},
{
"epoch": 0.053125,
"grad_norm": 3.685960292816162,
"learning_rate": 5.3125e-06,
"loss": 0.8101,
"step": 17
},
{
"epoch": 0.05625,
"grad_norm": 2.7071452140808105,
"learning_rate": 5.625e-06,
"loss": 0.5259,
"step": 18
},
{
"epoch": 0.059375,
"grad_norm": 3.508561611175537,
"learning_rate": 5.9375e-06,
"loss": 0.9015,
"step": 19
},
{
"epoch": 0.0625,
"grad_norm": 4.140976428985596,
"learning_rate": 6.25e-06,
"loss": 1.3915,
"step": 20
},
{
"epoch": 0.065625,
"grad_norm": 1.5563820600509644,
"learning_rate": 6.5625e-06,
"loss": 0.26,
"step": 21
},
{
"epoch": 0.06875,
"grad_norm": 3.1467344760894775,
"learning_rate": 6.875e-06,
"loss": 0.6885,
"step": 22
},
{
"epoch": 0.071875,
"grad_norm": 3.539327383041382,
"learning_rate": 7.1875e-06,
"loss": 0.9357,
"step": 23
},
{
"epoch": 0.075,
"grad_norm": 3.1691510677337646,
"learning_rate": 7.500000000000001e-06,
"loss": 0.7168,
"step": 24
},
{
"epoch": 0.078125,
"grad_norm": 3.9020121097564697,
"learning_rate": 7.8125e-06,
"loss": 0.8678,
"step": 25
},
{
"epoch": 0.08125,
"grad_norm": 2.3635435104370117,
"learning_rate": 8.125000000000001e-06,
"loss": 0.4922,
"step": 26
},
{
"epoch": 0.084375,
"grad_norm": 2.5170037746429443,
"learning_rate": 8.4375e-06,
"loss": 0.4937,
"step": 27
},
{
"epoch": 0.0875,
"grad_norm": 2.7988407611846924,
"learning_rate": 8.750000000000001e-06,
"loss": 0.5891,
"step": 28
},
{
"epoch": 0.090625,
"grad_norm": 2.99135160446167,
"learning_rate": 9.0625e-06,
"loss": 0.6921,
"step": 29
},
{
"epoch": 0.09375,
"grad_norm": 3.098013162612915,
"learning_rate": 9.375000000000001e-06,
"loss": 0.8087,
"step": 30
},
{
"epoch": 0.096875,
"grad_norm": 3.358091115951538,
"learning_rate": 9.6875e-06,
"loss": 0.805,
"step": 31
},
{
"epoch": 0.1,
"grad_norm": 3.0206046104431152,
"learning_rate": 1e-05,
"loss": 0.6141,
"step": 32
},
{
"epoch": 0.1,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8487042188644409,
"eval_VitaminC_cosine_ap": 0.5467207830251657,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.2510407269001007,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 318.7947082519531,
"eval_VitaminC_dot_ap": 0.5360598625078122,
"eval_VitaminC_dot_f1": 0.6657824933687002,
"eval_VitaminC_dot_f1_threshold": 98.82717895507812,
"eval_VitaminC_dot_precision": 0.4990059642147117,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.552734375,
"eval_VitaminC_euclidean_accuracy_threshold": 15.370981216430664,
"eval_VitaminC_euclidean_ap": 0.54465834495355,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 24.364877700805664,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 273.6689758300781,
"eval_VitaminC_manhattan_ap": 0.5450408710915566,
"eval_VitaminC_manhattan_f1": 0.6675531914893617,
"eval_VitaminC_manhattan_f1_threshold": 502.82244873046875,
"eval_VitaminC_manhattan_precision": 0.500998003992016,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.5546875,
"eval_VitaminC_max_accuracy_threshold": 318.7947082519531,
"eval_VitaminC_max_ap": 0.5467207830251657,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 502.82244873046875,
"eval_VitaminC_max_precision": 0.500998003992016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5467207830251657,
"eval_sts-test_pearson_cosine": 0.8677868917853514,
"eval_sts-test_pearson_dot": 0.8601917125112223,
"eval_sts-test_pearson_euclidean": 0.889472619726378,
"eval_sts-test_pearson_manhattan": 0.890143281884324,
"eval_sts-test_pearson_max": 0.890143281884324,
"eval_sts-test_spearman_cosine": 0.8954519734959775,
"eval_sts-test_spearman_dot": 0.8621348855070287,
"eval_sts-test_spearman_euclidean": 0.8880001748147683,
"eval_sts-test_spearman_manhattan": 0.8870461226731652,
"eval_sts-test_spearman_max": 0.8954519734959775,
"eval_vitaminc-pairs_loss": 2.332582473754883,
"eval_vitaminc-pairs_runtime": 2.2432,
"eval_vitaminc-pairs_samples_per_second": 48.146,
"eval_vitaminc-pairs_steps_per_second": 0.892,
"step": 32
},
{
"epoch": 0.1,
"eval_negation-triplets_loss": 0.8681236505508423,
"eval_negation-triplets_runtime": 0.2927,
"eval_negation-triplets_samples_per_second": 218.641,
"eval_negation-triplets_steps_per_second": 3.416,
"step": 32
},
{
"epoch": 0.1,
"eval_scitail-pairs-pos_loss": 0.07759770005941391,
"eval_scitail-pairs-pos_runtime": 0.3708,
"eval_scitail-pairs-pos_samples_per_second": 145.613,
"eval_scitail-pairs-pos_steps_per_second": 2.697,
"step": 32
},
{
"epoch": 0.1,
"eval_xsum-pairs_loss": 0.09131219983100891,
"eval_xsum-pairs_runtime": 2.8486,
"eval_xsum-pairs_samples_per_second": 44.934,
"eval_xsum-pairs_steps_per_second": 0.702,
"step": 32
},
{
"epoch": 0.1,
"eval_sciq_pairs_loss": 0.01965576782822609,
"eval_sciq_pairs_runtime": 3.6062,
"eval_sciq_pairs_samples_per_second": 35.494,
"eval_sciq_pairs_steps_per_second": 0.555,
"step": 32
},
{
"epoch": 0.1,
"eval_qasc_pairs_loss": 0.10996829718351364,
"eval_qasc_pairs_runtime": 0.5975,
"eval_qasc_pairs_samples_per_second": 214.235,
"eval_qasc_pairs_steps_per_second": 3.347,
"step": 32
},
{
"epoch": 0.1,
"eval_openbookqa_pairs_loss": 0.6932356953620911,
"eval_openbookqa_pairs_runtime": 0.5729,
"eval_openbookqa_pairs_samples_per_second": 223.415,
"eval_openbookqa_pairs_steps_per_second": 3.491,
"step": 32
},
{
"epoch": 0.1,
"eval_msmarco_pairs_loss": 0.32686129212379456,
"eval_msmarco_pairs_runtime": 1.4637,
"eval_msmarco_pairs_samples_per_second": 87.448,
"eval_msmarco_pairs_steps_per_second": 1.366,
"step": 32
},
{
"epoch": 0.1,
"eval_nq_pairs_loss": 0.1978442668914795,
"eval_nq_pairs_runtime": 2.8588,
"eval_nq_pairs_samples_per_second": 44.774,
"eval_nq_pairs_steps_per_second": 0.7,
"step": 32
},
{
"epoch": 0.1,
"eval_trivia_pairs_loss": 0.7432661652565002,
"eval_trivia_pairs_runtime": 4.3895,
"eval_trivia_pairs_samples_per_second": 29.16,
"eval_trivia_pairs_steps_per_second": 0.456,
"step": 32
},
{
"epoch": 0.1,
"eval_gooaq_pairs_loss": 0.3761173486709595,
"eval_gooaq_pairs_runtime": 1.0043,
"eval_gooaq_pairs_samples_per_second": 127.452,
"eval_gooaq_pairs_steps_per_second": 1.991,
"step": 32
},
{
"epoch": 0.1,
"eval_paws-pos_loss": 0.02476382441818714,
"eval_paws-pos_runtime": 0.6858,
"eval_paws-pos_samples_per_second": 186.635,
"eval_paws-pos_steps_per_second": 2.916,
"step": 32
},
{
"epoch": 0.103125,
"grad_norm": 3.51029109954834,
"learning_rate": 1.0312500000000002e-05,
"loss": 0.7783,
"step": 33
},
{
"epoch": 0.10625,
"grad_norm": 3.376455783843994,
"learning_rate": 1.0625e-05,
"loss": 0.8746,
"step": 34
},
{
"epoch": 0.109375,
"grad_norm": 2.7385308742523193,
"learning_rate": 1.0937500000000002e-05,
"loss": 0.5085,
"step": 35
},
{
"epoch": 0.1125,
"grad_norm": 2.782606840133667,
"learning_rate": 1.125e-05,
"loss": 0.4842,
"step": 36
},
{
"epoch": 0.115625,
"grad_norm": 3.4377782344818115,
"learning_rate": 1.1562500000000002e-05,
"loss": 0.8097,
"step": 37
},
{
"epoch": 0.11875,
"grad_norm": 2.6202378273010254,
"learning_rate": 1.1875e-05,
"loss": 0.5325,
"step": 38
},
{
"epoch": 0.121875,
"grad_norm": 3.0869128704071045,
"learning_rate": 1.2187500000000001e-05,
"loss": 0.7221,
"step": 39
},
{
"epoch": 0.125,
"grad_norm": 3.131516456604004,
"learning_rate": 1.25e-05,
"loss": 0.708,
"step": 40
},
{
"epoch": 0.128125,
"grad_norm": 2.0318033695220947,
"learning_rate": 1.2812500000000001e-05,
"loss": 0.2789,
"step": 41
},
{
"epoch": 0.13125,
"grad_norm": 3.2574217319488525,
"learning_rate": 1.3125e-05,
"loss": 0.7986,
"step": 42
},
{
"epoch": 0.134375,
"grad_norm": 3.6287729740142822,
"learning_rate": 1.3437500000000001e-05,
"loss": 0.9653,
"step": 43
},
{
"epoch": 0.1375,
"grad_norm": 3.1281752586364746,
"learning_rate": 1.375e-05,
"loss": 0.7857,
"step": 44
},
{
"epoch": 0.140625,
"grad_norm": 2.201566219329834,
"learning_rate": 1.4062500000000001e-05,
"loss": 0.2726,
"step": 45
},
{
"epoch": 0.14375,
"grad_norm": 1.8727688789367676,
"learning_rate": 1.4375e-05,
"loss": 0.2458,
"step": 46
},
{
"epoch": 0.146875,
"grad_norm": 3.156454086303711,
"learning_rate": 1.4687500000000001e-05,
"loss": 0.6988,
"step": 47
},
{
"epoch": 0.15,
"grad_norm": 3.0224971771240234,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.6328,
"step": 48
},
{
"epoch": 0.153125,
"grad_norm": 3.4717319011688232,
"learning_rate": 1.5312500000000003e-05,
"loss": 0.795,
"step": 49
},
{
"epoch": 0.15625,
"grad_norm": 2.8961374759674072,
"learning_rate": 1.5625e-05,
"loss": 0.6163,
"step": 50
},
{
"epoch": 0.159375,
"grad_norm": 3.667778491973877,
"learning_rate": 1.59375e-05,
"loss": 0.8269,
"step": 51
},
{
"epoch": 0.1625,
"grad_norm": 2.350587844848633,
"learning_rate": 1.6250000000000002e-05,
"loss": 0.52,
"step": 52
},
{
"epoch": 0.165625,
"grad_norm": 3.312248468399048,
"learning_rate": 1.6562500000000003e-05,
"loss": 0.7523,
"step": 53
},
{
"epoch": 0.16875,
"grad_norm": 2.8101534843444824,
"learning_rate": 1.6875e-05,
"loss": 0.6979,
"step": 54
},
{
"epoch": 0.171875,
"grad_norm": 3.144334077835083,
"learning_rate": 1.71875e-05,
"loss": 0.7845,
"step": 55
},
{
"epoch": 0.175,
"grad_norm": 3.671412229537964,
"learning_rate": 1.7500000000000002e-05,
"loss": 0.9325,
"step": 56
},
{
"epoch": 0.178125,
"grad_norm": 3.204644203186035,
"learning_rate": 1.7812500000000003e-05,
"loss": 0.8546,
"step": 57
},
{
"epoch": 0.18125,
"grad_norm": 2.9951093196868896,
"learning_rate": 1.8125e-05,
"loss": 0.6392,
"step": 58
},
{
"epoch": 0.184375,
"grad_norm": 3.036386013031006,
"learning_rate": 1.84375e-05,
"loss": 0.5827,
"step": 59
},
{
"epoch": 0.1875,
"grad_norm": 3.0899698734283447,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.5961,
"step": 60
},
{
"epoch": 0.190625,
"grad_norm": 2.3574728965759277,
"learning_rate": 1.9062500000000003e-05,
"loss": 0.3625,
"step": 61
},
{
"epoch": 0.19375,
"grad_norm": 2.4232304096221924,
"learning_rate": 1.9375e-05,
"loss": 0.2584,
"step": 62
},
{
"epoch": 0.196875,
"grad_norm": 1.9016233682632446,
"learning_rate": 1.96875e-05,
"loss": 0.4047,
"step": 63
},
{
"epoch": 0.2,
"grad_norm": 3.193114995956421,
"learning_rate": 2e-05,
"loss": 0.9429,
"step": 64
},
{
"epoch": 0.2,
"eval_VitaminC_cosine_accuracy": 0.560546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8192525506019592,
"eval_VitaminC_cosine_ap": 0.5485465805560719,
"eval_VitaminC_cosine_f1": 0.6675531914893617,
"eval_VitaminC_cosine_f1_threshold": 0.30620089173316956,
"eval_VitaminC_cosine_precision": 0.500998003992016,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 308.60137939453125,
"eval_VitaminC_dot_ap": 0.5375184580780159,
"eval_VitaminC_dot_f1": 0.6657824933687002,
"eval_VitaminC_dot_f1_threshold": 97.275634765625,
"eval_VitaminC_dot_precision": 0.4990059642147117,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.552734375,
"eval_VitaminC_euclidean_accuracy_threshold": 11.976862907409668,
"eval_VitaminC_euclidean_ap": 0.5494925067012235,
"eval_VitaminC_euclidean_f1": 0.6666666666666666,
"eval_VitaminC_euclidean_f1_threshold": 23.21343994140625,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.552734375,
"eval_VitaminC_manhattan_accuracy_threshold": 313.34185791015625,
"eval_VitaminC_manhattan_ap": 0.5475158315491966,
"eval_VitaminC_manhattan_f1": 0.6666666666666666,
"eval_VitaminC_manhattan_f1_threshold": 495.06231689453125,
"eval_VitaminC_manhattan_precision": 0.5,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.560546875,
"eval_VitaminC_max_accuracy_threshold": 313.34185791015625,
"eval_VitaminC_max_ap": 0.5494925067012235,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 495.06231689453125,
"eval_VitaminC_max_precision": 0.500998003992016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5494925067012235,
"eval_sts-test_pearson_cosine": 0.8681028367252808,
"eval_sts-test_pearson_dot": 0.8578643818026934,
"eval_sts-test_pearson_euclidean": 0.8913506886125709,
"eval_sts-test_pearson_manhattan": 0.8922209656727235,
"eval_sts-test_pearson_max": 0.8922209656727235,
"eval_sts-test_spearman_cosine": 0.8960442588011338,
"eval_sts-test_spearman_dot": 0.8606696844578128,
"eval_sts-test_spearman_euclidean": 0.8895474944286376,
"eval_sts-test_spearman_manhattan": 0.8895341585527426,
"eval_sts-test_spearman_max": 0.8960442588011338,
"eval_vitaminc-pairs_loss": 2.260099411010742,
"eval_vitaminc-pairs_runtime": 1.8392,
"eval_vitaminc-pairs_samples_per_second": 58.723,
"eval_vitaminc-pairs_steps_per_second": 1.087,
"step": 64
},
{
"epoch": 0.2,
"eval_negation-triplets_loss": 0.836820662021637,
"eval_negation-triplets_runtime": 0.294,
"eval_negation-triplets_samples_per_second": 217.7,
"eval_negation-triplets_steps_per_second": 3.402,
"step": 64
},
{
"epoch": 0.2,
"eval_scitail-pairs-pos_loss": 0.08362159878015518,
"eval_scitail-pairs-pos_runtime": 0.3686,
"eval_scitail-pairs-pos_samples_per_second": 146.509,
"eval_scitail-pairs-pos_steps_per_second": 2.713,
"step": 64
},
{
"epoch": 0.2,
"eval_xsum-pairs_loss": 0.08567425608634949,
"eval_xsum-pairs_runtime": 2.8489,
"eval_xsum-pairs_samples_per_second": 44.93,
"eval_xsum-pairs_steps_per_second": 0.702,
"step": 64
},
{
"epoch": 0.2,
"eval_sciq_pairs_loss": 0.019713517278432846,
"eval_sciq_pairs_runtime": 3.616,
"eval_sciq_pairs_samples_per_second": 35.399,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 64
},
{
"epoch": 0.2,
"eval_qasc_pairs_loss": 0.11403815448284149,
"eval_qasc_pairs_runtime": 0.6024,
"eval_qasc_pairs_samples_per_second": 212.48,
"eval_qasc_pairs_steps_per_second": 3.32,
"step": 64
},
{
"epoch": 0.2,
"eval_openbookqa_pairs_loss": 0.6793034076690674,
"eval_openbookqa_pairs_runtime": 0.5864,
"eval_openbookqa_pairs_samples_per_second": 218.266,
"eval_openbookqa_pairs_steps_per_second": 3.41,
"step": 64
},
{
"epoch": 0.2,
"eval_msmarco_pairs_loss": 0.34600257873535156,
"eval_msmarco_pairs_runtime": 1.4668,
"eval_msmarco_pairs_samples_per_second": 87.263,
"eval_msmarco_pairs_steps_per_second": 1.363,
"step": 64
},
{
"epoch": 0.2,
"eval_nq_pairs_loss": 0.22141708433628082,
"eval_nq_pairs_runtime": 2.8596,
"eval_nq_pairs_samples_per_second": 44.761,
"eval_nq_pairs_steps_per_second": 0.699,
"step": 64
},
{
"epoch": 0.2,
"eval_trivia_pairs_loss": 0.7303681969642639,
"eval_trivia_pairs_runtime": 4.3864,
"eval_trivia_pairs_samples_per_second": 29.181,
"eval_trivia_pairs_steps_per_second": 0.456,
"step": 64
},
{
"epoch": 0.2,
"eval_gooaq_pairs_loss": 0.38013964891433716,
"eval_gooaq_pairs_runtime": 1.0052,
"eval_gooaq_pairs_samples_per_second": 127.34,
"eval_gooaq_pairs_steps_per_second": 1.99,
"step": 64
},
{
"epoch": 0.2,
"eval_paws-pos_loss": 0.024541139602661133,
"eval_paws-pos_runtime": 0.6851,
"eval_paws-pos_samples_per_second": 186.844,
"eval_paws-pos_steps_per_second": 2.919,
"step": 64
},
{
"epoch": 0.203125,
"grad_norm": 3.5084540843963623,
"learning_rate": 2.0312500000000002e-05,
"loss": 0.7848,
"step": 65
},
{
"epoch": 0.20625,
"grad_norm": 3.749316453933716,
"learning_rate": 2.0625000000000003e-05,
"loss": 0.7589,
"step": 66
},
{
"epoch": 0.209375,
"grad_norm": 3.4131276607513428,
"learning_rate": 2.09375e-05,
"loss": 0.5905,
"step": 67
},
{
"epoch": 0.2125,
"grad_norm": 2.4543726444244385,
"learning_rate": 2.125e-05,
"loss": 0.4211,
"step": 68
},
{
"epoch": 0.215625,
"grad_norm": 2.6270904541015625,
"learning_rate": 2.1562500000000002e-05,
"loss": 0.5325,
"step": 69
},
{
"epoch": 0.21875,
"grad_norm": 2.2518444061279297,
"learning_rate": 2.1875000000000003e-05,
"loss": 0.3541,
"step": 70
},
{
"epoch": 0.221875,
"grad_norm": 3.88729190826416,
"learning_rate": 2.21875e-05,
"loss": 0.9396,
"step": 71
},
{
"epoch": 0.225,
"grad_norm": 3.2759203910827637,
"learning_rate": 2.25e-05,
"loss": 0.6997,
"step": 72
},
{
"epoch": 0.228125,
"grad_norm": 3.149787425994873,
"learning_rate": 2.2812500000000002e-05,
"loss": 0.6415,
"step": 73
},
{
"epoch": 0.23125,
"grad_norm": 4.01395845413208,
"learning_rate": 2.3125000000000003e-05,
"loss": 1.1966,
"step": 74
},
{
"epoch": 0.234375,
"grad_norm": 3.0432724952697754,
"learning_rate": 2.34375e-05,
"loss": 0.7142,
"step": 75
},
{
"epoch": 0.2375,
"grad_norm": 2.960078716278076,
"learning_rate": 2.375e-05,
"loss": 0.6048,
"step": 76
},
{
"epoch": 0.240625,
"grad_norm": 2.414846658706665,
"learning_rate": 2.4062500000000002e-05,
"loss": 0.4639,
"step": 77
},
{
"epoch": 0.24375,
"grad_norm": 4.241907119750977,
"learning_rate": 2.4375000000000003e-05,
"loss": 0.9391,
"step": 78
},
{
"epoch": 0.246875,
"grad_norm": 3.350724220275879,
"learning_rate": 2.46875e-05,
"loss": 0.6364,
"step": 79
},
{
"epoch": 0.25,
"grad_norm": 2.519324541091919,
"learning_rate": 2.5e-05,
"loss": 0.515,
"step": 80
},
{
"epoch": 0.253125,
"grad_norm": 3.655949592590332,
"learning_rate": 2.5312500000000002e-05,
"loss": 0.6505,
"step": 81
},
{
"epoch": 0.25625,
"grad_norm": 3.1521031856536865,
"learning_rate": 2.5625000000000003e-05,
"loss": 0.6149,
"step": 82
},
{
"epoch": 0.259375,
"grad_norm": 2.637176036834717,
"learning_rate": 2.5937500000000004e-05,
"loss": 0.4471,
"step": 83
},
{
"epoch": 0.2625,
"grad_norm": 4.223080158233643,
"learning_rate": 2.625e-05,
"loss": 1.4199,
"step": 84
},
{
"epoch": 0.265625,
"grad_norm": 3.141789436340332,
"learning_rate": 2.6562500000000002e-05,
"loss": 0.8484,
"step": 85
},
{
"epoch": 0.26875,
"grad_norm": 3.2342255115509033,
"learning_rate": 2.6875000000000003e-05,
"loss": 0.6412,
"step": 86
},
{
"epoch": 0.271875,
"grad_norm": 3.445375442504883,
"learning_rate": 2.7187500000000004e-05,
"loss": 0.65,
"step": 87
},
{
"epoch": 0.275,
"grad_norm": 3.395848035812378,
"learning_rate": 2.75e-05,
"loss": 0.7453,
"step": 88
},
{
"epoch": 0.278125,
"grad_norm": 3.752084493637085,
"learning_rate": 2.7812500000000002e-05,
"loss": 0.9506,
"step": 89
},
{
"epoch": 0.28125,
"grad_norm": 3.2424893379211426,
"learning_rate": 2.8125000000000003e-05,
"loss": 0.6083,
"step": 90
},
{
"epoch": 0.284375,
"grad_norm": 2.8851892948150635,
"learning_rate": 2.8437500000000003e-05,
"loss": 0.7102,
"step": 91
},
{
"epoch": 0.2875,
"grad_norm": 2.385157823562622,
"learning_rate": 2.875e-05,
"loss": 0.4037,
"step": 92
},
{
"epoch": 0.290625,
"grad_norm": 3.5539441108703613,
"learning_rate": 2.90625e-05,
"loss": 0.769,
"step": 93
},
{
"epoch": 0.29375,
"grad_norm": 3.686418056488037,
"learning_rate": 2.9375000000000003e-05,
"loss": 0.8765,
"step": 94
},
{
"epoch": 0.296875,
"grad_norm": 3.9195055961608887,
"learning_rate": 2.9687500000000003e-05,
"loss": 1.2583,
"step": 95
},
{
"epoch": 0.3,
"grad_norm": 3.5373759269714355,
"learning_rate": 3.0000000000000004e-05,
"loss": 0.8885,
"step": 96
},
{
"epoch": 0.3,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.8407348990440369,
"eval_VitaminC_cosine_ap": 0.5524635737287826,
"eval_VitaminC_cosine_f1": 0.6666666666666666,
"eval_VitaminC_cosine_f1_threshold": 0.2901695668697357,
"eval_VitaminC_cosine_precision": 0.5,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 331.7409973144531,
"eval_VitaminC_dot_ap": 0.5393192469559877,
"eval_VitaminC_dot_f1": 0.6657824933687002,
"eval_VitaminC_dot_f1_threshold": 104.93923950195312,
"eval_VitaminC_dot_precision": 0.4990059642147117,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 10.979323387145996,
"eval_VitaminC_euclidean_ap": 0.5510789245842218,
"eval_VitaminC_euclidean_f1": 0.6666666666666666,
"eval_VitaminC_euclidean_f1_threshold": 23.105466842651367,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55078125,
"eval_VitaminC_manhattan_accuracy_threshold": 228.8612060546875,
"eval_VitaminC_manhattan_ap": 0.550140326019901,
"eval_VitaminC_manhattan_f1": 0.6666666666666667,
"eval_VitaminC_manhattan_f1_threshold": 479.256103515625,
"eval_VitaminC_manhattan_precision": 0.501002004008016,
"eval_VitaminC_manhattan_recall": 0.9960159362549801,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 331.7409973144531,
"eval_VitaminC_max_ap": 0.5524635737287826,
"eval_VitaminC_max_f1": 0.6666666666666667,
"eval_VitaminC_max_f1_threshold": 479.256103515625,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5524635737287826,
"eval_sts-test_pearson_cosine": 0.8707252459918289,
"eval_sts-test_pearson_dot": 0.8616721319399807,
"eval_sts-test_pearson_euclidean": 0.8926205493906139,
"eval_sts-test_pearson_manhattan": 0.8931067612799872,
"eval_sts-test_pearson_max": 0.8931067612799872,
"eval_sts-test_spearman_cosine": 0.8969095691913977,
"eval_sts-test_spearman_dot": 0.8614390033923923,
"eval_sts-test_spearman_euclidean": 0.8906887410966409,
"eval_sts-test_spearman_manhattan": 0.8902939007173846,
"eval_sts-test_spearman_max": 0.8969095691913977,
"eval_vitaminc-pairs_loss": 2.259434938430786,
"eval_vitaminc-pairs_runtime": 1.8587,
"eval_vitaminc-pairs_samples_per_second": 58.104,
"eval_vitaminc-pairs_steps_per_second": 1.076,
"step": 96
},
{
"epoch": 0.3,
"eval_negation-triplets_loss": 0.8346852660179138,
"eval_negation-triplets_runtime": 0.2932,
"eval_negation-triplets_samples_per_second": 218.315,
"eval_negation-triplets_steps_per_second": 3.411,
"step": 96
},
{
"epoch": 0.3,
"eval_scitail-pairs-pos_loss": 0.07568605989217758,
"eval_scitail-pairs-pos_runtime": 0.3763,
"eval_scitail-pairs-pos_samples_per_second": 143.494,
"eval_scitail-pairs-pos_steps_per_second": 2.657,
"step": 96
},
{
"epoch": 0.3,
"eval_xsum-pairs_loss": 0.08208194375038147,
"eval_xsum-pairs_runtime": 2.8486,
"eval_xsum-pairs_samples_per_second": 44.934,
"eval_xsum-pairs_steps_per_second": 0.702,
"step": 96
},
{
"epoch": 0.3,
"eval_sciq_pairs_loss": 0.020024314522743225,
"eval_sciq_pairs_runtime": 3.6173,
"eval_sciq_pairs_samples_per_second": 35.386,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 96
},
{
"epoch": 0.3,
"eval_qasc_pairs_loss": 0.10592304170131683,
"eval_qasc_pairs_runtime": 0.5997,
"eval_qasc_pairs_samples_per_second": 213.431,
"eval_qasc_pairs_steps_per_second": 3.335,
"step": 96
},
{
"epoch": 0.3,
"eval_openbookqa_pairs_loss": 0.6809090971946716,
"eval_openbookqa_pairs_runtime": 0.5752,
"eval_openbookqa_pairs_samples_per_second": 222.54,
"eval_openbookqa_pairs_steps_per_second": 3.477,
"step": 96
},
{
"epoch": 0.3,
"eval_msmarco_pairs_loss": 0.3400232195854187,
"eval_msmarco_pairs_runtime": 1.4679,
"eval_msmarco_pairs_samples_per_second": 87.202,
"eval_msmarco_pairs_steps_per_second": 1.363,
"step": 96
},
{
"epoch": 0.3,
"eval_nq_pairs_loss": 0.2074178159236908,
"eval_nq_pairs_runtime": 2.8593,
"eval_nq_pairs_samples_per_second": 44.766,
"eval_nq_pairs_steps_per_second": 0.699,
"step": 96
},
{
"epoch": 0.3,
"eval_trivia_pairs_loss": 0.7431399822235107,
"eval_trivia_pairs_runtime": 4.4162,
"eval_trivia_pairs_samples_per_second": 28.984,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 96
},
{
"epoch": 0.3,
"eval_gooaq_pairs_loss": 0.3708875775337219,
"eval_gooaq_pairs_runtime": 1.0094,
"eval_gooaq_pairs_samples_per_second": 126.81,
"eval_gooaq_pairs_steps_per_second": 1.981,
"step": 96
},
{
"epoch": 0.3,
"eval_paws-pos_loss": 0.024763749912381172,
"eval_paws-pos_runtime": 0.6874,
"eval_paws-pos_samples_per_second": 186.212,
"eval_paws-pos_steps_per_second": 2.91,
"step": 96
},
{
"epoch": 0.303125,
"grad_norm": 3.2354822158813477,
"learning_rate": 3.03125e-05,
"loss": 0.6398,
"step": 97
},
{
"epoch": 0.30625,
"grad_norm": 3.6665022373199463,
"learning_rate": 3.0625000000000006e-05,
"loss": 0.8263,
"step": 98
},
{
"epoch": 0.309375,
"grad_norm": 3.026954412460327,
"learning_rate": 3.09375e-05,
"loss": 0.8716,
"step": 99
},
{
"epoch": 0.3125,
"grad_norm": 2.445453643798828,
"learning_rate": 3.125e-05,
"loss": 0.5523,
"step": 100
},
{
"epoch": 0.315625,
"grad_norm": 3.4408035278320312,
"learning_rate": 3.15625e-05,
"loss": 0.5811,
"step": 101
},
{
"epoch": 0.31875,
"grad_norm": 2.8406240940093994,
"learning_rate": 3.1875e-05,
"loss": 0.7602,
"step": 102
},
{
"epoch": 0.321875,
"grad_norm": 2.5201492309570312,
"learning_rate": 3.21875e-05,
"loss": 0.5337,
"step": 103
},
{
"epoch": 0.325,
"grad_norm": 3.323239326477051,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.8182,
"step": 104
},
{
"epoch": 0.328125,
"grad_norm": 3.2463977336883545,
"learning_rate": 3.2812500000000005e-05,
"loss": 0.6641,
"step": 105
},
{
"epoch": 0.33125,
"grad_norm": 3.4495010375976562,
"learning_rate": 3.3125000000000006e-05,
"loss": 1.0088,
"step": 106
},
{
"epoch": 0.334375,
"grad_norm": 2.7572243213653564,
"learning_rate": 3.34375e-05,
"loss": 0.7556,
"step": 107
},
{
"epoch": 0.3375,
"grad_norm": 3.494549512863159,
"learning_rate": 3.375e-05,
"loss": 0.713,
"step": 108
},
{
"epoch": 0.340625,
"grad_norm": 3.4666013717651367,
"learning_rate": 3.40625e-05,
"loss": 0.8385,
"step": 109
},
{
"epoch": 0.34375,
"grad_norm": 3.05104660987854,
"learning_rate": 3.4375e-05,
"loss": 0.5181,
"step": 110
},
{
"epoch": 0.346875,
"grad_norm": 3.8259003162384033,
"learning_rate": 3.46875e-05,
"loss": 1.0939,
"step": 111
},
{
"epoch": 0.35,
"grad_norm": 3.287792205810547,
"learning_rate": 3.5000000000000004e-05,
"loss": 0.5826,
"step": 112
},
{
"epoch": 0.353125,
"grad_norm": 3.9174458980560303,
"learning_rate": 3.5312500000000005e-05,
"loss": 0.7121,
"step": 113
},
{
"epoch": 0.35625,
"grad_norm": 3.424893379211426,
"learning_rate": 3.5625000000000005e-05,
"loss": 0.9371,
"step": 114
},
{
"epoch": 0.359375,
"grad_norm": 3.5157482624053955,
"learning_rate": 3.5937500000000006e-05,
"loss": 0.7739,
"step": 115
},
{
"epoch": 0.3625,
"grad_norm": 4.468640327453613,
"learning_rate": 3.625e-05,
"loss": 0.9612,
"step": 116
},
{
"epoch": 0.365625,
"grad_norm": 3.4379608631134033,
"learning_rate": 3.65625e-05,
"loss": 0.7213,
"step": 117
},
{
"epoch": 0.36875,
"grad_norm": 2.9453623294830322,
"learning_rate": 3.6875e-05,
"loss": 0.621,
"step": 118
},
{
"epoch": 0.371875,
"grad_norm": 2.4365315437316895,
"learning_rate": 3.71875e-05,
"loss": 0.5503,
"step": 119
},
{
"epoch": 0.375,
"grad_norm": 3.446967124938965,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.8439,
"step": 120
},
{
"epoch": 0.378125,
"grad_norm": 3.8797788619995117,
"learning_rate": 3.7812500000000004e-05,
"loss": 0.7813,
"step": 121
},
{
"epoch": 0.38125,
"grad_norm": 3.0103230476379395,
"learning_rate": 3.8125000000000005e-05,
"loss": 0.5637,
"step": 122
},
{
"epoch": 0.384375,
"grad_norm": 3.9547793865203857,
"learning_rate": 3.8437500000000006e-05,
"loss": 0.9052,
"step": 123
},
{
"epoch": 0.3875,
"grad_norm": 2.953261375427246,
"learning_rate": 3.875e-05,
"loss": 0.64,
"step": 124
},
{
"epoch": 0.390625,
"grad_norm": 2.914365768432617,
"learning_rate": 3.90625e-05,
"loss": 0.6529,
"step": 125
},
{
"epoch": 0.39375,
"grad_norm": 3.346844434738159,
"learning_rate": 3.9375e-05,
"loss": 0.6894,
"step": 126
},
{
"epoch": 0.396875,
"grad_norm": 3.946427583694458,
"learning_rate": 3.96875e-05,
"loss": 0.8604,
"step": 127
},
{
"epoch": 0.4,
"grad_norm": 3.3265583515167236,
"learning_rate": 4e-05,
"loss": 0.8503,
"step": 128
},
{
"epoch": 0.4,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.835027813911438,
"eval_VitaminC_cosine_ap": 0.5482054260732142,
"eval_VitaminC_cosine_f1": 0.6666666666666666,
"eval_VitaminC_cosine_f1_threshold": 0.28428012132644653,
"eval_VitaminC_cosine_precision": 0.5,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.548828125,
"eval_VitaminC_dot_accuracy_threshold": 321.1236572265625,
"eval_VitaminC_dot_ap": 0.5350248143918641,
"eval_VitaminC_dot_f1": 0.6649006622516557,
"eval_VitaminC_dot_f1_threshold": 94.1016616821289,
"eval_VitaminC_dot_precision": 0.498015873015873,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.55859375,
"eval_VitaminC_euclidean_accuracy_threshold": 13.260427474975586,
"eval_VitaminC_euclidean_ap": 0.551773706587656,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 23.911056518554688,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 273.4624328613281,
"eval_VitaminC_manhattan_ap": 0.5494410762635437,
"eval_VitaminC_manhattan_f1": 0.6666666666666667,
"eval_VitaminC_manhattan_f1_threshold": 472.7373046875,
"eval_VitaminC_manhattan_precision": 0.5020161290322581,
"eval_VitaminC_manhattan_recall": 0.9920318725099602,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 321.1236572265625,
"eval_VitaminC_max_ap": 0.551773706587656,
"eval_VitaminC_max_f1": 0.6666666666666667,
"eval_VitaminC_max_f1_threshold": 472.7373046875,
"eval_VitaminC_max_precision": 0.5020161290322581,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.551773706587656,
"eval_sts-test_pearson_cosine": 0.8672675483925697,
"eval_sts-test_pearson_dot": 0.8586110849200466,
"eval_sts-test_pearson_euclidean": 0.8915515585715386,
"eval_sts-test_pearson_manhattan": 0.8913674606593633,
"eval_sts-test_pearson_max": 0.8915515585715386,
"eval_sts-test_spearman_cosine": 0.8969123885208655,
"eval_sts-test_spearman_dot": 0.8619306407500383,
"eval_sts-test_spearman_euclidean": 0.8903670690297594,
"eval_sts-test_spearman_manhattan": 0.890351227083227,
"eval_sts-test_spearman_max": 0.8969123885208655,
"eval_vitaminc-pairs_loss": 2.0338199138641357,
"eval_vitaminc-pairs_runtime": 1.8309,
"eval_vitaminc-pairs_samples_per_second": 58.988,
"eval_vitaminc-pairs_steps_per_second": 1.092,
"step": 128
},
{
"epoch": 0.4,
"eval_negation-triplets_loss": 0.7916581630706787,
"eval_negation-triplets_runtime": 0.2912,
"eval_negation-triplets_samples_per_second": 219.766,
"eval_negation-triplets_steps_per_second": 3.434,
"step": 128
},
{
"epoch": 0.4,
"eval_scitail-pairs-pos_loss": 0.07755717635154724,
"eval_scitail-pairs-pos_runtime": 0.3716,
"eval_scitail-pairs-pos_samples_per_second": 145.312,
"eval_scitail-pairs-pos_steps_per_second": 2.691,
"step": 128
},
{
"epoch": 0.4,
"eval_xsum-pairs_loss": 0.08196285367012024,
"eval_xsum-pairs_runtime": 2.852,
"eval_xsum-pairs_samples_per_second": 44.881,
"eval_xsum-pairs_steps_per_second": 0.701,
"step": 128
},
{
"epoch": 0.4,
"eval_sciq_pairs_loss": 0.020960956811904907,
"eval_sciq_pairs_runtime": 3.5913,
"eval_sciq_pairs_samples_per_second": 35.642,
"eval_sciq_pairs_steps_per_second": 0.557,
"step": 128
},
{
"epoch": 0.4,
"eval_qasc_pairs_loss": 0.11308694630861282,
"eval_qasc_pairs_runtime": 0.595,
"eval_qasc_pairs_samples_per_second": 215.137,
"eval_qasc_pairs_steps_per_second": 3.362,
"step": 128
},
{
"epoch": 0.4,
"eval_openbookqa_pairs_loss": 0.7888042330741882,
"eval_openbookqa_pairs_runtime": 0.5711,
"eval_openbookqa_pairs_samples_per_second": 224.114,
"eval_openbookqa_pairs_steps_per_second": 3.502,
"step": 128
},
{
"epoch": 0.4,
"eval_msmarco_pairs_loss": 0.3428971469402313,
"eval_msmarco_pairs_runtime": 1.465,
"eval_msmarco_pairs_samples_per_second": 87.373,
"eval_msmarco_pairs_steps_per_second": 1.365,
"step": 128
},
{
"epoch": 0.4,
"eval_nq_pairs_loss": 0.20846250653266907,
"eval_nq_pairs_runtime": 2.8581,
"eval_nq_pairs_samples_per_second": 44.786,
"eval_nq_pairs_steps_per_second": 0.7,
"step": 128
},
{
"epoch": 0.4,
"eval_trivia_pairs_loss": 0.7110738754272461,
"eval_trivia_pairs_runtime": 4.3917,
"eval_trivia_pairs_samples_per_second": 29.146,
"eval_trivia_pairs_steps_per_second": 0.455,
"step": 128
},
{
"epoch": 0.4,
"eval_gooaq_pairs_loss": 0.3744402229785919,
"eval_gooaq_pairs_runtime": 1.0043,
"eval_gooaq_pairs_samples_per_second": 127.448,
"eval_gooaq_pairs_steps_per_second": 1.991,
"step": 128
},
{
"epoch": 0.4,
"eval_paws-pos_loss": 0.024828137829899788,
"eval_paws-pos_runtime": 0.6859,
"eval_paws-pos_samples_per_second": 186.611,
"eval_paws-pos_steps_per_second": 2.916,
"step": 128
},
{
"epoch": 0.403125,
"grad_norm": 3.7963619232177734,
"learning_rate": 3.999971762923902e-05,
"loss": 0.8171,
"step": 129
},
{
"epoch": 0.40625,
"grad_norm": 3.987645387649536,
"learning_rate": 3.999887052758717e-05,
"loss": 1.0401,
"step": 130
},
{
"epoch": 0.409375,
"grad_norm": 2.653578758239746,
"learning_rate": 3.999745872693735e-05,
"loss": 0.4243,
"step": 131
},
{
"epoch": 0.4125,
"grad_norm": 2.3737175464630127,
"learning_rate": 3.9995482280443065e-05,
"loss": 0.3778,
"step": 132
},
{
"epoch": 0.415625,
"grad_norm": 3.334118127822876,
"learning_rate": 3.99929412625164e-05,
"loss": 0.7651,
"step": 133
},
{
"epoch": 0.41875,
"grad_norm": 3.5098752975463867,
"learning_rate": 3.998983576882524e-05,
"loss": 0.6003,
"step": 134
},
{
"epoch": 0.421875,
"grad_norm": 3.023698091506958,
"learning_rate": 3.9986165916289686e-05,
"loss": 0.6023,
"step": 135
},
{
"epoch": 0.425,
"grad_norm": 3.293668746948242,
"learning_rate": 3.998193184307759e-05,
"loss": 0.6079,
"step": 136
},
{
"epoch": 0.428125,
"grad_norm": 3.326125144958496,
"learning_rate": 3.997713370859942e-05,
"loss": 0.6206,
"step": 137
},
{
"epoch": 0.43125,
"grad_norm": 3.322040557861328,
"learning_rate": 3.997177169350224e-05,
"loss": 0.4694,
"step": 138
},
{
"epoch": 0.434375,
"grad_norm": 3.1219382286071777,
"learning_rate": 3.996584599966288e-05,
"loss": 0.7528,
"step": 139
},
{
"epoch": 0.4375,
"grad_norm": 3.7076480388641357,
"learning_rate": 3.9959356850180354e-05,
"loss": 0.8395,
"step": 140
},
{
"epoch": 0.440625,
"grad_norm": 3.1098551750183105,
"learning_rate": 3.995230448936749e-05,
"loss": 0.6689,
"step": 141
},
{
"epoch": 0.44375,
"grad_norm": 3.31339168548584,
"learning_rate": 3.9944689182741674e-05,
"loss": 0.6547,
"step": 142
},
{
"epoch": 0.446875,
"grad_norm": 4.2841386795043945,
"learning_rate": 3.99365112170149e-05,
"loss": 0.9242,
"step": 143
},
{
"epoch": 0.45,
"grad_norm": 4.0628132820129395,
"learning_rate": 3.992777090008296e-05,
"loss": 0.9496,
"step": 144
},
{
"epoch": 0.453125,
"grad_norm": 3.484614849090576,
"learning_rate": 3.9918468561013834e-05,
"loss": 0.6506,
"step": 145
},
{
"epoch": 0.45625,
"grad_norm": 3.4139559268951416,
"learning_rate": 3.990860455003534e-05,
"loss": 0.786,
"step": 146
},
{
"epoch": 0.459375,
"grad_norm": 3.4322853088378906,
"learning_rate": 3.9898179238521916e-05,
"loss": 0.7414,
"step": 147
},
{
"epoch": 0.4625,
"grad_norm": 2.660554885864258,
"learning_rate": 3.9887193018980654e-05,
"loss": 0.3978,
"step": 148
},
{
"epoch": 0.465625,
"grad_norm": 2.6429054737091064,
"learning_rate": 3.9875646305036494e-05,
"loss": 0.5635,
"step": 149
},
{
"epoch": 0.46875,
"grad_norm": 4.292131423950195,
"learning_rate": 3.98635395314167e-05,
"loss": 0.9466,
"step": 150
},
{
"epoch": 0.471875,
"grad_norm": 3.1115028858184814,
"learning_rate": 3.9850873153934456e-05,
"loss": 0.5251,
"step": 151
},
{
"epoch": 0.475,
"grad_norm": 3.307051181793213,
"learning_rate": 3.983764764947172e-05,
"loss": 0.6636,
"step": 152
},
{
"epoch": 0.478125,
"grad_norm": 3.807854652404785,
"learning_rate": 3.9823863515961245e-05,
"loss": 0.7834,
"step": 153
},
{
"epoch": 0.48125,
"grad_norm": 2.9957728385925293,
"learning_rate": 3.980952127236788e-05,
"loss": 0.6177,
"step": 154
},
{
"epoch": 0.484375,
"grad_norm": 3.3072471618652344,
"learning_rate": 3.979462145866898e-05,
"loss": 0.4558,
"step": 155
},
{
"epoch": 0.4875,
"grad_norm": 3.0199949741363525,
"learning_rate": 3.977916463583412e-05,
"loss": 0.5228,
"step": 156
},
{
"epoch": 0.490625,
"grad_norm": 2.8596651554107666,
"learning_rate": 3.9763151385803936e-05,
"loss": 0.5543,
"step": 157
},
{
"epoch": 0.49375,
"grad_norm": 3.0589263439178467,
"learning_rate": 3.974658231146825e-05,
"loss": 0.7127,
"step": 158
},
{
"epoch": 0.496875,
"grad_norm": 2.489602565765381,
"learning_rate": 3.9729458036643335e-05,
"loss": 0.4227,
"step": 159
},
{
"epoch": 0.5,
"grad_norm": 3.3471999168395996,
"learning_rate": 3.971177920604846e-05,
"loss": 0.5914,
"step": 160
},
{
"epoch": 0.5,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.8433390855789185,
"eval_VitaminC_cosine_ap": 0.5529005025024077,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.3040446639060974,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55859375,
"eval_VitaminC_dot_accuracy_threshold": 309.7912902832031,
"eval_VitaminC_dot_ap": 0.5373200658982779,
"eval_VitaminC_dot_f1": 0.6666666666666666,
"eval_VitaminC_dot_f1_threshold": 122.78400421142578,
"eval_VitaminC_dot_precision": 0.5,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 11.011507034301758,
"eval_VitaminC_euclidean_ap": 0.5542686405562732,
"eval_VitaminC_euclidean_f1": 0.6675531914893617,
"eval_VitaminC_euclidean_f1_threshold": 22.90133285522461,
"eval_VitaminC_euclidean_precision": 0.500998003992016,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 293.54693603515625,
"eval_VitaminC_manhattan_ap": 0.5529507613553954,
"eval_VitaminC_manhattan_f1": 0.6666666666666667,
"eval_VitaminC_manhattan_f1_threshold": 479.09588623046875,
"eval_VitaminC_manhattan_precision": 0.501002004008016,
"eval_VitaminC_manhattan_recall": 0.9960159362549801,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 309.7912902832031,
"eval_VitaminC_max_ap": 0.5542686405562732,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 479.09588623046875,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5542686405562732,
"eval_sts-test_pearson_cosine": 0.8717931331186477,
"eval_sts-test_pearson_dot": 0.8628985772297639,
"eval_sts-test_pearson_euclidean": 0.8935960577585327,
"eval_sts-test_pearson_manhattan": 0.8926162242871916,
"eval_sts-test_pearson_max": 0.8935960577585327,
"eval_sts-test_spearman_cosine": 0.8989036406477372,
"eval_sts-test_spearman_dot": 0.8620115510306339,
"eval_sts-test_spearman_euclidean": 0.8911198747488857,
"eval_sts-test_spearman_manhattan": 0.8899440801070879,
"eval_sts-test_spearman_max": 0.8989036406477372,
"eval_vitaminc-pairs_loss": 2.0564281940460205,
"eval_vitaminc-pairs_runtime": 1.8511,
"eval_vitaminc-pairs_samples_per_second": 58.343,
"eval_vitaminc-pairs_steps_per_second": 1.08,
"step": 160
},
{
"epoch": 0.5,
"eval_negation-triplets_loss": 0.7865684032440186,
"eval_negation-triplets_runtime": 0.2987,
"eval_negation-triplets_samples_per_second": 214.291,
"eval_negation-triplets_steps_per_second": 3.348,
"step": 160
},
{
"epoch": 0.5,
"eval_scitail-pairs-pos_loss": 0.09969007223844528,
"eval_scitail-pairs-pos_runtime": 0.384,
"eval_scitail-pairs-pos_samples_per_second": 140.615,
"eval_scitail-pairs-pos_steps_per_second": 2.604,
"step": 160
},
{
"epoch": 0.5,
"eval_xsum-pairs_loss": 0.08461853861808777,
"eval_xsum-pairs_runtime": 2.8533,
"eval_xsum-pairs_samples_per_second": 44.86,
"eval_xsum-pairs_steps_per_second": 0.701,
"step": 160
},
{
"epoch": 0.5,
"eval_sciq_pairs_loss": 0.020078735426068306,
"eval_sciq_pairs_runtime": 3.6458,
"eval_sciq_pairs_samples_per_second": 35.109,
"eval_sciq_pairs_steps_per_second": 0.549,
"step": 160
},
{
"epoch": 0.5,
"eval_qasc_pairs_loss": 0.12362705171108246,
"eval_qasc_pairs_runtime": 0.6028,
"eval_qasc_pairs_samples_per_second": 212.356,
"eval_qasc_pairs_steps_per_second": 3.318,
"step": 160
},
{
"epoch": 0.5,
"eval_openbookqa_pairs_loss": 0.6668081283569336,
"eval_openbookqa_pairs_runtime": 0.5784,
"eval_openbookqa_pairs_samples_per_second": 221.308,
"eval_openbookqa_pairs_steps_per_second": 3.458,
"step": 160
},
{
"epoch": 0.5,
"eval_msmarco_pairs_loss": 0.32913729548454285,
"eval_msmarco_pairs_runtime": 1.4669,
"eval_msmarco_pairs_samples_per_second": 87.26,
"eval_msmarco_pairs_steps_per_second": 1.363,
"step": 160
},
{
"epoch": 0.5,
"eval_nq_pairs_loss": 0.2085198312997818,
"eval_nq_pairs_runtime": 2.8644,
"eval_nq_pairs_samples_per_second": 44.687,
"eval_nq_pairs_steps_per_second": 0.698,
"step": 160
},
{
"epoch": 0.5,
"eval_trivia_pairs_loss": 0.7138605117797852,
"eval_trivia_pairs_runtime": 4.3915,
"eval_trivia_pairs_samples_per_second": 29.147,
"eval_trivia_pairs_steps_per_second": 0.455,
"step": 160
},
{
"epoch": 0.5,
"eval_gooaq_pairs_loss": 0.3919322192668915,
"eval_gooaq_pairs_runtime": 1.004,
"eval_gooaq_pairs_samples_per_second": 127.484,
"eval_gooaq_pairs_steps_per_second": 1.992,
"step": 160
},
{
"epoch": 0.5,
"eval_paws-pos_loss": 0.025703923776745796,
"eval_paws-pos_runtime": 0.6869,
"eval_paws-pos_samples_per_second": 186.332,
"eval_paws-pos_steps_per_second": 2.911,
"step": 160
},
{
"epoch": 0.503125,
"grad_norm": 2.7484354972839355,
"learning_rate": 3.9693546485281616e-05,
"loss": 0.3874,
"step": 161
},
{
"epoch": 0.50625,
"grad_norm": 3.9011173248291016,
"learning_rate": 3.967476056079441e-05,
"loss": 0.8134,
"step": 162
},
{
"epoch": 0.509375,
"grad_norm": 3.723893642425537,
"learning_rate": 3.9655422139866315e-05,
"loss": 0.5596,
"step": 163
},
{
"epoch": 0.5125,
"grad_norm": 1.8328720331192017,
"learning_rate": 3.963553195057793e-05,
"loss": 0.2877,
"step": 164
},
{
"epoch": 0.515625,
"grad_norm": 2.9615490436553955,
"learning_rate": 3.9615090741783634e-05,
"loss": 0.5218,
"step": 165
},
{
"epoch": 0.51875,
"grad_norm": 3.041154146194458,
"learning_rate": 3.959409928308341e-05,
"loss": 0.5282,
"step": 166
},
{
"epoch": 0.521875,
"grad_norm": 3.439157247543335,
"learning_rate": 3.957255836479377e-05,
"loss": 0.7528,
"step": 167
},
{
"epoch": 0.525,
"grad_norm": 3.576984405517578,
"learning_rate": 3.955046879791816e-05,
"loss": 0.7174,
"step": 168
},
{
"epoch": 0.528125,
"grad_norm": 3.1042630672454834,
"learning_rate": 3.952783141411626e-05,
"loss": 0.6902,
"step": 169
},
{
"epoch": 0.53125,
"grad_norm": 3.0211422443389893,
"learning_rate": 3.9504647065672785e-05,
"loss": 0.7486,
"step": 170
},
{
"epoch": 0.534375,
"grad_norm": 3.5162508487701416,
"learning_rate": 3.9480916625465344e-05,
"loss": 0.6333,
"step": 171
},
{
"epoch": 0.5375,
"grad_norm": 3.9070920944213867,
"learning_rate": 3.9456640986931606e-05,
"loss": 1.2932,
"step": 172
},
{
"epoch": 0.540625,
"grad_norm": 3.548743724822998,
"learning_rate": 3.943182106403563e-05,
"loss": 0.6259,
"step": 173
},
{
"epoch": 0.54375,
"grad_norm": 3.64949893951416,
"learning_rate": 3.940645779123349e-05,
"loss": 0.8357,
"step": 174
},
{
"epoch": 0.546875,
"grad_norm": 2.4284133911132812,
"learning_rate": 3.938055212343807e-05,
"loss": 0.3604,
"step": 175
},
{
"epoch": 0.55,
"grad_norm": 2.9141008853912354,
"learning_rate": 3.9354105035983135e-05,
"loss": 0.6598,
"step": 176
},
{
"epoch": 0.553125,
"grad_norm": 2.0430235862731934,
"learning_rate": 3.932711752458657e-05,
"loss": 0.3169,
"step": 177
},
{
"epoch": 0.55625,
"grad_norm": 3.522728204727173,
"learning_rate": 3.929959060531291e-05,
"loss": 0.8629,
"step": 178
},
{
"epoch": 0.559375,
"grad_norm": 2.419400453567505,
"learning_rate": 3.927152531453513e-05,
"loss": 0.3648,
"step": 179
},
{
"epoch": 0.5625,
"grad_norm": 2.826747417449951,
"learning_rate": 3.924292270889555e-05,
"loss": 0.5103,
"step": 180
},
{
"epoch": 0.565625,
"grad_norm": 3.2149524688720703,
"learning_rate": 3.921378386526612e-05,
"loss": 0.6255,
"step": 181
},
{
"epoch": 0.56875,
"grad_norm": 2.2112457752227783,
"learning_rate": 3.918410988070782e-05,
"loss": 0.4382,
"step": 182
},
{
"epoch": 0.571875,
"grad_norm": 2.301940441131592,
"learning_rate": 3.915390187242941e-05,
"loss": 0.4647,
"step": 183
},
{
"epoch": 0.575,
"grad_norm": 2.272001266479492,
"learning_rate": 3.912316097774532e-05,
"loss": 0.4218,
"step": 184
},
{
"epoch": 0.578125,
"grad_norm": 3.77436900138855,
"learning_rate": 3.909188835403285e-05,
"loss": 0.8244,
"step": 185
},
{
"epoch": 0.58125,
"grad_norm": 3.236813545227051,
"learning_rate": 3.906008517868863e-05,
"loss": 0.6579,
"step": 186
},
{
"epoch": 0.584375,
"grad_norm": 3.1845405101776123,
"learning_rate": 3.9027752649084215e-05,
"loss": 0.8384,
"step": 187
},
{
"epoch": 0.5875,
"grad_norm": 2.709747791290283,
"learning_rate": 3.899489198252108e-05,
"loss": 0.5266,
"step": 188
},
{
"epoch": 0.590625,
"grad_norm": 2.5210235118865967,
"learning_rate": 3.896150441618476e-05,
"loss": 0.5079,
"step": 189
},
{
"epoch": 0.59375,
"grad_norm": 1.9979658126831055,
"learning_rate": 3.892759120709824e-05,
"loss": 0.2574,
"step": 190
},
{
"epoch": 0.596875,
"grad_norm": 2.4257137775421143,
"learning_rate": 3.8893153632074675e-05,
"loss": 0.4162,
"step": 191
},
{
"epoch": 0.6,
"grad_norm": 3.482635021209717,
"learning_rate": 3.88581929876693e-05,
"loss": 0.7872,
"step": 192
},
{
"epoch": 0.6,
"eval_VitaminC_cosine_accuracy": 0.564453125,
"eval_VitaminC_cosine_accuracy_threshold": 0.737064003944397,
"eval_VitaminC_cosine_ap": 0.5553950127875514,
"eval_VitaminC_cosine_f1": 0.6666666666666666,
"eval_VitaminC_cosine_f1_threshold": 0.312030553817749,
"eval_VitaminC_cosine_precision": 0.5,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 326.57232666015625,
"eval_VitaminC_dot_ap": 0.5370581483003721,
"eval_VitaminC_dot_f1": 0.6649006622516557,
"eval_VitaminC_dot_f1_threshold": 116.00311279296875,
"eval_VitaminC_dot_precision": 0.498015873015873,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.55859375,
"eval_VitaminC_euclidean_accuracy_threshold": 13.492112159729004,
"eval_VitaminC_euclidean_ap": 0.5536857778177137,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 23.840118408203125,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5625,
"eval_VitaminC_manhattan_accuracy_threshold": 306.6820983886719,
"eval_VitaminC_manhattan_ap": 0.5520101545849081,
"eval_VitaminC_manhattan_f1": 0.6666666666666666,
"eval_VitaminC_manhattan_f1_threshold": 490.146728515625,
"eval_VitaminC_manhattan_precision": 0.5,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.564453125,
"eval_VitaminC_max_accuracy_threshold": 326.57232666015625,
"eval_VitaminC_max_ap": 0.5553950127875514,
"eval_VitaminC_max_f1": 0.6666666666666666,
"eval_VitaminC_max_f1_threshold": 490.146728515625,
"eval_VitaminC_max_precision": 0.5,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5553950127875514,
"eval_sts-test_pearson_cosine": 0.8705183135475563,
"eval_sts-test_pearson_dot": 0.8575572680200927,
"eval_sts-test_pearson_euclidean": 0.894961141451468,
"eval_sts-test_pearson_manhattan": 0.8946364485546632,
"eval_sts-test_pearson_max": 0.894961141451468,
"eval_sts-test_spearman_cosine": 0.8981581293842179,
"eval_sts-test_spearman_dot": 0.8574014998383989,
"eval_sts-test_spearman_euclidean": 0.8924189591158167,
"eval_sts-test_spearman_manhattan": 0.8920942887144219,
"eval_sts-test_spearman_max": 0.8981581293842179,
"eval_vitaminc-pairs_loss": 2.066204786300659,
"eval_vitaminc-pairs_runtime": 1.8428,
"eval_vitaminc-pairs_samples_per_second": 58.608,
"eval_vitaminc-pairs_steps_per_second": 1.085,
"step": 192
},
{
"epoch": 0.6,
"eval_negation-triplets_loss": 0.763123095035553,
"eval_negation-triplets_runtime": 0.297,
"eval_negation-triplets_samples_per_second": 215.511,
"eval_negation-triplets_steps_per_second": 3.367,
"step": 192
},
{
"epoch": 0.6,
"eval_scitail-pairs-pos_loss": 0.07364190369844437,
"eval_scitail-pairs-pos_runtime": 0.3662,
"eval_scitail-pairs-pos_samples_per_second": 147.451,
"eval_scitail-pairs-pos_steps_per_second": 2.731,
"step": 192
},
{
"epoch": 0.6,
"eval_xsum-pairs_loss": 0.06735075265169144,
"eval_xsum-pairs_runtime": 2.8409,
"eval_xsum-pairs_samples_per_second": 45.056,
"eval_xsum-pairs_steps_per_second": 0.704,
"step": 192
},
{
"epoch": 0.6,
"eval_sciq_pairs_loss": 0.01930728368461132,
"eval_sciq_pairs_runtime": 3.6003,
"eval_sciq_pairs_samples_per_second": 35.552,
"eval_sciq_pairs_steps_per_second": 0.556,
"step": 192
},
{
"epoch": 0.6,
"eval_qasc_pairs_loss": 0.11278136074542999,
"eval_qasc_pairs_runtime": 0.5997,
"eval_qasc_pairs_samples_per_second": 213.437,
"eval_qasc_pairs_steps_per_second": 3.335,
"step": 192
},
{
"epoch": 0.6,
"eval_openbookqa_pairs_loss": 0.7505559921264648,
"eval_openbookqa_pairs_runtime": 0.5774,
"eval_openbookqa_pairs_samples_per_second": 221.691,
"eval_openbookqa_pairs_steps_per_second": 3.464,
"step": 192
},
{
"epoch": 0.6,
"eval_msmarco_pairs_loss": 0.33166375756263733,
"eval_msmarco_pairs_runtime": 1.4619,
"eval_msmarco_pairs_samples_per_second": 87.558,
"eval_msmarco_pairs_steps_per_second": 1.368,
"step": 192
},
{
"epoch": 0.6,
"eval_nq_pairs_loss": 0.21051406860351562,
"eval_nq_pairs_runtime": 2.858,
"eval_nq_pairs_samples_per_second": 44.786,
"eval_nq_pairs_steps_per_second": 0.7,
"step": 192
},
{
"epoch": 0.6,
"eval_trivia_pairs_loss": 0.7072564363479614,
"eval_trivia_pairs_runtime": 4.3854,
"eval_trivia_pairs_samples_per_second": 29.187,
"eval_trivia_pairs_steps_per_second": 0.456,
"step": 192
},
{
"epoch": 0.6,
"eval_gooaq_pairs_loss": 0.3748788833618164,
"eval_gooaq_pairs_runtime": 1.0024,
"eval_gooaq_pairs_samples_per_second": 127.692,
"eval_gooaq_pairs_steps_per_second": 1.995,
"step": 192
},
{
"epoch": 0.6,
"eval_paws-pos_loss": 0.025185449048876762,
"eval_paws-pos_runtime": 0.6844,
"eval_paws-pos_samples_per_second": 187.016,
"eval_paws-pos_steps_per_second": 2.922,
"step": 192
},
{
"epoch": 0.603125,
"grad_norm": 1.527544617652893,
"learning_rate": 3.882271059013064e-05,
"loss": 0.2606,
"step": 193
},
{
"epoch": 0.60625,
"grad_norm": 3.647446870803833,
"learning_rate": 3.878670777535087e-05,
"loss": 0.8808,
"step": 194
},
{
"epoch": 0.609375,
"grad_norm": 3.806488275527954,
"learning_rate": 3.875018589881564e-05,
"loss": 0.7685,
"step": 195
},
{
"epoch": 0.6125,
"grad_norm": 2.9896490573883057,
"learning_rate": 3.871314633555296e-05,
"loss": 0.7186,
"step": 196
},
{
"epoch": 0.615625,
"grad_norm": 1.31754732131958,
"learning_rate": 3.8675590480081455e-05,
"loss": 0.1147,
"step": 197
},
{
"epoch": 0.61875,
"grad_norm": 2.025834798812866,
"learning_rate": 3.863751974635784e-05,
"loss": 0.2816,
"step": 198
},
{
"epoch": 0.621875,
"grad_norm": 2.5674166679382324,
"learning_rate": 3.8598935567723734e-05,
"loss": 0.506,
"step": 199
},
{
"epoch": 0.625,
"grad_norm": 3.270737648010254,
"learning_rate": 3.8559839396851656e-05,
"loss": 0.5699,
"step": 200
},
{
"epoch": 0.628125,
"grad_norm": 1.6074001789093018,
"learning_rate": 3.852023270569033e-05,
"loss": 0.2746,
"step": 201
},
{
"epoch": 0.63125,
"grad_norm": 3.736549139022827,
"learning_rate": 3.8480116985409306e-05,
"loss": 0.7131,
"step": 202
},
{
"epoch": 0.634375,
"grad_norm": 3.9329938888549805,
"learning_rate": 3.843949374634278e-05,
"loss": 0.9307,
"step": 203
},
{
"epoch": 0.6375,
"grad_norm": 3.110591173171997,
"learning_rate": 3.839836451793273e-05,
"loss": 0.6033,
"step": 204
},
{
"epoch": 0.640625,
"grad_norm": 3.889007091522217,
"learning_rate": 3.8356730848671374e-05,
"loss": 0.7203,
"step": 205
},
{
"epoch": 0.64375,
"grad_norm": 3.2738683223724365,
"learning_rate": 3.8314594306042813e-05,
"loss": 0.7422,
"step": 206
},
{
"epoch": 0.646875,
"grad_norm": 3.077531099319458,
"learning_rate": 3.827195647646407e-05,
"loss": 0.6955,
"step": 207
},
{
"epoch": 0.65,
"grad_norm": 3.336914539337158,
"learning_rate": 3.822881896522533e-05,
"loss": 0.7139,
"step": 208
},
{
"epoch": 0.653125,
"grad_norm": 2.866854429244995,
"learning_rate": 3.818518339642951e-05,
"loss": 0.4741,
"step": 209
},
{
"epoch": 0.65625,
"grad_norm": 1.8859411478042603,
"learning_rate": 3.81410514129311e-05,
"loss": 0.2658,
"step": 210
},
{
"epoch": 0.659375,
"grad_norm": 2.938387870788574,
"learning_rate": 3.809642467627435e-05,
"loss": 0.6033,
"step": 211
},
{
"epoch": 0.6625,
"grad_norm": 3.269779920578003,
"learning_rate": 3.805130486663068e-05,
"loss": 0.7776,
"step": 212
},
{
"epoch": 0.665625,
"grad_norm": 2.8948724269866943,
"learning_rate": 3.800569368273539e-05,
"loss": 0.6791,
"step": 213
},
{
"epoch": 0.66875,
"grad_norm": 2.962749719619751,
"learning_rate": 3.795959284182381e-05,
"loss": 0.4367,
"step": 214
},
{
"epoch": 0.671875,
"grad_norm": 3.2313294410705566,
"learning_rate": 3.791300407956651e-05,
"loss": 0.7212,
"step": 215
},
{
"epoch": 0.675,
"grad_norm": 3.182274580001831,
"learning_rate": 3.7865929150004086e-05,
"loss": 0.7797,
"step": 216
},
{
"epoch": 0.678125,
"grad_norm": 2.7317817211151123,
"learning_rate": 3.781836982548101e-05,
"loss": 0.4547,
"step": 217
},
{
"epoch": 0.68125,
"grad_norm": 3.443126916885376,
"learning_rate": 3.777032789657898e-05,
"loss": 0.6771,
"step": 218
},
{
"epoch": 0.684375,
"grad_norm": 2.923877000808716,
"learning_rate": 3.772180517204946e-05,
"loss": 0.5488,
"step": 219
},
{
"epoch": 0.6875,
"grad_norm": 2.897601366043091,
"learning_rate": 3.767280347874561e-05,
"loss": 0.7352,
"step": 220
},
{
"epoch": 0.690625,
"grad_norm": 4.06088924407959,
"learning_rate": 3.762332466155348e-05,
"loss": 0.9567,
"step": 221
},
{
"epoch": 0.69375,
"grad_norm": 2.582475423812866,
"learning_rate": 3.7573370583322575e-05,
"loss": 0.4274,
"step": 222
},
{
"epoch": 0.696875,
"grad_norm": 3.7177348136901855,
"learning_rate": 3.7522943124795706e-05,
"loss": 0.7653,
"step": 223
},
{
"epoch": 0.7,
"grad_norm": 2.9613823890686035,
"learning_rate": 3.7472044184538186e-05,
"loss": 0.5672,
"step": 224
},
{
"epoch": 0.7,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.8318229913711548,
"eval_VitaminC_cosine_ap": 0.5483869647391425,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.2898828089237213,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 280.6613464355469,
"eval_VitaminC_dot_ap": 0.5352389087249884,
"eval_VitaminC_dot_f1": 0.6666666666666667,
"eval_VitaminC_dot_f1_threshold": 127.79656982421875,
"eval_VitaminC_dot_precision": 0.501002004008016,
"eval_VitaminC_dot_recall": 0.9960159362549801,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 15.362771987915039,
"eval_VitaminC_euclidean_ap": 0.5487471191186046,
"eval_VitaminC_euclidean_f1": 0.6657789613848203,
"eval_VitaminC_euclidean_f1_threshold": 23.0285587310791,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 0.9960159362549801,
"eval_VitaminC_manhattan_accuracy": 0.556640625,
"eval_VitaminC_manhattan_accuracy_threshold": 304.9786376953125,
"eval_VitaminC_manhattan_ap": 0.5448852224007886,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 503.7974548339844,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 304.9786376953125,
"eval_VitaminC_max_ap": 0.5487471191186046,
"eval_VitaminC_max_f1": 0.6666666666666667,
"eval_VitaminC_max_f1_threshold": 503.7974548339844,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5487471191186046,
"eval_sts-test_pearson_cosine": 0.8727242216490746,
"eval_sts-test_pearson_dot": 0.8620679649117718,
"eval_sts-test_pearson_euclidean": 0.8961291746213003,
"eval_sts-test_pearson_manhattan": 0.8961616445842001,
"eval_sts-test_pearson_max": 0.8961616445842001,
"eval_sts-test_spearman_cosine": 0.9004602237727143,
"eval_sts-test_spearman_dot": 0.8617584826474656,
"eval_sts-test_spearman_euclidean": 0.8945701970021624,
"eval_sts-test_spearman_manhattan": 0.8942019836234342,
"eval_sts-test_spearman_max": 0.9004602237727143,
"eval_vitaminc-pairs_loss": 2.07513689994812,
"eval_vitaminc-pairs_runtime": 1.8489,
"eval_vitaminc-pairs_samples_per_second": 58.414,
"eval_vitaminc-pairs_steps_per_second": 1.082,
"step": 224
},
{
"epoch": 0.7,
"eval_negation-triplets_loss": 0.7822766900062561,
"eval_negation-triplets_runtime": 0.2948,
"eval_negation-triplets_samples_per_second": 217.093,
"eval_negation-triplets_steps_per_second": 3.392,
"step": 224
},
{
"epoch": 0.7,
"eval_scitail-pairs-pos_loss": 0.084584079682827,
"eval_scitail-pairs-pos_runtime": 0.365,
"eval_scitail-pairs-pos_samples_per_second": 147.944,
"eval_scitail-pairs-pos_steps_per_second": 2.74,
"step": 224
},
{
"epoch": 0.7,
"eval_xsum-pairs_loss": 0.05927089601755142,
"eval_xsum-pairs_runtime": 2.8461,
"eval_xsum-pairs_samples_per_second": 44.974,
"eval_xsum-pairs_steps_per_second": 0.703,
"step": 224
},
{
"epoch": 0.7,
"eval_sciq_pairs_loss": 0.019030971452593803,
"eval_sciq_pairs_runtime": 3.6465,
"eval_sciq_pairs_samples_per_second": 35.102,
"eval_sciq_pairs_steps_per_second": 0.548,
"step": 224
},
{
"epoch": 0.7,
"eval_qasc_pairs_loss": 0.12519867718219757,
"eval_qasc_pairs_runtime": 0.6003,
"eval_qasc_pairs_samples_per_second": 213.235,
"eval_qasc_pairs_steps_per_second": 3.332,
"step": 224
},
{
"epoch": 0.7,
"eval_openbookqa_pairs_loss": 0.7141773700714111,
"eval_openbookqa_pairs_runtime": 0.5753,
"eval_openbookqa_pairs_samples_per_second": 222.508,
"eval_openbookqa_pairs_steps_per_second": 3.477,
"step": 224
},
{
"epoch": 0.7,
"eval_msmarco_pairs_loss": 0.3040487468242645,
"eval_msmarco_pairs_runtime": 1.4648,
"eval_msmarco_pairs_samples_per_second": 87.383,
"eval_msmarco_pairs_steps_per_second": 1.365,
"step": 224
},
{
"epoch": 0.7,
"eval_nq_pairs_loss": 0.1808711141347885,
"eval_nq_pairs_runtime": 2.8595,
"eval_nq_pairs_samples_per_second": 44.764,
"eval_nq_pairs_steps_per_second": 0.699,
"step": 224
},
{
"epoch": 0.7,
"eval_trivia_pairs_loss": 0.7160522937774658,
"eval_trivia_pairs_runtime": 4.3875,
"eval_trivia_pairs_samples_per_second": 29.174,
"eval_trivia_pairs_steps_per_second": 0.456,
"step": 224
},
{
"epoch": 0.7,
"eval_gooaq_pairs_loss": 0.3398577868938446,
"eval_gooaq_pairs_runtime": 1.0189,
"eval_gooaq_pairs_samples_per_second": 125.631,
"eval_gooaq_pairs_steps_per_second": 1.963,
"step": 224
},
{
"epoch": 0.7,
"eval_paws-pos_loss": 0.0250654686242342,
"eval_paws-pos_runtime": 0.6965,
"eval_paws-pos_samples_per_second": 183.765,
"eval_paws-pos_steps_per_second": 2.871,
"step": 224
},
{
"epoch": 0.703125,
"grad_norm": 2.7675271034240723,
"learning_rate": 3.742067567886634e-05,
"loss": 0.6116,
"step": 225
},
{
"epoch": 0.70625,
"grad_norm": 3.1136417388916016,
"learning_rate": 3.7368839541775386e-05,
"loss": 0.6484,
"step": 226
},
{
"epoch": 0.709375,
"grad_norm": 3.1425583362579346,
"learning_rate": 3.731653772486657e-05,
"loss": 0.669,
"step": 227
},
{
"epoch": 0.7125,
"grad_norm": 1.8860105276107788,
"learning_rate": 3.726377219727376e-05,
"loss": 0.263,
"step": 228
},
{
"epoch": 0.715625,
"grad_norm": 2.6990439891815186,
"learning_rate": 3.721054494558923e-05,
"loss": 0.6181,
"step": 229
},
{
"epoch": 0.71875,
"grad_norm": 3.836609363555908,
"learning_rate": 3.7156857973788926e-05,
"loss": 0.8956,
"step": 230
},
{
"epoch": 0.721875,
"grad_norm": 3.0837268829345703,
"learning_rate": 3.710271330315699e-05,
"loss": 0.5363,
"step": 231
},
{
"epoch": 0.725,
"grad_norm": 3.639112710952759,
"learning_rate": 3.704811297220967e-05,
"loss": 0.823,
"step": 232
},
{
"epoch": 0.728125,
"grad_norm": 3.301112651824951,
"learning_rate": 3.699305903661858e-05,
"loss": 0.7795,
"step": 233
},
{
"epoch": 0.73125,
"grad_norm": 2.289018154144287,
"learning_rate": 3.693755356913326e-05,
"loss": 0.3688,
"step": 234
},
{
"epoch": 0.734375,
"grad_norm": 2.259490966796875,
"learning_rate": 3.688159865950319e-05,
"loss": 0.3835,
"step": 235
},
{
"epoch": 0.7375,
"grad_norm": 2.2043821811676025,
"learning_rate": 3.6825196414399096e-05,
"loss": 0.3393,
"step": 236
},
{
"epoch": 0.740625,
"grad_norm": 2.6866259574890137,
"learning_rate": 3.6768348957333635e-05,
"loss": 0.4792,
"step": 237
},
{
"epoch": 0.74375,
"grad_norm": 2.561917304992676,
"learning_rate": 3.671105842858142e-05,
"loss": 0.3966,
"step": 238
},
{
"epoch": 0.746875,
"grad_norm": 2.1512343883514404,
"learning_rate": 3.6653326985098486e-05,
"loss": 0.2902,
"step": 239
},
{
"epoch": 0.75,
"grad_norm": 3.7423007488250732,
"learning_rate": 3.659515680044106e-05,
"loss": 0.6716,
"step": 240
},
{
"epoch": 0.753125,
"grad_norm": 2.6502630710601807,
"learning_rate": 3.65365500646837e-05,
"loss": 0.6783,
"step": 241
},
{
"epoch": 0.75625,
"grad_norm": 2.8291828632354736,
"learning_rate": 3.6477508984336886e-05,
"loss": 0.4794,
"step": 242
},
{
"epoch": 0.759375,
"grad_norm": 3.7910561561584473,
"learning_rate": 3.641803578226393e-05,
"loss": 0.8283,
"step": 243
},
{
"epoch": 0.7625,
"grad_norm": 3.3968613147735596,
"learning_rate": 3.635813269759727e-05,
"loss": 0.6875,
"step": 244
},
{
"epoch": 0.765625,
"grad_norm": 3.5861093997955322,
"learning_rate": 3.629780198565419e-05,
"loss": 0.8384,
"step": 245
},
{
"epoch": 0.76875,
"grad_norm": 2.709362030029297,
"learning_rate": 3.623704591785189e-05,
"loss": 0.5796,
"step": 246
},
{
"epoch": 0.771875,
"grad_norm": 2.5690431594848633,
"learning_rate": 3.6175866781622e-05,
"loss": 0.6206,
"step": 247
},
{
"epoch": 0.775,
"grad_norm": 3.5460782051086426,
"learning_rate": 3.611426688032439e-05,
"loss": 0.7836,
"step": 248
},
{
"epoch": 0.778125,
"grad_norm": 2.9132962226867676,
"learning_rate": 3.605224853316055e-05,
"loss": 0.615,
"step": 249
},
{
"epoch": 0.78125,
"grad_norm": 2.707908868789673,
"learning_rate": 3.5989814075086195e-05,
"loss": 0.433,
"step": 250
},
{
"epoch": 0.784375,
"grad_norm": 3.2124290466308594,
"learning_rate": 3.592696585672338e-05,
"loss": 0.7394,
"step": 251
},
{
"epoch": 0.7875,
"grad_norm": 1.3290472030639648,
"learning_rate": 3.5863706244272006e-05,
"loss": 0.1203,
"step": 252
},
{
"epoch": 0.790625,
"grad_norm": 3.5975258350372314,
"learning_rate": 3.580003761942073e-05,
"loss": 1.0909,
"step": 253
},
{
"epoch": 0.79375,
"grad_norm": 3.1402907371520996,
"learning_rate": 3.573596237925728e-05,
"loss": 0.7107,
"step": 254
},
{
"epoch": 0.796875,
"grad_norm": 2.1686770915985107,
"learning_rate": 3.567148293617825e-05,
"loss": 0.3464,
"step": 255
},
{
"epoch": 0.8,
"grad_norm": 4.112154960632324,
"learning_rate": 3.560660171779821e-05,
"loss": 0.9347,
"step": 256
},
{
"epoch": 0.8,
"eval_VitaminC_cosine_accuracy": 0.560546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.825050950050354,
"eval_VitaminC_cosine_ap": 0.5480281823929228,
"eval_VitaminC_cosine_f1": 0.6666666666666667,
"eval_VitaminC_cosine_f1_threshold": 0.39935123920440674,
"eval_VitaminC_cosine_precision": 0.501002004008016,
"eval_VitaminC_cosine_recall": 0.9960159362549801,
"eval_VitaminC_dot_accuracy": 0.552734375,
"eval_VitaminC_dot_accuracy_threshold": 313.59075927734375,
"eval_VitaminC_dot_ap": 0.5329984665726657,
"eval_VitaminC_dot_f1": 0.6657789613848203,
"eval_VitaminC_dot_f1_threshold": 132.71243286132812,
"eval_VitaminC_dot_precision": 0.5,
"eval_VitaminC_dot_recall": 0.9960159362549801,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 11.227453231811523,
"eval_VitaminC_euclidean_ap": 0.5496569156706412,
"eval_VitaminC_euclidean_f1": 0.6666666666666666,
"eval_VitaminC_euclidean_f1_threshold": 22.6641788482666,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 230.89329528808594,
"eval_VitaminC_manhattan_ap": 0.545699310794812,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 483.625244140625,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.560546875,
"eval_VitaminC_max_accuracy_threshold": 313.59075927734375,
"eval_VitaminC_max_ap": 0.5496569156706412,
"eval_VitaminC_max_f1": 0.6666666666666667,
"eval_VitaminC_max_f1_threshold": 483.625244140625,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5496569156706412,
"eval_sts-test_pearson_cosine": 0.8711436629553765,
"eval_sts-test_pearson_dot": 0.859333025320038,
"eval_sts-test_pearson_euclidean": 0.8967955144362856,
"eval_sts-test_pearson_manhattan": 0.8972988934332646,
"eval_sts-test_pearson_max": 0.8972988934332646,
"eval_sts-test_spearman_cosine": 0.8987000224084064,
"eval_sts-test_spearman_dot": 0.8599958647150425,
"eval_sts-test_spearman_euclidean": 0.8935259263175941,
"eval_sts-test_spearman_manhattan": 0.8939636384052635,
"eval_sts-test_spearman_max": 0.8987000224084064,
"eval_vitaminc-pairs_loss": 2.0033843517303467,
"eval_vitaminc-pairs_runtime": 1.8717,
"eval_vitaminc-pairs_samples_per_second": 57.701,
"eval_vitaminc-pairs_steps_per_second": 1.069,
"step": 256
},
{
"epoch": 0.8,
"eval_negation-triplets_loss": 0.7500894069671631,
"eval_negation-triplets_runtime": 0.2995,
"eval_negation-triplets_samples_per_second": 213.659,
"eval_negation-triplets_steps_per_second": 3.338,
"step": 256
},
{
"epoch": 0.8,
"eval_scitail-pairs-pos_loss": 0.07255758345127106,
"eval_scitail-pairs-pos_runtime": 0.386,
"eval_scitail-pairs-pos_samples_per_second": 139.883,
"eval_scitail-pairs-pos_steps_per_second": 2.59,
"step": 256
},
{
"epoch": 0.8,
"eval_xsum-pairs_loss": 0.056476954370737076,
"eval_xsum-pairs_runtime": 2.8548,
"eval_xsum-pairs_samples_per_second": 44.837,
"eval_xsum-pairs_steps_per_second": 0.701,
"step": 256
},
{
"epoch": 0.8,
"eval_sciq_pairs_loss": 0.01967025361955166,
"eval_sciq_pairs_runtime": 3.7336,
"eval_sciq_pairs_samples_per_second": 34.283,
"eval_sciq_pairs_steps_per_second": 0.536,
"step": 256
},
{
"epoch": 0.8,
"eval_qasc_pairs_loss": 0.1263607293367386,
"eval_qasc_pairs_runtime": 0.6107,
"eval_qasc_pairs_samples_per_second": 209.594,
"eval_qasc_pairs_steps_per_second": 3.275,
"step": 256
},
{
"epoch": 0.8,
"eval_openbookqa_pairs_loss": 0.7773354649543762,
"eval_openbookqa_pairs_runtime": 0.5903,
"eval_openbookqa_pairs_samples_per_second": 216.831,
"eval_openbookqa_pairs_steps_per_second": 3.388,
"step": 256
},
{
"epoch": 0.8,
"eval_msmarco_pairs_loss": 0.2844376862049103,
"eval_msmarco_pairs_runtime": 1.4722,
"eval_msmarco_pairs_samples_per_second": 86.947,
"eval_msmarco_pairs_steps_per_second": 1.359,
"step": 256
},
{
"epoch": 0.8,
"eval_nq_pairs_loss": 0.17289823293685913,
"eval_nq_pairs_runtime": 2.8665,
"eval_nq_pairs_samples_per_second": 44.654,
"eval_nq_pairs_steps_per_second": 0.698,
"step": 256
},
{
"epoch": 0.8,
"eval_trivia_pairs_loss": 0.6546728610992432,
"eval_trivia_pairs_runtime": 4.3994,
"eval_trivia_pairs_samples_per_second": 29.095,
"eval_trivia_pairs_steps_per_second": 0.455,
"step": 256
},
{
"epoch": 0.8,
"eval_gooaq_pairs_loss": 0.31546029448509216,
"eval_gooaq_pairs_runtime": 1.0423,
"eval_gooaq_pairs_samples_per_second": 122.802,
"eval_gooaq_pairs_steps_per_second": 1.919,
"step": 256
},
{
"epoch": 0.8,
"eval_paws-pos_loss": 0.02565235085785389,
"eval_paws-pos_runtime": 0.6999,
"eval_paws-pos_samples_per_second": 182.88,
"eval_paws-pos_steps_per_second": 2.857,
"step": 256
},
{
"epoch": 0.803125,
"grad_norm": 2.2415249347686768,
"learning_rate": 3.5541321166858384e-05,
"loss": 0.464,
"step": 257
},
{
"epoch": 0.80625,
"grad_norm": 2.22743821144104,
"learning_rate": 3.54756437411346e-05,
"loss": 0.4622,
"step": 258
},
{
"epoch": 0.809375,
"grad_norm": 2.5632565021514893,
"learning_rate": 3.5409571913344813e-05,
"loss": 0.5124,
"step": 259
},
{
"epoch": 0.8125,
"grad_norm": 3.4271864891052246,
"learning_rate": 3.5343108171056006e-05,
"loss": 0.832,
"step": 260
},
{
"epoch": 0.815625,
"grad_norm": 2.9892525672912598,
"learning_rate": 3.527625501659051e-05,
"loss": 0.6264,
"step": 261
},
{
"epoch": 0.81875,
"grad_norm": 2.808922529220581,
"learning_rate": 3.5209014966931795e-05,
"loss": 0.5483,
"step": 262
},
{
"epoch": 0.821875,
"grad_norm": 2.71504545211792,
"learning_rate": 3.514139055362974e-05,
"loss": 0.5929,
"step": 263
},
{
"epoch": 0.825,
"grad_norm": 2.9386723041534424,
"learning_rate": 3.507338432270528e-05,
"loss": 0.5797,
"step": 264
},
{
"epoch": 0.828125,
"grad_norm": 2.694045066833496,
"learning_rate": 3.500499883455457e-05,
"loss": 0.5292,
"step": 265
},
{
"epoch": 0.83125,
"grad_norm": 2.800262928009033,
"learning_rate": 3.493623666385258e-05,
"loss": 0.5376,
"step": 266
},
{
"epoch": 0.834375,
"grad_norm": 3.4821765422821045,
"learning_rate": 3.486710039945618e-05,
"loss": 0.7102,
"step": 267
},
{
"epoch": 0.8375,
"grad_norm": 2.337831735610962,
"learning_rate": 3.4797592644306655e-05,
"loss": 0.4605,
"step": 268
},
{
"epoch": 0.840625,
"grad_norm": 3.8004300594329834,
"learning_rate": 3.472771601533169e-05,
"loss": 1.2713,
"step": 269
},
{
"epoch": 0.84375,
"grad_norm": 3.3914785385131836,
"learning_rate": 3.465747314334687e-05,
"loss": 0.7764,
"step": 270
},
{
"epoch": 0.846875,
"grad_norm": 3.0255892276763916,
"learning_rate": 3.458686667295664e-05,
"loss": 0.7517,
"step": 271
},
{
"epoch": 0.85,
"grad_norm": 2.9869744777679443,
"learning_rate": 3.451589926245469e-05,
"loss": 0.614,
"step": 272
},
{
"epoch": 0.853125,
"grad_norm": 3.160764694213867,
"learning_rate": 3.444457358372391e-05,
"loss": 0.6046,
"step": 273
},
{
"epoch": 0.85625,
"grad_norm": 2.87579607963562,
"learning_rate": 3.43728923221358e-05,
"loss": 0.7111,
"step": 274
},
{
"epoch": 0.859375,
"grad_norm": 1.9325075149536133,
"learning_rate": 3.4300858176449344e-05,
"loss": 0.4401,
"step": 275
},
{
"epoch": 0.8625,
"grad_norm": 1.9690322875976562,
"learning_rate": 3.4228473858709404e-05,
"loss": 0.4351,
"step": 276
},
{
"epoch": 0.865625,
"grad_norm": 3.530524969100952,
"learning_rate": 3.4155742094144646e-05,
"loss": 0.7498,
"step": 277
},
{
"epoch": 0.86875,
"grad_norm": 3.321233034133911,
"learning_rate": 3.408266562106489e-05,
"loss": 0.7173,
"step": 278
},
{
"epoch": 0.871875,
"grad_norm": 2.2215065956115723,
"learning_rate": 3.400924719075804e-05,
"loss": 0.4696,
"step": 279
},
{
"epoch": 0.875,
"grad_norm": 3.1400840282440186,
"learning_rate": 3.39354895673865e-05,
"loss": 0.6246,
"step": 280
},
{
"epoch": 0.878125,
"grad_norm": 3.4510090351104736,
"learning_rate": 3.386139552788312e-05,
"loss": 0.7578,
"step": 281
},
{
"epoch": 0.88125,
"grad_norm": 2.350965976715088,
"learning_rate": 3.378696786184659e-05,
"loss": 0.3533,
"step": 282
},
{
"epoch": 0.884375,
"grad_norm": 3.5409841537475586,
"learning_rate": 3.3712209371436473e-05,
"loss": 0.7328,
"step": 283
},
{
"epoch": 0.8875,
"grad_norm": 3.4038257598876953,
"learning_rate": 3.363712287126768e-05,
"loss": 0.6964,
"step": 284
},
{
"epoch": 0.890625,
"grad_norm": 2.8739030361175537,
"learning_rate": 3.3561711188304516e-05,
"loss": 0.6431,
"step": 285
},
{
"epoch": 0.89375,
"grad_norm": 3.5703017711639404,
"learning_rate": 3.34859771617542e-05,
"loss": 0.7155,
"step": 286
},
{
"epoch": 0.896875,
"grad_norm": 2.76778244972229,
"learning_rate": 3.340992364296004e-05,
"loss": 0.6328,
"step": 287
},
{
"epoch": 0.9,
"grad_norm": 3.4040513038635254,
"learning_rate": 3.333355349529403e-05,
"loss": 0.7895,
"step": 288
},
{
"epoch": 0.9,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.8272709846496582,
"eval_VitaminC_cosine_ap": 0.5489140066962175,
"eval_VitaminC_cosine_f1": 0.6666666666666667,
"eval_VitaminC_cosine_f1_threshold": 0.3126052916049957,
"eval_VitaminC_cosine_precision": 0.501002004008016,
"eval_VitaminC_cosine_recall": 0.9960159362549801,
"eval_VitaminC_dot_accuracy": 0.552734375,
"eval_VitaminC_dot_accuracy_threshold": 303.1324157714844,
"eval_VitaminC_dot_ap": 0.5301817831729955,
"eval_VitaminC_dot_f1": 0.6675531914893617,
"eval_VitaminC_dot_f1_threshold": 120.97600555419922,
"eval_VitaminC_dot_precision": 0.500998003992016,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.55859375,
"eval_VitaminC_euclidean_accuracy_threshold": 11.374759674072266,
"eval_VitaminC_euclidean_ap": 0.551008119376775,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 24.255207061767578,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.556640625,
"eval_VitaminC_manhattan_accuracy_threshold": 230.6835174560547,
"eval_VitaminC_manhattan_ap": 0.5485867585720646,
"eval_VitaminC_manhattan_f1": 0.6649006622516557,
"eval_VitaminC_manhattan_f1_threshold": 521.4428100585938,
"eval_VitaminC_manhattan_precision": 0.498015873015873,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 303.1324157714844,
"eval_VitaminC_max_ap": 0.551008119376775,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 521.4428100585938,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.551008119376775,
"eval_sts-test_pearson_cosine": 0.8726396664543798,
"eval_sts-test_pearson_dot": 0.8623668711287399,
"eval_sts-test_pearson_euclidean": 0.8950211806151552,
"eval_sts-test_pearson_manhattan": 0.8954158210085943,
"eval_sts-test_pearson_max": 0.8954158210085943,
"eval_sts-test_spearman_cosine": 0.897937595168081,
"eval_sts-test_spearman_dot": 0.8635840656046664,
"eval_sts-test_spearman_euclidean": 0.8912111673221239,
"eval_sts-test_spearman_manhattan": 0.8913994806300589,
"eval_sts-test_spearman_max": 0.897937595168081,
"eval_vitaminc-pairs_loss": 1.955485224723816,
"eval_vitaminc-pairs_runtime": 1.8698,
"eval_vitaminc-pairs_samples_per_second": 57.76,
"eval_vitaminc-pairs_steps_per_second": 1.07,
"step": 288
},
{
"epoch": 0.9,
"eval_negation-triplets_loss": 0.7942228317260742,
"eval_negation-triplets_runtime": 0.2979,
"eval_negation-triplets_samples_per_second": 214.818,
"eval_negation-triplets_steps_per_second": 3.357,
"step": 288
},
{
"epoch": 0.9,
"eval_scitail-pairs-pos_loss": 0.07541428506374359,
"eval_scitail-pairs-pos_runtime": 0.381,
"eval_scitail-pairs-pos_samples_per_second": 141.723,
"eval_scitail-pairs-pos_steps_per_second": 2.625,
"step": 288
},
{
"epoch": 0.9,
"eval_xsum-pairs_loss": 0.05658277869224548,
"eval_xsum-pairs_runtime": 2.8504,
"eval_xsum-pairs_samples_per_second": 44.906,
"eval_xsum-pairs_steps_per_second": 0.702,
"step": 288
},
{
"epoch": 0.9,
"eval_sciq_pairs_loss": 0.019849741831421852,
"eval_sciq_pairs_runtime": 3.6603,
"eval_sciq_pairs_samples_per_second": 34.97,
"eval_sciq_pairs_steps_per_second": 0.546,
"step": 288
},
{
"epoch": 0.9,
"eval_qasc_pairs_loss": 0.10889946669340134,
"eval_qasc_pairs_runtime": 0.6033,
"eval_qasc_pairs_samples_per_second": 212.165,
"eval_qasc_pairs_steps_per_second": 3.315,
"step": 288
},
{
"epoch": 0.9,
"eval_openbookqa_pairs_loss": 0.7712036967277527,
"eval_openbookqa_pairs_runtime": 0.585,
"eval_openbookqa_pairs_samples_per_second": 218.815,
"eval_openbookqa_pairs_steps_per_second": 3.419,
"step": 288
},
{
"epoch": 0.9,
"eval_msmarco_pairs_loss": 0.279923677444458,
"eval_msmarco_pairs_runtime": 1.4672,
"eval_msmarco_pairs_samples_per_second": 87.239,
"eval_msmarco_pairs_steps_per_second": 1.363,
"step": 288
},
{
"epoch": 0.9,
"eval_nq_pairs_loss": 0.18058110773563385,
"eval_nq_pairs_runtime": 2.8678,
"eval_nq_pairs_samples_per_second": 44.634,
"eval_nq_pairs_steps_per_second": 0.697,
"step": 288
},
{
"epoch": 0.9,
"eval_trivia_pairs_loss": 0.7307667136192322,
"eval_trivia_pairs_runtime": 4.4071,
"eval_trivia_pairs_samples_per_second": 29.044,
"eval_trivia_pairs_steps_per_second": 0.454,
"step": 288
},
{
"epoch": 0.9,
"eval_gooaq_pairs_loss": 0.33244821429252625,
"eval_gooaq_pairs_runtime": 1.0096,
"eval_gooaq_pairs_samples_per_second": 126.785,
"eval_gooaq_pairs_steps_per_second": 1.981,
"step": 288
},
{
"epoch": 0.9,
"eval_paws-pos_loss": 0.024881305173039436,
"eval_paws-pos_runtime": 0.6946,
"eval_paws-pos_samples_per_second": 184.279,
"eval_paws-pos_steps_per_second": 2.879,
"step": 288
},
{
"epoch": 0.903125,
"grad_norm": 2.7424654960632324,
"learning_rate": 3.325686959404907e-05,
"loss": 0.5752,
"step": 289
},
{
"epoch": 0.90625,
"grad_norm": 2.913073778152466,
"learning_rate": 3.3179874826330696e-05,
"loss": 0.666,
"step": 290
},
{
"epoch": 0.909375,
"grad_norm": 3.9191319942474365,
"learning_rate": 3.3102572090948395e-05,
"loss": 0.874,
"step": 291
},
{
"epoch": 0.9125,
"grad_norm": 3.086979627609253,
"learning_rate": 3.302496429830647e-05,
"loss": 0.7431,
"step": 292
},
{
"epoch": 0.915625,
"grad_norm": 3.0514609813690186,
"learning_rate": 3.294705437029443e-05,
"loss": 0.8332,
"step": 293
},
{
"epoch": 0.91875,
"grad_norm": 3.042734384536743,
"learning_rate": 3.2868845240177035e-05,
"loss": 0.7082,
"step": 294
},
{
"epoch": 0.921875,
"grad_norm": 3.4690864086151123,
"learning_rate": 3.2790339852483845e-05,
"loss": 0.6618,
"step": 295
},
{
"epoch": 0.925,
"grad_norm": 2.520153045654297,
"learning_rate": 3.2711541162898326e-05,
"loss": 0.2375,
"step": 296
},
{
"epoch": 0.928125,
"grad_norm": 2.9911270141601562,
"learning_rate": 3.2632452138146607e-05,
"loss": 0.5305,
"step": 297
},
{
"epoch": 0.93125,
"grad_norm": 2.2287964820861816,
"learning_rate": 3.255307575588577e-05,
"loss": 0.1686,
"step": 298
},
{
"epoch": 0.934375,
"grad_norm": 3.2477688789367676,
"learning_rate": 3.247341500459173e-05,
"loss": 0.7938,
"step": 299
},
{
"epoch": 0.9375,
"grad_norm": 1.9740976095199585,
"learning_rate": 3.239347288344676e-05,
"loss": 0.2629,
"step": 300
},
{
"epoch": 0.940625,
"grad_norm": 4.1774702072143555,
"learning_rate": 3.231325240222655e-05,
"loss": 0.973,
"step": 301
},
{
"epoch": 0.94375,
"grad_norm": 3.6038107872009277,
"learning_rate": 3.2232756581186846e-05,
"loss": 0.649,
"step": 302
},
{
"epoch": 0.946875,
"grad_norm": 2.0142273902893066,
"learning_rate": 3.215198845094984e-05,
"loss": 0.3329,
"step": 303
},
{
"epoch": 0.95,
"grad_norm": 3.460426092147827,
"learning_rate": 3.2070951052389975e-05,
"loss": 0.6105,
"step": 304
},
{
"epoch": 0.953125,
"grad_norm": 2.1552436351776123,
"learning_rate": 3.198964743651949e-05,
"loss": 0.3621,
"step": 305
},
{
"epoch": 0.95625,
"grad_norm": 2.6201255321502686,
"learning_rate": 3.1908080664373605e-05,
"loss": 0.5165,
"step": 306
},
{
"epoch": 0.959375,
"grad_norm": 3.296206474304199,
"learning_rate": 3.182625380689516e-05,
"loss": 0.6075,
"step": 307
},
{
"epoch": 0.9625,
"grad_norm": 2.3535473346710205,
"learning_rate": 3.17441699448191e-05,
"loss": 0.3091,
"step": 308
},
{
"epoch": 0.965625,
"grad_norm": 2.1077566146850586,
"learning_rate": 3.166183216855644e-05,
"loss": 0.2762,
"step": 309
},
{
"epoch": 0.96875,
"grad_norm": 2.85646390914917,
"learning_rate": 3.157924357807792e-05,
"loss": 0.5736,
"step": 310
},
{
"epoch": 0.971875,
"grad_norm": 2.4051146507263184,
"learning_rate": 3.149640728279728e-05,
"loss": 0.3876,
"step": 311
},
{
"epoch": 0.975,
"grad_norm": 5.062899112701416,
"learning_rate": 3.141332640145423e-05,
"loss": 1.8005,
"step": 312
},
{
"epoch": 0.978125,
"grad_norm": 2.969027042388916,
"learning_rate": 3.1330004061997e-05,
"loss": 0.6344,
"step": 313
},
{
"epoch": 0.98125,
"grad_norm": 4.5385847091674805,
"learning_rate": 3.1246443401464564e-05,
"loss": 0.9414,
"step": 314
},
{
"epoch": 0.984375,
"grad_norm": 2.6700010299682617,
"learning_rate": 3.116264756586856e-05,
"loss": 0.4782,
"step": 315
},
{
"epoch": 0.9875,
"grad_norm": 2.293757438659668,
"learning_rate": 3.107861971007485e-05,
"loss": 0.4196,
"step": 316
},
{
"epoch": 0.990625,
"grad_norm": 5.584008693695068,
"learning_rate": 3.099436299768471e-05,
"loss": 0.5288,
"step": 317
},
{
"epoch": 0.99375,
"grad_norm": 3.047480344772339,
"learning_rate": 3.0909880600915726e-05,
"loss": 0.5888,
"step": 318
},
{
"epoch": 0.996875,
"grad_norm": 3.148433208465576,
"learning_rate": 3.08251757004824e-05,
"loss": 0.4598,
"step": 319
},
{
"epoch": 1.0,
"grad_norm": 3.277242660522461,
"learning_rate": 3.074025148547635e-05,
"loss": 0.5085,
"step": 320
},
{
"epoch": 1.0,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8424822092056274,
"eval_VitaminC_cosine_ap": 0.5467401178776568,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.3060212731361389,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55859375,
"eval_VitaminC_dot_accuracy_threshold": 302.82525634765625,
"eval_VitaminC_dot_ap": 0.5313187944370502,
"eval_VitaminC_dot_f1": 0.6657824933687002,
"eval_VitaminC_dot_f1_threshold": 112.19659423828125,
"eval_VitaminC_dot_precision": 0.4990059642147117,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 13.824159622192383,
"eval_VitaminC_euclidean_ap": 0.5479307244374829,
"eval_VitaminC_euclidean_f1": 0.6649006622516557,
"eval_VitaminC_euclidean_f1_threshold": 23.69076919555664,
"eval_VitaminC_euclidean_precision": 0.498015873015873,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.552734375,
"eval_VitaminC_manhattan_accuracy_threshold": 292.99462890625,
"eval_VitaminC_manhattan_ap": 0.5465792848292811,
"eval_VitaminC_manhattan_f1": 0.6666666666666666,
"eval_VitaminC_manhattan_f1_threshold": 489.7302551269531,
"eval_VitaminC_manhattan_precision": 0.5,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 302.82525634765625,
"eval_VitaminC_max_ap": 0.5479307244374829,
"eval_VitaminC_max_f1": 0.6666666666666666,
"eval_VitaminC_max_f1_threshold": 489.7302551269531,
"eval_VitaminC_max_precision": 0.5,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5479307244374829,
"eval_sts-test_pearson_cosine": 0.87646365142741,
"eval_sts-test_pearson_dot": 0.8655190609079275,
"eval_sts-test_pearson_euclidean": 0.9009817964818363,
"eval_sts-test_pearson_manhattan": 0.9014432269871114,
"eval_sts-test_pearson_max": 0.9014432269871114,
"eval_sts-test_spearman_cosine": 0.9030024086785755,
"eval_sts-test_spearman_dot": 0.8673856405086042,
"eval_sts-test_spearman_euclidean": 0.8983721299161916,
"eval_sts-test_spearman_manhattan": 0.8981219256137521,
"eval_sts-test_spearman_max": 0.9030024086785755,
"eval_vitaminc-pairs_loss": 1.9213347434997559,
"eval_vitaminc-pairs_runtime": 1.866,
"eval_vitaminc-pairs_samples_per_second": 57.877,
"eval_vitaminc-pairs_steps_per_second": 1.072,
"step": 320
},
{
"epoch": 1.0,
"eval_negation-triplets_loss": 0.7787352204322815,
"eval_negation-triplets_runtime": 0.2979,
"eval_negation-triplets_samples_per_second": 214.834,
"eval_negation-triplets_steps_per_second": 3.357,
"step": 320
},
{
"epoch": 1.0,
"eval_scitail-pairs-pos_loss": 0.06892620027065277,
"eval_scitail-pairs-pos_runtime": 0.4252,
"eval_scitail-pairs-pos_samples_per_second": 126.994,
"eval_scitail-pairs-pos_steps_per_second": 2.352,
"step": 320
},
{
"epoch": 1.0,
"eval_xsum-pairs_loss": 0.05507522076368332,
"eval_xsum-pairs_runtime": 2.8476,
"eval_xsum-pairs_samples_per_second": 44.951,
"eval_xsum-pairs_steps_per_second": 0.702,
"step": 320
},
{
"epoch": 1.0,
"eval_sciq_pairs_loss": 0.020738935098052025,
"eval_sciq_pairs_runtime": 3.7008,
"eval_sciq_pairs_samples_per_second": 34.587,
"eval_sciq_pairs_steps_per_second": 0.54,
"step": 320
},
{
"epoch": 1.0,
"eval_qasc_pairs_loss": 0.10421090573072433,
"eval_qasc_pairs_runtime": 0.6054,
"eval_qasc_pairs_samples_per_second": 211.426,
"eval_qasc_pairs_steps_per_second": 3.304,
"step": 320
},
{
"epoch": 1.0,
"eval_openbookqa_pairs_loss": 0.694441020488739,
"eval_openbookqa_pairs_runtime": 0.6019,
"eval_openbookqa_pairs_samples_per_second": 212.646,
"eval_openbookqa_pairs_steps_per_second": 3.323,
"step": 320
},
{
"epoch": 1.0,
"eval_msmarco_pairs_loss": 0.28574398159980774,
"eval_msmarco_pairs_runtime": 1.4875,
"eval_msmarco_pairs_samples_per_second": 86.048,
"eval_msmarco_pairs_steps_per_second": 1.344,
"step": 320
},
{
"epoch": 1.0,
"eval_nq_pairs_loss": 0.17458948493003845,
"eval_nq_pairs_runtime": 2.8657,
"eval_nq_pairs_samples_per_second": 44.666,
"eval_nq_pairs_steps_per_second": 0.698,
"step": 320
},
{
"epoch": 1.0,
"eval_trivia_pairs_loss": 0.68446946144104,
"eval_trivia_pairs_runtime": 4.4,
"eval_trivia_pairs_samples_per_second": 29.091,
"eval_trivia_pairs_steps_per_second": 0.455,
"step": 320
},
{
"epoch": 1.0,
"eval_gooaq_pairs_loss": 0.3039962947368622,
"eval_gooaq_pairs_runtime": 1.0187,
"eval_gooaq_pairs_samples_per_second": 125.646,
"eval_gooaq_pairs_steps_per_second": 1.963,
"step": 320
},
{
"epoch": 1.0,
"eval_paws-pos_loss": 0.024999650195240974,
"eval_paws-pos_runtime": 0.7064,
"eval_paws-pos_samples_per_second": 181.207,
"eval_paws-pos_steps_per_second": 2.831,
"step": 320
},
{
"epoch": 1.003125,
"grad_norm": 2.7458887100219727,
"learning_rate": 3.065511115324628e-05,
"loss": 0.647,
"step": 321
},
{
"epoch": 1.00625,
"grad_norm": 2.646803140640259,
"learning_rate": 3.0569757909277566e-05,
"loss": 0.4768,
"step": 322
},
{
"epoch": 1.009375,
"grad_norm": 2.367361545562744,
"learning_rate": 3.048419496707161e-05,
"loss": 0.4834,
"step": 323
},
{
"epoch": 1.0125,
"grad_norm": 3.055002450942993,
"learning_rate": 3.0398425548024827e-05,
"loss": 0.6115,
"step": 324
},
{
"epoch": 1.015625,
"grad_norm": 2.0717179775238037,
"learning_rate": 3.0312452881307356e-05,
"loss": 0.4611,
"step": 325
},
{
"epoch": 1.01875,
"grad_norm": 2.3982598781585693,
"learning_rate": 3.022628020374152e-05,
"loss": 0.4812,
"step": 326
},
{
"epoch": 1.021875,
"grad_norm": 2.90179705619812,
"learning_rate": 3.013991075967992e-05,
"loss": 0.5914,
"step": 327
},
{
"epoch": 1.025,
"grad_norm": 3.2376556396484375,
"learning_rate": 3.00533478008833e-05,
"loss": 0.7206,
"step": 328
},
{
"epoch": 1.028125,
"grad_norm": 3.591564416885376,
"learning_rate": 2.996659458639815e-05,
"loss": 0.7854,
"step": 329
},
{
"epoch": 1.03125,
"grad_norm": 2.470400094985962,
"learning_rate": 2.9879654382433948e-05,
"loss": 0.432,
"step": 330
},
{
"epoch": 1.034375,
"grad_norm": 3.061913013458252,
"learning_rate": 2.979253046224024e-05,
"loss": 0.6365,
"step": 331
},
{
"epoch": 1.0375,
"grad_norm": 2.3621861934661865,
"learning_rate": 2.9705226105983377e-05,
"loss": 0.3754,
"step": 332
},
{
"epoch": 1.040625,
"grad_norm": 2.898756742477417,
"learning_rate": 2.9617744600623023e-05,
"loss": 0.5096,
"step": 333
},
{
"epoch": 1.04375,
"grad_norm": 2.9752399921417236,
"learning_rate": 2.9530089239788428e-05,
"loss": 0.5762,
"step": 334
},
{
"epoch": 1.046875,
"grad_norm": 3.2658884525299072,
"learning_rate": 2.9442263323654362e-05,
"loss": 0.6938,
"step": 335
},
{
"epoch": 1.05,
"grad_norm": 2.0361263751983643,
"learning_rate": 2.935427015881694e-05,
"loss": 0.343,
"step": 336
},
{
"epoch": 1.053125,
"grad_norm": 3.670530319213867,
"learning_rate": 2.926611305816908e-05,
"loss": 0.7258,
"step": 337
},
{
"epoch": 1.05625,
"grad_norm": 2.597907066345215,
"learning_rate": 2.9177795340775795e-05,
"loss": 0.4658,
"step": 338
},
{
"epoch": 1.059375,
"grad_norm": 3.1930811405181885,
"learning_rate": 2.9089320331749237e-05,
"loss": 0.7108,
"step": 339
},
{
"epoch": 1.0625,
"grad_norm": 4.060088157653809,
"learning_rate": 2.9000691362123475e-05,
"loss": 1.3076,
"step": 340
},
{
"epoch": 1.065625,
"grad_norm": 1.4222996234893799,
"learning_rate": 2.8911911768729136e-05,
"loss": 0.2397,
"step": 341
},
{
"epoch": 1.06875,
"grad_norm": 2.6759979724884033,
"learning_rate": 2.8822984894067722e-05,
"loss": 0.4853,
"step": 342
},
{
"epoch": 1.071875,
"grad_norm": 3.4097981452941895,
"learning_rate": 2.8733914086185807e-05,
"loss": 0.741,
"step": 343
},
{
"epoch": 1.075,
"grad_norm": 2.869738817214966,
"learning_rate": 2.8644702698548962e-05,
"loss": 0.6066,
"step": 344
},
{
"epoch": 1.078125,
"grad_norm": 3.412572145462036,
"learning_rate": 2.8555354089915514e-05,
"loss": 0.6838,
"step": 345
},
{
"epoch": 1.08125,
"grad_norm": 2.155133008956909,
"learning_rate": 2.846587162421007e-05,
"loss": 0.4393,
"step": 346
},
{
"epoch": 1.084375,
"grad_norm": 2.3955204486846924,
"learning_rate": 2.837625867039689e-05,
"loss": 0.4102,
"step": 347
},
{
"epoch": 1.0875,
"grad_norm": 2.5801889896392822,
"learning_rate": 2.8286518602353047e-05,
"loss": 0.4947,
"step": 348
},
{
"epoch": 1.090625,
"grad_norm": 2.63447904586792,
"learning_rate": 2.819665479874137e-05,
"loss": 0.5212,
"step": 349
},
{
"epoch": 1.09375,
"grad_norm": 2.7823500633239746,
"learning_rate": 2.8106670642883283e-05,
"loss": 0.6889,
"step": 350
},
{
"epoch": 1.096875,
"grad_norm": 2.979808807373047,
"learning_rate": 2.8016569522631384e-05,
"loss": 0.625,
"step": 351
},
{
"epoch": 1.1,
"grad_norm": 2.9141488075256348,
"learning_rate": 2.792635483024193e-05,
"loss": 0.5093,
"step": 352
},
{
"epoch": 1.1,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8290125131607056,
"eval_VitaminC_cosine_ap": 0.5484962367283152,
"eval_VitaminC_cosine_f1": 0.6666666666666666,
"eval_VitaminC_cosine_f1_threshold": 0.3529857099056244,
"eval_VitaminC_cosine_precision": 0.5,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.548828125,
"eval_VitaminC_dot_accuracy_threshold": 324.3284606933594,
"eval_VitaminC_dot_ap": 0.5323604009341977,
"eval_VitaminC_dot_f1": 0.6666666666666667,
"eval_VitaminC_dot_f1_threshold": 137.8323211669922,
"eval_VitaminC_dot_precision": 0.501002004008016,
"eval_VitaminC_dot_recall": 0.9960159362549801,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 13.973267555236816,
"eval_VitaminC_euclidean_ap": 0.5488900714831766,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 22.846126556396484,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 292.48834228515625,
"eval_VitaminC_manhattan_ap": 0.5472615547862266,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 487.93536376953125,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.556640625,
"eval_VitaminC_max_accuracy_threshold": 324.3284606933594,
"eval_VitaminC_max_ap": 0.5488900714831766,
"eval_VitaminC_max_f1": 0.6666666666666667,
"eval_VitaminC_max_f1_threshold": 487.93536376953125,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5488900714831766,
"eval_sts-test_pearson_cosine": 0.8777529500191548,
"eval_sts-test_pearson_dot": 0.8689529679551734,
"eval_sts-test_pearson_euclidean": 0.8997770430839387,
"eval_sts-test_pearson_manhattan": 0.8993770557804839,
"eval_sts-test_pearson_max": 0.8997770430839387,
"eval_sts-test_spearman_cosine": 0.9027963738711295,
"eval_sts-test_spearman_dot": 0.8692104626943614,
"eval_sts-test_spearman_euclidean": 0.897084054359563,
"eval_sts-test_spearman_manhattan": 0.8970093645043006,
"eval_sts-test_spearman_max": 0.9027963738711295,
"eval_vitaminc-pairs_loss": 1.9221601486206055,
"eval_vitaminc-pairs_runtime": 1.8539,
"eval_vitaminc-pairs_samples_per_second": 58.254,
"eval_vitaminc-pairs_steps_per_second": 1.079,
"step": 352
},
{
"epoch": 1.1,
"eval_negation-triplets_loss": 0.7761179208755493,
"eval_negation-triplets_runtime": 0.2931,
"eval_negation-triplets_samples_per_second": 218.388,
"eval_negation-triplets_steps_per_second": 3.412,
"step": 352
},
{
"epoch": 1.1,
"eval_scitail-pairs-pos_loss": 0.08009649068117142,
"eval_scitail-pairs-pos_runtime": 0.3758,
"eval_scitail-pairs-pos_samples_per_second": 143.684,
"eval_scitail-pairs-pos_steps_per_second": 2.661,
"step": 352
},
{
"epoch": 1.1,
"eval_xsum-pairs_loss": 0.062557153403759,
"eval_xsum-pairs_runtime": 2.8489,
"eval_xsum-pairs_samples_per_second": 44.93,
"eval_xsum-pairs_steps_per_second": 0.702,
"step": 352
},
{
"epoch": 1.1,
"eval_sciq_pairs_loss": 0.019746748730540276,
"eval_sciq_pairs_runtime": 3.6515,
"eval_sciq_pairs_samples_per_second": 35.054,
"eval_sciq_pairs_steps_per_second": 0.548,
"step": 352
},
{
"epoch": 1.1,
"eval_qasc_pairs_loss": 0.10993637144565582,
"eval_qasc_pairs_runtime": 0.6014,
"eval_qasc_pairs_samples_per_second": 212.82,
"eval_qasc_pairs_steps_per_second": 3.325,
"step": 352
},
{
"epoch": 1.1,
"eval_openbookqa_pairs_loss": 0.7048032879829407,
"eval_openbookqa_pairs_runtime": 0.5788,
"eval_openbookqa_pairs_samples_per_second": 221.148,
"eval_openbookqa_pairs_steps_per_second": 3.455,
"step": 352
},
{
"epoch": 1.1,
"eval_msmarco_pairs_loss": 0.27703118324279785,
"eval_msmarco_pairs_runtime": 1.468,
"eval_msmarco_pairs_samples_per_second": 87.192,
"eval_msmarco_pairs_steps_per_second": 1.362,
"step": 352
},
{
"epoch": 1.1,
"eval_nq_pairs_loss": 0.1819453090429306,
"eval_nq_pairs_runtime": 2.8689,
"eval_nq_pairs_samples_per_second": 44.616,
"eval_nq_pairs_steps_per_second": 0.697,
"step": 352
},
{
"epoch": 1.1,
"eval_trivia_pairs_loss": 0.687531054019928,
"eval_trivia_pairs_runtime": 4.399,
"eval_trivia_pairs_samples_per_second": 29.098,
"eval_trivia_pairs_steps_per_second": 0.455,
"step": 352
},
{
"epoch": 1.1,
"eval_gooaq_pairs_loss": 0.30321064591407776,
"eval_gooaq_pairs_runtime": 1.0175,
"eval_gooaq_pairs_samples_per_second": 125.792,
"eval_gooaq_pairs_steps_per_second": 1.966,
"step": 352
},
{
"epoch": 1.1,
"eval_paws-pos_loss": 0.02436799556016922,
"eval_paws-pos_runtime": 0.7162,
"eval_paws-pos_samples_per_second": 178.711,
"eval_paws-pos_steps_per_second": 2.792,
"step": 352
},
{
"epoch": 1.103125,
"grad_norm": 3.3241679668426514,
"learning_rate": 2.78360299622471e-05,
"loss": 0.6242,
"step": 353
},
{
"epoch": 1.10625,
"grad_norm": 3.031259059906006,
"learning_rate": 2.7745598319327117e-05,
"loss": 0.7228,
"step": 354
},
{
"epoch": 1.109375,
"grad_norm": 2.223773956298828,
"learning_rate": 2.7655063306182235e-05,
"loss": 0.3717,
"step": 355
},
{
"epoch": 1.1125,
"grad_norm": 2.281268835067749,
"learning_rate": 2.7564428331404524e-05,
"loss": 0.3442,
"step": 356
},
{
"epoch": 1.115625,
"grad_norm": 3.040951728820801,
"learning_rate": 2.7473696807349552e-05,
"loss": 0.649,
"step": 357
},
{
"epoch": 1.11875,
"grad_norm": 2.3970398902893066,
"learning_rate": 2.738287215000792e-05,
"loss": 0.3935,
"step": 358
},
{
"epoch": 1.121875,
"grad_norm": 2.8858048915863037,
"learning_rate": 2.7291957778876656e-05,
"loss": 0.6131,
"step": 359
},
{
"epoch": 1.125,
"grad_norm": 2.974828004837036,
"learning_rate": 2.7200957116830426e-05,
"loss": 0.5322,
"step": 360
},
{
"epoch": 1.128125,
"grad_norm": 1.7254366874694824,
"learning_rate": 2.7109873589992745e-05,
"loss": 0.2073,
"step": 361
},
{
"epoch": 1.13125,
"grad_norm": 2.895080804824829,
"learning_rate": 2.7018710627606894e-05,
"loss": 0.6735,
"step": 362
},
{
"epoch": 1.134375,
"grad_norm": 3.014303207397461,
"learning_rate": 2.69274716619069e-05,
"loss": 0.7604,
"step": 363
},
{
"epoch": 1.1375,
"grad_norm": 2.703094005584717,
"learning_rate": 2.6836160127988247e-05,
"loss": 0.6165,
"step": 364
},
{
"epoch": 1.140625,
"grad_norm": 1.903054118156433,
"learning_rate": 2.6744779463678576e-05,
"loss": 0.1963,
"step": 365
},
{
"epoch": 1.14375,
"grad_norm": 1.694141149520874,
"learning_rate": 2.665333310940825e-05,
"loss": 0.1668,
"step": 366
},
{
"epoch": 1.146875,
"grad_norm": 2.7038228511810303,
"learning_rate": 2.6561824508080824e-05,
"loss": 0.5055,
"step": 367
},
{
"epoch": 1.15,
"grad_norm": 2.6325740814208984,
"learning_rate": 2.6470257104943417e-05,
"loss": 0.4919,
"step": 368
},
{
"epoch": 1.153125,
"grad_norm": 3.161851167678833,
"learning_rate": 2.6378634347456996e-05,
"loss": 0.7166,
"step": 369
},
{
"epoch": 1.15625,
"grad_norm": 2.4141595363616943,
"learning_rate": 2.6286959685166603e-05,
"loss": 0.444,
"step": 370
},
{
"epoch": 1.159375,
"grad_norm": 3.2262306213378906,
"learning_rate": 2.6195236569571454e-05,
"loss": 0.6237,
"step": 371
},
{
"epoch": 1.1625,
"grad_norm": 2.130065441131592,
"learning_rate": 2.6103468453995017e-05,
"loss": 0.4197,
"step": 372
},
{
"epoch": 1.165625,
"grad_norm": 2.9710662364959717,
"learning_rate": 2.601165879345496e-05,
"loss": 0.5569,
"step": 373
},
{
"epoch": 1.16875,
"grad_norm": 2.55246901512146,
"learning_rate": 2.591981104453313e-05,
"loss": 0.5274,
"step": 374
},
{
"epoch": 1.171875,
"grad_norm": 2.84503436088562,
"learning_rate": 2.5827928665245356e-05,
"loss": 0.6259,
"step": 375
},
{
"epoch": 1.175,
"grad_norm": 3.342602491378784,
"learning_rate": 2.5736015114911275e-05,
"loss": 0.7696,
"step": 376
},
{
"epoch": 1.178125,
"grad_norm": 2.747089147567749,
"learning_rate": 2.5644073854024117e-05,
"loss": 0.6437,
"step": 377
},
{
"epoch": 1.18125,
"grad_norm": 2.5642967224121094,
"learning_rate": 2.5552108344120387e-05,
"loss": 0.5067,
"step": 378
},
{
"epoch": 1.184375,
"grad_norm": 2.4318668842315674,
"learning_rate": 2.546012204764955e-05,
"loss": 0.3927,
"step": 379
},
{
"epoch": 1.1875,
"grad_norm": 2.7380220890045166,
"learning_rate": 2.536811842784369e-05,
"loss": 0.4557,
"step": 380
},
{
"epoch": 1.190625,
"grad_norm": 2.0136771202087402,
"learning_rate": 2.5276100948587075e-05,
"loss": 0.2425,
"step": 381
},
{
"epoch": 1.19375,
"grad_norm": 1.7208062410354614,
"learning_rate": 2.51840730742858e-05,
"loss": 0.1677,
"step": 382
},
{
"epoch": 1.196875,
"grad_norm": 1.7164028882980347,
"learning_rate": 2.5092038269737324e-05,
"loss": 0.3555,
"step": 383
},
{
"epoch": 1.2,
"grad_norm": 3.0403032302856445,
"learning_rate": 2.5e-05,
"loss": 0.8643,
"step": 384
},
{
"epoch": 1.2,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.8228827118873596,
"eval_VitaminC_cosine_ap": 0.5496046521684337,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.2927078902721405,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 303.5928649902344,
"eval_VitaminC_dot_ap": 0.5333968837571262,
"eval_VitaminC_dot_f1": 0.6657824933687002,
"eval_VitaminC_dot_f1_threshold": 99.95751953125,
"eval_VitaminC_dot_precision": 0.4990059642147117,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.552734375,
"eval_VitaminC_euclidean_accuracy_threshold": 12.029778480529785,
"eval_VitaminC_euclidean_ap": 0.5497621377316283,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 23.023883819580078,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 239.0825653076172,
"eval_VitaminC_manhattan_ap": 0.550887748657308,
"eval_VitaminC_manhattan_f1": 0.6666666666666666,
"eval_VitaminC_manhattan_f1_threshold": 484.42718505859375,
"eval_VitaminC_manhattan_precision": 0.5,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 303.5928649902344,
"eval_VitaminC_max_ap": 0.550887748657308,
"eval_VitaminC_max_f1": 0.6666666666666666,
"eval_VitaminC_max_f1_threshold": 484.42718505859375,
"eval_VitaminC_max_precision": 0.5,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.550887748657308,
"eval_sts-test_pearson_cosine": 0.8759930670182294,
"eval_sts-test_pearson_dot": 0.8657397744839983,
"eval_sts-test_pearson_euclidean": 0.9011306400734879,
"eval_sts-test_pearson_manhattan": 0.9011853213795427,
"eval_sts-test_pearson_max": 0.9011853213795427,
"eval_sts-test_spearman_cosine": 0.9034707306290366,
"eval_sts-test_spearman_dot": 0.868673716065233,
"eval_sts-test_spearman_euclidean": 0.8986341933028996,
"eval_sts-test_spearman_manhattan": 0.8983098809115962,
"eval_sts-test_spearman_max": 0.9034707306290366,
"eval_vitaminc-pairs_loss": 1.8801089525222778,
"eval_vitaminc-pairs_runtime": 1.8688,
"eval_vitaminc-pairs_samples_per_second": 57.791,
"eval_vitaminc-pairs_steps_per_second": 1.07,
"step": 384
},
{
"epoch": 1.2,
"eval_negation-triplets_loss": 0.7317898273468018,
"eval_negation-triplets_runtime": 0.3021,
"eval_negation-triplets_samples_per_second": 211.884,
"eval_negation-triplets_steps_per_second": 3.311,
"step": 384
},
{
"epoch": 1.2,
"eval_scitail-pairs-pos_loss": 0.07107817381620407,
"eval_scitail-pairs-pos_runtime": 0.3882,
"eval_scitail-pairs-pos_samples_per_second": 139.106,
"eval_scitail-pairs-pos_steps_per_second": 2.576,
"step": 384
},
{
"epoch": 1.2,
"eval_xsum-pairs_loss": 0.05828472599387169,
"eval_xsum-pairs_runtime": 2.853,
"eval_xsum-pairs_samples_per_second": 44.865,
"eval_xsum-pairs_steps_per_second": 0.701,
"step": 384
},
{
"epoch": 1.2,
"eval_sciq_pairs_loss": 0.019503507763147354,
"eval_sciq_pairs_runtime": 3.7158,
"eval_sciq_pairs_samples_per_second": 34.448,
"eval_sciq_pairs_steps_per_second": 0.538,
"step": 384
},
{
"epoch": 1.2,
"eval_qasc_pairs_loss": 0.11732859164476395,
"eval_qasc_pairs_runtime": 0.605,
"eval_qasc_pairs_samples_per_second": 211.578,
"eval_qasc_pairs_steps_per_second": 3.306,
"step": 384
},
{
"epoch": 1.2,
"eval_openbookqa_pairs_loss": 0.7122623324394226,
"eval_openbookqa_pairs_runtime": 0.5839,
"eval_openbookqa_pairs_samples_per_second": 219.199,
"eval_openbookqa_pairs_steps_per_second": 3.425,
"step": 384
},
{
"epoch": 1.2,
"eval_msmarco_pairs_loss": 0.28523409366607666,
"eval_msmarco_pairs_runtime": 1.4705,
"eval_msmarco_pairs_samples_per_second": 87.043,
"eval_msmarco_pairs_steps_per_second": 1.36,
"step": 384
},
{
"epoch": 1.2,
"eval_nq_pairs_loss": 0.178893581032753,
"eval_nq_pairs_runtime": 2.8648,
"eval_nq_pairs_samples_per_second": 44.681,
"eval_nq_pairs_steps_per_second": 0.698,
"step": 384
},
{
"epoch": 1.2,
"eval_trivia_pairs_loss": 0.636802613735199,
"eval_trivia_pairs_runtime": 4.3993,
"eval_trivia_pairs_samples_per_second": 29.096,
"eval_trivia_pairs_steps_per_second": 0.455,
"step": 384
},
{
"epoch": 1.2,
"eval_gooaq_pairs_loss": 0.3245222866535187,
"eval_gooaq_pairs_runtime": 1.0085,
"eval_gooaq_pairs_samples_per_second": 126.919,
"eval_gooaq_pairs_steps_per_second": 1.983,
"step": 384
},
{
"epoch": 1.2,
"eval_paws-pos_loss": 0.024447523057460785,
"eval_paws-pos_runtime": 0.6966,
"eval_paws-pos_samples_per_second": 183.741,
"eval_paws-pos_steps_per_second": 2.871,
"step": 384
},
{
"epoch": 1.203125,
"grad_norm": 3.0316460132598877,
"learning_rate": 2.4907961730262685e-05,
"loss": 0.6056,
"step": 385
},
{
"epoch": 1.20625,
"grad_norm": 3.3051912784576416,
"learning_rate": 2.4815926925714205e-05,
"loss": 0.5924,
"step": 386
},
{
"epoch": 1.209375,
"grad_norm": 2.5136680603027344,
"learning_rate": 2.4723899051412934e-05,
"loss": 0.4131,
"step": 387
},
{
"epoch": 1.2125,
"grad_norm": 2.1033709049224854,
"learning_rate": 2.463188157215632e-05,
"loss": 0.3347,
"step": 388
},
{
"epoch": 1.215625,
"grad_norm": 2.217355728149414,
"learning_rate": 2.4539877952350458e-05,
"loss": 0.4317,
"step": 389
},
{
"epoch": 1.21875,
"grad_norm": 1.9194687604904175,
"learning_rate": 2.444789165587962e-05,
"loss": 0.2488,
"step": 390
},
{
"epoch": 1.221875,
"grad_norm": 3.4252638816833496,
"learning_rate": 2.435592614597589e-05,
"loss": 0.6856,
"step": 391
},
{
"epoch": 1.225,
"grad_norm": 2.816314935684204,
"learning_rate": 2.4263984885088735e-05,
"loss": 0.5261,
"step": 392
},
{
"epoch": 1.228125,
"grad_norm": 2.5925676822662354,
"learning_rate": 2.4172071334754654e-05,
"loss": 0.4683,
"step": 393
},
{
"epoch": 1.23125,
"grad_norm": 3.6116645336151123,
"learning_rate": 2.4080188955466874e-05,
"loss": 1.066,
"step": 394
},
{
"epoch": 1.234375,
"grad_norm": 2.6395368576049805,
"learning_rate": 2.398834120654504e-05,
"loss": 0.5434,
"step": 395
},
{
"epoch": 1.2375,
"grad_norm": 2.5325918197631836,
"learning_rate": 2.3896531546004992e-05,
"loss": 0.4129,
"step": 396
},
{
"epoch": 1.240625,
"grad_norm": 1.9665679931640625,
"learning_rate": 2.380476343042855e-05,
"loss": 0.3367,
"step": 397
},
{
"epoch": 1.24375,
"grad_norm": 3.6547625064849854,
"learning_rate": 2.3713040314833403e-05,
"loss": 0.716,
"step": 398
},
{
"epoch": 1.246875,
"grad_norm": 2.7950963973999023,
"learning_rate": 2.3621365652543013e-05,
"loss": 0.4767,
"step": 399
},
{
"epoch": 1.25,
"grad_norm": 1.975703239440918,
"learning_rate": 2.3529742895056592e-05,
"loss": 0.3659,
"step": 400
},
{
"epoch": 1.253125,
"grad_norm": 2.8645551204681396,
"learning_rate": 2.3438175491919185e-05,
"loss": 0.4731,
"step": 401
},
{
"epoch": 1.25625,
"grad_norm": 2.649005889892578,
"learning_rate": 2.3346666890591757e-05,
"loss": 0.4562,
"step": 402
},
{
"epoch": 1.259375,
"grad_norm": 2.2082812786102295,
"learning_rate": 2.3255220536321427e-05,
"loss": 0.3397,
"step": 403
},
{
"epoch": 1.2625,
"grad_norm": 3.8959875106811523,
"learning_rate": 2.3163839872011763e-05,
"loss": 1.2082,
"step": 404
},
{
"epoch": 1.265625,
"grad_norm": 2.6099252700805664,
"learning_rate": 2.307252833809311e-05,
"loss": 0.6162,
"step": 405
},
{
"epoch": 1.26875,
"grad_norm": 2.4495608806610107,
"learning_rate": 2.298128937239311e-05,
"loss": 0.4767,
"step": 406
},
{
"epoch": 1.271875,
"grad_norm": 2.724579095840454,
"learning_rate": 2.2890126410007264e-05,
"loss": 0.4384,
"step": 407
},
{
"epoch": 1.275,
"grad_norm": 2.7510993480682373,
"learning_rate": 2.2799042883169576e-05,
"loss": 0.5368,
"step": 408
},
{
"epoch": 1.278125,
"grad_norm": 2.994795083999634,
"learning_rate": 2.270804222112335e-05,
"loss": 0.6885,
"step": 409
},
{
"epoch": 1.28125,
"grad_norm": 2.59830904006958,
"learning_rate": 2.2617127849992082e-05,
"loss": 0.4318,
"step": 410
},
{
"epoch": 1.284375,
"grad_norm": 2.603785276412964,
"learning_rate": 2.252630319265045e-05,
"loss": 0.5648,
"step": 411
},
{
"epoch": 1.2875,
"grad_norm": 1.8414777517318726,
"learning_rate": 2.2435571668595482e-05,
"loss": 0.3,
"step": 412
},
{
"epoch": 1.290625,
"grad_norm": 3.080265998840332,
"learning_rate": 2.2344936693817774e-05,
"loss": 0.573,
"step": 413
},
{
"epoch": 1.29375,
"grad_norm": 3.2287120819091797,
"learning_rate": 2.225440168067289e-05,
"loss": 0.6759,
"step": 414
},
{
"epoch": 1.296875,
"grad_norm": 3.5036377906799316,
"learning_rate": 2.216397003775291e-05,
"loss": 1.0739,
"step": 415
},
{
"epoch": 1.3,
"grad_norm": 3.4340429306030273,
"learning_rate": 2.207364516975808e-05,
"loss": 0.6794,
"step": 416
},
{
"epoch": 1.3,
"eval_VitaminC_cosine_accuracy": 0.556640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.8248050212860107,
"eval_VitaminC_cosine_ap": 0.549721039851088,
"eval_VitaminC_cosine_f1": 0.6675531914893617,
"eval_VitaminC_cosine_f1_threshold": 0.3625495135784149,
"eval_VitaminC_cosine_precision": 0.500998003992016,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.546875,
"eval_VitaminC_dot_accuracy_threshold": 315.43896484375,
"eval_VitaminC_dot_ap": 0.5352429908255126,
"eval_VitaminC_dot_f1": 0.6675531914893617,
"eval_VitaminC_dot_f1_threshold": 129.65655517578125,
"eval_VitaminC_dot_precision": 0.500998003992016,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 12.217185974121094,
"eval_VitaminC_euclidean_ap": 0.5506836806067088,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 23.268470764160156,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.552734375,
"eval_VitaminC_manhattan_accuracy_threshold": 228.33251953125,
"eval_VitaminC_manhattan_ap": 0.5499105636757091,
"eval_VitaminC_manhattan_f1": 0.6666666666666667,
"eval_VitaminC_manhattan_f1_threshold": 475.83892822265625,
"eval_VitaminC_manhattan_precision": 0.501002004008016,
"eval_VitaminC_manhattan_recall": 0.9960159362549801,
"eval_VitaminC_max_accuracy": 0.556640625,
"eval_VitaminC_max_accuracy_threshold": 315.43896484375,
"eval_VitaminC_max_ap": 0.5506836806067088,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 475.83892822265625,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5506836806067088,
"eval_sts-test_pearson_cosine": 0.8783564854148046,
"eval_sts-test_pearson_dot": 0.8688921197467538,
"eval_sts-test_pearson_euclidean": 0.901280483137533,
"eval_sts-test_pearson_manhattan": 0.9014338360947061,
"eval_sts-test_pearson_max": 0.9014338360947061,
"eval_sts-test_spearman_cosine": 0.9035353066992244,
"eval_sts-test_spearman_dot": 0.8704091252307301,
"eval_sts-test_spearman_euclidean": 0.8982903693616295,
"eval_sts-test_spearman_manhattan": 0.897955987936513,
"eval_sts-test_spearman_max": 0.9035353066992244,
"eval_vitaminc-pairs_loss": 1.8975528478622437,
"eval_vitaminc-pairs_runtime": 1.8521,
"eval_vitaminc-pairs_samples_per_second": 58.313,
"eval_vitaminc-pairs_steps_per_second": 1.08,
"step": 416
},
{
"epoch": 1.3,
"eval_negation-triplets_loss": 0.7549135684967041,
"eval_negation-triplets_runtime": 0.2958,
"eval_negation-triplets_samples_per_second": 216.337,
"eval_negation-triplets_steps_per_second": 3.38,
"step": 416
},
{
"epoch": 1.3,
"eval_scitail-pairs-pos_loss": 0.07042308896780014,
"eval_scitail-pairs-pos_runtime": 0.3833,
"eval_scitail-pairs-pos_samples_per_second": 140.89,
"eval_scitail-pairs-pos_steps_per_second": 2.609,
"step": 416
},
{
"epoch": 1.3,
"eval_xsum-pairs_loss": 0.054973307996988297,
"eval_xsum-pairs_runtime": 2.8675,
"eval_xsum-pairs_samples_per_second": 44.639,
"eval_xsum-pairs_steps_per_second": 0.697,
"step": 416
},
{
"epoch": 1.3,
"eval_sciq_pairs_loss": 0.019865412265062332,
"eval_sciq_pairs_runtime": 3.6462,
"eval_sciq_pairs_samples_per_second": 35.105,
"eval_sciq_pairs_steps_per_second": 0.549,
"step": 416
},
{
"epoch": 1.3,
"eval_qasc_pairs_loss": 0.10839240998029709,
"eval_qasc_pairs_runtime": 0.6001,
"eval_qasc_pairs_samples_per_second": 213.308,
"eval_qasc_pairs_steps_per_second": 3.333,
"step": 416
},
{
"epoch": 1.3,
"eval_openbookqa_pairs_loss": 0.709105908870697,
"eval_openbookqa_pairs_runtime": 0.5773,
"eval_openbookqa_pairs_samples_per_second": 221.728,
"eval_openbookqa_pairs_steps_per_second": 3.464,
"step": 416
},
{
"epoch": 1.3,
"eval_msmarco_pairs_loss": 0.2810967266559601,
"eval_msmarco_pairs_runtime": 1.4691,
"eval_msmarco_pairs_samples_per_second": 87.125,
"eval_msmarco_pairs_steps_per_second": 1.361,
"step": 416
},
{
"epoch": 1.3,
"eval_nq_pairs_loss": 0.16148869693279266,
"eval_nq_pairs_runtime": 2.8649,
"eval_nq_pairs_samples_per_second": 44.679,
"eval_nq_pairs_steps_per_second": 0.698,
"step": 416
},
{
"epoch": 1.3,
"eval_trivia_pairs_loss": 0.6475186944007874,
"eval_trivia_pairs_runtime": 4.403,
"eval_trivia_pairs_samples_per_second": 29.071,
"eval_trivia_pairs_steps_per_second": 0.454,
"step": 416
},
{
"epoch": 1.3,
"eval_gooaq_pairs_loss": 0.31666722893714905,
"eval_gooaq_pairs_runtime": 1.0071,
"eval_gooaq_pairs_samples_per_second": 127.1,
"eval_gooaq_pairs_steps_per_second": 1.986,
"step": 416
},
{
"epoch": 1.3,
"eval_paws-pos_loss": 0.025139717385172844,
"eval_paws-pos_runtime": 0.6875,
"eval_paws-pos_samples_per_second": 186.173,
"eval_paws-pos_steps_per_second": 2.909,
"step": 416
},
{
"epoch": 1.303125,
"grad_norm": 2.347867012023926,
"learning_rate": 2.1983430477368622e-05,
"loss": 0.4515,
"step": 417
},
{
"epoch": 1.30625,
"grad_norm": 2.957559585571289,
"learning_rate": 2.1893329357116726e-05,
"loss": 0.5992,
"step": 418
},
{
"epoch": 1.309375,
"grad_norm": 2.799776792526245,
"learning_rate": 2.180334520125863e-05,
"loss": 0.7221,
"step": 419
},
{
"epoch": 1.3125,
"grad_norm": 1.9639122486114502,
"learning_rate": 2.1713481397646955e-05,
"loss": 0.3968,
"step": 420
},
{
"epoch": 1.315625,
"grad_norm": 2.6604442596435547,
"learning_rate": 2.162374132960311e-05,
"loss": 0.4198,
"step": 421
},
{
"epoch": 1.31875,
"grad_norm": 2.5121357440948486,
"learning_rate": 2.1534128375789932e-05,
"loss": 0.6268,
"step": 422
},
{
"epoch": 1.321875,
"grad_norm": 2.014528274536133,
"learning_rate": 2.1444645910084495e-05,
"loss": 0.3976,
"step": 423
},
{
"epoch": 1.325,
"grad_norm": 2.713228464126587,
"learning_rate": 2.1355297301451044e-05,
"loss": 0.6003,
"step": 424
},
{
"epoch": 1.328125,
"grad_norm": 2.6102914810180664,
"learning_rate": 2.12660859138142e-05,
"loss": 0.4381,
"step": 425
},
{
"epoch": 1.33125,
"grad_norm": 3.1329894065856934,
"learning_rate": 2.1177015105932287e-05,
"loss": 0.8803,
"step": 426
},
{
"epoch": 1.334375,
"grad_norm": 2.3437535762786865,
"learning_rate": 2.108808823127087e-05,
"loss": 0.5635,
"step": 427
},
{
"epoch": 1.3375,
"grad_norm": 2.732607841491699,
"learning_rate": 2.0999308637876527e-05,
"loss": 0.5262,
"step": 428
},
{
"epoch": 1.340625,
"grad_norm": 2.553740978240967,
"learning_rate": 2.091067966825077e-05,
"loss": 0.6506,
"step": 429
},
{
"epoch": 1.34375,
"grad_norm": 2.2489590644836426,
"learning_rate": 2.0822204659224207e-05,
"loss": 0.3486,
"step": 430
},
{
"epoch": 1.346875,
"grad_norm": 3.328228235244751,
"learning_rate": 2.0733886941830926e-05,
"loss": 0.9099,
"step": 431
},
{
"epoch": 1.35,
"grad_norm": 2.4730563163757324,
"learning_rate": 2.064572984118307e-05,
"loss": 0.4199,
"step": 432
},
{
"epoch": 1.353125,
"grad_norm": 2.7208938598632812,
"learning_rate": 2.055773667634564e-05,
"loss": 0.4908,
"step": 433
},
{
"epoch": 1.35625,
"grad_norm": 2.666827440261841,
"learning_rate": 2.0469910760211578e-05,
"loss": 0.6869,
"step": 434
},
{
"epoch": 1.359375,
"grad_norm": 2.515075922012329,
"learning_rate": 2.038225539937698e-05,
"loss": 0.5644,
"step": 435
},
{
"epoch": 1.3625,
"grad_norm": 3.286777973175049,
"learning_rate": 2.0294773894016632e-05,
"loss": 0.6714,
"step": 436
},
{
"epoch": 1.365625,
"grad_norm": 2.477515935897827,
"learning_rate": 2.0207469537759766e-05,
"loss": 0.4976,
"step": 437
},
{
"epoch": 1.36875,
"grad_norm": 2.30999493598938,
"learning_rate": 2.0120345617566058e-05,
"loss": 0.4468,
"step": 438
},
{
"epoch": 1.371875,
"grad_norm": 2.011974573135376,
"learning_rate": 2.003340541360186e-05,
"loss": 0.3923,
"step": 439
},
{
"epoch": 1.375,
"grad_norm": 2.466869592666626,
"learning_rate": 1.9946652199116702e-05,
"loss": 0.5753,
"step": 440
},
{
"epoch": 1.378125,
"grad_norm": 2.6485002040863037,
"learning_rate": 1.986008924032009e-05,
"loss": 0.5134,
"step": 441
},
{
"epoch": 1.38125,
"grad_norm": 2.3299734592437744,
"learning_rate": 1.9773719796258484e-05,
"loss": 0.3858,
"step": 442
},
{
"epoch": 1.384375,
"grad_norm": 3.0803678035736084,
"learning_rate": 1.9687547118692646e-05,
"loss": 0.6681,
"step": 443
},
{
"epoch": 1.3875,
"grad_norm": 2.463984727859497,
"learning_rate": 1.960157445197518e-05,
"loss": 0.4702,
"step": 444
},
{
"epoch": 1.390625,
"grad_norm": 2.5118319988250732,
"learning_rate": 1.9515805032928393e-05,
"loss": 0.501,
"step": 445
},
{
"epoch": 1.39375,
"grad_norm": 2.670452356338501,
"learning_rate": 1.943024209072244e-05,
"loss": 0.459,
"step": 446
},
{
"epoch": 1.396875,
"grad_norm": 2.8598179817199707,
"learning_rate": 1.9344888846753727e-05,
"loss": 0.5879,
"step": 447
},
{
"epoch": 1.4,
"grad_norm": 2.703799247741699,
"learning_rate": 1.9259748514523654e-05,
"loss": 0.6276,
"step": 448
},
{
"epoch": 1.4,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8286198973655701,
"eval_VitaminC_cosine_ap": 0.5491639681085214,
"eval_VitaminC_cosine_f1": 0.6666666666666667,
"eval_VitaminC_cosine_f1_threshold": 0.3577578365802765,
"eval_VitaminC_cosine_precision": 0.501002004008016,
"eval_VitaminC_cosine_recall": 0.9960159362549801,
"eval_VitaminC_dot_accuracy": 0.552734375,
"eval_VitaminC_dot_accuracy_threshold": 305.3611145019531,
"eval_VitaminC_dot_ap": 0.5346765167717246,
"eval_VitaminC_dot_f1": 0.6675531914893617,
"eval_VitaminC_dot_f1_threshold": 120.80284118652344,
"eval_VitaminC_dot_precision": 0.500998003992016,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.552734375,
"eval_VitaminC_euclidean_accuracy_threshold": 15.638836860656738,
"eval_VitaminC_euclidean_ap": 0.551666574153856,
"eval_VitaminC_euclidean_f1": 0.6675531914893617,
"eval_VitaminC_euclidean_f1_threshold": 22.694026947021484,
"eval_VitaminC_euclidean_precision": 0.500998003992016,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 345.3646240234375,
"eval_VitaminC_manhattan_ap": 0.5493612263798584,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 489.2554931640625,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 345.3646240234375,
"eval_VitaminC_max_ap": 0.551666574153856,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 489.2554931640625,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.551666574153856,
"eval_sts-test_pearson_cosine": 0.8776689405218701,
"eval_sts-test_pearson_dot": 0.8671968346407674,
"eval_sts-test_pearson_euclidean": 0.9011981135741202,
"eval_sts-test_pearson_manhattan": 0.901224194183572,
"eval_sts-test_pearson_max": 0.901224194183572,
"eval_sts-test_spearman_cosine": 0.9040082380296086,
"eval_sts-test_spearman_dot": 0.8686231471398608,
"eval_sts-test_spearman_euclidean": 0.8983323907960761,
"eval_sts-test_spearman_manhattan": 0.898603359683801,
"eval_sts-test_spearman_max": 0.9040082380296086,
"eval_vitaminc-pairs_loss": 1.8429665565490723,
"eval_vitaminc-pairs_runtime": 1.8248,
"eval_vitaminc-pairs_samples_per_second": 59.185,
"eval_vitaminc-pairs_steps_per_second": 1.096,
"step": 448
},
{
"epoch": 1.4,
"eval_negation-triplets_loss": 0.6982068419456482,
"eval_negation-triplets_runtime": 0.2935,
"eval_negation-triplets_samples_per_second": 218.06,
"eval_negation-triplets_steps_per_second": 3.407,
"step": 448
},
{
"epoch": 1.4,
"eval_scitail-pairs-pos_loss": 0.05678475275635719,
"eval_scitail-pairs-pos_runtime": 0.3607,
"eval_scitail-pairs-pos_samples_per_second": 149.722,
"eval_scitail-pairs-pos_steps_per_second": 2.773,
"step": 448
},
{
"epoch": 1.4,
"eval_xsum-pairs_loss": 0.04836395010352135,
"eval_xsum-pairs_runtime": 2.8385,
"eval_xsum-pairs_samples_per_second": 45.094,
"eval_xsum-pairs_steps_per_second": 0.705,
"step": 448
},
{
"epoch": 1.4,
"eval_sciq_pairs_loss": 0.019589349627494812,
"eval_sciq_pairs_runtime": 3.6678,
"eval_sciq_pairs_samples_per_second": 34.898,
"eval_sciq_pairs_steps_per_second": 0.545,
"step": 448
},
{
"epoch": 1.4,
"eval_qasc_pairs_loss": 0.11168085038661957,
"eval_qasc_pairs_runtime": 0.5997,
"eval_qasc_pairs_samples_per_second": 213.44,
"eval_qasc_pairs_steps_per_second": 3.335,
"step": 448
},
{
"epoch": 1.4,
"eval_openbookqa_pairs_loss": 0.7535218596458435,
"eval_openbookqa_pairs_runtime": 0.5778,
"eval_openbookqa_pairs_samples_per_second": 221.542,
"eval_openbookqa_pairs_steps_per_second": 3.462,
"step": 448
},
{
"epoch": 1.4,
"eval_msmarco_pairs_loss": 0.27821871638298035,
"eval_msmarco_pairs_runtime": 1.4582,
"eval_msmarco_pairs_samples_per_second": 87.779,
"eval_msmarco_pairs_steps_per_second": 1.372,
"step": 448
},
{
"epoch": 1.4,
"eval_nq_pairs_loss": 0.15653903782367706,
"eval_nq_pairs_runtime": 2.8546,
"eval_nq_pairs_samples_per_second": 44.84,
"eval_nq_pairs_steps_per_second": 0.701,
"step": 448
},
{
"epoch": 1.4,
"eval_trivia_pairs_loss": 0.6306825280189514,
"eval_trivia_pairs_runtime": 4.3878,
"eval_trivia_pairs_samples_per_second": 29.172,
"eval_trivia_pairs_steps_per_second": 0.456,
"step": 448
},
{
"epoch": 1.4,
"eval_gooaq_pairs_loss": 0.3191468417644501,
"eval_gooaq_pairs_runtime": 0.9973,
"eval_gooaq_pairs_samples_per_second": 128.345,
"eval_gooaq_pairs_steps_per_second": 2.005,
"step": 448
},
{
"epoch": 1.4,
"eval_paws-pos_loss": 0.024477336555719376,
"eval_paws-pos_runtime": 0.6847,
"eval_paws-pos_samples_per_second": 186.937,
"eval_paws-pos_steps_per_second": 2.921,
"step": 448
},
{
"epoch": 1.403125,
"grad_norm": 2.7174854278564453,
"learning_rate": 1.917482429951761e-05,
"loss": 0.5358,
"step": 449
},
{
"epoch": 1.40625,
"grad_norm": 2.997868061065674,
"learning_rate": 1.909011939908428e-05,
"loss": 0.8326,
"step": 450
},
{
"epoch": 1.409375,
"grad_norm": 2.0322728157043457,
"learning_rate": 1.90056370023153e-05,
"loss": 0.2866,
"step": 451
},
{
"epoch": 1.4125,
"grad_norm": 1.7908676862716675,
"learning_rate": 1.8921380289925155e-05,
"loss": 0.247,
"step": 452
},
{
"epoch": 1.415625,
"grad_norm": 2.5119776725769043,
"learning_rate": 1.8837352434131445e-05,
"loss": 0.519,
"step": 453
},
{
"epoch": 1.41875,
"grad_norm": 2.468385696411133,
"learning_rate": 1.8753556598535448e-05,
"loss": 0.4117,
"step": 454
},
{
"epoch": 1.421875,
"grad_norm": 2.097646713256836,
"learning_rate": 1.8669995938003007e-05,
"loss": 0.437,
"step": 455
},
{
"epoch": 1.425,
"grad_norm": 2.275872230529785,
"learning_rate": 1.8586673598545775e-05,
"loss": 0.3619,
"step": 456
},
{
"epoch": 1.428125,
"grad_norm": 2.5506107807159424,
"learning_rate": 1.8503592717202724e-05,
"loss": 0.4273,
"step": 457
},
{
"epoch": 1.43125,
"grad_norm": 2.219841718673706,
"learning_rate": 1.842075642192209e-05,
"loss": 0.2739,
"step": 458
},
{
"epoch": 1.434375,
"grad_norm": 2.54673433303833,
"learning_rate": 1.8338167831443567e-05,
"loss": 0.5714,
"step": 459
},
{
"epoch": 1.4375,
"grad_norm": 2.696007251739502,
"learning_rate": 1.82558300551809e-05,
"loss": 0.5485,
"step": 460
},
{
"epoch": 1.440625,
"grad_norm": 2.292741537094116,
"learning_rate": 1.8173746193104848e-05,
"loss": 0.4829,
"step": 461
},
{
"epoch": 1.44375,
"grad_norm": 2.3757193088531494,
"learning_rate": 1.80919193356264e-05,
"loss": 0.4904,
"step": 462
},
{
"epoch": 1.446875,
"grad_norm": 3.299426555633545,
"learning_rate": 1.801035256348051e-05,
"loss": 0.6449,
"step": 463
},
{
"epoch": 1.45,
"grad_norm": 3.2711825370788574,
"learning_rate": 1.7929048947610038e-05,
"loss": 0.6896,
"step": 464
},
{
"epoch": 1.453125,
"grad_norm": 2.4364447593688965,
"learning_rate": 1.7848011549050174e-05,
"loss": 0.4174,
"step": 465
},
{
"epoch": 1.45625,
"grad_norm": 2.7479851245880127,
"learning_rate": 1.776724341881316e-05,
"loss": 0.5254,
"step": 466
},
{
"epoch": 1.459375,
"grad_norm": 2.636861801147461,
"learning_rate": 1.7686747597773465e-05,
"loss": 0.5287,
"step": 467
},
{
"epoch": 1.4625,
"grad_norm": 1.8790123462677002,
"learning_rate": 1.7606527116553243e-05,
"loss": 0.2421,
"step": 468
},
{
"epoch": 1.465625,
"grad_norm": 2.039740800857544,
"learning_rate": 1.7526584995408277e-05,
"loss": 0.3939,
"step": 469
},
{
"epoch": 1.46875,
"grad_norm": 3.1484439373016357,
"learning_rate": 1.744692424411424e-05,
"loss": 0.7248,
"step": 470
},
{
"epoch": 1.471875,
"grad_norm": 2.309475898742676,
"learning_rate": 1.7367547861853396e-05,
"loss": 0.3479,
"step": 471
},
{
"epoch": 1.475,
"grad_norm": 2.4634172916412354,
"learning_rate": 1.7288458837101676e-05,
"loss": 0.472,
"step": 472
},
{
"epoch": 1.478125,
"grad_norm": 2.701162815093994,
"learning_rate": 1.7209660147516157e-05,
"loss": 0.5639,
"step": 473
},
{
"epoch": 1.48125,
"grad_norm": 2.2868311405181885,
"learning_rate": 1.713115475982297e-05,
"loss": 0.4077,
"step": 474
},
{
"epoch": 1.484375,
"grad_norm": 2.256727933883667,
"learning_rate": 1.705294562970558e-05,
"loss": 0.3173,
"step": 475
},
{
"epoch": 1.4875,
"grad_norm": 2.110504388809204,
"learning_rate": 1.6975035701693544e-05,
"loss": 0.3307,
"step": 476
},
{
"epoch": 1.490625,
"grad_norm": 2.267214059829712,
"learning_rate": 1.6897427909051608e-05,
"loss": 0.3761,
"step": 477
},
{
"epoch": 1.49375,
"grad_norm": 2.538956880569458,
"learning_rate": 1.6820125173669307e-05,
"loss": 0.5454,
"step": 478
},
{
"epoch": 1.496875,
"grad_norm": 1.8530148267745972,
"learning_rate": 1.6743130405950932e-05,
"loss": 0.309,
"step": 479
},
{
"epoch": 1.5,
"grad_norm": 2.507021903991699,
"learning_rate": 1.6666446504705974e-05,
"loss": 0.4082,
"step": 480
},
{
"epoch": 1.5,
"eval_VitaminC_cosine_accuracy": 0.556640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.7370772361755371,
"eval_VitaminC_cosine_ap": 0.5534084328915541,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.2802589535713196,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.560546875,
"eval_VitaminC_dot_accuracy_threshold": 308.4664611816406,
"eval_VitaminC_dot_ap": 0.5342245787700969,
"eval_VitaminC_dot_f1": 0.6666666666666666,
"eval_VitaminC_dot_f1_threshold": 113.09681701660156,
"eval_VitaminC_dot_precision": 0.5,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 11.881275177001953,
"eval_VitaminC_euclidean_ap": 0.5562125403421339,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 22.934049606323242,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 235.32266235351562,
"eval_VitaminC_manhattan_ap": 0.5543420221752726,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 492.56402587890625,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.560546875,
"eval_VitaminC_max_accuracy_threshold": 308.4664611816406,
"eval_VitaminC_max_ap": 0.5562125403421339,
"eval_VitaminC_max_f1": 0.6666666666666666,
"eval_VitaminC_max_f1_threshold": 492.56402587890625,
"eval_VitaminC_max_precision": 0.5,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5562125403421339,
"eval_sts-test_pearson_cosine": 0.8785940980445964,
"eval_sts-test_pearson_dot": 0.868901198999867,
"eval_sts-test_pearson_euclidean": 0.9008999462703983,
"eval_sts-test_pearson_manhattan": 0.9007358817864316,
"eval_sts-test_pearson_max": 0.9008999462703983,
"eval_sts-test_spearman_cosine": 0.9034113658980666,
"eval_sts-test_spearman_dot": 0.8689611981684112,
"eval_sts-test_spearman_euclidean": 0.8982906826204593,
"eval_sts-test_spearman_manhattan": 0.8980298275178087,
"eval_sts-test_spearman_max": 0.9034113658980666,
"eval_vitaminc-pairs_loss": 1.8594883680343628,
"eval_vitaminc-pairs_runtime": 1.8263,
"eval_vitaminc-pairs_samples_per_second": 59.137,
"eval_vitaminc-pairs_steps_per_second": 1.095,
"step": 480
},
{
"epoch": 1.5,
"eval_negation-triplets_loss": 0.7203199863433838,
"eval_negation-triplets_runtime": 0.2915,
"eval_negation-triplets_samples_per_second": 219.564,
"eval_negation-triplets_steps_per_second": 3.431,
"step": 480
},
{
"epoch": 1.5,
"eval_scitail-pairs-pos_loss": 0.07524989545345306,
"eval_scitail-pairs-pos_runtime": 0.37,
"eval_scitail-pairs-pos_samples_per_second": 145.939,
"eval_scitail-pairs-pos_steps_per_second": 2.703,
"step": 480
},
{
"epoch": 1.5,
"eval_xsum-pairs_loss": 0.04331779107451439,
"eval_xsum-pairs_runtime": 2.8387,
"eval_xsum-pairs_samples_per_second": 45.091,
"eval_xsum-pairs_steps_per_second": 0.705,
"step": 480
},
{
"epoch": 1.5,
"eval_sciq_pairs_loss": 0.018652573227882385,
"eval_sciq_pairs_runtime": 3.6202,
"eval_sciq_pairs_samples_per_second": 35.357,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 480
},
{
"epoch": 1.5,
"eval_qasc_pairs_loss": 0.10793650150299072,
"eval_qasc_pairs_runtime": 0.5983,
"eval_qasc_pairs_samples_per_second": 213.952,
"eval_qasc_pairs_steps_per_second": 3.343,
"step": 480
},
{
"epoch": 1.5,
"eval_openbookqa_pairs_loss": 0.6959180235862732,
"eval_openbookqa_pairs_runtime": 0.5741,
"eval_openbookqa_pairs_samples_per_second": 222.961,
"eval_openbookqa_pairs_steps_per_second": 3.484,
"step": 480
},
{
"epoch": 1.5,
"eval_msmarco_pairs_loss": 0.26085397601127625,
"eval_msmarco_pairs_runtime": 1.4595,
"eval_msmarco_pairs_samples_per_second": 87.699,
"eval_msmarco_pairs_steps_per_second": 1.37,
"step": 480
},
{
"epoch": 1.5,
"eval_nq_pairs_loss": 0.1553785651922226,
"eval_nq_pairs_runtime": 2.8659,
"eval_nq_pairs_samples_per_second": 44.663,
"eval_nq_pairs_steps_per_second": 0.698,
"step": 480
},
{
"epoch": 1.5,
"eval_trivia_pairs_loss": 0.6472769379615784,
"eval_trivia_pairs_runtime": 4.3924,
"eval_trivia_pairs_samples_per_second": 29.141,
"eval_trivia_pairs_steps_per_second": 0.455,
"step": 480
},
{
"epoch": 1.5,
"eval_gooaq_pairs_loss": 0.3059709370136261,
"eval_gooaq_pairs_runtime": 0.9999,
"eval_gooaq_pairs_samples_per_second": 128.009,
"eval_gooaq_pairs_steps_per_second": 2.0,
"step": 480
},
{
"epoch": 1.5,
"eval_paws-pos_loss": 0.02474558725953102,
"eval_paws-pos_runtime": 0.6798,
"eval_paws-pos_samples_per_second": 188.303,
"eval_paws-pos_steps_per_second": 2.942,
"step": 480
},
{
"epoch": 1.503125,
"grad_norm": 1.756934404373169,
"learning_rate": 1.6590076357039962e-05,
"loss": 0.2147,
"step": 481
},
{
"epoch": 1.50625,
"grad_norm": 2.775935411453247,
"learning_rate": 1.6514022838245802e-05,
"loss": 0.5614,
"step": 482
},
{
"epoch": 1.509375,
"grad_norm": 2.4856698513031006,
"learning_rate": 1.6438288811695494e-05,
"loss": 0.3865,
"step": 483
},
{
"epoch": 1.5125,
"grad_norm": 1.2785615921020508,
"learning_rate": 1.636287712873232e-05,
"loss": 0.1715,
"step": 484
},
{
"epoch": 1.515625,
"grad_norm": 2.2189393043518066,
"learning_rate": 1.6287790628563536e-05,
"loss": 0.3597,
"step": 485
},
{
"epoch": 1.51875,
"grad_norm": 2.2382972240448,
"learning_rate": 1.6213032138153418e-05,
"loss": 0.3827,
"step": 486
},
{
"epoch": 1.521875,
"grad_norm": 2.6651275157928467,
"learning_rate": 1.613860447211689e-05,
"loss": 0.4895,
"step": 487
},
{
"epoch": 1.525,
"grad_norm": 2.810739517211914,
"learning_rate": 1.60645104326135e-05,
"loss": 0.4987,
"step": 488
},
{
"epoch": 1.528125,
"grad_norm": 2.383479595184326,
"learning_rate": 1.599075280924197e-05,
"loss": 0.4482,
"step": 489
},
{
"epoch": 1.53125,
"grad_norm": 2.4470787048339844,
"learning_rate": 1.5917334378935118e-05,
"loss": 0.5808,
"step": 490
},
{
"epoch": 1.534375,
"grad_norm": 2.437572956085205,
"learning_rate": 1.584425790585536e-05,
"loss": 0.3916,
"step": 491
},
{
"epoch": 1.5375,
"grad_norm": 3.223665952682495,
"learning_rate": 1.5771526141290602e-05,
"loss": 1.0877,
"step": 492
},
{
"epoch": 1.540625,
"grad_norm": 2.521468162536621,
"learning_rate": 1.5699141823550662e-05,
"loss": 0.4119,
"step": 493
},
{
"epoch": 1.54375,
"grad_norm": 2.7671728134155273,
"learning_rate": 1.562710767786421e-05,
"loss": 0.6078,
"step": 494
},
{
"epoch": 1.546875,
"grad_norm": 1.7431325912475586,
"learning_rate": 1.5555426416276095e-05,
"loss": 0.2441,
"step": 495
},
{
"epoch": 1.55,
"grad_norm": 2.172173261642456,
"learning_rate": 1.548410073754532e-05,
"loss": 0.4769,
"step": 496
},
{
"epoch": 1.553125,
"grad_norm": 1.587640404701233,
"learning_rate": 1.5413133327043365e-05,
"loss": 0.218,
"step": 497
},
{
"epoch": 1.55625,
"grad_norm": 2.7734944820404053,
"learning_rate": 1.5342526856653133e-05,
"loss": 0.6377,
"step": 498
},
{
"epoch": 1.559375,
"grad_norm": 1.6427900791168213,
"learning_rate": 1.5272283984668313e-05,
"loss": 0.2391,
"step": 499
},
{
"epoch": 1.5625,
"grad_norm": 2.130922794342041,
"learning_rate": 1.5202407355693354e-05,
"loss": 0.3645,
"step": 500
},
{
"epoch": 1.565625,
"grad_norm": 2.3365015983581543,
"learning_rate": 1.5132899600543823e-05,
"loss": 0.4185,
"step": 501
},
{
"epoch": 1.56875,
"grad_norm": 1.7738977670669556,
"learning_rate": 1.5063763336147424e-05,
"loss": 0.3363,
"step": 502
},
{
"epoch": 1.571875,
"grad_norm": 1.8385276794433594,
"learning_rate": 1.4995001165445442e-05,
"loss": 0.3712,
"step": 503
},
{
"epoch": 1.575,
"grad_norm": 1.8053840398788452,
"learning_rate": 1.4926615677294724e-05,
"loss": 0.2995,
"step": 504
},
{
"epoch": 1.578125,
"grad_norm": 2.7845582962036133,
"learning_rate": 1.4858609446370264e-05,
"loss": 0.6178,
"step": 505
},
{
"epoch": 1.58125,
"grad_norm": 2.369316339492798,
"learning_rate": 1.4790985033068205e-05,
"loss": 0.464,
"step": 506
},
{
"epoch": 1.584375,
"grad_norm": 2.4763267040252686,
"learning_rate": 1.4723744983409498e-05,
"loss": 0.5694,
"step": 507
},
{
"epoch": 1.5875,
"grad_norm": 2.1269421577453613,
"learning_rate": 1.4656891828943997e-05,
"loss": 0.3587,
"step": 508
},
{
"epoch": 1.590625,
"grad_norm": 2.028308629989624,
"learning_rate": 1.4590428086655196e-05,
"loss": 0.3375,
"step": 509
},
{
"epoch": 1.59375,
"grad_norm": 1.3677244186401367,
"learning_rate": 1.4524356258865409e-05,
"loss": 0.1613,
"step": 510
},
{
"epoch": 1.596875,
"grad_norm": 1.846962571144104,
"learning_rate": 1.4458678833141626e-05,
"loss": 0.2811,
"step": 511
},
{
"epoch": 1.6,
"grad_norm": 2.5623536109924316,
"learning_rate": 1.4393398282201789e-05,
"loss": 0.5338,
"step": 512
},
{
"epoch": 1.6,
"eval_VitaminC_cosine_accuracy": 0.5625,
"eval_VitaminC_cosine_accuracy_threshold": 0.7150193452835083,
"eval_VitaminC_cosine_ap": 0.5536001409238264,
"eval_VitaminC_cosine_f1": 0.6666666666666667,
"eval_VitaminC_cosine_f1_threshold": 0.3747650980949402,
"eval_VitaminC_cosine_precision": 0.501002004008016,
"eval_VitaminC_cosine_recall": 0.9960159362549801,
"eval_VitaminC_dot_accuracy": 0.55859375,
"eval_VitaminC_dot_accuracy_threshold": 305.93060302734375,
"eval_VitaminC_dot_ap": 0.5361490037017673,
"eval_VitaminC_dot_f1": 0.6684563758389263,
"eval_VitaminC_dot_f1_threshold": 141.05189514160156,
"eval_VitaminC_dot_precision": 0.5040485829959515,
"eval_VitaminC_dot_recall": 0.9920318725099602,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 12.17225456237793,
"eval_VitaminC_euclidean_ap": 0.5553095900623441,
"eval_VitaminC_euclidean_f1": 0.6666666666666666,
"eval_VitaminC_euclidean_f1_threshold": 23.013614654541016,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.560546875,
"eval_VitaminC_manhattan_accuracy_threshold": 306.5001220703125,
"eval_VitaminC_manhattan_ap": 0.5528524184849768,
"eval_VitaminC_manhattan_f1": 0.6675531914893617,
"eval_VitaminC_manhattan_f1_threshold": 482.4728088378906,
"eval_VitaminC_manhattan_precision": 0.500998003992016,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.5625,
"eval_VitaminC_max_accuracy_threshold": 306.5001220703125,
"eval_VitaminC_max_ap": 0.5553095900623441,
"eval_VitaminC_max_f1": 0.6684563758389263,
"eval_VitaminC_max_f1_threshold": 482.4728088378906,
"eval_VitaminC_max_precision": 0.5040485829959515,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5553095900623441,
"eval_sts-test_pearson_cosine": 0.88002263195295,
"eval_sts-test_pearson_dot": 0.8704058648822381,
"eval_sts-test_pearson_euclidean": 0.9024307031663734,
"eval_sts-test_pearson_manhattan": 0.902236666405867,
"eval_sts-test_pearson_max": 0.9024307031663734,
"eval_sts-test_spearman_cosine": 0.9043963657196562,
"eval_sts-test_spearman_dot": 0.8703829009915547,
"eval_sts-test_spearman_euclidean": 0.8986995748957924,
"eval_sts-test_spearman_manhattan": 0.8993764824755988,
"eval_sts-test_spearman_max": 0.9043963657196562,
"eval_vitaminc-pairs_loss": 1.8544398546218872,
"eval_vitaminc-pairs_runtime": 1.8317,
"eval_vitaminc-pairs_samples_per_second": 58.961,
"eval_vitaminc-pairs_steps_per_second": 1.092,
"step": 512
},
{
"epoch": 1.6,
"eval_negation-triplets_loss": 0.7161268591880798,
"eval_negation-triplets_runtime": 0.2916,
"eval_negation-triplets_samples_per_second": 219.445,
"eval_negation-triplets_steps_per_second": 3.429,
"step": 512
},
{
"epoch": 1.6,
"eval_scitail-pairs-pos_loss": 0.07522901147603989,
"eval_scitail-pairs-pos_runtime": 0.3667,
"eval_scitail-pairs-pos_samples_per_second": 147.259,
"eval_scitail-pairs-pos_steps_per_second": 2.727,
"step": 512
},
{
"epoch": 1.6,
"eval_xsum-pairs_loss": 0.04067877307534218,
"eval_xsum-pairs_runtime": 2.8345,
"eval_xsum-pairs_samples_per_second": 45.157,
"eval_xsum-pairs_steps_per_second": 0.706,
"step": 512
},
{
"epoch": 1.6,
"eval_sciq_pairs_loss": 0.01821758784353733,
"eval_sciq_pairs_runtime": 3.6099,
"eval_sciq_pairs_samples_per_second": 35.459,
"eval_sciq_pairs_steps_per_second": 0.554,
"step": 512
},
{
"epoch": 1.6,
"eval_qasc_pairs_loss": 0.10426162928342819,
"eval_qasc_pairs_runtime": 0.5966,
"eval_qasc_pairs_samples_per_second": 214.562,
"eval_qasc_pairs_steps_per_second": 3.353,
"step": 512
},
{
"epoch": 1.6,
"eval_openbookqa_pairs_loss": 0.6913560032844543,
"eval_openbookqa_pairs_runtime": 0.5728,
"eval_openbookqa_pairs_samples_per_second": 223.453,
"eval_openbookqa_pairs_steps_per_second": 3.491,
"step": 512
},
{
"epoch": 1.6,
"eval_msmarco_pairs_loss": 0.2564995586872101,
"eval_msmarco_pairs_runtime": 1.4587,
"eval_msmarco_pairs_samples_per_second": 87.749,
"eval_msmarco_pairs_steps_per_second": 1.371,
"step": 512
},
{
"epoch": 1.6,
"eval_nq_pairs_loss": 0.14494968950748444,
"eval_nq_pairs_runtime": 2.8504,
"eval_nq_pairs_samples_per_second": 44.907,
"eval_nq_pairs_steps_per_second": 0.702,
"step": 512
},
{
"epoch": 1.6,
"eval_trivia_pairs_loss": 0.633898913860321,
"eval_trivia_pairs_runtime": 4.3846,
"eval_trivia_pairs_samples_per_second": 29.193,
"eval_trivia_pairs_steps_per_second": 0.456,
"step": 512
},
{
"epoch": 1.6,
"eval_gooaq_pairs_loss": 0.29749810695648193,
"eval_gooaq_pairs_runtime": 1.0002,
"eval_gooaq_pairs_samples_per_second": 127.979,
"eval_gooaq_pairs_steps_per_second": 2.0,
"step": 512
},
{
"epoch": 1.6,
"eval_paws-pos_loss": 0.025082813575863838,
"eval_paws-pos_runtime": 0.6849,
"eval_paws-pos_samples_per_second": 186.893,
"eval_paws-pos_steps_per_second": 2.92,
"step": 512
},
{
"epoch": 1.603125,
"grad_norm": 1.237898349761963,
"learning_rate": 1.4328517063821754e-05,
"loss": 0.1862,
"step": 513
},
{
"epoch": 1.60625,
"grad_norm": 3.120419502258301,
"learning_rate": 1.4264037620742724e-05,
"loss": 0.6092,
"step": 514
},
{
"epoch": 1.609375,
"grad_norm": 2.872905969619751,
"learning_rate": 1.4199962380579275e-05,
"loss": 0.541,
"step": 515
},
{
"epoch": 1.6125,
"grad_norm": 2.554291248321533,
"learning_rate": 1.4136293755728e-05,
"loss": 0.5297,
"step": 516
},
{
"epoch": 1.615625,
"grad_norm": 0.818438708782196,
"learning_rate": 1.4073034143276623e-05,
"loss": 0.0664,
"step": 517
},
{
"epoch": 1.61875,
"grad_norm": 1.3617022037506104,
"learning_rate": 1.401018592491381e-05,
"loss": 0.1557,
"step": 518
},
{
"epoch": 1.621875,
"grad_norm": 1.975934386253357,
"learning_rate": 1.3947751466839452e-05,
"loss": 0.3281,
"step": 519
},
{
"epoch": 1.625,
"grad_norm": 2.3073935508728027,
"learning_rate": 1.3885733119675617e-05,
"loss": 0.3828,
"step": 520
},
{
"epoch": 1.628125,
"grad_norm": 1.2710379362106323,
"learning_rate": 1.382413321837801e-05,
"loss": 0.2087,
"step": 521
},
{
"epoch": 1.63125,
"grad_norm": 2.7534079551696777,
"learning_rate": 1.3762954082148114e-05,
"loss": 0.5306,
"step": 522
},
{
"epoch": 1.634375,
"grad_norm": 3.0414681434631348,
"learning_rate": 1.3702198014345816e-05,
"loss": 0.6589,
"step": 523
},
{
"epoch": 1.6375,
"grad_norm": 2.3352811336517334,
"learning_rate": 1.3641867302402734e-05,
"loss": 0.425,
"step": 524
},
{
"epoch": 1.640625,
"grad_norm": 2.76236629486084,
"learning_rate": 1.3581964217736077e-05,
"loss": 0.5026,
"step": 525
},
{
"epoch": 1.64375,
"grad_norm": 2.5108022689819336,
"learning_rate": 1.3522491015663117e-05,
"loss": 0.5667,
"step": 526
},
{
"epoch": 1.646875,
"grad_norm": 2.4024035930633545,
"learning_rate": 1.3463449935316308e-05,
"loss": 0.4748,
"step": 527
},
{
"epoch": 1.65,
"grad_norm": 2.772578239440918,
"learning_rate": 1.3404843199558945e-05,
"loss": 0.5094,
"step": 528
},
{
"epoch": 1.653125,
"grad_norm": 2.2362611293792725,
"learning_rate": 1.3346673014901517e-05,
"loss": 0.3398,
"step": 529
},
{
"epoch": 1.65625,
"grad_norm": 1.5350793600082397,
"learning_rate": 1.3288941571418583e-05,
"loss": 0.1932,
"step": 530
},
{
"epoch": 1.659375,
"grad_norm": 2.147125720977783,
"learning_rate": 1.3231651042666376e-05,
"loss": 0.4233,
"step": 531
},
{
"epoch": 1.6625,
"grad_norm": 2.5387678146362305,
"learning_rate": 1.3174803585600908e-05,
"loss": 0.5848,
"step": 532
},
{
"epoch": 1.665625,
"grad_norm": 2.3380072116851807,
"learning_rate": 1.3118401340496819e-05,
"loss": 0.5076,
"step": 533
},
{
"epoch": 1.66875,
"grad_norm": 2.097322463989258,
"learning_rate": 1.3062446430866749e-05,
"loss": 0.286,
"step": 534
},
{
"epoch": 1.671875,
"grad_norm": 2.5456178188323975,
"learning_rate": 1.3006940963381425e-05,
"loss": 0.5221,
"step": 535
},
{
"epoch": 1.675,
"grad_norm": 2.5779526233673096,
"learning_rate": 1.295188702779033e-05,
"loss": 0.579,
"step": 536
},
{
"epoch": 1.678125,
"grad_norm": 1.9412658214569092,
"learning_rate": 1.2897286696843012e-05,
"loss": 0.2717,
"step": 537
},
{
"epoch": 1.68125,
"grad_norm": 2.2857954502105713,
"learning_rate": 1.2843142026211081e-05,
"loss": 0.4727,
"step": 538
},
{
"epoch": 1.684375,
"grad_norm": 2.2698121070861816,
"learning_rate": 1.2789455054410776e-05,
"loss": 0.3777,
"step": 539
},
{
"epoch": 1.6875,
"grad_norm": 2.2447919845581055,
"learning_rate": 1.2736227802726247e-05,
"loss": 0.537,
"step": 540
},
{
"epoch": 1.690625,
"grad_norm": 3.1389870643615723,
"learning_rate": 1.268346227513343e-05,
"loss": 0.6935,
"step": 541
},
{
"epoch": 1.69375,
"grad_norm": 1.925352931022644,
"learning_rate": 1.2631160458224625e-05,
"loss": 0.2929,
"step": 542
},
{
"epoch": 1.696875,
"grad_norm": 2.683356761932373,
"learning_rate": 1.2579324321133666e-05,
"loss": 0.5495,
"step": 543
},
{
"epoch": 1.7,
"grad_norm": 2.3518059253692627,
"learning_rate": 1.2527955815461821e-05,
"loss": 0.3767,
"step": 544
},
{
"epoch": 1.7,
"eval_VitaminC_cosine_accuracy": 0.556640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.7074875235557556,
"eval_VitaminC_cosine_ap": 0.5537116985905202,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.2738235890865326,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.552734375,
"eval_VitaminC_dot_accuracy_threshold": 308.73809814453125,
"eval_VitaminC_dot_ap": 0.5356558215645612,
"eval_VitaminC_dot_f1": 0.6666666666666667,
"eval_VitaminC_dot_f1_threshold": 142.89981079101562,
"eval_VitaminC_dot_precision": 0.5030425963488844,
"eval_VitaminC_dot_recall": 0.9880478087649402,
"eval_VitaminC_euclidean_accuracy": 0.552734375,
"eval_VitaminC_euclidean_accuracy_threshold": 14.646638870239258,
"eval_VitaminC_euclidean_ap": 0.5553327582256045,
"eval_VitaminC_euclidean_f1": 0.6666666666666666,
"eval_VitaminC_euclidean_f1_threshold": 23.463809967041016,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 310.8325500488281,
"eval_VitaminC_manhattan_ap": 0.5530353867429494,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 497.66796875,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 310.8325500488281,
"eval_VitaminC_max_ap": 0.5553327582256045,
"eval_VitaminC_max_f1": 0.6666666666666667,
"eval_VitaminC_max_f1_threshold": 497.66796875,
"eval_VitaminC_max_precision": 0.5030425963488844,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5553327582256045,
"eval_sts-test_pearson_cosine": 0.8785811955197258,
"eval_sts-test_pearson_dot": 0.8673295777318735,
"eval_sts-test_pearson_euclidean": 0.9018792837542462,
"eval_sts-test_pearson_manhattan": 0.9016741452222354,
"eval_sts-test_pearson_max": 0.9018792837542462,
"eval_sts-test_spearman_cosine": 0.9040249302501078,
"eval_sts-test_spearman_dot": 0.8683179882884328,
"eval_sts-test_spearman_euclidean": 0.8988373640296166,
"eval_sts-test_spearman_manhattan": 0.8983056295417639,
"eval_sts-test_spearman_max": 0.9040249302501078,
"eval_vitaminc-pairs_loss": 1.8782049417495728,
"eval_vitaminc-pairs_runtime": 1.831,
"eval_vitaminc-pairs_samples_per_second": 58.986,
"eval_vitaminc-pairs_steps_per_second": 1.092,
"step": 544
},
{
"epoch": 1.7,
"eval_negation-triplets_loss": 0.719520628452301,
"eval_negation-triplets_runtime": 0.294,
"eval_negation-triplets_samples_per_second": 217.687,
"eval_negation-triplets_steps_per_second": 3.401,
"step": 544
},
{
"epoch": 1.7,
"eval_scitail-pairs-pos_loss": 0.06700660288333893,
"eval_scitail-pairs-pos_runtime": 0.3659,
"eval_scitail-pairs-pos_samples_per_second": 147.579,
"eval_scitail-pairs-pos_steps_per_second": 2.733,
"step": 544
},
{
"epoch": 1.7,
"eval_xsum-pairs_loss": 0.03577294573187828,
"eval_xsum-pairs_runtime": 2.8359,
"eval_xsum-pairs_samples_per_second": 45.136,
"eval_xsum-pairs_steps_per_second": 0.705,
"step": 544
},
{
"epoch": 1.7,
"eval_sciq_pairs_loss": 0.018292119726538658,
"eval_sciq_pairs_runtime": 3.6233,
"eval_sciq_pairs_samples_per_second": 35.327,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 544
},
{
"epoch": 1.7,
"eval_qasc_pairs_loss": 0.10864048451185226,
"eval_qasc_pairs_runtime": 0.5959,
"eval_qasc_pairs_samples_per_second": 214.784,
"eval_qasc_pairs_steps_per_second": 3.356,
"step": 544
},
{
"epoch": 1.7,
"eval_openbookqa_pairs_loss": 0.696479082107544,
"eval_openbookqa_pairs_runtime": 0.5743,
"eval_openbookqa_pairs_samples_per_second": 222.885,
"eval_openbookqa_pairs_steps_per_second": 3.483,
"step": 544
},
{
"epoch": 1.7,
"eval_msmarco_pairs_loss": 0.24125610291957855,
"eval_msmarco_pairs_runtime": 1.4595,
"eval_msmarco_pairs_samples_per_second": 87.699,
"eval_msmarco_pairs_steps_per_second": 1.37,
"step": 544
},
{
"epoch": 1.7,
"eval_nq_pairs_loss": 0.15616978704929352,
"eval_nq_pairs_runtime": 2.8639,
"eval_nq_pairs_samples_per_second": 44.694,
"eval_nq_pairs_steps_per_second": 0.698,
"step": 544
},
{
"epoch": 1.7,
"eval_trivia_pairs_loss": 0.6436348557472229,
"eval_trivia_pairs_runtime": 4.377,
"eval_trivia_pairs_samples_per_second": 29.244,
"eval_trivia_pairs_steps_per_second": 0.457,
"step": 544
},
{
"epoch": 1.7,
"eval_gooaq_pairs_loss": 0.30042433738708496,
"eval_gooaq_pairs_runtime": 1.0002,
"eval_gooaq_pairs_samples_per_second": 127.981,
"eval_gooaq_pairs_steps_per_second": 2.0,
"step": 544
},
{
"epoch": 1.7,
"eval_paws-pos_loss": 0.02469758875668049,
"eval_paws-pos_runtime": 0.6819,
"eval_paws-pos_samples_per_second": 187.706,
"eval_paws-pos_steps_per_second": 2.933,
"step": 544
},
{
"epoch": 1.703125,
"grad_norm": 2.188075065612793,
"learning_rate": 1.2477056875204302e-05,
"loss": 0.4054,
"step": 545
},
{
"epoch": 1.70625,
"grad_norm": 2.5551207065582275,
"learning_rate": 1.242662941667743e-05,
"loss": 0.4114,
"step": 546
},
{
"epoch": 1.709375,
"grad_norm": 2.614218235015869,
"learning_rate": 1.2376675338446527e-05,
"loss": 0.4774,
"step": 547
},
{
"epoch": 1.7125,
"grad_norm": 1.4668488502502441,
"learning_rate": 1.2327196521254394e-05,
"loss": 0.1662,
"step": 548
},
{
"epoch": 1.715625,
"grad_norm": 2.075801372528076,
"learning_rate": 1.2278194827950544e-05,
"loss": 0.4634,
"step": 549
},
{
"epoch": 1.71875,
"grad_norm": 3.1399238109588623,
"learning_rate": 1.2229672103421021e-05,
"loss": 0.6514,
"step": 550
},
{
"epoch": 1.721875,
"grad_norm": 2.308095693588257,
"learning_rate": 1.2181630174518995e-05,
"loss": 0.3672,
"step": 551
},
{
"epoch": 1.725,
"grad_norm": 2.880965232849121,
"learning_rate": 1.213407084999592e-05,
"loss": 0.6115,
"step": 552
},
{
"epoch": 1.728125,
"grad_norm": 2.7592408657073975,
"learning_rate": 1.2086995920433495e-05,
"loss": 0.5445,
"step": 553
},
{
"epoch": 1.73125,
"grad_norm": 1.6351908445358276,
"learning_rate": 1.20404071581762e-05,
"loss": 0.2447,
"step": 554
},
{
"epoch": 1.734375,
"grad_norm": 1.5117764472961426,
"learning_rate": 1.199430631726461e-05,
"loss": 0.2566,
"step": 555
},
{
"epoch": 1.7375,
"grad_norm": 1.4923957586288452,
"learning_rate": 1.194869513336933e-05,
"loss": 0.208,
"step": 556
},
{
"epoch": 1.740625,
"grad_norm": 2.0138089656829834,
"learning_rate": 1.1903575323725649e-05,
"loss": 0.3175,
"step": 557
},
{
"epoch": 1.74375,
"grad_norm": 1.8065791130065918,
"learning_rate": 1.1858948587068904e-05,
"loss": 0.2546,
"step": 558
},
{
"epoch": 1.746875,
"grad_norm": 1.4454731941223145,
"learning_rate": 1.1814816603570499e-05,
"loss": 0.1709,
"step": 559
},
{
"epoch": 1.75,
"grad_norm": 2.613529682159424,
"learning_rate": 1.1771181034774677e-05,
"loss": 0.4799,
"step": 560
},
{
"epoch": 1.753125,
"grad_norm": 2.197608470916748,
"learning_rate": 1.1728043523535934e-05,
"loss": 0.5313,
"step": 561
},
{
"epoch": 1.75625,
"grad_norm": 2.056694269180298,
"learning_rate": 1.1685405693957192e-05,
"loss": 0.3248,
"step": 562
},
{
"epoch": 1.759375,
"grad_norm": 2.9212446212768555,
"learning_rate": 1.1643269151328634e-05,
"loss": 0.6279,
"step": 563
},
{
"epoch": 1.7625,
"grad_norm": 2.4438629150390625,
"learning_rate": 1.1601635482067272e-05,
"loss": 0.5193,
"step": 564
},
{
"epoch": 1.765625,
"grad_norm": 2.960676670074463,
"learning_rate": 1.1560506253657225e-05,
"loss": 0.6262,
"step": 565
},
{
"epoch": 1.76875,
"grad_norm": 2.2354516983032227,
"learning_rate": 1.1519883014590691e-05,
"loss": 0.4297,
"step": 566
},
{
"epoch": 1.771875,
"grad_norm": 2.175459623336792,
"learning_rate": 1.1479767294309671e-05,
"loss": 0.4763,
"step": 567
},
{
"epoch": 1.775,
"grad_norm": 2.5381572246551514,
"learning_rate": 1.1440160603148352e-05,
"loss": 0.5722,
"step": 568
},
{
"epoch": 1.778125,
"grad_norm": 2.3705122470855713,
"learning_rate": 1.140106443227627e-05,
"loss": 0.4347,
"step": 569
},
{
"epoch": 1.78125,
"grad_norm": 2.0581493377685547,
"learning_rate": 1.1362480253642165e-05,
"loss": 0.3271,
"step": 570
},
{
"epoch": 1.784375,
"grad_norm": 2.5319983959198,
"learning_rate": 1.1324409519918556e-05,
"loss": 0.5433,
"step": 571
},
{
"epoch": 1.7875,
"grad_norm": 0.7258579730987549,
"learning_rate": 1.128685366444704e-05,
"loss": 0.0637,
"step": 572
},
{
"epoch": 1.790625,
"grad_norm": 3.232028007507324,
"learning_rate": 1.1249814101184362e-05,
"loss": 0.9049,
"step": 573
},
{
"epoch": 1.79375,
"grad_norm": 2.510418653488159,
"learning_rate": 1.1213292224649134e-05,
"loss": 0.495,
"step": 574
},
{
"epoch": 1.796875,
"grad_norm": 1.644942045211792,
"learning_rate": 1.1177289409869374e-05,
"loss": 0.2218,
"step": 575
},
{
"epoch": 1.8,
"grad_norm": 3.1910505294799805,
"learning_rate": 1.11418070123307e-05,
"loss": 0.7491,
"step": 576
},
{
"epoch": 1.8,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8312963247299194,
"eval_VitaminC_cosine_ap": 0.5540818473167951,
"eval_VitaminC_cosine_f1": 0.6657754010695187,
"eval_VitaminC_cosine_f1_threshold": 0.3716816306114197,
"eval_VitaminC_cosine_precision": 0.5010060362173038,
"eval_VitaminC_cosine_recall": 0.9920318725099602,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 301.13458251953125,
"eval_VitaminC_dot_ap": 0.5336035822109861,
"eval_VitaminC_dot_f1": 0.6675639300134589,
"eval_VitaminC_dot_f1_threshold": 140.0170135498047,
"eval_VitaminC_dot_precision": 0.5040650406504065,
"eval_VitaminC_dot_recall": 0.9880478087649402,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 14.30455493927002,
"eval_VitaminC_euclidean_ap": 0.5547765455338385,
"eval_VitaminC_euclidean_f1": 0.6666666666666666,
"eval_VitaminC_euclidean_f1_threshold": 23.225872039794922,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 311.50494384765625,
"eval_VitaminC_manhattan_ap": 0.5520078360814107,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 491.16729736328125,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 311.50494384765625,
"eval_VitaminC_max_ap": 0.5547765455338385,
"eval_VitaminC_max_f1": 0.6675639300134589,
"eval_VitaminC_max_f1_threshold": 491.16729736328125,
"eval_VitaminC_max_precision": 0.5040650406504065,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5547765455338385,
"eval_sts-test_pearson_cosine": 0.8785522027028954,
"eval_sts-test_pearson_dot": 0.8677130233704464,
"eval_sts-test_pearson_euclidean": 0.901327101812411,
"eval_sts-test_pearson_manhattan": 0.9016459799124272,
"eval_sts-test_pearson_max": 0.9016459799124272,
"eval_sts-test_spearman_cosine": 0.9038277114411557,
"eval_sts-test_spearman_dot": 0.8689599898843539,
"eval_sts-test_spearman_euclidean": 0.8982747959226655,
"eval_sts-test_spearman_manhattan": 0.8983893144005659,
"eval_sts-test_spearman_max": 0.9038277114411557,
"eval_vitaminc-pairs_loss": 1.8366389274597168,
"eval_vitaminc-pairs_runtime": 1.8298,
"eval_vitaminc-pairs_samples_per_second": 59.021,
"eval_vitaminc-pairs_steps_per_second": 1.093,
"step": 576
},
{
"epoch": 1.8,
"eval_negation-triplets_loss": 0.7222614884376526,
"eval_negation-triplets_runtime": 0.292,
"eval_negation-triplets_samples_per_second": 219.186,
"eval_negation-triplets_steps_per_second": 3.425,
"step": 576
},
{
"epoch": 1.8,
"eval_scitail-pairs-pos_loss": 0.06263165920972824,
"eval_scitail-pairs-pos_runtime": 0.3693,
"eval_scitail-pairs-pos_samples_per_second": 146.22,
"eval_scitail-pairs-pos_steps_per_second": 2.708,
"step": 576
},
{
"epoch": 1.8,
"eval_xsum-pairs_loss": 0.038485851138830185,
"eval_xsum-pairs_runtime": 2.8422,
"eval_xsum-pairs_samples_per_second": 45.035,
"eval_xsum-pairs_steps_per_second": 0.704,
"step": 576
},
{
"epoch": 1.8,
"eval_sciq_pairs_loss": 0.017885908484458923,
"eval_sciq_pairs_runtime": 3.6267,
"eval_sciq_pairs_samples_per_second": 35.293,
"eval_sciq_pairs_steps_per_second": 0.551,
"step": 576
},
{
"epoch": 1.8,
"eval_qasc_pairs_loss": 0.11011218279600143,
"eval_qasc_pairs_runtime": 0.595,
"eval_qasc_pairs_samples_per_second": 215.135,
"eval_qasc_pairs_steps_per_second": 3.361,
"step": 576
},
{
"epoch": 1.8,
"eval_openbookqa_pairs_loss": 0.6921338438987732,
"eval_openbookqa_pairs_runtime": 0.573,
"eval_openbookqa_pairs_samples_per_second": 223.4,
"eval_openbookqa_pairs_steps_per_second": 3.491,
"step": 576
},
{
"epoch": 1.8,
"eval_msmarco_pairs_loss": 0.24500073492527008,
"eval_msmarco_pairs_runtime": 1.4604,
"eval_msmarco_pairs_samples_per_second": 87.65,
"eval_msmarco_pairs_steps_per_second": 1.37,
"step": 576
},
{
"epoch": 1.8,
"eval_nq_pairs_loss": 0.14756517112255096,
"eval_nq_pairs_runtime": 2.8567,
"eval_nq_pairs_samples_per_second": 44.806,
"eval_nq_pairs_steps_per_second": 0.7,
"step": 576
},
{
"epoch": 1.8,
"eval_trivia_pairs_loss": 0.6358833909034729,
"eval_trivia_pairs_runtime": 4.3759,
"eval_trivia_pairs_samples_per_second": 29.251,
"eval_trivia_pairs_steps_per_second": 0.457,
"step": 576
},
{
"epoch": 1.8,
"eval_gooaq_pairs_loss": 0.2909858226776123,
"eval_gooaq_pairs_runtime": 1.0026,
"eval_gooaq_pairs_samples_per_second": 127.667,
"eval_gooaq_pairs_steps_per_second": 1.995,
"step": 576
},
{
"epoch": 1.8,
"eval_paws-pos_loss": 0.02510605938732624,
"eval_paws-pos_runtime": 0.6858,
"eval_paws-pos_samples_per_second": 186.641,
"eval_paws-pos_steps_per_second": 2.916,
"step": 576
}
],
"logging_steps": 1,
"max_steps": 640,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 64,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 320,
"trial_name": null,
"trial_params": null
}