{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 45, "global_step": 890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0022471910112359553, "grad_norm": 3.9492883682250977, "learning_rate": 1.9662921348314604e-07, "loss": 0.8103, "step": 1 }, { "epoch": 0.0044943820224719105, "grad_norm": 4.117438793182373, "learning_rate": 3.932584269662921e-07, "loss": 0.8803, "step": 2 }, { "epoch": 0.006741573033707865, "grad_norm": 3.809002161026001, "learning_rate": 5.898876404494381e-07, "loss": 0.8219, "step": 3 }, { "epoch": 0.008988764044943821, "grad_norm": 0.7417504787445068, "learning_rate": 7.865168539325842e-07, "loss": 0.0574, "step": 4 }, { "epoch": 0.011235955056179775, "grad_norm": 2.707460403442383, "learning_rate": 9.831460674157302e-07, "loss": 0.3044, "step": 5 }, { "epoch": 0.01348314606741573, "grad_norm": 3.082705497741699, "learning_rate": 1.1797752808988763e-06, "loss": 0.3306, "step": 6 }, { "epoch": 0.015730337078651686, "grad_norm": 3.102416753768921, "learning_rate": 1.3764044943820223e-06, "loss": 0.759, "step": 7 }, { "epoch": 0.017977528089887642, "grad_norm": 0.6271047592163086, "learning_rate": 1.5730337078651683e-06, "loss": 0.0472, "step": 8 }, { "epoch": 0.020224719101123594, "grad_norm": 3.1362593173980713, "learning_rate": 1.7696629213483144e-06, "loss": 0.7782, "step": 9 }, { "epoch": 0.02247191011235955, "grad_norm": 1.124997615814209, "learning_rate": 1.9662921348314604e-06, "loss": 0.0757, "step": 10 }, { "epoch": 0.024719101123595506, "grad_norm": 3.194413185119629, "learning_rate": 2.1629213483146067e-06, "loss": 0.7778, "step": 11 }, { "epoch": 0.02696629213483146, "grad_norm": 3.966202974319458, "learning_rate": 2.3595505617977525e-06, "loss": 0.7111, "step": 12 }, { "epoch": 0.029213483146067417, "grad_norm": 3.63393235206604, "learning_rate": 2.5561797752808988e-06, "loss": 0.6598, "step": 13 }, { "epoch": 0.03146067415730337, "grad_norm": 4.087065696716309, "learning_rate": 2.7528089887640446e-06, "loss": 0.8901, "step": 14 }, { "epoch": 0.033707865168539325, "grad_norm": 2.769573211669922, "learning_rate": 2.949438202247191e-06, "loss": 0.3206, "step": 15 }, { "epoch": 0.035955056179775284, "grad_norm": 2.630620002746582, "learning_rate": 3.1460674157303367e-06, "loss": 0.3408, "step": 16 }, { "epoch": 0.038202247191011236, "grad_norm": 2.9570937156677246, "learning_rate": 3.342696629213483e-06, "loss": 0.5623, "step": 17 }, { "epoch": 0.04044943820224719, "grad_norm": 1.0999970436096191, "learning_rate": 3.5393258426966288e-06, "loss": 0.0758, "step": 18 }, { "epoch": 0.04269662921348315, "grad_norm": 5.516472816467285, "learning_rate": 3.735955056179775e-06, "loss": 0.994, "step": 19 }, { "epoch": 0.0449438202247191, "grad_norm": 6.245299816131592, "learning_rate": 3.932584269662921e-06, "loss": 2.4196, "step": 20 }, { "epoch": 0.04719101123595506, "grad_norm": 0.546605110168457, "learning_rate": 4.129213483146067e-06, "loss": 0.0561, "step": 21 }, { "epoch": 0.04943820224719101, "grad_norm": 0.7049635648727417, "learning_rate": 4.325842696629213e-06, "loss": 0.0827, "step": 22 }, { "epoch": 0.051685393258426963, "grad_norm": 3.1022439002990723, "learning_rate": 4.522471910112359e-06, "loss": 0.7405, "step": 23 }, { "epoch": 0.05393258426966292, "grad_norm": 4.534759044647217, "learning_rate": 4.719101123595505e-06, "loss": 0.9656, "step": 24 }, { "epoch": 0.056179775280898875, "grad_norm": 3.0486032962799072, "learning_rate": 4.915730337078652e-06, "loss": 0.7855, "step": 25 }, { "epoch": 0.058426966292134834, "grad_norm": 3.7457478046417236, "learning_rate": 5.1123595505617975e-06, "loss": 0.6349, "step": 26 }, { "epoch": 0.060674157303370786, "grad_norm": 3.2051479816436768, "learning_rate": 5.308988764044943e-06, "loss": 0.8087, "step": 27 }, { "epoch": 0.06292134831460675, "grad_norm": 4.389094829559326, "learning_rate": 5.505617977528089e-06, "loss": 0.9282, "step": 28 }, { "epoch": 0.0651685393258427, "grad_norm": 2.920410394668579, "learning_rate": 5.702247191011236e-06, "loss": 0.3377, "step": 29 }, { "epoch": 0.06741573033707865, "grad_norm": 2.7193148136138916, "learning_rate": 5.898876404494382e-06, "loss": 0.3289, "step": 30 }, { "epoch": 0.0696629213483146, "grad_norm": 4.0008225440979, "learning_rate": 6.0955056179775275e-06, "loss": 0.6314, "step": 31 }, { "epoch": 0.07191011235955057, "grad_norm": 0.5842159390449524, "learning_rate": 6.292134831460673e-06, "loss": 0.0611, "step": 32 }, { "epoch": 0.07415730337078652, "grad_norm": 3.1256043910980225, "learning_rate": 6.48876404494382e-06, "loss": 0.8942, "step": 33 }, { "epoch": 0.07640449438202247, "grad_norm": 0.9526051878929138, "learning_rate": 6.685393258426966e-06, "loss": 0.0701, "step": 34 }, { "epoch": 0.07865168539325842, "grad_norm": 4.061926364898682, "learning_rate": 6.882022471910112e-06, "loss": 0.8506, "step": 35 }, { "epoch": 0.08089887640449438, "grad_norm": 2.8898491859436035, "learning_rate": 7.0786516853932575e-06, "loss": 0.3386, "step": 36 }, { "epoch": 0.08314606741573034, "grad_norm": 0.9806709289550781, "learning_rate": 7.275280898876404e-06, "loss": 0.0701, "step": 37 }, { "epoch": 0.0853932584269663, "grad_norm": 3.8004391193389893, "learning_rate": 7.47191011235955e-06, "loss": 0.8042, "step": 38 }, { "epoch": 0.08764044943820225, "grad_norm": 4.089083194732666, "learning_rate": 7.668539325842697e-06, "loss": 0.8744, "step": 39 }, { "epoch": 0.0898876404494382, "grad_norm": 3.419440984725952, "learning_rate": 7.865168539325842e-06, "loss": 0.8644, "step": 40 }, { "epoch": 0.09213483146067415, "grad_norm": 4.094921588897705, "learning_rate": 8.061797752808988e-06, "loss": 0.8647, "step": 41 }, { "epoch": 0.09438202247191012, "grad_norm": 3.9199764728546143, "learning_rate": 8.258426966292133e-06, "loss": 0.7916, "step": 42 }, { "epoch": 0.09662921348314607, "grad_norm": 4.082360744476318, "learning_rate": 8.45505617977528e-06, "loss": 0.8599, "step": 43 }, { "epoch": 0.09887640449438202, "grad_norm": 0.6443855166435242, "learning_rate": 8.651685393258427e-06, "loss": 0.0523, "step": 44 }, { "epoch": 0.10112359550561797, "grad_norm": 4.051048278808594, "learning_rate": 8.848314606741572e-06, "loss": 0.6968, "step": 45 }, { "epoch": 0.10112359550561797, "eval_VitaminC_cosine_accuracy": 0.55859375, "eval_VitaminC_cosine_accuracy_threshold": 0.8300318121910095, "eval_VitaminC_cosine_ap": 0.5514483751609435, "eval_VitaminC_cosine_f1": 0.6657718120805369, "eval_VitaminC_cosine_f1_threshold": 0.37456807494163513, "eval_VitaminC_cosine_precision": 0.5020242914979757, "eval_VitaminC_cosine_recall": 0.9880478087649402, "eval_VitaminC_dot_accuracy": 0.5546875, "eval_VitaminC_dot_accuracy_threshold": 311.380615234375, "eval_VitaminC_dot_ap": 0.5333497363350208, "eval_VitaminC_dot_f1": 0.6684709066305818, "eval_VitaminC_dot_f1_threshold": 144.8927001953125, "eval_VitaminC_dot_precision": 0.5061475409836066, "eval_VitaminC_dot_recall": 0.9840637450199203, "eval_VitaminC_euclidean_accuracy": 0.556640625, "eval_VitaminC_euclidean_accuracy_threshold": 11.028482437133789, "eval_VitaminC_euclidean_ap": 0.5544340410314673, "eval_VitaminC_euclidean_f1": 0.6649006622516557, "eval_VitaminC_euclidean_f1_threshold": 23.38451385498047, "eval_VitaminC_euclidean_precision": 0.498015873015873, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.556640625, "eval_VitaminC_manhattan_accuracy_threshold": 232.38790893554688, "eval_VitaminC_manhattan_ap": 0.5515569514532939, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 498.126220703125, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 311.380615234375, "eval_VitaminC_max_ap": 0.5544340410314673, "eval_VitaminC_max_f1": 0.6684709066305818, "eval_VitaminC_max_f1_threshold": 498.126220703125, "eval_VitaminC_max_precision": 0.5061475409836066, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5544340410314673, "eval_sts-test_pearson_cosine": 0.8803067271464453, "eval_sts-test_pearson_dot": 0.8698285291814508, "eval_sts-test_pearson_euclidean": 0.9023937835918766, "eval_sts-test_pearson_manhattan": 0.9020751259156048, "eval_sts-test_pearson_max": 0.9023937835918766, "eval_sts-test_spearman_cosine": 0.9038005474254912, "eval_sts-test_spearman_dot": 0.8707897794601254, "eval_sts-test_spearman_euclidean": 0.8989733631129851, "eval_sts-test_spearman_manhattan": 0.8980189529612906, "eval_sts-test_spearman_max": 0.9038005474254912, "eval_vitaminc-pairs_loss": 1.7273772954940796, "eval_vitaminc-pairs_runtime": 1.8924, "eval_vitaminc-pairs_samples_per_second": 57.071, "eval_vitaminc-pairs_steps_per_second": 1.057, "step": 45 }, { "epoch": 0.10112359550561797, "eval_negation-triplets_loss": 0.9174526929855347, "eval_negation-triplets_runtime": 0.2972, "eval_negation-triplets_samples_per_second": 215.314, "eval_negation-triplets_steps_per_second": 3.364, "step": 45 }, { "epoch": 0.10112359550561797, "eval_scitail-pairs-pos_loss": 0.07368183881044388, "eval_scitail-pairs-pos_runtime": 0.379, "eval_scitail-pairs-pos_samples_per_second": 142.492, "eval_scitail-pairs-pos_steps_per_second": 2.639, "step": 45 }, { "epoch": 0.10112359550561797, "eval_scitail-pairs-qa_loss": 0.001584450714290142, "eval_scitail-pairs-qa_runtime": 0.5178, "eval_scitail-pairs-qa_samples_per_second": 247.198, "eval_scitail-pairs-qa_steps_per_second": 3.862, "step": 45 }, { "epoch": 0.10112359550561797, "eval_xsum-pairs_loss": 0.038235221058130264, "eval_xsum-pairs_runtime": 2.7268, "eval_xsum-pairs_samples_per_second": 46.941, "eval_xsum-pairs_steps_per_second": 0.733, "step": 45 }, { "epoch": 0.10112359550561797, "eval_sciq_pairs_loss": 0.01538097020238638, "eval_sciq_pairs_runtime": 2.7808, "eval_sciq_pairs_samples_per_second": 46.029, "eval_sciq_pairs_steps_per_second": 0.719, "step": 45 }, { "epoch": 0.10112359550561797, "eval_qasc_pairs_loss": 0.09078988432884216, "eval_qasc_pairs_runtime": 0.6473, "eval_qasc_pairs_samples_per_second": 197.758, "eval_qasc_pairs_steps_per_second": 3.09, "step": 45 }, { "epoch": 0.10112359550561797, "eval_openbookqa_pairs_loss": 0.6754768490791321, "eval_openbookqa_pairs_runtime": 0.573, "eval_openbookqa_pairs_samples_per_second": 223.397, "eval_openbookqa_pairs_steps_per_second": 3.491, "step": 45 }, { "epoch": 0.10112359550561797, "eval_msmarco_pairs_loss": 0.15991328656673431, "eval_msmarco_pairs_runtime": 1.487, "eval_msmarco_pairs_samples_per_second": 86.078, "eval_msmarco_pairs_steps_per_second": 1.345, "step": 45 }, { "epoch": 0.10112359550561797, "eval_nq_pairs_loss": 0.09591890126466751, "eval_nq_pairs_runtime": 2.3943, "eval_nq_pairs_samples_per_second": 53.459, "eval_nq_pairs_steps_per_second": 0.835, "step": 45 }, { "epoch": 0.10112359550561797, "eval_trivia_pairs_loss": 0.5305934548377991, "eval_trivia_pairs_runtime": 3.5752, "eval_trivia_pairs_samples_per_second": 35.802, "eval_trivia_pairs_steps_per_second": 0.559, "step": 45 }, { "epoch": 0.10112359550561797, "eval_gooaq_pairs_loss": 0.29681000113487244, "eval_gooaq_pairs_runtime": 0.9087, "eval_gooaq_pairs_samples_per_second": 140.861, "eval_gooaq_pairs_steps_per_second": 2.201, "step": 45 }, { "epoch": 0.10112359550561797, "eval_paws-pos_loss": 0.024501051753759384, "eval_paws-pos_runtime": 0.6773, "eval_paws-pos_samples_per_second": 188.996, "eval_paws-pos_steps_per_second": 2.953, "step": 45 }, { "epoch": 0.10337078651685393, "grad_norm": 2.9021923542022705, "learning_rate": 9.044943820224718e-06, "loss": 0.3376, "step": 46 }, { "epoch": 0.10561797752808989, "grad_norm": 3.179288625717163, "learning_rate": 9.241573033707863e-06, "loss": 0.5174, "step": 47 }, { "epoch": 0.10786516853932585, "grad_norm": 3.1919493675231934, "learning_rate": 9.43820224719101e-06, "loss": 0.8162, "step": 48 }, { "epoch": 0.1101123595505618, "grad_norm": 2.8602521419525146, "learning_rate": 9.634831460674157e-06, "loss": 0.3545, "step": 49 }, { "epoch": 0.11235955056179775, "grad_norm": 2.7570478916168213, "learning_rate": 9.831460674157303e-06, "loss": 0.315, "step": 50 }, { "epoch": 0.1146067415730337, "grad_norm": 0.8641514778137207, "learning_rate": 1.0028089887640448e-05, "loss": 0.0627, "step": 51 }, { "epoch": 0.11685393258426967, "grad_norm": 3.9437484741210938, "learning_rate": 1.0224719101123595e-05, "loss": 0.8851, "step": 52 }, { "epoch": 0.11910112359550562, "grad_norm": 4.144773006439209, "learning_rate": 1.042134831460674e-05, "loss": 0.8382, "step": 53 }, { "epoch": 0.12134831460674157, "grad_norm": 4.277736186981201, "learning_rate": 1.0617977528089887e-05, "loss": 0.733, "step": 54 }, { "epoch": 0.12359550561797752, "grad_norm": 4.025904178619385, "learning_rate": 1.0814606741573032e-05, "loss": 0.7173, "step": 55 }, { "epoch": 0.1258426966292135, "grad_norm": 3.923046827316284, "learning_rate": 1.1011235955056178e-05, "loss": 0.7659, "step": 56 }, { "epoch": 0.12808988764044943, "grad_norm": 3.2707138061523438, "learning_rate": 1.1207865168539325e-05, "loss": 0.793, "step": 57 }, { "epoch": 0.1303370786516854, "grad_norm": 3.1660959720611572, "learning_rate": 1.1404494382022472e-05, "loss": 0.5426, "step": 58 }, { "epoch": 0.13258426966292136, "grad_norm": 4.5236663818359375, "learning_rate": 1.1601123595505617e-05, "loss": 0.7641, "step": 59 }, { "epoch": 0.1348314606741573, "grad_norm": 0.5771021246910095, "learning_rate": 1.1797752808988763e-05, "loss": 0.0657, "step": 60 }, { "epoch": 0.13707865168539327, "grad_norm": 3.8541343212127686, "learning_rate": 1.1994382022471908e-05, "loss": 0.7836, "step": 61 }, { "epoch": 0.1393258426966292, "grad_norm": 4.284148693084717, "learning_rate": 1.2191011235955055e-05, "loss": 0.9306, "step": 62 }, { "epoch": 0.14157303370786517, "grad_norm": 4.175032615661621, "learning_rate": 1.23876404494382e-05, "loss": 0.8673, "step": 63 }, { "epoch": 0.14382022471910114, "grad_norm": 5.025452136993408, "learning_rate": 1.2584269662921347e-05, "loss": 0.9296, "step": 64 }, { "epoch": 0.14606741573033707, "grad_norm": 3.970745086669922, "learning_rate": 1.2780898876404493e-05, "loss": 0.8211, "step": 65 }, { "epoch": 0.14831460674157304, "grad_norm": 3.150197744369507, "learning_rate": 1.297752808988764e-05, "loss": 0.7685, "step": 66 }, { "epoch": 0.15056179775280898, "grad_norm": 4.280994415283203, "learning_rate": 1.3174157303370785e-05, "loss": 0.7139, "step": 67 }, { "epoch": 0.15280898876404495, "grad_norm": 4.288730621337891, "learning_rate": 1.3370786516853932e-05, "loss": 0.8241, "step": 68 }, { "epoch": 0.1550561797752809, "grad_norm": 3.7402424812316895, "learning_rate": 1.3567415730337077e-05, "loss": 0.6256, "step": 69 }, { "epoch": 0.15730337078651685, "grad_norm": 4.478890895843506, "learning_rate": 1.3764044943820223e-05, "loss": 0.8842, "step": 70 }, { "epoch": 0.15955056179775282, "grad_norm": 3.8147876262664795, "learning_rate": 1.3960674157303368e-05, "loss": 0.804, "step": 71 }, { "epoch": 0.16179775280898875, "grad_norm": 0.7314035296440125, "learning_rate": 1.4157303370786515e-05, "loss": 0.0989, "step": 72 }, { "epoch": 0.16404494382022472, "grad_norm": 3.074303150177002, "learning_rate": 1.4353932584269662e-05, "loss": 0.332, "step": 73 }, { "epoch": 0.1662921348314607, "grad_norm": 3.414987325668335, "learning_rate": 1.4550561797752808e-05, "loss": 0.5736, "step": 74 }, { "epoch": 0.16853932584269662, "grad_norm": 3.7946674823760986, "learning_rate": 1.4747191011235953e-05, "loss": 0.8285, "step": 75 }, { "epoch": 0.1707865168539326, "grad_norm": 4.310474395751953, "learning_rate": 1.49438202247191e-05, "loss": 0.9561, "step": 76 }, { "epoch": 0.17303370786516853, "grad_norm": 0.9791378974914551, "learning_rate": 1.5140449438202245e-05, "loss": 0.0633, "step": 77 }, { "epoch": 0.1752808988764045, "grad_norm": 0.6351795196533203, "learning_rate": 1.5337078651685393e-05, "loss": 0.0848, "step": 78 }, { "epoch": 0.17752808988764046, "grad_norm": 3.4832303524017334, "learning_rate": 1.553370786516854e-05, "loss": 0.8325, "step": 79 }, { "epoch": 0.1797752808988764, "grad_norm": 5.115800380706787, "learning_rate": 1.5730337078651683e-05, "loss": 1.0011, "step": 80 }, { "epoch": 0.18202247191011237, "grad_norm": 3.552396297454834, "learning_rate": 1.592696629213483e-05, "loss": 0.8697, "step": 81 }, { "epoch": 0.1842696629213483, "grad_norm": 4.491541862487793, "learning_rate": 1.6123595505617977e-05, "loss": 0.8344, "step": 82 }, { "epoch": 0.18651685393258427, "grad_norm": 4.73278284072876, "learning_rate": 1.6320224719101122e-05, "loss": 0.9967, "step": 83 }, { "epoch": 0.18876404494382024, "grad_norm": 2.994192123413086, "learning_rate": 1.6516853932584267e-05, "loss": 0.4638, "step": 84 }, { "epoch": 0.19101123595505617, "grad_norm": 4.142394542694092, "learning_rate": 1.6713483146067415e-05, "loss": 0.8994, "step": 85 }, { "epoch": 0.19325842696629214, "grad_norm": 4.149839401245117, "learning_rate": 1.691011235955056e-05, "loss": 0.7789, "step": 86 }, { "epoch": 0.19550561797752808, "grad_norm": 0.45795938372612, "learning_rate": 1.7106741573033705e-05, "loss": 0.0555, "step": 87 }, { "epoch": 0.19775280898876405, "grad_norm": 3.4293618202209473, "learning_rate": 1.7303370786516853e-05, "loss": 0.3778, "step": 88 }, { "epoch": 0.2, "grad_norm": 4.041529655456543, "learning_rate": 1.75e-05, "loss": 0.708, "step": 89 }, { "epoch": 0.20224719101123595, "grad_norm": 0.6160458922386169, "learning_rate": 1.7696629213483143e-05, "loss": 0.0689, "step": 90 }, { "epoch": 0.20224719101123595, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.74173903465271, "eval_VitaminC_cosine_ap": 0.5513770735348443, "eval_VitaminC_cosine_f1": 0.6675531914893617, "eval_VitaminC_cosine_f1_threshold": 0.32480987906455994, "eval_VitaminC_cosine_precision": 0.500998003992016, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.560546875, "eval_VitaminC_dot_accuracy_threshold": 297.664794921875, "eval_VitaminC_dot_ap": 0.5340088824099496, "eval_VitaminC_dot_f1": 0.6666666666666667, "eval_VitaminC_dot_f1_threshold": 126.67618560791016, "eval_VitaminC_dot_precision": 0.501002004008016, "eval_VitaminC_dot_recall": 0.9960159362549801, "eval_VitaminC_euclidean_accuracy": 0.55859375, "eval_VitaminC_euclidean_accuracy_threshold": 14.345688819885254, "eval_VitaminC_euclidean_ap": 0.5542145004976253, "eval_VitaminC_euclidean_f1": 0.6675531914893617, "eval_VitaminC_euclidean_f1_threshold": 23.381019592285156, "eval_VitaminC_euclidean_precision": 0.500998003992016, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 232.7296142578125, "eval_VitaminC_manhattan_ap": 0.5523953693907266, "eval_VitaminC_manhattan_f1": 0.6657824933687002, "eval_VitaminC_manhattan_f1_threshold": 496.4290466308594, "eval_VitaminC_manhattan_precision": 0.4990059642147117, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.560546875, "eval_VitaminC_max_accuracy_threshold": 297.664794921875, "eval_VitaminC_max_ap": 0.5542145004976253, "eval_VitaminC_max_f1": 0.6675531914893617, "eval_VitaminC_max_f1_threshold": 496.4290466308594, "eval_VitaminC_max_precision": 0.501002004008016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5542145004976253, "eval_sts-test_pearson_cosine": 0.8800782580988616, "eval_sts-test_pearson_dot": 0.8687642290872662, "eval_sts-test_pearson_euclidean": 0.9034088230546415, "eval_sts-test_pearson_manhattan": 0.9030146212284895, "eval_sts-test_pearson_max": 0.9034088230546415, "eval_sts-test_spearman_cosine": 0.904560289590133, "eval_sts-test_spearman_dot": 0.8705944849554133, "eval_sts-test_spearman_euclidean": 0.8998959103665689, "eval_sts-test_spearman_manhattan": 0.8995891404697307, "eval_sts-test_spearman_max": 0.904560289590133, "eval_vitaminc-pairs_loss": 1.6141985654830933, "eval_vitaminc-pairs_runtime": 1.864, "eval_vitaminc-pairs_samples_per_second": 57.94, "eval_vitaminc-pairs_steps_per_second": 1.073, "step": 90 }, { "epoch": 0.20224719101123595, "eval_negation-triplets_loss": 0.9220322370529175, "eval_negation-triplets_runtime": 0.3199, "eval_negation-triplets_samples_per_second": 200.043, "eval_negation-triplets_steps_per_second": 3.126, "step": 90 }, { "epoch": 0.20224719101123595, "eval_scitail-pairs-pos_loss": 0.0654294565320015, "eval_scitail-pairs-pos_runtime": 0.4625, "eval_scitail-pairs-pos_samples_per_second": 116.76, "eval_scitail-pairs-pos_steps_per_second": 2.162, "step": 90 }, { "epoch": 0.20224719101123595, "eval_scitail-pairs-qa_loss": 0.0015887805493548512, "eval_scitail-pairs-qa_runtime": 0.5768, "eval_scitail-pairs-qa_samples_per_second": 221.899, "eval_scitail-pairs-qa_steps_per_second": 3.467, "step": 90 }, { "epoch": 0.20224719101123595, "eval_xsum-pairs_loss": 0.03991687670350075, "eval_xsum-pairs_runtime": 2.7403, "eval_xsum-pairs_samples_per_second": 46.71, "eval_xsum-pairs_steps_per_second": 0.73, "step": 90 }, { "epoch": 0.20224719101123595, "eval_sciq_pairs_loss": 0.01584962010383606, "eval_sciq_pairs_runtime": 2.8429, "eval_sciq_pairs_samples_per_second": 45.024, "eval_sciq_pairs_steps_per_second": 0.703, "step": 90 }, { "epoch": 0.20224719101123595, "eval_qasc_pairs_loss": 0.09112343192100525, "eval_qasc_pairs_runtime": 0.6492, "eval_qasc_pairs_samples_per_second": 197.154, "eval_qasc_pairs_steps_per_second": 3.081, "step": 90 }, { "epoch": 0.20224719101123595, "eval_openbookqa_pairs_loss": 0.7132729887962341, "eval_openbookqa_pairs_runtime": 0.5847, "eval_openbookqa_pairs_samples_per_second": 218.922, "eval_openbookqa_pairs_steps_per_second": 3.421, "step": 90 }, { "epoch": 0.20224719101123595, "eval_msmarco_pairs_loss": 0.15173853933811188, "eval_msmarco_pairs_runtime": 1.4966, "eval_msmarco_pairs_samples_per_second": 85.527, "eval_msmarco_pairs_steps_per_second": 1.336, "step": 90 }, { "epoch": 0.20224719101123595, "eval_nq_pairs_loss": 0.09653442353010178, "eval_nq_pairs_runtime": 2.3749, "eval_nq_pairs_samples_per_second": 53.897, "eval_nq_pairs_steps_per_second": 0.842, "step": 90 }, { "epoch": 0.20224719101123595, "eval_trivia_pairs_loss": 0.5191965699195862, "eval_trivia_pairs_runtime": 3.6006, "eval_trivia_pairs_samples_per_second": 35.55, "eval_trivia_pairs_steps_per_second": 0.555, "step": 90 }, { "epoch": 0.20224719101123595, "eval_gooaq_pairs_loss": 0.30713126063346863, "eval_gooaq_pairs_runtime": 0.9131, "eval_gooaq_pairs_samples_per_second": 140.178, "eval_gooaq_pairs_steps_per_second": 2.19, "step": 90 }, { "epoch": 0.20224719101123595, "eval_paws-pos_loss": 0.024471310898661613, "eval_paws-pos_runtime": 0.6872, "eval_paws-pos_samples_per_second": 186.254, "eval_paws-pos_steps_per_second": 2.91, "step": 90 }, { "epoch": 0.20449438202247192, "grad_norm": 6.209661483764648, "learning_rate": 1.7893258426966292e-05, "loss": 2.3489, "step": 91 }, { "epoch": 0.20674157303370785, "grad_norm": 3.1821141242980957, "learning_rate": 1.8089887640449437e-05, "loss": 0.741, "step": 92 }, { "epoch": 0.20898876404494382, "grad_norm": 3.871994972229004, "learning_rate": 1.8286516853932585e-05, "loss": 0.7729, "step": 93 }, { "epoch": 0.21123595505617979, "grad_norm": 0.5280765891075134, "learning_rate": 1.8483146067415727e-05, "loss": 0.0631, "step": 94 }, { "epoch": 0.21348314606741572, "grad_norm": 4.475915431976318, "learning_rate": 1.8679775280898875e-05, "loss": 0.9342, "step": 95 }, { "epoch": 0.2157303370786517, "grad_norm": 3.949381113052368, "learning_rate": 1.887640449438202e-05, "loss": 0.8581, "step": 96 }, { "epoch": 0.21797752808988763, "grad_norm": 2.910426616668701, "learning_rate": 1.907303370786517e-05, "loss": 0.5198, "step": 97 }, { "epoch": 0.2202247191011236, "grad_norm": 4.028941631317139, "learning_rate": 1.9269662921348313e-05, "loss": 0.846, "step": 98 }, { "epoch": 0.22247191011235956, "grad_norm": 4.183433532714844, "learning_rate": 1.946629213483146e-05, "loss": 0.6581, "step": 99 }, { "epoch": 0.2247191011235955, "grad_norm": 3.348114252090454, "learning_rate": 1.9662921348314607e-05, "loss": 0.3579, "step": 100 }, { "epoch": 0.22696629213483147, "grad_norm": 4.055211544036865, "learning_rate": 1.9859550561797752e-05, "loss": 0.908, "step": 101 }, { "epoch": 0.2292134831460674, "grad_norm": 1.0024710893630981, "learning_rate": 2.0056179775280897e-05, "loss": 0.0664, "step": 102 }, { "epoch": 0.23146067415730337, "grad_norm": 3.582249641418457, "learning_rate": 2.0252808988764042e-05, "loss": 0.5411, "step": 103 }, { "epoch": 0.23370786516853934, "grad_norm": 4.226349830627441, "learning_rate": 2.044943820224719e-05, "loss": 0.9163, "step": 104 }, { "epoch": 0.23595505617977527, "grad_norm": 3.002727508544922, "learning_rate": 2.0646067415730335e-05, "loss": 0.7975, "step": 105 }, { "epoch": 0.23820224719101124, "grad_norm": 3.5497515201568604, "learning_rate": 2.084269662921348e-05, "loss": 0.37, "step": 106 }, { "epoch": 0.24044943820224718, "grad_norm": 4.381045341491699, "learning_rate": 2.103932584269663e-05, "loss": 0.8495, "step": 107 }, { "epoch": 0.24269662921348314, "grad_norm": 3.926840305328369, "learning_rate": 2.1235955056179773e-05, "loss": 0.8073, "step": 108 }, { "epoch": 0.2449438202247191, "grad_norm": 3.0835390090942383, "learning_rate": 2.1432584269662922e-05, "loss": 0.7563, "step": 109 }, { "epoch": 0.24719101123595505, "grad_norm": 4.230669975280762, "learning_rate": 2.1629213483146063e-05, "loss": 0.6585, "step": 110 }, { "epoch": 0.24943820224719102, "grad_norm": 2.8849070072174072, "learning_rate": 2.1825842696629212e-05, "loss": 0.3246, "step": 111 }, { "epoch": 0.251685393258427, "grad_norm": 4.796951770782471, "learning_rate": 2.2022471910112357e-05, "loss": 0.9718, "step": 112 }, { "epoch": 0.2539325842696629, "grad_norm": 4.60318660736084, "learning_rate": 2.2219101123595505e-05, "loss": 0.8584, "step": 113 }, { "epoch": 0.25617977528089886, "grad_norm": 3.098703384399414, "learning_rate": 2.241573033707865e-05, "loss": 0.3385, "step": 114 }, { "epoch": 0.25842696629213485, "grad_norm": 2.9519224166870117, "learning_rate": 2.2612359550561795e-05, "loss": 0.323, "step": 115 }, { "epoch": 0.2606741573033708, "grad_norm": 2.913742780685425, "learning_rate": 2.2808988764044944e-05, "loss": 0.3359, "step": 116 }, { "epoch": 0.26292134831460673, "grad_norm": 4.148440837860107, "learning_rate": 2.300561797752809e-05, "loss": 0.6955, "step": 117 }, { "epoch": 0.2651685393258427, "grad_norm": 0.8463248610496521, "learning_rate": 2.3202247191011234e-05, "loss": 0.0539, "step": 118 }, { "epoch": 0.26741573033707866, "grad_norm": 0.7284589409828186, "learning_rate": 2.339887640449438e-05, "loss": 0.0507, "step": 119 }, { "epoch": 0.2696629213483146, "grad_norm": 3.615086317062378, "learning_rate": 2.3595505617977527e-05, "loss": 0.314, "step": 120 }, { "epoch": 0.27191011235955054, "grad_norm": 5.229820728302002, "learning_rate": 2.3792134831460672e-05, "loss": 1.0339, "step": 121 }, { "epoch": 0.27415730337078653, "grad_norm": 3.6847782135009766, "learning_rate": 2.3988764044943817e-05, "loss": 0.3158, "step": 122 }, { "epoch": 0.27640449438202247, "grad_norm": 4.280517578125, "learning_rate": 2.4185393258426965e-05, "loss": 0.7809, "step": 123 }, { "epoch": 0.2786516853932584, "grad_norm": 4.476150035858154, "learning_rate": 2.438202247191011e-05, "loss": 0.9516, "step": 124 }, { "epoch": 0.2808988764044944, "grad_norm": 2.7380239963531494, "learning_rate": 2.457865168539326e-05, "loss": 0.3117, "step": 125 }, { "epoch": 0.28314606741573034, "grad_norm": 3.9667162895202637, "learning_rate": 2.47752808988764e-05, "loss": 0.8366, "step": 126 }, { "epoch": 0.2853932584269663, "grad_norm": 4.552999019622803, "learning_rate": 2.497191011235955e-05, "loss": 0.8033, "step": 127 }, { "epoch": 0.2876404494382023, "grad_norm": 3.4238576889038086, "learning_rate": 2.5168539325842694e-05, "loss": 0.7253, "step": 128 }, { "epoch": 0.2898876404494382, "grad_norm": 4.677807331085205, "learning_rate": 2.5365168539325842e-05, "loss": 0.8345, "step": 129 }, { "epoch": 0.29213483146067415, "grad_norm": 4.282113075256348, "learning_rate": 2.5561797752808987e-05, "loss": 0.7532, "step": 130 }, { "epoch": 0.2943820224719101, "grad_norm": 4.375221252441406, "learning_rate": 2.5758426966292132e-05, "loss": 0.8247, "step": 131 }, { "epoch": 0.2966292134831461, "grad_norm": 3.2591633796691895, "learning_rate": 2.595505617977528e-05, "loss": 0.5175, "step": 132 }, { "epoch": 0.298876404494382, "grad_norm": 4.146636962890625, "learning_rate": 2.6151685393258425e-05, "loss": 0.7813, "step": 133 }, { "epoch": 0.30112359550561796, "grad_norm": 4.2413249015808105, "learning_rate": 2.634831460674157e-05, "loss": 0.6582, "step": 134 }, { "epoch": 0.30337078651685395, "grad_norm": 4.541455268859863, "learning_rate": 2.6544943820224715e-05, "loss": 0.3484, "step": 135 }, { "epoch": 0.30337078651685395, "eval_VitaminC_cosine_accuracy": 0.560546875, "eval_VitaminC_cosine_accuracy_threshold": 0.7956135272979736, "eval_VitaminC_cosine_ap": 0.5505565383154402, "eval_VitaminC_cosine_f1": 0.6684709066305818, "eval_VitaminC_cosine_f1_threshold": 0.40466147661209106, "eval_VitaminC_cosine_precision": 0.5061475409836066, "eval_VitaminC_cosine_recall": 0.9840637450199203, "eval_VitaminC_dot_accuracy": 0.5546875, "eval_VitaminC_dot_accuracy_threshold": 312.2774658203125, "eval_VitaminC_dot_ap": 0.5365135091766033, "eval_VitaminC_dot_f1": 0.6684856753069577, "eval_VitaminC_dot_f1_threshold": 157.33203125, "eval_VitaminC_dot_precision": 0.508298755186722, "eval_VitaminC_dot_recall": 0.9760956175298805, "eval_VitaminC_euclidean_accuracy": 0.5546875, "eval_VitaminC_euclidean_accuracy_threshold": 12.184114456176758, "eval_VitaminC_euclidean_ap": 0.5517706579195627, "eval_VitaminC_euclidean_f1": 0.6649006622516557, "eval_VitaminC_euclidean_f1_threshold": 23.68879508972168, "eval_VitaminC_euclidean_precision": 0.498015873015873, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5546875, "eval_VitaminC_manhattan_accuracy_threshold": 241.22061157226562, "eval_VitaminC_manhattan_ap": 0.5494156168773414, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 510.2530212402344, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.560546875, "eval_VitaminC_max_accuracy_threshold": 312.2774658203125, "eval_VitaminC_max_ap": 0.5517706579195627, "eval_VitaminC_max_f1": 0.6684856753069577, "eval_VitaminC_max_f1_threshold": 510.2530212402344, "eval_VitaminC_max_precision": 0.508298755186722, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5517706579195627, "eval_sts-test_pearson_cosine": 0.8812438499723412, "eval_sts-test_pearson_dot": 0.8695651753004092, "eval_sts-test_pearson_euclidean": 0.9036940037118162, "eval_sts-test_pearson_manhattan": 0.9035516699922166, "eval_sts-test_pearson_max": 0.9036940037118162, "eval_sts-test_spearman_cosine": 0.9049742835092648, "eval_sts-test_spearman_dot": 0.8707925987895928, "eval_sts-test_spearman_euclidean": 0.9003956924537878, "eval_sts-test_spearman_manhattan": 0.9002747745455083, "eval_sts-test_spearman_max": 0.9049742835092648, "eval_vitaminc-pairs_loss": 1.5520410537719727, "eval_vitaminc-pairs_runtime": 1.8323, "eval_vitaminc-pairs_samples_per_second": 58.943, "eval_vitaminc-pairs_steps_per_second": 1.092, "step": 135 }, { "epoch": 0.30337078651685395, "eval_negation-triplets_loss": 0.9211694002151489, "eval_negation-triplets_runtime": 0.2923, "eval_negation-triplets_samples_per_second": 218.93, "eval_negation-triplets_steps_per_second": 3.421, "step": 135 }, { "epoch": 0.30337078651685395, "eval_scitail-pairs-pos_loss": 0.07377135753631592, "eval_scitail-pairs-pos_runtime": 0.3681, "eval_scitail-pairs-pos_samples_per_second": 146.691, "eval_scitail-pairs-pos_steps_per_second": 2.716, "step": 135 }, { "epoch": 0.30337078651685395, "eval_scitail-pairs-qa_loss": 0.00150959100574255, "eval_scitail-pairs-qa_runtime": 0.5123, "eval_scitail-pairs-qa_samples_per_second": 249.842, "eval_scitail-pairs-qa_steps_per_second": 3.904, "step": 135 }, { "epoch": 0.30337078651685395, "eval_xsum-pairs_loss": 0.036599572747945786, "eval_xsum-pairs_runtime": 2.7238, "eval_xsum-pairs_samples_per_second": 46.994, "eval_xsum-pairs_steps_per_second": 0.734, "step": 135 }, { "epoch": 0.30337078651685395, "eval_sciq_pairs_loss": 0.01615014858543873, "eval_sciq_pairs_runtime": 2.8064, "eval_sciq_pairs_samples_per_second": 45.61, "eval_sciq_pairs_steps_per_second": 0.713, "step": 135 }, { "epoch": 0.30337078651685395, "eval_qasc_pairs_loss": 0.09235507994890213, "eval_qasc_pairs_runtime": 0.6488, "eval_qasc_pairs_samples_per_second": 197.276, "eval_qasc_pairs_steps_per_second": 3.082, "step": 135 }, { "epoch": 0.30337078651685395, "eval_openbookqa_pairs_loss": 0.6891775727272034, "eval_openbookqa_pairs_runtime": 0.5698, "eval_openbookqa_pairs_samples_per_second": 224.641, "eval_openbookqa_pairs_steps_per_second": 3.51, "step": 135 }, { "epoch": 0.30337078651685395, "eval_msmarco_pairs_loss": 0.16766037046909332, "eval_msmarco_pairs_runtime": 1.4798, "eval_msmarco_pairs_samples_per_second": 86.499, "eval_msmarco_pairs_steps_per_second": 1.352, "step": 135 }, { "epoch": 0.30337078651685395, "eval_nq_pairs_loss": 0.09737721085548401, "eval_nq_pairs_runtime": 2.3409, "eval_nq_pairs_samples_per_second": 54.68, "eval_nq_pairs_steps_per_second": 0.854, "step": 135 }, { "epoch": 0.30337078651685395, "eval_trivia_pairs_loss": 0.5458433032035828, "eval_trivia_pairs_runtime": 3.5771, "eval_trivia_pairs_samples_per_second": 35.783, "eval_trivia_pairs_steps_per_second": 0.559, "step": 135 }, { "epoch": 0.30337078651685395, "eval_gooaq_pairs_loss": 0.3082329332828522, "eval_gooaq_pairs_runtime": 0.9181, "eval_gooaq_pairs_samples_per_second": 139.413, "eval_gooaq_pairs_steps_per_second": 2.178, "step": 135 }, { "epoch": 0.30337078651685395, "eval_paws-pos_loss": 0.02423396334052086, "eval_paws-pos_runtime": 0.6827, "eval_paws-pos_samples_per_second": 187.501, "eval_paws-pos_steps_per_second": 2.93, "step": 135 }, { "epoch": 0.3056179775280899, "grad_norm": 4.549901485443115, "learning_rate": 2.6741573033707864e-05, "loss": 0.7648, "step": 136 }, { "epoch": 0.30786516853932583, "grad_norm": 3.225851535797119, "learning_rate": 2.693820224719101e-05, "loss": 0.7554, "step": 137 }, { "epoch": 0.3101123595505618, "grad_norm": 0.6228423118591309, "learning_rate": 2.7134831460674154e-05, "loss": 0.0753, "step": 138 }, { "epoch": 0.31235955056179776, "grad_norm": 3.12802791595459, "learning_rate": 2.7331460674157302e-05, "loss": 0.4987, "step": 139 }, { "epoch": 0.3146067415730337, "grad_norm": 4.1997880935668945, "learning_rate": 2.7528089887640447e-05, "loss": 0.8543, "step": 140 }, { "epoch": 0.31685393258426964, "grad_norm": 4.3362860679626465, "learning_rate": 2.7724719101123595e-05, "loss": 0.9425, "step": 141 }, { "epoch": 0.31910112359550563, "grad_norm": 0.5599316954612732, "learning_rate": 2.7921348314606737e-05, "loss": 0.0472, "step": 142 }, { "epoch": 0.32134831460674157, "grad_norm": 3.503603458404541, "learning_rate": 2.8117977528089885e-05, "loss": 0.848, "step": 143 }, { "epoch": 0.3235955056179775, "grad_norm": 4.712310314178467, "learning_rate": 2.831460674157303e-05, "loss": 0.8946, "step": 144 }, { "epoch": 0.3258426966292135, "grad_norm": 3.1823527812957764, "learning_rate": 2.851123595505618e-05, "loss": 0.7841, "step": 145 }, { "epoch": 0.32808988764044944, "grad_norm": 4.423196315765381, "learning_rate": 2.8707865168539324e-05, "loss": 0.6653, "step": 146 }, { "epoch": 0.3303370786516854, "grad_norm": 4.137822151184082, "learning_rate": 2.890449438202247e-05, "loss": 0.3522, "step": 147 }, { "epoch": 0.3325842696629214, "grad_norm": 2.997777223587036, "learning_rate": 2.9101123595505617e-05, "loss": 0.4853, "step": 148 }, { "epoch": 0.3348314606741573, "grad_norm": 2.89650559425354, "learning_rate": 2.9297752808988762e-05, "loss": 0.4726, "step": 149 }, { "epoch": 0.33707865168539325, "grad_norm": 5.486624717712402, "learning_rate": 2.9494382022471907e-05, "loss": 0.8693, "step": 150 }, { "epoch": 0.3393258426966292, "grad_norm": 4.800889015197754, "learning_rate": 2.9691011235955052e-05, "loss": 0.8124, "step": 151 }, { "epoch": 0.3415730337078652, "grad_norm": 4.188066005706787, "learning_rate": 2.98876404494382e-05, "loss": 0.8206, "step": 152 }, { "epoch": 0.3438202247191011, "grad_norm": 4.340461254119873, "learning_rate": 3.0084269662921345e-05, "loss": 0.9406, "step": 153 }, { "epoch": 0.34606741573033706, "grad_norm": 4.658304214477539, "learning_rate": 3.028089887640449e-05, "loss": 0.7944, "step": 154 }, { "epoch": 0.34831460674157305, "grad_norm": 0.6266987919807434, "learning_rate": 3.047752808988764e-05, "loss": 0.0766, "step": 155 }, { "epoch": 0.350561797752809, "grad_norm": 4.252346515655518, "learning_rate": 3.067415730337079e-05, "loss": 0.8609, "step": 156 }, { "epoch": 0.35280898876404493, "grad_norm": 4.9649658203125, "learning_rate": 3.087078651685393e-05, "loss": 1.0533, "step": 157 }, { "epoch": 0.3550561797752809, "grad_norm": 4.485607624053955, "learning_rate": 3.106741573033708e-05, "loss": 0.8396, "step": 158 }, { "epoch": 0.35730337078651686, "grad_norm": 3.241231918334961, "learning_rate": 3.126404494382022e-05, "loss": 0.7865, "step": 159 }, { "epoch": 0.3595505617977528, "grad_norm": 6.846582889556885, "learning_rate": 3.146067415730337e-05, "loss": 2.4616, "step": 160 }, { "epoch": 0.36179775280898874, "grad_norm": 0.5514687895774841, "learning_rate": 3.165730337078651e-05, "loss": 0.0556, "step": 161 }, { "epoch": 0.36404494382022473, "grad_norm": 3.7877562046051025, "learning_rate": 3.185393258426966e-05, "loss": 0.3758, "step": 162 }, { "epoch": 0.36629213483146067, "grad_norm": 5.397939682006836, "learning_rate": 3.205056179775281e-05, "loss": 0.9312, "step": 163 }, { "epoch": 0.3685393258426966, "grad_norm": 4.301459312438965, "learning_rate": 3.2247191011235954e-05, "loss": 0.7993, "step": 164 }, { "epoch": 0.3707865168539326, "grad_norm": 4.49428129196167, "learning_rate": 3.24438202247191e-05, "loss": 0.8104, "step": 165 }, { "epoch": 0.37303370786516854, "grad_norm": 3.2210912704467773, "learning_rate": 3.2640449438202244e-05, "loss": 0.8199, "step": 166 }, { "epoch": 0.3752808988764045, "grad_norm": 5.359859466552734, "learning_rate": 3.283707865168539e-05, "loss": 1.0724, "step": 167 }, { "epoch": 0.3775280898876405, "grad_norm": 4.00059700012207, "learning_rate": 3.3033707865168534e-05, "loss": 0.3521, "step": 168 }, { "epoch": 0.3797752808988764, "grad_norm": 4.418768882751465, "learning_rate": 3.3230337078651685e-05, "loss": 0.8536, "step": 169 }, { "epoch": 0.38202247191011235, "grad_norm": 4.15454626083374, "learning_rate": 3.342696629213483e-05, "loss": 0.872, "step": 170 }, { "epoch": 0.3842696629213483, "grad_norm": 3.8060054779052734, "learning_rate": 3.3623595505617975e-05, "loss": 0.8009, "step": 171 }, { "epoch": 0.3865168539325843, "grad_norm": 3.584745407104492, "learning_rate": 3.382022471910112e-05, "loss": 0.7798, "step": 172 }, { "epoch": 0.3887640449438202, "grad_norm": 4.861410140991211, "learning_rate": 3.4016853932584265e-05, "loss": 0.5953, "step": 173 }, { "epoch": 0.39101123595505616, "grad_norm": 3.983793020248413, "learning_rate": 3.421348314606741e-05, "loss": 0.7562, "step": 174 }, { "epoch": 0.39325842696629215, "grad_norm": 4.841738224029541, "learning_rate": 3.4410112359550555e-05, "loss": 0.7227, "step": 175 }, { "epoch": 0.3955056179775281, "grad_norm": 4.787370204925537, "learning_rate": 3.460674157303371e-05, "loss": 0.8953, "step": 176 }, { "epoch": 0.39775280898876403, "grad_norm": 4.337812900543213, "learning_rate": 3.480337078651685e-05, "loss": 0.7102, "step": 177 }, { "epoch": 0.4, "grad_norm": 0.9599294662475586, "learning_rate": 3.5e-05, "loss": 0.0667, "step": 178 }, { "epoch": 0.40224719101123596, "grad_norm": 0.6864398717880249, "learning_rate": 3.4999863718440846e-05, "loss": 0.0528, "step": 179 }, { "epoch": 0.4044943820224719, "grad_norm": 4.738316059112549, "learning_rate": 3.499945487641664e-05, "loss": 0.7312, "step": 180 }, { "epoch": 0.4044943820224719, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.8256886005401611, "eval_VitaminC_cosine_ap": 0.5557251062538118, "eval_VitaminC_cosine_f1": 0.6666666666666667, "eval_VitaminC_cosine_f1_threshold": 0.4391498863697052, "eval_VitaminC_cosine_precision": 0.5051334702258727, "eval_VitaminC_cosine_recall": 0.9800796812749004, "eval_VitaminC_dot_accuracy": 0.556640625, "eval_VitaminC_dot_accuracy_threshold": 314.2790832519531, "eval_VitaminC_dot_ap": 0.5397120960874565, "eval_VitaminC_dot_f1": 0.6684636118598383, "eval_VitaminC_dot_f1_threshold": 144.02464294433594, "eval_VitaminC_dot_precision": 0.505091649694501, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.560546875, "eval_VitaminC_euclidean_accuracy_threshold": 13.859346389770508, "eval_VitaminC_euclidean_ap": 0.5582755831276058, "eval_VitaminC_euclidean_f1": 0.667605633802817, "eval_VitaminC_euclidean_f1_threshold": 18.874879837036133, "eval_VitaminC_euclidean_precision": 0.5163398692810458, "eval_VitaminC_euclidean_recall": 0.9442231075697212, "eval_VitaminC_manhattan_accuracy": 0.560546875, "eval_VitaminC_manhattan_accuracy_threshold": 239.6153564453125, "eval_VitaminC_manhattan_ap": 0.5569115785564898, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 501.158447265625, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.560546875, "eval_VitaminC_max_accuracy_threshold": 314.2790832519531, "eval_VitaminC_max_ap": 0.5582755831276058, "eval_VitaminC_max_f1": 0.6684636118598383, "eval_VitaminC_max_f1_threshold": 501.158447265625, "eval_VitaminC_max_precision": 0.5163398692810458, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5582755831276058, "eval_sts-test_pearson_cosine": 0.8825432226222443, "eval_sts-test_pearson_dot": 0.8720125241659442, "eval_sts-test_pearson_euclidean": 0.9053801707227738, "eval_sts-test_pearson_manhattan": 0.9060044572091359, "eval_sts-test_pearson_max": 0.9060044572091359, "eval_sts-test_spearman_cosine": 0.9055030196626042, "eval_sts-test_spearman_dot": 0.8729395405548455, "eval_sts-test_spearman_euclidean": 0.9013990604854444, "eval_sts-test_spearman_manhattan": 0.9021052353902007, "eval_sts-test_spearman_max": 0.9055030196626042, "eval_vitaminc-pairs_loss": 1.5215541124343872, "eval_vitaminc-pairs_runtime": 1.8745, "eval_vitaminc-pairs_samples_per_second": 57.614, "eval_vitaminc-pairs_steps_per_second": 1.067, "step": 180 }, { "epoch": 0.4044943820224719, "eval_negation-triplets_loss": 0.9813100099563599, "eval_negation-triplets_runtime": 0.3009, "eval_negation-triplets_samples_per_second": 212.73, "eval_negation-triplets_steps_per_second": 3.324, "step": 180 }, { "epoch": 0.4044943820224719, "eval_scitail-pairs-pos_loss": 0.09161412715911865, "eval_scitail-pairs-pos_runtime": 0.3936, "eval_scitail-pairs-pos_samples_per_second": 137.188, "eval_scitail-pairs-pos_steps_per_second": 2.541, "step": 180 }, { "epoch": 0.4044943820224719, "eval_scitail-pairs-qa_loss": 0.0013133077882230282, "eval_scitail-pairs-qa_runtime": 0.5286, "eval_scitail-pairs-qa_samples_per_second": 242.147, "eval_scitail-pairs-qa_steps_per_second": 3.784, "step": 180 }, { "epoch": 0.4044943820224719, "eval_xsum-pairs_loss": 0.049595557153224945, "eval_xsum-pairs_runtime": 2.7447, "eval_xsum-pairs_samples_per_second": 46.636, "eval_xsum-pairs_steps_per_second": 0.729, "step": 180 }, { "epoch": 0.4044943820224719, "eval_sciq_pairs_loss": 0.017273178324103355, "eval_sciq_pairs_runtime": 2.8401, "eval_sciq_pairs_samples_per_second": 45.069, "eval_sciq_pairs_steps_per_second": 0.704, "step": 180 }, { "epoch": 0.4044943820224719, "eval_qasc_pairs_loss": 0.09485691040754318, "eval_qasc_pairs_runtime": 0.6594, "eval_qasc_pairs_samples_per_second": 194.113, "eval_qasc_pairs_steps_per_second": 3.033, "step": 180 }, { "epoch": 0.4044943820224719, "eval_openbookqa_pairs_loss": 0.7253161072731018, "eval_openbookqa_pairs_runtime": 0.5801, "eval_openbookqa_pairs_samples_per_second": 220.633, "eval_openbookqa_pairs_steps_per_second": 3.447, "step": 180 }, { "epoch": 0.4044943820224719, "eval_msmarco_pairs_loss": 0.17383378744125366, "eval_msmarco_pairs_runtime": 1.4824, "eval_msmarco_pairs_samples_per_second": 86.346, "eval_msmarco_pairs_steps_per_second": 1.349, "step": 180 }, { "epoch": 0.4044943820224719, "eval_nq_pairs_loss": 0.10324681550264359, "eval_nq_pairs_runtime": 2.3542, "eval_nq_pairs_samples_per_second": 54.372, "eval_nq_pairs_steps_per_second": 0.85, "step": 180 }, { "epoch": 0.4044943820224719, "eval_trivia_pairs_loss": 0.5358972549438477, "eval_trivia_pairs_runtime": 3.5881, "eval_trivia_pairs_samples_per_second": 35.673, "eval_trivia_pairs_steps_per_second": 0.557, "step": 180 }, { "epoch": 0.4044943820224719, "eval_gooaq_pairs_loss": 0.3070329427719116, "eval_gooaq_pairs_runtime": 0.9009, "eval_gooaq_pairs_samples_per_second": 142.079, "eval_gooaq_pairs_steps_per_second": 2.22, "step": 180 }, { "epoch": 0.4044943820224719, "eval_paws-pos_loss": 0.024055125191807747, "eval_paws-pos_runtime": 0.6792, "eval_paws-pos_samples_per_second": 188.469, "eval_paws-pos_steps_per_second": 2.945, "step": 180 }, { "epoch": 0.4067415730337079, "grad_norm": 5.063413143157959, "learning_rate": 3.4998773481887046e-05, "loss": 0.7809, "step": 181 }, { "epoch": 0.40898876404494383, "grad_norm": 4.108719825744629, "learning_rate": 3.499781954811798e-05, "loss": 0.8333, "step": 182 }, { "epoch": 0.41123595505617977, "grad_norm": 4.6362104415893555, "learning_rate": 3.499659309368139e-05, "loss": 0.9283, "step": 183 }, { "epoch": 0.4134831460674157, "grad_norm": 4.432968616485596, "learning_rate": 3.499509414245486e-05, "loss": 0.7011, "step": 184 }, { "epoch": 0.4157303370786517, "grad_norm": 4.040768623352051, "learning_rate": 3.4993322723621164e-05, "loss": 0.8413, "step": 185 }, { "epoch": 0.41797752808988764, "grad_norm": 5.797406196594238, "learning_rate": 3.499127887166769e-05, "loss": 1.1679, "step": 186 }, { "epoch": 0.4202247191011236, "grad_norm": 4.275143623352051, "learning_rate": 3.498896262638578e-05, "loss": 0.8701, "step": 187 }, { "epoch": 0.42247191011235957, "grad_norm": 3.920672655105591, "learning_rate": 3.498637403286993e-05, "loss": 0.8139, "step": 188 }, { "epoch": 0.4247191011235955, "grad_norm": 4.049210071563721, "learning_rate": 3.498351314151693e-05, "loss": 0.664, "step": 189 }, { "epoch": 0.42696629213483145, "grad_norm": 4.007586479187012, "learning_rate": 3.498038000802489e-05, "loss": 0.3835, "step": 190 }, { "epoch": 0.42921348314606744, "grad_norm": 3.7303507328033447, "learning_rate": 3.497697469339215e-05, "loss": 0.8516, "step": 191 }, { "epoch": 0.4314606741573034, "grad_norm": 2.96820330619812, "learning_rate": 3.497329726391606e-05, "loss": 0.5479, "step": 192 }, { "epoch": 0.4337078651685393, "grad_norm": 5.242271423339844, "learning_rate": 3.496934779119175e-05, "loss": 0.8642, "step": 193 }, { "epoch": 0.43595505617977526, "grad_norm": 2.740006685256958, "learning_rate": 3.496512635211069e-05, "loss": 0.3121, "step": 194 }, { "epoch": 0.43820224719101125, "grad_norm": 4.162242889404297, "learning_rate": 3.496063302885921e-05, "loss": 0.6932, "step": 195 }, { "epoch": 0.4404494382022472, "grad_norm": 0.632938027381897, "learning_rate": 3.495586790891689e-05, "loss": 0.0647, "step": 196 }, { "epoch": 0.44269662921348313, "grad_norm": 4.595058917999268, "learning_rate": 3.495083108505487e-05, "loss": 0.8173, "step": 197 }, { "epoch": 0.4449438202247191, "grad_norm": 3.102372646331787, "learning_rate": 3.494552265533404e-05, "loss": 0.3122, "step": 198 }, { "epoch": 0.44719101123595506, "grad_norm": 4.9895830154418945, "learning_rate": 3.493994272310313e-05, "loss": 0.7852, "step": 199 }, { "epoch": 0.449438202247191, "grad_norm": 4.032258987426758, "learning_rate": 3.493409139699669e-05, "loss": 0.811, "step": 200 }, { "epoch": 0.451685393258427, "grad_norm": 4.17324161529541, "learning_rate": 3.4927968790932973e-05, "loss": 0.7564, "step": 201 }, { "epoch": 0.45393258426966293, "grad_norm": 0.49707159399986267, "learning_rate": 3.492157502411174e-05, "loss": 0.0541, "step": 202 }, { "epoch": 0.45617977528089887, "grad_norm": 3.847059965133667, "learning_rate": 3.491491022101194e-05, "loss": 0.9085, "step": 203 }, { "epoch": 0.4584269662921348, "grad_norm": 4.565647602081299, "learning_rate": 3.4907974511389224e-05, "loss": 0.8416, "step": 204 }, { "epoch": 0.4606741573033708, "grad_norm": 0.8872150778770447, "learning_rate": 3.4900768030273515e-05, "loss": 0.0569, "step": 205 }, { "epoch": 0.46292134831460674, "grad_norm": 3.2797999382019043, "learning_rate": 3.4893290917966305e-05, "loss": 0.7998, "step": 206 }, { "epoch": 0.4651685393258427, "grad_norm": 5.683195114135742, "learning_rate": 3.4885543320037956e-05, "loss": 0.7218, "step": 207 }, { "epoch": 0.46741573033707867, "grad_norm": 5.348382949829102, "learning_rate": 3.4877525387324844e-05, "loss": 0.9292, "step": 208 }, { "epoch": 0.4696629213483146, "grad_norm": 4.3047099113464355, "learning_rate": 3.486923727592647e-05, "loss": 0.8279, "step": 209 }, { "epoch": 0.47191011235955055, "grad_norm": 4.425166130065918, "learning_rate": 3.486067914720236e-05, "loss": 0.8452, "step": 210 }, { "epoch": 0.47415730337078654, "grad_norm": 5.7947916984558105, "learning_rate": 3.485185116776896e-05, "loss": 1.1099, "step": 211 }, { "epoch": 0.4764044943820225, "grad_norm": 4.257087230682373, "learning_rate": 3.4842753509496385e-05, "loss": 0.9436, "step": 212 }, { "epoch": 0.4786516853932584, "grad_norm": 4.357375144958496, "learning_rate": 3.483338634950507e-05, "loss": 0.8389, "step": 213 }, { "epoch": 0.48089887640449436, "grad_norm": 3.666268825531006, "learning_rate": 3.482374987016233e-05, "loss": 0.3297, "step": 214 }, { "epoch": 0.48314606741573035, "grad_norm": 3.0593607425689697, "learning_rate": 3.481384425907879e-05, "loss": 0.8098, "step": 215 }, { "epoch": 0.4853932584269663, "grad_norm": 0.4539957344532013, "learning_rate": 3.480366970910476e-05, "loss": 0.0386, "step": 216 }, { "epoch": 0.48764044943820223, "grad_norm": 3.3102784156799316, "learning_rate": 3.479322641832646e-05, "loss": 0.7752, "step": 217 }, { "epoch": 0.4898876404494382, "grad_norm": 3.8798298835754395, "learning_rate": 3.4782514590062165e-05, "loss": 0.8071, "step": 218 }, { "epoch": 0.49213483146067416, "grad_norm": 6.300197124481201, "learning_rate": 3.4771534432858255e-05, "loss": 2.571, "step": 219 }, { "epoch": 0.4943820224719101, "grad_norm": 4.163381099700928, "learning_rate": 3.4760286160485145e-05, "loss": 0.5912, "step": 220 }, { "epoch": 0.4966292134831461, "grad_norm": 3.5834686756134033, "learning_rate": 3.474876999193314e-05, "loss": 0.3792, "step": 221 }, { "epoch": 0.49887640449438203, "grad_norm": 4.494593143463135, "learning_rate": 3.473698615140816e-05, "loss": 0.7456, "step": 222 }, { "epoch": 0.501123595505618, "grad_norm": 3.909142017364502, "learning_rate": 3.4724934868327366e-05, "loss": 0.7207, "step": 223 }, { "epoch": 0.503370786516854, "grad_norm": 3.0387282371520996, "learning_rate": 3.47126163773147e-05, "loss": 0.3254, "step": 224 }, { "epoch": 0.5056179775280899, "grad_norm": 0.6529088616371155, "learning_rate": 3.4700030918196344e-05, "loss": 0.0461, "step": 225 }, { "epoch": 0.5056179775280899, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.8303268551826477, "eval_VitaminC_cosine_ap": 0.5509523400010791, "eval_VitaminC_cosine_f1": 0.6657824933687002, "eval_VitaminC_cosine_f1_threshold": 0.2634955048561096, "eval_VitaminC_cosine_precision": 0.4990059642147117, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 281.011474609375, "eval_VitaminC_dot_ap": 0.5281394234221073, "eval_VitaminC_dot_f1": 0.6711772665764546, "eval_VitaminC_dot_f1_threshold": 141.11529541015625, "eval_VitaminC_dot_precision": 0.5081967213114754, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.556640625, "eval_VitaminC_euclidean_accuracy_threshold": 11.405111312866211, "eval_VitaminC_euclidean_ap": 0.5573376843815556, "eval_VitaminC_euclidean_f1": 0.6640211640211641, "eval_VitaminC_euclidean_f1_threshold": 24.63976287841797, "eval_VitaminC_euclidean_precision": 0.497029702970297, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.556640625, "eval_VitaminC_manhattan_accuracy_threshold": 349.33441162109375, "eval_VitaminC_manhattan_ap": 0.5561637270496671, "eval_VitaminC_manhattan_f1": 0.6657824933687002, "eval_VitaminC_manhattan_f1_threshold": 505.0340270996094, "eval_VitaminC_manhattan_precision": 0.4990059642147117, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.556640625, "eval_VitaminC_max_accuracy_threshold": 349.33441162109375, "eval_VitaminC_max_ap": 0.5573376843815556, "eval_VitaminC_max_f1": 0.6711772665764546, "eval_VitaminC_max_f1_threshold": 505.0340270996094, "eval_VitaminC_max_precision": 0.5081967213114754, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5573376843815556, "eval_sts-test_pearson_cosine": 0.8848200869109313, "eval_sts-test_pearson_dot": 0.8723563516714744, "eval_sts-test_pearson_euclidean": 0.9070688973489409, "eval_sts-test_pearson_manhattan": 0.9073961699007848, "eval_sts-test_pearson_max": 0.9073961699007848, "eval_sts-test_spearman_cosine": 0.9050875937031079, "eval_sts-test_spearman_dot": 0.8699468894518183, "eval_sts-test_spearman_euclidean": 0.9020747597811932, "eval_sts-test_spearman_manhattan": 0.9019608230696907, "eval_sts-test_spearman_max": 0.9050875937031079, "eval_vitaminc-pairs_loss": 1.4897230863571167, "eval_vitaminc-pairs_runtime": 1.8927, "eval_vitaminc-pairs_samples_per_second": 57.062, "eval_vitaminc-pairs_steps_per_second": 1.057, "step": 225 }, { "epoch": 0.5056179775280899, "eval_negation-triplets_loss": 0.9457363486289978, "eval_negation-triplets_runtime": 0.3019, "eval_negation-triplets_samples_per_second": 212.002, "eval_negation-triplets_steps_per_second": 3.313, "step": 225 }, { "epoch": 0.5056179775280899, "eval_scitail-pairs-pos_loss": 0.07606112211942673, "eval_scitail-pairs-pos_runtime": 0.3972, "eval_scitail-pairs-pos_samples_per_second": 135.938, "eval_scitail-pairs-pos_steps_per_second": 2.517, "step": 225 }, { "epoch": 0.5056179775280899, "eval_scitail-pairs-qa_loss": 0.001212431932799518, "eval_scitail-pairs-qa_runtime": 0.5348, "eval_scitail-pairs-qa_samples_per_second": 239.347, "eval_scitail-pairs-qa_steps_per_second": 3.74, "step": 225 }, { "epoch": 0.5056179775280899, "eval_xsum-pairs_loss": 0.02758924476802349, "eval_xsum-pairs_runtime": 2.767, "eval_xsum-pairs_samples_per_second": 46.26, "eval_xsum-pairs_steps_per_second": 0.723, "step": 225 }, { "epoch": 0.5056179775280899, "eval_sciq_pairs_loss": 0.016450434923171997, "eval_sciq_pairs_runtime": 2.8812, "eval_sciq_pairs_samples_per_second": 44.426, "eval_sciq_pairs_steps_per_second": 0.694, "step": 225 }, { "epoch": 0.5056179775280899, "eval_qasc_pairs_loss": 0.09214109182357788, "eval_qasc_pairs_runtime": 0.6597, "eval_qasc_pairs_samples_per_second": 194.029, "eval_qasc_pairs_steps_per_second": 3.032, "step": 225 }, { "epoch": 0.5056179775280899, "eval_openbookqa_pairs_loss": 0.7429620623588562, "eval_openbookqa_pairs_runtime": 0.5947, "eval_openbookqa_pairs_samples_per_second": 215.22, "eval_openbookqa_pairs_steps_per_second": 3.363, "step": 225 }, { "epoch": 0.5056179775280899, "eval_msmarco_pairs_loss": 0.17871831357479095, "eval_msmarco_pairs_runtime": 1.5003, "eval_msmarco_pairs_samples_per_second": 85.314, "eval_msmarco_pairs_steps_per_second": 1.333, "step": 225 }, { "epoch": 0.5056179775280899, "eval_nq_pairs_loss": 0.09803248196840286, "eval_nq_pairs_runtime": 2.3587, "eval_nq_pairs_samples_per_second": 54.267, "eval_nq_pairs_steps_per_second": 0.848, "step": 225 }, { "epoch": 0.5056179775280899, "eval_trivia_pairs_loss": 0.5323590636253357, "eval_trivia_pairs_runtime": 3.6206, "eval_trivia_pairs_samples_per_second": 35.354, "eval_trivia_pairs_steps_per_second": 0.552, "step": 225 }, { "epoch": 0.5056179775280899, "eval_gooaq_pairs_loss": 0.2667708098888397, "eval_gooaq_pairs_runtime": 0.9171, "eval_gooaq_pairs_samples_per_second": 139.573, "eval_gooaq_pairs_steps_per_second": 2.181, "step": 225 }, { "epoch": 0.5056179775280899, "eval_paws-pos_loss": 0.0236118845641613, "eval_paws-pos_runtime": 0.6973, "eval_paws-pos_samples_per_second": 183.563, "eval_paws-pos_steps_per_second": 2.868, "step": 225 }, { "epoch": 0.5078651685393258, "grad_norm": 3.5867371559143066, "learning_rate": 3.4687178735995997e-05, "loss": 0.347, "step": 226 }, { "epoch": 0.5101123595505618, "grad_norm": 0.37994861602783203, "learning_rate": 3.467406008093016e-05, "loss": 0.0417, "step": 227 }, { "epoch": 0.5123595505617977, "grad_norm": 4.081336975097656, "learning_rate": 3.466067520840322e-05, "loss": 0.7783, "step": 228 }, { "epoch": 0.5146067415730337, "grad_norm": 4.306976795196533, "learning_rate": 3.46470243790025e-05, "loss": 0.9027, "step": 229 }, { "epoch": 0.5168539325842697, "grad_norm": 4.0280022621154785, "learning_rate": 3.4633107858493206e-05, "loss": 0.7166, "step": 230 }, { "epoch": 0.5191011235955056, "grad_norm": 3.4807679653167725, "learning_rate": 3.461892591781319e-05, "loss": 0.705, "step": 231 }, { "epoch": 0.5213483146067416, "grad_norm": 4.166563510894775, "learning_rate": 3.4604478833067756e-05, "loss": 0.8425, "step": 232 }, { "epoch": 0.5235955056179775, "grad_norm": 3.828537940979004, "learning_rate": 3.4589766885524204e-05, "loss": 0.5362, "step": 233 }, { "epoch": 0.5258426966292135, "grad_norm": 4.316190242767334, "learning_rate": 3.4574790361606435e-05, "loss": 0.7869, "step": 234 }, { "epoch": 0.5280898876404494, "grad_norm": 4.244805335998535, "learning_rate": 3.4559549552889285e-05, "loss": 0.88, "step": 235 }, { "epoch": 0.5303370786516854, "grad_norm": 4.208700656890869, "learning_rate": 3.454404475609294e-05, "loss": 0.8077, "step": 236 }, { "epoch": 0.5325842696629214, "grad_norm": 3.1473183631896973, "learning_rate": 3.4528276273077094e-05, "loss": 0.8145, "step": 237 }, { "epoch": 0.5348314606741573, "grad_norm": 3.798297166824341, "learning_rate": 3.4512244410835094e-05, "loss": 0.78, "step": 238 }, { "epoch": 0.5370786516853933, "grad_norm": 0.535529375076294, "learning_rate": 3.449594948148796e-05, "loss": 0.0536, "step": 239 }, { "epoch": 0.5393258426966292, "grad_norm": 3.2119970321655273, "learning_rate": 3.447939180227833e-05, "loss": 0.7975, "step": 240 }, { "epoch": 0.5415730337078651, "grad_norm": 4.725860118865967, "learning_rate": 3.446257169556425e-05, "loss": 0.8932, "step": 241 }, { "epoch": 0.5438202247191011, "grad_norm": 3.867676258087158, "learning_rate": 3.4445489488812906e-05, "loss": 0.3386, "step": 242 }, { "epoch": 0.5460674157303371, "grad_norm": 3.981114387512207, "learning_rate": 3.4428145514594274e-05, "loss": 0.7741, "step": 243 }, { "epoch": 0.5483146067415731, "grad_norm": 4.034990310668945, "learning_rate": 3.4410540110574616e-05, "loss": 0.7439, "step": 244 }, { "epoch": 0.550561797752809, "grad_norm": 4.209812641143799, "learning_rate": 3.4392673619509916e-05, "loss": 0.7999, "step": 245 }, { "epoch": 0.5528089887640449, "grad_norm": 3.942631244659424, "learning_rate": 3.437454638923921e-05, "loss": 0.8542, "step": 246 }, { "epoch": 0.5550561797752809, "grad_norm": 4.087955951690674, "learning_rate": 3.435615877267783e-05, "loss": 0.6992, "step": 247 }, { "epoch": 0.5573033707865168, "grad_norm": 3.885822057723999, "learning_rate": 3.4337511127810466e-05, "loss": 0.8579, "step": 248 }, { "epoch": 0.5595505617977528, "grad_norm": 5.198770523071289, "learning_rate": 3.431860381768431e-05, "loss": 1.0221, "step": 249 }, { "epoch": 0.5617977528089888, "grad_norm": 4.321418285369873, "learning_rate": 3.4299437210401866e-05, "loss": 0.699, "step": 250 }, { "epoch": 0.5640449438202247, "grad_norm": 3.1992154121398926, "learning_rate": 3.4280011679113884e-05, "loss": 0.8523, "step": 251 }, { "epoch": 0.5662921348314607, "grad_norm": 4.94226598739624, "learning_rate": 3.4260327602012027e-05, "loss": 1.0307, "step": 252 }, { "epoch": 0.5685393258426966, "grad_norm": 3.958935499191284, "learning_rate": 3.424038536232154e-05, "loss": 0.846, "step": 253 }, { "epoch": 0.5707865168539326, "grad_norm": 4.023487091064453, "learning_rate": 3.4220185348293775e-05, "loss": 0.8361, "step": 254 }, { "epoch": 0.5730337078651685, "grad_norm": 3.275102138519287, "learning_rate": 3.4199727953198665e-05, "loss": 0.8224, "step": 255 }, { "epoch": 0.5752808988764045, "grad_norm": 3.6130261421203613, "learning_rate": 3.417901357531701e-05, "loss": 0.5301, "step": 256 }, { "epoch": 0.5775280898876405, "grad_norm": 4.571770668029785, "learning_rate": 3.415804261793277e-05, "loss": 0.3795, "step": 257 }, { "epoch": 0.5797752808988764, "grad_norm": 3.1884663105010986, "learning_rate": 3.413681548932521e-05, "loss": 0.5434, "step": 258 }, { "epoch": 0.5820224719101124, "grad_norm": 4.795211315155029, "learning_rate": 3.411533260276091e-05, "loss": 0.847, "step": 259 }, { "epoch": 0.5842696629213483, "grad_norm": 4.761318206787109, "learning_rate": 3.409359437648579e-05, "loss": 0.7323, "step": 260 }, { "epoch": 0.5865168539325842, "grad_norm": 4.4683098793029785, "learning_rate": 3.407160123371687e-05, "loss": 0.6606, "step": 261 }, { "epoch": 0.5887640449438202, "grad_norm": 0.7677178382873535, "learning_rate": 3.404935360263415e-05, "loss": 0.0543, "step": 262 }, { "epoch": 0.5910112359550562, "grad_norm": 4.110381126403809, "learning_rate": 3.4026851916372166e-05, "loss": 0.6709, "step": 263 }, { "epoch": 0.5932584269662922, "grad_norm": 4.766375541687012, "learning_rate": 3.400409661301162e-05, "loss": 0.809, "step": 264 }, { "epoch": 0.5955056179775281, "grad_norm": 5.389264106750488, "learning_rate": 3.398108813557082e-05, "loss": 1.0391, "step": 265 }, { "epoch": 0.597752808988764, "grad_norm": 3.8780810832977295, "learning_rate": 3.3957826931997094e-05, "loss": 0.7396, "step": 266 }, { "epoch": 0.6, "grad_norm": 4.399974822998047, "learning_rate": 3.393431345515801e-05, "loss": 0.7839, "step": 267 }, { "epoch": 0.6022471910112359, "grad_norm": 3.2098612785339355, "learning_rate": 3.391054816283262e-05, "loss": 0.3054, "step": 268 }, { "epoch": 0.604494382022472, "grad_norm": 3.606182098388672, "learning_rate": 3.3886531517702505e-05, "loss": 0.5258, "step": 269 }, { "epoch": 0.6067415730337079, "grad_norm": 4.3564934730529785, "learning_rate": 3.3862263987342784e-05, "loss": 0.7367, "step": 270 }, { "epoch": 0.6067415730337079, "eval_VitaminC_cosine_accuracy": 0.552734375, "eval_VitaminC_cosine_accuracy_threshold": 0.814909815788269, "eval_VitaminC_cosine_ap": 0.5506214433093293, "eval_VitaminC_cosine_f1": 0.664886515353805, "eval_VitaminC_cosine_f1_threshold": 0.3506072461605072, "eval_VitaminC_cosine_precision": 0.5, "eval_VitaminC_cosine_recall": 0.9920318725099602, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 316.90899658203125, "eval_VitaminC_dot_ap": 0.5353657977329522, "eval_VitaminC_dot_f1": 0.6666666666666667, "eval_VitaminC_dot_f1_threshold": 155.67796325683594, "eval_VitaminC_dot_precision": 0.506198347107438, "eval_VitaminC_dot_recall": 0.9760956175298805, "eval_VitaminC_euclidean_accuracy": 0.55078125, "eval_VitaminC_euclidean_accuracy_threshold": 10.77621841430664, "eval_VitaminC_euclidean_ap": 0.550546292530568, "eval_VitaminC_euclidean_f1": 0.6666666666666666, "eval_VitaminC_euclidean_f1_threshold": 24.22284698486328, "eval_VitaminC_euclidean_precision": 0.5, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5546875, "eval_VitaminC_manhattan_accuracy_threshold": 335.6986389160156, "eval_VitaminC_manhattan_ap": 0.5497325043939846, "eval_VitaminC_manhattan_f1": 0.6640211640211641, "eval_VitaminC_manhattan_f1_threshold": 513.494873046875, "eval_VitaminC_manhattan_precision": 0.497029702970297, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.5546875, "eval_VitaminC_max_accuracy_threshold": 335.6986389160156, "eval_VitaminC_max_ap": 0.5506214433093293, "eval_VitaminC_max_f1": 0.6666666666666667, "eval_VitaminC_max_f1_threshold": 513.494873046875, "eval_VitaminC_max_precision": 0.506198347107438, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5506214433093293, "eval_sts-test_pearson_cosine": 0.8848372816940555, "eval_sts-test_pearson_dot": 0.8774995772730847, "eval_sts-test_pearson_euclidean": 0.9058906663416005, "eval_sts-test_pearson_manhattan": 0.9066316554236529, "eval_sts-test_pearson_max": 0.9066316554236529, "eval_sts-test_spearman_cosine": 0.9085018016884417, "eval_sts-test_spearman_dot": 0.8776881864036095, "eval_sts-test_spearman_euclidean": 0.903223569412372, "eval_sts-test_spearman_manhattan": 0.9037578547221237, "eval_sts-test_spearman_max": 0.9085018016884417, "eval_vitaminc-pairs_loss": 1.4935871362686157, "eval_vitaminc-pairs_runtime": 1.8963, "eval_vitaminc-pairs_samples_per_second": 56.952, "eval_vitaminc-pairs_steps_per_second": 1.055, "step": 270 }, { "epoch": 0.6067415730337079, "eval_negation-triplets_loss": 0.9505463242530823, "eval_negation-triplets_runtime": 0.3041, "eval_negation-triplets_samples_per_second": 210.485, "eval_negation-triplets_steps_per_second": 3.289, "step": 270 }, { "epoch": 0.6067415730337079, "eval_scitail-pairs-pos_loss": 0.09635873883962631, "eval_scitail-pairs-pos_runtime": 0.4048, "eval_scitail-pairs-pos_samples_per_second": 133.396, "eval_scitail-pairs-pos_steps_per_second": 2.47, "step": 270 }, { "epoch": 0.6067415730337079, "eval_scitail-pairs-qa_loss": 0.0009468490607105196, "eval_scitail-pairs-qa_runtime": 0.5341, "eval_scitail-pairs-qa_samples_per_second": 239.65, "eval_scitail-pairs-qa_steps_per_second": 3.745, "step": 270 }, { "epoch": 0.6067415730337079, "eval_xsum-pairs_loss": 0.026903513818979263, "eval_xsum-pairs_runtime": 2.7518, "eval_xsum-pairs_samples_per_second": 46.514, "eval_xsum-pairs_steps_per_second": 0.727, "step": 270 }, { "epoch": 0.6067415730337079, "eval_sciq_pairs_loss": 0.01619444414973259, "eval_sciq_pairs_runtime": 2.8856, "eval_sciq_pairs_samples_per_second": 44.358, "eval_sciq_pairs_steps_per_second": 0.693, "step": 270 }, { "epoch": 0.6067415730337079, "eval_qasc_pairs_loss": 0.09130185097455978, "eval_qasc_pairs_runtime": 0.6645, "eval_qasc_pairs_samples_per_second": 192.631, "eval_qasc_pairs_steps_per_second": 3.01, "step": 270 }, { "epoch": 0.6067415730337079, "eval_openbookqa_pairs_loss": 0.7336423397064209, "eval_openbookqa_pairs_runtime": 0.5935, "eval_openbookqa_pairs_samples_per_second": 215.687, "eval_openbookqa_pairs_steps_per_second": 3.37, "step": 270 }, { "epoch": 0.6067415730337079, "eval_msmarco_pairs_loss": 0.15868164598941803, "eval_msmarco_pairs_runtime": 1.5086, "eval_msmarco_pairs_samples_per_second": 84.844, "eval_msmarco_pairs_steps_per_second": 1.326, "step": 270 }, { "epoch": 0.6067415730337079, "eval_nq_pairs_loss": 0.10780799388885498, "eval_nq_pairs_runtime": 2.3746, "eval_nq_pairs_samples_per_second": 53.905, "eval_nq_pairs_steps_per_second": 0.842, "step": 270 }, { "epoch": 0.6067415730337079, "eval_trivia_pairs_loss": 0.49691149592399597, "eval_trivia_pairs_runtime": 3.5992, "eval_trivia_pairs_samples_per_second": 35.563, "eval_trivia_pairs_steps_per_second": 0.556, "step": 270 }, { "epoch": 0.6067415730337079, "eval_gooaq_pairs_loss": 0.3025541603565216, "eval_gooaq_pairs_runtime": 0.9181, "eval_gooaq_pairs_samples_per_second": 139.423, "eval_gooaq_pairs_steps_per_second": 2.178, "step": 270 }, { "epoch": 0.6067415730337079, "eval_paws-pos_loss": 0.024440350010991096, "eval_paws-pos_runtime": 0.7046, "eval_paws-pos_samples_per_second": 181.67, "eval_paws-pos_steps_per_second": 2.839, "step": 270 }, { "epoch": 0.6089887640449438, "grad_norm": 3.21183705329895, "learning_rate": 3.383774604421301e-05, "loss": 0.747, "step": 271 }, { "epoch": 0.6112359550561798, "grad_norm": 4.403411865234375, "learning_rate": 3.3812978165647975e-05, "loss": 0.7855, "step": 272 }, { "epoch": 0.6134831460674157, "grad_norm": 0.46612274646759033, "learning_rate": 3.3787960833848405e-05, "loss": 0.0473, "step": 273 }, { "epoch": 0.6157303370786517, "grad_norm": 3.30610990524292, "learning_rate": 3.3762694535871584e-05, "loss": 0.4378, "step": 274 }, { "epoch": 0.6179775280898876, "grad_norm": 3.7408640384674072, "learning_rate": 3.373717976362187e-05, "loss": 0.8767, "step": 275 }, { "epoch": 0.6202247191011236, "grad_norm": 5.345012187957764, "learning_rate": 3.3711417013841105e-05, "loss": 1.0345, "step": 276 }, { "epoch": 0.6224719101123596, "grad_norm": 3.518765449523926, "learning_rate": 3.368540678809897e-05, "loss": 0.5182, "step": 277 }, { "epoch": 0.6247191011235955, "grad_norm": 6.666887283325195, "learning_rate": 3.3659149592783186e-05, "loss": 2.5949, "step": 278 }, { "epoch": 0.6269662921348315, "grad_norm": 3.197411298751831, "learning_rate": 3.363264593908969e-05, "loss": 0.833, "step": 279 }, { "epoch": 0.6292134831460674, "grad_norm": 0.6012090444564819, "learning_rate": 3.360589634301267e-05, "loss": 0.0778, "step": 280 }, { "epoch": 0.6314606741573033, "grad_norm": 4.5016188621521, "learning_rate": 3.357890132533449e-05, "loss": 0.8048, "step": 281 }, { "epoch": 0.6337078651685393, "grad_norm": 3.865889072418213, "learning_rate": 3.35516614116156e-05, "loss": 0.7524, "step": 282 }, { "epoch": 0.6359550561797753, "grad_norm": 3.2998361587524414, "learning_rate": 3.3524177132184266e-05, "loss": 0.3246, "step": 283 }, { "epoch": 0.6382022471910113, "grad_norm": 0.6418587565422058, "learning_rate": 3.349644902212628e-05, "loss": 0.0728, "step": 284 }, { "epoch": 0.6404494382022472, "grad_norm": 5.772351264953613, "learning_rate": 3.34684776212745e-05, "loss": 2.3619, "step": 285 }, { "epoch": 0.6426966292134831, "grad_norm": 3.769488573074341, "learning_rate": 3.3440263474198376e-05, "loss": 0.7464, "step": 286 }, { "epoch": 0.6449438202247191, "grad_norm": 4.559601783752441, "learning_rate": 3.3411807130193325e-05, "loss": 0.6691, "step": 287 }, { "epoch": 0.647191011235955, "grad_norm": 0.45337462425231934, "learning_rate": 3.338310914327005e-05, "loss": 0.059, "step": 288 }, { "epoch": 0.6494382022471911, "grad_norm": 4.7184553146362305, "learning_rate": 3.3354170072143766e-05, "loss": 0.7841, "step": 289 }, { "epoch": 0.651685393258427, "grad_norm": 3.886216640472412, "learning_rate": 3.332499048022328e-05, "loss": 0.647, "step": 290 }, { "epoch": 0.6539325842696629, "grad_norm": 4.497567176818848, "learning_rate": 3.329557093560006e-05, "loss": 0.8814, "step": 291 }, { "epoch": 0.6561797752808989, "grad_norm": 3.995391368865967, "learning_rate": 3.326591201103716e-05, "loss": 0.7247, "step": 292 }, { "epoch": 0.6584269662921348, "grad_norm": 0.4348815083503723, "learning_rate": 3.323601428395809e-05, "loss": 0.059, "step": 293 }, { "epoch": 0.6606741573033708, "grad_norm": 3.6197896003723145, "learning_rate": 3.320587833643554e-05, "loss": 0.8317, "step": 294 }, { "epoch": 0.6629213483146067, "grad_norm": 4.4088215827941895, "learning_rate": 3.317550475518006e-05, "loss": 0.8548, "step": 295 }, { "epoch": 0.6651685393258427, "grad_norm": 4.541014194488525, "learning_rate": 3.314489413152867e-05, "loss": 0.9213, "step": 296 }, { "epoch": 0.6674157303370787, "grad_norm": 3.067857265472412, "learning_rate": 3.311404706143329e-05, "loss": 0.6923, "step": 297 }, { "epoch": 0.6696629213483146, "grad_norm": 4.037753582000732, "learning_rate": 3.3082964145449174e-05, "loss": 0.7777, "step": 298 }, { "epoch": 0.6719101123595506, "grad_norm": 4.280182838439941, "learning_rate": 3.305164598872322e-05, "loss": 0.7496, "step": 299 }, { "epoch": 0.6741573033707865, "grad_norm": 4.357325077056885, "learning_rate": 3.302009320098218e-05, "loss": 0.7636, "step": 300 }, { "epoch": 0.6764044943820224, "grad_norm": 4.007940292358398, "learning_rate": 3.2988306396520775e-05, "loss": 0.6867, "step": 301 }, { "epoch": 0.6786516853932584, "grad_norm": 0.8544747233390808, "learning_rate": 3.295628619418977e-05, "loss": 0.0506, "step": 302 }, { "epoch": 0.6808988764044944, "grad_norm": 3.34498929977417, "learning_rate": 3.292403321738387e-05, "loss": 0.3346, "step": 303 }, { "epoch": 0.6831460674157304, "grad_norm": 2.441420316696167, "learning_rate": 3.289154809402967e-05, "loss": 0.2485, "step": 304 }, { "epoch": 0.6853932584269663, "grad_norm": 4.533839702606201, "learning_rate": 3.285883145657334e-05, "loss": 0.8508, "step": 305 }, { "epoch": 0.6876404494382022, "grad_norm": 3.2033944129943848, "learning_rate": 3.2825883941968346e-05, "loss": 0.8464, "step": 306 }, { "epoch": 0.6898876404494382, "grad_norm": 3.6305220127105713, "learning_rate": 3.279270619166309e-05, "loss": 0.3385, "step": 307 }, { "epoch": 0.6921348314606741, "grad_norm": 4.438405990600586, "learning_rate": 3.2759298851588336e-05, "loss": 0.8837, "step": 308 }, { "epoch": 0.6943820224719102, "grad_norm": 4.252586841583252, "learning_rate": 3.272566257214474e-05, "loss": 0.9019, "step": 309 }, { "epoch": 0.6966292134831461, "grad_norm": 4.231752872467041, "learning_rate": 3.2691798008190096e-05, "loss": 0.6922, "step": 310 }, { "epoch": 0.698876404494382, "grad_norm": 3.862682342529297, "learning_rate": 3.265770581902662e-05, "loss": 0.6348, "step": 311 }, { "epoch": 0.701123595505618, "grad_norm": 3.783026933670044, "learning_rate": 3.262338666838813e-05, "loss": 0.7522, "step": 312 }, { "epoch": 0.7033707865168539, "grad_norm": 4.141933917999268, "learning_rate": 3.25888412244271e-05, "loss": 0.7843, "step": 313 }, { "epoch": 0.7056179775280899, "grad_norm": 0.7638006210327148, "learning_rate": 3.2554070159701684e-05, "loss": 0.0493, "step": 314 }, { "epoch": 0.7078651685393258, "grad_norm": 3.7285079956054688, "learning_rate": 3.2519074151162564e-05, "loss": 0.357, "step": 315 }, { "epoch": 0.7078651685393258, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.8318675756454468, "eval_VitaminC_cosine_ap": 0.553255462027648, "eval_VitaminC_cosine_f1": 0.6666666666666666, "eval_VitaminC_cosine_f1_threshold": 0.3080925941467285, "eval_VitaminC_cosine_precision": 0.5, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5546875, "eval_VitaminC_dot_accuracy_threshold": 284.4936218261719, "eval_VitaminC_dot_ap": 0.5335304755231123, "eval_VitaminC_dot_f1": 0.6675531914893617, "eval_VitaminC_dot_f1_threshold": 117.11366271972656, "eval_VitaminC_dot_precision": 0.500998003992016, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.556640625, "eval_VitaminC_euclidean_accuracy_threshold": 14.916669845581055, "eval_VitaminC_euclidean_ap": 0.5560392780320775, "eval_VitaminC_euclidean_f1": 0.6657824933687002, "eval_VitaminC_euclidean_f1_threshold": 23.758323669433594, "eval_VitaminC_euclidean_precision": 0.4990059642147117, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5546875, "eval_VitaminC_manhattan_accuracy_threshold": 303.769775390625, "eval_VitaminC_manhattan_ap": 0.5575735035337728, "eval_VitaminC_manhattan_f1": 0.6666666666666666, "eval_VitaminC_manhattan_f1_threshold": 500.6726989746094, "eval_VitaminC_manhattan_precision": 0.5, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.556640625, "eval_VitaminC_max_accuracy_threshold": 303.769775390625, "eval_VitaminC_max_ap": 0.5575735035337728, "eval_VitaminC_max_f1": 0.6675531914893617, "eval_VitaminC_max_f1_threshold": 500.6726989746094, "eval_VitaminC_max_precision": 0.500998003992016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5575735035337728, "eval_sts-test_pearson_cosine": 0.884017793393225, "eval_sts-test_pearson_dot": 0.8725802033594147, "eval_sts-test_pearson_euclidean": 0.9065592531799239, "eval_sts-test_pearson_manhattan": 0.9070236641674441, "eval_sts-test_pearson_max": 0.9070236641674441, "eval_sts-test_spearman_cosine": 0.9067846957888538, "eval_sts-test_spearman_dot": 0.8716365180769119, "eval_sts-test_spearman_euclidean": 0.9026938039800204, "eval_sts-test_spearman_manhattan": 0.903306941012344, "eval_sts-test_spearman_max": 0.9067846957888538, "eval_vitaminc-pairs_loss": 1.4885247945785522, "eval_vitaminc-pairs_runtime": 1.9137, "eval_vitaminc-pairs_samples_per_second": 56.436, "eval_vitaminc-pairs_steps_per_second": 1.045, "step": 315 }, { "epoch": 0.7078651685393258, "eval_negation-triplets_loss": 0.9597576856613159, "eval_negation-triplets_runtime": 0.3023, "eval_negation-triplets_samples_per_second": 211.742, "eval_negation-triplets_steps_per_second": 3.308, "step": 315 }, { "epoch": 0.7078651685393258, "eval_scitail-pairs-pos_loss": 0.09951130300760269, "eval_scitail-pairs-pos_runtime": 0.3896, "eval_scitail-pairs-pos_samples_per_second": 138.608, "eval_scitail-pairs-pos_steps_per_second": 2.567, "step": 315 }, { "epoch": 0.7078651685393258, "eval_scitail-pairs-qa_loss": 0.0010157548822462559, "eval_scitail-pairs-qa_runtime": 0.5373, "eval_scitail-pairs-qa_samples_per_second": 238.245, "eval_scitail-pairs-qa_steps_per_second": 3.723, "step": 315 }, { "epoch": 0.7078651685393258, "eval_xsum-pairs_loss": 0.027823584154248238, "eval_xsum-pairs_runtime": 2.7408, "eval_xsum-pairs_samples_per_second": 46.701, "eval_xsum-pairs_steps_per_second": 0.73, "step": 315 }, { "epoch": 0.7078651685393258, "eval_sciq_pairs_loss": 0.015241424553096294, "eval_sciq_pairs_runtime": 2.8458, "eval_sciq_pairs_samples_per_second": 44.978, "eval_sciq_pairs_steps_per_second": 0.703, "step": 315 }, { "epoch": 0.7078651685393258, "eval_qasc_pairs_loss": 0.09173130989074707, "eval_qasc_pairs_runtime": 0.6608, "eval_qasc_pairs_samples_per_second": 193.694, "eval_qasc_pairs_steps_per_second": 3.026, "step": 315 }, { "epoch": 0.7078651685393258, "eval_openbookqa_pairs_loss": 0.6921954154968262, "eval_openbookqa_pairs_runtime": 0.5893, "eval_openbookqa_pairs_samples_per_second": 217.196, "eval_openbookqa_pairs_steps_per_second": 3.394, "step": 315 }, { "epoch": 0.7078651685393258, "eval_msmarco_pairs_loss": 0.15177518129348755, "eval_msmarco_pairs_runtime": 1.494, "eval_msmarco_pairs_samples_per_second": 85.673, "eval_msmarco_pairs_steps_per_second": 1.339, "step": 315 }, { "epoch": 0.7078651685393258, "eval_nq_pairs_loss": 0.10136909037828445, "eval_nq_pairs_runtime": 2.3524, "eval_nq_pairs_samples_per_second": 54.413, "eval_nq_pairs_steps_per_second": 0.85, "step": 315 }, { "epoch": 0.7078651685393258, "eval_trivia_pairs_loss": 0.5301617980003357, "eval_trivia_pairs_runtime": 3.5809, "eval_trivia_pairs_samples_per_second": 35.745, "eval_trivia_pairs_steps_per_second": 0.559, "step": 315 }, { "epoch": 0.7078651685393258, "eval_gooaq_pairs_loss": 0.28424739837646484, "eval_gooaq_pairs_runtime": 0.9167, "eval_gooaq_pairs_samples_per_second": 139.635, "eval_gooaq_pairs_steps_per_second": 2.182, "step": 315 }, { "epoch": 0.7078651685393258, "eval_paws-pos_loss": 0.023981213569641113, "eval_paws-pos_runtime": 0.6966, "eval_paws-pos_samples_per_second": 183.744, "eval_paws-pos_steps_per_second": 2.871, "step": 315 }, { "epoch": 0.7101123595505618, "grad_norm": 3.6374969482421875, "learning_rate": 3.248385388013984e-05, "loss": 0.841, "step": 316 }, { "epoch": 0.7123595505617978, "grad_norm": 4.251607418060303, "learning_rate": 3.2448410032329716e-05, "loss": 0.5849, "step": 317 }, { "epoch": 0.7146067415730337, "grad_norm": 4.323038101196289, "learning_rate": 3.241274329778117e-05, "loss": 0.6818, "step": 318 }, { "epoch": 0.7168539325842697, "grad_norm": 4.027289867401123, "learning_rate": 3.237685437088251e-05, "loss": 0.8269, "step": 319 }, { "epoch": 0.7191011235955056, "grad_norm": 3.014479875564575, "learning_rate": 3.234074395034787e-05, "loss": 0.6979, "step": 320 }, { "epoch": 0.7213483146067415, "grad_norm": 3.5980277061462402, "learning_rate": 3.2304412739203595e-05, "loss": 0.3218, "step": 321 }, { "epoch": 0.7235955056179775, "grad_norm": 3.2924134731292725, "learning_rate": 3.226786144477456e-05, "loss": 0.8206, "step": 322 }, { "epoch": 0.7258426966292135, "grad_norm": 2.524231195449829, "learning_rate": 3.2231090778670385e-05, "loss": 0.2106, "step": 323 }, { "epoch": 0.7280898876404495, "grad_norm": 5.464061260223389, "learning_rate": 3.2194101456771604e-05, "loss": 1.0524, "step": 324 }, { "epoch": 0.7303370786516854, "grad_norm": 3.4692578315734863, "learning_rate": 3.215689419921572e-05, "loss": 0.3774, "step": 325 }, { "epoch": 0.7325842696629213, "grad_norm": 4.947183132171631, "learning_rate": 3.211946973038315e-05, "loss": 0.9098, "step": 326 }, { "epoch": 0.7348314606741573, "grad_norm": 4.432866096496582, "learning_rate": 3.208182877888319e-05, "loss": 0.7988, "step": 327 }, { "epoch": 0.7370786516853932, "grad_norm": 4.585951328277588, "learning_rate": 3.204397207753978e-05, "loss": 0.7916, "step": 328 }, { "epoch": 0.7393258426966293, "grad_norm": 3.7288637161254883, "learning_rate": 3.200590036337724e-05, "loss": 0.6314, "step": 329 }, { "epoch": 0.7415730337078652, "grad_norm": 3.840074300765991, "learning_rate": 3.196761437760593e-05, "loss": 0.8628, "step": 330 }, { "epoch": 0.7438202247191011, "grad_norm": 0.6423048377037048, "learning_rate": 3.192911486560784e-05, "loss": 0.0688, "step": 331 }, { "epoch": 0.7460674157303371, "grad_norm": 4.148509502410889, "learning_rate": 3.1890402576922036e-05, "loss": 0.7386, "step": 332 }, { "epoch": 0.748314606741573, "grad_norm": 4.7345147132873535, "learning_rate": 3.1851478265230103e-05, "loss": 0.8458, "step": 333 }, { "epoch": 0.750561797752809, "grad_norm": 0.695708155632019, "learning_rate": 3.181234268834144e-05, "loss": 0.0442, "step": 334 }, { "epoch": 0.7528089887640449, "grad_norm": 3.434741735458374, "learning_rate": 3.177299660817856e-05, "loss": 0.317, "step": 335 }, { "epoch": 0.755056179775281, "grad_norm": 3.306964874267578, "learning_rate": 3.1733440790762176e-05, "loss": 0.8087, "step": 336 }, { "epoch": 0.7573033707865169, "grad_norm": 3.010828733444214, "learning_rate": 3.169367600619637e-05, "loss": 0.3398, "step": 337 }, { "epoch": 0.7595505617977528, "grad_norm": 4.152151584625244, "learning_rate": 3.1653703028653545e-05, "loss": 0.699, "step": 338 }, { "epoch": 0.7617977528089888, "grad_norm": 4.073326110839844, "learning_rate": 3.161352263635937e-05, "loss": 0.7901, "step": 339 }, { "epoch": 0.7640449438202247, "grad_norm": 4.365633487701416, "learning_rate": 3.157313561157764e-05, "loss": 0.8072, "step": 340 }, { "epoch": 0.7662921348314606, "grad_norm": 3.506556272506714, "learning_rate": 3.153254274059501e-05, "loss": 0.5939, "step": 341 }, { "epoch": 0.7685393258426966, "grad_norm": 4.319092273712158, "learning_rate": 3.149174481370575e-05, "loss": 0.6933, "step": 342 }, { "epoch": 0.7707865168539326, "grad_norm": 0.6184964179992676, "learning_rate": 3.145074262519629e-05, "loss": 0.0437, "step": 343 }, { "epoch": 0.7730337078651686, "grad_norm": 4.866581916809082, "learning_rate": 3.140953697332979e-05, "loss": 0.9882, "step": 344 }, { "epoch": 0.7752808988764045, "grad_norm": 3.9585559368133545, "learning_rate": 3.136812866033063e-05, "loss": 0.3707, "step": 345 }, { "epoch": 0.7775280898876404, "grad_norm": 4.253391265869141, "learning_rate": 3.132651849236871e-05, "loss": 0.7103, "step": 346 }, { "epoch": 0.7797752808988764, "grad_norm": 0.5847011208534241, "learning_rate": 3.128470727954383e-05, "loss": 0.0372, "step": 347 }, { "epoch": 0.7820224719101123, "grad_norm": 0.5127836465835571, "learning_rate": 3.124269583586989e-05, "loss": 0.028, "step": 348 }, { "epoch": 0.7842696629213484, "grad_norm": 4.145182132720947, "learning_rate": 3.120048497925904e-05, "loss": 0.7676, "step": 349 }, { "epoch": 0.7865168539325843, "grad_norm": 4.833105087280273, "learning_rate": 3.1158075531505755e-05, "loss": 0.6754, "step": 350 }, { "epoch": 0.7887640449438202, "grad_norm": 0.49345946311950684, "learning_rate": 3.1115468318270844e-05, "loss": 0.0439, "step": 351 }, { "epoch": 0.7910112359550562, "grad_norm": 3.357720375061035, "learning_rate": 3.107266416906538e-05, "loss": 0.8039, "step": 352 }, { "epoch": 0.7932584269662921, "grad_norm": 0.2371903359889984, "learning_rate": 3.1029663917234514e-05, "loss": 0.0104, "step": 353 }, { "epoch": 0.7955056179775281, "grad_norm": 0.48881796002388, "learning_rate": 3.098646839994132e-05, "loss": 0.0555, "step": 354 }, { "epoch": 0.797752808988764, "grad_norm": 3.3021090030670166, "learning_rate": 3.094307845815042e-05, "loss": 0.8646, "step": 355 }, { "epoch": 0.8, "grad_norm": 3.0412533283233643, "learning_rate": 3.0899494936611663e-05, "loss": 0.7781, "step": 356 }, { "epoch": 0.802247191011236, "grad_norm": 0.30917835235595703, "learning_rate": 3.085571868384366e-05, "loss": 0.011, "step": 357 }, { "epoch": 0.8044943820224719, "grad_norm": 3.6957950592041016, "learning_rate": 3.081175055211726e-05, "loss": 0.3267, "step": 358 }, { "epoch": 0.8067415730337079, "grad_norm": 7.202300071716309, "learning_rate": 3.0767591397438974e-05, "loss": 2.5281, "step": 359 }, { "epoch": 0.8089887640449438, "grad_norm": 2.9833834171295166, "learning_rate": 3.072324207953429e-05, "loss": 0.301, "step": 360 }, { "epoch": 0.8089887640449438, "eval_VitaminC_cosine_accuracy": 0.55859375, "eval_VitaminC_cosine_accuracy_threshold": 0.6793336868286133, "eval_VitaminC_cosine_ap": 0.5555632752592039, "eval_VitaminC_cosine_f1": 0.6657824933687002, "eval_VitaminC_cosine_f1_threshold": 0.28029173612594604, "eval_VitaminC_cosine_precision": 0.4990059642147117, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 265.5102844238281, "eval_VitaminC_dot_ap": 0.5326105108889087, "eval_VitaminC_dot_f1": 0.6675531914893617, "eval_VitaminC_dot_f1_threshold": 106.37774658203125, "eval_VitaminC_dot_precision": 0.500998003992016, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.55859375, "eval_VitaminC_euclidean_accuracy_threshold": 15.296594619750977, "eval_VitaminC_euclidean_ap": 0.5592294311948881, "eval_VitaminC_euclidean_f1": 0.6657824933687002, "eval_VitaminC_euclidean_f1_threshold": 23.58568572998047, "eval_VitaminC_euclidean_precision": 0.4990059642147117, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.556640625, "eval_VitaminC_manhattan_accuracy_threshold": 306.79913330078125, "eval_VitaminC_manhattan_ap": 0.5598941655081213, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 512.0101318359375, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 306.79913330078125, "eval_VitaminC_max_ap": 0.5598941655081213, "eval_VitaminC_max_f1": 0.6675531914893617, "eval_VitaminC_max_f1_threshold": 512.0101318359375, "eval_VitaminC_max_precision": 0.500998003992016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5598941655081213, "eval_sts-test_pearson_cosine": 0.8832151520369376, "eval_sts-test_pearson_dot": 0.8763916954110884, "eval_sts-test_pearson_euclidean": 0.9046869354209082, "eval_sts-test_pearson_manhattan": 0.9047119917370259, "eval_sts-test_pearson_max": 0.9047119917370259, "eval_sts-test_spearman_cosine": 0.9054341922225841, "eval_sts-test_spearman_dot": 0.8786041104705073, "eval_sts-test_spearman_euclidean": 0.9002407635868509, "eval_sts-test_spearman_manhattan": 0.9006719867416183, "eval_sts-test_spearman_max": 0.9054341922225841, "eval_vitaminc-pairs_loss": 1.4290639162063599, "eval_vitaminc-pairs_runtime": 1.8905, "eval_vitaminc-pairs_samples_per_second": 57.128, "eval_vitaminc-pairs_steps_per_second": 1.058, "step": 360 }, { "epoch": 0.8089887640449438, "eval_negation-triplets_loss": 0.9030703902244568, "eval_negation-triplets_runtime": 0.2986, "eval_negation-triplets_samples_per_second": 214.299, "eval_negation-triplets_steps_per_second": 3.348, "step": 360 }, { "epoch": 0.8089887640449438, "eval_scitail-pairs-pos_loss": 0.10728535801172256, "eval_scitail-pairs-pos_runtime": 0.3831, "eval_scitail-pairs-pos_samples_per_second": 140.965, "eval_scitail-pairs-pos_steps_per_second": 2.61, "step": 360 }, { "epoch": 0.8089887640449438, "eval_scitail-pairs-qa_loss": 0.0005650219391100109, "eval_scitail-pairs-qa_runtime": 0.5259, "eval_scitail-pairs-qa_samples_per_second": 243.397, "eval_scitail-pairs-qa_steps_per_second": 3.803, "step": 360 }, { "epoch": 0.8089887640449438, "eval_xsum-pairs_loss": 0.025990577414631844, "eval_xsum-pairs_runtime": 2.734, "eval_xsum-pairs_samples_per_second": 46.818, "eval_xsum-pairs_steps_per_second": 0.732, "step": 360 }, { "epoch": 0.8089887640449438, "eval_sciq_pairs_loss": 0.016017427667975426, "eval_sciq_pairs_runtime": 2.8252, "eval_sciq_pairs_samples_per_second": 45.307, "eval_sciq_pairs_steps_per_second": 0.708, "step": 360 }, { "epoch": 0.8089887640449438, "eval_qasc_pairs_loss": 0.10250324755907059, "eval_qasc_pairs_runtime": 0.6511, "eval_qasc_pairs_samples_per_second": 196.585, "eval_qasc_pairs_steps_per_second": 3.072, "step": 360 }, { "epoch": 0.8089887640449438, "eval_openbookqa_pairs_loss": 0.6710968613624573, "eval_openbookqa_pairs_runtime": 0.5776, "eval_openbookqa_pairs_samples_per_second": 221.625, "eval_openbookqa_pairs_steps_per_second": 3.463, "step": 360 }, { "epoch": 0.8089887640449438, "eval_msmarco_pairs_loss": 0.14522777497768402, "eval_msmarco_pairs_runtime": 1.4981, "eval_msmarco_pairs_samples_per_second": 85.441, "eval_msmarco_pairs_steps_per_second": 1.335, "step": 360 }, { "epoch": 0.8089887640449438, "eval_nq_pairs_loss": 0.10225611180067062, "eval_nq_pairs_runtime": 2.3595, "eval_nq_pairs_samples_per_second": 54.248, "eval_nq_pairs_steps_per_second": 0.848, "step": 360 }, { "epoch": 0.8089887640449438, "eval_trivia_pairs_loss": 0.5312957167625427, "eval_trivia_pairs_runtime": 3.5813, "eval_trivia_pairs_samples_per_second": 35.741, "eval_trivia_pairs_steps_per_second": 0.558, "step": 360 }, { "epoch": 0.8089887640449438, "eval_gooaq_pairs_loss": 0.27713337540626526, "eval_gooaq_pairs_runtime": 0.9166, "eval_gooaq_pairs_samples_per_second": 139.645, "eval_gooaq_pairs_steps_per_second": 2.182, "step": 360 }, { "epoch": 0.8089887640449438, "eval_paws-pos_loss": 0.024326296523213387, "eval_paws-pos_runtime": 0.6893, "eval_paws-pos_samples_per_second": 185.682, "eval_paws-pos_steps_per_second": 2.901, "step": 360 }, { "epoch": 0.8112359550561797, "grad_norm": 4.372533798217773, "learning_rate": 3.067870346183096e-05, "loss": 0.7533, "step": 361 }, { "epoch": 0.8134831460674158, "grad_norm": 2.6585452556610107, "learning_rate": 3.063397641144216e-05, "loss": 0.2958, "step": 362 }, { "epoch": 0.8157303370786517, "grad_norm": 4.378647327423096, "learning_rate": 3.058906179914962e-05, "loss": 0.8296, "step": 363 }, { "epoch": 0.8179775280898877, "grad_norm": 3.1601309776306152, "learning_rate": 3.0543960499386694e-05, "loss": 0.3191, "step": 364 }, { "epoch": 0.8202247191011236, "grad_norm": 3.446498394012451, "learning_rate": 3.049867339022129e-05, "loss": 0.7866, "step": 365 }, { "epoch": 0.8224719101123595, "grad_norm": 3.0058486461639404, "learning_rate": 3.0453201353338826e-05, "loss": 0.3157, "step": 366 }, { "epoch": 0.8247191011235955, "grad_norm": 4.380611419677734, "learning_rate": 3.040754527402502e-05, "loss": 0.7402, "step": 367 }, { "epoch": 0.8269662921348314, "grad_norm": 3.8081209659576416, "learning_rate": 3.036170604114869e-05, "loss": 0.4957, "step": 368 }, { "epoch": 0.8292134831460675, "grad_norm": 4.2056989669799805, "learning_rate": 3.031568454714442e-05, "loss": 0.8505, "step": 369 }, { "epoch": 0.8314606741573034, "grad_norm": 3.101804733276367, "learning_rate": 3.0269481687995207e-05, "loss": 0.7702, "step": 370 }, { "epoch": 0.8337078651685393, "grad_norm": 4.0704345703125, "learning_rate": 3.0223098363215002e-05, "loss": 0.7591, "step": 371 }, { "epoch": 0.8359550561797753, "grad_norm": 2.9631364345550537, "learning_rate": 3.0176535475831208e-05, "loss": 0.727, "step": 372 }, { "epoch": 0.8382022471910112, "grad_norm": 3.3760929107666016, "learning_rate": 3.01297939323671e-05, "loss": 0.3233, "step": 373 }, { "epoch": 0.8404494382022472, "grad_norm": 4.116260051727295, "learning_rate": 3.0082874642824164e-05, "loss": 0.8738, "step": 374 }, { "epoch": 0.8426966292134831, "grad_norm": 0.40298929810523987, "learning_rate": 3.0035778520664388e-05, "loss": 0.0393, "step": 375 }, { "epoch": 0.8449438202247191, "grad_norm": 3.0647614002227783, "learning_rate": 2.9988506482792485e-05, "loss": 0.7454, "step": 376 }, { "epoch": 0.8471910112359551, "grad_norm": 2.951953649520874, "learning_rate": 2.994105944953803e-05, "loss": 0.8297, "step": 377 }, { "epoch": 0.849438202247191, "grad_norm": 4.049951553344727, "learning_rate": 2.9893438344637538e-05, "loss": 0.7802, "step": 378 }, { "epoch": 0.851685393258427, "grad_norm": 3.7383949756622314, "learning_rate": 2.984564409521651e-05, "loss": 0.6229, "step": 379 }, { "epoch": 0.8539325842696629, "grad_norm": 0.0, "learning_rate": 2.979767763177134e-05, "loss": 0.0, "step": 380 }, { "epoch": 0.8561797752808988, "grad_norm": 3.399641513824463, "learning_rate": 2.9749539888151244e-05, "loss": 0.3506, "step": 381 }, { "epoch": 0.8584269662921349, "grad_norm": 0.48723292350769043, "learning_rate": 2.9701231801540032e-05, "loss": 0.041, "step": 382 }, { "epoch": 0.8606741573033708, "grad_norm": 3.1171765327453613, "learning_rate": 2.9652754312437897e-05, "loss": 0.725, "step": 383 }, { "epoch": 0.8629213483146068, "grad_norm": 2.6491808891296387, "learning_rate": 2.9604108364643112e-05, "loss": 0.257, "step": 384 }, { "epoch": 0.8651685393258427, "grad_norm": 4.025605201721191, "learning_rate": 2.9555294905233606e-05, "loss": 0.7912, "step": 385 }, { "epoch": 0.8674157303370786, "grad_norm": 4.142299652099609, "learning_rate": 2.9506314884548583e-05, "loss": 0.8915, "step": 386 }, { "epoch": 0.8696629213483146, "grad_norm": 2.943582534790039, "learning_rate": 2.945716925616998e-05, "loss": 0.779, "step": 387 }, { "epoch": 0.8719101123595505, "grad_norm": 4.478114604949951, "learning_rate": 2.9407858976903913e-05, "loss": 0.7828, "step": 388 }, { "epoch": 0.8741573033707866, "grad_norm": 3.9878995418548584, "learning_rate": 2.935838500676207e-05, "loss": 0.7462, "step": 389 }, { "epoch": 0.8764044943820225, "grad_norm": 3.7733311653137207, "learning_rate": 2.9308748308942983e-05, "loss": 0.7913, "step": 390 }, { "epoch": 0.8786516853932584, "grad_norm": 3.179732322692871, "learning_rate": 2.9258949849813315e-05, "loss": 0.3209, "step": 391 }, { "epoch": 0.8808988764044944, "grad_norm": 3.6665351390838623, "learning_rate": 2.9208990598889008e-05, "loss": 0.5932, "step": 392 }, { "epoch": 0.8831460674157303, "grad_norm": 0.545093834400177, "learning_rate": 2.9158871528816442e-05, "loss": 0.0613, "step": 393 }, { "epoch": 0.8853932584269663, "grad_norm": 5.226474285125732, "learning_rate": 2.9108593615353467e-05, "loss": 0.8802, "step": 394 }, { "epoch": 0.8876404494382022, "grad_norm": 3.691817283630371, "learning_rate": 2.9058157837350437e-05, "loss": 0.6116, "step": 395 }, { "epoch": 0.8898876404494382, "grad_norm": 0.4754512906074524, "learning_rate": 2.900756517673113e-05, "loss": 0.0537, "step": 396 }, { "epoch": 0.8921348314606742, "grad_norm": 2.874117374420166, "learning_rate": 2.8956816618473647e-05, "loss": 0.3006, "step": 397 }, { "epoch": 0.8943820224719101, "grad_norm": 3.8957912921905518, "learning_rate": 2.890591315059121e-05, "loss": 0.7636, "step": 398 }, { "epoch": 0.8966292134831461, "grad_norm": 3.7385432720184326, "learning_rate": 2.8854855764112973e-05, "loss": 0.612, "step": 399 }, { "epoch": 0.898876404494382, "grad_norm": 3.7403082847595215, "learning_rate": 2.880364545306468e-05, "loss": 0.54, "step": 400 }, { "epoch": 0.9011235955056179, "grad_norm": 2.7360849380493164, "learning_rate": 2.8752283214449328e-05, "loss": 0.2761, "step": 401 }, { "epoch": 0.903370786516854, "grad_norm": 8.988025665283203, "learning_rate": 2.8700770048227775e-05, "loss": 1.2668, "step": 402 }, { "epoch": 0.9056179775280899, "grad_norm": 3.411295175552368, "learning_rate": 2.864910695729925e-05, "loss": 0.8066, "step": 403 }, { "epoch": 0.9078651685393259, "grad_norm": 0.3018481135368347, "learning_rate": 2.8597294947481834e-05, "loss": 0.0094, "step": 404 }, { "epoch": 0.9101123595505618, "grad_norm": 4.116438388824463, "learning_rate": 2.8545335027492885e-05, "loss": 0.673, "step": 405 }, { "epoch": 0.9101123595505618, "eval_VitaminC_cosine_accuracy": 0.55859375, "eval_VitaminC_cosine_accuracy_threshold": 0.7188639044761658, "eval_VitaminC_cosine_ap": 0.5516905675485202, "eval_VitaminC_cosine_f1": 0.6675712347354138, "eval_VitaminC_cosine_f1_threshold": 0.42514583468437195, "eval_VitaminC_cosine_precision": 0.5061728395061729, "eval_VitaminC_cosine_recall": 0.9800796812749004, "eval_VitaminC_dot_accuracy": 0.548828125, "eval_VitaminC_dot_accuracy_threshold": 320.3775329589844, "eval_VitaminC_dot_ap": 0.5343066680873013, "eval_VitaminC_dot_f1": 0.6720867208672087, "eval_VitaminC_dot_f1_threshold": 152.709716796875, "eval_VitaminC_dot_precision": 0.5092402464065708, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.556640625, "eval_VitaminC_euclidean_accuracy_threshold": 15.12228775024414, "eval_VitaminC_euclidean_ap": 0.5542894540784595, "eval_VitaminC_euclidean_f1": 0.6640211640211641, "eval_VitaminC_euclidean_f1_threshold": 24.3716983795166, "eval_VitaminC_euclidean_precision": 0.497029702970297, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.55859375, "eval_VitaminC_manhattan_accuracy_threshold": 305.93597412109375, "eval_VitaminC_manhattan_ap": 0.5533328154567183, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 509.4247741699219, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 320.3775329589844, "eval_VitaminC_max_ap": 0.5542894540784595, "eval_VitaminC_max_f1": 0.6720867208672087, "eval_VitaminC_max_f1_threshold": 509.4247741699219, "eval_VitaminC_max_precision": 0.5092402464065708, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5542894540784595, "eval_sts-test_pearson_cosine": 0.8820726638294588, "eval_sts-test_pearson_dot": 0.8723940521896922, "eval_sts-test_pearson_euclidean": 0.9038814103150634, "eval_sts-test_pearson_manhattan": 0.904449390563823, "eval_sts-test_pearson_max": 0.904449390563823, "eval_sts-test_spearman_cosine": 0.9051641183600871, "eval_sts-test_spearman_dot": 0.8721959088443044, "eval_sts-test_spearman_euclidean": 0.8999642007914521, "eval_sts-test_spearman_manhattan": 0.9005904051921018, "eval_sts-test_spearman_max": 0.9051641183600871, "eval_vitaminc-pairs_loss": 1.48486328125, "eval_vitaminc-pairs_runtime": 1.8874, "eval_vitaminc-pairs_samples_per_second": 57.222, "eval_vitaminc-pairs_steps_per_second": 1.06, "step": 405 }, { "epoch": 0.9101123595505618, "eval_negation-triplets_loss": 0.9023827314376831, "eval_negation-triplets_runtime": 0.302, "eval_negation-triplets_samples_per_second": 211.927, "eval_negation-triplets_steps_per_second": 3.311, "step": 405 }, { "epoch": 0.9101123595505618, "eval_scitail-pairs-pos_loss": 0.10495099425315857, "eval_scitail-pairs-pos_runtime": 0.3856, "eval_scitail-pairs-pos_samples_per_second": 140.031, "eval_scitail-pairs-pos_steps_per_second": 2.593, "step": 405 }, { "epoch": 0.9101123595505618, "eval_scitail-pairs-qa_loss": 0.0008332311408594251, "eval_scitail-pairs-qa_runtime": 0.5224, "eval_scitail-pairs-qa_samples_per_second": 245.005, "eval_scitail-pairs-qa_steps_per_second": 3.828, "step": 405 }, { "epoch": 0.9101123595505618, "eval_xsum-pairs_loss": 0.028531953692436218, "eval_xsum-pairs_runtime": 2.7425, "eval_xsum-pairs_samples_per_second": 46.672, "eval_xsum-pairs_steps_per_second": 0.729, "step": 405 }, { "epoch": 0.9101123595505618, "eval_sciq_pairs_loss": 0.015175853855907917, "eval_sciq_pairs_runtime": 2.8294, "eval_sciq_pairs_samples_per_second": 45.239, "eval_sciq_pairs_steps_per_second": 0.707, "step": 405 }, { "epoch": 0.9101123595505618, "eval_qasc_pairs_loss": 0.09416583180427551, "eval_qasc_pairs_runtime": 0.6538, "eval_qasc_pairs_samples_per_second": 195.781, "eval_qasc_pairs_steps_per_second": 3.059, "step": 405 }, { "epoch": 0.9101123595505618, "eval_openbookqa_pairs_loss": 0.715216875076294, "eval_openbookqa_pairs_runtime": 0.578, "eval_openbookqa_pairs_samples_per_second": 221.449, "eval_openbookqa_pairs_steps_per_second": 3.46, "step": 405 }, { "epoch": 0.9101123595505618, "eval_msmarco_pairs_loss": 0.1417744755744934, "eval_msmarco_pairs_runtime": 1.4882, "eval_msmarco_pairs_samples_per_second": 86.012, "eval_msmarco_pairs_steps_per_second": 1.344, "step": 405 }, { "epoch": 0.9101123595505618, "eval_nq_pairs_loss": 0.10870223492383957, "eval_nq_pairs_runtime": 2.3451, "eval_nq_pairs_samples_per_second": 54.583, "eval_nq_pairs_steps_per_second": 0.853, "step": 405 }, { "epoch": 0.9101123595505618, "eval_trivia_pairs_loss": 0.49194595217704773, "eval_trivia_pairs_runtime": 3.5796, "eval_trivia_pairs_samples_per_second": 35.759, "eval_trivia_pairs_steps_per_second": 0.559, "step": 405 }, { "epoch": 0.9101123595505618, "eval_gooaq_pairs_loss": 0.2616226375102997, "eval_gooaq_pairs_runtime": 0.9137, "eval_gooaq_pairs_samples_per_second": 140.093, "eval_gooaq_pairs_steps_per_second": 2.189, "step": 405 }, { "epoch": 0.9101123595505618, "eval_paws-pos_loss": 0.02422034554183483, "eval_paws-pos_runtime": 0.6895, "eval_paws-pos_samples_per_second": 185.641, "eval_paws-pos_steps_per_second": 2.901, "step": 405 }, { "epoch": 0.9123595505617977, "grad_norm": 3.427104949951172, "learning_rate": 2.8493228208929387e-05, "loss": 0.5189, "step": 406 }, { "epoch": 0.9146067415730337, "grad_norm": 4.941195487976074, "learning_rate": 2.8440975506248268e-05, "loss": 0.649, "step": 407 }, { "epoch": 0.9168539325842696, "grad_norm": 2.7992403507232666, "learning_rate": 2.8388577936746633e-05, "loss": 0.2982, "step": 408 }, { "epoch": 0.9191011235955057, "grad_norm": 3.8877484798431396, "learning_rate": 2.833603652054199e-05, "loss": 0.7511, "step": 409 }, { "epoch": 0.9213483146067416, "grad_norm": 3.2458090782165527, "learning_rate": 2.8283352280552348e-05, "loss": 0.5164, "step": 410 }, { "epoch": 0.9235955056179775, "grad_norm": 3.7385945320129395, "learning_rate": 2.8230526242476332e-05, "loss": 0.5924, "step": 411 }, { "epoch": 0.9258426966292135, "grad_norm": 4.369627952575684, "learning_rate": 2.8177559434773203e-05, "loss": 0.8191, "step": 412 }, { "epoch": 0.9280898876404494, "grad_norm": 2.95206356048584, "learning_rate": 2.8124452888642838e-05, "loss": 0.2311, "step": 413 }, { "epoch": 0.9303370786516854, "grad_norm": 3.984375238418579, "learning_rate": 2.8071207638005662e-05, "loss": 0.7421, "step": 414 }, { "epoch": 0.9325842696629213, "grad_norm": 3.0188541412353516, "learning_rate": 2.801782471948248e-05, "loss": 0.2936, "step": 415 }, { "epoch": 0.9348314606741573, "grad_norm": 4.104308605194092, "learning_rate": 2.7964305172374362e-05, "loss": 0.737, "step": 416 }, { "epoch": 0.9370786516853933, "grad_norm": 3.686523675918579, "learning_rate": 2.791065003864235e-05, "loss": 0.6539, "step": 417 }, { "epoch": 0.9393258426966292, "grad_norm": 3.839590311050415, "learning_rate": 2.785686036288719e-05, "loss": 0.6855, "step": 418 }, { "epoch": 0.9415730337078652, "grad_norm": 4.174718856811523, "learning_rate": 2.780293719232902e-05, "loss": 0.8134, "step": 419 }, { "epoch": 0.9438202247191011, "grad_norm": 4.046380043029785, "learning_rate": 2.7748881576786946e-05, "loss": 0.6885, "step": 420 }, { "epoch": 0.946067415730337, "grad_norm": 3.4202940464019775, "learning_rate": 2.7694694568658613e-05, "loss": 0.5581, "step": 421 }, { "epoch": 0.9483146067415731, "grad_norm": 3.787081718444824, "learning_rate": 2.764037722289973e-05, "loss": 0.8029, "step": 422 }, { "epoch": 0.950561797752809, "grad_norm": 3.870718240737915, "learning_rate": 2.7585930597003524e-05, "loss": 0.8126, "step": 423 }, { "epoch": 0.952808988764045, "grad_norm": 3.1959424018859863, "learning_rate": 2.753135575098015e-05, "loss": 0.8425, "step": 424 }, { "epoch": 0.9550561797752809, "grad_norm": 0.4186573922634125, "learning_rate": 2.7476653747336047e-05, "loss": 0.049, "step": 425 }, { "epoch": 0.9573033707865168, "grad_norm": 4.299917697906494, "learning_rate": 2.7421825651053265e-05, "loss": 0.7849, "step": 426 }, { "epoch": 0.9595505617977528, "grad_norm": 2.6435227394104004, "learning_rate": 2.736687252956873e-05, "loss": 0.068, "step": 427 }, { "epoch": 0.9617977528089887, "grad_norm": 2.717653274536133, "learning_rate": 2.7311795452753443e-05, "loss": 0.2925, "step": 428 }, { "epoch": 0.9640449438202248, "grad_norm": 3.6929807662963867, "learning_rate": 2.7256595492891683e-05, "loss": 0.777, "step": 429 }, { "epoch": 0.9662921348314607, "grad_norm": 2.8760790824890137, "learning_rate": 2.720127372466011e-05, "loss": 0.7397, "step": 430 }, { "epoch": 0.9685393258426966, "grad_norm": 0.03685740381479263, "learning_rate": 2.714583122510683e-05, "loss": 0.0007, "step": 431 }, { "epoch": 0.9707865168539326, "grad_norm": 4.058692455291748, "learning_rate": 2.709026907363047e-05, "loss": 0.8535, "step": 432 }, { "epoch": 0.9730337078651685, "grad_norm": 4.2914276123046875, "learning_rate": 2.703458835195911e-05, "loss": 0.7026, "step": 433 }, { "epoch": 0.9752808988764045, "grad_norm": 3.735518217086792, "learning_rate": 2.6978790144129262e-05, "loss": 0.7557, "step": 434 }, { "epoch": 0.9775280898876404, "grad_norm": 4.058504104614258, "learning_rate": 2.6922875536464747e-05, "loss": 0.7225, "step": 435 }, { "epoch": 0.9797752808988764, "grad_norm": 0.0, "learning_rate": 2.6866845617555555e-05, "loss": 0.0, "step": 436 }, { "epoch": 0.9820224719101124, "grad_norm": 5.648872375488281, "learning_rate": 2.6810701478236642e-05, "loss": 0.4131, "step": 437 }, { "epoch": 0.9842696629213483, "grad_norm": 2.7032744884490967, "learning_rate": 2.6754444211566702e-05, "loss": 0.2824, "step": 438 }, { "epoch": 0.9865168539325843, "grad_norm": 3.150801420211792, "learning_rate": 2.6698074912806882e-05, "loss": 0.3144, "step": 439 }, { "epoch": 0.9887640449438202, "grad_norm": 2.3572490215301514, "learning_rate": 2.6641594679399448e-05, "loss": 0.0509, "step": 440 }, { "epoch": 0.9910112359550561, "grad_norm": 3.2544448375701904, "learning_rate": 2.6585004610946452e-05, "loss": 0.7645, "step": 441 }, { "epoch": 0.9932584269662922, "grad_norm": 4.310440540313721, "learning_rate": 2.6528305809188273e-05, "loss": 0.2787, "step": 442 }, { "epoch": 0.9955056179775281, "grad_norm": 3.863487482070923, "learning_rate": 2.6471499377982225e-05, "loss": 0.64, "step": 443 }, { "epoch": 0.9977528089887641, "grad_norm": 6.1020612716674805, "learning_rate": 2.6414586423281017e-05, "loss": 0.4045, "step": 444 }, { "epoch": 1.0, "grad_norm": 3.1245224475860596, "learning_rate": 2.6357568053111255e-05, "loss": 0.7661, "step": 445 }, { "epoch": 1.002247191011236, "grad_norm": 3.7888576984405518, "learning_rate": 2.6300445377551847e-05, "loss": 0.7335, "step": 446 }, { "epoch": 1.0044943820224719, "grad_norm": 3.935758590698242, "learning_rate": 2.62432195087124e-05, "loss": 0.7835, "step": 447 }, { "epoch": 1.006741573033708, "grad_norm": 3.7737417221069336, "learning_rate": 2.6185891560711587e-05, "loss": 0.7674, "step": 448 }, { "epoch": 1.0089887640449438, "grad_norm": 0.457439124584198, "learning_rate": 2.612846264965542e-05, "loss": 0.0489, "step": 449 }, { "epoch": 1.0112359550561798, "grad_norm": 3.515545606613159, "learning_rate": 2.607093389361555e-05, "loss": 0.3104, "step": 450 }, { "epoch": 1.0112359550561798, "eval_VitaminC_cosine_accuracy": 0.5625, "eval_VitaminC_cosine_accuracy_threshold": 0.7603898048400879, "eval_VitaminC_cosine_ap": 0.5525005100698708, "eval_VitaminC_cosine_f1": 0.6685006877579092, "eval_VitaminC_cosine_f1_threshold": 0.4857867360115051, "eval_VitaminC_cosine_precision": 0.5105042016806722, "eval_VitaminC_cosine_recall": 0.9681274900398407, "eval_VitaminC_dot_accuracy": 0.548828125, "eval_VitaminC_dot_accuracy_threshold": 325.483154296875, "eval_VitaminC_dot_ap": 0.5344057014880635, "eval_VitaminC_dot_f1": 0.6675749318801091, "eval_VitaminC_dot_f1_threshold": 159.2823028564453, "eval_VitaminC_dot_precision": 0.5072463768115942, "eval_VitaminC_dot_recall": 0.9760956175298805, "eval_VitaminC_euclidean_accuracy": 0.55859375, "eval_VitaminC_euclidean_accuracy_threshold": 15.577638626098633, "eval_VitaminC_euclidean_ap": 0.5540831040718627, "eval_VitaminC_euclidean_f1": 0.6666666666666667, "eval_VitaminC_euclidean_f1_threshold": 21.39883804321289, "eval_VitaminC_euclidean_precision": 0.5030425963488844, "eval_VitaminC_euclidean_recall": 0.9880478087649402, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 241.35984802246094, "eval_VitaminC_manhattan_ap": 0.5536965508228381, "eval_VitaminC_manhattan_f1": 0.6657754010695187, "eval_VitaminC_manhattan_f1_threshold": 463.26080322265625, "eval_VitaminC_manhattan_precision": 0.5010060362173038, "eval_VitaminC_manhattan_recall": 0.9920318725099602, "eval_VitaminC_max_accuracy": 0.5625, "eval_VitaminC_max_accuracy_threshold": 325.483154296875, "eval_VitaminC_max_ap": 0.5540831040718627, "eval_VitaminC_max_f1": 0.6685006877579092, "eval_VitaminC_max_f1_threshold": 463.26080322265625, "eval_VitaminC_max_precision": 0.5105042016806722, "eval_VitaminC_max_recall": 0.9920318725099602, "eval_sequential_score": 0.5540831040718627, "eval_sts-test_pearson_cosine": 0.8812335915964673, "eval_sts-test_pearson_dot": 0.8749461926810898, "eval_sts-test_pearson_euclidean": 0.9037170548962163, "eval_sts-test_pearson_manhattan": 0.9046002457312785, "eval_sts-test_pearson_max": 0.9046002457312785, "eval_sts-test_spearman_cosine": 0.9043227946459288, "eval_sts-test_spearman_dot": 0.8763633253101171, "eval_sts-test_spearman_euclidean": 0.8995340964182194, "eval_sts-test_spearman_manhattan": 0.9002530254324721, "eval_sts-test_spearman_max": 0.9043227946459288, "eval_vitaminc-pairs_loss": 1.4284634590148926, "eval_vitaminc-pairs_runtime": 1.9114, "eval_vitaminc-pairs_samples_per_second": 56.503, "eval_vitaminc-pairs_steps_per_second": 1.046, "step": 450 }, { "epoch": 1.0112359550561798, "eval_negation-triplets_loss": 0.8765377402305603, "eval_negation-triplets_runtime": 0.3186, "eval_negation-triplets_samples_per_second": 200.858, "eval_negation-triplets_steps_per_second": 3.138, "step": 450 }, { "epoch": 1.0112359550561798, "eval_scitail-pairs-pos_loss": 0.06986676901578903, "eval_scitail-pairs-pos_runtime": 0.4519, "eval_scitail-pairs-pos_samples_per_second": 119.485, "eval_scitail-pairs-pos_steps_per_second": 2.213, "step": 450 }, { "epoch": 1.0112359550561798, "eval_scitail-pairs-qa_loss": 0.0008211968233808875, "eval_scitail-pairs-qa_runtime": 0.5761, "eval_scitail-pairs-qa_samples_per_second": 222.193, "eval_scitail-pairs-qa_steps_per_second": 3.472, "step": 450 }, { "epoch": 1.0112359550561798, "eval_xsum-pairs_loss": 0.028749318793416023, "eval_xsum-pairs_runtime": 2.7556, "eval_xsum-pairs_samples_per_second": 46.45, "eval_xsum-pairs_steps_per_second": 0.726, "step": 450 }, { "epoch": 1.0112359550561798, "eval_sciq_pairs_loss": 0.01784924976527691, "eval_sciq_pairs_runtime": 2.8996, "eval_sciq_pairs_samples_per_second": 44.143, "eval_sciq_pairs_steps_per_second": 0.69, "step": 450 }, { "epoch": 1.0112359550561798, "eval_qasc_pairs_loss": 0.09589868038892746, "eval_qasc_pairs_runtime": 0.6801, "eval_qasc_pairs_samples_per_second": 188.207, "eval_qasc_pairs_steps_per_second": 2.941, "step": 450 }, { "epoch": 1.0112359550561798, "eval_openbookqa_pairs_loss": 0.7216827273368835, "eval_openbookqa_pairs_runtime": 0.5958, "eval_openbookqa_pairs_samples_per_second": 214.846, "eval_openbookqa_pairs_steps_per_second": 3.357, "step": 450 }, { "epoch": 1.0112359550561798, "eval_msmarco_pairs_loss": 0.15124906599521637, "eval_msmarco_pairs_runtime": 1.5017, "eval_msmarco_pairs_samples_per_second": 85.239, "eval_msmarco_pairs_steps_per_second": 1.332, "step": 450 }, { "epoch": 1.0112359550561798, "eval_nq_pairs_loss": 0.10319234430789948, "eval_nq_pairs_runtime": 2.3696, "eval_nq_pairs_samples_per_second": 54.018, "eval_nq_pairs_steps_per_second": 0.844, "step": 450 }, { "epoch": 1.0112359550561798, "eval_trivia_pairs_loss": 0.48776012659072876, "eval_trivia_pairs_runtime": 3.5941, "eval_trivia_pairs_samples_per_second": 35.614, "eval_trivia_pairs_steps_per_second": 0.556, "step": 450 }, { "epoch": 1.0112359550561798, "eval_gooaq_pairs_loss": 0.26884058117866516, "eval_gooaq_pairs_runtime": 0.9231, "eval_gooaq_pairs_samples_per_second": 138.665, "eval_gooaq_pairs_steps_per_second": 2.167, "step": 450 }, { "epoch": 1.0112359550561798, "eval_paws-pos_loss": 0.02506968565285206, "eval_paws-pos_runtime": 0.7124, "eval_paws-pos_samples_per_second": 179.665, "eval_paws-pos_steps_per_second": 2.807, "step": 450 }, { "epoch": 1.0134831460674156, "grad_norm": 3.3714258670806885, "learning_rate": 2.6013306412607486e-05, "loss": 0.2977, "step": 451 }, { "epoch": 1.0157303370786517, "grad_norm": 3.0152552127838135, "learning_rate": 2.59555813285688e-05, "loss": 0.7256, "step": 452 }, { "epoch": 1.0179775280898877, "grad_norm": 0.36391642689704895, "learning_rate": 2.589775976533726e-05, "loss": 0.0327, "step": 453 }, { "epoch": 1.0202247191011236, "grad_norm": 2.959923505783081, "learning_rate": 2.5839842848628985e-05, "loss": 0.7372, "step": 454 }, { "epoch": 1.0224719101123596, "grad_norm": 0.8520237803459167, "learning_rate": 2.5781831706016506e-05, "loss": 0.0518, "step": 455 }, { "epoch": 1.0247191011235954, "grad_norm": 3.161862850189209, "learning_rate": 2.5723727466906813e-05, "loss": 0.7668, "step": 456 }, { "epoch": 1.0269662921348315, "grad_norm": 4.314478397369385, "learning_rate": 2.5665531262519385e-05, "loss": 0.6634, "step": 457 }, { "epoch": 1.0292134831460673, "grad_norm": 4.253237247467041, "learning_rate": 2.5607244225864135e-05, "loss": 0.6022, "step": 458 }, { "epoch": 1.0314606741573034, "grad_norm": 3.9452831745147705, "learning_rate": 2.5548867491719395e-05, "loss": 0.7255, "step": 459 }, { "epoch": 1.0337078651685394, "grad_norm": 2.962646007537842, "learning_rate": 2.549040219660978e-05, "loss": 0.2823, "step": 460 }, { "epoch": 1.0359550561797752, "grad_norm": 2.687352418899536, "learning_rate": 2.543184947878408e-05, "loss": 0.2614, "step": 461 }, { "epoch": 1.0382022471910113, "grad_norm": 2.9925472736358643, "learning_rate": 2.5373210478193118e-05, "loss": 0.5231, "step": 462 }, { "epoch": 1.0404494382022471, "grad_norm": 0.694948136806488, "learning_rate": 2.5314486336467516e-05, "loss": 0.0424, "step": 463 }, { "epoch": 1.0426966292134832, "grad_norm": 5.543944835662842, "learning_rate": 2.525567819689551e-05, "loss": 0.9838, "step": 464 }, { "epoch": 1.0449438202247192, "grad_norm": 6.339949607849121, "learning_rate": 2.5196787204400655e-05, "loss": 2.4683, "step": 465 }, { "epoch": 1.047191011235955, "grad_norm": 0.48354002833366394, "learning_rate": 2.5137814505519576e-05, "loss": 0.0497, "step": 466 }, { "epoch": 1.049438202247191, "grad_norm": 0.5280358791351318, "learning_rate": 2.5078761248379596e-05, "loss": 0.0766, "step": 467 }, { "epoch": 1.051685393258427, "grad_norm": 3.0178725719451904, "learning_rate": 2.5019628582676428e-05, "loss": 0.7333, "step": 468 }, { "epoch": 1.053932584269663, "grad_norm": 3.958301305770874, "learning_rate": 2.4960417659651765e-05, "loss": 0.7881, "step": 469 }, { "epoch": 1.0561797752808988, "grad_norm": 3.143247127532959, "learning_rate": 2.4901129632070887e-05, "loss": 0.7611, "step": 470 }, { "epoch": 1.0584269662921348, "grad_norm": 3.9136295318603516, "learning_rate": 2.48417656542002e-05, "loss": 0.6023, "step": 471 }, { "epoch": 1.060674157303371, "grad_norm": 3.149609327316284, "learning_rate": 2.4782326881784757e-05, "loss": 0.7884, "step": 472 }, { "epoch": 1.0629213483146067, "grad_norm": 4.164999961853027, "learning_rate": 2.4722814472025798e-05, "loss": 0.8465, "step": 473 }, { "epoch": 1.0651685393258428, "grad_norm": 2.767392158508301, "learning_rate": 2.466322958355817e-05, "loss": 0.2752, "step": 474 }, { "epoch": 1.0674157303370786, "grad_norm": 2.7601747512817383, "learning_rate": 2.4603573376427804e-05, "loss": 0.2648, "step": 475 }, { "epoch": 1.0696629213483146, "grad_norm": 3.3139634132385254, "learning_rate": 2.4543847012069114e-05, "loss": 0.5548, "step": 476 }, { "epoch": 1.0719101123595505, "grad_norm": 0.46029964089393616, "learning_rate": 2.4484051653282405e-05, "loss": 0.0554, "step": 477 }, { "epoch": 1.0741573033707865, "grad_norm": 3.19266939163208, "learning_rate": 2.44241884642112e-05, "loss": 0.8244, "step": 478 }, { "epoch": 1.0764044943820226, "grad_norm": 0.49586713314056396, "learning_rate": 2.4364258610319604e-05, "loss": 0.0369, "step": 479 }, { "epoch": 1.0786516853932584, "grad_norm": 3.8522789478302, "learning_rate": 2.4304263258369612e-05, "loss": 0.747, "step": 480 }, { "epoch": 1.0808988764044944, "grad_norm": 2.929213285446167, "learning_rate": 2.4244203576398378e-05, "loss": 0.2507, "step": 481 }, { "epoch": 1.0831460674157303, "grad_norm": 0.4208325147628784, "learning_rate": 2.418408073369549e-05, "loss": 0.0304, "step": 482 }, { "epoch": 1.0853932584269663, "grad_norm": 3.958451271057129, "learning_rate": 2.4123895900780194e-05, "loss": 0.7735, "step": 483 }, { "epoch": 1.0876404494382022, "grad_norm": 3.8647449016571045, "learning_rate": 2.4063650249378617e-05, "loss": 0.7526, "step": 484 }, { "epoch": 1.0898876404494382, "grad_norm": 3.1796231269836426, "learning_rate": 2.4003344952400947e-05, "loss": 0.7959, "step": 485 }, { "epoch": 1.0921348314606742, "grad_norm": 3.6608550548553467, "learning_rate": 2.3942981183918597e-05, "loss": 0.7405, "step": 486 }, { "epoch": 1.09438202247191, "grad_norm": 3.589618682861328, "learning_rate": 2.388256011914134e-05, "loss": 0.7041, "step": 487 }, { "epoch": 1.0966292134831461, "grad_norm": 3.798146963119507, "learning_rate": 2.382208293439447e-05, "loss": 0.6991, "step": 488 }, { "epoch": 1.098876404494382, "grad_norm": 0.4198363423347473, "learning_rate": 2.3761550807095828e-05, "loss": 0.0462, "step": 489 }, { "epoch": 1.101123595505618, "grad_norm": 3.7368414402008057, "learning_rate": 2.3700964915732954e-05, "loss": 0.5835, "step": 490 }, { "epoch": 1.1033707865168538, "grad_norm": 2.6319525241851807, "learning_rate": 2.364032643984009e-05, "loss": 0.2632, "step": 491 }, { "epoch": 1.1056179775280899, "grad_norm": 3.6233298778533936, "learning_rate": 2.3579636559975242e-05, "loss": 0.4681, "step": 492 }, { "epoch": 1.107865168539326, "grad_norm": 3.0172781944274902, "learning_rate": 2.351889645769719e-05, "loss": 0.7271, "step": 493 }, { "epoch": 1.1101123595505618, "grad_norm": 2.4773848056793213, "learning_rate": 2.3458107315542488e-05, "loss": 0.2582, "step": 494 }, { "epoch": 1.1123595505617978, "grad_norm": 2.33353853225708, "learning_rate": 2.3397270317002424e-05, "loss": 0.2251, "step": 495 }, { "epoch": 1.1123595505617978, "eval_VitaminC_cosine_accuracy": 0.552734375, "eval_VitaminC_cosine_accuracy_threshold": 0.7386432886123657, "eval_VitaminC_cosine_ap": 0.5475541126516937, "eval_VitaminC_cosine_f1": 0.6675639300134589, "eval_VitaminC_cosine_f1_threshold": 0.36797067523002625, "eval_VitaminC_cosine_precision": 0.5040650406504065, "eval_VitaminC_cosine_recall": 0.9880478087649402, "eval_VitaminC_dot_accuracy": 0.546875, "eval_VitaminC_dot_accuracy_threshold": 329.7876281738281, "eval_VitaminC_dot_ap": 0.5321466642848512, "eval_VitaminC_dot_f1": 0.6684931506849316, "eval_VitaminC_dot_f1_threshold": 163.3940887451172, "eval_VitaminC_dot_precision": 0.5093945720250522, "eval_VitaminC_dot_recall": 0.9721115537848606, "eval_VitaminC_euclidean_accuracy": 0.5546875, "eval_VitaminC_euclidean_accuracy_threshold": 16.11573028564453, "eval_VitaminC_euclidean_ap": 0.5516659121082983, "eval_VitaminC_euclidean_f1": 0.6657789613848203, "eval_VitaminC_euclidean_f1_threshold": 23.179344177246094, "eval_VitaminC_euclidean_precision": 0.5, "eval_VitaminC_euclidean_recall": 0.9960159362549801, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 311.29736328125, "eval_VitaminC_manhattan_ap": 0.5507056801905115, "eval_VitaminC_manhattan_f1": 0.6657789613848203, "eval_VitaminC_manhattan_f1_threshold": 492.5957946777344, "eval_VitaminC_manhattan_precision": 0.5, "eval_VitaminC_manhattan_recall": 0.9960159362549801, "eval_VitaminC_max_accuracy": 0.5546875, "eval_VitaminC_max_accuracy_threshold": 329.7876281738281, "eval_VitaminC_max_ap": 0.5516659121082983, "eval_VitaminC_max_f1": 0.6684931506849316, "eval_VitaminC_max_f1_threshold": 492.5957946777344, "eval_VitaminC_max_precision": 0.5093945720250522, "eval_VitaminC_max_recall": 0.9960159362549801, "eval_sequential_score": 0.5516659121082983, "eval_sts-test_pearson_cosine": 0.8841762545397394, "eval_sts-test_pearson_dot": 0.8767370504598664, "eval_sts-test_pearson_euclidean": 0.9052591981779188, "eval_sts-test_pearson_manhattan": 0.9058933866613134, "eval_sts-test_pearson_max": 0.9058933866613134, "eval_sts-test_spearman_cosine": 0.9052463711785245, "eval_sts-test_spearman_dot": 0.8788541357679011, "eval_sts-test_spearman_euclidean": 0.8998455204462421, "eval_sts-test_spearman_manhattan": 0.9007272097981753, "eval_sts-test_spearman_max": 0.9052463711785245, "eval_vitaminc-pairs_loss": 1.385530948638916, "eval_vitaminc-pairs_runtime": 1.9019, "eval_vitaminc-pairs_samples_per_second": 56.786, "eval_vitaminc-pairs_steps_per_second": 1.052, "step": 495 }, { "epoch": 1.1123595505617978, "eval_negation-triplets_loss": 0.8651055097579956, "eval_negation-triplets_runtime": 0.3083, "eval_negation-triplets_samples_per_second": 207.568, "eval_negation-triplets_steps_per_second": 3.243, "step": 495 }, { "epoch": 1.1123595505617978, "eval_scitail-pairs-pos_loss": 0.0760912150144577, "eval_scitail-pairs-pos_runtime": 0.4061, "eval_scitail-pairs-pos_samples_per_second": 132.981, "eval_scitail-pairs-pos_steps_per_second": 2.463, "step": 495 }, { "epoch": 1.1123595505617978, "eval_scitail-pairs-qa_loss": 0.0005116994143463671, "eval_scitail-pairs-qa_runtime": 0.5389, "eval_scitail-pairs-qa_samples_per_second": 237.512, "eval_scitail-pairs-qa_steps_per_second": 3.711, "step": 495 }, { "epoch": 1.1123595505617978, "eval_xsum-pairs_loss": 0.02057916484773159, "eval_xsum-pairs_runtime": 2.7698, "eval_xsum-pairs_samples_per_second": 46.213, "eval_xsum-pairs_steps_per_second": 0.722, "step": 495 }, { "epoch": 1.1123595505617978, "eval_sciq_pairs_loss": 0.015313890762627125, "eval_sciq_pairs_runtime": 2.8967, "eval_sciq_pairs_samples_per_second": 44.189, "eval_sciq_pairs_steps_per_second": 0.69, "step": 495 }, { "epoch": 1.1123595505617978, "eval_qasc_pairs_loss": 0.10294634103775024, "eval_qasc_pairs_runtime": 0.6624, "eval_qasc_pairs_samples_per_second": 193.223, "eval_qasc_pairs_steps_per_second": 3.019, "step": 495 }, { "epoch": 1.1123595505617978, "eval_openbookqa_pairs_loss": 0.679233968257904, "eval_openbookqa_pairs_runtime": 0.5955, "eval_openbookqa_pairs_samples_per_second": 214.948, "eval_openbookqa_pairs_steps_per_second": 3.359, "step": 495 }, { "epoch": 1.1123595505617978, "eval_msmarco_pairs_loss": 0.15479065477848053, "eval_msmarco_pairs_runtime": 1.498, "eval_msmarco_pairs_samples_per_second": 85.447, "eval_msmarco_pairs_steps_per_second": 1.335, "step": 495 }, { "epoch": 1.1123595505617978, "eval_nq_pairs_loss": 0.0932854488492012, "eval_nq_pairs_runtime": 2.3621, "eval_nq_pairs_samples_per_second": 54.188, "eval_nq_pairs_steps_per_second": 0.847, "step": 495 }, { "epoch": 1.1123595505617978, "eval_trivia_pairs_loss": 0.5306271910667419, "eval_trivia_pairs_runtime": 3.5969, "eval_trivia_pairs_samples_per_second": 35.586, "eval_trivia_pairs_steps_per_second": 0.556, "step": 495 }, { "epoch": 1.1123595505617978, "eval_gooaq_pairs_loss": 0.2823023796081543, "eval_gooaq_pairs_runtime": 0.9247, "eval_gooaq_pairs_samples_per_second": 138.43, "eval_gooaq_pairs_steps_per_second": 2.163, "step": 495 }, { "epoch": 1.1123595505617978, "eval_paws-pos_loss": 0.02393445000052452, "eval_paws-pos_runtime": 0.7045, "eval_paws-pos_samples_per_second": 181.692, "eval_paws-pos_steps_per_second": 2.839, "step": 495 }, { "epoch": 1.1146067415730336, "grad_norm": 0.5595234632492065, "learning_rate": 2.3336386646500005e-05, "loss": 0.0385, "step": 496 }, { "epoch": 1.1168539325842697, "grad_norm": 3.837158679962158, "learning_rate": 2.327545748936687e-05, "loss": 0.7277, "step": 497 }, { "epoch": 1.1191011235955055, "grad_norm": 3.817466974258423, "learning_rate": 2.3214484031820252e-05, "loss": 0.705, "step": 498 }, { "epoch": 1.1213483146067416, "grad_norm": 3.6540205478668213, "learning_rate": 2.315346746093984e-05, "loss": 0.6059, "step": 499 }, { "epoch": 1.1235955056179776, "grad_norm": 3.6074440479278564, "learning_rate": 2.309240896464469e-05, "loss": 0.6156, "step": 500 }, { "epoch": 1.1258426966292134, "grad_norm": 3.6017813682556152, "learning_rate": 2.30313097316701e-05, "loss": 0.6809, "step": 501 }, { "epoch": 1.1280898876404495, "grad_norm": 2.9375529289245605, "learning_rate": 2.2970170951544467e-05, "loss": 0.7104, "step": 502 }, { "epoch": 1.1303370786516853, "grad_norm": 2.9063873291015625, "learning_rate": 2.2908993814566104e-05, "loss": 0.4397, "step": 503 }, { "epoch": 1.1325842696629214, "grad_norm": 4.570181846618652, "learning_rate": 2.284777951178011e-05, "loss": 0.6952, "step": 504 }, { "epoch": 1.1348314606741572, "grad_norm": 0.4327554702758789, "learning_rate": 2.2786529234955137e-05, "loss": 0.0557, "step": 505 }, { "epoch": 1.1370786516853932, "grad_norm": 3.457714796066284, "learning_rate": 2.2725244176560217e-05, "loss": 0.6711, "step": 506 }, { "epoch": 1.1393258426966293, "grad_norm": 3.4728267192840576, "learning_rate": 2.2663925529741547e-05, "loss": 0.7173, "step": 507 }, { "epoch": 1.1415730337078651, "grad_norm": 3.583503246307373, "learning_rate": 2.2602574488299232e-05, "loss": 0.7037, "step": 508 }, { "epoch": 1.1438202247191012, "grad_norm": 4.4957661628723145, "learning_rate": 2.2541192246664077e-05, "loss": 0.8578, "step": 509 }, { "epoch": 1.146067415730337, "grad_norm": 3.447329044342041, "learning_rate": 2.2479779999874303e-05, "loss": 0.6712, "step": 510 }, { "epoch": 1.148314606741573, "grad_norm": 3.2203116416931152, "learning_rate": 2.2418338943552296e-05, "loss": 0.7472, "step": 511 }, { "epoch": 1.1505617977528089, "grad_norm": 3.551426410675049, "learning_rate": 2.235687027388135e-05, "loss": 0.5911, "step": 512 }, { "epoch": 1.152808988764045, "grad_norm": 3.8964977264404297, "learning_rate": 2.229537518758233e-05, "loss": 0.6827, "step": 513 }, { "epoch": 1.155056179775281, "grad_norm": 3.483255624771118, "learning_rate": 2.2233854881890425e-05, "loss": 0.5034, "step": 514 }, { "epoch": 1.1573033707865168, "grad_norm": 4.6676740646362305, "learning_rate": 2.2172310554531788e-05, "loss": 0.8367, "step": 515 }, { "epoch": 1.1595505617977528, "grad_norm": 3.4448702335357666, "learning_rate": 2.2110743403700276e-05, "loss": 0.6596, "step": 516 }, { "epoch": 1.1617977528089887, "grad_norm": 0.5997368097305298, "learning_rate": 2.2049154628034062e-05, "loss": 0.0859, "step": 517 }, { "epoch": 1.1640449438202247, "grad_norm": 2.822986602783203, "learning_rate": 2.1987545426592347e-05, "loss": 0.2797, "step": 518 }, { "epoch": 1.1662921348314608, "grad_norm": 3.156625270843506, "learning_rate": 2.1925916998832005e-05, "loss": 0.5181, "step": 519 }, { "epoch": 1.1685393258426966, "grad_norm": 3.550964832305908, "learning_rate": 2.1864270544584192e-05, "loss": 0.6837, "step": 520 }, { "epoch": 1.1707865168539326, "grad_norm": 3.6208648681640625, "learning_rate": 2.1802607264031045e-05, "loss": 0.7238, "step": 521 }, { "epoch": 1.1730337078651685, "grad_norm": 0.5348507165908813, "learning_rate": 2.174092835768228e-05, "loss": 0.0318, "step": 522 }, { "epoch": 1.1752808988764045, "grad_norm": 0.4848617911338806, "learning_rate": 2.167923502635183e-05, "loss": 0.0694, "step": 523 }, { "epoch": 1.1775280898876406, "grad_norm": 3.322484016418457, "learning_rate": 2.161752847113446e-05, "loss": 0.7472, "step": 524 }, { "epoch": 1.1797752808988764, "grad_norm": 5.0792999267578125, "learning_rate": 2.1555809893382403e-05, "loss": 0.8912, "step": 525 }, { "epoch": 1.1820224719101124, "grad_norm": 3.029616117477417, "learning_rate": 2.1494080494681936e-05, "loss": 0.7744, "step": 526 }, { "epoch": 1.1842696629213483, "grad_norm": 3.989457845687866, "learning_rate": 2.1432341476830015e-05, "loss": 0.6869, "step": 527 }, { "epoch": 1.1865168539325843, "grad_norm": 4.336198806762695, "learning_rate": 2.137059404181087e-05, "loss": 0.8497, "step": 528 }, { "epoch": 1.1887640449438202, "grad_norm": 2.7506906986236572, "learning_rate": 2.1308839391772608e-05, "loss": 0.4281, "step": 529 }, { "epoch": 1.1910112359550562, "grad_norm": 3.7945544719696045, "learning_rate": 2.12470787290038e-05, "loss": 0.7605, "step": 530 }, { "epoch": 1.1932584269662923, "grad_norm": 3.5343854427337646, "learning_rate": 2.1185313255910074e-05, "loss": 0.6354, "step": 531 }, { "epoch": 1.195505617977528, "grad_norm": 0.35124847292900085, "learning_rate": 2.1123544174990714e-05, "loss": 0.0518, "step": 532 }, { "epoch": 1.1977528089887641, "grad_norm": 2.397141456604004, "learning_rate": 2.106177268881524e-05, "loss": 0.2602, "step": 533 }, { "epoch": 1.2, "grad_norm": 3.2060976028442383, "learning_rate": 2.1e-05, "loss": 0.5082, "step": 534 }, { "epoch": 1.202247191011236, "grad_norm": 0.4915749132633209, "learning_rate": 2.093822731118476e-05, "loss": 0.0603, "step": 535 }, { "epoch": 1.2044943820224718, "grad_norm": 6.44587516784668, "learning_rate": 2.087645582500929e-05, "loss": 2.3371, "step": 536 }, { "epoch": 1.2067415730337079, "grad_norm": 2.940648078918457, "learning_rate": 2.0814686744089924e-05, "loss": 0.6513, "step": 537 }, { "epoch": 1.208988764044944, "grad_norm": 3.5047895908355713, "learning_rate": 2.0752921270996197e-05, "loss": 0.6053, "step": 538 }, { "epoch": 1.2112359550561798, "grad_norm": 0.4148883819580078, "learning_rate": 2.069116060822739e-05, "loss": 0.0544, "step": 539 }, { "epoch": 1.2134831460674158, "grad_norm": 3.633577823638916, "learning_rate": 2.062940595818913e-05, "loss": 0.7219, "step": 540 }, { "epoch": 1.2134831460674158, "eval_VitaminC_cosine_accuracy": 0.55859375, "eval_VitaminC_cosine_accuracy_threshold": 0.750012993812561, "eval_VitaminC_cosine_ap": 0.5498478359791117, "eval_VitaminC_cosine_f1": 0.6666666666666666, "eval_VitaminC_cosine_f1_threshold": 0.2890807092189789, "eval_VitaminC_cosine_precision": 0.5, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.544921875, "eval_VitaminC_dot_accuracy_threshold": 325.9202880859375, "eval_VitaminC_dot_ap": 0.5307476674257613, "eval_VitaminC_dot_f1": 0.6657824933687002, "eval_VitaminC_dot_f1_threshold": 100.63825988769531, "eval_VitaminC_dot_precision": 0.4990059642147117, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.556640625, "eval_VitaminC_euclidean_accuracy_threshold": 12.168689727783203, "eval_VitaminC_euclidean_ap": 0.5532762404675531, "eval_VitaminC_euclidean_f1": 0.6675531914893617, "eval_VitaminC_euclidean_f1_threshold": 23.49704360961914, "eval_VitaminC_euclidean_precision": 0.500998003992016, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 251.46685791015625, "eval_VitaminC_manhattan_ap": 0.5528651894260193, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 511.0567321777344, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 325.9202880859375, "eval_VitaminC_max_ap": 0.5532762404675531, "eval_VitaminC_max_f1": 0.6675531914893617, "eval_VitaminC_max_f1_threshold": 511.0567321777344, "eval_VitaminC_max_precision": 0.500998003992016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5532762404675531, "eval_sts-test_pearson_cosine": 0.8836542354588774, "eval_sts-test_pearson_dot": 0.8766446823118297, "eval_sts-test_pearson_euclidean": 0.9062930503225336, "eval_sts-test_pearson_manhattan": 0.9067495755923205, "eval_sts-test_pearson_max": 0.9067495755923205, "eval_sts-test_spearman_cosine": 0.9065594179390095, "eval_sts-test_spearman_dot": 0.8763055514316607, "eval_sts-test_spearman_euclidean": 0.9012305719863057, "eval_sts-test_spearman_manhattan": 0.901725878947386, "eval_sts-test_spearman_max": 0.9065594179390095, "eval_vitaminc-pairs_loss": 1.377655029296875, "eval_vitaminc-pairs_runtime": 1.9029, "eval_vitaminc-pairs_samples_per_second": 56.754, "eval_vitaminc-pairs_steps_per_second": 1.051, "step": 540 }, { "epoch": 1.2134831460674158, "eval_negation-triplets_loss": 0.8754605650901794, "eval_negation-triplets_runtime": 0.3041, "eval_negation-triplets_samples_per_second": 210.479, "eval_negation-triplets_steps_per_second": 3.289, "step": 540 }, { "epoch": 1.2134831460674158, "eval_scitail-pairs-pos_loss": 0.08722448348999023, "eval_scitail-pairs-pos_runtime": 0.4119, "eval_scitail-pairs-pos_samples_per_second": 131.115, "eval_scitail-pairs-pos_steps_per_second": 2.428, "step": 540 }, { "epoch": 1.2134831460674158, "eval_scitail-pairs-qa_loss": 0.0006766854785382748, "eval_scitail-pairs-qa_runtime": 0.5432, "eval_scitail-pairs-qa_samples_per_second": 235.655, "eval_scitail-pairs-qa_steps_per_second": 3.682, "step": 540 }, { "epoch": 1.2134831460674158, "eval_xsum-pairs_loss": 0.02525785192847252, "eval_xsum-pairs_runtime": 2.7625, "eval_xsum-pairs_samples_per_second": 46.335, "eval_xsum-pairs_steps_per_second": 0.724, "step": 540 }, { "epoch": 1.2134831460674158, "eval_sciq_pairs_loss": 0.016477206721901894, "eval_sciq_pairs_runtime": 2.9071, "eval_sciq_pairs_samples_per_second": 44.031, "eval_sciq_pairs_steps_per_second": 0.688, "step": 540 }, { "epoch": 1.2134831460674158, "eval_qasc_pairs_loss": 0.09754681587219238, "eval_qasc_pairs_runtime": 0.6665, "eval_qasc_pairs_samples_per_second": 192.059, "eval_qasc_pairs_steps_per_second": 3.001, "step": 540 }, { "epoch": 1.2134831460674158, "eval_openbookqa_pairs_loss": 0.6885332465171814, "eval_openbookqa_pairs_runtime": 0.5914, "eval_openbookqa_pairs_samples_per_second": 216.427, "eval_openbookqa_pairs_steps_per_second": 3.382, "step": 540 }, { "epoch": 1.2134831460674158, "eval_msmarco_pairs_loss": 0.13402031362056732, "eval_msmarco_pairs_runtime": 1.5047, "eval_msmarco_pairs_samples_per_second": 85.067, "eval_msmarco_pairs_steps_per_second": 1.329, "step": 540 }, { "epoch": 1.2134831460674158, "eval_nq_pairs_loss": 0.09435093402862549, "eval_nq_pairs_runtime": 2.366, "eval_nq_pairs_samples_per_second": 54.101, "eval_nq_pairs_steps_per_second": 0.845, "step": 540 }, { "epoch": 1.2134831460674158, "eval_trivia_pairs_loss": 0.5325451493263245, "eval_trivia_pairs_runtime": 3.6088, "eval_trivia_pairs_samples_per_second": 35.468, "eval_trivia_pairs_steps_per_second": 0.554, "step": 540 }, { "epoch": 1.2134831460674158, "eval_gooaq_pairs_loss": 0.2803599536418915, "eval_gooaq_pairs_runtime": 0.9204, "eval_gooaq_pairs_samples_per_second": 139.063, "eval_gooaq_pairs_steps_per_second": 2.173, "step": 540 }, { "epoch": 1.2134831460674158, "eval_paws-pos_loss": 0.024230225011706352, "eval_paws-pos_runtime": 0.7045, "eval_paws-pos_samples_per_second": 181.679, "eval_paws-pos_steps_per_second": 2.839, "step": 540 }, { "epoch": 1.2157303370786516, "grad_norm": 3.6125593185424805, "learning_rate": 2.056765852316999e-05, "loss": 0.6862, "step": 541 }, { "epoch": 1.2179775280898877, "grad_norm": 2.6327385902404785, "learning_rate": 2.0505919505318062e-05, "loss": 0.4639, "step": 542 }, { "epoch": 1.2202247191011235, "grad_norm": 3.4139833450317383, "learning_rate": 2.0444190106617598e-05, "loss": 0.6663, "step": 543 }, { "epoch": 1.2224719101123596, "grad_norm": 3.4155499935150146, "learning_rate": 2.0382471528865537e-05, "loss": 0.5047, "step": 544 }, { "epoch": 1.2247191011235956, "grad_norm": 2.4931771755218506, "learning_rate": 2.0320764973648166e-05, "loss": 0.2306, "step": 545 }, { "epoch": 1.2269662921348314, "grad_norm": 3.664468288421631, "learning_rate": 2.0259071642317713e-05, "loss": 0.7147, "step": 546 }, { "epoch": 1.2292134831460675, "grad_norm": 0.5503119826316833, "learning_rate": 2.0197392735968953e-05, "loss": 0.0344, "step": 547 }, { "epoch": 1.2314606741573033, "grad_norm": 2.96278977394104, "learning_rate": 2.013572945541581e-05, "loss": 0.4429, "step": 548 }, { "epoch": 1.2337078651685394, "grad_norm": 3.6760940551757812, "learning_rate": 2.0074083001167992e-05, "loss": 0.6966, "step": 549 }, { "epoch": 1.2359550561797752, "grad_norm": 2.8022167682647705, "learning_rate": 2.0012454573407644e-05, "loss": 0.6926, "step": 550 }, { "epoch": 1.2382022471910112, "grad_norm": 2.4972310066223145, "learning_rate": 1.995084537196594e-05, "loss": 0.261, "step": 551 }, { "epoch": 1.2404494382022473, "grad_norm": 3.387359142303467, "learning_rate": 1.9889256596299725e-05, "loss": 0.6558, "step": 552 }, { "epoch": 1.2426966292134831, "grad_norm": 3.303635358810425, "learning_rate": 1.9827689445468206e-05, "loss": 0.6285, "step": 553 }, { "epoch": 1.2449438202247192, "grad_norm": 2.8494338989257812, "learning_rate": 1.9766145118109576e-05, "loss": 0.6471, "step": 554 }, { "epoch": 1.247191011235955, "grad_norm": 3.350094795227051, "learning_rate": 1.9704624812417665e-05, "loss": 0.4989, "step": 555 }, { "epoch": 1.249438202247191, "grad_norm": 2.0344486236572266, "learning_rate": 1.9643129726118646e-05, "loss": 0.195, "step": 556 }, { "epoch": 1.2516853932584269, "grad_norm": 4.317070007324219, "learning_rate": 1.9581661056447698e-05, "loss": 0.8431, "step": 557 }, { "epoch": 1.253932584269663, "grad_norm": 3.8960471153259277, "learning_rate": 1.95202200001257e-05, "loss": 0.642, "step": 558 }, { "epoch": 1.256179775280899, "grad_norm": 2.2174787521362305, "learning_rate": 1.9458807753335924e-05, "loss": 0.2251, "step": 559 }, { "epoch": 1.2584269662921348, "grad_norm": 2.0047852993011475, "learning_rate": 1.9397425511700762e-05, "loss": 0.2057, "step": 560 }, { "epoch": 1.2606741573033708, "grad_norm": 2.035231828689575, "learning_rate": 1.933607447025845e-05, "loss": 0.2198, "step": 561 }, { "epoch": 1.2629213483146067, "grad_norm": 3.2453677654266357, "learning_rate": 1.9274755823439777e-05, "loss": 0.4856, "step": 562 }, { "epoch": 1.2651685393258427, "grad_norm": 0.46472156047821045, "learning_rate": 1.9213470765044864e-05, "loss": 0.0273, "step": 563 }, { "epoch": 1.2674157303370785, "grad_norm": 0.5257102251052856, "learning_rate": 1.915222048821989e-05, "loss": 0.0302, "step": 564 }, { "epoch": 1.2696629213483146, "grad_norm": 2.3320302963256836, "learning_rate": 1.9091006185433897e-05, "loss": 0.1863, "step": 565 }, { "epoch": 1.2719101123595506, "grad_norm": 4.208177089691162, "learning_rate": 1.9029829048455534e-05, "loss": 0.8053, "step": 566 }, { "epoch": 1.2741573033707865, "grad_norm": 2.2916760444641113, "learning_rate": 1.8968690268329893e-05, "loss": 0.1935, "step": 567 }, { "epoch": 1.2764044943820225, "grad_norm": 3.5239036083221436, "learning_rate": 1.8907591035355305e-05, "loss": 0.5837, "step": 568 }, { "epoch": 1.2786516853932584, "grad_norm": 3.938844680786133, "learning_rate": 1.884653253906016e-05, "loss": 0.7606, "step": 569 }, { "epoch": 1.2808988764044944, "grad_norm": 1.9056942462921143, "learning_rate": 1.8785515968179746e-05, "loss": 0.1904, "step": 570 }, { "epoch": 1.2831460674157302, "grad_norm": 3.481647253036499, "learning_rate": 1.8724542510633123e-05, "loss": 0.6585, "step": 571 }, { "epoch": 1.2853932584269663, "grad_norm": 3.712449550628662, "learning_rate": 1.8663613353499996e-05, "loss": 0.7043, "step": 572 }, { "epoch": 1.2876404494382023, "grad_norm": 2.702223539352417, "learning_rate": 1.8602729682997573e-05, "loss": 0.6083, "step": 573 }, { "epoch": 1.2898876404494382, "grad_norm": 3.8883962631225586, "learning_rate": 1.854189268445751e-05, "loss": 0.6523, "step": 574 }, { "epoch": 1.2921348314606742, "grad_norm": 3.663496494293213, "learning_rate": 1.8481103542302805e-05, "loss": 0.553, "step": 575 }, { "epoch": 1.29438202247191, "grad_norm": 3.476609945297241, "learning_rate": 1.8420363440024752e-05, "loss": 0.6234, "step": 576 }, { "epoch": 1.296629213483146, "grad_norm": 2.72796368598938, "learning_rate": 1.8359673560159906e-05, "loss": 0.4428, "step": 577 }, { "epoch": 1.298876404494382, "grad_norm": 3.249882698059082, "learning_rate": 1.829903508426704e-05, "loss": 0.5433, "step": 578 }, { "epoch": 1.301123595505618, "grad_norm": 3.393094539642334, "learning_rate": 1.823844919290417e-05, "loss": 0.4937, "step": 579 }, { "epoch": 1.303370786516854, "grad_norm": 2.7802557945251465, "learning_rate": 1.817791706560553e-05, "loss": 0.2222, "step": 580 }, { "epoch": 1.3056179775280898, "grad_norm": 3.4187793731689453, "learning_rate": 1.8117439880858653e-05, "loss": 0.5672, "step": 581 }, { "epoch": 1.3078651685393259, "grad_norm": 2.9662680625915527, "learning_rate": 1.8057018816081404e-05, "loss": 0.6562, "step": 582 }, { "epoch": 1.310112359550562, "grad_norm": 0.4572099447250366, "learning_rate": 1.7996655047599054e-05, "loss": 0.056, "step": 583 }, { "epoch": 1.3123595505617978, "grad_norm": 2.625009775161743, "learning_rate": 1.7936349750621377e-05, "loss": 0.4015, "step": 584 }, { "epoch": 1.3146067415730336, "grad_norm": 3.479508399963379, "learning_rate": 1.7876104099219804e-05, "loss": 0.6675, "step": 585 }, { "epoch": 1.3146067415730336, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.8350518941879272, "eval_VitaminC_cosine_ap": 0.5499225365506326, "eval_VitaminC_cosine_f1": 0.6721311475409836, "eval_VitaminC_cosine_f1_threshold": 0.4279438257217407, "eval_VitaminC_cosine_precision": 0.5114345114345115, "eval_VitaminC_cosine_recall": 0.9800796812749004, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 320.0419616699219, "eval_VitaminC_dot_ap": 0.532421197041184, "eval_VitaminC_dot_f1": 0.6693989071038251, "eval_VitaminC_dot_f1_threshold": 162.15530395507812, "eval_VitaminC_dot_precision": 0.5093555093555093, "eval_VitaminC_dot_recall": 0.9760956175298805, "eval_VitaminC_euclidean_accuracy": 0.552734375, "eval_VitaminC_euclidean_accuracy_threshold": 11.53189468383789, "eval_VitaminC_euclidean_ap": 0.5507235346667002, "eval_VitaminC_euclidean_f1": 0.665742024965326, "eval_VitaminC_euclidean_f1_threshold": 20.513931274414062, "eval_VitaminC_euclidean_precision": 0.5106382978723404, "eval_VitaminC_euclidean_recall": 0.9561752988047809, "eval_VitaminC_manhattan_accuracy": 0.5546875, "eval_VitaminC_manhattan_accuracy_threshold": 236.3373260498047, "eval_VitaminC_manhattan_ap": 0.5500919145575321, "eval_VitaminC_manhattan_f1": 0.664886515353805, "eval_VitaminC_manhattan_f1_threshold": 483.1922607421875, "eval_VitaminC_manhattan_precision": 0.5, "eval_VitaminC_manhattan_recall": 0.9920318725099602, "eval_VitaminC_max_accuracy": 0.556640625, "eval_VitaminC_max_accuracy_threshold": 320.0419616699219, "eval_VitaminC_max_ap": 0.5507235346667002, "eval_VitaminC_max_f1": 0.6721311475409836, "eval_VitaminC_max_f1_threshold": 483.1922607421875, "eval_VitaminC_max_precision": 0.5114345114345115, "eval_VitaminC_max_recall": 0.9920318725099602, "eval_sequential_score": 0.5507235346667002, "eval_sts-test_pearson_cosine": 0.8854424628564648, "eval_sts-test_pearson_dot": 0.8781775369503937, "eval_sts-test_pearson_euclidean": 0.9073948686610891, "eval_sts-test_pearson_manhattan": 0.9077723844704348, "eval_sts-test_pearson_max": 0.9077723844704348, "eval_sts-test_spearman_cosine": 0.9079970522112082, "eval_sts-test_spearman_dot": 0.8790970008634722, "eval_sts-test_spearman_euclidean": 0.9029484386573375, "eval_sts-test_spearman_manhattan": 0.9037780375410113, "eval_sts-test_spearman_max": 0.9079970522112082, "eval_vitaminc-pairs_loss": 1.3895310163497925, "eval_vitaminc-pairs_runtime": 1.8903, "eval_vitaminc-pairs_samples_per_second": 57.134, "eval_vitaminc-pairs_steps_per_second": 1.058, "step": 585 }, { "epoch": 1.3146067415730336, "eval_negation-triplets_loss": 0.9072961807250977, "eval_negation-triplets_runtime": 0.3009, "eval_negation-triplets_samples_per_second": 212.728, "eval_negation-triplets_steps_per_second": 3.324, "step": 585 }, { "epoch": 1.3146067415730336, "eval_scitail-pairs-pos_loss": 0.09969545155763626, "eval_scitail-pairs-pos_runtime": 0.3958, "eval_scitail-pairs-pos_samples_per_second": 136.434, "eval_scitail-pairs-pos_steps_per_second": 2.527, "step": 585 }, { "epoch": 1.3146067415730336, "eval_scitail-pairs-qa_loss": 0.0006847005570307374, "eval_scitail-pairs-qa_runtime": 0.5306, "eval_scitail-pairs-qa_samples_per_second": 241.242, "eval_scitail-pairs-qa_steps_per_second": 3.769, "step": 585 }, { "epoch": 1.3146067415730336, "eval_xsum-pairs_loss": 0.02075883559882641, "eval_xsum-pairs_runtime": 2.7478, "eval_xsum-pairs_samples_per_second": 46.583, "eval_xsum-pairs_steps_per_second": 0.728, "step": 585 }, { "epoch": 1.3146067415730336, "eval_sciq_pairs_loss": 0.01607341691851616, "eval_sciq_pairs_runtime": 2.8547, "eval_sciq_pairs_samples_per_second": 44.838, "eval_sciq_pairs_steps_per_second": 0.701, "step": 585 }, { "epoch": 1.3146067415730336, "eval_qasc_pairs_loss": 0.09826790541410446, "eval_qasc_pairs_runtime": 0.6743, "eval_qasc_pairs_samples_per_second": 189.831, "eval_qasc_pairs_steps_per_second": 2.966, "step": 585 }, { "epoch": 1.3146067415730336, "eval_openbookqa_pairs_loss": 0.7106958031654358, "eval_openbookqa_pairs_runtime": 0.6028, "eval_openbookqa_pairs_samples_per_second": 212.347, "eval_openbookqa_pairs_steps_per_second": 3.318, "step": 585 }, { "epoch": 1.3146067415730336, "eval_msmarco_pairs_loss": 0.14438961446285248, "eval_msmarco_pairs_runtime": 1.4968, "eval_msmarco_pairs_samples_per_second": 85.516, "eval_msmarco_pairs_steps_per_second": 1.336, "step": 585 }, { "epoch": 1.3146067415730336, "eval_nq_pairs_loss": 0.08692270517349243, "eval_nq_pairs_runtime": 2.3567, "eval_nq_pairs_samples_per_second": 54.312, "eval_nq_pairs_steps_per_second": 0.849, "step": 585 }, { "epoch": 1.3146067415730336, "eval_trivia_pairs_loss": 0.4988090991973877, "eval_trivia_pairs_runtime": 3.5886, "eval_trivia_pairs_samples_per_second": 35.669, "eval_trivia_pairs_steps_per_second": 0.557, "step": 585 }, { "epoch": 1.3146067415730336, "eval_gooaq_pairs_loss": 0.2779709994792938, "eval_gooaq_pairs_runtime": 0.9125, "eval_gooaq_pairs_samples_per_second": 140.269, "eval_gooaq_pairs_steps_per_second": 2.192, "step": 585 }, { "epoch": 1.3146067415730336, "eval_paws-pos_loss": 0.024158792570233345, "eval_paws-pos_runtime": 0.7009, "eval_paws-pos_samples_per_second": 182.63, "eval_paws-pos_steps_per_second": 2.854, "step": 585 }, { "epoch": 1.3168539325842696, "grad_norm": 3.7084991931915283, "learning_rate": 1.781591926630451e-05, "loss": 0.7298, "step": 586 }, { "epoch": 1.3191011235955057, "grad_norm": 0.33646777272224426, "learning_rate": 1.7755796423601624e-05, "loss": 0.0372, "step": 587 }, { "epoch": 1.3213483146067415, "grad_norm": 3.1976261138916016, "learning_rate": 1.7695736741630386e-05, "loss": 0.7247, "step": 588 }, { "epoch": 1.3235955056179776, "grad_norm": 3.706315040588379, "learning_rate": 1.7635741389680394e-05, "loss": 0.6839, "step": 589 }, { "epoch": 1.3258426966292136, "grad_norm": 3.0135083198547363, "learning_rate": 1.75758115357888e-05, "loss": 0.6848, "step": 590 }, { "epoch": 1.3280898876404494, "grad_norm": 3.324838876724243, "learning_rate": 1.751594834671759e-05, "loss": 0.4449, "step": 591 }, { "epoch": 1.3303370786516853, "grad_norm": 2.1507813930511475, "learning_rate": 1.7456152987930877e-05, "loss": 0.2104, "step": 592 }, { "epoch": 1.3325842696629213, "grad_norm": 2.267099380493164, "learning_rate": 1.73964266235722e-05, "loss": 0.391, "step": 593 }, { "epoch": 1.3348314606741574, "grad_norm": 2.256056308746338, "learning_rate": 1.7336770416441832e-05, "loss": 0.3641, "step": 594 }, { "epoch": 1.3370786516853932, "grad_norm": 3.728010416030884, "learning_rate": 1.7277185527974203e-05, "loss": 0.6953, "step": 595 }, { "epoch": 1.3393258426966292, "grad_norm": 3.9698784351348877, "learning_rate": 1.7217673118215237e-05, "loss": 0.6382, "step": 596 }, { "epoch": 1.3415730337078653, "grad_norm": 3.2924447059631348, "learning_rate": 1.7158234345799802e-05, "loss": 0.6245, "step": 597 }, { "epoch": 1.3438202247191011, "grad_norm": 3.6384663581848145, "learning_rate": 1.7098870367929108e-05, "loss": 0.6775, "step": 598 }, { "epoch": 1.346067415730337, "grad_norm": 3.9194507598876953, "learning_rate": 1.703958234034823e-05, "loss": 0.5727, "step": 599 }, { "epoch": 1.348314606741573, "grad_norm": 0.4283960163593292, "learning_rate": 1.698037141732357e-05, "loss": 0.0567, "step": 600 }, { "epoch": 1.350561797752809, "grad_norm": 3.536198377609253, "learning_rate": 1.6921238751620402e-05, "loss": 0.6258, "step": 601 }, { "epoch": 1.3528089887640449, "grad_norm": 4.333379745483398, "learning_rate": 1.6862185494480425e-05, "loss": 0.8138, "step": 602 }, { "epoch": 1.355056179775281, "grad_norm": 3.6202950477600098, "learning_rate": 1.680321279559934e-05, "loss": 0.6099, "step": 603 }, { "epoch": 1.357303370786517, "grad_norm": 3.049006938934326, "learning_rate": 1.6744321803104493e-05, "loss": 0.6801, "step": 604 }, { "epoch": 1.3595505617977528, "grad_norm": 6.1929850578308105, "learning_rate": 1.668551366353248e-05, "loss": 2.2003, "step": 605 }, { "epoch": 1.3617977528089886, "grad_norm": 0.40746456384658813, "learning_rate": 1.662678952180688e-05, "loss": 0.052, "step": 606 }, { "epoch": 1.3640449438202247, "grad_norm": 2.2396955490112305, "learning_rate": 1.656815052121592e-05, "loss": 0.2175, "step": 607 }, { "epoch": 1.3662921348314607, "grad_norm": 4.395736217498779, "learning_rate": 1.6509597803390222e-05, "loss": 0.7671, "step": 608 }, { "epoch": 1.3685393258426966, "grad_norm": 3.2075412273406982, "learning_rate": 1.6451132508280602e-05, "loss": 0.5524, "step": 609 }, { "epoch": 1.3707865168539326, "grad_norm": 3.6282618045806885, "learning_rate": 1.639275577413586e-05, "loss": 0.5868, "step": 610 }, { "epoch": 1.3730337078651687, "grad_norm": 2.8120224475860596, "learning_rate": 1.6334468737480616e-05, "loss": 0.6628, "step": 611 }, { "epoch": 1.3752808988764045, "grad_norm": 4.477191925048828, "learning_rate": 1.6276272533093184e-05, "loss": 0.8106, "step": 612 }, { "epoch": 1.3775280898876405, "grad_norm": 2.078286647796631, "learning_rate": 1.621816829398349e-05, "loss": 0.2, "step": 613 }, { "epoch": 1.3797752808988764, "grad_norm": 3.698269844055176, "learning_rate": 1.616015715137101e-05, "loss": 0.57, "step": 614 }, { "epoch": 1.3820224719101124, "grad_norm": 3.7442100048065186, "learning_rate": 1.6102240234662735e-05, "loss": 0.6329, "step": 615 }, { "epoch": 1.3842696629213482, "grad_norm": 3.1588127613067627, "learning_rate": 1.6044418671431197e-05, "loss": 0.5616, "step": 616 }, { "epoch": 1.3865168539325843, "grad_norm": 2.8839151859283447, "learning_rate": 1.5986693587392505e-05, "loss": 0.6678, "step": 617 }, { "epoch": 1.3887640449438203, "grad_norm": 3.6140055656433105, "learning_rate": 1.5929066106384448e-05, "loss": 0.454, "step": 618 }, { "epoch": 1.3910112359550562, "grad_norm": 3.0111019611358643, "learning_rate": 1.5871537350344574e-05, "loss": 0.5198, "step": 619 }, { "epoch": 1.3932584269662922, "grad_norm": 3.6177947521209717, "learning_rate": 1.581410843928841e-05, "loss": 0.5259, "step": 620 }, { "epoch": 1.395505617977528, "grad_norm": 4.257228374481201, "learning_rate": 1.5756780491287593e-05, "loss": 0.714, "step": 621 }, { "epoch": 1.397752808988764, "grad_norm": 3.3783249855041504, "learning_rate": 1.5699554622448154e-05, "loss": 0.4943, "step": 622 }, { "epoch": 1.4, "grad_norm": 0.527916431427002, "learning_rate": 1.5642431946888743e-05, "loss": 0.0324, "step": 623 }, { "epoch": 1.402247191011236, "grad_norm": 0.4558711349964142, "learning_rate": 1.5585413576718978e-05, "loss": 0.0305, "step": 624 }, { "epoch": 1.404494382022472, "grad_norm": 3.383774757385254, "learning_rate": 1.5528500622017773e-05, "loss": 0.5194, "step": 625 }, { "epoch": 1.4067415730337078, "grad_norm": 3.434595823287964, "learning_rate": 1.5471694190811722e-05, "loss": 0.5412, "step": 626 }, { "epoch": 1.4089887640449439, "grad_norm": 3.0937981605529785, "learning_rate": 1.5414995389053546e-05, "loss": 0.5688, "step": 627 }, { "epoch": 1.4112359550561797, "grad_norm": 3.6951913833618164, "learning_rate": 1.5358405320600546e-05, "loss": 0.7636, "step": 628 }, { "epoch": 1.4134831460674158, "grad_norm": 3.196275472640991, "learning_rate": 1.530192508719312e-05, "loss": 0.478, "step": 629 }, { "epoch": 1.4157303370786516, "grad_norm": 3.235971212387085, "learning_rate": 1.5245555788433292e-05, "loss": 0.5674, "step": 630 }, { "epoch": 1.4157303370786516, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.8177632093429565, "eval_VitaminC_cosine_ap": 0.5552558686648947, "eval_VitaminC_cosine_f1": 0.6657789613848203, "eval_VitaminC_cosine_f1_threshold": 0.324923038482666, "eval_VitaminC_cosine_precision": 0.5, "eval_VitaminC_cosine_recall": 0.9960159362549801, "eval_VitaminC_dot_accuracy": 0.55859375, "eval_VitaminC_dot_accuracy_threshold": 289.75390625, "eval_VitaminC_dot_ap": 0.5382889477754735, "eval_VitaminC_dot_f1": 0.6693657219973009, "eval_VitaminC_dot_f1_threshold": 149.46510314941406, "eval_VitaminC_dot_precision": 0.5061224489795918, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.5625, "eval_VitaminC_euclidean_accuracy_threshold": 15.008248329162598, "eval_VitaminC_euclidean_ap": 0.555529307254583, "eval_VitaminC_euclidean_f1": 0.6666666666666666, "eval_VitaminC_euclidean_f1_threshold": 19.720703125, "eval_VitaminC_euclidean_precision": 0.5140388768898488, "eval_VitaminC_euclidean_recall": 0.9482071713147411, "eval_VitaminC_manhattan_accuracy": 0.556640625, "eval_VitaminC_manhattan_accuracy_threshold": 241.89620971679688, "eval_VitaminC_manhattan_ap": 0.5565558085377883, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 509.21246337890625, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.5625, "eval_VitaminC_max_accuracy_threshold": 289.75390625, "eval_VitaminC_max_ap": 0.5565558085377883, "eval_VitaminC_max_f1": 0.6693657219973009, "eval_VitaminC_max_f1_threshold": 509.21246337890625, "eval_VitaminC_max_precision": 0.5140388768898488, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5565558085377883, "eval_sts-test_pearson_cosine": 0.8842482218274867, "eval_sts-test_pearson_dot": 0.8750877929290466, "eval_sts-test_pearson_euclidean": 0.9061417543180812, "eval_sts-test_pearson_manhattan": 0.907116350231697, "eval_sts-test_pearson_max": 0.907116350231697, "eval_sts-test_spearman_cosine": 0.9058163882415199, "eval_sts-test_spearman_dot": 0.8768090478735754, "eval_sts-test_spearman_euclidean": 0.9019257380807598, "eval_sts-test_spearman_manhattan": 0.9032297003351825, "eval_sts-test_spearman_max": 0.9058163882415199, "eval_vitaminc-pairs_loss": 1.419171690940857, "eval_vitaminc-pairs_runtime": 1.9053, "eval_vitaminc-pairs_samples_per_second": 56.684, "eval_vitaminc-pairs_steps_per_second": 1.05, "step": 630 }, { "epoch": 1.4157303370786516, "eval_negation-triplets_loss": 0.93497234582901, "eval_negation-triplets_runtime": 0.3078, "eval_negation-triplets_samples_per_second": 207.955, "eval_negation-triplets_steps_per_second": 3.249, "step": 630 }, { "epoch": 1.4157303370786516, "eval_scitail-pairs-pos_loss": 0.10255613923072815, "eval_scitail-pairs-pos_runtime": 0.3984, "eval_scitail-pairs-pos_samples_per_second": 135.532, "eval_scitail-pairs-pos_steps_per_second": 2.51, "step": 630 }, { "epoch": 1.4157303370786516, "eval_scitail-pairs-qa_loss": 0.0005886165308766067, "eval_scitail-pairs-qa_runtime": 0.5368, "eval_scitail-pairs-qa_samples_per_second": 238.448, "eval_scitail-pairs-qa_steps_per_second": 3.726, "step": 630 }, { "epoch": 1.4157303370786516, "eval_xsum-pairs_loss": 0.02434428222477436, "eval_xsum-pairs_runtime": 2.7476, "eval_xsum-pairs_samples_per_second": 46.586, "eval_xsum-pairs_steps_per_second": 0.728, "step": 630 }, { "epoch": 1.4157303370786516, "eval_sciq_pairs_loss": 0.01709047146141529, "eval_sciq_pairs_runtime": 2.8888, "eval_sciq_pairs_samples_per_second": 44.31, "eval_sciq_pairs_steps_per_second": 0.692, "step": 630 }, { "epoch": 1.4157303370786516, "eval_qasc_pairs_loss": 0.08857370167970657, "eval_qasc_pairs_runtime": 0.662, "eval_qasc_pairs_samples_per_second": 193.367, "eval_qasc_pairs_steps_per_second": 3.021, "step": 630 }, { "epoch": 1.4157303370786516, "eval_openbookqa_pairs_loss": 0.7429466843605042, "eval_openbookqa_pairs_runtime": 0.5899, "eval_openbookqa_pairs_samples_per_second": 216.996, "eval_openbookqa_pairs_steps_per_second": 3.391, "step": 630 }, { "epoch": 1.4157303370786516, "eval_msmarco_pairs_loss": 0.14679844677448273, "eval_msmarco_pairs_runtime": 1.4975, "eval_msmarco_pairs_samples_per_second": 85.478, "eval_msmarco_pairs_steps_per_second": 1.336, "step": 630 }, { "epoch": 1.4157303370786516, "eval_nq_pairs_loss": 0.08464788645505905, "eval_nq_pairs_runtime": 2.3677, "eval_nq_pairs_samples_per_second": 54.06, "eval_nq_pairs_steps_per_second": 0.845, "step": 630 }, { "epoch": 1.4157303370786516, "eval_trivia_pairs_loss": 0.49148350954055786, "eval_trivia_pairs_runtime": 3.5901, "eval_trivia_pairs_samples_per_second": 35.654, "eval_trivia_pairs_steps_per_second": 0.557, "step": 630 }, { "epoch": 1.4157303370786516, "eval_gooaq_pairs_loss": 0.2605786621570587, "eval_gooaq_pairs_runtime": 0.9378, "eval_gooaq_pairs_samples_per_second": 136.484, "eval_gooaq_pairs_steps_per_second": 2.133, "step": 630 }, { "epoch": 1.4157303370786516, "eval_paws-pos_loss": 0.02402164228260517, "eval_paws-pos_runtime": 0.707, "eval_paws-pos_samples_per_second": 181.047, "eval_paws-pos_steps_per_second": 2.829, "step": 630 }, { "epoch": 1.4179775280898876, "grad_norm": 4.603442192077637, "learning_rate": 1.5189298521763352e-05, "loss": 0.9232, "step": 631 }, { "epoch": 1.4202247191011237, "grad_norm": 3.214153289794922, "learning_rate": 1.5133154382444443e-05, "loss": 0.613, "step": 632 }, { "epoch": 1.4224719101123595, "grad_norm": 3.1973836421966553, "learning_rate": 1.5077124463535252e-05, "loss": 0.5689, "step": 633 }, { "epoch": 1.4247191011235956, "grad_norm": 2.985643148422241, "learning_rate": 1.5021209855870732e-05, "loss": 0.4126, "step": 634 }, { "epoch": 1.4269662921348314, "grad_norm": 2.1112682819366455, "learning_rate": 1.4965411648040885e-05, "loss": 0.2148, "step": 635 }, { "epoch": 1.4292134831460674, "grad_norm": 2.8638250827789307, "learning_rate": 1.4909730926369527e-05, "loss": 0.7029, "step": 636 }, { "epoch": 1.4314606741573033, "grad_norm": 2.3235864639282227, "learning_rate": 1.4854168774893162e-05, "loss": 0.3989, "step": 637 }, { "epoch": 1.4337078651685393, "grad_norm": 3.925884962081909, "learning_rate": 1.4798726275339885e-05, "loss": 0.6291, "step": 638 }, { "epoch": 1.4359550561797754, "grad_norm": 1.7517139911651611, "learning_rate": 1.4743404507108308e-05, "loss": 0.158, "step": 639 }, { "epoch": 1.4382022471910112, "grad_norm": 3.1826531887054443, "learning_rate": 1.4688204547246553e-05, "loss": 0.4833, "step": 640 }, { "epoch": 1.4404494382022472, "grad_norm": 0.4674391448497772, "learning_rate": 1.4633127470431268e-05, "loss": 0.0561, "step": 641 }, { "epoch": 1.442696629213483, "grad_norm": 3.3981146812438965, "learning_rate": 1.4578174348946728e-05, "loss": 0.6613, "step": 642 }, { "epoch": 1.4449438202247191, "grad_norm": 1.869647741317749, "learning_rate": 1.4523346252663953e-05, "loss": 0.1917, "step": 643 }, { "epoch": 1.447191011235955, "grad_norm": 3.705204725265503, "learning_rate": 1.4468644249019847e-05, "loss": 0.5755, "step": 644 }, { "epoch": 1.449438202247191, "grad_norm": 3.3335964679718018, "learning_rate": 1.4414069402996472e-05, "loss": 0.5609, "step": 645 }, { "epoch": 1.451685393258427, "grad_norm": 3.096312999725342, "learning_rate": 1.4359622777100265e-05, "loss": 0.5407, "step": 646 }, { "epoch": 1.4539325842696629, "grad_norm": 0.3230539858341217, "learning_rate": 1.430530543134139e-05, "loss": 0.0455, "step": 647 }, { "epoch": 1.456179775280899, "grad_norm": 3.2401561737060547, "learning_rate": 1.425111842321305e-05, "loss": 0.6599, "step": 648 }, { "epoch": 1.4584269662921348, "grad_norm": 3.941396474838257, "learning_rate": 1.4197062807670971e-05, "loss": 0.6952, "step": 649 }, { "epoch": 1.4606741573033708, "grad_norm": 0.5517943501472473, "learning_rate": 1.4143139637112801e-05, "loss": 0.0329, "step": 650 }, { "epoch": 1.4629213483146066, "grad_norm": 3.1321299076080322, "learning_rate": 1.4089349961357648e-05, "loss": 0.6939, "step": 651 }, { "epoch": 1.4651685393258427, "grad_norm": 3.5495405197143555, "learning_rate": 1.403569482762563e-05, "loss": 0.4664, "step": 652 }, { "epoch": 1.4674157303370787, "grad_norm": 3.9030630588531494, "learning_rate": 1.3982175280517512e-05, "loss": 0.6686, "step": 653 }, { "epoch": 1.4696629213483146, "grad_norm": 3.5179669857025146, "learning_rate": 1.3928792361994335e-05, "loss": 0.6167, "step": 654 }, { "epoch": 1.4719101123595506, "grad_norm": 3.5082061290740967, "learning_rate": 1.3875547111357156e-05, "loss": 0.6612, "step": 655 }, { "epoch": 1.4741573033707867, "grad_norm": 4.214035511016846, "learning_rate": 1.382244056522679e-05, "loss": 0.8139, "step": 656 }, { "epoch": 1.4764044943820225, "grad_norm": 3.5473194122314453, "learning_rate": 1.3769473757523668e-05, "loss": 0.6813, "step": 657 }, { "epoch": 1.4786516853932583, "grad_norm": 3.59326171875, "learning_rate": 1.3716647719447648e-05, "loss": 0.6031, "step": 658 }, { "epoch": 1.4808988764044944, "grad_norm": 2.1092872619628906, "learning_rate": 1.3663963479458006e-05, "loss": 0.1783, "step": 659 }, { "epoch": 1.4831460674157304, "grad_norm": 2.6726906299591064, "learning_rate": 1.3611422063253356e-05, "loss": 0.6536, "step": 660 }, { "epoch": 1.4853932584269662, "grad_norm": 0.31012779474258423, "learning_rate": 1.3559024493751731e-05, "loss": 0.0318, "step": 661 }, { "epoch": 1.4876404494382023, "grad_norm": 2.775973320007324, "learning_rate": 1.3506771791070609e-05, "loss": 0.6372, "step": 662 }, { "epoch": 1.4898876404494383, "grad_norm": 3.1341476440429688, "learning_rate": 1.3454664972507112e-05, "loss": 0.5695, "step": 663 }, { "epoch": 1.4921348314606742, "grad_norm": 5.889291763305664, "learning_rate": 1.3402705052518162e-05, "loss": 2.3259, "step": 664 }, { "epoch": 1.49438202247191, "grad_norm": 2.9970600605010986, "learning_rate": 1.3350893042700749e-05, "loss": 0.4342, "step": 665 }, { "epoch": 1.496629213483146, "grad_norm": 2.2179033756256104, "learning_rate": 1.329922995177222e-05, "loss": 0.2176, "step": 666 }, { "epoch": 1.498876404494382, "grad_norm": 3.6805996894836426, "learning_rate": 1.324771678555067e-05, "loss": 0.5419, "step": 667 }, { "epoch": 1.501123595505618, "grad_norm": 3.182274103164673, "learning_rate": 1.3196354546935319e-05, "loss": 0.4976, "step": 668 }, { "epoch": 1.503370786516854, "grad_norm": 1.9249639511108398, "learning_rate": 1.3145144235887022e-05, "loss": 0.1964, "step": 669 }, { "epoch": 1.50561797752809, "grad_norm": 0.4039813280105591, "learning_rate": 1.3094086849408782e-05, "loss": 0.0311, "step": 670 }, { "epoch": 1.5078651685393258, "grad_norm": 1.8193391561508179, "learning_rate": 1.3043183381526351e-05, "loss": 0.1832, "step": 671 }, { "epoch": 1.5101123595505617, "grad_norm": 0.25979480147361755, "learning_rate": 1.2992434823268868e-05, "loss": 0.0345, "step": 672 }, { "epoch": 1.5123595505617977, "grad_norm": 3.0886144638061523, "learning_rate": 1.2941842162649562e-05, "loss": 0.5376, "step": 673 }, { "epoch": 1.5146067415730338, "grad_norm": 3.398355484008789, "learning_rate": 1.289140638464653e-05, "loss": 0.6316, "step": 674 }, { "epoch": 1.5168539325842696, "grad_norm": 3.3084335327148438, "learning_rate": 1.284112847118356e-05, "loss": 0.5025, "step": 675 }, { "epoch": 1.5168539325842696, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.6423732042312622, "eval_VitaminC_cosine_ap": 0.5559419972419435, "eval_VitaminC_cosine_f1": 0.6693548387096774, "eval_VitaminC_cosine_f1_threshold": 0.3631063997745514, "eval_VitaminC_cosine_precision": 0.5050709939148073, "eval_VitaminC_cosine_recall": 0.9920318725099602, "eval_VitaminC_dot_accuracy": 0.552734375, "eval_VitaminC_dot_accuracy_threshold": 314.47454833984375, "eval_VitaminC_dot_ap": 0.5368600163832479, "eval_VitaminC_dot_f1": 0.6720867208672087, "eval_VitaminC_dot_f1_threshold": 146.22268676757812, "eval_VitaminC_dot_precision": 0.5092402464065708, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.560546875, "eval_VitaminC_euclidean_accuracy_threshold": 15.797218322753906, "eval_VitaminC_euclidean_ap": 0.5588590475965204, "eval_VitaminC_euclidean_f1": 0.6666666666666667, "eval_VitaminC_euclidean_f1_threshold": 22.122737884521484, "eval_VitaminC_euclidean_precision": 0.5040816326530613, "eval_VitaminC_euclidean_recall": 0.9840637450199203, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 290.7606201171875, "eval_VitaminC_manhattan_ap": 0.5586013714034852, "eval_VitaminC_manhattan_f1": 0.6657381615598886, "eval_VitaminC_manhattan_f1_threshold": 422.6849365234375, "eval_VitaminC_manhattan_precision": 0.5117773019271948, "eval_VitaminC_manhattan_recall": 0.952191235059761, "eval_VitaminC_max_accuracy": 0.560546875, "eval_VitaminC_max_accuracy_threshold": 314.47454833984375, "eval_VitaminC_max_ap": 0.5588590475965204, "eval_VitaminC_max_f1": 0.6720867208672087, "eval_VitaminC_max_f1_threshold": 422.6849365234375, "eval_VitaminC_max_precision": 0.5117773019271948, "eval_VitaminC_max_recall": 0.9920318725099602, "eval_sequential_score": 0.5588590475965204, "eval_sts-test_pearson_cosine": 0.8865581717304933, "eval_sts-test_pearson_dot": 0.8767512116280293, "eval_sts-test_pearson_euclidean": 0.9075796979057666, "eval_sts-test_pearson_manhattan": 0.9085979238979228, "eval_sts-test_pearson_max": 0.9085979238979228, "eval_sts-test_spearman_cosine": 0.906657109942627, "eval_sts-test_spearman_dot": 0.8758823835039438, "eval_sts-test_spearman_euclidean": 0.9031548314748749, "eval_sts-test_spearman_manhattan": 0.9039239266531472, "eval_sts-test_spearman_max": 0.906657109942627, "eval_vitaminc-pairs_loss": 1.3484903573989868, "eval_vitaminc-pairs_runtime": 1.897, "eval_vitaminc-pairs_samples_per_second": 56.932, "eval_vitaminc-pairs_steps_per_second": 1.054, "step": 675 }, { "epoch": 1.5168539325842696, "eval_negation-triplets_loss": 0.907042920589447, "eval_negation-triplets_runtime": 0.3044, "eval_negation-triplets_samples_per_second": 210.234, "eval_negation-triplets_steps_per_second": 3.285, "step": 675 }, { "epoch": 1.5168539325842696, "eval_scitail-pairs-pos_loss": 0.0944281667470932, "eval_scitail-pairs-pos_runtime": 0.4051, "eval_scitail-pairs-pos_samples_per_second": 133.286, "eval_scitail-pairs-pos_steps_per_second": 2.468, "step": 675 }, { "epoch": 1.5168539325842696, "eval_scitail-pairs-qa_loss": 0.0006068490329198539, "eval_scitail-pairs-qa_runtime": 0.5379, "eval_scitail-pairs-qa_samples_per_second": 237.942, "eval_scitail-pairs-qa_steps_per_second": 3.718, "step": 675 }, { "epoch": 1.5168539325842696, "eval_xsum-pairs_loss": 0.02087360806763172, "eval_xsum-pairs_runtime": 2.7473, "eval_xsum-pairs_samples_per_second": 46.592, "eval_xsum-pairs_steps_per_second": 0.728, "step": 675 }, { "epoch": 1.5168539325842696, "eval_sciq_pairs_loss": 0.016422858461737633, "eval_sciq_pairs_runtime": 2.9202, "eval_sciq_pairs_samples_per_second": 43.832, "eval_sciq_pairs_steps_per_second": 0.685, "step": 675 }, { "epoch": 1.5168539325842696, "eval_qasc_pairs_loss": 0.0907311737537384, "eval_qasc_pairs_runtime": 0.663, "eval_qasc_pairs_samples_per_second": 193.059, "eval_qasc_pairs_steps_per_second": 3.017, "step": 675 }, { "epoch": 1.5168539325842696, "eval_openbookqa_pairs_loss": 0.74045729637146, "eval_openbookqa_pairs_runtime": 0.5939, "eval_openbookqa_pairs_samples_per_second": 215.54, "eval_openbookqa_pairs_steps_per_second": 3.368, "step": 675 }, { "epoch": 1.5168539325842696, "eval_msmarco_pairs_loss": 0.14498473703861237, "eval_msmarco_pairs_runtime": 1.4994, "eval_msmarco_pairs_samples_per_second": 85.365, "eval_msmarco_pairs_steps_per_second": 1.334, "step": 675 }, { "epoch": 1.5168539325842696, "eval_nq_pairs_loss": 0.08948007225990295, "eval_nq_pairs_runtime": 2.3655, "eval_nq_pairs_samples_per_second": 54.11, "eval_nq_pairs_steps_per_second": 0.845, "step": 675 }, { "epoch": 1.5168539325842696, "eval_trivia_pairs_loss": 0.5190236568450928, "eval_trivia_pairs_runtime": 3.6004, "eval_trivia_pairs_samples_per_second": 35.552, "eval_trivia_pairs_steps_per_second": 0.555, "step": 675 }, { "epoch": 1.5168539325842696, "eval_gooaq_pairs_loss": 0.253094881772995, "eval_gooaq_pairs_runtime": 0.9388, "eval_gooaq_pairs_samples_per_second": 136.338, "eval_gooaq_pairs_steps_per_second": 2.13, "step": 675 }, { "epoch": 1.5168539325842696, "eval_paws-pos_loss": 0.023870982229709625, "eval_paws-pos_runtime": 0.707, "eval_paws-pos_samples_per_second": 181.056, "eval_paws-pos_steps_per_second": 2.829, "step": 675 }, { "epoch": 1.5191011235955056, "grad_norm": 2.906019449234009, "learning_rate": 1.2791009401110988e-05, "loss": 0.509, "step": 676 }, { "epoch": 1.5213483146067417, "grad_norm": 3.241637706756592, "learning_rate": 1.2741050150186684e-05, "loss": 0.6078, "step": 677 }, { "epoch": 1.5235955056179775, "grad_norm": 2.7639479637145996, "learning_rate": 1.2691251691057008e-05, "loss": 0.3961, "step": 678 }, { "epoch": 1.5258426966292133, "grad_norm": 3.347423553466797, "learning_rate": 1.264161499323793e-05, "loss": 0.5699, "step": 679 }, { "epoch": 1.5280898876404494, "grad_norm": 3.417011022567749, "learning_rate": 1.2592141023096081e-05, "loss": 0.6305, "step": 680 }, { "epoch": 1.5303370786516854, "grad_norm": 3.702047348022461, "learning_rate": 1.2542830743830018e-05, "loss": 0.5886, "step": 681 }, { "epoch": 1.5325842696629213, "grad_norm": 2.654712438583374, "learning_rate": 1.2493685115451417e-05, "loss": 0.6432, "step": 682 }, { "epoch": 1.5348314606741573, "grad_norm": 3.164024829864502, "learning_rate": 1.2444705094766392e-05, "loss": 0.614, "step": 683 }, { "epoch": 1.5370786516853934, "grad_norm": 0.31392890214920044, "learning_rate": 1.2395891635356883e-05, "loss": 0.0432, "step": 684 }, { "epoch": 1.5393258426966292, "grad_norm": 2.7912144660949707, "learning_rate": 1.2347245687562097e-05, "loss": 0.633, "step": 685 }, { "epoch": 1.541573033707865, "grad_norm": 3.678194999694824, "learning_rate": 1.229876819845997e-05, "loss": 0.6228, "step": 686 }, { "epoch": 1.543820224719101, "grad_norm": 2.2250537872314453, "learning_rate": 1.2250460111848757e-05, "loss": 0.2105, "step": 687 }, { "epoch": 1.5460674157303371, "grad_norm": 3.0338246822357178, "learning_rate": 1.2202322368228655e-05, "loss": 0.5429, "step": 688 }, { "epoch": 1.548314606741573, "grad_norm": 3.2483761310577393, "learning_rate": 1.2154355904783493e-05, "loss": 0.5361, "step": 689 }, { "epoch": 1.550561797752809, "grad_norm": 3.114161491394043, "learning_rate": 1.2106561655362458e-05, "loss": 0.5567, "step": 690 }, { "epoch": 1.552808988764045, "grad_norm": 3.2030935287475586, "learning_rate": 1.2058940550461972e-05, "loss": 0.6131, "step": 691 }, { "epoch": 1.5550561797752809, "grad_norm": 3.438405990600586, "learning_rate": 1.201149351720751e-05, "loss": 0.5111, "step": 692 }, { "epoch": 1.5573033707865167, "grad_norm": 3.1341006755828857, "learning_rate": 1.1964221479335612e-05, "loss": 0.6216, "step": 693 }, { "epoch": 1.5595505617977528, "grad_norm": 4.309159278869629, "learning_rate": 1.1917125357175833e-05, "loss": 0.7615, "step": 694 }, { "epoch": 1.5617977528089888, "grad_norm": 3.412702798843384, "learning_rate": 1.1870206067632897e-05, "loss": 0.51, "step": 695 }, { "epoch": 1.5640449438202246, "grad_norm": 2.8780102729797363, "learning_rate": 1.1823464524168793e-05, "loss": 0.6989, "step": 696 }, { "epoch": 1.5662921348314607, "grad_norm": 3.7508151531219482, "learning_rate": 1.1776901636785002e-05, "loss": 0.8145, "step": 697 }, { "epoch": 1.5685393258426967, "grad_norm": 3.1193883419036865, "learning_rate": 1.1730518312004793e-05, "loss": 0.5928, "step": 698 }, { "epoch": 1.5707865168539326, "grad_norm": 3.2519209384918213, "learning_rate": 1.1684315452855578e-05, "loss": 0.6046, "step": 699 }, { "epoch": 1.5730337078651684, "grad_norm": 2.7838470935821533, "learning_rate": 1.163829395885131e-05, "loss": 0.6483, "step": 700 }, { "epoch": 1.5752808988764047, "grad_norm": 2.958461046218872, "learning_rate": 1.159245472597498e-05, "loss": 0.3976, "step": 701 }, { "epoch": 1.5775280898876405, "grad_norm": 2.3022193908691406, "learning_rate": 1.1546798646661172e-05, "loss": 0.2033, "step": 702 }, { "epoch": 1.5797752808988763, "grad_norm": 2.9034605026245117, "learning_rate": 1.1501326609778704e-05, "loss": 0.4127, "step": 703 }, { "epoch": 1.5820224719101124, "grad_norm": 3.1683311462402344, "learning_rate": 1.1456039500613304e-05, "loss": 0.6008, "step": 704 }, { "epoch": 1.5842696629213484, "grad_norm": 3.1939926147460938, "learning_rate": 1.1410938200850377e-05, "loss": 0.5346, "step": 705 }, { "epoch": 1.5865168539325842, "grad_norm": 2.758613348007202, "learning_rate": 1.1366023588557834e-05, "loss": 0.4183, "step": 706 }, { "epoch": 1.58876404494382, "grad_norm": 0.3631807267665863, "learning_rate": 1.132129653816903e-05, "loss": 0.0245, "step": 707 }, { "epoch": 1.5910112359550563, "grad_norm": 2.9882872104644775, "learning_rate": 1.1276757920465702e-05, "loss": 0.4834, "step": 708 }, { "epoch": 1.5932584269662922, "grad_norm": 3.5191538333892822, "learning_rate": 1.1232408602561024e-05, "loss": 0.5815, "step": 709 }, { "epoch": 1.595505617977528, "grad_norm": 3.9756875038146973, "learning_rate": 1.1188249447882736e-05, "loss": 0.7791, "step": 710 }, { "epoch": 1.597752808988764, "grad_norm": 3.263510227203369, "learning_rate": 1.114428131615634e-05, "loss": 0.4835, "step": 711 }, { "epoch": 1.6, "grad_norm": 3.1563074588775635, "learning_rate": 1.1100505063388333e-05, "loss": 0.5797, "step": 712 }, { "epoch": 1.602247191011236, "grad_norm": 2.0562615394592285, "learning_rate": 1.1056921541849578e-05, "loss": 0.1891, "step": 713 }, { "epoch": 1.604494382022472, "grad_norm": 2.464853286743164, "learning_rate": 1.1013531600058679e-05, "loss": 0.3955, "step": 714 }, { "epoch": 1.606741573033708, "grad_norm": 3.1884946823120117, "learning_rate": 1.0970336082765479e-05, "loss": 0.497, "step": 715 }, { "epoch": 1.6089887640449438, "grad_norm": 2.6583571434020996, "learning_rate": 1.092733583093462e-05, "loss": 0.6271, "step": 716 }, { "epoch": 1.6112359550561797, "grad_norm": 3.7117207050323486, "learning_rate": 1.0884531681729152e-05, "loss": 0.5571, "step": 717 }, { "epoch": 1.6134831460674157, "grad_norm": 0.321676641702652, "learning_rate": 1.0841924468494243e-05, "loss": 0.0405, "step": 718 }, { "epoch": 1.6157303370786518, "grad_norm": 2.33481764793396, "learning_rate": 1.0799515020740955e-05, "loss": 0.2968, "step": 719 }, { "epoch": 1.6179775280898876, "grad_norm": 2.9772684574127197, "learning_rate": 1.0757304164130105e-05, "loss": 0.7262, "step": 720 }, { "epoch": 1.6179775280898876, "eval_VitaminC_cosine_accuracy": 0.560546875, "eval_VitaminC_cosine_accuracy_threshold": 0.8018198609352112, "eval_VitaminC_cosine_ap": 0.5551830056983754, "eval_VitaminC_cosine_f1": 0.6657534246575343, "eval_VitaminC_cosine_f1_threshold": 0.4161919951438904, "eval_VitaminC_cosine_precision": 0.5073068893528184, "eval_VitaminC_cosine_recall": 0.9681274900398407, "eval_VitaminC_dot_accuracy": 0.5546875, "eval_VitaminC_dot_accuracy_threshold": 320.4762268066406, "eval_VitaminC_dot_ap": 0.5305147029680797, "eval_VitaminC_dot_f1": 0.6693766937669378, "eval_VitaminC_dot_f1_threshold": 142.318115234375, "eval_VitaminC_dot_precision": 0.5071868583162218, "eval_VitaminC_dot_recall": 0.9840637450199203, "eval_VitaminC_euclidean_accuracy": 0.556640625, "eval_VitaminC_euclidean_accuracy_threshold": 15.623578071594238, "eval_VitaminC_euclidean_ap": 0.5552396175565026, "eval_VitaminC_euclidean_f1": 0.6649006622516557, "eval_VitaminC_euclidean_f1_threshold": 24.57459259033203, "eval_VitaminC_euclidean_precision": 0.498015873015873, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5546875, "eval_VitaminC_manhattan_accuracy_threshold": 233.19009399414062, "eval_VitaminC_manhattan_ap": 0.5548444881216961, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 521.407470703125, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.560546875, "eval_VitaminC_max_accuracy_threshold": 320.4762268066406, "eval_VitaminC_max_ap": 0.5552396175565026, "eval_VitaminC_max_f1": 0.6693766937669378, "eval_VitaminC_max_f1_threshold": 521.407470703125, "eval_VitaminC_max_precision": 0.5073068893528184, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5552396175565026, "eval_sts-test_pearson_cosine": 0.887808686322094, "eval_sts-test_pearson_dot": 0.8803917434966819, "eval_sts-test_pearson_euclidean": 0.9086887931212514, "eval_sts-test_pearson_manhattan": 0.9097294718375346, "eval_sts-test_pearson_max": 0.9097294718375346, "eval_sts-test_spearman_cosine": 0.9090755128594795, "eval_sts-test_spearman_dot": 0.8816835342693723, "eval_sts-test_spearman_euclidean": 0.9045753260139605, "eval_sts-test_spearman_manhattan": 0.9058602689822915, "eval_sts-test_spearman_max": 0.9090755128594795, "eval_vitaminc-pairs_loss": 1.3776708841323853, "eval_vitaminc-pairs_runtime": 1.9, "eval_vitaminc-pairs_samples_per_second": 56.841, "eval_vitaminc-pairs_steps_per_second": 1.053, "step": 720 }, { "epoch": 1.6179775280898876, "eval_negation-triplets_loss": 0.9084223508834839, "eval_negation-triplets_runtime": 0.3036, "eval_negation-triplets_samples_per_second": 210.795, "eval_negation-triplets_steps_per_second": 3.294, "step": 720 }, { "epoch": 1.6179775280898876, "eval_scitail-pairs-pos_loss": 0.1010468602180481, "eval_scitail-pairs-pos_runtime": 0.4089, "eval_scitail-pairs-pos_samples_per_second": 132.069, "eval_scitail-pairs-pos_steps_per_second": 2.446, "step": 720 }, { "epoch": 1.6179775280898876, "eval_scitail-pairs-qa_loss": 0.0006560595356859267, "eval_scitail-pairs-qa_runtime": 0.5374, "eval_scitail-pairs-qa_samples_per_second": 238.165, "eval_scitail-pairs-qa_steps_per_second": 3.721, "step": 720 }, { "epoch": 1.6179775280898876, "eval_xsum-pairs_loss": 0.01978565938770771, "eval_xsum-pairs_runtime": 2.753, "eval_xsum-pairs_samples_per_second": 46.495, "eval_xsum-pairs_steps_per_second": 0.726, "step": 720 }, { "epoch": 1.6179775280898876, "eval_sciq_pairs_loss": 0.016458621248602867, "eval_sciq_pairs_runtime": 2.8893, "eval_sciq_pairs_samples_per_second": 44.301, "eval_sciq_pairs_steps_per_second": 0.692, "step": 720 }, { "epoch": 1.6179775280898876, "eval_qasc_pairs_loss": 0.09319126605987549, "eval_qasc_pairs_runtime": 0.6588, "eval_qasc_pairs_samples_per_second": 194.3, "eval_qasc_pairs_steps_per_second": 3.036, "step": 720 }, { "epoch": 1.6179775280898876, "eval_openbookqa_pairs_loss": 0.7171750068664551, "eval_openbookqa_pairs_runtime": 0.5942, "eval_openbookqa_pairs_samples_per_second": 215.414, "eval_openbookqa_pairs_steps_per_second": 3.366, "step": 720 }, { "epoch": 1.6179775280898876, "eval_msmarco_pairs_loss": 0.14239048957824707, "eval_msmarco_pairs_runtime": 1.5034, "eval_msmarco_pairs_samples_per_second": 85.141, "eval_msmarco_pairs_steps_per_second": 1.33, "step": 720 }, { "epoch": 1.6179775280898876, "eval_nq_pairs_loss": 0.0960390493273735, "eval_nq_pairs_runtime": 2.3578, "eval_nq_pairs_samples_per_second": 54.288, "eval_nq_pairs_steps_per_second": 0.848, "step": 720 }, { "epoch": 1.6179775280898876, "eval_trivia_pairs_loss": 0.49868205189704895, "eval_trivia_pairs_runtime": 3.591, "eval_trivia_pairs_samples_per_second": 35.645, "eval_trivia_pairs_steps_per_second": 0.557, "step": 720 }, { "epoch": 1.6179775280898876, "eval_gooaq_pairs_loss": 0.27023670077323914, "eval_gooaq_pairs_runtime": 0.9169, "eval_gooaq_pairs_samples_per_second": 139.603, "eval_gooaq_pairs_steps_per_second": 2.181, "step": 720 }, { "epoch": 1.6179775280898876, "eval_paws-pos_loss": 0.024076081812381744, "eval_paws-pos_runtime": 0.7075, "eval_paws-pos_samples_per_second": 180.921, "eval_paws-pos_steps_per_second": 2.827, "step": 720 }, { "epoch": 1.6202247191011236, "grad_norm": 4.036292552947998, "learning_rate": 1.0715292720456164e-05, "loss": 0.7611, "step": 721 }, { "epoch": 1.6224719101123597, "grad_norm": 2.709695339202881, "learning_rate": 1.0673481507631287e-05, "loss": 0.3926, "step": 722 }, { "epoch": 1.6247191011235955, "grad_norm": 5.689388751983643, "learning_rate": 1.0631871339669366e-05, "loss": 2.3127, "step": 723 }, { "epoch": 1.6269662921348313, "grad_norm": 2.782240152359009, "learning_rate": 1.05904630266702e-05, "loss": 0.7026, "step": 724 }, { "epoch": 1.6292134831460674, "grad_norm": 0.44289153814315796, "learning_rate": 1.0549257374803709e-05, "loss": 0.0685, "step": 725 }, { "epoch": 1.6314606741573034, "grad_norm": 3.638007879257202, "learning_rate": 1.0508255186294249e-05, "loss": 0.6031, "step": 726 }, { "epoch": 1.6337078651685393, "grad_norm": 3.3411712646484375, "learning_rate": 1.0467457259404982e-05, "loss": 0.579, "step": 727 }, { "epoch": 1.6359550561797753, "grad_norm": 1.792189121246338, "learning_rate": 1.0426864388422359e-05, "loss": 0.1705, "step": 728 }, { "epoch": 1.6382022471910114, "grad_norm": 0.44782668352127075, "learning_rate": 1.0386477363640624e-05, "loss": 0.0591, "step": 729 }, { "epoch": 1.6404494382022472, "grad_norm": 5.771501064300537, "learning_rate": 1.0346296971346449e-05, "loss": 2.1115, "step": 730 }, { "epoch": 1.642696629213483, "grad_norm": 3.1382172107696533, "learning_rate": 1.030632399380362e-05, "loss": 0.4871, "step": 731 }, { "epoch": 1.644943820224719, "grad_norm": 2.9366180896759033, "learning_rate": 1.0266559209237823e-05, "loss": 0.4263, "step": 732 }, { "epoch": 1.6471910112359551, "grad_norm": 0.3291671574115753, "learning_rate": 1.022700339182144e-05, "loss": 0.0484, "step": 733 }, { "epoch": 1.649438202247191, "grad_norm": 3.614391565322876, "learning_rate": 1.0187657311658554e-05, "loss": 0.5249, "step": 734 }, { "epoch": 1.651685393258427, "grad_norm": 2.7633137702941895, "learning_rate": 1.0148521734769896e-05, "loss": 0.3998, "step": 735 }, { "epoch": 1.653932584269663, "grad_norm": 3.61558198928833, "learning_rate": 1.0109597423077964e-05, "loss": 0.7226, "step": 736 }, { "epoch": 1.6561797752808989, "grad_norm": 3.115598440170288, "learning_rate": 1.0070885134392154e-05, "loss": 0.4494, "step": 737 }, { "epoch": 1.6584269662921347, "grad_norm": 0.40766555070877075, "learning_rate": 1.0032385622394063e-05, "loss": 0.0537, "step": 738 }, { "epoch": 1.6606741573033708, "grad_norm": 3.0065345764160156, "learning_rate": 9.994099636622759e-06, "loss": 0.7129, "step": 739 }, { "epoch": 1.6629213483146068, "grad_norm": 3.3741817474365234, "learning_rate": 9.956027922460216e-06, "loss": 0.6079, "step": 740 }, { "epoch": 1.6651685393258426, "grad_norm": 3.619709014892578, "learning_rate": 9.918171221116802e-06, "loss": 0.6688, "step": 741 }, { "epoch": 1.6674157303370787, "grad_norm": 2.7903146743774414, "learning_rate": 9.880530269616847e-06, "loss": 0.567, "step": 742 }, { "epoch": 1.6696629213483147, "grad_norm": 3.1688435077667236, "learning_rate": 9.843105800784284e-06, "loss": 0.5196, "step": 743 }, { "epoch": 1.6719101123595506, "grad_norm": 3.0614845752716064, "learning_rate": 9.805898543228392e-06, "loss": 0.5081, "step": 744 }, { "epoch": 1.6741573033707864, "grad_norm": 3.2361667156219482, "learning_rate": 9.768909221329611e-06, "loss": 0.5413, "step": 745 }, { "epoch": 1.6764044943820224, "grad_norm": 3.0333893299102783, "learning_rate": 9.732138555225442e-06, "loss": 0.4741, "step": 746 }, { "epoch": 1.6786516853932585, "grad_norm": 0.5086472630500793, "learning_rate": 9.6955872607964e-06, "loss": 0.0289, "step": 747 }, { "epoch": 1.6808988764044943, "grad_norm": 2.052063465118408, "learning_rate": 9.65925604965213e-06, "loss": 0.1956, "step": 748 }, { "epoch": 1.6831460674157304, "grad_norm": 2.2275161743164062, "learning_rate": 9.623145629117488e-06, "loss": 0.1967, "step": 749 }, { "epoch": 1.6853932584269664, "grad_norm": 3.2939271926879883, "learning_rate": 9.587256702218833e-06, "loss": 0.6488, "step": 750 }, { "epoch": 1.6876404494382022, "grad_norm": 2.664923906326294, "learning_rate": 9.551589967670282e-06, "loss": 0.7052, "step": 751 }, { "epoch": 1.689887640449438, "grad_norm": 1.887410283088684, "learning_rate": 9.516146119860157e-06, "loss": 0.1807, "step": 752 }, { "epoch": 1.6921348314606741, "grad_norm": 3.4962358474731445, "learning_rate": 9.480925848837433e-06, "loss": 0.6238, "step": 753 }, { "epoch": 1.6943820224719102, "grad_norm": 3.2788219451904297, "learning_rate": 9.445929840298317e-06, "loss": 0.6328, "step": 754 }, { "epoch": 1.696629213483146, "grad_norm": 3.1477396488189697, "learning_rate": 9.411158775572893e-06, "loss": 0.4677, "step": 755 }, { "epoch": 1.698876404494382, "grad_norm": 2.82737135887146, "learning_rate": 9.376613331611867e-06, "loss": 0.44, "step": 756 }, { "epoch": 1.701123595505618, "grad_norm": 3.121051549911499, "learning_rate": 9.342294180973379e-06, "loss": 0.5382, "step": 757 }, { "epoch": 1.703370786516854, "grad_norm": 3.4081382751464844, "learning_rate": 9.308201991809902e-06, "loss": 0.6094, "step": 758 }, { "epoch": 1.7056179775280897, "grad_norm": 0.4050018787384033, "learning_rate": 9.274337427855252e-06, "loss": 0.0262, "step": 759 }, { "epoch": 1.7078651685393258, "grad_norm": 2.0232627391815186, "learning_rate": 9.240701148411655e-06, "loss": 0.1995, "step": 760 }, { "epoch": 1.7101123595505618, "grad_norm": 2.8510541915893555, "learning_rate": 9.207293808336911e-06, "loss": 0.6595, "step": 761 }, { "epoch": 1.7123595505617977, "grad_norm": 2.99471116065979, "learning_rate": 9.174116058031651e-06, "loss": 0.4056, "step": 762 }, { "epoch": 1.7146067415730337, "grad_norm": 3.1772732734680176, "learning_rate": 9.14116854342666e-06, "loss": 0.4836, "step": 763 }, { "epoch": 1.7168539325842698, "grad_norm": 3.142526626586914, "learning_rate": 9.108451905970327e-06, "loss": 0.5474, "step": 764 }, { "epoch": 1.7191011235955056, "grad_norm": 2.5622060298919678, "learning_rate": 9.07596678261612e-06, "loss": 0.6019, "step": 765 }, { "epoch": 1.7191011235955056, "eval_VitaminC_cosine_accuracy": 0.5546875, "eval_VitaminC_cosine_accuracy_threshold": 0.8168195486068726, "eval_VitaminC_cosine_ap": 0.5542265425878483, "eval_VitaminC_cosine_f1": 0.6666666666666667, "eval_VitaminC_cosine_f1_threshold": 0.4378119707107544, "eval_VitaminC_cosine_precision": 0.5083682008368201, "eval_VitaminC_cosine_recall": 0.9681274900398407, "eval_VitaminC_dot_accuracy": 0.548828125, "eval_VitaminC_dot_accuracy_threshold": 316.8749694824219, "eval_VitaminC_dot_ap": 0.5313491176773233, "eval_VitaminC_dot_f1": 0.6693657219973009, "eval_VitaminC_dot_f1_threshold": 142.36911010742188, "eval_VitaminC_dot_precision": 0.5061224489795918, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.556640625, "eval_VitaminC_euclidean_accuracy_threshold": 14.925283432006836, "eval_VitaminC_euclidean_ap": 0.5567864372707304, "eval_VitaminC_euclidean_f1": 0.6657824933687002, "eval_VitaminC_euclidean_f1_threshold": 24.09148406982422, "eval_VitaminC_euclidean_precision": 0.4990059642147117, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 227.16958618164062, "eval_VitaminC_manhattan_ap": 0.5581138503752323, "eval_VitaminC_manhattan_f1": 0.6657824933687002, "eval_VitaminC_manhattan_f1_threshold": 510.7401123046875, "eval_VitaminC_manhattan_precision": 0.4990059642147117, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.556640625, "eval_VitaminC_max_accuracy_threshold": 316.8749694824219, "eval_VitaminC_max_ap": 0.5581138503752323, "eval_VitaminC_max_f1": 0.6693657219973009, "eval_VitaminC_max_f1_threshold": 510.7401123046875, "eval_VitaminC_max_precision": 0.5083682008368201, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5581138503752323, "eval_sts-test_pearson_cosine": 0.8872001916630274, "eval_sts-test_pearson_dot": 0.8772620948169378, "eval_sts-test_pearson_euclidean": 0.9089212280249863, "eval_sts-test_pearson_manhattan": 0.9095742817856052, "eval_sts-test_pearson_max": 0.9095742817856052, "eval_sts-test_spearman_cosine": 0.9093684546165476, "eval_sts-test_spearman_dot": 0.8787381852496359, "eval_sts-test_spearman_euclidean": 0.9049343206328422, "eval_sts-test_spearman_manhattan": 0.9058068359761904, "eval_sts-test_spearman_max": 0.9093684546165476, "eval_vitaminc-pairs_loss": 1.400781512260437, "eval_vitaminc-pairs_runtime": 1.884, "eval_vitaminc-pairs_samples_per_second": 57.326, "eval_vitaminc-pairs_steps_per_second": 1.062, "step": 765 }, { "epoch": 1.7191011235955056, "eval_negation-triplets_loss": 0.9053038954734802, "eval_negation-triplets_runtime": 0.3005, "eval_negation-triplets_samples_per_second": 212.958, "eval_negation-triplets_steps_per_second": 3.327, "step": 765 }, { "epoch": 1.7191011235955056, "eval_scitail-pairs-pos_loss": 0.1003662496805191, "eval_scitail-pairs-pos_runtime": 0.3961, "eval_scitail-pairs-pos_samples_per_second": 136.314, "eval_scitail-pairs-pos_steps_per_second": 2.524, "step": 765 }, { "epoch": 1.7191011235955056, "eval_scitail-pairs-qa_loss": 0.0006318774539977312, "eval_scitail-pairs-qa_runtime": 0.5296, "eval_scitail-pairs-qa_samples_per_second": 241.684, "eval_scitail-pairs-qa_steps_per_second": 3.776, "step": 765 }, { "epoch": 1.7191011235955056, "eval_xsum-pairs_loss": 0.021298767998814583, "eval_xsum-pairs_runtime": 2.7366, "eval_xsum-pairs_samples_per_second": 46.773, "eval_xsum-pairs_steps_per_second": 0.731, "step": 765 }, { "epoch": 1.7191011235955056, "eval_sciq_pairs_loss": 0.015965810045599937, "eval_sciq_pairs_runtime": 2.8798, "eval_sciq_pairs_samples_per_second": 44.448, "eval_sciq_pairs_steps_per_second": 0.695, "step": 765 }, { "epoch": 1.7191011235955056, "eval_qasc_pairs_loss": 0.09514283388853073, "eval_qasc_pairs_runtime": 0.659, "eval_qasc_pairs_samples_per_second": 194.226, "eval_qasc_pairs_steps_per_second": 3.035, "step": 765 }, { "epoch": 1.7191011235955056, "eval_openbookqa_pairs_loss": 0.7150779366493225, "eval_openbookqa_pairs_runtime": 0.5886, "eval_openbookqa_pairs_samples_per_second": 217.448, "eval_openbookqa_pairs_steps_per_second": 3.398, "step": 765 }, { "epoch": 1.7191011235955056, "eval_msmarco_pairs_loss": 0.13628047704696655, "eval_msmarco_pairs_runtime": 1.4926, "eval_msmarco_pairs_samples_per_second": 85.759, "eval_msmarco_pairs_steps_per_second": 1.34, "step": 765 }, { "epoch": 1.7191011235955056, "eval_nq_pairs_loss": 0.09483325481414795, "eval_nq_pairs_runtime": 2.3544, "eval_nq_pairs_samples_per_second": 54.365, "eval_nq_pairs_steps_per_second": 0.849, "step": 765 }, { "epoch": 1.7191011235955056, "eval_trivia_pairs_loss": 0.5036953091621399, "eval_trivia_pairs_runtime": 3.5852, "eval_trivia_pairs_samples_per_second": 35.702, "eval_trivia_pairs_steps_per_second": 0.558, "step": 765 }, { "epoch": 1.7191011235955056, "eval_gooaq_pairs_loss": 0.25342443585395813, "eval_gooaq_pairs_runtime": 0.9151, "eval_gooaq_pairs_samples_per_second": 139.873, "eval_gooaq_pairs_steps_per_second": 2.186, "step": 765 }, { "epoch": 1.7191011235955056, "eval_paws-pos_loss": 0.02396133542060852, "eval_paws-pos_runtime": 0.6961, "eval_paws-pos_samples_per_second": 183.884, "eval_paws-pos_steps_per_second": 2.873, "step": 765 }, { "epoch": 1.7213483146067414, "grad_norm": 1.7996478080749512, "learning_rate": 9.04371380581023e-06, "loss": 0.1824, "step": 766 }, { "epoch": 1.7235955056179775, "grad_norm": 2.8171517848968506, "learning_rate": 9.011693603479218e-06, "loss": 0.6398, "step": 767 }, { "epoch": 1.7258426966292135, "grad_norm": 1.8335927724838257, "learning_rate": 8.979906799017817e-06, "loss": 0.1518, "step": 768 }, { "epoch": 1.7280898876404494, "grad_norm": 3.8396594524383545, "learning_rate": 8.948354011276773e-06, "loss": 0.7804, "step": 769 }, { "epoch": 1.7303370786516854, "grad_norm": 2.0686426162719727, "learning_rate": 8.91703585455082e-06, "loss": 0.2294, "step": 770 }, { "epoch": 1.7325842696629215, "grad_norm": 3.792858123779297, "learning_rate": 8.885952938566709e-06, "loss": 0.719, "step": 771 }, { "epoch": 1.7348314606741573, "grad_norm": 3.530104398727417, "learning_rate": 8.855105868471325e-06, "loss": 0.61, "step": 772 }, { "epoch": 1.737078651685393, "grad_norm": 3.1915283203125, "learning_rate": 8.82449524481993e-06, "loss": 0.5865, "step": 773 }, { "epoch": 1.7393258426966294, "grad_norm": 2.984779119491577, "learning_rate": 8.794121663564459e-06, "loss": 0.4411, "step": 774 }, { "epoch": 1.7415730337078652, "grad_norm": 3.3878366947174072, "learning_rate": 8.763985716041908e-06, "loss": 0.6174, "step": 775 }, { "epoch": 1.743820224719101, "grad_norm": 0.42201510071754456, "learning_rate": 8.734087988962838e-06, "loss": 0.0526, "step": 776 }, { "epoch": 1.746067415730337, "grad_norm": 3.0642645359039307, "learning_rate": 8.70442906439994e-06, "loss": 0.5093, "step": 777 }, { "epoch": 1.7483146067415731, "grad_norm": 3.7137510776519775, "learning_rate": 8.675009519776724e-06, "loss": 0.6742, "step": 778 }, { "epoch": 1.750561797752809, "grad_norm": 0.45391860604286194, "learning_rate": 8.645829927856232e-06, "loss": 0.0293, "step": 779 }, { "epoch": 1.7528089887640448, "grad_norm": 1.823586344718933, "learning_rate": 8.616890856729943e-06, "loss": 0.1776, "step": 780 }, { "epoch": 1.755056179775281, "grad_norm": 2.9259328842163086, "learning_rate": 8.588192869806671e-06, "loss": 0.6964, "step": 781 }, { "epoch": 1.7573033707865169, "grad_norm": 2.0311052799224854, "learning_rate": 8.559736525801625e-06, "loss": 0.2044, "step": 782 }, { "epoch": 1.7595505617977527, "grad_norm": 3.3475892543792725, "learning_rate": 8.531522378725498e-06, "loss": 0.5221, "step": 783 }, { "epoch": 1.7617977528089888, "grad_norm": 3.183722496032715, "learning_rate": 8.503550977873718e-06, "loss": 0.579, "step": 784 }, { "epoch": 1.7640449438202248, "grad_norm": 2.976590394973755, "learning_rate": 8.47582286781573e-06, "loss": 0.5887, "step": 785 }, { "epoch": 1.7662921348314606, "grad_norm": 2.8340189456939697, "learning_rate": 8.448338588384402e-06, "loss": 0.4357, "step": 786 }, { "epoch": 1.7685393258426965, "grad_norm": 3.2601733207702637, "learning_rate": 8.42109867466551e-06, "loss": 0.5437, "step": 787 }, { "epoch": 1.7707865168539327, "grad_norm": 0.4331270754337311, "learning_rate": 8.394103656987329e-06, "loss": 0.0326, "step": 788 }, { "epoch": 1.7730337078651686, "grad_norm": 3.940329074859619, "learning_rate": 8.367354060910303e-06, "loss": 0.7279, "step": 789 }, { "epoch": 1.7752808988764044, "grad_norm": 2.0091605186462402, "learning_rate": 8.340850407216812e-06, "loss": 0.2255, "step": 790 }, { "epoch": 1.7775280898876404, "grad_norm": 3.209312915802002, "learning_rate": 8.314593211901029e-06, "loss": 0.5386, "step": 791 }, { "epoch": 1.7797752808988765, "grad_norm": 0.31416022777557373, "learning_rate": 8.288582986158893e-06, "loss": 0.0218, "step": 792 }, { "epoch": 1.7820224719101123, "grad_norm": 0.2632823586463928, "learning_rate": 8.26282023637813e-06, "loss": 0.0174, "step": 793 }, { "epoch": 1.7842696629213484, "grad_norm": 3.311159610748291, "learning_rate": 8.237305464128416e-06, "loss": 0.542, "step": 794 }, { "epoch": 1.7865168539325844, "grad_norm": 3.435530662536621, "learning_rate": 8.212039166151593e-06, "loss": 0.511, "step": 795 }, { "epoch": 1.7887640449438202, "grad_norm": 0.29949724674224854, "learning_rate": 8.187021834352023e-06, "loss": 0.0345, "step": 796 }, { "epoch": 1.791011235955056, "grad_norm": 2.876715660095215, "learning_rate": 8.162253955786986e-06, "loss": 0.6513, "step": 797 }, { "epoch": 1.7932584269662921, "grad_norm": 0.1600445955991745, "learning_rate": 8.137736012657215e-06, "loss": 0.0069, "step": 798 }, { "epoch": 1.7955056179775282, "grad_norm": 0.4229116141796112, "learning_rate": 8.11346848229749e-06, "loss": 0.0467, "step": 799 }, { "epoch": 1.797752808988764, "grad_norm": 2.8151891231536865, "learning_rate": 8.089451837167374e-06, "loss": 0.6994, "step": 800 }, { "epoch": 1.8, "grad_norm": 2.6854257583618164, "learning_rate": 8.065686544841985e-06, "loss": 0.6583, "step": 801 }, { "epoch": 1.802247191011236, "grad_norm": 0.18526576459407806, "learning_rate": 8.042173068002905e-06, "loss": 0.0059, "step": 802 }, { "epoch": 1.804494382022472, "grad_norm": 2.002289056777954, "learning_rate": 8.018911864429175e-06, "loss": 0.1896, "step": 803 }, { "epoch": 1.8067415730337077, "grad_norm": 6.600093364715576, "learning_rate": 7.995903386988378e-06, "loss": 2.2539, "step": 804 }, { "epoch": 1.8089887640449438, "grad_norm": 1.968558669090271, "learning_rate": 7.97314808362783e-06, "loss": 0.1933, "step": 805 }, { "epoch": 1.8112359550561798, "grad_norm": 3.4320321083068848, "learning_rate": 7.950646397365845e-06, "loss": 0.5681, "step": 806 }, { "epoch": 1.8134831460674157, "grad_norm": 1.6512911319732666, "learning_rate": 7.928398766283123e-06, "loss": 0.1692, "step": 807 }, { "epoch": 1.8157303370786517, "grad_norm": 3.648367166519165, "learning_rate": 7.90640562351421e-06, "loss": 0.6595, "step": 808 }, { "epoch": 1.8179775280898878, "grad_norm": 1.6848154067993164, "learning_rate": 7.884667397239081e-06, "loss": 0.1603, "step": 809 }, { "epoch": 1.8202247191011236, "grad_norm": 2.91007661819458, "learning_rate": 7.863184510674787e-06, "loss": 0.6671, "step": 810 }, { "epoch": 1.8202247191011236, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.7359669208526611, "eval_VitaminC_cosine_ap": 0.5545359684929722, "eval_VitaminC_cosine_f1": 0.6684782608695652, "eval_VitaminC_cosine_f1_threshold": 0.3982100784778595, "eval_VitaminC_cosine_precision": 0.5072164948453608, "eval_VitaminC_cosine_recall": 0.9800796812749004, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 320.06512451171875, "eval_VitaminC_dot_ap": 0.5338853081563194, "eval_VitaminC_dot_f1": 0.6711772665764546, "eval_VitaminC_dot_f1_threshold": 138.87033081054688, "eval_VitaminC_dot_precision": 0.5081967213114754, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.55859375, "eval_VitaminC_euclidean_accuracy_threshold": 15.56546401977539, "eval_VitaminC_euclidean_ap": 0.5573421124721921, "eval_VitaminC_euclidean_f1": 0.6657789613848203, "eval_VitaminC_euclidean_f1_threshold": 23.108264923095703, "eval_VitaminC_euclidean_precision": 0.5, "eval_VitaminC_euclidean_recall": 0.9960159362549801, "eval_VitaminC_manhattan_accuracy": 0.55859375, "eval_VitaminC_manhattan_accuracy_threshold": 242.0438232421875, "eval_VitaminC_manhattan_ap": 0.5585886377626182, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 522.1114501953125, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 320.06512451171875, "eval_VitaminC_max_ap": 0.5585886377626182, "eval_VitaminC_max_f1": 0.6711772665764546, "eval_VitaminC_max_f1_threshold": 522.1114501953125, "eval_VitaminC_max_precision": 0.5081967213114754, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5585886377626182, "eval_sts-test_pearson_cosine": 0.8862376792316814, "eval_sts-test_pearson_dot": 0.8772674400865361, "eval_sts-test_pearson_euclidean": 0.9079859903622429, "eval_sts-test_pearson_manhattan": 0.9085935531378193, "eval_sts-test_pearson_max": 0.9085935531378193, "eval_sts-test_spearman_cosine": 0.908348260110608, "eval_sts-test_spearman_dot": 0.8802999595209344, "eval_sts-test_spearman_euclidean": 0.9040154429826927, "eval_sts-test_spearman_manhattan": 0.9042455092175059, "eval_sts-test_spearman_max": 0.908348260110608, "eval_vitaminc-pairs_loss": 1.3900337219238281, "eval_vitaminc-pairs_runtime": 1.8861, "eval_vitaminc-pairs_samples_per_second": 57.262, "eval_vitaminc-pairs_steps_per_second": 1.06, "step": 810 }, { "epoch": 1.8202247191011236, "eval_negation-triplets_loss": 0.8989883065223694, "eval_negation-triplets_runtime": 0.2978, "eval_negation-triplets_samples_per_second": 214.878, "eval_negation-triplets_steps_per_second": 3.357, "step": 810 }, { "epoch": 1.8202247191011236, "eval_scitail-pairs-pos_loss": 0.10765840113162994, "eval_scitail-pairs-pos_runtime": 0.3829, "eval_scitail-pairs-pos_samples_per_second": 141.019, "eval_scitail-pairs-pos_steps_per_second": 2.611, "step": 810 }, { "epoch": 1.8202247191011236, "eval_scitail-pairs-qa_loss": 0.0005394439795054495, "eval_scitail-pairs-qa_runtime": 0.5255, "eval_scitail-pairs-qa_samples_per_second": 243.591, "eval_scitail-pairs-qa_steps_per_second": 3.806, "step": 810 }, { "epoch": 1.8202247191011236, "eval_xsum-pairs_loss": 0.020049653947353363, "eval_xsum-pairs_runtime": 2.7348, "eval_xsum-pairs_samples_per_second": 46.805, "eval_xsum-pairs_steps_per_second": 0.731, "step": 810 }, { "epoch": 1.8202247191011236, "eval_sciq_pairs_loss": 0.01575099490582943, "eval_sciq_pairs_runtime": 2.8287, "eval_sciq_pairs_samples_per_second": 45.251, "eval_sciq_pairs_steps_per_second": 0.707, "step": 810 }, { "epoch": 1.8202247191011236, "eval_qasc_pairs_loss": 0.09813623130321503, "eval_qasc_pairs_runtime": 0.6548, "eval_qasc_pairs_samples_per_second": 195.48, "eval_qasc_pairs_steps_per_second": 3.054, "step": 810 }, { "epoch": 1.8202247191011236, "eval_openbookqa_pairs_loss": 0.6886358857154846, "eval_openbookqa_pairs_runtime": 0.5819, "eval_openbookqa_pairs_samples_per_second": 219.955, "eval_openbookqa_pairs_steps_per_second": 3.437, "step": 810 }, { "epoch": 1.8202247191011236, "eval_msmarco_pairs_loss": 0.12771743535995483, "eval_msmarco_pairs_runtime": 1.4904, "eval_msmarco_pairs_samples_per_second": 85.884, "eval_msmarco_pairs_steps_per_second": 1.342, "step": 810 }, { "epoch": 1.8202247191011236, "eval_nq_pairs_loss": 0.08647548407316208, "eval_nq_pairs_runtime": 2.3498, "eval_nq_pairs_samples_per_second": 54.472, "eval_nq_pairs_steps_per_second": 0.851, "step": 810 }, { "epoch": 1.8202247191011236, "eval_trivia_pairs_loss": 0.49437975883483887, "eval_trivia_pairs_runtime": 3.595, "eval_trivia_pairs_samples_per_second": 35.605, "eval_trivia_pairs_steps_per_second": 0.556, "step": 810 }, { "epoch": 1.8202247191011236, "eval_gooaq_pairs_loss": 0.2529779374599457, "eval_gooaq_pairs_runtime": 0.9117, "eval_gooaq_pairs_samples_per_second": 140.4, "eval_gooaq_pairs_steps_per_second": 2.194, "step": 810 }, { "epoch": 1.8202247191011236, "eval_paws-pos_loss": 0.024454889819025993, "eval_paws-pos_runtime": 0.6908, "eval_paws-pos_samples_per_second": 185.289, "eval_paws-pos_steps_per_second": 2.895, "step": 810 }, { "epoch": 1.8224719101123594, "grad_norm": 1.7971607446670532, "learning_rate": 7.841957382067224e-06, "loss": 0.1995, "step": 811 }, { "epoch": 1.8247191011235955, "grad_norm": 3.942342519760132, "learning_rate": 7.820986424682986e-06, "loss": 0.5579, "step": 812 }, { "epoch": 1.8269662921348315, "grad_norm": 2.735011577606201, "learning_rate": 7.800272046801332e-06, "loss": 0.3833, "step": 813 }, { "epoch": 1.8292134831460674, "grad_norm": 3.4616143703460693, "learning_rate": 7.779814651706219e-06, "loss": 0.6411, "step": 814 }, { "epoch": 1.8314606741573034, "grad_norm": 2.609600782394409, "learning_rate": 7.75961463767846e-06, "loss": 0.6034, "step": 815 }, { "epoch": 1.8337078651685395, "grad_norm": 3.4293768405914307, "learning_rate": 7.73967239798797e-06, "loss": 0.5206, "step": 816 }, { "epoch": 1.8359550561797753, "grad_norm": 2.578787088394165, "learning_rate": 7.719988320886112e-06, "loss": 0.5941, "step": 817 }, { "epoch": 1.838202247191011, "grad_norm": 2.0583581924438477, "learning_rate": 7.700562789598128e-06, "loss": 0.2062, "step": 818 }, { "epoch": 1.8404494382022472, "grad_norm": 3.2634506225585938, "learning_rate": 7.68139618231569e-06, "loss": 0.6086, "step": 819 }, { "epoch": 1.8426966292134832, "grad_norm": 0.3572952151298523, "learning_rate": 7.662488872189526e-06, "loss": 0.037, "step": 820 }, { "epoch": 1.844943820224719, "grad_norm": 2.8284902572631836, "learning_rate": 7.643841227322173e-06, "loss": 0.6257, "step": 821 }, { "epoch": 1.847191011235955, "grad_norm": 2.7638444900512695, "learning_rate": 7.625453610760782e-06, "loss": 0.7064, "step": 822 }, { "epoch": 1.8494382022471911, "grad_norm": 3.253589391708374, "learning_rate": 7.60732638049008e-06, "loss": 0.563, "step": 823 }, { "epoch": 1.851685393258427, "grad_norm": 2.748392343521118, "learning_rate": 7.5894598894253795e-06, "loss": 0.4359, "step": 824 }, { "epoch": 1.8539325842696628, "grad_norm": 0.0, "learning_rate": 7.571854485405722e-06, "loss": 0.0, "step": 825 }, { "epoch": 1.8561797752808988, "grad_norm": 2.3260743618011475, "learning_rate": 7.554510511187089e-06, "loss": 0.233, "step": 826 }, { "epoch": 1.8584269662921349, "grad_norm": 0.35933050513267517, "learning_rate": 7.537428304435747e-06, "loss": 0.0335, "step": 827 }, { "epoch": 1.8606741573033707, "grad_norm": 2.8572092056274414, "learning_rate": 7.520608197721665e-06, "loss": 0.6077, "step": 828 }, { "epoch": 1.8629213483146068, "grad_norm": 2.002993583679199, "learning_rate": 7.504050518512034e-06, "loss": 0.1707, "step": 829 }, { "epoch": 1.8651685393258428, "grad_norm": 3.3585002422332764, "learning_rate": 7.487755589164904e-06, "loss": 0.5807, "step": 830 }, { "epoch": 1.8674157303370786, "grad_norm": 3.4473483562469482, "learning_rate": 7.471723726922902e-06, "loss": 0.6566, "step": 831 }, { "epoch": 1.8696629213483145, "grad_norm": 2.6746721267700195, "learning_rate": 7.455955243907055e-06, "loss": 0.663, "step": 832 }, { "epoch": 1.8719101123595505, "grad_norm": 3.2465124130249023, "learning_rate": 7.44045044711071e-06, "loss": 0.5896, "step": 833 }, { "epoch": 1.8741573033707866, "grad_norm": 3.425652027130127, "learning_rate": 7.425209638393565e-06, "loss": 0.5418, "step": 834 }, { "epoch": 1.8764044943820224, "grad_norm": 3.13114595413208, "learning_rate": 7.410233114475789e-06, "loss": 0.5735, "step": 835 }, { "epoch": 1.8786516853932584, "grad_norm": 2.1753461360931396, "learning_rate": 7.395521166932242e-06, "loss": 0.2062, "step": 836 }, { "epoch": 1.8808988764044945, "grad_norm": 3.1065316200256348, "learning_rate": 7.381074082186805e-06, "loss": 0.4343, "step": 837 }, { "epoch": 1.8831460674157303, "grad_norm": 0.4167785942554474, "learning_rate": 7.366892141506793e-06, "loss": 0.0614, "step": 838 }, { "epoch": 1.8853932584269661, "grad_norm": 4.009187698364258, "learning_rate": 7.352975620997496e-06, "loss": 0.6301, "step": 839 }, { "epoch": 1.8876404494382022, "grad_norm": 2.7548623085021973, "learning_rate": 7.339324791596779e-06, "loss": 0.3956, "step": 840 }, { "epoch": 1.8898876404494382, "grad_norm": 0.39213764667510986, "learning_rate": 7.325939919069839e-06, "loss": 0.0479, "step": 841 }, { "epoch": 1.892134831460674, "grad_norm": 1.7549433708190918, "learning_rate": 7.312821264003997e-06, "loss": 0.1819, "step": 842 }, { "epoch": 1.8943820224719101, "grad_norm": 3.251699209213257, "learning_rate": 7.299969081803653e-06, "loss": 0.6005, "step": 843 }, { "epoch": 1.8966292134831462, "grad_norm": 3.131671905517578, "learning_rate": 7.287383622685292e-06, "loss": 0.452, "step": 844 }, { "epoch": 1.898876404494382, "grad_norm": 2.7567901611328125, "learning_rate": 7.275065131672632e-06, "loss": 0.4083, "step": 845 }, { "epoch": 1.9011235955056178, "grad_norm": 1.7303539514541626, "learning_rate": 7.263013848591836e-06, "loss": 0.1702, "step": 846 }, { "epoch": 1.903370786516854, "grad_norm": 8.231098175048828, "learning_rate": 7.251230008066854e-06, "loss": 0.9503, "step": 847 }, { "epoch": 1.90561797752809, "grad_norm": 2.864522933959961, "learning_rate": 7.239713839514851e-06, "loss": 0.6427, "step": 848 }, { "epoch": 1.9078651685393258, "grad_norm": 0.1522376835346222, "learning_rate": 7.228465567141745e-06, "loss": 0.0048, "step": 849 }, { "epoch": 1.9101123595505618, "grad_norm": 3.1846060752868652, "learning_rate": 7.217485409937831e-06, "loss": 0.4609, "step": 850 }, { "epoch": 1.9123595505617978, "grad_norm": 2.612794876098633, "learning_rate": 7.206773581673535e-06, "loss": 0.3854, "step": 851 }, { "epoch": 1.9146067415730337, "grad_norm": 3.0257163047790527, "learning_rate": 7.196330290895232e-06, "loss": 0.4411, "step": 852 }, { "epoch": 1.9168539325842695, "grad_norm": 1.7561123371124268, "learning_rate": 7.186155740921204e-06, "loss": 0.181, "step": 853 }, { "epoch": 1.9191011235955058, "grad_norm": 3.124577522277832, "learning_rate": 7.176250129837667e-06, "loss": 0.5846, "step": 854 }, { "epoch": 1.9213483146067416, "grad_norm": 2.7159767150878906, "learning_rate": 7.166613650494926e-06, "loss": 0.3585, "step": 855 }, { "epoch": 1.9213483146067416, "eval_VitaminC_cosine_accuracy": 0.556640625, "eval_VitaminC_cosine_accuracy_threshold": 0.737371027469635, "eval_VitaminC_cosine_ap": 0.5540762464644882, "eval_VitaminC_cosine_f1": 0.670299727520436, "eval_VitaminC_cosine_f1_threshold": 0.4220387935638428, "eval_VitaminC_cosine_precision": 0.5093167701863354, "eval_VitaminC_cosine_recall": 0.9800796812749004, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 319.16412353515625, "eval_VitaminC_dot_ap": 0.5341689006460674, "eval_VitaminC_dot_f1": 0.6720867208672087, "eval_VitaminC_dot_f1_threshold": 144.9567413330078, "eval_VitaminC_dot_precision": 0.5092402464065708, "eval_VitaminC_dot_recall": 0.9880478087649402, "eval_VitaminC_euclidean_accuracy": 0.5546875, "eval_VitaminC_euclidean_accuracy_threshold": 11.91163444519043, "eval_VitaminC_euclidean_ap": 0.5565214436993116, "eval_VitaminC_euclidean_f1": 0.6666666666666667, "eval_VitaminC_euclidean_f1_threshold": 20.597320556640625, "eval_VitaminC_euclidean_precision": 0.5083682008368201, "eval_VitaminC_euclidean_recall": 0.9681274900398407, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 239.43701171875, "eval_VitaminC_manhattan_ap": 0.555925496941737, "eval_VitaminC_manhattan_f1": 0.6649006622516557, "eval_VitaminC_manhattan_f1_threshold": 521.5595703125, "eval_VitaminC_manhattan_precision": 0.498015873015873, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.556640625, "eval_VitaminC_max_accuracy_threshold": 319.16412353515625, "eval_VitaminC_max_ap": 0.5565214436993116, "eval_VitaminC_max_f1": 0.6720867208672087, "eval_VitaminC_max_f1_threshold": 521.5595703125, "eval_VitaminC_max_precision": 0.5093167701863354, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5565214436993116, "eval_sts-test_pearson_cosine": 0.8878150413343597, "eval_sts-test_pearson_dot": 0.8786014862759446, "eval_sts-test_pearson_euclidean": 0.9089800587896777, "eval_sts-test_pearson_manhattan": 0.9094011707715276, "eval_sts-test_pearson_max": 0.9094011707715276, "eval_sts-test_spearman_cosine": 0.9084799183216214, "eval_sts-test_spearman_dot": 0.8800682374894496, "eval_sts-test_spearman_euclidean": 0.9044720848539306, "eval_sts-test_spearman_manhattan": 0.905363261473279, "eval_sts-test_spearman_max": 0.9084799183216214, "eval_vitaminc-pairs_loss": 1.3997077941894531, "eval_vitaminc-pairs_runtime": 1.8754, "eval_vitaminc-pairs_samples_per_second": 57.587, "eval_vitaminc-pairs_steps_per_second": 1.066, "step": 855 }, { "epoch": 1.9213483146067416, "eval_negation-triplets_loss": 0.8800344467163086, "eval_negation-triplets_runtime": 0.2974, "eval_negation-triplets_samples_per_second": 215.197, "eval_negation-triplets_steps_per_second": 3.362, "step": 855 }, { "epoch": 1.9213483146067416, "eval_scitail-pairs-pos_loss": 0.10502482950687408, "eval_scitail-pairs-pos_runtime": 0.3842, "eval_scitail-pairs-pos_samples_per_second": 140.569, "eval_scitail-pairs-pos_steps_per_second": 2.603, "step": 855 }, { "epoch": 1.9213483146067416, "eval_scitail-pairs-qa_loss": 0.000583611719775945, "eval_scitail-pairs-qa_runtime": 0.524, "eval_scitail-pairs-qa_samples_per_second": 244.278, "eval_scitail-pairs-qa_steps_per_second": 3.817, "step": 855 }, { "epoch": 1.9213483146067416, "eval_xsum-pairs_loss": 0.019778922200202942, "eval_xsum-pairs_runtime": 2.7334, "eval_xsum-pairs_samples_per_second": 46.829, "eval_xsum-pairs_steps_per_second": 0.732, "step": 855 }, { "epoch": 1.9213483146067416, "eval_sciq_pairs_loss": 0.016238627955317497, "eval_sciq_pairs_runtime": 2.8338, "eval_sciq_pairs_samples_per_second": 45.169, "eval_sciq_pairs_steps_per_second": 0.706, "step": 855 }, { "epoch": 1.9213483146067416, "eval_qasc_pairs_loss": 0.0897185355424881, "eval_qasc_pairs_runtime": 0.6521, "eval_qasc_pairs_samples_per_second": 196.295, "eval_qasc_pairs_steps_per_second": 3.067, "step": 855 }, { "epoch": 1.9213483146067416, "eval_openbookqa_pairs_loss": 0.6814875602722168, "eval_openbookqa_pairs_runtime": 0.5819, "eval_openbookqa_pairs_samples_per_second": 219.951, "eval_openbookqa_pairs_steps_per_second": 3.437, "step": 855 }, { "epoch": 1.9213483146067416, "eval_msmarco_pairs_loss": 0.1330471634864807, "eval_msmarco_pairs_runtime": 1.487, "eval_msmarco_pairs_samples_per_second": 86.082, "eval_msmarco_pairs_steps_per_second": 1.345, "step": 855 }, { "epoch": 1.9213483146067416, "eval_nq_pairs_loss": 0.09231739491224289, "eval_nq_pairs_runtime": 2.3518, "eval_nq_pairs_samples_per_second": 54.427, "eval_nq_pairs_steps_per_second": 0.85, "step": 855 }, { "epoch": 1.9213483146067416, "eval_trivia_pairs_loss": 0.49163100123405457, "eval_trivia_pairs_runtime": 3.5773, "eval_trivia_pairs_samples_per_second": 35.781, "eval_trivia_pairs_steps_per_second": 0.559, "step": 855 }, { "epoch": 1.9213483146067416, "eval_gooaq_pairs_loss": 0.2540152370929718, "eval_gooaq_pairs_runtime": 0.9247, "eval_gooaq_pairs_samples_per_second": 138.418, "eval_gooaq_pairs_steps_per_second": 2.163, "step": 855 }, { "epoch": 1.9213483146067416, "eval_paws-pos_loss": 0.024291109293699265, "eval_paws-pos_runtime": 0.6907, "eval_paws-pos_samples_per_second": 185.32, "eval_paws-pos_steps_per_second": 2.896, "step": 855 }, { "epoch": 1.9235955056179774, "grad_norm": 2.8243184089660645, "learning_rate": 7.157246490503611e-06, "loss": 0.4303, "step": 856 }, { "epoch": 1.9258426966292135, "grad_norm": 3.640563726425171, "learning_rate": 7.148148832231039e-06, "loss": 0.5627, "step": 857 }, { "epoch": 1.9280898876404495, "grad_norm": 2.036550521850586, "learning_rate": 7.1393208527976385e-06, "loss": 0.1687, "step": 858 }, { "epoch": 1.9303370786516854, "grad_norm": 3.249506950378418, "learning_rate": 7.130762724073527e-06, "loss": 0.5509, "step": 859 }, { "epoch": 1.9325842696629212, "grad_norm": 1.7619365453720093, "learning_rate": 7.122474612675153e-06, "loss": 0.175, "step": 860 }, { "epoch": 1.9348314606741575, "grad_norm": 3.532602071762085, "learning_rate": 7.114456679962048e-06, "loss": 0.605, "step": 861 }, { "epoch": 1.9370786516853933, "grad_norm": 3.2661614418029785, "learning_rate": 7.106709082033693e-06, "loss": 0.5085, "step": 862 }, { "epoch": 1.939325842696629, "grad_norm": 3.216998815536499, "learning_rate": 7.09923196972648e-06, "loss": 0.5059, "step": 863 }, { "epoch": 1.9415730337078652, "grad_norm": 3.1379799842834473, "learning_rate": 7.092025488610771e-06, "loss": 0.6114, "step": 864 }, { "epoch": 1.9438202247191012, "grad_norm": 3.0328826904296875, "learning_rate": 7.08508977898806e-06, "loss": 0.5132, "step": 865 }, { "epoch": 1.946067415730337, "grad_norm": 2.766942262649536, "learning_rate": 7.078424975888252e-06, "loss": 0.4178, "step": 866 }, { "epoch": 1.948314606741573, "grad_norm": 3.3092901706695557, "learning_rate": 7.072031209067025e-06, "loss": 0.6022, "step": 867 }, { "epoch": 1.9505617977528091, "grad_norm": 3.0691306591033936, "learning_rate": 7.06590860300331e-06, "loss": 0.5691, "step": 868 }, { "epoch": 1.952808988764045, "grad_norm": 2.871619462966919, "learning_rate": 7.060057276896864e-06, "loss": 0.7299, "step": 869 }, { "epoch": 1.9550561797752808, "grad_norm": 0.3670404255390167, "learning_rate": 7.054477344665952e-06, "loss": 0.0441, "step": 870 }, { "epoch": 1.9573033707865168, "grad_norm": 3.10213303565979, "learning_rate": 7.049168914945126e-06, "loss": 0.5855, "step": 871 }, { "epoch": 1.9595505617977529, "grad_norm": 0.7625748515129089, "learning_rate": 7.044132091083108e-06, "loss": 0.0151, "step": 872 }, { "epoch": 1.9617977528089887, "grad_norm": 1.9246089458465576, "learning_rate": 7.039366971140788e-06, "loss": 0.184, "step": 873 }, { "epoch": 1.9640449438202248, "grad_norm": 3.3005852699279785, "learning_rate": 7.034873647889306e-06, "loss": 0.6185, "step": 874 }, { "epoch": 1.9662921348314608, "grad_norm": 2.894439935684204, "learning_rate": 7.030652208808245e-06, "loss": 0.6474, "step": 875 }, { "epoch": 1.9685393258426966, "grad_norm": 0.026136571541428566, "learning_rate": 7.026702736083935e-06, "loss": 0.0005, "step": 876 }, { "epoch": 1.9707865168539325, "grad_norm": 3.418996572494507, "learning_rate": 7.023025306607848e-06, "loss": 0.6692, "step": 877 }, { "epoch": 1.9730337078651685, "grad_norm": 3.3654134273529053, "learning_rate": 7.019619991975102e-06, "loss": 0.496, "step": 878 }, { "epoch": 1.9752808988764046, "grad_norm": 3.123142957687378, "learning_rate": 7.016486858483064e-06, "loss": 0.5654, "step": 879 }, { "epoch": 1.9775280898876404, "grad_norm": 3.3420400619506836, "learning_rate": 7.013625967130067e-06, "loss": 0.4925, "step": 880 }, { "epoch": 1.9797752808988764, "grad_norm": 0.0, "learning_rate": 7.011037373614215e-06, "loss": 0.0, "step": 881 }, { "epoch": 1.9820224719101125, "grad_norm": 4.277894496917725, "learning_rate": 7.008721128332304e-06, "loss": 0.2304, "step": 882 }, { "epoch": 1.9842696629213483, "grad_norm": 1.8923455476760864, "learning_rate": 7.006677276378835e-06, "loss": 0.1772, "step": 883 }, { "epoch": 1.9865168539325841, "grad_norm": 1.7901984453201294, "learning_rate": 7.00490585754514e-06, "loss": 0.1804, "step": 884 }, { "epoch": 1.9887640449438202, "grad_norm": 0.90578693151474, "learning_rate": 7.003406906318611e-06, "loss": 0.0198, "step": 885 }, { "epoch": 1.9910112359550562, "grad_norm": 2.6822283267974854, "learning_rate": 7.002180451882019e-06, "loss": 0.6703, "step": 886 }, { "epoch": 1.993258426966292, "grad_norm": 3.0391242504119873, "learning_rate": 7.001226518112952e-06, "loss": 0.1552, "step": 887 }, { "epoch": 1.9955056179775281, "grad_norm": 3.118842363357544, "learning_rate": 7.000545123583352e-06, "loss": 0.4962, "step": 888 }, { "epoch": 1.9977528089887642, "grad_norm": 4.216142177581787, "learning_rate": 7.000136281559146e-06, "loss": 0.2099, "step": 889 }, { "epoch": 2.0, "grad_norm": 2.8663406372070312, "learning_rate": 6.999999999999999e-06, "loss": 0.6554, "step": 890 } ], "logging_steps": 1, "max_steps": 890, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 89, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 640, "trial_name": null, "trial_params": null }