{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.8,
  "eval_steps": 32,
  "global_step": 576,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003125,
      "grad_norm": 3.1757984161376953,
      "learning_rate": 3.125e-07,
      "loss": 0.7374,
      "step": 1
    },
    {
      "epoch": 0.00625,
      "grad_norm": 3.137390375137329,
      "learning_rate": 6.25e-07,
      "loss": 0.5723,
      "step": 2
    },
    {
      "epoch": 0.009375,
      "grad_norm": 2.765856981277466,
      "learning_rate": 9.375000000000001e-07,
      "loss": 0.551,
      "step": 3
    },
    {
      "epoch": 0.0125,
      "grad_norm": 3.468062162399292,
      "learning_rate": 1.25e-06,
      "loss": 0.7379,
      "step": 4
    },
    {
      "epoch": 0.015625,
      "grad_norm": 2.6695668697357178,
      "learning_rate": 1.5625e-06,
      "loss": 0.5271,
      "step": 5
    },
    {
      "epoch": 0.01875,
      "grad_norm": 2.7720863819122314,
      "learning_rate": 1.8750000000000003e-06,
      "loss": 0.5858,
      "step": 6
    },
    {
      "epoch": 0.021875,
      "grad_norm": 3.0211267471313477,
      "learning_rate": 2.1875000000000002e-06,
      "loss": 0.6562,
      "step": 7
    },
    {
      "epoch": 0.025,
      "grad_norm": 3.641108989715576,
      "learning_rate": 2.5e-06,
      "loss": 0.8228,
      "step": 8
    },
    {
      "epoch": 0.028125,
      "grad_norm": 3.9061200618743896,
      "learning_rate": 2.8125e-06,
      "loss": 0.9988,
      "step": 9
    },
    {
      "epoch": 0.03125,
      "grad_norm": 2.642423391342163,
      "learning_rate": 3.125e-06,
      "loss": 0.5582,
      "step": 10
    },
    {
      "epoch": 0.034375,
      "grad_norm": 3.6546943187713623,
      "learning_rate": 3.4375e-06,
      "loss": 0.8546,
      "step": 11
    },
    {
      "epoch": 0.0375,
      "grad_norm": 2.5504300594329834,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.4235,
      "step": 12
    },
    {
      "epoch": 0.040625,
      "grad_norm": 2.845123529434204,
      "learning_rate": 4.0625000000000005e-06,
      "loss": 0.6418,
      "step": 13
    },
    {
      "epoch": 0.04375,
      "grad_norm": 2.8562164306640625,
      "learning_rate": 4.3750000000000005e-06,
      "loss": 0.6577,
      "step": 14
    },
    {
      "epoch": 0.046875,
      "grad_norm": 3.4033620357513428,
      "learning_rate": 4.6875000000000004e-06,
      "loss": 0.8333,
      "step": 15
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.148242473602295,
      "learning_rate": 5e-06,
      "loss": 0.4082,
      "step": 16
    },
    {
      "epoch": 0.053125,
      "grad_norm": 3.685960292816162,
      "learning_rate": 5.3125e-06,
      "loss": 0.8101,
      "step": 17
    },
    {
      "epoch": 0.05625,
      "grad_norm": 2.7071452140808105,
      "learning_rate": 5.625e-06,
      "loss": 0.5259,
      "step": 18
    },
    {
      "epoch": 0.059375,
      "grad_norm": 3.508561611175537,
      "learning_rate": 5.9375e-06,
      "loss": 0.9015,
      "step": 19
    },
    {
      "epoch": 0.0625,
      "grad_norm": 4.140976428985596,
      "learning_rate": 6.25e-06,
      "loss": 1.3915,
      "step": 20
    },
    {
      "epoch": 0.065625,
      "grad_norm": 1.5563820600509644,
      "learning_rate": 6.5625e-06,
      "loss": 0.26,
      "step": 21
    },
    {
      "epoch": 0.06875,
      "grad_norm": 3.1467344760894775,
      "learning_rate": 6.875e-06,
      "loss": 0.6885,
      "step": 22
    },
    {
      "epoch": 0.071875,
      "grad_norm": 3.539327383041382,
      "learning_rate": 7.1875e-06,
      "loss": 0.9357,
      "step": 23
    },
    {
      "epoch": 0.075,
      "grad_norm": 3.1691510677337646,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.7168,
      "step": 24
    },
    {
      "epoch": 0.078125,
      "grad_norm": 3.9020121097564697,
      "learning_rate": 7.8125e-06,
      "loss": 0.8678,
      "step": 25
    },
    {
      "epoch": 0.08125,
      "grad_norm": 2.3635435104370117,
      "learning_rate": 8.125000000000001e-06,
      "loss": 0.4922,
      "step": 26
    },
    {
      "epoch": 0.084375,
      "grad_norm": 2.5170037746429443,
      "learning_rate": 8.4375e-06,
      "loss": 0.4937,
      "step": 27
    },
    {
      "epoch": 0.0875,
      "grad_norm": 2.7988407611846924,
      "learning_rate": 8.750000000000001e-06,
      "loss": 0.5891,
      "step": 28
    },
    {
      "epoch": 0.090625,
      "grad_norm": 2.99135160446167,
      "learning_rate": 9.0625e-06,
      "loss": 0.6921,
      "step": 29
    },
    {
      "epoch": 0.09375,
      "grad_norm": 3.098013162612915,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.8087,
      "step": 30
    },
    {
      "epoch": 0.096875,
      "grad_norm": 3.358091115951538,
      "learning_rate": 9.6875e-06,
      "loss": 0.805,
      "step": 31
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.0206046104431152,
      "learning_rate": 1e-05,
      "loss": 0.6141,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_VitaminC_cosine_accuracy": 0.5546875,
      "eval_VitaminC_cosine_accuracy_threshold": 0.8487042188644409,
      "eval_VitaminC_cosine_ap": 0.5467207830251657,
      "eval_VitaminC_cosine_f1": 0.6657824933687002,
      "eval_VitaminC_cosine_f1_threshold": 0.2510407269001007,
      "eval_VitaminC_cosine_precision": 0.4990059642147117,
      "eval_VitaminC_cosine_recall": 1.0,
      "eval_VitaminC_dot_accuracy": 0.55078125,
      "eval_VitaminC_dot_accuracy_threshold": 318.7947082519531,
      "eval_VitaminC_dot_ap": 0.5360598625078122,
      "eval_VitaminC_dot_f1": 0.6657824933687002,
      "eval_VitaminC_dot_f1_threshold": 98.82717895507812,
      "eval_VitaminC_dot_precision": 0.4990059642147117,
      "eval_VitaminC_dot_recall": 1.0,
      "eval_VitaminC_euclidean_accuracy": 0.552734375,
      "eval_VitaminC_euclidean_accuracy_threshold": 15.370981216430664,
      "eval_VitaminC_euclidean_ap": 0.54465834495355,
      "eval_VitaminC_euclidean_f1": 0.6657824933687002,
      "eval_VitaminC_euclidean_f1_threshold": 24.364877700805664,
      "eval_VitaminC_euclidean_precision": 0.4990059642147117,
      "eval_VitaminC_euclidean_recall": 1.0,
      "eval_VitaminC_manhattan_accuracy": 0.5546875,
      "eval_VitaminC_manhattan_accuracy_threshold": 273.6689758300781,
      "eval_VitaminC_manhattan_ap": 0.5450408710915566,
      "eval_VitaminC_manhattan_f1": 0.6675531914893617,
      "eval_VitaminC_manhattan_f1_threshold": 502.82244873046875,
      "eval_VitaminC_manhattan_precision": 0.500998003992016,
      "eval_VitaminC_manhattan_recall": 1.0,
      "eval_VitaminC_max_accuracy": 0.5546875,
      "eval_VitaminC_max_accuracy_threshold": 318.7947082519531,
      "eval_VitaminC_max_ap": 0.5467207830251657,
      "eval_VitaminC_max_f1": 0.6675531914893617,
      "eval_VitaminC_max_f1_threshold": 502.82244873046875,
      "eval_VitaminC_max_precision": 0.500998003992016,
      "eval_VitaminC_max_recall": 1.0,
      "eval_sequential_score": 0.5467207830251657,
      "eval_sts-test_pearson_cosine": 0.8677868917853514,
      "eval_sts-test_pearson_dot": 0.8601917125112223,
      "eval_sts-test_pearson_euclidean": 0.889472619726378,
      "eval_sts-test_pearson_manhattan": 0.890143281884324,
      "eval_sts-test_pearson_max": 0.890143281884324,
      "eval_sts-test_spearman_cosine": 0.8954519734959775,
      "eval_sts-test_spearman_dot": 0.8621348855070287,
      "eval_sts-test_spearman_euclidean": 0.8880001748147683,
      "eval_sts-test_spearman_manhattan": 0.8870461226731652,
      "eval_sts-test_spearman_max": 0.8954519734959775,
      "eval_vitaminc-pairs_loss": 2.332582473754883,
      "eval_vitaminc-pairs_runtime": 2.2432,
      "eval_vitaminc-pairs_samples_per_second": 48.146,
      "eval_vitaminc-pairs_steps_per_second": 0.892,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_negation-triplets_loss": 0.8681236505508423,
      "eval_negation-triplets_runtime": 0.2927,
      "eval_negation-triplets_samples_per_second": 218.641,
      "eval_negation-triplets_steps_per_second": 3.416,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_scitail-pairs-pos_loss": 0.07759770005941391,
      "eval_scitail-pairs-pos_runtime": 0.3708,
      "eval_scitail-pairs-pos_samples_per_second": 145.613,
      "eval_scitail-pairs-pos_steps_per_second": 2.697,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_xsum-pairs_loss": 0.09131219983100891,
      "eval_xsum-pairs_runtime": 2.8486,
      "eval_xsum-pairs_samples_per_second": 44.934,
      "eval_xsum-pairs_steps_per_second": 0.702,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_sciq_pairs_loss": 0.01965576782822609,
      "eval_sciq_pairs_runtime": 3.6062,
      "eval_sciq_pairs_samples_per_second": 35.494,
      "eval_sciq_pairs_steps_per_second": 0.555,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_qasc_pairs_loss": 0.10996829718351364,
      "eval_qasc_pairs_runtime": 0.5975,
      "eval_qasc_pairs_samples_per_second": 214.235,
      "eval_qasc_pairs_steps_per_second": 3.347,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_openbookqa_pairs_loss": 0.6932356953620911,
      "eval_openbookqa_pairs_runtime": 0.5729,
      "eval_openbookqa_pairs_samples_per_second": 223.415,
      "eval_openbookqa_pairs_steps_per_second": 3.491,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_msmarco_pairs_loss": 0.32686129212379456,
      "eval_msmarco_pairs_runtime": 1.4637,
      "eval_msmarco_pairs_samples_per_second": 87.448,
      "eval_msmarco_pairs_steps_per_second": 1.366,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_nq_pairs_loss": 0.1978442668914795,
      "eval_nq_pairs_runtime": 2.8588,
      "eval_nq_pairs_samples_per_second": 44.774,
      "eval_nq_pairs_steps_per_second": 0.7,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_trivia_pairs_loss": 0.7432661652565002,
      "eval_trivia_pairs_runtime": 4.3895,
      "eval_trivia_pairs_samples_per_second": 29.16,
      "eval_trivia_pairs_steps_per_second": 0.456,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_gooaq_pairs_loss": 0.3761173486709595,
      "eval_gooaq_pairs_runtime": 1.0043,
      "eval_gooaq_pairs_samples_per_second": 127.452,
      "eval_gooaq_pairs_steps_per_second": 1.991,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_paws-pos_loss": 0.02476382441818714,
      "eval_paws-pos_runtime": 0.6858,
      "eval_paws-pos_samples_per_second": 186.635,
      "eval_paws-pos_steps_per_second": 2.916,
      "step": 32
    },
    {
      "epoch": 0.103125,
      "grad_norm": 3.51029109954834,
      "learning_rate": 1.0312500000000002e-05,
      "loss": 0.7783,
      "step": 33
    },
    {
      "epoch": 0.10625,
      "grad_norm": 3.376455783843994,
      "learning_rate": 1.0625e-05,
      "loss": 0.8746,
      "step": 34
    },
    {
      "epoch": 0.109375,
      "grad_norm": 2.7385308742523193,
      "learning_rate": 1.0937500000000002e-05,
      "loss": 0.5085,
      "step": 35
    },
    {
      "epoch": 0.1125,
      "grad_norm": 2.782606840133667,
      "learning_rate": 1.125e-05,
      "loss": 0.4842,
      "step": 36
    },
    {
      "epoch": 0.115625,
      "grad_norm": 3.4377782344818115,
      "learning_rate": 1.1562500000000002e-05,
      "loss": 0.8097,
      "step": 37
    },
    {
      "epoch": 0.11875,
      "grad_norm": 2.6202378273010254,
      "learning_rate": 1.1875e-05,
      "loss": 0.5325,
      "step": 38
    },
    {
      "epoch": 0.121875,
      "grad_norm": 3.0869128704071045,
      "learning_rate": 1.2187500000000001e-05,
      "loss": 0.7221,
      "step": 39
    },
    {
      "epoch": 0.125,
      "grad_norm": 3.131516456604004,
      "learning_rate": 1.25e-05,
      "loss": 0.708,
      "step": 40
    },
    {
      "epoch": 0.128125,
      "grad_norm": 2.0318033695220947,
      "learning_rate": 1.2812500000000001e-05,
      "loss": 0.2789,
      "step": 41
    },
    {
      "epoch": 0.13125,
      "grad_norm": 3.2574217319488525,
      "learning_rate": 1.3125e-05,
      "loss": 0.7986,
      "step": 42
    },
    {
      "epoch": 0.134375,
      "grad_norm": 3.6287729740142822,
      "learning_rate": 1.3437500000000001e-05,
      "loss": 0.9653,
      "step": 43
    },
    {
      "epoch": 0.1375,
      "grad_norm": 3.1281752586364746,
      "learning_rate": 1.375e-05,
      "loss": 0.7857,
      "step": 44
    },
    {
      "epoch": 0.140625,
      "grad_norm": 2.201566219329834,
      "learning_rate": 1.4062500000000001e-05,
      "loss": 0.2726,
      "step": 45
    },
    {
      "epoch": 0.14375,
      "grad_norm": 1.8727688789367676,
      "learning_rate": 1.4375e-05,
      "loss": 0.2458,
      "step": 46
    },
    {
      "epoch": 0.146875,
      "grad_norm": 3.156454086303711,
      "learning_rate": 1.4687500000000001e-05,
      "loss": 0.6988,
      "step": 47
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.0224971771240234,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.6328,
      "step": 48
    },
    {
      "epoch": 0.153125,
      "grad_norm": 3.4717319011688232,
      "learning_rate": 1.5312500000000003e-05,
      "loss": 0.795,
      "step": 49
    },
    {
      "epoch": 0.15625,
      "grad_norm": 2.8961374759674072,
      "learning_rate": 1.5625e-05,
      "loss": 0.6163,
      "step": 50
    },
    {
      "epoch": 0.159375,
      "grad_norm": 3.667778491973877,
      "learning_rate": 1.59375e-05,
      "loss": 0.8269,
      "step": 51
    },
    {
      "epoch": 0.1625,
      "grad_norm": 2.350587844848633,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 0.52,
      "step": 52
    },
    {
      "epoch": 0.165625,
      "grad_norm": 3.312248468399048,
      "learning_rate": 1.6562500000000003e-05,
      "loss": 0.7523,
      "step": 53
    },
    {
      "epoch": 0.16875,
      "grad_norm": 2.8101534843444824,
      "learning_rate": 1.6875e-05,
      "loss": 0.6979,
      "step": 54
    },
    {
      "epoch": 0.171875,
      "grad_norm": 3.144334077835083,
      "learning_rate": 1.71875e-05,
      "loss": 0.7845,
      "step": 55
    },
    {
      "epoch": 0.175,
      "grad_norm": 3.671412229537964,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 0.9325,
      "step": 56
    },
    {
      "epoch": 0.178125,
      "grad_norm": 3.204644203186035,
      "learning_rate": 1.7812500000000003e-05,
      "loss": 0.8546,
      "step": 57
    },
    {
      "epoch": 0.18125,
      "grad_norm": 2.9951093196868896,
      "learning_rate": 1.8125e-05,
      "loss": 0.6392,
      "step": 58
    },
    {
      "epoch": 0.184375,
      "grad_norm": 3.036386013031006,
      "learning_rate": 1.84375e-05,
      "loss": 0.5827,
      "step": 59
    },
    {
      "epoch": 0.1875,
      "grad_norm": 3.0899698734283447,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.5961,
      "step": 60
    },
    {
      "epoch": 0.190625,
      "grad_norm": 2.3574728965759277,
      "learning_rate": 1.9062500000000003e-05,
      "loss": 0.3625,
      "step": 61
    },
    {
      "epoch": 0.19375,
      "grad_norm": 2.4232304096221924,
      "learning_rate": 1.9375e-05,
      "loss": 0.2584,
      "step": 62
    },
    {
      "epoch": 0.196875,
      "grad_norm": 1.9016233682632446,
      "learning_rate": 1.96875e-05,
      "loss": 0.4047,
      "step": 63
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.193114995956421,
      "learning_rate": 2e-05,
      "loss": 0.9429,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_VitaminC_cosine_accuracy": 0.560546875,
      "eval_VitaminC_cosine_accuracy_threshold": 0.8192525506019592,
      "eval_VitaminC_cosine_ap": 0.5485465805560719,
      "eval_VitaminC_cosine_f1": 0.6675531914893617,
      "eval_VitaminC_cosine_f1_threshold": 0.30620089173316956,
      "eval_VitaminC_cosine_precision": 0.500998003992016,
      "eval_VitaminC_cosine_recall": 1.0,
      "eval_VitaminC_dot_accuracy": 0.5546875,
      "eval_VitaminC_dot_accuracy_threshold": 308.60137939453125,
      "eval_VitaminC_dot_ap": 0.5375184580780159,
      "eval_VitaminC_dot_f1": 0.6657824933687002,
      "eval_VitaminC_dot_f1_threshold": 97.275634765625,
      "eval_VitaminC_dot_precision": 0.4990059642147117,
      "eval_VitaminC_dot_recall": 1.0,
      "eval_VitaminC_euclidean_accuracy": 0.552734375,
      "eval_VitaminC_euclidean_accuracy_threshold": 11.976862907409668,
      "eval_VitaminC_euclidean_ap": 0.5494925067012235,
      "eval_VitaminC_euclidean_f1": 0.6666666666666666,
      "eval_VitaminC_euclidean_f1_threshold": 23.21343994140625,
      "eval_VitaminC_euclidean_precision": 0.5,
      "eval_VitaminC_euclidean_recall": 1.0,
      "eval_VitaminC_manhattan_accuracy": 0.552734375,
      "eval_VitaminC_manhattan_accuracy_threshold": 313.34185791015625,
      "eval_VitaminC_manhattan_ap": 0.5475158315491966,
      "eval_VitaminC_manhattan_f1": 0.6666666666666666,
      "eval_VitaminC_manhattan_f1_threshold": 495.06231689453125,
      "eval_VitaminC_manhattan_precision": 0.5,
      "eval_VitaminC_manhattan_recall": 1.0,
      "eval_VitaminC_max_accuracy": 0.560546875,
      "eval_VitaminC_max_accuracy_threshold": 313.34185791015625,
      "eval_VitaminC_max_ap": 0.5494925067012235,
      "eval_VitaminC_max_f1": 0.6675531914893617,
      "eval_VitaminC_max_f1_threshold": 495.06231689453125,
      "eval_VitaminC_max_precision": 0.500998003992016,
      "eval_VitaminC_max_recall": 1.0,
      "eval_sequential_score": 0.5494925067012235,
      "eval_sts-test_pearson_cosine": 0.8681028367252808,
      "eval_sts-test_pearson_dot": 0.8578643818026934,
      "eval_sts-test_pearson_euclidean": 0.8913506886125709,
      "eval_sts-test_pearson_manhattan": 0.8922209656727235,
      "eval_sts-test_pearson_max": 0.8922209656727235,
      "eval_sts-test_spearman_cosine": 0.8960442588011338,
      "eval_sts-test_spearman_dot": 0.8606696844578128,
      "eval_sts-test_spearman_euclidean": 0.8895474944286376,
      "eval_sts-test_spearman_manhattan": 0.8895341585527426,
      "eval_sts-test_spearman_max": 0.8960442588011338,
      "eval_vitaminc-pairs_loss": 2.260099411010742,
      "eval_vitaminc-pairs_runtime": 1.8392,
      "eval_vitaminc-pairs_samples_per_second": 58.723,
      "eval_vitaminc-pairs_steps_per_second": 1.087,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_negation-triplets_loss": 0.836820662021637,
      "eval_negation-triplets_runtime": 0.294,
      "eval_negation-triplets_samples_per_second": 217.7,
      "eval_negation-triplets_steps_per_second": 3.402,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_scitail-pairs-pos_loss": 0.08362159878015518,
      "eval_scitail-pairs-pos_runtime": 0.3686,
      "eval_scitail-pairs-pos_samples_per_second": 146.509,
      "eval_scitail-pairs-pos_steps_per_second": 2.713,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_xsum-pairs_loss": 0.08567425608634949,
      "eval_xsum-pairs_runtime": 2.8489,
      "eval_xsum-pairs_samples_per_second": 44.93,
      "eval_xsum-pairs_steps_per_second": 0.702,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_sciq_pairs_loss": 0.019713517278432846,
      "eval_sciq_pairs_runtime": 3.616,
      "eval_sciq_pairs_samples_per_second": 35.399,
      "eval_sciq_pairs_steps_per_second": 0.553,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_qasc_pairs_loss": 0.11403815448284149,
      "eval_qasc_pairs_runtime": 0.6024,
      "eval_qasc_pairs_samples_per_second": 212.48,
      "eval_qasc_pairs_steps_per_second": 3.32,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_openbookqa_pairs_loss": 0.6793034076690674,
      "eval_openbookqa_pairs_runtime": 0.5864,
      "eval_openbookqa_pairs_samples_per_second": 218.266,
      "eval_openbookqa_pairs_steps_per_second": 3.41,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_msmarco_pairs_loss": 0.34600257873535156,
      "eval_msmarco_pairs_runtime": 1.4668,
      "eval_msmarco_pairs_samples_per_second": 87.263,
      "eval_msmarco_pairs_steps_per_second": 1.363,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_nq_pairs_loss": 0.22141708433628082,
      "eval_nq_pairs_runtime": 2.8596,
      "eval_nq_pairs_samples_per_second": 44.761,
      "eval_nq_pairs_steps_per_second": 0.699,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_trivia_pairs_loss": 0.7303681969642639,
      "eval_trivia_pairs_runtime": 4.3864,
      "eval_trivia_pairs_samples_per_second": 29.181,
      "eval_trivia_pairs_steps_per_second": 0.456,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_gooaq_pairs_loss": 0.38013964891433716,
      "eval_gooaq_pairs_runtime": 1.0052,
      "eval_gooaq_pairs_samples_per_second": 127.34,
      "eval_gooaq_pairs_steps_per_second": 1.99,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_paws-pos_loss": 0.024541139602661133,
      "eval_paws-pos_runtime": 0.6851,
      "eval_paws-pos_samples_per_second": 186.844,
      "eval_paws-pos_steps_per_second": 2.919,
      "step": 64
    },
    {
      "epoch": 0.203125,
      "grad_norm": 3.5084540843963623,
      "learning_rate": 2.0312500000000002e-05,
      "loss": 0.7848,
      "step": 65
    },
    {
      "epoch": 0.20625,
      "grad_norm": 3.749316453933716,
      "learning_rate": 2.0625000000000003e-05,
      "loss": 0.7589,
      "step": 66
    },
    {
      "epoch": 0.209375,
      "grad_norm": 3.4131276607513428,
      "learning_rate": 2.09375e-05,
      "loss": 0.5905,
      "step": 67
    },
    {
      "epoch": 0.2125,
      "grad_norm": 2.4543726444244385,
      "learning_rate": 2.125e-05,
      "loss": 0.4211,
      "step": 68
    },
    {
      "epoch": 0.215625,
      "grad_norm": 2.6270904541015625,
      "learning_rate": 2.1562500000000002e-05,
      "loss": 0.5325,
      "step": 69
    },
    {
      "epoch": 0.21875,
      "grad_norm": 2.2518444061279297,
      "learning_rate": 2.1875000000000003e-05,
      "loss": 0.3541,
      "step": 70
    },
    {
      "epoch": 0.221875,
      "grad_norm": 3.88729190826416,
      "learning_rate": 2.21875e-05,
      "loss": 0.9396,
      "step": 71
    },
    {
      "epoch": 0.225,
      "grad_norm": 3.2759203910827637,
      "learning_rate": 2.25e-05,
      "loss": 0.6997,
      "step": 72
    },
    {
      "epoch": 0.228125,
      "grad_norm": 3.149787425994873,
      "learning_rate": 2.2812500000000002e-05,
      "loss": 0.6415,
      "step": 73
    },
    {
      "epoch": 0.23125,
      "grad_norm": 4.01395845413208,
      "learning_rate": 2.3125000000000003e-05,
      "loss": 1.1966,
      "step": 74
    },
    {
      "epoch": 0.234375,
      "grad_norm": 3.0432724952697754,
      "learning_rate": 2.34375e-05,
      "loss": 0.7142,
      "step": 75
    },
    {
      "epoch": 0.2375,
      "grad_norm": 2.960078716278076,
      "learning_rate": 2.375e-05,
      "loss": 0.6048,
      "step": 76
    },
    {
      "epoch": 0.240625,
      "grad_norm": 2.414846658706665,
      "learning_rate": 2.4062500000000002e-05,
      "loss": 0.4639,
      "step": 77
    },
    {
      "epoch": 0.24375,
      "grad_norm": 4.241907119750977,
      "learning_rate": 2.4375000000000003e-05,
      "loss": 0.9391,
      "step": 78
    },
    {
      "epoch": 0.246875,
      "grad_norm": 3.350724220275879,
      "learning_rate": 2.46875e-05,
      "loss": 0.6364,
      "step": 79
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.519324541091919,
      "learning_rate": 2.5e-05,
      "loss": 0.515,
      "step": 80
    },
    {
      "epoch": 0.253125,
      "grad_norm": 3.655949592590332,
      "learning_rate": 2.5312500000000002e-05,
      "loss": 0.6505,
      "step": 81
    },
    {
      "epoch": 0.25625,
      "grad_norm": 3.1521031856536865,
      "learning_rate": 2.5625000000000003e-05,
      "loss": 0.6149,
      "step": 82
    },
    {
      "epoch": 0.259375,
      "grad_norm": 2.637176036834717,
      "learning_rate": 2.5937500000000004e-05,
      "loss": 0.4471,
      "step": 83
    },
    {
      "epoch": 0.2625,
      "grad_norm": 4.223080158233643,
      "learning_rate": 2.625e-05,
      "loss": 1.4199,
      "step": 84
    },
    {
      "epoch": 0.265625,
      "grad_norm": 3.141789436340332,
      "learning_rate": 2.6562500000000002e-05,
      "loss": 0.8484,
      "step": 85
    },
    {
      "epoch": 0.26875,
      "grad_norm": 3.2342255115509033,
      "learning_rate": 2.6875000000000003e-05,
      "loss": 0.6412,
      "step": 86
    },
    {
      "epoch": 0.271875,
      "grad_norm": 3.445375442504883,
      "learning_rate": 2.7187500000000004e-05,
      "loss": 0.65,
      "step": 87
    },
    {
      "epoch": 0.275,
      "grad_norm": 3.395848035812378,
      "learning_rate": 2.75e-05,
      "loss": 0.7453,
      "step": 88
    },
    {
      "epoch": 0.278125,
      "grad_norm": 3.752084493637085,
      "learning_rate": 2.7812500000000002e-05,
      "loss": 0.9506,
      "step": 89
    },
    {
      "epoch": 0.28125,
      "grad_norm": 3.2424893379211426,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 0.6083,
      "step": 90
    },
    {
      "epoch": 0.284375,
      "grad_norm": 2.8851892948150635,
      "learning_rate": 2.8437500000000003e-05,
      "loss": 0.7102,
      "step": 91
    },
    {
      "epoch": 0.2875,
      "grad_norm": 2.385157823562622,
      "learning_rate": 2.875e-05,
      "loss": 0.4037,
      "step": 92
    },
    {
      "epoch": 0.290625,
      "grad_norm": 3.5539441108703613,
      "learning_rate": 2.90625e-05,
      "loss": 0.769,
      "step": 93
    },
    {
      "epoch": 0.29375,
      "grad_norm": 3.686418056488037,
      "learning_rate": 2.9375000000000003e-05,
      "loss": 0.8765,
      "step": 94
    },
    {
      "epoch": 0.296875,
      "grad_norm": 3.9195055961608887,
      "learning_rate": 2.9687500000000003e-05,
      "loss": 1.2583,
      "step": 95
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.5373759269714355,
      "learning_rate": 3.0000000000000004e-05,
      "loss": 0.8885,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_VitaminC_cosine_accuracy": 0.55859375,
      "eval_VitaminC_cosine_accuracy_threshold": 0.8407348990440369,
      "eval_VitaminC_cosine_ap": 0.5524635737287826,
      "eval_VitaminC_cosine_f1": 0.6666666666666666,
      "eval_VitaminC_cosine_f1_threshold": 0.2901695668697357,
      "eval_VitaminC_cosine_precision": 0.5,
      "eval_VitaminC_cosine_recall": 1.0,
      "eval_VitaminC_dot_accuracy": 0.5546875,
      "eval_VitaminC_dot_accuracy_threshold": 331.7409973144531,
      "eval_VitaminC_dot_ap": 0.5393192469559877,
      "eval_VitaminC_dot_f1": 0.6657824933687002,
      "eval_VitaminC_dot_f1_threshold": 104.93923950195312,
      "eval_VitaminC_dot_precision": 0.4990059642147117,
      "eval_VitaminC_dot_recall": 1.0,
      "eval_VitaminC_euclidean_accuracy": 0.5546875,
      "eval_VitaminC_euclidean_accuracy_threshold": 10.979323387145996,
      "eval_VitaminC_euclidean_ap": 0.5510789245842218,
      "eval_VitaminC_euclidean_f1": 0.6666666666666666,
      "eval_VitaminC_euclidean_f1_threshold": 23.105466842651367,
      "eval_VitaminC_euclidean_precision": 0.5,
      "eval_VitaminC_euclidean_recall": 1.0,
      "eval_VitaminC_manhattan_accuracy": 0.55078125,
      "eval_VitaminC_manhattan_accuracy_threshold": 228.8612060546875,
      "eval_VitaminC_manhattan_ap": 0.550140326019901,
      "eval_VitaminC_manhattan_f1": 0.6666666666666667,
      "eval_VitaminC_manhattan_f1_threshold": 479.256103515625,
      "eval_VitaminC_manhattan_precision": 0.501002004008016,
      "eval_VitaminC_manhattan_recall": 0.9960159362549801,
      "eval_VitaminC_max_accuracy": 0.55859375,
      "eval_VitaminC_max_accuracy_threshold": 331.7409973144531,
      "eval_VitaminC_max_ap": 0.5524635737287826,
      "eval_VitaminC_max_f1": 0.6666666666666667,
      "eval_VitaminC_max_f1_threshold": 479.256103515625,
      "eval_VitaminC_max_precision": 0.501002004008016,
      "eval_VitaminC_max_recall": 1.0,
      "eval_sequential_score": 0.5524635737287826,
      "eval_sts-test_pearson_cosine": 0.8707252459918289,
      "eval_sts-test_pearson_dot": 0.8616721319399807,
      "eval_sts-test_pearson_euclidean": 0.8926205493906139,
      "eval_sts-test_pearson_manhattan": 0.8931067612799872,
      "eval_sts-test_pearson_max": 0.8931067612799872,
      "eval_sts-test_spearman_cosine": 0.8969095691913977,
      "eval_sts-test_spearman_dot": 0.8614390033923923,
      "eval_sts-test_spearman_euclidean": 0.8906887410966409,
      "eval_sts-test_spearman_manhattan": 0.8902939007173846,
      "eval_sts-test_spearman_max": 0.8969095691913977,
      "eval_vitaminc-pairs_loss": 2.259434938430786,
      "eval_vitaminc-pairs_runtime": 1.8587,
      "eval_vitaminc-pairs_samples_per_second": 58.104,
      "eval_vitaminc-pairs_steps_per_second": 1.076,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_negation-triplets_loss": 0.8346852660179138,
      "eval_negation-triplets_runtime": 0.2932,
      "eval_negation-triplets_samples_per_second": 218.315,
      "eval_negation-triplets_steps_per_second": 3.411,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_scitail-pairs-pos_loss": 0.07568605989217758,
      "eval_scitail-pairs-pos_runtime": 0.3763,
      "eval_scitail-pairs-pos_samples_per_second": 143.494,
      "eval_scitail-pairs-pos_steps_per_second": 2.657,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_xsum-pairs_loss": 0.08208194375038147,
      "eval_xsum-pairs_runtime": 2.8486,
      "eval_xsum-pairs_samples_per_second": 44.934,
      "eval_xsum-pairs_steps_per_second": 0.702,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_sciq_pairs_loss": 0.020024314522743225,
      "eval_sciq_pairs_runtime": 3.6173,
      "eval_sciq_pairs_samples_per_second": 35.386,
      "eval_sciq_pairs_steps_per_second": 0.553,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_qasc_pairs_loss": 0.10592304170131683,
      "eval_qasc_pairs_runtime": 0.5997,
      "eval_qasc_pairs_samples_per_second": 213.431,
      "eval_qasc_pairs_steps_per_second": 3.335,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_openbookqa_pairs_loss": 0.6809090971946716,
      "eval_openbookqa_pairs_runtime": 0.5752,
      "eval_openbookqa_pairs_samples_per_second": 222.54,
      "eval_openbookqa_pairs_steps_per_second": 3.477,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_msmarco_pairs_loss": 0.3400232195854187,
      "eval_msmarco_pairs_runtime": 1.4679,
      "eval_msmarco_pairs_samples_per_second": 87.202,
      "eval_msmarco_pairs_steps_per_second": 1.363,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_nq_pairs_loss": 0.2074178159236908,
      "eval_nq_pairs_runtime": 2.8593,
      "eval_nq_pairs_samples_per_second": 44.766,
      "eval_nq_pairs_steps_per_second": 0.699,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_trivia_pairs_loss": 0.7431399822235107,
      "eval_trivia_pairs_runtime": 4.4162,
      "eval_trivia_pairs_samples_per_second": 28.984,
      "eval_trivia_pairs_steps_per_second": 0.453,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_gooaq_pairs_loss": 0.3708875775337219,
      "eval_gooaq_pairs_runtime": 1.0094,
      "eval_gooaq_pairs_samples_per_second": 126.81,
      "eval_gooaq_pairs_steps_per_second": 1.981,
      "step": 96
    },
    {
      "epoch": 0.3,
      "eval_paws-pos_loss": 0.024763749912381172,
      "eval_paws-pos_runtime": 0.6874,
      "eval_paws-pos_samples_per_second": 186.212,
      "eval_paws-pos_steps_per_second": 2.91,
      "step": 96
    },
    {
      "epoch": 0.303125,
      "grad_norm": 3.2354822158813477,
      "learning_rate": 3.03125e-05,
      "loss": 0.6398,
      "step": 97
    },
    {
      "epoch": 0.30625,
      "grad_norm": 3.6665022373199463,
      "learning_rate": 3.0625000000000006e-05,
      "loss": 0.8263,
      "step": 98
    },
    {
      "epoch": 0.309375,
      "grad_norm": 3.026954412460327,
      "learning_rate": 3.09375e-05,
      "loss": 0.8716,
      "step": 99
    },
    {
      "epoch": 0.3125,
      "grad_norm": 2.445453643798828,
      "learning_rate": 3.125e-05,
      "loss": 0.5523,
      "step": 100
    },
    {
      "epoch": 0.315625,
      "grad_norm": 3.4408035278320312,
      "learning_rate": 3.15625e-05,
      "loss": 0.5811,
      "step": 101
    },
    {
      "epoch": 0.31875,
      "grad_norm": 2.8406240940093994,
      "learning_rate": 3.1875e-05,
      "loss": 0.7602,
      "step": 102
    },
    {
      "epoch": 0.321875,
      "grad_norm": 2.5201492309570312,
      "learning_rate": 3.21875e-05,
      "loss": 0.5337,
      "step": 103
    },
    {
      "epoch": 0.325,
      "grad_norm": 3.323239326477051,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.8182,
      "step": 104
    },
    {
      "epoch": 0.328125,
      "grad_norm": 3.2463977336883545,
      "learning_rate": 3.2812500000000005e-05,
      "loss": 0.6641,
      "step": 105
    },
    {
      "epoch": 0.33125,
      "grad_norm": 3.4495010375976562,
      "learning_rate": 3.3125000000000006e-05,
      "loss": 1.0088,
      "step": 106
    },
    {
      "epoch": 0.334375,
      "grad_norm": 2.7572243213653564,
      "learning_rate": 3.34375e-05,
      "loss": 0.7556,
      "step": 107
    },
    {
      "epoch": 0.3375,
      "grad_norm": 3.494549512863159,
      "learning_rate": 3.375e-05,
      "loss": 0.713,
      "step": 108
    },
    {
      "epoch": 0.340625,
      "grad_norm": 3.4666013717651367,
      "learning_rate": 3.40625e-05,
      "loss": 0.8385,
      "step": 109
    },
    {
      "epoch": 0.34375,
      "grad_norm": 3.05104660987854,
      "learning_rate": 3.4375e-05,
      "loss": 0.5181,
      "step": 110
    },
    {
      "epoch": 0.346875,
      "grad_norm": 3.8259003162384033,
      "learning_rate": 3.46875e-05,
      "loss": 1.0939,
      "step": 111
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.287792205810547,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 0.5826,
      "step": 112
    },
    {
      "epoch": 0.353125,
      "grad_norm": 3.9174458980560303,
      "learning_rate": 3.5312500000000005e-05,
      "loss": 0.7121,
      "step": 113
    },
    {
      "epoch": 0.35625,
      "grad_norm": 3.424893379211426,
      "learning_rate": 3.5625000000000005e-05,
      "loss": 0.9371,
      "step": 114
    },
    {
      "epoch": 0.359375,
      "grad_norm": 3.5157482624053955,
      "learning_rate": 3.5937500000000006e-05,
      "loss": 0.7739,
      "step": 115
    },
    {
      "epoch": 0.3625,
      "grad_norm": 4.468640327453613,
      "learning_rate": 3.625e-05,
      "loss": 0.9612,
      "step": 116
    },
    {
      "epoch": 0.365625,
      "grad_norm": 3.4379608631134033,
      "learning_rate": 3.65625e-05,
      "loss": 0.7213,
      "step": 117
    },
    {
      "epoch": 0.36875,
      "grad_norm": 2.9453623294830322,
      "learning_rate": 3.6875e-05,
      "loss": 0.621,
      "step": 118
    },
    {
      "epoch": 0.371875,
      "grad_norm": 2.4365315437316895,
      "learning_rate": 3.71875e-05,
      "loss": 0.5503,
      "step": 119
    },
    {
      "epoch": 0.375,
      "grad_norm": 3.446967124938965,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.8439,
      "step": 120
    },
    {
      "epoch": 0.378125,
      "grad_norm": 3.8797788619995117,
      "learning_rate": 3.7812500000000004e-05,
      "loss": 0.7813,
      "step": 121
    },
    {
      "epoch": 0.38125,
      "grad_norm": 3.0103230476379395,
      "learning_rate": 3.8125000000000005e-05,
      "loss": 0.5637,
      "step": 122
    },
    {
      "epoch": 0.384375,
      "grad_norm": 3.9547793865203857,
      "learning_rate": 3.8437500000000006e-05,
      "loss": 0.9052,
      "step": 123
    },
    {
      "epoch": 0.3875,
      "grad_norm": 2.953261375427246,
      "learning_rate": 3.875e-05,
      "loss": 0.64,
      "step": 124
    },
    {
      "epoch": 0.390625,
      "grad_norm": 2.914365768432617,
      "learning_rate": 3.90625e-05,
      "loss": 0.6529,
      "step": 125
    },
    {
      "epoch": 0.39375,
      "grad_norm": 3.346844434738159,
      "learning_rate": 3.9375e-05,
      "loss": 0.6894,
      "step": 126
    },
    {
      "epoch": 0.396875,
      "grad_norm": 3.946427583694458,
      "learning_rate": 3.96875e-05,
      "loss": 0.8604,
      "step": 127
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.3265583515167236,
      "learning_rate": 4e-05,
      "loss": 0.8503,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_VitaminC_cosine_accuracy": 0.55859375,
      "eval_VitaminC_cosine_accuracy_threshold": 0.835027813911438,
      "eval_VitaminC_cosine_ap": 0.5482054260732142,
      "eval_VitaminC_cosine_f1": 0.6666666666666666,
      "eval_VitaminC_cosine_f1_threshold": 0.28428012132644653,
      "eval_VitaminC_cosine_precision": 0.5,
      "eval_VitaminC_cosine_recall": 1.0,
      "eval_VitaminC_dot_accuracy": 0.548828125,
      "eval_VitaminC_dot_accuracy_threshold": 321.1236572265625,
      "eval_VitaminC_dot_ap": 0.5350248143918641,
      "eval_VitaminC_dot_f1": 0.6649006622516557,
      "eval_VitaminC_dot_f1_threshold": 94.1016616821289,
      "eval_VitaminC_dot_precision": 0.498015873015873,
      "eval_VitaminC_dot_recall": 1.0,
      "eval_VitaminC_euclidean_accuracy": 0.55859375,
      "eval_VitaminC_euclidean_accuracy_threshold": 13.260427474975586,
      "eval_VitaminC_euclidean_ap": 0.551773706587656,
      "eval_VitaminC_euclidean_f1": 0.6657824933687002,
      "eval_VitaminC_euclidean_f1_threshold": 23.911056518554688,
      "eval_VitaminC_euclidean_precision": 0.4990059642147117,
      "eval_VitaminC_euclidean_recall": 1.0,
      "eval_VitaminC_manhattan_accuracy": 0.55859375,
      "eval_VitaminC_manhattan_accuracy_threshold": 273.4624328613281,
      "eval_VitaminC_manhattan_ap": 0.5494410762635437,
      "eval_VitaminC_manhattan_f1": 0.6666666666666667,
      "eval_VitaminC_manhattan_f1_threshold": 472.7373046875,
      "eval_VitaminC_manhattan_precision": 0.5020161290322581,
      "eval_VitaminC_manhattan_recall": 0.9920318725099602,
      "eval_VitaminC_max_accuracy": 0.55859375,
      "eval_VitaminC_max_accuracy_threshold": 321.1236572265625,
      "eval_VitaminC_max_ap": 0.551773706587656,
      "eval_VitaminC_max_f1": 0.6666666666666667,
      "eval_VitaminC_max_f1_threshold": 472.7373046875,
      "eval_VitaminC_max_precision": 0.5020161290322581,
      "eval_VitaminC_max_recall": 1.0,
      "eval_sequential_score": 0.551773706587656,
      "eval_sts-test_pearson_cosine": 0.8672675483925697,
      "eval_sts-test_pearson_dot": 0.8586110849200466,
      "eval_sts-test_pearson_euclidean": 0.8915515585715386,
      "eval_sts-test_pearson_manhattan": 0.8913674606593633,
      "eval_sts-test_pearson_max": 0.8915515585715386,
      "eval_sts-test_spearman_cosine": 0.8969123885208655,
      "eval_sts-test_spearman_dot": 0.8619306407500383,
      "eval_sts-test_spearman_euclidean": 0.8903670690297594,
      "eval_sts-test_spearman_manhattan": 0.890351227083227,
      "eval_sts-test_spearman_max": 0.8969123885208655,
      "eval_vitaminc-pairs_loss": 2.0338199138641357,
      "eval_vitaminc-pairs_runtime": 1.8309,
      "eval_vitaminc-pairs_samples_per_second": 58.988,
      "eval_vitaminc-pairs_steps_per_second": 1.092,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_negation-triplets_loss": 0.7916581630706787,
      "eval_negation-triplets_runtime": 0.2912,
      "eval_negation-triplets_samples_per_second": 219.766,
      "eval_negation-triplets_steps_per_second": 3.434,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_scitail-pairs-pos_loss": 0.07755717635154724,
      "eval_scitail-pairs-pos_runtime": 0.3716,
      "eval_scitail-pairs-pos_samples_per_second": 145.312,
      "eval_scitail-pairs-pos_steps_per_second": 2.691,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_xsum-pairs_loss": 0.08196285367012024,
      "eval_xsum-pairs_runtime": 2.852,
      "eval_xsum-pairs_samples_per_second": 44.881,
      "eval_xsum-pairs_steps_per_second": 0.701,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_sciq_pairs_loss": 0.020960956811904907,
      "eval_sciq_pairs_runtime": 3.5913,
      "eval_sciq_pairs_samples_per_second": 35.642,
      "eval_sciq_pairs_steps_per_second": 0.557,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_qasc_pairs_loss": 0.11308694630861282,
      "eval_qasc_pairs_runtime": 0.595,
      "eval_qasc_pairs_samples_per_second": 215.137,
      "eval_qasc_pairs_steps_per_second": 3.362,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_openbookqa_pairs_loss": 0.7888042330741882,
      "eval_openbookqa_pairs_runtime": 0.5711,
      "eval_openbookqa_pairs_samples_per_second": 224.114,
      "eval_openbookqa_pairs_steps_per_second": 3.502,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_msmarco_pairs_loss": 0.3428971469402313,
      "eval_msmarco_pairs_runtime": 1.465,
      "eval_msmarco_pairs_samples_per_second": 87.373,
      "eval_msmarco_pairs_steps_per_second": 1.365,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_nq_pairs_loss": 0.20846250653266907,
      "eval_nq_pairs_runtime": 2.8581,
      "eval_nq_pairs_samples_per_second": 44.786,
      "eval_nq_pairs_steps_per_second": 0.7,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_trivia_pairs_loss": 0.7110738754272461,
      "eval_trivia_pairs_runtime": 4.3917,
      "eval_trivia_pairs_samples_per_second": 29.146,
      "eval_trivia_pairs_steps_per_second": 0.455,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_gooaq_pairs_loss": 0.3744402229785919,
      "eval_gooaq_pairs_runtime": 1.0043,
      "eval_gooaq_pairs_samples_per_second": 127.448,
      "eval_gooaq_pairs_steps_per_second": 1.991,
      "step": 128
    },
    {
      "epoch": 0.4,
      "eval_paws-pos_loss": 0.024828137829899788,
      "eval_paws-pos_runtime": 0.6859,
      "eval_paws-pos_samples_per_second": 186.611,
      "eval_paws-pos_steps_per_second": 2.916,
      "step": 128
    },
    {
      "epoch": 0.403125,
      "grad_norm": 3.7963619232177734,
      "learning_rate": 3.999971762923902e-05,
      "loss": 0.8171,
      "step": 129
    },
    {
      "epoch": 0.40625,
      "grad_norm": 3.987645387649536,
      "learning_rate": 3.999887052758717e-05,
      "loss": 1.0401,
      "step": 130
    },
    {
      "epoch": 0.409375,
      "grad_norm": 2.653578758239746,
      "learning_rate": 3.999745872693735e-05,
      "loss": 0.4243,
      "step": 131
    },
    {
      "epoch": 0.4125,
      "grad_norm": 2.3737175464630127,
      "learning_rate": 3.9995482280443065e-05,
      "loss": 0.3778,
      "step": 132
    },
    {
      "epoch": 0.415625,
      "grad_norm": 3.334118127822876,
      "learning_rate": 3.99929412625164e-05,
      "loss": 0.7651,
      "step": 133
    },
    {
      "epoch": 0.41875,
      "grad_norm": 3.5098752975463867,
      "learning_rate": 3.998983576882524e-05,
      "loss": 0.6003,
      "step": 134
    },
    {
      "epoch": 0.421875,
      "grad_norm": 3.023698091506958,
      "learning_rate": 3.9986165916289686e-05,
      "loss": 0.6023,
      "step": 135
    },
    {
      "epoch": 0.425,
      "grad_norm": 3.293668746948242,
      "learning_rate": 3.998193184307759e-05,
      "loss": 0.6079,
      "step": 136
    },
    {
      "epoch": 0.428125,
      "grad_norm": 3.326125144958496,
      "learning_rate": 3.997713370859942e-05,
      "loss": 0.6206,
      "step": 137
    },
    {
      "epoch": 0.43125,
      "grad_norm": 3.322040557861328,
      "learning_rate": 3.997177169350224e-05,
      "loss": 0.4694,
      "step": 138
    },
    {
      "epoch": 0.434375,
      "grad_norm": 3.1219382286071777,
      "learning_rate": 3.996584599966288e-05,
      "loss": 0.7528,
      "step": 139
    },
    {
      "epoch": 0.4375,
      "grad_norm": 3.7076480388641357,
      "learning_rate": 3.9959356850180354e-05,
      "loss": 0.8395,
      "step": 140
    },
    {
      "epoch": 0.440625,
      "grad_norm": 3.1098551750183105,
      "learning_rate": 3.995230448936749e-05,
      "loss": 0.6689,
      "step": 141
    },
    {
      "epoch": 0.44375,
      "grad_norm": 3.31339168548584,
      "learning_rate": 3.9944689182741674e-05,
      "loss": 0.6547,
      "step": 142
    },
    {
      "epoch": 0.446875,
      "grad_norm": 4.2841386795043945,
      "learning_rate": 3.99365112170149e-05,
      "loss": 0.9242,
      "step": 143
    },
    {
      "epoch": 0.45,
      "grad_norm": 4.0628132820129395,
      "learning_rate": 3.992777090008296e-05,
      "loss": 0.9496,
      "step": 144
    },
    {
      "epoch": 0.453125,
      "grad_norm": 3.484614849090576,
      "learning_rate": 3.9918468561013834e-05,
      "loss": 0.6506,
      "step": 145
    },
    {
      "epoch": 0.45625,
      "grad_norm": 3.4139559268951416,
      "learning_rate": 3.990860455003534e-05,
      "loss": 0.786,
      "step": 146
    },
    {
      "epoch": 0.459375,
      "grad_norm": 3.4322853088378906,
      "learning_rate": 3.9898179238521916e-05,
      "loss": 0.7414,
      "step": 147
    },
    {
      "epoch": 0.4625,
      "grad_norm": 2.660554885864258,
      "learning_rate": 3.9887193018980654e-05,
      "loss": 0.3978,
      "step": 148
    },
    {
      "epoch": 0.465625,
      "grad_norm": 2.6429054737091064,
      "learning_rate": 3.9875646305036494e-05,
      "loss": 0.5635,
      "step": 149
    },
    {
      "epoch": 0.46875,
      "grad_norm": 4.292131423950195,
      "learning_rate": 3.98635395314167e-05,
      "loss": 0.9466,
      "step": 150
    },
    {
      "epoch": 0.471875,
      "grad_norm": 3.1115028858184814,
      "learning_rate": 3.9850873153934456e-05,
      "loss": 0.5251,
      "step": 151
    },
    {
      "epoch": 0.475,
      "grad_norm": 3.307051181793213,
      "learning_rate": 3.983764764947172e-05,
      "loss": 0.6636,
      "step": 152
    },
    {
      "epoch": 0.478125,
      "grad_norm": 3.807854652404785,
      "learning_rate": 3.9823863515961245e-05,
      "loss": 0.7834,
      "step": 153
    },
    {
      "epoch": 0.48125,
      "grad_norm": 2.9957728385925293,
      "learning_rate": 3.980952127236788e-05,
      "loss": 0.6177,
      "step": 154
    },
    {
      "epoch": 0.484375,
      "grad_norm": 3.3072471618652344,
      "learning_rate": 3.979462145866898e-05,
      "loss": 0.4558,
      "step": 155
    },
    {
      "epoch": 0.4875,
      "grad_norm": 3.0199949741363525,
      "learning_rate": 3.977916463583412e-05,
      "loss": 0.5228,
      "step": 156
    },
    {
      "epoch": 0.490625,
      "grad_norm": 2.8596651554107666,
      "learning_rate": 3.9763151385803936e-05,
      "loss": 0.5543,
      "step": 157
    },
    {
      "epoch": 0.49375,
      "grad_norm": 3.0589263439178467,
      "learning_rate": 3.974658231146825e-05,
      "loss": 0.7127,
      "step": 158
    },
    {
      "epoch": 0.496875,
      "grad_norm": 2.489602565765381,
      "learning_rate": 3.9729458036643335e-05,
      "loss": 0.4227,
      "step": 159
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.3471999168395996,
      "learning_rate": 3.971177920604846e-05,
      "loss": 0.5914,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_VitaminC_cosine_accuracy": 0.55859375,
      "eval_VitaminC_cosine_accuracy_threshold": 0.8433390855789185,
      "eval_VitaminC_cosine_ap": 0.5529005025024077,
      "eval_VitaminC_cosine_f1": 0.6657824933687002,
      "eval_VitaminC_cosine_f1_threshold": 0.3040446639060974,
      "eval_VitaminC_cosine_precision": 0.4990059642147117,
      "eval_VitaminC_cosine_recall": 1.0,
      "eval_VitaminC_dot_accuracy": 0.55859375,
      "eval_VitaminC_dot_accuracy_threshold": 309.7912902832031,
      "eval_VitaminC_dot_ap": 0.5373200658982779,
      "eval_VitaminC_dot_f1": 0.6666666666666666,
      "eval_VitaminC_dot_f1_threshold": 122.78400421142578,
      "eval_VitaminC_dot_precision": 0.5,
      "eval_VitaminC_dot_recall": 1.0,
      "eval_VitaminC_euclidean_accuracy": 0.5546875,
      "eval_VitaminC_euclidean_accuracy_threshold": 11.011507034301758,
      "eval_VitaminC_euclidean_ap": 0.5542686405562732,
      "eval_VitaminC_euclidean_f1": 0.6675531914893617,
      "eval_VitaminC_euclidean_f1_threshold": 22.90133285522461,
      "eval_VitaminC_euclidean_precision": 0.500998003992016,
      "eval_VitaminC_euclidean_recall": 1.0,
      "eval_VitaminC_manhattan_accuracy": 0.55859375,
      "eval_VitaminC_manhattan_accuracy_threshold": 293.54693603515625,
      "eval_VitaminC_manhattan_ap": 0.5529507613553954,
      "eval_VitaminC_manhattan_f1": 0.6666666666666667,
      "eval_VitaminC_manhattan_f1_threshold": 479.09588623046875,
      "eval_VitaminC_manhattan_precision": 0.501002004008016,
      "eval_VitaminC_manhattan_recall": 0.9960159362549801,
      "eval_VitaminC_max_accuracy": 0.55859375,
      "eval_VitaminC_max_accuracy_threshold": 309.7912902832031,
      "eval_VitaminC_max_ap": 0.5542686405562732,
      "eval_VitaminC_max_f1": 0.6675531914893617,
      "eval_VitaminC_max_f1_threshold": 479.09588623046875,
      "eval_VitaminC_max_precision": 0.501002004008016,
      "eval_VitaminC_max_recall": 1.0,
      "eval_sequential_score": 0.5542686405562732,
      "eval_sts-test_pearson_cosine": 0.8717931331186477,
      "eval_sts-test_pearson_dot": 0.8628985772297639,
      "eval_sts-test_pearson_euclidean": 0.8935960577585327,
      "eval_sts-test_pearson_manhattan": 0.8926162242871916,
      "eval_sts-test_pearson_max": 0.8935960577585327,
      "eval_sts-test_spearman_cosine": 0.8989036406477372,
      "eval_sts-test_spearman_dot": 0.8620115510306339,
      "eval_sts-test_spearman_euclidean": 0.8911198747488857,
      "eval_sts-test_spearman_manhattan": 0.8899440801070879,
      "eval_sts-test_spearman_max": 0.8989036406477372,
      "eval_vitaminc-pairs_loss": 2.0564281940460205,
      "eval_vitaminc-pairs_runtime": 1.8511,
      "eval_vitaminc-pairs_samples_per_second": 58.343,
      "eval_vitaminc-pairs_steps_per_second": 1.08,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_negation-triplets_loss": 0.7865684032440186,
      "eval_negation-triplets_runtime": 0.2987,
      "eval_negation-triplets_samples_per_second": 214.291,
      "eval_negation-triplets_steps_per_second": 3.348,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_scitail-pairs-pos_loss": 0.09969007223844528,
      "eval_scitail-pairs-pos_runtime": 0.384,
      "eval_scitail-pairs-pos_samples_per_second": 140.615,
      "eval_scitail-pairs-pos_steps_per_second": 2.604,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_xsum-pairs_loss": 0.08461853861808777,
      "eval_xsum-pairs_runtime": 2.8533,
      "eval_xsum-pairs_samples_per_second": 44.86,
      "eval_xsum-pairs_steps_per_second": 0.701,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_sciq_pairs_loss": 0.020078735426068306,
      "eval_sciq_pairs_runtime": 3.6458,
      "eval_sciq_pairs_samples_per_second": 35.109,
      "eval_sciq_pairs_steps_per_second": 0.549,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_qasc_pairs_loss": 0.12362705171108246,
      "eval_qasc_pairs_runtime": 0.6028,
      "eval_qasc_pairs_samples_per_second": 212.356,
      "eval_qasc_pairs_steps_per_second": 3.318,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_openbookqa_pairs_loss": 0.6668081283569336,
      "eval_openbookqa_pairs_runtime": 0.5784,
      "eval_openbookqa_pairs_samples_per_second": 221.308,
      "eval_openbookqa_pairs_steps_per_second": 3.458,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_msmarco_pairs_loss": 0.32913729548454285,
      "eval_msmarco_pairs_runtime": 1.4669,
      "eval_msmarco_pairs_samples_per_second": 87.26,
      "eval_msmarco_pairs_steps_per_second": 1.363,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_nq_pairs_loss": 0.2085198312997818,
      "eval_nq_pairs_runtime": 2.8644,
      "eval_nq_pairs_samples_per_second": 44.687,
      "eval_nq_pairs_steps_per_second": 0.698,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_trivia_pairs_loss": 0.7138605117797852,
      "eval_trivia_pairs_runtime": 4.3915,
      "eval_trivia_pairs_samples_per_second": 29.147,
      "eval_trivia_pairs_steps_per_second": 0.455,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_gooaq_pairs_loss": 0.3919322192668915,
      "eval_gooaq_pairs_runtime": 1.004,
      "eval_gooaq_pairs_samples_per_second": 127.484,
      "eval_gooaq_pairs_steps_per_second": 1.992,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_paws-pos_loss": 0.025703923776745796,
      "eval_paws-pos_runtime": 0.6869,
      "eval_paws-pos_samples_per_second": 186.332,
      "eval_paws-pos_steps_per_second": 2.911,
      "step": 160
    },
    {
      "epoch": 0.503125,
      "grad_norm": 2.7484354972839355,
      "learning_rate": 3.9693546485281616e-05,
      "loss": 0.3874,
      "step": 161
    },
    {
      "epoch": 0.50625,
      "grad_norm": 3.9011173248291016,
      "learning_rate": 3.967476056079441e-05,
      "loss": 0.8134,
      "step": 162
    },
    {
      "epoch": 0.509375,
      "grad_norm": 3.723893642425537,
      "learning_rate": 3.9655422139866315e-05,
      "loss": 0.5596,
      "step": 163
    },
    {
      "epoch": 0.5125,
      "grad_norm": 1.8328720331192017,
      "learning_rate": 3.963553195057793e-05,
      "loss": 0.2877,
      "step": 164
    },
    {
      "epoch": 0.515625,
      "grad_norm": 2.9615490436553955,
      "learning_rate": 3.9615090741783634e-05,
      "loss": 0.5218,
      "step": 165
    },
    {
      "epoch": 0.51875,
      "grad_norm": 3.041154146194458,
      "learning_rate": 3.959409928308341e-05,
      "loss": 0.5282,
      "step": 166
    },
    {
      "epoch": 0.521875,
      "grad_norm": 3.439157247543335,
      "learning_rate": 3.957255836479377e-05,
      "loss": 0.7528,
      "step": 167
    },
    {
      "epoch": 0.525,
      "grad_norm": 3.576984405517578,
      "learning_rate": 3.955046879791816e-05,
      "loss": 0.7174,
      "step": 168
    },
    {
      "epoch": 0.528125,
      "grad_norm": 3.1042630672454834,
      "learning_rate": 3.952783141411626e-05,
      "loss": 0.6902,
      "step": 169
    },
    {
      "epoch": 0.53125,
      "grad_norm": 3.0211422443389893,
      "learning_rate": 3.9504647065672785e-05,
      "loss": 0.7486,
      "step": 170
    },
    {
      "epoch": 0.534375,
      "grad_norm": 3.5162508487701416,
      "learning_rate": 3.9480916625465344e-05,
      "loss": 0.6333,
      "step": 171
    },
    {
      "epoch": 0.5375,
      "grad_norm": 3.9070920944213867,
      "learning_rate": 3.9456640986931606e-05,
      "loss": 1.2932,
      "step": 172
    },
    {
      "epoch": 0.540625,
      "grad_norm": 3.548743724822998,
      "learning_rate": 3.943182106403563e-05,
      "loss": 0.6259,
      "step": 173
    },
    {
      "epoch": 0.54375,
      "grad_norm": 3.64949893951416,
      "learning_rate": 3.940645779123349e-05,
      "loss": 0.8357,
      "step": 174
    },
    {
      "epoch": 0.546875,
      "grad_norm": 2.4284133911132812,
      "learning_rate": 3.938055212343807e-05,
      "loss": 0.3604,
      "step": 175
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.9141008853912354,
      "learning_rate": 3.9354105035983135e-05,
      "loss": 0.6598,
      "step": 176
    },
    {
      "epoch": 0.553125,
      "grad_norm": 2.0430235862731934,
      "learning_rate": 3.932711752458657e-05,
      "loss": 0.3169,
      "step": 177
    },
    {
      "epoch": 0.55625,
      "grad_norm": 3.522728204727173,
      "learning_rate": 3.929959060531291e-05,
      "loss": 0.8629,
      "step": 178
    },
    {
      "epoch": 0.559375,
      "grad_norm": 2.419400453567505,
      "learning_rate": 3.927152531453513e-05,
      "loss": 0.3648,
      "step": 179
    },
    {
      "epoch": 0.5625,
      "grad_norm": 2.826747417449951,
      "learning_rate": 3.924292270889555e-05,
      "loss": 0.5103,
      "step": 180
    },
    {
      "epoch": 0.565625,
      "grad_norm": 3.2149524688720703,
      "learning_rate": 3.921378386526612e-05,
      "loss": 0.6255,
      "step": 181
    },
    {
      "epoch": 0.56875,
      "grad_norm": 2.2112457752227783,
      "learning_rate": 3.918410988070782e-05,
      "loss": 0.4382,
      "step": 182
    },
    {
      "epoch": 0.571875,
      "grad_norm": 2.301940441131592,
      "learning_rate": 3.915390187242941e-05,
      "loss": 0.4647,
      "step": 183
    },
    {
      "epoch": 0.575,
      "grad_norm": 2.272001266479492,
      "learning_rate": 3.912316097774532e-05,
      "loss": 0.4218,
      "step": 184
    },
    {
      "epoch": 0.578125,
      "grad_norm": 3.77436900138855,
      "learning_rate": 3.909188835403285e-05,
      "loss": 0.8244,
      "step": 185
    },
    {
      "epoch": 0.58125,
      "grad_norm": 3.236813545227051,
      "learning_rate": 3.906008517868863e-05,
      "loss": 0.6579,
      "step": 186
    },
    {
      "epoch": 0.584375,
      "grad_norm": 3.1845405101776123,
      "learning_rate": 3.9027752649084215e-05,
      "loss": 0.8384,
      "step": 187
    },
    {
      "epoch": 0.5875,
      "grad_norm": 2.709747791290283,
      "learning_rate": 3.899489198252108e-05,
      "loss": 0.5266,
      "step": 188
    },
    {
      "epoch": 0.590625,
      "grad_norm": 2.5210235118865967,
      "learning_rate": 3.896150441618476e-05,
      "loss": 0.5079,
      "step": 189
    },
    {
      "epoch": 0.59375,
      "grad_norm": 1.9979658126831055,
      "learning_rate": 3.892759120709824e-05,
      "loss": 0.2574,
      "step": 190
    },
    {
      "epoch": 0.596875,
      "grad_norm": 2.4257137775421143,
      "learning_rate": 3.8893153632074675e-05,
      "loss": 0.4162,
      "step": 191
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.482635021209717,
      "learning_rate": 3.88581929876693e-05,
      "loss": 0.7872,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_VitaminC_cosine_accuracy": 0.564453125,
      "eval_VitaminC_cosine_accuracy_threshold": 0.737064003944397,
      "eval_VitaminC_cosine_ap": 0.5553950127875514,
      "eval_VitaminC_cosine_f1": 0.6666666666666666,
      "eval_VitaminC_cosine_f1_threshold": 0.312030553817749,
      "eval_VitaminC_cosine_precision": 0.5,
      "eval_VitaminC_cosine_recall": 1.0,
      "eval_VitaminC_dot_accuracy": 0.5625,
      "eval_VitaminC_dot_accuracy_threshold": 326.57232666015625,
      "eval_VitaminC_dot_ap": 0.5370581483003721,
      "eval_VitaminC_dot_f1": 0.6649006622516557,
      "eval_VitaminC_dot_f1_threshold": 116.00311279296875,
      "eval_VitaminC_dot_precision": 0.498015873015873,
      "eval_VitaminC_dot_recall": 1.0,
      "eval_VitaminC_euclidean_accuracy": 0.55859375,
      "eval_VitaminC_euclidean_accuracy_threshold": 13.492112159729004,
      "eval_VitaminC_euclidean_ap": 0.5536857778177137,
      "eval_VitaminC_euclidean_f1": 0.6657824933687002,
      "eval_VitaminC_euclidean_f1_threshold": 23.840118408203125,
      "eval_VitaminC_euclidean_precision": 0.4990059642147117,
      "eval_VitaminC_euclidean_recall": 1.0,
      "eval_VitaminC_manhattan_accuracy": 0.5625,
      "eval_VitaminC_manhattan_accuracy_threshold": 306.6820983886719,
      "eval_VitaminC_manhattan_ap": 0.5520101545849081,
      "eval_VitaminC_manhattan_f1": 0.6666666666666666,
      "eval_VitaminC_manhattan_f1_threshold": 490.146728515625,
      "eval_VitaminC_manhattan_precision": 0.5,
      "eval_VitaminC_manhattan_recall": 1.0,
      "eval_VitaminC_max_accuracy": 0.564453125,
      "eval_VitaminC_max_accuracy_threshold": 326.57232666015625,
      "eval_VitaminC_max_ap": 0.5553950127875514,
      "eval_VitaminC_max_f1": 0.6666666666666666,
      "eval_VitaminC_max_f1_threshold": 490.146728515625,
      "eval_VitaminC_max_precision": 0.5,
      "eval_VitaminC_max_recall": 1.0,
      "eval_sequential_score": 0.5553950127875514,
      "eval_sts-test_pearson_cosine": 0.8705183135475563,
      "eval_sts-test_pearson_dot": 0.8575572680200927,
      "eval_sts-test_pearson_euclidean": 0.894961141451468,
      "eval_sts-test_pearson_manhattan": 0.8946364485546632,
      "eval_sts-test_pearson_max": 0.894961141451468,
      "eval_sts-test_spearman_cosine": 0.8981581293842179,
      "eval_sts-test_spearman_dot": 0.8574014998383989,
      "eval_sts-test_spearman_euclidean": 0.8924189591158167,
      "eval_sts-test_spearman_manhattan": 0.8920942887144219,
      "eval_sts-test_spearman_max": 0.8981581293842179,
      "eval_vitaminc-pairs_loss": 2.066204786300659,
      "eval_vitaminc-pairs_runtime": 1.8428,
      "eval_vitaminc-pairs_samples_per_second": 58.608,
      "eval_vitaminc-pairs_steps_per_second": 1.085,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_negation-triplets_loss": 0.763123095035553,
      "eval_negation-triplets_runtime": 0.297,
      "eval_negation-triplets_samples_per_second": 215.511,
      "eval_negation-triplets_steps_per_second": 3.367,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_scitail-pairs-pos_loss": 0.07364190369844437,
      "eval_scitail-pairs-pos_runtime": 0.3662,
      "eval_scitail-pairs-pos_samples_per_second": 147.451,
      "eval_scitail-pairs-pos_steps_per_second": 2.731,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_xsum-pairs_loss": 0.06735075265169144,
      "eval_xsum-pairs_runtime": 2.8409,
      "eval_xsum-pairs_samples_per_second": 45.056,
      "eval_xsum-pairs_steps_per_second": 0.704,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_sciq_pairs_loss": 0.01930728368461132,
      "eval_sciq_pairs_runtime": 3.6003,
      "eval_sciq_pairs_samples_per_second": 35.552,
      "eval_sciq_pairs_steps_per_second": 0.556,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_qasc_pairs_loss": 0.11278136074542999,
      "eval_qasc_pairs_runtime": 0.5997,
      "eval_qasc_pairs_samples_per_second": 213.437,
      "eval_qasc_pairs_steps_per_second": 3.335,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_openbookqa_pairs_loss": 0.7505559921264648,
      "eval_openbookqa_pairs_runtime": 0.5774,
      "eval_openbookqa_pairs_samples_per_second": 221.691,
      "eval_openbookqa_pairs_steps_per_second": 3.464,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_msmarco_pairs_loss": 0.33166375756263733,
      "eval_msmarco_pairs_runtime": 1.4619,
      "eval_msmarco_pairs_samples_per_second": 87.558,
      "eval_msmarco_pairs_steps_per_second": 1.368,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_nq_pairs_loss": 0.21051406860351562,
      "eval_nq_pairs_runtime": 2.858,
      "eval_nq_pairs_samples_per_second": 44.786,
      "eval_nq_pairs_steps_per_second": 0.7,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_trivia_pairs_loss": 0.7072564363479614,
      "eval_trivia_pairs_runtime": 4.3854,
      "eval_trivia_pairs_samples_per_second": 29.187,
      "eval_trivia_pairs_steps_per_second": 0.456,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_gooaq_pairs_loss": 0.3748788833618164,
      "eval_gooaq_pairs_runtime": 1.0024,
      "eval_gooaq_pairs_samples_per_second": 127.692,
      "eval_gooaq_pairs_steps_per_second": 1.995,
      "step": 192
    },
    {
      "epoch": 0.6,
      "eval_paws-pos_loss": 0.025185449048876762,
      "eval_paws-pos_runtime": 0.6844,
|
"eval_paws-pos_samples_per_second": 187.016, |
|
"eval_paws-pos_steps_per_second": 2.922, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.603125, |
|
"grad_norm": 1.527544617652893, |
|
"learning_rate": 3.882271059013064e-05, |
|
"loss": 0.2606, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.60625, |
|
"grad_norm": 3.647446870803833, |
|
"learning_rate": 3.878670777535087e-05, |
|
"loss": 0.8808, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.609375, |
|
"grad_norm": 3.806488275527954, |
|
"learning_rate": 3.875018589881564e-05, |
|
"loss": 0.7685, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.6125, |
|
"grad_norm": 2.9896490573883057, |
|
"learning_rate": 3.871314633555296e-05, |
|
"loss": 0.7186, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.615625, |
|
"grad_norm": 1.31754732131958, |
|
"learning_rate": 3.8675590480081455e-05, |
|
"loss": 0.1147, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.61875, |
|
"grad_norm": 2.025834798812866, |
|
"learning_rate": 3.863751974635784e-05, |
|
"loss": 0.2816, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.621875, |
|
"grad_norm": 2.5674166679382324, |
|
"learning_rate": 3.8598935567723734e-05, |
|
"loss": 0.506, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 3.270737648010254, |
|
"learning_rate": 3.8559839396851656e-05, |
|
"loss": 0.5699, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.628125, |
|
"grad_norm": 1.6074001789093018, |
|
"learning_rate": 3.852023270569033e-05, |
|
"loss": 0.2746, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.63125, |
|
"grad_norm": 3.736549139022827, |
|
"learning_rate": 3.8480116985409306e-05, |
|
"loss": 0.7131, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.634375, |
|
"grad_norm": 3.9329938888549805, |
|
"learning_rate": 3.843949374634278e-05, |
|
"loss": 0.9307, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.6375, |
|
"grad_norm": 3.110591173171997, |
|
"learning_rate": 3.839836451793273e-05, |
|
"loss": 0.6033, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.640625, |
|
"grad_norm": 3.889007091522217, |
|
"learning_rate": 3.8356730848671374e-05, |
|
"loss": 0.7203, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.64375, |
|
"grad_norm": 3.2738683223724365, |
|
"learning_rate": 3.8314594306042813e-05, |
|
"loss": 0.7422, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.646875, |
|
"grad_norm": 3.077531099319458, |
|
"learning_rate": 3.827195647646407e-05, |
|
"loss": 0.6955, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.336914539337158, |
|
"learning_rate": 3.822881896522533e-05, |
|
"loss": 0.7139, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.653125, |
|
"grad_norm": 2.866854429244995, |
|
"learning_rate": 3.818518339642951e-05, |
|
"loss": 0.4741, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 1.8859411478042603, |
|
"learning_rate": 3.81410514129311e-05, |
|
"loss": 0.2658, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.659375, |
|
"grad_norm": 2.938387870788574, |
|
"learning_rate": 3.809642467627435e-05, |
|
"loss": 0.6033, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.6625, |
|
"grad_norm": 3.269779920578003, |
|
"learning_rate": 3.805130486663068e-05, |
|
"loss": 0.7776, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.665625, |
|
"grad_norm": 2.8948724269866943, |
|
"learning_rate": 3.800569368273539e-05, |
|
"loss": 0.6791, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.66875, |
|
"grad_norm": 2.962749719619751, |
|
"learning_rate": 3.795959284182381e-05, |
|
"loss": 0.4367, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.671875, |
|
"grad_norm": 3.2313294410705566, |
|
"learning_rate": 3.791300407956651e-05, |
|
"loss": 0.7212, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 3.182274580001831, |
|
"learning_rate": 3.7865929150004086e-05, |
|
"loss": 0.7797, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.678125, |
|
"grad_norm": 2.7317817211151123, |
|
"learning_rate": 3.781836982548101e-05, |
|
"loss": 0.4547, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.68125, |
|
"grad_norm": 3.443126916885376, |
|
"learning_rate": 3.777032789657898e-05, |
|
"loss": 0.6771, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.684375, |
|
"grad_norm": 2.923877000808716, |
|
"learning_rate": 3.772180517204946e-05, |
|
"loss": 0.5488, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 2.897601366043091, |
|
"learning_rate": 3.767280347874561e-05, |
|
"loss": 0.7352, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.690625, |
|
"grad_norm": 4.06088924407959, |
|
"learning_rate": 3.762332466155348e-05, |
|
"loss": 0.9567, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.69375, |
|
"grad_norm": 2.582475423812866, |
|
"learning_rate": 3.7573370583322575e-05, |
|
"loss": 0.4274, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.696875, |
|
"grad_norm": 3.7177348136901855, |
|
"learning_rate": 3.7522943124795706e-05, |
|
"loss": 0.7653, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.9613823890686035, |
|
"learning_rate": 3.7472044184538186e-05, |
|
"loss": 0.5672, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_VitaminC_cosine_accuracy": 0.55859375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8318229913711548, |
|
"eval_VitaminC_cosine_ap": 0.5483869647391425, |
|
"eval_VitaminC_cosine_f1": 0.6657824933687002, |
|
"eval_VitaminC_cosine_f1_threshold": 0.2898828089237213, |
|
"eval_VitaminC_cosine_precision": 0.4990059642147117, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 280.6613464355469, |
|
"eval_VitaminC_dot_ap": 0.5352389087249884, |
|
"eval_VitaminC_dot_f1": 0.6666666666666667, |
|
"eval_VitaminC_dot_f1_threshold": 127.79656982421875, |
|
"eval_VitaminC_dot_precision": 0.501002004008016, |
|
"eval_VitaminC_dot_recall": 0.9960159362549801, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 15.362771987915039, |
|
"eval_VitaminC_euclidean_ap": 0.5487471191186046, |
|
"eval_VitaminC_euclidean_f1": 0.6657789613848203, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.0285587310791, |
|
"eval_VitaminC_euclidean_precision": 0.5, |
|
"eval_VitaminC_euclidean_recall": 0.9960159362549801, |
|
"eval_VitaminC_manhattan_accuracy": 0.556640625, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 304.9786376953125, |
|
"eval_VitaminC_manhattan_ap": 0.5448852224007886, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 503.7974548339844, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 304.9786376953125, |
|
"eval_VitaminC_max_ap": 0.5487471191186046, |
|
"eval_VitaminC_max_f1": 0.6666666666666667, |
|
"eval_VitaminC_max_f1_threshold": 503.7974548339844, |
|
"eval_VitaminC_max_precision": 0.501002004008016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5487471191186046, |
|
"eval_sts-test_pearson_cosine": 0.8727242216490746, |
|
"eval_sts-test_pearson_dot": 0.8620679649117718, |
|
"eval_sts-test_pearson_euclidean": 0.8961291746213003, |
|
"eval_sts-test_pearson_manhattan": 0.8961616445842001, |
|
"eval_sts-test_pearson_max": 0.8961616445842001, |
|
"eval_sts-test_spearman_cosine": 0.9004602237727143, |
|
"eval_sts-test_spearman_dot": 0.8617584826474656, |
|
"eval_sts-test_spearman_euclidean": 0.8945701970021624, |
|
"eval_sts-test_spearman_manhattan": 0.8942019836234342, |
|
"eval_sts-test_spearman_max": 0.9004602237727143, |
|
"eval_vitaminc-pairs_loss": 2.07513689994812, |
|
"eval_vitaminc-pairs_runtime": 1.8489, |
|
"eval_vitaminc-pairs_samples_per_second": 58.414, |
|
"eval_vitaminc-pairs_steps_per_second": 1.082, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_negation-triplets_loss": 0.7822766900062561, |
|
"eval_negation-triplets_runtime": 0.2948, |
|
"eval_negation-triplets_samples_per_second": 217.093, |
|
"eval_negation-triplets_steps_per_second": 3.392, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_scitail-pairs-pos_loss": 0.084584079682827, |
|
"eval_scitail-pairs-pos_runtime": 0.365, |
|
"eval_scitail-pairs-pos_samples_per_second": 147.944, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.74, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_xsum-pairs_loss": 0.05927089601755142, |
|
"eval_xsum-pairs_runtime": 2.8461, |
|
"eval_xsum-pairs_samples_per_second": 44.974, |
|
"eval_xsum-pairs_steps_per_second": 0.703, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_sciq_pairs_loss": 0.019030971452593803, |
|
"eval_sciq_pairs_runtime": 3.6465, |
|
"eval_sciq_pairs_samples_per_second": 35.102, |
|
"eval_sciq_pairs_steps_per_second": 0.548, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_qasc_pairs_loss": 0.12519867718219757, |
|
"eval_qasc_pairs_runtime": 0.6003, |
|
"eval_qasc_pairs_samples_per_second": 213.235, |
|
"eval_qasc_pairs_steps_per_second": 3.332, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_openbookqa_pairs_loss": 0.7141773700714111, |
|
"eval_openbookqa_pairs_runtime": 0.5753, |
|
"eval_openbookqa_pairs_samples_per_second": 222.508, |
|
"eval_openbookqa_pairs_steps_per_second": 3.477, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_msmarco_pairs_loss": 0.3040487468242645, |
|
"eval_msmarco_pairs_runtime": 1.4648, |
|
"eval_msmarco_pairs_samples_per_second": 87.383, |
|
"eval_msmarco_pairs_steps_per_second": 1.365, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_nq_pairs_loss": 0.1808711141347885, |
|
"eval_nq_pairs_runtime": 2.8595, |
|
"eval_nq_pairs_samples_per_second": 44.764, |
|
"eval_nq_pairs_steps_per_second": 0.699, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_trivia_pairs_loss": 0.7160522937774658, |
|
"eval_trivia_pairs_runtime": 4.3875, |
|
"eval_trivia_pairs_samples_per_second": 29.174, |
|
"eval_trivia_pairs_steps_per_second": 0.456, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_gooaq_pairs_loss": 0.3398577868938446, |
|
"eval_gooaq_pairs_runtime": 1.0189, |
|
"eval_gooaq_pairs_samples_per_second": 125.631, |
|
"eval_gooaq_pairs_steps_per_second": 1.963, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_paws-pos_loss": 0.0250654686242342, |
|
"eval_paws-pos_runtime": 0.6965, |
|
"eval_paws-pos_samples_per_second": 183.765, |
|
"eval_paws-pos_steps_per_second": 2.871, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.703125, |
|
"grad_norm": 2.7675271034240723, |
|
"learning_rate": 3.742067567886634e-05, |
|
"loss": 0.6116, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.70625, |
|
"grad_norm": 3.1136417388916016, |
|
"learning_rate": 3.7368839541775386e-05, |
|
"loss": 0.6484, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.709375, |
|
"grad_norm": 3.1425583362579346, |
|
"learning_rate": 3.731653772486657e-05, |
|
"loss": 0.669, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.7125, |
|
"grad_norm": 1.8860105276107788, |
|
"learning_rate": 3.726377219727376e-05, |
|
"loss": 0.263, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.715625, |
|
"grad_norm": 2.6990439891815186, |
|
"learning_rate": 3.721054494558923e-05, |
|
"loss": 0.6181, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 3.836609363555908, |
|
"learning_rate": 3.7156857973788926e-05, |
|
"loss": 0.8956, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.721875, |
|
"grad_norm": 3.0837268829345703, |
|
"learning_rate": 3.710271330315699e-05, |
|
"loss": 0.5363, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 3.639112710952759, |
|
"learning_rate": 3.704811297220967e-05, |
|
"loss": 0.823, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.728125, |
|
"grad_norm": 3.301112651824951, |
|
"learning_rate": 3.699305903661858e-05, |
|
"loss": 0.7795, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.73125, |
|
"grad_norm": 2.289018154144287, |
|
"learning_rate": 3.693755356913326e-05, |
|
"loss": 0.3688, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.734375, |
|
"grad_norm": 2.259490966796875, |
|
"learning_rate": 3.688159865950319e-05, |
|
"loss": 0.3835, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.7375, |
|
"grad_norm": 2.2043821811676025, |
|
"learning_rate": 3.6825196414399096e-05, |
|
"loss": 0.3393, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.740625, |
|
"grad_norm": 2.6866259574890137, |
|
"learning_rate": 3.6768348957333635e-05, |
|
"loss": 0.4792, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.74375, |
|
"grad_norm": 2.561917304992676, |
|
"learning_rate": 3.671105842858142e-05, |
|
"loss": 0.3966, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.746875, |
|
"grad_norm": 2.1512343883514404, |
|
"learning_rate": 3.6653326985098486e-05, |
|
"loss": 0.2902, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.7423007488250732, |
|
"learning_rate": 3.659515680044106e-05, |
|
"loss": 0.6716, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.753125, |
|
"grad_norm": 2.6502630710601807, |
|
"learning_rate": 3.65365500646837e-05, |
|
"loss": 0.6783, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.75625, |
|
"grad_norm": 2.8291828632354736, |
|
"learning_rate": 3.6477508984336886e-05, |
|
"loss": 0.4794, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.759375, |
|
"grad_norm": 3.7910561561584473, |
|
"learning_rate": 3.641803578226393e-05, |
|
"loss": 0.8283, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.7625, |
|
"grad_norm": 3.3968613147735596, |
|
"learning_rate": 3.635813269759727e-05, |
|
"loss": 0.6875, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.765625, |
|
"grad_norm": 3.5861093997955322, |
|
"learning_rate": 3.629780198565419e-05, |
|
"loss": 0.8384, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.76875, |
|
"grad_norm": 2.709362030029297, |
|
"learning_rate": 3.623704591785189e-05, |
|
"loss": 0.5796, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.771875, |
|
"grad_norm": 2.5690431594848633, |
|
"learning_rate": 3.6175866781622e-05, |
|
"loss": 0.6206, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 3.5460782051086426, |
|
"learning_rate": 3.611426688032439e-05, |
|
"loss": 0.7836, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.778125, |
|
"grad_norm": 2.9132962226867676, |
|
"learning_rate": 3.605224853316055e-05, |
|
"loss": 0.615, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 2.707908868789673, |
|
"learning_rate": 3.5989814075086195e-05, |
|
"loss": 0.433, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.784375, |
|
"grad_norm": 3.2124290466308594, |
|
"learning_rate": 3.592696585672338e-05, |
|
"loss": 0.7394, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.7875, |
|
"grad_norm": 1.3290472030639648, |
|
"learning_rate": 3.5863706244272006e-05, |
|
"loss": 0.1203, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.790625, |
|
"grad_norm": 3.5975258350372314, |
|
"learning_rate": 3.580003761942073e-05, |
|
"loss": 1.0909, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.79375, |
|
"grad_norm": 3.1402907371520996, |
|
"learning_rate": 3.573596237925728e-05, |
|
"loss": 0.7107, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.796875, |
|
"grad_norm": 2.1686770915985107, |
|
"learning_rate": 3.567148293617825e-05, |
|
"loss": 0.3464, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.112154960632324, |
|
"learning_rate": 3.560660171779821e-05, |
|
"loss": 0.9347, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_VitaminC_cosine_accuracy": 0.560546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.825050950050354, |
|
"eval_VitaminC_cosine_ap": 0.5480281823929228, |
|
"eval_VitaminC_cosine_f1": 0.6666666666666667, |
|
"eval_VitaminC_cosine_f1_threshold": 0.39935123920440674, |
|
"eval_VitaminC_cosine_precision": 0.501002004008016, |
|
"eval_VitaminC_cosine_recall": 0.9960159362549801, |
|
"eval_VitaminC_dot_accuracy": 0.552734375, |
|
"eval_VitaminC_dot_accuracy_threshold": 313.59075927734375, |
|
"eval_VitaminC_dot_ap": 0.5329984665726657, |
|
"eval_VitaminC_dot_f1": 0.6657789613848203, |
|
"eval_VitaminC_dot_f1_threshold": 132.71243286132812, |
|
"eval_VitaminC_dot_precision": 0.5, |
|
"eval_VitaminC_dot_recall": 0.9960159362549801, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 11.227453231811523, |
|
"eval_VitaminC_euclidean_ap": 0.5496569156706412, |
|
"eval_VitaminC_euclidean_f1": 0.6666666666666666, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.6641788482666, |
|
"eval_VitaminC_euclidean_precision": 0.5, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 230.89329528808594, |
|
"eval_VitaminC_manhattan_ap": 0.545699310794812, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 483.625244140625, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.560546875, |
|
"eval_VitaminC_max_accuracy_threshold": 313.59075927734375, |
|
"eval_VitaminC_max_ap": 0.5496569156706412, |
|
"eval_VitaminC_max_f1": 0.6666666666666667, |
|
"eval_VitaminC_max_f1_threshold": 483.625244140625, |
|
"eval_VitaminC_max_precision": 0.501002004008016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5496569156706412, |
|
"eval_sts-test_pearson_cosine": 0.8711436629553765, |
|
"eval_sts-test_pearson_dot": 0.859333025320038, |
|
"eval_sts-test_pearson_euclidean": 0.8967955144362856, |
|
"eval_sts-test_pearson_manhattan": 0.8972988934332646, |
|
"eval_sts-test_pearson_max": 0.8972988934332646, |
|
"eval_sts-test_spearman_cosine": 0.8987000224084064, |
|
"eval_sts-test_spearman_dot": 0.8599958647150425, |
|
"eval_sts-test_spearman_euclidean": 0.8935259263175941, |
|
"eval_sts-test_spearman_manhattan": 0.8939636384052635, |
|
"eval_sts-test_spearman_max": 0.8987000224084064, |
|
"eval_vitaminc-pairs_loss": 2.0033843517303467, |
|
"eval_vitaminc-pairs_runtime": 1.8717, |
|
"eval_vitaminc-pairs_samples_per_second": 57.701, |
|
"eval_vitaminc-pairs_steps_per_second": 1.069, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_negation-triplets_loss": 0.7500894069671631, |
|
"eval_negation-triplets_runtime": 0.2995, |
|
"eval_negation-triplets_samples_per_second": 213.659, |
|
"eval_negation-triplets_steps_per_second": 3.338, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_scitail-pairs-pos_loss": 0.07255758345127106, |
|
"eval_scitail-pairs-pos_runtime": 0.386, |
|
"eval_scitail-pairs-pos_samples_per_second": 139.883, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.59, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_xsum-pairs_loss": 0.056476954370737076, |
|
"eval_xsum-pairs_runtime": 2.8548, |
|
"eval_xsum-pairs_samples_per_second": 44.837, |
|
"eval_xsum-pairs_steps_per_second": 0.701, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_sciq_pairs_loss": 0.01967025361955166, |
|
"eval_sciq_pairs_runtime": 3.7336, |
|
"eval_sciq_pairs_samples_per_second": 34.283, |
|
"eval_sciq_pairs_steps_per_second": 0.536, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_qasc_pairs_loss": 0.1263607293367386, |
|
"eval_qasc_pairs_runtime": 0.6107, |
|
"eval_qasc_pairs_samples_per_second": 209.594, |
|
"eval_qasc_pairs_steps_per_second": 3.275, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_openbookqa_pairs_loss": 0.7773354649543762, |
|
"eval_openbookqa_pairs_runtime": 0.5903, |
|
"eval_openbookqa_pairs_samples_per_second": 216.831, |
|
"eval_openbookqa_pairs_steps_per_second": 3.388, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_msmarco_pairs_loss": 0.2844376862049103, |
|
"eval_msmarco_pairs_runtime": 1.4722, |
|
"eval_msmarco_pairs_samples_per_second": 86.947, |
|
"eval_msmarco_pairs_steps_per_second": 1.359, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_nq_pairs_loss": 0.17289823293685913, |
|
"eval_nq_pairs_runtime": 2.8665, |
|
"eval_nq_pairs_samples_per_second": 44.654, |
|
"eval_nq_pairs_steps_per_second": 0.698, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_trivia_pairs_loss": 0.6546728610992432, |
|
"eval_trivia_pairs_runtime": 4.3994, |
|
"eval_trivia_pairs_samples_per_second": 29.095, |
|
"eval_trivia_pairs_steps_per_second": 0.455, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_gooaq_pairs_loss": 0.31546029448509216, |
|
"eval_gooaq_pairs_runtime": 1.0423, |
|
"eval_gooaq_pairs_samples_per_second": 122.802, |
|
"eval_gooaq_pairs_steps_per_second": 1.919, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_paws-pos_loss": 0.02565235085785389, |
|
"eval_paws-pos_runtime": 0.6999, |
|
"eval_paws-pos_samples_per_second": 182.88, |
|
"eval_paws-pos_steps_per_second": 2.857, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.803125, |
|
"grad_norm": 2.2415249347686768, |
|
"learning_rate": 3.5541321166858384e-05, |
|
"loss": 0.464, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.80625, |
|
"grad_norm": 2.22743821144104, |
|
"learning_rate": 3.54756437411346e-05, |
|
"loss": 0.4622, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.809375, |
|
"grad_norm": 2.5632565021514893, |
|
"learning_rate": 3.5409571913344813e-05, |
|
"loss": 0.5124, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 3.4271864891052246, |
|
"learning_rate": 3.5343108171056006e-05, |
|
"loss": 0.832, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.815625, |
|
"grad_norm": 2.9892525672912598, |
|
"learning_rate": 3.527625501659051e-05, |
|
"loss": 0.6264, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.81875, |
|
"grad_norm": 2.808922529220581, |
|
"learning_rate": 3.5209014966931795e-05, |
|
"loss": 0.5483, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.821875, |
|
"grad_norm": 2.71504545211792, |
|
"learning_rate": 3.514139055362974e-05, |
|
"loss": 0.5929, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 2.9386723041534424, |
|
"learning_rate": 3.507338432270528e-05, |
|
"loss": 0.5797, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.828125, |
|
"grad_norm": 2.694045066833496, |
|
"learning_rate": 3.500499883455457e-05, |
|
"loss": 0.5292, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.83125, |
|
"grad_norm": 2.800262928009033, |
|
"learning_rate": 3.493623666385258e-05, |
|
"loss": 0.5376, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.834375, |
|
"grad_norm": 3.4821765422821045, |
|
"learning_rate": 3.486710039945618e-05, |
|
"loss": 0.7102, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.8375, |
|
"grad_norm": 2.337831735610962, |
|
"learning_rate": 3.4797592644306655e-05, |
|
"loss": 0.4605, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.840625, |
|
"grad_norm": 3.8004300594329834, |
|
"learning_rate": 3.472771601533169e-05, |
|
"loss": 1.2713, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 3.3914785385131836, |
|
"learning_rate": 3.465747314334687e-05, |
|
"loss": 0.7764, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.846875, |
|
"grad_norm": 3.0255892276763916, |
|
"learning_rate": 3.458686667295664e-05, |
|
"loss": 0.7517, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.9869744777679443, |
|
"learning_rate": 3.451589926245469e-05, |
|
"loss": 0.614, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.853125, |
|
"grad_norm": 3.160764694213867, |
|
"learning_rate": 3.444457358372391e-05, |
|
"loss": 0.6046, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.85625, |
|
"grad_norm": 2.87579607963562, |
|
"learning_rate": 3.43728923221358e-05, |
|
"loss": 0.7111, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.859375, |
|
"grad_norm": 1.9325075149536133, |
|
"learning_rate": 3.4300858176449344e-05, |
|
"loss": 0.4401, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8625, |
|
"grad_norm": 1.9690322875976562, |
|
"learning_rate": 3.4228473858709404e-05, |
|
"loss": 0.4351, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.865625, |
|
"grad_norm": 3.530524969100952, |
|
"learning_rate": 3.4155742094144646e-05, |
|
"loss": 0.7498, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.86875, |
|
"grad_norm": 3.321233034133911, |
|
"learning_rate": 3.408266562106489e-05, |
|
"loss": 0.7173, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.871875, |
|
"grad_norm": 2.2215065956115723, |
|
"learning_rate": 3.400924719075804e-05, |
|
"loss": 0.4696, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 3.1400840282440186, |
|
"learning_rate": 3.39354895673865e-05, |
|
"loss": 0.6246, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.878125, |
|
"grad_norm": 3.4510090351104736, |
|
"learning_rate": 3.386139552788312e-05, |
|
"loss": 0.7578, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.88125, |
|
"grad_norm": 2.350965976715088, |
|
"learning_rate": 3.378696786184659e-05, |
|
"loss": 0.3533, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.884375, |
|
"grad_norm": 3.5409841537475586, |
|
"learning_rate": 3.3712209371436473e-05, |
|
"loss": 0.7328, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.8875, |
|
"grad_norm": 3.4038257598876953, |
|
"learning_rate": 3.363712287126768e-05, |
|
"loss": 0.6964, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.890625, |
|
"grad_norm": 2.8739030361175537, |
|
"learning_rate": 3.3561711188304516e-05, |
|
"loss": 0.6431, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.89375, |
|
"grad_norm": 3.5703017711639404, |
|
"learning_rate": 3.34859771617542e-05, |
|
"loss": 0.7155, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.896875, |
|
"grad_norm": 2.76778244972229, |
|
"learning_rate": 3.340992364296004e-05, |
|
"loss": 0.6328, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.4040513038635254, |
|
"learning_rate": 3.333355349529403e-05, |
|
"loss": 0.7895, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_VitaminC_cosine_accuracy": 0.55859375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8272709846496582, |
|
"eval_VitaminC_cosine_ap": 0.5489140066962175, |
|
"eval_VitaminC_cosine_f1": 0.6666666666666667, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3126052916049957, |
|
"eval_VitaminC_cosine_precision": 0.501002004008016, |
|
"eval_VitaminC_cosine_recall": 0.9960159362549801, |
|
"eval_VitaminC_dot_accuracy": 0.552734375, |
|
"eval_VitaminC_dot_accuracy_threshold": 303.1324157714844, |
|
"eval_VitaminC_dot_ap": 0.5301817831729955, |
|
"eval_VitaminC_dot_f1": 0.6675531914893617, |
|
"eval_VitaminC_dot_f1_threshold": 120.97600555419922, |
|
"eval_VitaminC_dot_precision": 0.500998003992016, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.55859375, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 11.374759674072266, |
|
"eval_VitaminC_euclidean_ap": 0.551008119376775, |
|
"eval_VitaminC_euclidean_f1": 0.6657824933687002, |
|
"eval_VitaminC_euclidean_f1_threshold": 24.255207061767578, |
|
"eval_VitaminC_euclidean_precision": 0.4990059642147117, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.556640625, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 230.6835174560547, |
|
"eval_VitaminC_manhattan_ap": 0.5485867585720646, |
|
"eval_VitaminC_manhattan_f1": 0.6649006622516557, |
|
"eval_VitaminC_manhattan_f1_threshold": 521.4428100585938, |
|
"eval_VitaminC_manhattan_precision": 0.498015873015873, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 303.1324157714844, |
|
"eval_VitaminC_max_ap": 0.551008119376775, |
|
"eval_VitaminC_max_f1": 0.6675531914893617, |
|
"eval_VitaminC_max_f1_threshold": 521.4428100585938, |
|
"eval_VitaminC_max_precision": 0.501002004008016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.551008119376775, |
|
"eval_sts-test_pearson_cosine": 0.8726396664543798, |
|
"eval_sts-test_pearson_dot": 0.8623668711287399, |
|
"eval_sts-test_pearson_euclidean": 0.8950211806151552, |
|
"eval_sts-test_pearson_manhattan": 0.8954158210085943, |
|
"eval_sts-test_pearson_max": 0.8954158210085943, |
|
"eval_sts-test_spearman_cosine": 0.897937595168081, |
|
"eval_sts-test_spearman_dot": 0.8635840656046664, |
|
"eval_sts-test_spearman_euclidean": 0.8912111673221239, |
|
"eval_sts-test_spearman_manhattan": 0.8913994806300589, |
|
"eval_sts-test_spearman_max": 0.897937595168081, |
|
"eval_vitaminc-pairs_loss": 1.955485224723816, |
|
"eval_vitaminc-pairs_runtime": 1.8698, |
|
"eval_vitaminc-pairs_samples_per_second": 57.76, |
|
"eval_vitaminc-pairs_steps_per_second": 1.07, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_negation-triplets_loss": 0.7942228317260742, |
|
"eval_negation-triplets_runtime": 0.2979, |
|
"eval_negation-triplets_samples_per_second": 214.818, |
|
"eval_negation-triplets_steps_per_second": 3.357, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_scitail-pairs-pos_loss": 0.07541428506374359, |
|
"eval_scitail-pairs-pos_runtime": 0.381, |
|
"eval_scitail-pairs-pos_samples_per_second": 141.723, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.625, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_xsum-pairs_loss": 0.05658277869224548, |
|
"eval_xsum-pairs_runtime": 2.8504, |
|
"eval_xsum-pairs_samples_per_second": 44.906, |
|
"eval_xsum-pairs_steps_per_second": 0.702, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_sciq_pairs_loss": 0.019849741831421852, |
|
"eval_sciq_pairs_runtime": 3.6603, |
|
"eval_sciq_pairs_samples_per_second": 34.97, |
|
"eval_sciq_pairs_steps_per_second": 0.546, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_qasc_pairs_loss": 0.10889946669340134, |
|
"eval_qasc_pairs_runtime": 0.6033, |
|
"eval_qasc_pairs_samples_per_second": 212.165, |
|
"eval_qasc_pairs_steps_per_second": 3.315, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_openbookqa_pairs_loss": 0.7712036967277527, |
|
"eval_openbookqa_pairs_runtime": 0.585, |
|
"eval_openbookqa_pairs_samples_per_second": 218.815, |
|
"eval_openbookqa_pairs_steps_per_second": 3.419, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_msmarco_pairs_loss": 0.279923677444458, |
|
"eval_msmarco_pairs_runtime": 1.4672, |
|
"eval_msmarco_pairs_samples_per_second": 87.239, |
|
"eval_msmarco_pairs_steps_per_second": 1.363, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_nq_pairs_loss": 0.18058110773563385, |
|
"eval_nq_pairs_runtime": 2.8678, |
|
"eval_nq_pairs_samples_per_second": 44.634, |
|
"eval_nq_pairs_steps_per_second": 0.697, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_trivia_pairs_loss": 0.7307667136192322, |
|
"eval_trivia_pairs_runtime": 4.4071, |
|
"eval_trivia_pairs_samples_per_second": 29.044, |
|
"eval_trivia_pairs_steps_per_second": 0.454, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_gooaq_pairs_loss": 0.33244821429252625, |
|
"eval_gooaq_pairs_runtime": 1.0096, |
|
"eval_gooaq_pairs_samples_per_second": 126.785, |
|
"eval_gooaq_pairs_steps_per_second": 1.981, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_paws-pos_loss": 0.024881305173039436, |
|
"eval_paws-pos_runtime": 0.6946, |
|
"eval_paws-pos_samples_per_second": 184.279, |
|
"eval_paws-pos_steps_per_second": 2.879, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.903125, |
|
"grad_norm": 2.7424654960632324, |
|
"learning_rate": 3.325686959404907e-05, |
|
"loss": 0.5752, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 2.913073778152466, |
|
"learning_rate": 3.3179874826330696e-05, |
|
"loss": 0.666, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.909375, |
|
"grad_norm": 3.9191319942474365, |
|
"learning_rate": 3.3102572090948395e-05, |
|
"loss": 0.874, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.9125, |
|
"grad_norm": 3.086979627609253, |
|
"learning_rate": 3.302496429830647e-05, |
|
"loss": 0.7431, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.915625, |
|
"grad_norm": 3.0514609813690186, |
|
"learning_rate": 3.294705437029443e-05, |
|
"loss": 0.8332, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.91875, |
|
"grad_norm": 3.042734384536743, |
|
"learning_rate": 3.2868845240177035e-05, |
|
"loss": 0.7082, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.921875, |
|
"grad_norm": 3.4690864086151123, |
|
"learning_rate": 3.2790339852483845e-05, |
|
"loss": 0.6618, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 2.520153045654297, |
|
"learning_rate": 3.2711541162898326e-05, |
|
"loss": 0.2375, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.928125, |
|
"grad_norm": 2.9911270141601562, |
|
"learning_rate": 3.2632452138146607e-05, |
|
"loss": 0.5305, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.93125, |
|
"grad_norm": 2.2287964820861816, |
|
"learning_rate": 3.255307575588577e-05, |
|
"loss": 0.1686, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.934375, |
|
"grad_norm": 3.2477688789367676, |
|
"learning_rate": 3.247341500459173e-05, |
|
"loss": 0.7938, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 1.9740976095199585, |
|
"learning_rate": 3.239347288344676e-05, |
|
"loss": 0.2629, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.940625, |
|
"grad_norm": 4.1774702072143555, |
|
"learning_rate": 3.231325240222655e-05, |
|
"loss": 0.973, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.94375, |
|
"grad_norm": 3.6038107872009277, |
|
"learning_rate": 3.2232756581186846e-05, |
|
"loss": 0.649, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.946875, |
|
"grad_norm": 2.0142273902893066, |
|
"learning_rate": 3.215198845094984e-05, |
|
"loss": 0.3329, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.460426092147827, |
|
"learning_rate": 3.2070951052389975e-05, |
|
"loss": 0.6105, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.953125, |
|
"grad_norm": 2.1552436351776123, |
|
"learning_rate": 3.198964743651949e-05, |
|
"loss": 0.3621, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.95625, |
|
"grad_norm": 2.6201255321502686, |
|
"learning_rate": 3.1908080664373605e-05, |
|
"loss": 0.5165, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.959375, |
|
"grad_norm": 3.296206474304199, |
|
"learning_rate": 3.182625380689516e-05, |
|
"loss": 0.6075, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.9625, |
|
"grad_norm": 2.3535473346710205, |
|
"learning_rate": 3.17441699448191e-05, |
|
"loss": 0.3091, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.965625, |
|
"grad_norm": 2.1077566146850586, |
|
"learning_rate": 3.166183216855644e-05, |
|
"loss": 0.2762, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 2.85646390914917, |
|
"learning_rate": 3.157924357807792e-05, |
|
"loss": 0.5736, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.971875, |
|
"grad_norm": 2.4051146507263184, |
|
"learning_rate": 3.149640728279728e-05, |
|
"loss": 0.3876, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 5.062899112701416, |
|
"learning_rate": 3.141332640145423e-05, |
|
"loss": 1.8005, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.978125, |
|
"grad_norm": 2.969027042388916, |
|
"learning_rate": 3.1330004061997e-05, |
|
"loss": 0.6344, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.98125, |
|
"grad_norm": 4.5385847091674805, |
|
"learning_rate": 3.1246443401464564e-05, |
|
"loss": 0.9414, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.984375, |
|
"grad_norm": 2.6700010299682617, |
|
"learning_rate": 3.116264756586856e-05, |
|
"loss": 0.4782, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.9875, |
|
"grad_norm": 2.293757438659668, |
|
"learning_rate": 3.107861971007485e-05, |
|
"loss": 0.4196, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.990625, |
|
"grad_norm": 5.584008693695068, |
|
"learning_rate": 3.099436299768471e-05, |
|
"loss": 0.5288, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.99375, |
|
"grad_norm": 3.047480344772339, |
|
"learning_rate": 3.0909880600915726e-05, |
|
"loss": 0.5888, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.996875, |
|
"grad_norm": 3.148433208465576, |
|
"learning_rate": 3.08251757004824e-05, |
|
"loss": 0.4598, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.277242660522461, |
|
"learning_rate": 3.074025148547635e-05, |
|
"loss": 0.5085, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.5546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8424822092056274, |
|
"eval_VitaminC_cosine_ap": 0.5467401178776568, |
|
"eval_VitaminC_cosine_f1": 0.6657824933687002, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3060212731361389, |
|
"eval_VitaminC_cosine_precision": 0.4990059642147117, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.55859375, |
|
"eval_VitaminC_dot_accuracy_threshold": 302.82525634765625, |
|
"eval_VitaminC_dot_ap": 0.5313187944370502, |
|
"eval_VitaminC_dot_f1": 0.6657824933687002, |
|
"eval_VitaminC_dot_f1_threshold": 112.19659423828125, |
|
"eval_VitaminC_dot_precision": 0.4990059642147117, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.824159622192383, |
|
"eval_VitaminC_euclidean_ap": 0.5479307244374829, |
|
"eval_VitaminC_euclidean_f1": 0.6649006622516557, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.69076919555664, |
|
"eval_VitaminC_euclidean_precision": 0.498015873015873, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.552734375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 292.99462890625, |
|
"eval_VitaminC_manhattan_ap": 0.5465792848292811, |
|
"eval_VitaminC_manhattan_f1": 0.6666666666666666, |
|
"eval_VitaminC_manhattan_f1_threshold": 489.7302551269531, |
|
"eval_VitaminC_manhattan_precision": 0.5, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 302.82525634765625, |
|
"eval_VitaminC_max_ap": 0.5479307244374829, |
|
"eval_VitaminC_max_f1": 0.6666666666666666, |
|
"eval_VitaminC_max_f1_threshold": 489.7302551269531, |
|
"eval_VitaminC_max_precision": 0.5, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5479307244374829, |
|
"eval_sts-test_pearson_cosine": 0.87646365142741, |
|
"eval_sts-test_pearson_dot": 0.8655190609079275, |
|
"eval_sts-test_pearson_euclidean": 0.9009817964818363, |
|
"eval_sts-test_pearson_manhattan": 0.9014432269871114, |
|
"eval_sts-test_pearson_max": 0.9014432269871114, |
|
"eval_sts-test_spearman_cosine": 0.9030024086785755, |
|
"eval_sts-test_spearman_dot": 0.8673856405086042, |
|
"eval_sts-test_spearman_euclidean": 0.8983721299161916, |
|
"eval_sts-test_spearman_manhattan": 0.8981219256137521, |
|
"eval_sts-test_spearman_max": 0.9030024086785755, |
|
"eval_vitaminc-pairs_loss": 1.9213347434997559, |
|
"eval_vitaminc-pairs_runtime": 1.866, |
|
"eval_vitaminc-pairs_samples_per_second": 57.877, |
|
"eval_vitaminc-pairs_steps_per_second": 1.072, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_negation-triplets_loss": 0.7787352204322815, |
|
"eval_negation-triplets_runtime": 0.2979, |
|
"eval_negation-triplets_samples_per_second": 214.834, |
|
"eval_negation-triplets_steps_per_second": 3.357, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_scitail-pairs-pos_loss": 0.06892620027065277, |
|
"eval_scitail-pairs-pos_runtime": 0.4252, |
|
"eval_scitail-pairs-pos_samples_per_second": 126.994, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.352, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_xsum-pairs_loss": 0.05507522076368332, |
|
"eval_xsum-pairs_runtime": 2.8476, |
|
"eval_xsum-pairs_samples_per_second": 44.951, |
|
"eval_xsum-pairs_steps_per_second": 0.702, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_sciq_pairs_loss": 0.020738935098052025, |
|
"eval_sciq_pairs_runtime": 3.7008, |
|
"eval_sciq_pairs_samples_per_second": 34.587, |
|
"eval_sciq_pairs_steps_per_second": 0.54, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_qasc_pairs_loss": 0.10421090573072433, |
|
"eval_qasc_pairs_runtime": 0.6054, |
|
"eval_qasc_pairs_samples_per_second": 211.426, |
|
"eval_qasc_pairs_steps_per_second": 3.304, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_openbookqa_pairs_loss": 0.694441020488739, |
|
"eval_openbookqa_pairs_runtime": 0.6019, |
|
"eval_openbookqa_pairs_samples_per_second": 212.646, |
|
"eval_openbookqa_pairs_steps_per_second": 3.323, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_msmarco_pairs_loss": 0.28574398159980774, |
|
"eval_msmarco_pairs_runtime": 1.4875, |
|
"eval_msmarco_pairs_samples_per_second": 86.048, |
|
"eval_msmarco_pairs_steps_per_second": 1.344, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_nq_pairs_loss": 0.17458948493003845, |
|
"eval_nq_pairs_runtime": 2.8657, |
|
"eval_nq_pairs_samples_per_second": 44.666, |
|
"eval_nq_pairs_steps_per_second": 0.698, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_trivia_pairs_loss": 0.68446946144104, |
|
"eval_trivia_pairs_runtime": 4.4, |
|
"eval_trivia_pairs_samples_per_second": 29.091, |
|
"eval_trivia_pairs_steps_per_second": 0.455, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gooaq_pairs_loss": 0.3039962947368622, |
|
"eval_gooaq_pairs_runtime": 1.0187, |
|
"eval_gooaq_pairs_samples_per_second": 125.646, |
|
"eval_gooaq_pairs_steps_per_second": 1.963, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_paws-pos_loss": 0.024999650195240974, |
|
"eval_paws-pos_runtime": 0.7064, |
|
"eval_paws-pos_samples_per_second": 181.207, |
|
"eval_paws-pos_steps_per_second": 2.831, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.003125, |
|
"grad_norm": 2.7458887100219727, |
|
"learning_rate": 3.065511115324628e-05, |
|
"loss": 0.647, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.00625, |
|
"grad_norm": 2.646803140640259, |
|
"learning_rate": 3.0569757909277566e-05, |
|
"loss": 0.4768, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.009375, |
|
"grad_norm": 2.367361545562744, |
|
"learning_rate": 3.048419496707161e-05, |
|
"loss": 0.4834, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.0125, |
|
"grad_norm": 3.055002450942993, |
|
"learning_rate": 3.0398425548024827e-05, |
|
"loss": 0.6115, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.015625, |
|
"grad_norm": 2.0717179775238037, |
|
"learning_rate": 3.0312452881307356e-05, |
|
"loss": 0.4611, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.01875, |
|
"grad_norm": 2.3982598781585693, |
|
"learning_rate": 3.022628020374152e-05, |
|
"loss": 0.4812, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.021875, |
|
"grad_norm": 2.90179705619812, |
|
"learning_rate": 3.013991075967992e-05, |
|
"loss": 0.5914, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.025, |
|
"grad_norm": 3.2376556396484375, |
|
"learning_rate": 3.00533478008833e-05, |
|
"loss": 0.7206, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.028125, |
|
"grad_norm": 3.591564416885376, |
|
"learning_rate": 2.996659458639815e-05, |
|
"loss": 0.7854, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.03125, |
|
"grad_norm": 2.470400094985962, |
|
"learning_rate": 2.9879654382433948e-05, |
|
"loss": 0.432, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.034375, |
|
"grad_norm": 3.061913013458252, |
|
"learning_rate": 2.979253046224024e-05, |
|
"loss": 0.6365, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.0375, |
|
"grad_norm": 2.3621861934661865, |
|
"learning_rate": 2.9705226105983377e-05, |
|
"loss": 0.3754, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.040625, |
|
"grad_norm": 2.898756742477417, |
|
"learning_rate": 2.9617744600623023e-05, |
|
"loss": 0.5096, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.04375, |
|
"grad_norm": 2.9752399921417236, |
|
"learning_rate": 2.9530089239788428e-05, |
|
"loss": 0.5762, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.046875, |
|
"grad_norm": 3.2658884525299072, |
|
"learning_rate": 2.9442263323654362e-05, |
|
"loss": 0.6938, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 2.0361263751983643, |
|
"learning_rate": 2.935427015881694e-05, |
|
"loss": 0.343, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.053125, |
|
"grad_norm": 3.670530319213867, |
|
"learning_rate": 2.926611305816908e-05, |
|
"loss": 0.7258, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.05625, |
|
"grad_norm": 2.597907066345215, |
|
"learning_rate": 2.9177795340775795e-05, |
|
"loss": 0.4658, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.059375, |
|
"grad_norm": 3.1930811405181885, |
|
"learning_rate": 2.9089320331749237e-05, |
|
"loss": 0.7108, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"grad_norm": 4.060088157653809, |
|
"learning_rate": 2.9000691362123475e-05, |
|
"loss": 1.3076, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.065625, |
|
"grad_norm": 1.4222996234893799, |
|
"learning_rate": 2.8911911768729136e-05, |
|
"loss": 0.2397, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.06875, |
|
"grad_norm": 2.6759979724884033, |
|
"learning_rate": 2.8822984894067722e-05, |
|
"loss": 0.4853, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.071875, |
|
"grad_norm": 3.4097981452941895, |
|
"learning_rate": 2.8733914086185807e-05, |
|
"loss": 0.741, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.075, |
|
"grad_norm": 2.869738817214966, |
|
"learning_rate": 2.8644702698548962e-05, |
|
"loss": 0.6066, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.078125, |
|
"grad_norm": 3.412572145462036, |
|
"learning_rate": 2.8555354089915514e-05, |
|
"loss": 0.6838, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.08125, |
|
"grad_norm": 2.155133008956909, |
|
"learning_rate": 2.846587162421007e-05, |
|
"loss": 0.4393, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.084375, |
|
"grad_norm": 2.3955204486846924, |
|
"learning_rate": 2.837625867039689e-05, |
|
"loss": 0.4102, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.0875, |
|
"grad_norm": 2.5801889896392822, |
|
"learning_rate": 2.8286518602353047e-05, |
|
"loss": 0.4947, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.090625, |
|
"grad_norm": 2.63447904586792, |
|
"learning_rate": 2.819665479874137e-05, |
|
"loss": 0.5212, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.09375, |
|
"grad_norm": 2.7823500633239746, |
|
"learning_rate": 2.8106670642883283e-05, |
|
"loss": 0.6889, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.096875, |
|
"grad_norm": 2.979808807373047, |
|
"learning_rate": 2.8016569522631384e-05, |
|
"loss": 0.625, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.9141488075256348, |
|
"learning_rate": 2.792635483024193e-05, |
|
"loss": 0.5093, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_VitaminC_cosine_accuracy": 0.5546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8290125131607056, |
|
"eval_VitaminC_cosine_ap": 0.5484962367283152, |
|
"eval_VitaminC_cosine_f1": 0.6666666666666666, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3529857099056244, |
|
"eval_VitaminC_cosine_precision": 0.5, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.548828125, |
|
"eval_VitaminC_dot_accuracy_threshold": 324.3284606933594, |
|
"eval_VitaminC_dot_ap": 0.5323604009341977, |
|
"eval_VitaminC_dot_f1": 0.6666666666666667, |
|
"eval_VitaminC_dot_f1_threshold": 137.8323211669922, |
|
"eval_VitaminC_dot_precision": 0.501002004008016, |
|
"eval_VitaminC_dot_recall": 0.9960159362549801, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.973267555236816, |
|
"eval_VitaminC_euclidean_ap": 0.5488900714831766, |
|
"eval_VitaminC_euclidean_f1": 0.6657824933687002, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.846126556396484, |
|
"eval_VitaminC_euclidean_precision": 0.4990059642147117, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 292.48834228515625, |
|
"eval_VitaminC_manhattan_ap": 0.5472615547862266, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 487.93536376953125, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.556640625, |
|
"eval_VitaminC_max_accuracy_threshold": 324.3284606933594, |
|
"eval_VitaminC_max_ap": 0.5488900714831766, |
|
"eval_VitaminC_max_f1": 0.6666666666666667, |
|
"eval_VitaminC_max_f1_threshold": 487.93536376953125, |
|
"eval_VitaminC_max_precision": 0.501002004008016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5488900714831766, |
|
"eval_sts-test_pearson_cosine": 0.8777529500191548, |
|
"eval_sts-test_pearson_dot": 0.8689529679551734, |
|
"eval_sts-test_pearson_euclidean": 0.8997770430839387, |
|
"eval_sts-test_pearson_manhattan": 0.8993770557804839, |
|
"eval_sts-test_pearson_max": 0.8997770430839387, |
|
"eval_sts-test_spearman_cosine": 0.9027963738711295, |
|
"eval_sts-test_spearman_dot": 0.8692104626943614, |
|
"eval_sts-test_spearman_euclidean": 0.897084054359563, |
|
"eval_sts-test_spearman_manhattan": 0.8970093645043006, |
|
"eval_sts-test_spearman_max": 0.9027963738711295, |
|
"eval_vitaminc-pairs_loss": 1.9221601486206055, |
|
"eval_vitaminc-pairs_runtime": 1.8539, |
|
"eval_vitaminc-pairs_samples_per_second": 58.254, |
|
"eval_vitaminc-pairs_steps_per_second": 1.079, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_negation-triplets_loss": 0.7761179208755493, |
|
"eval_negation-triplets_runtime": 0.2931, |
|
"eval_negation-triplets_samples_per_second": 218.388, |
|
"eval_negation-triplets_steps_per_second": 3.412, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_scitail-pairs-pos_loss": 0.08009649068117142, |
|
"eval_scitail-pairs-pos_runtime": 0.3758, |
|
"eval_scitail-pairs-pos_samples_per_second": 143.684, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.661, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_xsum-pairs_loss": 0.062557153403759, |
|
"eval_xsum-pairs_runtime": 2.8489, |
|
"eval_xsum-pairs_samples_per_second": 44.93, |
|
"eval_xsum-pairs_steps_per_second": 0.702, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_sciq_pairs_loss": 0.019746748730540276, |
|
"eval_sciq_pairs_runtime": 3.6515, |
|
"eval_sciq_pairs_samples_per_second": 35.054, |
|
"eval_sciq_pairs_steps_per_second": 0.548, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_qasc_pairs_loss": 0.10993637144565582, |
|
"eval_qasc_pairs_runtime": 0.6014, |
|
"eval_qasc_pairs_samples_per_second": 212.82, |
|
"eval_qasc_pairs_steps_per_second": 3.325, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_openbookqa_pairs_loss": 0.7048032879829407, |
|
"eval_openbookqa_pairs_runtime": 0.5788, |
|
"eval_openbookqa_pairs_samples_per_second": 221.148, |
|
"eval_openbookqa_pairs_steps_per_second": 3.455, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_msmarco_pairs_loss": 0.27703118324279785, |
|
"eval_msmarco_pairs_runtime": 1.468, |
|
"eval_msmarco_pairs_samples_per_second": 87.192, |
|
"eval_msmarco_pairs_steps_per_second": 1.362, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_nq_pairs_loss": 0.1819453090429306, |
|
"eval_nq_pairs_runtime": 2.8689, |
|
"eval_nq_pairs_samples_per_second": 44.616, |
|
"eval_nq_pairs_steps_per_second": 0.697, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_trivia_pairs_loss": 0.687531054019928, |
|
"eval_trivia_pairs_runtime": 4.399, |
|
"eval_trivia_pairs_samples_per_second": 29.098, |
|
"eval_trivia_pairs_steps_per_second": 0.455, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_gooaq_pairs_loss": 0.30321064591407776, |
|
"eval_gooaq_pairs_runtime": 1.0175, |
|
"eval_gooaq_pairs_samples_per_second": 125.792, |
|
"eval_gooaq_pairs_steps_per_second": 1.966, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_paws-pos_loss": 0.02436799556016922, |
|
"eval_paws-pos_runtime": 0.7162, |
|
"eval_paws-pos_samples_per_second": 178.711, |
|
"eval_paws-pos_steps_per_second": 2.792, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.103125, |
|
"grad_norm": 3.3241679668426514, |
|
"learning_rate": 2.78360299622471e-05, |
|
"loss": 0.6242, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.10625, |
|
"grad_norm": 3.031259059906006, |
|
"learning_rate": 2.7745598319327117e-05, |
|
"loss": 0.7228, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.109375, |
|
"grad_norm": 2.223773956298828, |
|
"learning_rate": 2.7655063306182235e-05, |
|
"loss": 0.3717, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.1125, |
|
"grad_norm": 2.281268835067749, |
|
"learning_rate": 2.7564428331404524e-05, |
|
"loss": 0.3442, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.115625, |
|
"grad_norm": 3.040951728820801, |
|
"learning_rate": 2.7473696807349552e-05, |
|
"loss": 0.649, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.11875, |
|
"grad_norm": 2.3970398902893066, |
|
"learning_rate": 2.738287215000792e-05, |
|
"loss": 0.3935, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.121875, |
|
"grad_norm": 2.8858048915863037, |
|
"learning_rate": 2.7291957778876656e-05, |
|
"loss": 0.6131, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 2.974828004837036, |
|
"learning_rate": 2.7200957116830426e-05, |
|
"loss": 0.5322, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.128125, |
|
"grad_norm": 1.7254366874694824, |
|
"learning_rate": 2.7109873589992745e-05, |
|
"loss": 0.2073, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.13125, |
|
"grad_norm": 2.895080804824829, |
|
"learning_rate": 2.7018710627606894e-05, |
|
"loss": 0.6735, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.134375, |
|
"grad_norm": 3.014303207397461, |
|
"learning_rate": 2.69274716619069e-05, |
|
"loss": 0.7604, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.1375, |
|
"grad_norm": 2.703094005584717, |
|
"learning_rate": 2.6836160127988247e-05, |
|
"loss": 0.6165, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.140625, |
|
"grad_norm": 1.903054118156433, |
|
"learning_rate": 2.6744779463678576e-05, |
|
"loss": 0.1963, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.14375, |
|
"grad_norm": 1.694141149520874, |
|
"learning_rate": 2.665333310940825e-05, |
|
"loss": 0.1668, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.146875, |
|
"grad_norm": 2.7038228511810303, |
|
"learning_rate": 2.6561824508080824e-05, |
|
"loss": 0.5055, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.6325740814208984, |
|
"learning_rate": 2.6470257104943417e-05, |
|
"loss": 0.4919, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.153125, |
|
"grad_norm": 3.161851167678833, |
|
"learning_rate": 2.6378634347456996e-05, |
|
"loss": 0.7166, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.15625, |
|
"grad_norm": 2.4141595363616943, |
|
"learning_rate": 2.6286959685166603e-05, |
|
"loss": 0.444, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.159375, |
|
"grad_norm": 3.2262306213378906, |
|
"learning_rate": 2.6195236569571454e-05, |
|
"loss": 0.6237, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.1625, |
|
"grad_norm": 2.130065441131592, |
|
"learning_rate": 2.6103468453995017e-05, |
|
"loss": 0.4197, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.165625, |
|
"grad_norm": 2.9710662364959717, |
|
"learning_rate": 2.601165879345496e-05, |
|
"loss": 0.5569, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.16875, |
|
"grad_norm": 2.55246901512146, |
|
"learning_rate": 2.591981104453313e-05, |
|
"loss": 0.5274, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"grad_norm": 2.84503436088562, |
|
"learning_rate": 2.5827928665245356e-05, |
|
"loss": 0.6259, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.175, |
|
"grad_norm": 3.342602491378784, |
|
"learning_rate": 2.5736015114911275e-05, |
|
"loss": 0.7696, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.178125, |
|
"grad_norm": 2.747089147567749, |
|
"learning_rate": 2.5644073854024117e-05, |
|
"loss": 0.6437, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.18125, |
|
"grad_norm": 2.5642967224121094, |
|
"learning_rate": 2.5552108344120387e-05, |
|
"loss": 0.5067, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.184375, |
|
"grad_norm": 2.4318668842315674, |
|
"learning_rate": 2.546012204764955e-05, |
|
"loss": 0.3927, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.1875, |
|
"grad_norm": 2.7380220890045166, |
|
"learning_rate": 2.536811842784369e-05, |
|
"loss": 0.4557, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.190625, |
|
"grad_norm": 2.0136771202087402, |
|
"learning_rate": 2.5276100948587075e-05, |
|
"loss": 0.2425, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.19375, |
|
"grad_norm": 1.7208062410354614, |
|
"learning_rate": 2.51840730742858e-05, |
|
"loss": 0.1677, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.196875, |
|
"grad_norm": 1.7164028882980347, |
|
"learning_rate": 2.5092038269737324e-05, |
|
"loss": 0.3555, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 3.0403032302856445, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.8643, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_VitaminC_cosine_accuracy": 0.55859375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8228827118873596, |
|
"eval_VitaminC_cosine_ap": 0.5496046521684337, |
|
"eval_VitaminC_cosine_f1": 0.6657824933687002, |
|
"eval_VitaminC_cosine_f1_threshold": 0.2927078902721405, |
|
"eval_VitaminC_cosine_precision": 0.4990059642147117, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 303.5928649902344, |
|
"eval_VitaminC_dot_ap": 0.5333968837571262, |
|
"eval_VitaminC_dot_f1": 0.6657824933687002, |
|
"eval_VitaminC_dot_f1_threshold": 99.95751953125, |
|
"eval_VitaminC_dot_precision": 0.4990059642147117, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.552734375, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.029778480529785, |
|
"eval_VitaminC_euclidean_ap": 0.5497621377316283, |
|
"eval_VitaminC_euclidean_f1": 0.6657824933687002, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.023883819580078, |
|
"eval_VitaminC_euclidean_precision": 0.4990059642147117, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 239.0825653076172, |
|
"eval_VitaminC_manhattan_ap": 0.550887748657308, |
|
"eval_VitaminC_manhattan_f1": 0.6666666666666666, |
|
"eval_VitaminC_manhattan_f1_threshold": 484.42718505859375, |
|
"eval_VitaminC_manhattan_precision": 0.5, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 303.5928649902344, |
|
"eval_VitaminC_max_ap": 0.550887748657308, |
|
"eval_VitaminC_max_f1": 0.6666666666666666, |
|
"eval_VitaminC_max_f1_threshold": 484.42718505859375, |
|
"eval_VitaminC_max_precision": 0.5, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.550887748657308, |
|
"eval_sts-test_pearson_cosine": 0.8759930670182294, |
|
"eval_sts-test_pearson_dot": 0.8657397744839983, |
|
"eval_sts-test_pearson_euclidean": 0.9011306400734879, |
|
"eval_sts-test_pearson_manhattan": 0.9011853213795427, |
|
"eval_sts-test_pearson_max": 0.9011853213795427, |
|
"eval_sts-test_spearman_cosine": 0.9034707306290366, |
|
"eval_sts-test_spearman_dot": 0.868673716065233, |
|
"eval_sts-test_spearman_euclidean": 0.8986341933028996, |
|
"eval_sts-test_spearman_manhattan": 0.8983098809115962, |
|
"eval_sts-test_spearman_max": 0.9034707306290366, |
|
"eval_vitaminc-pairs_loss": 1.8801089525222778, |
|
"eval_vitaminc-pairs_runtime": 1.8688, |
|
"eval_vitaminc-pairs_samples_per_second": 57.791, |
|
"eval_vitaminc-pairs_steps_per_second": 1.07, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_negation-triplets_loss": 0.7317898273468018, |
|
"eval_negation-triplets_runtime": 0.3021, |
|
"eval_negation-triplets_samples_per_second": 211.884, |
|
"eval_negation-triplets_steps_per_second": 3.311, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_scitail-pairs-pos_loss": 0.07107817381620407, |
|
"eval_scitail-pairs-pos_runtime": 0.3882, |
|
"eval_scitail-pairs-pos_samples_per_second": 139.106, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.576, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_xsum-pairs_loss": 0.05828472599387169, |
|
"eval_xsum-pairs_runtime": 2.853, |
|
"eval_xsum-pairs_samples_per_second": 44.865, |
|
"eval_xsum-pairs_steps_per_second": 0.701, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_sciq_pairs_loss": 0.019503507763147354, |
|
"eval_sciq_pairs_runtime": 3.7158, |
|
"eval_sciq_pairs_samples_per_second": 34.448, |
|
"eval_sciq_pairs_steps_per_second": 0.538, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_qasc_pairs_loss": 0.11732859164476395, |
|
"eval_qasc_pairs_runtime": 0.605, |
|
"eval_qasc_pairs_samples_per_second": 211.578, |
|
"eval_qasc_pairs_steps_per_second": 3.306, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_openbookqa_pairs_loss": 0.7122623324394226, |
|
"eval_openbookqa_pairs_runtime": 0.5839, |
|
"eval_openbookqa_pairs_samples_per_second": 219.199, |
|
"eval_openbookqa_pairs_steps_per_second": 3.425, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_msmarco_pairs_loss": 0.28523409366607666, |
|
"eval_msmarco_pairs_runtime": 1.4705, |
|
"eval_msmarco_pairs_samples_per_second": 87.043, |
|
"eval_msmarco_pairs_steps_per_second": 1.36, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_nq_pairs_loss": 0.178893581032753, |
|
"eval_nq_pairs_runtime": 2.8648, |
|
"eval_nq_pairs_samples_per_second": 44.681, |
|
"eval_nq_pairs_steps_per_second": 0.698, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_trivia_pairs_loss": 0.636802613735199, |
|
"eval_trivia_pairs_runtime": 4.3993, |
|
"eval_trivia_pairs_samples_per_second": 29.096, |
|
"eval_trivia_pairs_steps_per_second": 0.455, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_gooaq_pairs_loss": 0.3245222866535187, |
|
"eval_gooaq_pairs_runtime": 1.0085, |
|
"eval_gooaq_pairs_samples_per_second": 126.919, |
|
"eval_gooaq_pairs_steps_per_second": 1.983, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_paws-pos_loss": 0.024447523057460785, |
|
"eval_paws-pos_runtime": 0.6966, |
|
"eval_paws-pos_samples_per_second": 183.741, |
|
"eval_paws-pos_steps_per_second": 2.871, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.203125, |
|
"grad_norm": 3.0316460132598877, |
|
"learning_rate": 2.4907961730262685e-05, |
|
"loss": 0.6056, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.20625, |
|
"grad_norm": 3.3051912784576416, |
|
"learning_rate": 2.4815926925714205e-05, |
|
"loss": 0.5924, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.209375, |
|
"grad_norm": 2.5136680603027344, |
|
"learning_rate": 2.4723899051412934e-05, |
|
"loss": 0.4131, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.2125, |
|
"grad_norm": 2.1033709049224854, |
|
"learning_rate": 2.463188157215632e-05, |
|
"loss": 0.3347, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.215625, |
|
"grad_norm": 2.217355728149414, |
|
"learning_rate": 2.4539877952350458e-05, |
|
"loss": 0.4317, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.21875, |
|
"grad_norm": 1.9194687604904175, |
|
"learning_rate": 2.444789165587962e-05, |
|
"loss": 0.2488, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.221875, |
|
"grad_norm": 3.4252638816833496, |
|
"learning_rate": 2.435592614597589e-05, |
|
"loss": 0.6856, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.225, |
|
"grad_norm": 2.816314935684204, |
|
"learning_rate": 2.4263984885088735e-05, |
|
"loss": 0.5261, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.228125, |
|
"grad_norm": 2.5925676822662354, |
|
"learning_rate": 2.4172071334754654e-05, |
|
"loss": 0.4683, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.23125, |
|
"grad_norm": 3.6116645336151123, |
|
"learning_rate": 2.4080188955466874e-05, |
|
"loss": 1.066, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.234375, |
|
"grad_norm": 2.6395368576049805, |
|
"learning_rate": 2.398834120654504e-05, |
|
"loss": 0.5434, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.2375, |
|
"grad_norm": 2.5325918197631836, |
|
"learning_rate": 2.3896531546004992e-05, |
|
"loss": 0.4129, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.240625, |
|
"grad_norm": 1.9665679931640625, |
|
"learning_rate": 2.380476343042855e-05, |
|
"loss": 0.3367, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.24375, |
|
"grad_norm": 3.6547625064849854, |
|
"learning_rate": 2.3713040314833403e-05, |
|
"loss": 0.716, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.246875, |
|
"grad_norm": 2.7950963973999023, |
|
"learning_rate": 2.3621365652543013e-05, |
|
"loss": 0.4767, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.975703239440918, |
|
"learning_rate": 2.3529742895056592e-05, |
|
"loss": 0.3659, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.253125, |
|
"grad_norm": 2.8645551204681396, |
|
"learning_rate": 2.3438175491919185e-05, |
|
"loss": 0.4731, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.25625, |
|
"grad_norm": 2.649005889892578, |
|
"learning_rate": 2.3346666890591757e-05, |
|
"loss": 0.4562, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.259375, |
|
"grad_norm": 2.2082812786102295, |
|
"learning_rate": 2.3255220536321427e-05, |
|
"loss": 0.3397, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.2625, |
|
"grad_norm": 3.8959875106811523, |
|
"learning_rate": 2.3163839872011763e-05, |
|
"loss": 1.2082, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.265625, |
|
"grad_norm": 2.6099252700805664, |
|
"learning_rate": 2.307252833809311e-05, |
|
"loss": 0.6162, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.26875, |
|
"grad_norm": 2.4495608806610107, |
|
"learning_rate": 2.298128937239311e-05, |
|
"loss": 0.4767, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.271875, |
|
"grad_norm": 2.724579095840454, |
|
"learning_rate": 2.2890126410007264e-05, |
|
"loss": 0.4384, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.275, |
|
"grad_norm": 2.7510993480682373, |
|
"learning_rate": 2.2799042883169576e-05, |
|
"loss": 0.5368, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.278125, |
|
"grad_norm": 2.994795083999634, |
|
"learning_rate": 2.270804222112335e-05, |
|
"loss": 0.6885, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.28125, |
|
"grad_norm": 2.59830904006958, |
|
"learning_rate": 2.2617127849992082e-05, |
|
"loss": 0.4318, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.284375, |
|
"grad_norm": 2.603785276412964, |
|
"learning_rate": 2.252630319265045e-05, |
|
"loss": 0.5648, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.2875, |
|
"grad_norm": 1.8414777517318726, |
|
"learning_rate": 2.2435571668595482e-05, |
|
"loss": 0.3, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.290625, |
|
"grad_norm": 3.080265998840332, |
|
"learning_rate": 2.2344936693817774e-05, |
|
"loss": 0.573, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.29375, |
|
"grad_norm": 3.2287120819091797, |
|
"learning_rate": 2.225440168067289e-05, |
|
"loss": 0.6759, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.296875, |
|
"grad_norm": 3.5036377906799316, |
|
"learning_rate": 2.216397003775291e-05, |
|
"loss": 1.0739, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 3.4340429306030273, |
|
"learning_rate": 2.207364516975808e-05, |
|
"loss": 0.6794, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_VitaminC_cosine_accuracy": 0.556640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8248050212860107, |
|
"eval_VitaminC_cosine_ap": 0.549721039851088, |
|
"eval_VitaminC_cosine_f1": 0.6675531914893617, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3625495135784149, |
|
"eval_VitaminC_cosine_precision": 0.500998003992016, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 315.43896484375, |
|
"eval_VitaminC_dot_ap": 0.5352429908255126, |
|
"eval_VitaminC_dot_f1": 0.6675531914893617, |
|
"eval_VitaminC_dot_f1_threshold": 129.65655517578125, |
|
"eval_VitaminC_dot_precision": 0.500998003992016, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.5546875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.217185974121094, |
|
"eval_VitaminC_euclidean_ap": 0.5506836806067088, |
|
"eval_VitaminC_euclidean_f1": 0.6657824933687002, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.268470764160156, |
|
"eval_VitaminC_euclidean_precision": 0.4990059642147117, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.552734375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 228.33251953125, |
|
"eval_VitaminC_manhattan_ap": 0.5499105636757091, |
|
"eval_VitaminC_manhattan_f1": 0.6666666666666667, |
|
"eval_VitaminC_manhattan_f1_threshold": 475.83892822265625, |
|
"eval_VitaminC_manhattan_precision": 0.501002004008016, |
|
"eval_VitaminC_manhattan_recall": 0.9960159362549801, |
|
"eval_VitaminC_max_accuracy": 0.556640625, |
|
"eval_VitaminC_max_accuracy_threshold": 315.43896484375, |
|
"eval_VitaminC_max_ap": 0.5506836806067088, |
|
"eval_VitaminC_max_f1": 0.6675531914893617, |
|
"eval_VitaminC_max_f1_threshold": 475.83892822265625, |
|
"eval_VitaminC_max_precision": 0.501002004008016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5506836806067088, |
|
"eval_sts-test_pearson_cosine": 0.8783564854148046, |
|
"eval_sts-test_pearson_dot": 0.8688921197467538, |
|
"eval_sts-test_pearson_euclidean": 0.901280483137533, |
|
"eval_sts-test_pearson_manhattan": 0.9014338360947061, |
|
"eval_sts-test_pearson_max": 0.9014338360947061, |
|
"eval_sts-test_spearman_cosine": 0.9035353066992244, |
|
"eval_sts-test_spearman_dot": 0.8704091252307301, |
|
"eval_sts-test_spearman_euclidean": 0.8982903693616295, |
|
"eval_sts-test_spearman_manhattan": 0.897955987936513, |
|
"eval_sts-test_spearman_max": 0.9035353066992244, |
|
"eval_vitaminc-pairs_loss": 1.8975528478622437, |
|
"eval_vitaminc-pairs_runtime": 1.8521, |
|
"eval_vitaminc-pairs_samples_per_second": 58.313, |
|
"eval_vitaminc-pairs_steps_per_second": 1.08, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_negation-triplets_loss": 0.7549135684967041, |
|
"eval_negation-triplets_runtime": 0.2958, |
|
"eval_negation-triplets_samples_per_second": 216.337, |
|
"eval_negation-triplets_steps_per_second": 3.38, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_scitail-pairs-pos_loss": 0.07042308896780014, |
|
"eval_scitail-pairs-pos_runtime": 0.3833, |
|
"eval_scitail-pairs-pos_samples_per_second": 140.89, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.609, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_xsum-pairs_loss": 0.054973307996988297, |
|
"eval_xsum-pairs_runtime": 2.8675, |
|
"eval_xsum-pairs_samples_per_second": 44.639, |
|
"eval_xsum-pairs_steps_per_second": 0.697, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_sciq_pairs_loss": 0.019865412265062332, |
|
"eval_sciq_pairs_runtime": 3.6462, |
|
"eval_sciq_pairs_samples_per_second": 35.105, |
|
"eval_sciq_pairs_steps_per_second": 0.549, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_qasc_pairs_loss": 0.10839240998029709, |
|
"eval_qasc_pairs_runtime": 0.6001, |
|
"eval_qasc_pairs_samples_per_second": 213.308, |
|
"eval_qasc_pairs_steps_per_second": 3.333, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_openbookqa_pairs_loss": 0.709105908870697, |
|
"eval_openbookqa_pairs_runtime": 0.5773, |
|
"eval_openbookqa_pairs_samples_per_second": 221.728, |
|
"eval_openbookqa_pairs_steps_per_second": 3.464, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_msmarco_pairs_loss": 0.2810967266559601, |
|
"eval_msmarco_pairs_runtime": 1.4691, |
|
"eval_msmarco_pairs_samples_per_second": 87.125, |
|
"eval_msmarco_pairs_steps_per_second": 1.361, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_nq_pairs_loss": 0.16148869693279266, |
|
"eval_nq_pairs_runtime": 2.8649, |
|
"eval_nq_pairs_samples_per_second": 44.679, |
|
"eval_nq_pairs_steps_per_second": 0.698, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_trivia_pairs_loss": 0.6475186944007874, |
|
"eval_trivia_pairs_runtime": 4.403, |
|
"eval_trivia_pairs_samples_per_second": 29.071, |
|
"eval_trivia_pairs_steps_per_second": 0.454, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_gooaq_pairs_loss": 0.31666722893714905, |
|
"eval_gooaq_pairs_runtime": 1.0071, |
|
"eval_gooaq_pairs_samples_per_second": 127.1, |
|
"eval_gooaq_pairs_steps_per_second": 1.986, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_paws-pos_loss": 0.025139717385172844, |
|
"eval_paws-pos_runtime": 0.6875, |
|
"eval_paws-pos_samples_per_second": 186.173, |
|
"eval_paws-pos_steps_per_second": 2.909, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.303125, |
|
"grad_norm": 2.347867012023926, |
|
"learning_rate": 2.1983430477368622e-05, |
|
"loss": 0.4515, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.30625, |
|
"grad_norm": 2.957559585571289, |
|
"learning_rate": 2.1893329357116726e-05, |
|
"loss": 0.5992, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.309375, |
|
"grad_norm": 2.799776792526245, |
|
"learning_rate": 2.180334520125863e-05, |
|
"loss": 0.7221, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.3125, |
|
"grad_norm": 1.9639122486114502, |
|
"learning_rate": 2.1713481397646955e-05, |
|
"loss": 0.3968, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.315625, |
|
"grad_norm": 2.6604442596435547, |
|
"learning_rate": 2.162374132960311e-05, |
|
"loss": 0.4198, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.31875, |
|
"grad_norm": 2.5121357440948486, |
|
"learning_rate": 2.1534128375789932e-05, |
|
"loss": 0.6268, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.321875, |
|
"grad_norm": 2.014528274536133, |
|
"learning_rate": 2.1444645910084495e-05, |
|
"loss": 0.3976, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.325, |
|
"grad_norm": 2.713228464126587, |
|
"learning_rate": 2.1355297301451044e-05, |
|
"loss": 0.6003, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.328125, |
|
"grad_norm": 2.6102914810180664, |
|
"learning_rate": 2.12660859138142e-05, |
|
"loss": 0.4381, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.33125, |
|
"grad_norm": 3.1329894065856934, |
|
"learning_rate": 2.1177015105932287e-05, |
|
"loss": 0.8803, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.334375, |
|
"grad_norm": 2.3437535762786865, |
|
"learning_rate": 2.108808823127087e-05, |
|
"loss": 0.5635, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.3375, |
|
"grad_norm": 2.732607841491699, |
|
"learning_rate": 2.0999308637876527e-05, |
|
"loss": 0.5262, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.340625, |
|
"grad_norm": 2.553740978240967, |
|
"learning_rate": 2.091067966825077e-05, |
|
"loss": 0.6506, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.34375, |
|
"grad_norm": 2.2489590644836426, |
|
"learning_rate": 2.0822204659224207e-05, |
|
"loss": 0.3486, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.346875, |
|
"grad_norm": 3.328228235244751, |
|
"learning_rate": 2.0733886941830926e-05, |
|
"loss": 0.9099, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 2.4730563163757324, |
|
"learning_rate": 2.064572984118307e-05, |
|
"loss": 0.4199, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.353125, |
|
"grad_norm": 2.7208938598632812, |
|
"learning_rate": 2.055773667634564e-05, |
|
"loss": 0.4908, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.35625, |
|
"grad_norm": 2.666827440261841, |
|
"learning_rate": 2.0469910760211578e-05, |
|
"loss": 0.6869, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.359375, |
|
"grad_norm": 2.515075922012329, |
|
"learning_rate": 2.038225539937698e-05, |
|
"loss": 0.5644, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.3625, |
|
"grad_norm": 3.286777973175049, |
|
"learning_rate": 2.0294773894016632e-05, |
|
"loss": 0.6714, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.365625, |
|
"grad_norm": 2.477515935897827, |
|
"learning_rate": 2.0207469537759766e-05, |
|
"loss": 0.4976, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.36875, |
|
"grad_norm": 2.30999493598938, |
|
"learning_rate": 2.0120345617566058e-05, |
|
"loss": 0.4468, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.371875, |
|
"grad_norm": 2.011974573135376, |
|
"learning_rate": 2.003340541360186e-05, |
|
"loss": 0.3923, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 2.466869592666626, |
|
"learning_rate": 1.9946652199116702e-05, |
|
"loss": 0.5753, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.378125, |
|
"grad_norm": 2.6485002040863037, |
|
"learning_rate": 1.986008924032009e-05, |
|
"loss": 0.5134, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.38125, |
|
"grad_norm": 2.3299734592437744, |
|
"learning_rate": 1.9773719796258484e-05, |
|
"loss": 0.3858, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.384375, |
|
"grad_norm": 3.0803678035736084, |
|
"learning_rate": 1.9687547118692646e-05, |
|
"loss": 0.6681, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.3875, |
|
"grad_norm": 2.463984727859497, |
|
"learning_rate": 1.960157445197518e-05, |
|
"loss": 0.4702, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.390625, |
|
"grad_norm": 2.5118319988250732, |
|
"learning_rate": 1.9515805032928393e-05, |
|
"loss": 0.501, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.39375, |
|
"grad_norm": 2.670452356338501, |
|
"learning_rate": 1.943024209072244e-05, |
|
"loss": 0.459, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.396875, |
|
"grad_norm": 2.8598179817199707, |
|
"learning_rate": 1.9344888846753727e-05, |
|
"loss": 0.5879, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.703799247741699, |
|
"learning_rate": 1.9259748514523654e-05, |
|
"loss": 0.6276, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_VitaminC_cosine_accuracy": 0.5546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8286198973655701, |
|
"eval_VitaminC_cosine_ap": 0.5491639681085214, |
|
"eval_VitaminC_cosine_f1": 0.6666666666666667, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3577578365802765, |
|
"eval_VitaminC_cosine_precision": 0.501002004008016, |
|
"eval_VitaminC_cosine_recall": 0.9960159362549801, |
|
"eval_VitaminC_dot_accuracy": 0.552734375, |
|
"eval_VitaminC_dot_accuracy_threshold": 305.3611145019531, |
|
"eval_VitaminC_dot_ap": 0.5346765167717246, |
|
"eval_VitaminC_dot_f1": 0.6675531914893617, |
|
"eval_VitaminC_dot_f1_threshold": 120.80284118652344, |
|
"eval_VitaminC_dot_precision": 0.500998003992016, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.552734375, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 15.638836860656738, |
|
"eval_VitaminC_euclidean_ap": 0.551666574153856, |
|
"eval_VitaminC_euclidean_f1": 0.6675531914893617, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.694026947021484, |
|
"eval_VitaminC_euclidean_precision": 0.500998003992016, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.55859375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 345.3646240234375, |
|
"eval_VitaminC_manhattan_ap": 0.5493612263798584, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 489.2554931640625, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 345.3646240234375, |
|
"eval_VitaminC_max_ap": 0.551666574153856, |
|
"eval_VitaminC_max_f1": 0.6675531914893617, |
|
"eval_VitaminC_max_f1_threshold": 489.2554931640625, |
|
"eval_VitaminC_max_precision": 0.501002004008016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.551666574153856, |
|
"eval_sts-test_pearson_cosine": 0.8776689405218701, |
|
"eval_sts-test_pearson_dot": 0.8671968346407674, |
|
"eval_sts-test_pearson_euclidean": 0.9011981135741202, |
|
"eval_sts-test_pearson_manhattan": 0.901224194183572, |
|
"eval_sts-test_pearson_max": 0.901224194183572, |
|
"eval_sts-test_spearman_cosine": 0.9040082380296086, |
|
"eval_sts-test_spearman_dot": 0.8686231471398608, |
|
"eval_sts-test_spearman_euclidean": 0.8983323907960761, |
|
"eval_sts-test_spearman_manhattan": 0.898603359683801, |
|
"eval_sts-test_spearman_max": 0.9040082380296086, |
|
"eval_vitaminc-pairs_loss": 1.8429665565490723, |
|
"eval_vitaminc-pairs_runtime": 1.8248, |
|
"eval_vitaminc-pairs_samples_per_second": 59.185, |
|
"eval_vitaminc-pairs_steps_per_second": 1.096, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_negation-triplets_loss": 0.6982068419456482, |
|
"eval_negation-triplets_runtime": 0.2935, |
|
"eval_negation-triplets_samples_per_second": 218.06, |
|
"eval_negation-triplets_steps_per_second": 3.407, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_scitail-pairs-pos_loss": 0.05678475275635719, |
|
"eval_scitail-pairs-pos_runtime": 0.3607, |
|
"eval_scitail-pairs-pos_samples_per_second": 149.722, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.773, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_xsum-pairs_loss": 0.04836395010352135, |
|
"eval_xsum-pairs_runtime": 2.8385, |
|
"eval_xsum-pairs_samples_per_second": 45.094, |
|
"eval_xsum-pairs_steps_per_second": 0.705, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_sciq_pairs_loss": 0.019589349627494812, |
|
"eval_sciq_pairs_runtime": 3.6678, |
|
"eval_sciq_pairs_samples_per_second": 34.898, |
|
"eval_sciq_pairs_steps_per_second": 0.545, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_qasc_pairs_loss": 0.11168085038661957, |
|
"eval_qasc_pairs_runtime": 0.5997, |
|
"eval_qasc_pairs_samples_per_second": 213.44, |
|
"eval_qasc_pairs_steps_per_second": 3.335, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_openbookqa_pairs_loss": 0.7535218596458435, |
|
"eval_openbookqa_pairs_runtime": 0.5778, |
|
"eval_openbookqa_pairs_samples_per_second": 221.542, |
|
"eval_openbookqa_pairs_steps_per_second": 3.462, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_msmarco_pairs_loss": 0.27821871638298035, |
|
"eval_msmarco_pairs_runtime": 1.4582, |
|
"eval_msmarco_pairs_samples_per_second": 87.779, |
|
"eval_msmarco_pairs_steps_per_second": 1.372, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_nq_pairs_loss": 0.15653903782367706, |
|
"eval_nq_pairs_runtime": 2.8546, |
|
"eval_nq_pairs_samples_per_second": 44.84, |
|
"eval_nq_pairs_steps_per_second": 0.701, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_trivia_pairs_loss": 0.6306825280189514, |
|
"eval_trivia_pairs_runtime": 4.3878, |
|
"eval_trivia_pairs_samples_per_second": 29.172, |
|
"eval_trivia_pairs_steps_per_second": 0.456, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_gooaq_pairs_loss": 0.3191468417644501, |
|
"eval_gooaq_pairs_runtime": 0.9973, |
|
"eval_gooaq_pairs_samples_per_second": 128.345, |
|
"eval_gooaq_pairs_steps_per_second": 2.005, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_paws-pos_loss": 0.024477336555719376, |
|
"eval_paws-pos_runtime": 0.6847, |
|
"eval_paws-pos_samples_per_second": 186.937, |
|
"eval_paws-pos_steps_per_second": 2.921, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.403125, |
|
"grad_norm": 2.7174854278564453, |
|
"learning_rate": 1.917482429951761e-05, |
|
"loss": 0.5358, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.40625, |
|
"grad_norm": 2.997868061065674, |
|
"learning_rate": 1.909011939908428e-05, |
|
"loss": 0.8326, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.409375, |
|
"grad_norm": 2.0322728157043457, |
|
"learning_rate": 1.90056370023153e-05, |
|
"loss": 0.2866, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.4125, |
|
"grad_norm": 1.7908676862716675, |
|
"learning_rate": 1.8921380289925155e-05, |
|
"loss": 0.247, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.415625, |
|
"grad_norm": 2.5119776725769043, |
|
"learning_rate": 1.8837352434131445e-05, |
|
"loss": 0.519, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.41875, |
|
"grad_norm": 2.468385696411133, |
|
"learning_rate": 1.8753556598535448e-05, |
|
"loss": 0.4117, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.421875, |
|
"grad_norm": 2.097646713256836, |
|
"learning_rate": 1.8669995938003007e-05, |
|
"loss": 0.437, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.425, |
|
"grad_norm": 2.275872230529785, |
|
"learning_rate": 1.8586673598545775e-05, |
|
"loss": 0.3619, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.428125, |
|
"grad_norm": 2.5506107807159424, |
|
"learning_rate": 1.8503592717202724e-05, |
|
"loss": 0.4273, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.43125, |
|
"grad_norm": 2.219841718673706, |
|
"learning_rate": 1.842075642192209e-05, |
|
"loss": 0.2739, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.434375, |
|
"grad_norm": 2.54673433303833, |
|
"learning_rate": 1.8338167831443567e-05, |
|
"loss": 0.5714, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.4375, |
|
"grad_norm": 2.696007251739502, |
|
"learning_rate": 1.82558300551809e-05, |
|
"loss": 0.5485, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.440625, |
|
"grad_norm": 2.292741537094116, |
|
"learning_rate": 1.8173746193104848e-05, |
|
"loss": 0.4829, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.44375, |
|
"grad_norm": 2.3757193088531494, |
|
"learning_rate": 1.80919193356264e-05, |
|
"loss": 0.4904, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.446875, |
|
"grad_norm": 3.299426555633545, |
|
"learning_rate": 1.801035256348051e-05, |
|
"loss": 0.6449, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 3.2711825370788574, |
|
"learning_rate": 1.7929048947610038e-05, |
|
"loss": 0.6896, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.453125, |
|
"grad_norm": 2.4364447593688965, |
|
"learning_rate": 1.7848011549050174e-05, |
|
"loss": 0.4174, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.45625, |
|
"grad_norm": 2.7479851245880127, |
|
"learning_rate": 1.776724341881316e-05, |
|
"loss": 0.5254, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.459375, |
|
"grad_norm": 2.636861801147461, |
|
"learning_rate": 1.7686747597773465e-05, |
|
"loss": 0.5287, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.4625, |
|
"grad_norm": 1.8790123462677002, |
|
"learning_rate": 1.7606527116553243e-05, |
|
"loss": 0.2421, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.465625, |
|
"grad_norm": 2.039740800857544, |
|
"learning_rate": 1.7526584995408277e-05, |
|
"loss": 0.3939, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.46875, |
|
"grad_norm": 3.1484439373016357, |
|
"learning_rate": 1.744692424411424e-05, |
|
"loss": 0.7248, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.471875, |
|
"grad_norm": 2.309475898742676, |
|
"learning_rate": 1.7367547861853396e-05, |
|
"loss": 0.3479, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.475, |
|
"grad_norm": 2.4634172916412354, |
|
"learning_rate": 1.7288458837101676e-05, |
|
"loss": 0.472, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.478125, |
|
"grad_norm": 2.701162815093994, |
|
"learning_rate": 1.7209660147516157e-05, |
|
"loss": 0.5639, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.48125, |
|
"grad_norm": 2.2868311405181885, |
|
"learning_rate": 1.713115475982297e-05, |
|
"loss": 0.4077, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.484375, |
|
"grad_norm": 2.256727933883667, |
|
"learning_rate": 1.705294562970558e-05, |
|
"loss": 0.3173, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.4875, |
|
"grad_norm": 2.110504388809204, |
|
"learning_rate": 1.6975035701693544e-05, |
|
"loss": 0.3307, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.490625, |
|
"grad_norm": 2.267214059829712, |
|
"learning_rate": 1.6897427909051608e-05, |
|
"loss": 0.3761, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.49375, |
|
"grad_norm": 2.538956880569458, |
|
"learning_rate": 1.6820125173669307e-05, |
|
"loss": 0.5454, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.496875, |
|
"grad_norm": 1.8530148267745972, |
|
"learning_rate": 1.6743130405950932e-05, |
|
"loss": 0.309, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.507021903991699, |
|
"learning_rate": 1.6666446504705974e-05, |
|
"loss": 0.4082, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_VitaminC_cosine_accuracy": 0.556640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7370772361755371, |
|
"eval_VitaminC_cosine_ap": 0.5534084328915541, |
|
"eval_VitaminC_cosine_f1": 0.6657824933687002, |
|
"eval_VitaminC_cosine_f1_threshold": 0.2802589535713196, |
|
"eval_VitaminC_cosine_precision": 0.4990059642147117, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.560546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 308.4664611816406, |
|
"eval_VitaminC_dot_ap": 0.5342245787700969, |
|
"eval_VitaminC_dot_f1": 0.6666666666666666, |
|
"eval_VitaminC_dot_f1_threshold": 113.09681701660156, |
|
"eval_VitaminC_dot_precision": 0.5, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.5546875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 11.881275177001953, |
|
"eval_VitaminC_euclidean_ap": 0.5562125403421339, |
|
"eval_VitaminC_euclidean_f1": 0.6657824933687002, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.934049606323242, |
|
"eval_VitaminC_euclidean_precision": 0.4990059642147117, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 235.32266235351562, |
|
"eval_VitaminC_manhattan_ap": 0.5543420221752726, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 492.56402587890625, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.560546875, |
|
"eval_VitaminC_max_accuracy_threshold": 308.4664611816406, |
|
"eval_VitaminC_max_ap": 0.5562125403421339, |
|
"eval_VitaminC_max_f1": 0.6666666666666666, |
|
"eval_VitaminC_max_f1_threshold": 492.56402587890625, |
|
"eval_VitaminC_max_precision": 0.5, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5562125403421339, |
|
"eval_sts-test_pearson_cosine": 0.8785940980445964, |
|
"eval_sts-test_pearson_dot": 0.868901198999867, |
|
"eval_sts-test_pearson_euclidean": 0.9008999462703983, |
|
"eval_sts-test_pearson_manhattan": 0.9007358817864316, |
|
"eval_sts-test_pearson_max": 0.9008999462703983, |
|
"eval_sts-test_spearman_cosine": 0.9034113658980666, |
|
"eval_sts-test_spearman_dot": 0.8689611981684112, |
|
"eval_sts-test_spearman_euclidean": 0.8982906826204593, |
|
"eval_sts-test_spearman_manhattan": 0.8980298275178087, |
|
"eval_sts-test_spearman_max": 0.9034113658980666, |
|
"eval_vitaminc-pairs_loss": 1.8594883680343628, |
|
"eval_vitaminc-pairs_runtime": 1.8263, |
|
"eval_vitaminc-pairs_samples_per_second": 59.137, |
|
"eval_vitaminc-pairs_steps_per_second": 1.095, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_negation-triplets_loss": 0.7203199863433838, |
|
"eval_negation-triplets_runtime": 0.2915, |
|
"eval_negation-triplets_samples_per_second": 219.564, |
|
"eval_negation-triplets_steps_per_second": 3.431, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_scitail-pairs-pos_loss": 0.07524989545345306, |
|
"eval_scitail-pairs-pos_runtime": 0.37, |
|
"eval_scitail-pairs-pos_samples_per_second": 145.939, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.703, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_xsum-pairs_loss": 0.04331779107451439, |
|
"eval_xsum-pairs_runtime": 2.8387, |
|
"eval_xsum-pairs_samples_per_second": 45.091, |
|
"eval_xsum-pairs_steps_per_second": 0.705, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_sciq_pairs_loss": 0.018652573227882385, |
|
"eval_sciq_pairs_runtime": 3.6202, |
|
"eval_sciq_pairs_samples_per_second": 35.357, |
|
"eval_sciq_pairs_steps_per_second": 0.552, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_qasc_pairs_loss": 0.10793650150299072, |
|
"eval_qasc_pairs_runtime": 0.5983, |
|
"eval_qasc_pairs_samples_per_second": 213.952, |
|
"eval_qasc_pairs_steps_per_second": 3.343, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_openbookqa_pairs_loss": 0.6959180235862732, |
|
"eval_openbookqa_pairs_runtime": 0.5741, |
|
"eval_openbookqa_pairs_samples_per_second": 222.961, |
|
"eval_openbookqa_pairs_steps_per_second": 3.484, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_msmarco_pairs_loss": 0.26085397601127625, |
|
"eval_msmarco_pairs_runtime": 1.4595, |
|
"eval_msmarco_pairs_samples_per_second": 87.699, |
|
"eval_msmarco_pairs_steps_per_second": 1.37, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_nq_pairs_loss": 0.1553785651922226, |
|
"eval_nq_pairs_runtime": 2.8659, |
|
"eval_nq_pairs_samples_per_second": 44.663, |
|
"eval_nq_pairs_steps_per_second": 0.698, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_trivia_pairs_loss": 0.6472769379615784, |
|
"eval_trivia_pairs_runtime": 4.3924, |
|
"eval_trivia_pairs_samples_per_second": 29.141, |
|
"eval_trivia_pairs_steps_per_second": 0.455, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_gooaq_pairs_loss": 0.3059709370136261, |
|
"eval_gooaq_pairs_runtime": 0.9999, |
|
"eval_gooaq_pairs_samples_per_second": 128.009, |
|
"eval_gooaq_pairs_steps_per_second": 2.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_paws-pos_loss": 0.02474558725953102, |
|
"eval_paws-pos_runtime": 0.6798, |
|
"eval_paws-pos_samples_per_second": 188.303, |
|
"eval_paws-pos_steps_per_second": 2.942, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.503125, |
|
"grad_norm": 1.756934404373169, |
|
"learning_rate": 1.6590076357039962e-05, |
|
"loss": 0.2147, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.50625, |
|
"grad_norm": 2.775935411453247, |
|
"learning_rate": 1.6514022838245802e-05, |
|
"loss": 0.5614, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.509375, |
|
"grad_norm": 2.4856698513031006, |
|
"learning_rate": 1.6438288811695494e-05, |
|
"loss": 0.3865, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.5125, |
|
"grad_norm": 1.2785615921020508, |
|
"learning_rate": 1.636287712873232e-05, |
|
"loss": 0.1715, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.515625, |
|
"grad_norm": 2.2189393043518066, |
|
"learning_rate": 1.6287790628563536e-05, |
|
"loss": 0.3597, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.51875, |
|
"grad_norm": 2.2382972240448, |
|
"learning_rate": 1.6213032138153418e-05, |
|
"loss": 0.3827, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.521875, |
|
"grad_norm": 2.6651275157928467, |
|
"learning_rate": 1.613860447211689e-05, |
|
"loss": 0.4895, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.525, |
|
"grad_norm": 2.810739517211914, |
|
"learning_rate": 1.60645104326135e-05, |
|
"loss": 0.4987, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.528125, |
|
"grad_norm": 2.383479595184326, |
|
"learning_rate": 1.599075280924197e-05, |
|
"loss": 0.4482, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.53125, |
|
"grad_norm": 2.4470787048339844, |
|
"learning_rate": 1.5917334378935118e-05, |
|
"loss": 0.5808, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.534375, |
|
"grad_norm": 2.437572956085205, |
|
"learning_rate": 1.584425790585536e-05, |
|
"loss": 0.3916, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.5375, |
|
"grad_norm": 3.223665952682495, |
|
"learning_rate": 1.5771526141290602e-05, |
|
"loss": 1.0877, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.540625, |
|
"grad_norm": 2.521468162536621, |
|
"learning_rate": 1.5699141823550662e-05, |
|
"loss": 0.4119, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.54375, |
|
"grad_norm": 2.7671728134155273, |
|
"learning_rate": 1.562710767786421e-05, |
|
"loss": 0.6078, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.546875, |
|
"grad_norm": 1.7431325912475586, |
|
"learning_rate": 1.5555426416276095e-05, |
|
"loss": 0.2441, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.172173261642456, |
|
"learning_rate": 1.548410073754532e-05, |
|
"loss": 0.4769, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.553125, |
|
"grad_norm": 1.587640404701233, |
|
"learning_rate": 1.5413133327043365e-05, |
|
"loss": 0.218, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.55625, |
|
"grad_norm": 2.7734944820404053, |
|
"learning_rate": 1.5342526856653133e-05, |
|
"loss": 0.6377, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.559375, |
|
"grad_norm": 1.6427900791168213, |
|
"learning_rate": 1.5272283984668313e-05, |
|
"loss": 0.2391, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 2.130922794342041, |
|
"learning_rate": 1.5202407355693354e-05, |
|
"loss": 0.3645, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.565625, |
|
"grad_norm": 2.3365015983581543, |
|
"learning_rate": 1.5132899600543823e-05, |
|
"loss": 0.4185, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.56875, |
|
"grad_norm": 1.7738977670669556, |
|
"learning_rate": 1.5063763336147424e-05, |
|
"loss": 0.3363, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.571875, |
|
"grad_norm": 1.8385276794433594, |
|
"learning_rate": 1.4995001165445442e-05, |
|
"loss": 0.3712, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.575, |
|
"grad_norm": 1.8053840398788452, |
|
"learning_rate": 1.4926615677294724e-05, |
|
"loss": 0.2995, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.578125, |
|
"grad_norm": 2.7845582962036133, |
|
"learning_rate": 1.4858609446370264e-05, |
|
"loss": 0.6178, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.58125, |
|
"grad_norm": 2.369316339492798, |
|
"learning_rate": 1.4790985033068205e-05, |
|
"loss": 0.464, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.584375, |
|
"grad_norm": 2.4763267040252686, |
|
"learning_rate": 1.4723744983409498e-05, |
|
"loss": 0.5694, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.5875, |
|
"grad_norm": 2.1269421577453613, |
|
"learning_rate": 1.4656891828943997e-05, |
|
"loss": 0.3587, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.590625, |
|
"grad_norm": 2.028308629989624, |
|
"learning_rate": 1.4590428086655196e-05, |
|
"loss": 0.3375, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.59375, |
|
"grad_norm": 1.3677244186401367, |
|
"learning_rate": 1.4524356258865409e-05, |
|
"loss": 0.1613, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.596875, |
|
"grad_norm": 1.846962571144104, |
|
"learning_rate": 1.4458678833141626e-05, |
|
"loss": 0.2811, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.5623536109924316, |
|
"learning_rate": 1.4393398282201789e-05, |
|
"loss": 0.5338, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_VitaminC_cosine_accuracy": 0.5625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7150193452835083, |
|
"eval_VitaminC_cosine_ap": 0.5536001409238264, |
|
"eval_VitaminC_cosine_f1": 0.6666666666666667, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3747650980949402, |
|
"eval_VitaminC_cosine_precision": 0.501002004008016, |
|
"eval_VitaminC_cosine_recall": 0.9960159362549801, |
|
"eval_VitaminC_dot_accuracy": 0.55859375, |
|
"eval_VitaminC_dot_accuracy_threshold": 305.93060302734375, |
|
"eval_VitaminC_dot_ap": 0.5361490037017673, |
|
"eval_VitaminC_dot_f1": 0.6684563758389263, |
|
"eval_VitaminC_dot_f1_threshold": 141.05189514160156, |
|
"eval_VitaminC_dot_precision": 0.5040485829959515, |
|
"eval_VitaminC_dot_recall": 0.9920318725099602, |
|
"eval_VitaminC_euclidean_accuracy": 0.5546875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.17225456237793, |
|
"eval_VitaminC_euclidean_ap": 0.5553095900623441, |
|
"eval_VitaminC_euclidean_f1": 0.6666666666666666, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.013614654541016, |
|
"eval_VitaminC_euclidean_precision": 0.5, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.560546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 306.5001220703125, |
|
"eval_VitaminC_manhattan_ap": 0.5528524184849768, |
|
"eval_VitaminC_manhattan_f1": 0.6675531914893617, |
|
"eval_VitaminC_manhattan_f1_threshold": 482.4728088378906, |
|
"eval_VitaminC_manhattan_precision": 0.500998003992016, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.5625, |
|
"eval_VitaminC_max_accuracy_threshold": 306.5001220703125, |
|
"eval_VitaminC_max_ap": 0.5553095900623441, |
|
"eval_VitaminC_max_f1": 0.6684563758389263, |
|
"eval_VitaminC_max_f1_threshold": 482.4728088378906, |
|
"eval_VitaminC_max_precision": 0.5040485829959515, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5553095900623441, |
|
"eval_sts-test_pearson_cosine": 0.88002263195295, |
|
"eval_sts-test_pearson_dot": 0.8704058648822381, |
|
"eval_sts-test_pearson_euclidean": 0.9024307031663734, |
|
"eval_sts-test_pearson_manhattan": 0.902236666405867, |
|
"eval_sts-test_pearson_max": 0.9024307031663734, |
|
"eval_sts-test_spearman_cosine": 0.9043963657196562, |
|
"eval_sts-test_spearman_dot": 0.8703829009915547, |
|
"eval_sts-test_spearman_euclidean": 0.8986995748957924, |
|
"eval_sts-test_spearman_manhattan": 0.8993764824755988, |
|
"eval_sts-test_spearman_max": 0.9043963657196562, |
|
"eval_vitaminc-pairs_loss": 1.8544398546218872, |
|
"eval_vitaminc-pairs_runtime": 1.8317, |
|
"eval_vitaminc-pairs_samples_per_second": 58.961, |
|
"eval_vitaminc-pairs_steps_per_second": 1.092, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_negation-triplets_loss": 0.7161268591880798, |
|
"eval_negation-triplets_runtime": 0.2916, |
|
"eval_negation-triplets_samples_per_second": 219.445, |
|
"eval_negation-triplets_steps_per_second": 3.429, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_scitail-pairs-pos_loss": 0.07522901147603989, |
|
"eval_scitail-pairs-pos_runtime": 0.3667, |
|
"eval_scitail-pairs-pos_samples_per_second": 147.259, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.727, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_xsum-pairs_loss": 0.04067877307534218, |
|
"eval_xsum-pairs_runtime": 2.8345, |
|
"eval_xsum-pairs_samples_per_second": 45.157, |
|
"eval_xsum-pairs_steps_per_second": 0.706, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_sciq_pairs_loss": 0.01821758784353733, |
|
"eval_sciq_pairs_runtime": 3.6099, |
|
"eval_sciq_pairs_samples_per_second": 35.459, |
|
"eval_sciq_pairs_steps_per_second": 0.554, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_qasc_pairs_loss": 0.10426162928342819, |
|
"eval_qasc_pairs_runtime": 0.5966, |
|
"eval_qasc_pairs_samples_per_second": 214.562, |
|
"eval_qasc_pairs_steps_per_second": 3.353, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_openbookqa_pairs_loss": 0.6913560032844543, |
|
"eval_openbookqa_pairs_runtime": 0.5728, |
|
"eval_openbookqa_pairs_samples_per_second": 223.453, |
|
"eval_openbookqa_pairs_steps_per_second": 3.491, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_msmarco_pairs_loss": 0.2564995586872101, |
|
"eval_msmarco_pairs_runtime": 1.4587, |
|
"eval_msmarco_pairs_samples_per_second": 87.749, |
|
"eval_msmarco_pairs_steps_per_second": 1.371, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_nq_pairs_loss": 0.14494968950748444, |
|
"eval_nq_pairs_runtime": 2.8504, |
|
"eval_nq_pairs_samples_per_second": 44.907, |
|
"eval_nq_pairs_steps_per_second": 0.702, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_trivia_pairs_loss": 0.633898913860321, |
|
"eval_trivia_pairs_runtime": 4.3846, |
|
"eval_trivia_pairs_samples_per_second": 29.193, |
|
"eval_trivia_pairs_steps_per_second": 0.456, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_gooaq_pairs_loss": 0.29749810695648193, |
|
"eval_gooaq_pairs_runtime": 1.0002, |
|
"eval_gooaq_pairs_samples_per_second": 127.979, |
|
"eval_gooaq_pairs_steps_per_second": 2.0, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_paws-pos_loss": 0.025082813575863838, |
|
"eval_paws-pos_runtime": 0.6849, |
|
"eval_paws-pos_samples_per_second": 186.893, |
|
"eval_paws-pos_steps_per_second": 2.92, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.603125, |
|
"grad_norm": 1.237898349761963, |
|
"learning_rate": 1.4328517063821754e-05, |
|
"loss": 0.1862, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.60625, |
|
"grad_norm": 3.120419502258301, |
|
"learning_rate": 1.4264037620742724e-05, |
|
"loss": 0.6092, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.609375, |
|
"grad_norm": 2.872905969619751, |
|
"learning_rate": 1.4199962380579275e-05, |
|
"loss": 0.541, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.6125, |
|
"grad_norm": 2.554291248321533, |
|
"learning_rate": 1.4136293755728e-05, |
|
"loss": 0.5297, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.615625, |
|
"grad_norm": 0.818438708782196, |
|
"learning_rate": 1.4073034143276623e-05, |
|
"loss": 0.0664, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.61875, |
|
"grad_norm": 1.3617022037506104, |
|
"learning_rate": 1.401018592491381e-05, |
|
"loss": 0.1557, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.621875, |
|
"grad_norm": 1.975934386253357, |
|
"learning_rate": 1.3947751466839452e-05, |
|
"loss": 0.3281, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 2.3073935508728027, |
|
"learning_rate": 1.3885733119675617e-05, |
|
"loss": 0.3828, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.628125, |
|
"grad_norm": 1.2710379362106323, |
|
"learning_rate": 1.382413321837801e-05, |
|
"loss": 0.2087, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.63125, |
|
"grad_norm": 2.7534079551696777, |
|
"learning_rate": 1.3762954082148114e-05, |
|
"loss": 0.5306, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.634375, |
|
"grad_norm": 3.0414681434631348, |
|
"learning_rate": 1.3702198014345816e-05, |
|
"loss": 0.6589, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.6375, |
|
"grad_norm": 2.3352811336517334, |
|
"learning_rate": 1.3641867302402734e-05, |
|
"loss": 0.425, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.640625, |
|
"grad_norm": 2.76236629486084, |
|
"learning_rate": 1.3581964217736077e-05, |
|
"loss": 0.5026, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.64375, |
|
"grad_norm": 2.5108022689819336, |
|
"learning_rate": 1.3522491015663117e-05, |
|
"loss": 0.5667, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.646875, |
|
"grad_norm": 2.4024035930633545, |
|
"learning_rate": 1.3463449935316308e-05, |
|
"loss": 0.4748, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 2.772578239440918, |
|
"learning_rate": 1.3404843199558945e-05, |
|
"loss": 0.5094, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.653125, |
|
"grad_norm": 2.2362611293792725, |
|
"learning_rate": 1.3346673014901517e-05, |
|
"loss": 0.3398, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.65625, |
|
"grad_norm": 1.5350793600082397, |
|
"learning_rate": 1.3288941571418583e-05, |
|
"loss": 0.1932, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.659375, |
|
"grad_norm": 2.147125720977783, |
|
"learning_rate": 1.3231651042666376e-05, |
|
"loss": 0.4233, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.6625, |
|
"grad_norm": 2.5387678146362305, |
|
"learning_rate": 1.3174803585600908e-05, |
|
"loss": 0.5848, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.665625, |
|
"grad_norm": 2.3380072116851807, |
|
"learning_rate": 1.3118401340496819e-05, |
|
"loss": 0.5076, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.66875, |
|
"grad_norm": 2.097322463989258, |
|
"learning_rate": 1.3062446430866749e-05, |
|
"loss": 0.286, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.671875, |
|
"grad_norm": 2.5456178188323975, |
|
"learning_rate": 1.3006940963381425e-05, |
|
"loss": 0.5221, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.675, |
|
"grad_norm": 2.5779526233673096, |
|
"learning_rate": 1.295188702779033e-05, |
|
"loss": 0.579, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.678125, |
|
"grad_norm": 1.9412658214569092, |
|
"learning_rate": 1.2897286696843012e-05, |
|
"loss": 0.2717, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.68125, |
|
"grad_norm": 2.2857954502105713, |
|
"learning_rate": 1.2843142026211081e-05, |
|
"loss": 0.4727, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.684375, |
|
"grad_norm": 2.2698121070861816, |
|
"learning_rate": 1.2789455054410776e-05, |
|
"loss": 0.3777, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.6875, |
|
"grad_norm": 2.2447919845581055, |
|
"learning_rate": 1.2736227802726247e-05, |
|
"loss": 0.537, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.690625, |
|
"grad_norm": 3.1389870643615723, |
|
"learning_rate": 1.268346227513343e-05, |
|
"loss": 0.6935, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.69375, |
|
"grad_norm": 1.925352931022644, |
|
"learning_rate": 1.2631160458224625e-05, |
|
"loss": 0.2929, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.696875, |
|
"grad_norm": 2.683356761932373, |
|
"learning_rate": 1.2579324321133666e-05, |
|
"loss": 0.5495, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.3518059253692627, |
|
"learning_rate": 1.2527955815461821e-05, |
|
"loss": 0.3767, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_VitaminC_cosine_accuracy": 0.556640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7074875235557556, |
|
"eval_VitaminC_cosine_ap": 0.5537116985905202, |
|
"eval_VitaminC_cosine_f1": 0.6657824933687002, |
|
"eval_VitaminC_cosine_f1_threshold": 0.2738235890865326, |
|
"eval_VitaminC_cosine_precision": 0.4990059642147117, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.552734375, |
|
"eval_VitaminC_dot_accuracy_threshold": 308.73809814453125, |
|
"eval_VitaminC_dot_ap": 0.5356558215645612, |
|
"eval_VitaminC_dot_f1": 0.6666666666666667, |
|
"eval_VitaminC_dot_f1_threshold": 142.89981079101562, |
|
"eval_VitaminC_dot_precision": 0.5030425963488844, |
|
"eval_VitaminC_dot_recall": 0.9880478087649402, |
|
"eval_VitaminC_euclidean_accuracy": 0.552734375, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 14.646638870239258, |
|
"eval_VitaminC_euclidean_ap": 0.5553327582256045, |
|
"eval_VitaminC_euclidean_f1": 0.6666666666666666, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.463809967041016, |
|
"eval_VitaminC_euclidean_precision": 0.5, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.55859375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 310.8325500488281, |
|
"eval_VitaminC_manhattan_ap": 0.5530353867429494, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 497.66796875, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 310.8325500488281, |
|
"eval_VitaminC_max_ap": 0.5553327582256045, |
|
"eval_VitaminC_max_f1": 0.6666666666666667, |
|
"eval_VitaminC_max_f1_threshold": 497.66796875, |
|
"eval_VitaminC_max_precision": 0.5030425963488844, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5553327582256045, |
|
"eval_sts-test_pearson_cosine": 0.8785811955197258, |
|
"eval_sts-test_pearson_dot": 0.8673295777318735, |
|
"eval_sts-test_pearson_euclidean": 0.9018792837542462, |
|
"eval_sts-test_pearson_manhattan": 0.9016741452222354, |
|
"eval_sts-test_pearson_max": 0.9018792837542462, |
|
"eval_sts-test_spearman_cosine": 0.9040249302501078, |
|
"eval_sts-test_spearman_dot": 0.8683179882884328, |
|
"eval_sts-test_spearman_euclidean": 0.8988373640296166, |
|
"eval_sts-test_spearman_manhattan": 0.8983056295417639, |
|
"eval_sts-test_spearman_max": 0.9040249302501078, |
|
"eval_vitaminc-pairs_loss": 1.8782049417495728, |
|
"eval_vitaminc-pairs_runtime": 1.831, |
|
"eval_vitaminc-pairs_samples_per_second": 58.986, |
|
"eval_vitaminc-pairs_steps_per_second": 1.092, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_negation-triplets_loss": 0.719520628452301, |
|
"eval_negation-triplets_runtime": 0.294, |
|
"eval_negation-triplets_samples_per_second": 217.687, |
|
"eval_negation-triplets_steps_per_second": 3.401, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_scitail-pairs-pos_loss": 0.06700660288333893, |
|
"eval_scitail-pairs-pos_runtime": 0.3659, |
|
"eval_scitail-pairs-pos_samples_per_second": 147.579, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.733, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_xsum-pairs_loss": 0.03577294573187828, |
|
"eval_xsum-pairs_runtime": 2.8359, |
|
"eval_xsum-pairs_samples_per_second": 45.136, |
|
"eval_xsum-pairs_steps_per_second": 0.705, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_sciq_pairs_loss": 0.018292119726538658, |
|
"eval_sciq_pairs_runtime": 3.6233, |
|
"eval_sciq_pairs_samples_per_second": 35.327, |
|
"eval_sciq_pairs_steps_per_second": 0.552, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_qasc_pairs_loss": 0.10864048451185226, |
|
"eval_qasc_pairs_runtime": 0.5959, |
|
"eval_qasc_pairs_samples_per_second": 214.784, |
|
"eval_qasc_pairs_steps_per_second": 3.356, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_openbookqa_pairs_loss": 0.696479082107544, |
|
"eval_openbookqa_pairs_runtime": 0.5743, |
|
"eval_openbookqa_pairs_samples_per_second": 222.885, |
|
"eval_openbookqa_pairs_steps_per_second": 3.483, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_msmarco_pairs_loss": 0.24125610291957855, |
|
"eval_msmarco_pairs_runtime": 1.4595, |
|
"eval_msmarco_pairs_samples_per_second": 87.699, |
|
"eval_msmarco_pairs_steps_per_second": 1.37, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_nq_pairs_loss": 0.15616978704929352, |
|
"eval_nq_pairs_runtime": 2.8639, |
|
"eval_nq_pairs_samples_per_second": 44.694, |
|
"eval_nq_pairs_steps_per_second": 0.698, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_trivia_pairs_loss": 0.6436348557472229, |
|
"eval_trivia_pairs_runtime": 4.377, |
|
"eval_trivia_pairs_samples_per_second": 29.244, |
|
"eval_trivia_pairs_steps_per_second": 0.457, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_gooaq_pairs_loss": 0.30042433738708496, |
|
"eval_gooaq_pairs_runtime": 1.0002, |
|
"eval_gooaq_pairs_samples_per_second": 127.981, |
|
"eval_gooaq_pairs_steps_per_second": 2.0, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_paws-pos_loss": 0.02469758875668049, |
|
"eval_paws-pos_runtime": 0.6819, |
|
"eval_paws-pos_samples_per_second": 187.706, |
|
"eval_paws-pos_steps_per_second": 2.933, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.703125, |
|
"grad_norm": 2.188075065612793, |
|
"learning_rate": 1.2477056875204302e-05, |
|
"loss": 0.4054, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.70625, |
|
"grad_norm": 2.5551207065582275, |
|
"learning_rate": 1.242662941667743e-05, |
|
"loss": 0.4114, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.709375, |
|
"grad_norm": 2.614218235015869, |
|
"learning_rate": 1.2376675338446527e-05, |
|
"loss": 0.4774, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.7125, |
|
"grad_norm": 1.4668488502502441, |
|
"learning_rate": 1.2327196521254394e-05, |
|
"loss": 0.1662, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.715625, |
|
"grad_norm": 2.075801372528076, |
|
"learning_rate": 1.2278194827950544e-05, |
|
"loss": 0.4634, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.71875, |
|
"grad_norm": 3.1399238109588623, |
|
"learning_rate": 1.2229672103421021e-05, |
|
"loss": 0.6514, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.721875, |
|
"grad_norm": 2.308095693588257, |
|
"learning_rate": 1.2181630174518995e-05, |
|
"loss": 0.3672, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.725, |
|
"grad_norm": 2.880965232849121, |
|
"learning_rate": 1.213407084999592e-05, |
|
"loss": 0.6115, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.728125, |
|
"grad_norm": 2.7592408657073975, |
|
"learning_rate": 1.2086995920433495e-05, |
|
"loss": 0.5445, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.73125, |
|
"grad_norm": 1.6351908445358276, |
|
"learning_rate": 1.20404071581762e-05, |
|
"loss": 0.2447, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.734375, |
|
"grad_norm": 1.5117764472961426, |
|
"learning_rate": 1.199430631726461e-05, |
|
"loss": 0.2566, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.7375, |
|
"grad_norm": 1.4923957586288452, |
|
"learning_rate": 1.194869513336933e-05, |
|
"loss": 0.208, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.740625, |
|
"grad_norm": 2.0138089656829834, |
|
"learning_rate": 1.1903575323725649e-05, |
|
"loss": 0.3175, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.74375, |
|
"grad_norm": 1.8065791130065918, |
|
"learning_rate": 1.1858948587068904e-05, |
|
"loss": 0.2546, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.746875, |
|
"grad_norm": 1.4454731941223145, |
|
"learning_rate": 1.1814816603570499e-05, |
|
"loss": 0.1709, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 2.613529682159424, |
|
"learning_rate": 1.1771181034774677e-05, |
|
"loss": 0.4799, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.753125, |
|
"grad_norm": 2.197608470916748, |
|
"learning_rate": 1.1728043523535934e-05, |
|
"loss": 0.5313, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.75625, |
|
"grad_norm": 2.056694269180298, |
|
"learning_rate": 1.1685405693957192e-05, |
|
"loss": 0.3248, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.759375, |
|
"grad_norm": 2.9212446212768555, |
|
"learning_rate": 1.1643269151328634e-05, |
|
"loss": 0.6279, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.7625, |
|
"grad_norm": 2.4438629150390625, |
|
"learning_rate": 1.1601635482067272e-05, |
|
"loss": 0.5193, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.765625, |
|
"grad_norm": 2.960676670074463, |
|
"learning_rate": 1.1560506253657225e-05, |
|
"loss": 0.6262, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.76875, |
|
"grad_norm": 2.2354516983032227, |
|
"learning_rate": 1.1519883014590691e-05, |
|
"loss": 0.4297, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.771875, |
|
"grad_norm": 2.175459623336792, |
|
"learning_rate": 1.1479767294309671e-05, |
|
"loss": 0.4763, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.775, |
|
"grad_norm": 2.5381572246551514, |
|
"learning_rate": 1.1440160603148352e-05, |
|
"loss": 0.5722, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.778125, |
|
"grad_norm": 2.3705122470855713, |
|
"learning_rate": 1.140106443227627e-05, |
|
"loss": 0.4347, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.78125, |
|
"grad_norm": 2.0581493377685547, |
|
"learning_rate": 1.1362480253642165e-05, |
|
"loss": 0.3271, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.784375, |
|
"grad_norm": 2.5319983959198, |
|
"learning_rate": 1.1324409519918556e-05, |
|
"loss": 0.5433, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.7875, |
|
"grad_norm": 0.7258579730987549, |
|
"learning_rate": 1.128685366444704e-05, |
|
"loss": 0.0637, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.790625, |
|
"grad_norm": 3.232028007507324, |
|
"learning_rate": 1.1249814101184362e-05, |
|
"loss": 0.9049, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.79375, |
|
"grad_norm": 2.510418653488159, |
|
"learning_rate": 1.1213292224649134e-05, |
|
"loss": 0.495, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.796875, |
|
"grad_norm": 1.644942045211792, |
|
"learning_rate": 1.1177289409869374e-05, |
|
"loss": 0.2218, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 3.1910505294799805, |
|
"learning_rate": 1.11418070123307e-05, |
|
"loss": 0.7491, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_VitaminC_cosine_accuracy": 0.5546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8312963247299194, |
|
"eval_VitaminC_cosine_ap": 0.5540818473167951, |
|
"eval_VitaminC_cosine_f1": 0.6657754010695187, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3716816306114197, |
|
"eval_VitaminC_cosine_precision": 0.5010060362173038, |
|
"eval_VitaminC_cosine_recall": 0.9920318725099602, |
|
"eval_VitaminC_dot_accuracy": 0.5546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 301.13458251953125, |
|
"eval_VitaminC_dot_ap": 0.5336035822109861, |
|
"eval_VitaminC_dot_f1": 0.6675639300134589, |
|
"eval_VitaminC_dot_f1_threshold": 140.0170135498047, |
|
"eval_VitaminC_dot_precision": 0.5040650406504065, |
|
"eval_VitaminC_dot_recall": 0.9880478087649402, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 14.30455493927002, |
|
"eval_VitaminC_euclidean_ap": 0.5547765455338385, |
|
"eval_VitaminC_euclidean_f1": 0.6666666666666666, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.225872039794922, |
|
"eval_VitaminC_euclidean_precision": 0.5, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.55859375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 311.50494384765625, |
|
"eval_VitaminC_manhattan_ap": 0.5520078360814107, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 491.16729736328125, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 311.50494384765625, |
|
"eval_VitaminC_max_ap": 0.5547765455338385, |
|
"eval_VitaminC_max_f1": 0.6675639300134589, |
|
"eval_VitaminC_max_f1_threshold": 491.16729736328125, |
|
"eval_VitaminC_max_precision": 0.5040650406504065, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5547765455338385, |
|
"eval_sts-test_pearson_cosine": 0.8785522027028954, |
|
"eval_sts-test_pearson_dot": 0.8677130233704464, |
|
"eval_sts-test_pearson_euclidean": 0.901327101812411, |
|
"eval_sts-test_pearson_manhattan": 0.9016459799124272, |
|
"eval_sts-test_pearson_max": 0.9016459799124272, |
|
"eval_sts-test_spearman_cosine": 0.9038277114411557, |
|
"eval_sts-test_spearman_dot": 0.8689599898843539, |
|
"eval_sts-test_spearman_euclidean": 0.8982747959226655, |
|
"eval_sts-test_spearman_manhattan": 0.8983893144005659, |
|
"eval_sts-test_spearman_max": 0.9038277114411557, |
|
"eval_vitaminc-pairs_loss": 1.8366389274597168, |
|
"eval_vitaminc-pairs_runtime": 1.8298, |
|
"eval_vitaminc-pairs_samples_per_second": 59.021, |
|
"eval_vitaminc-pairs_steps_per_second": 1.093, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_negation-triplets_loss": 0.7222614884376526, |
|
"eval_negation-triplets_runtime": 0.292, |
|
"eval_negation-triplets_samples_per_second": 219.186, |
|
"eval_negation-triplets_steps_per_second": 3.425, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_scitail-pairs-pos_loss": 0.06263165920972824, |
|
"eval_scitail-pairs-pos_runtime": 0.3693, |
|
"eval_scitail-pairs-pos_samples_per_second": 146.22, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.708, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_xsum-pairs_loss": 0.038485851138830185, |
|
"eval_xsum-pairs_runtime": 2.8422, |
|
"eval_xsum-pairs_samples_per_second": 45.035, |
|
"eval_xsum-pairs_steps_per_second": 0.704, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_sciq_pairs_loss": 0.017885908484458923, |
|
"eval_sciq_pairs_runtime": 3.6267, |
|
"eval_sciq_pairs_samples_per_second": 35.293, |
|
"eval_sciq_pairs_steps_per_second": 0.551, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_qasc_pairs_loss": 0.11011218279600143, |
|
"eval_qasc_pairs_runtime": 0.595, |
|
"eval_qasc_pairs_samples_per_second": 215.135, |
|
"eval_qasc_pairs_steps_per_second": 3.361, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_openbookqa_pairs_loss": 0.6921338438987732, |
|
"eval_openbookqa_pairs_runtime": 0.573, |
|
"eval_openbookqa_pairs_samples_per_second": 223.4, |
|
"eval_openbookqa_pairs_steps_per_second": 3.491, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_msmarco_pairs_loss": 0.24500073492527008, |
|
"eval_msmarco_pairs_runtime": 1.4604, |
|
"eval_msmarco_pairs_samples_per_second": 87.65, |
|
"eval_msmarco_pairs_steps_per_second": 1.37, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_nq_pairs_loss": 0.14756517112255096, |
|
"eval_nq_pairs_runtime": 2.8567, |
|
"eval_nq_pairs_samples_per_second": 44.806, |
|
"eval_nq_pairs_steps_per_second": 0.7, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_trivia_pairs_loss": 0.6358833909034729, |
|
"eval_trivia_pairs_runtime": 4.3759, |
|
"eval_trivia_pairs_samples_per_second": 29.251, |
|
"eval_trivia_pairs_steps_per_second": 0.457, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_gooaq_pairs_loss": 0.2909858226776123, |
|
"eval_gooaq_pairs_runtime": 1.0026, |
|
"eval_gooaq_pairs_samples_per_second": 127.667, |
|
"eval_gooaq_pairs_steps_per_second": 1.995, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_paws-pos_loss": 0.02510605938732624, |
|
"eval_paws-pos_runtime": 0.6858, |
|
"eval_paws-pos_samples_per_second": 186.641, |
|
"eval_paws-pos_steps_per_second": 2.916, |
|
"step": 576 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 64, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 320, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|