|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 302, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -4.7122321128845215, |
|
"logits/rejected": -3.4910330772399902, |
|
"logps/chosen": -480.7637939453125, |
|
"logps/rejected": -307.30804443359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -4.456130027770996, |
|
"logits/rejected": -3.5022683143615723, |
|
"logps/chosen": -441.0960693359375, |
|
"logps/rejected": -254.25729370117188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -4.721767425537109, |
|
"logits/rejected": -3.4279396533966064, |
|
"logps/chosen": -440.868896484375, |
|
"logps/rejected": -285.726806640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.494505494505495e-08, |
|
"logits/chosen": -4.769167423248291, |
|
"logits/rejected": -4.213985443115234, |
|
"logps/chosen": -339.8769226074219, |
|
"logps/rejected": -288.4583435058594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.098901098901099e-07, |
|
"logits/chosen": -4.547554016113281, |
|
"logits/rejected": -3.9193525314331055, |
|
"logps/chosen": -351.4005126953125, |
|
"logps/rejected": -348.05291748046875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.6483516483516484e-07, |
|
"logits/chosen": -4.506200790405273, |
|
"logits/rejected": -4.258366107940674, |
|
"logps/chosen": -367.66943359375, |
|
"logps/rejected": -365.24383544921875, |
|
"loss": 0.701, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.00408935546875, |
|
"rewards/margins": -0.01570434495806694, |
|
"rewards/rejected": 0.011614990420639515, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.197802197802198e-07, |
|
"logits/chosen": -4.512852191925049, |
|
"logits/rejected": -3.491598129272461, |
|
"logps/chosen": -418.4132995605469, |
|
"logps/rejected": -340.18426513671875, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0052337646484375, |
|
"rewards/margins": 0.0241851806640625, |
|
"rewards/rejected": -0.018951416015625, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.7472527472527475e-07, |
|
"logits/chosen": -4.398182392120361, |
|
"logits/rejected": -3.73840069770813, |
|
"logps/chosen": -383.665771484375, |
|
"logps/rejected": -369.041748046875, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03215942531824112, |
|
"rewards/margins": 0.088531494140625, |
|
"rewards/rejected": -0.05637207254767418, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.296703296703297e-07, |
|
"logits/chosen": -4.551479339599609, |
|
"logits/rejected": -3.8382506370544434, |
|
"logps/chosen": -348.11627197265625, |
|
"logps/rejected": -271.0714111328125, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06197509914636612, |
|
"rewards/margins": 0.13901062309741974, |
|
"rewards/rejected": -0.07703552395105362, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.846153846153847e-07, |
|
"logits/chosen": -4.554244518280029, |
|
"logits/rejected": -3.5874130725860596, |
|
"logps/chosen": -477.6059875488281, |
|
"logps/rejected": -331.3657531738281, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08335571736097336, |
|
"rewards/margins": 0.2956085205078125, |
|
"rewards/rejected": -0.21225281059741974, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.395604395604396e-07, |
|
"logits/chosen": -4.534104347229004, |
|
"logits/rejected": -3.622457265853882, |
|
"logps/chosen": -452.00927734375, |
|
"logps/rejected": -308.62396240234375, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11546631157398224, |
|
"rewards/margins": 0.42655640840530396, |
|
"rewards/rejected": -0.3110900819301605, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.945054945054946e-07, |
|
"logits/chosen": -4.5819878578186035, |
|
"logits/rejected": -4.043532848358154, |
|
"logps/chosen": -489.98583984375, |
|
"logps/rejected": -355.9280700683594, |
|
"loss": 0.3914, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.254067987203598, |
|
"rewards/margins": 0.7358551025390625, |
|
"rewards/rejected": -0.4817871153354645, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.494505494505495e-07, |
|
"logits/chosen": -4.548617839813232, |
|
"logits/rejected": -3.5121090412139893, |
|
"logps/chosen": -453.76708984375, |
|
"logps/rejected": -340.49310302734375, |
|
"loss": 0.3846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16141052544116974, |
|
"rewards/margins": 0.7571258544921875, |
|
"rewards/rejected": -0.595715343952179, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.043956043956044e-07, |
|
"logits/chosen": -4.559290885925293, |
|
"logits/rejected": -3.851238250732422, |
|
"logps/chosen": -359.08013916015625, |
|
"logps/rejected": -325.69927978515625, |
|
"loss": 0.2665, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43179628252983093, |
|
"rewards/margins": 1.1861114501953125, |
|
"rewards/rejected": -0.754315197467804, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.593406593406594e-07, |
|
"logits/chosen": -4.530972003936768, |
|
"logits/rejected": -3.6676721572875977, |
|
"logps/chosen": -283.534423828125, |
|
"logps/rejected": -344.08294677734375, |
|
"loss": 0.2007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46563720703125, |
|
"rewards/margins": 1.5038880109786987, |
|
"rewards/rejected": -1.0382508039474487, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.142857142857143e-07, |
|
"logits/chosen": -4.578598976135254, |
|
"logits/rejected": -3.969371795654297, |
|
"logps/chosen": -329.626708984375, |
|
"logps/rejected": -358.00067138671875, |
|
"loss": 0.15, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37066346406936646, |
|
"rewards/margins": 1.8208892345428467, |
|
"rewards/rejected": -1.450225830078125, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.692307692307694e-07, |
|
"logits/chosen": -4.456603050231934, |
|
"logits/rejected": -3.9640321731567383, |
|
"logps/chosen": -325.3027648925781, |
|
"logps/rejected": -371.80010986328125, |
|
"loss": 0.2361, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45557862520217896, |
|
"rewards/margins": 1.3231537342071533, |
|
"rewards/rejected": -0.8675751090049744, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.241758241758242e-07, |
|
"logits/chosen": -4.553553581237793, |
|
"logits/rejected": -4.103464603424072, |
|
"logps/chosen": -392.54351806640625, |
|
"logps/rejected": -390.0396728515625, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4598144590854645, |
|
"rewards/margins": 2.409454345703125, |
|
"rewards/rejected": -1.949639916419983, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.791208791208792e-07, |
|
"logits/chosen": -4.560757637023926, |
|
"logits/rejected": -3.3375790119171143, |
|
"logps/chosen": -458.788818359375, |
|
"logps/rejected": -237.71102905273438, |
|
"loss": 0.1327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.553814709186554, |
|
"rewards/margins": 1.9525666236877441, |
|
"rewards/rejected": -1.3987518548965454, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.340659340659342e-07, |
|
"logits/chosen": -4.524593830108643, |
|
"logits/rejected": -3.837027072906494, |
|
"logps/chosen": -485.5209045410156, |
|
"logps/rejected": -361.74188232421875, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9675140380859375, |
|
"rewards/margins": 3.29044508934021, |
|
"rewards/rejected": -2.3229310512542725, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.890109890109891e-07, |
|
"logits/chosen": -4.552459239959717, |
|
"logits/rejected": -3.4426755905151367, |
|
"logps/chosen": -637.16259765625, |
|
"logps/rejected": -307.08282470703125, |
|
"loss": 0.024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.276123046875, |
|
"rewards/margins": 3.7193756103515625, |
|
"rewards/rejected": -2.4432525634765625, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.0439560439560442e-06, |
|
"logits/chosen": -4.615756034851074, |
|
"logits/rejected": -3.5999937057495117, |
|
"logps/chosen": -299.9642333984375, |
|
"logps/rejected": -382.33734130859375, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.375402808189392, |
|
"rewards/margins": 4.191192626953125, |
|
"rewards/rejected": -2.8157899379730225, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.098901098901099e-06, |
|
"logits/chosen": -4.56257963180542, |
|
"logits/rejected": -3.9953930377960205, |
|
"logps/chosen": -304.43365478515625, |
|
"logps/rejected": -345.88836669921875, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.693988025188446, |
|
"rewards/margins": 4.401156425476074, |
|
"rewards/rejected": -3.7071685791015625, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.153846153846154e-06, |
|
"logits/chosen": -4.4777092933654785, |
|
"logits/rejected": -3.514190435409546, |
|
"logps/chosen": -350.09747314453125, |
|
"logps/rejected": -357.66943359375, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7525604963302612, |
|
"rewards/margins": 5.601266384124756, |
|
"rewards/rejected": -3.848706007003784, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.2087912087912089e-06, |
|
"logits/chosen": -4.659963607788086, |
|
"logits/rejected": -3.676948070526123, |
|
"logps/chosen": -426.1668701171875, |
|
"logps/rejected": -333.86212158203125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.552722215652466, |
|
"rewards/margins": 6.66888427734375, |
|
"rewards/rejected": -4.116162300109863, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.2637362637362637e-06, |
|
"logits/chosen": -4.635252952575684, |
|
"logits/rejected": -4.635253429412842, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.3186813186813187e-06, |
|
"logits/chosen": -4.54569149017334, |
|
"logits/rejected": -4.585753440856934, |
|
"logps/chosen": -268.20098876953125, |
|
"logps/rejected": -388.1652526855469, |
|
"loss": 0.0254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.103280782699585, |
|
"rewards/margins": 3.65966796875, |
|
"rewards/rejected": -1.5563873052597046, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.3736263736263738e-06, |
|
"logits/chosen": -4.528458595275879, |
|
"logits/rejected": -3.756037473678589, |
|
"logps/chosen": -320.74591064453125, |
|
"logps/rejected": -383.32171630859375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2982544898986816, |
|
"rewards/margins": 8.030874252319336, |
|
"rewards/rejected": -5.7326202392578125, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"logits/chosen": -4.45478630065918, |
|
"logits/rejected": -3.7227022647857666, |
|
"logps/chosen": -421.645263671875, |
|
"logps/rejected": -337.57232666015625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.300579786300659, |
|
"rewards/margins": 8.780420303344727, |
|
"rewards/rejected": -6.479840278625488, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.4835164835164837e-06, |
|
"logits/chosen": -4.644472122192383, |
|
"logits/rejected": -3.8435280323028564, |
|
"logps/chosen": -344.5352783203125, |
|
"logps/rejected": -387.8581848144531, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.017681837081909, |
|
"rewards/margins": 7.160736083984375, |
|
"rewards/rejected": -5.143054485321045, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"logits/chosen": -4.358231544494629, |
|
"logits/rejected": -3.987455129623413, |
|
"logps/chosen": -300.4159851074219, |
|
"logps/rejected": -426.6040344238281, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.130636692047119, |
|
"rewards/margins": 8.095977783203125, |
|
"rewards/rejected": -5.965341091156006, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.5934065934065933e-06, |
|
"logits/chosen": -4.457190990447998, |
|
"logits/rejected": -3.8622899055480957, |
|
"logps/chosen": -428.80010986328125, |
|
"logps/rejected": -282.08941650390625, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.919866919517517, |
|
"rewards/margins": 5.6237030029296875, |
|
"rewards/rejected": -3.70383620262146, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.6483516483516484e-06, |
|
"logits/chosen": -4.373894214630127, |
|
"logits/rejected": -3.93933367729187, |
|
"logps/chosen": -282.36163330078125, |
|
"logps/rejected": -356.90789794921875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.413839817047119, |
|
"rewards/margins": 7.732351779937744, |
|
"rewards/rejected": -5.318511962890625, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7032967032967034e-06, |
|
"logits/chosen": -4.522324085235596, |
|
"logits/rejected": -3.5009684562683105, |
|
"logps/chosen": -299.6717834472656, |
|
"logps/rejected": -359.8671875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1251494884490967, |
|
"rewards/margins": 8.901113510131836, |
|
"rewards/rejected": -6.775964260101318, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7582417582417585e-06, |
|
"logits/chosen": -4.407108783721924, |
|
"logits/rejected": -3.9882192611694336, |
|
"logps/chosen": -279.01580810546875, |
|
"logps/rejected": -309.4888916015625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.941366672515869, |
|
"rewards/margins": 8.681912422180176, |
|
"rewards/rejected": -5.740545749664307, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8131868131868133e-06, |
|
"logits/chosen": -4.558385848999023, |
|
"logits/rejected": -4.195313453674316, |
|
"logps/chosen": -283.02545166015625, |
|
"logps/rejected": -402.60540771484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7620728015899658, |
|
"rewards/margins": 9.408578872680664, |
|
"rewards/rejected": -7.646505832672119, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8681318681318684e-06, |
|
"logits/chosen": -4.52475118637085, |
|
"logits/rejected": -4.018232345581055, |
|
"logps/chosen": -304.9980773925781, |
|
"logps/rejected": -374.7655029296875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8536040782928467, |
|
"rewards/margins": 9.533895492553711, |
|
"rewards/rejected": -6.680291652679443, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9230769230769234e-06, |
|
"logits/chosen": -4.477925777435303, |
|
"logits/rejected": -3.7866082191467285, |
|
"logps/chosen": -269.1673278808594, |
|
"logps/rejected": -379.8326416015625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.563244581222534, |
|
"rewards/margins": 9.162829399108887, |
|
"rewards/rejected": -6.599585056304932, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9780219780219782e-06, |
|
"logits/chosen": -4.392153263092041, |
|
"logits/rejected": -4.061153411865234, |
|
"logps/chosen": -323.526611328125, |
|
"logps/rejected": -459.18817138671875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4470367431640625, |
|
"rewards/margins": 9.656204223632812, |
|
"rewards/rejected": -7.20916748046875, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.032967032967033e-06, |
|
"logits/chosen": -4.585270881652832, |
|
"logits/rejected": -3.468134641647339, |
|
"logps/chosen": -314.48748779296875, |
|
"logps/rejected": -404.564453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6204469203948975, |
|
"rewards/margins": 10.225598335266113, |
|
"rewards/rejected": -7.605151653289795, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.0879120879120883e-06, |
|
"logits/chosen": -4.52483606338501, |
|
"logits/rejected": -3.6769773960113525, |
|
"logps/chosen": -283.4615173339844, |
|
"logps/rejected": -368.316650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3265349864959717, |
|
"rewards/margins": 9.931375503540039, |
|
"rewards/rejected": -7.604840278625488, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"logits/chosen": -4.958460330963135, |
|
"logits/rejected": -3.5075302124023438, |
|
"logps/chosen": -1548.0390625, |
|
"logps/rejected": -430.23260498046875, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.080407738685608, |
|
"rewards/margins": 5.01118803024292, |
|
"rewards/rejected": -3.9307801723480225, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.197802197802198e-06, |
|
"logits/chosen": -4.469013690948486, |
|
"logits/rejected": -3.7112741470336914, |
|
"logps/chosen": -329.34527587890625, |
|
"logps/rejected": -353.8309326171875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.11572265625, |
|
"rewards/margins": 8.09234619140625, |
|
"rewards/rejected": -4.97662353515625, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.252747252747253e-06, |
|
"logits/chosen": -4.559028625488281, |
|
"logits/rejected": -3.685147523880005, |
|
"logps/chosen": -350.5050048828125, |
|
"logps/rejected": -432.88720703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.776409864425659, |
|
"rewards/margins": 11.043191909790039, |
|
"rewards/rejected": -8.2667818069458, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.307692307692308e-06, |
|
"logits/chosen": -4.4963531494140625, |
|
"logits/rejected": -3.1627519130706787, |
|
"logps/chosen": -553.4663696289062, |
|
"logps/rejected": -415.58251953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3123841285705566, |
|
"rewards/margins": 13.567768096923828, |
|
"rewards/rejected": -10.255383491516113, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.362637362637363e-06, |
|
"logits/chosen": -4.479090690612793, |
|
"logits/rejected": -3.9051225185394287, |
|
"logps/chosen": -281.47723388671875, |
|
"logps/rejected": -345.96337890625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1858582496643066, |
|
"rewards/margins": 7.392383098602295, |
|
"rewards/rejected": -5.206524848937988, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.4175824175824177e-06, |
|
"logits/chosen": -4.356232166290283, |
|
"logits/rejected": -3.9175236225128174, |
|
"logps/chosen": -362.4033203125, |
|
"logps/rejected": -447.981201171875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4192932844161987, |
|
"rewards/margins": 9.018689155578613, |
|
"rewards/rejected": -7.599395751953125, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.472527472527473e-06, |
|
"logits/chosen": -4.230052471160889, |
|
"logits/rejected": -3.6365554332733154, |
|
"logps/chosen": -336.6274108886719, |
|
"logps/rejected": -369.9189453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9184234142303467, |
|
"rewards/margins": 10.501449584960938, |
|
"rewards/rejected": -7.58302640914917, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5274725274725274e-06, |
|
"logits/chosen": -4.472602367401123, |
|
"logits/rejected": -3.738085985183716, |
|
"logps/chosen": -372.7972717285156, |
|
"logps/rejected": -331.42022705078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7339019775390625, |
|
"rewards/margins": 10.597005844116211, |
|
"rewards/rejected": -7.863104343414307, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.582417582417583e-06, |
|
"logits/chosen": -4.685428619384766, |
|
"logits/rejected": -3.7145485877990723, |
|
"logps/chosen": -326.884521484375, |
|
"logps/rejected": -369.75830078125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3666748106479645, |
|
"rewards/margins": 8.955459594726562, |
|
"rewards/rejected": -8.588785171508789, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.6373626373626375e-06, |
|
"logits/chosen": -4.467536926269531, |
|
"logits/rejected": -3.8350751399993896, |
|
"logps/chosen": -312.74237060546875, |
|
"logps/rejected": -340.1723937988281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.017587423324585, |
|
"rewards/margins": 10.204119682312012, |
|
"rewards/rejected": -7.186532497406006, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.6923076923076923e-06, |
|
"logits/chosen": -4.286996364593506, |
|
"logits/rejected": -3.920314073562622, |
|
"logps/chosen": -331.720703125, |
|
"logps/rejected": -453.9488830566406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.2590577602386475, |
|
"rewards/margins": 13.077414512634277, |
|
"rewards/rejected": -9.81835651397705, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7472527472527476e-06, |
|
"logits/chosen": -4.573817729949951, |
|
"logits/rejected": -4.573817729949951, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8021978021978024e-06, |
|
"logits/chosen": -4.560126781463623, |
|
"logits/rejected": -3.358957529067993, |
|
"logps/chosen": -505.0814208984375, |
|
"logps/rejected": -392.2349853515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.12652587890625, |
|
"rewards/margins": 11.479181289672852, |
|
"rewards/rejected": -9.352655410766602, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"logits/chosen": -4.519628047943115, |
|
"logits/rejected": -3.72153902053833, |
|
"logps/chosen": -297.31982421875, |
|
"logps/rejected": -359.1614990234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.254974365234375, |
|
"rewards/margins": 9.919842720031738, |
|
"rewards/rejected": -7.664868354797363, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.9120879120879125e-06, |
|
"logits/chosen": -4.356388568878174, |
|
"logits/rejected": -3.934237241744995, |
|
"logps/chosen": -305.0617370605469, |
|
"logps/rejected": -370.7964172363281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9260528087615967, |
|
"rewards/margins": 10.116022109985352, |
|
"rewards/rejected": -7.189969062805176, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.9670329670329673e-06, |
|
"logits/chosen": -4.416037082672119, |
|
"logits/rejected": -4.145164489746094, |
|
"logps/chosen": -205.00186157226562, |
|
"logps/rejected": -364.52728271484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6028244495391846, |
|
"rewards/margins": 10.653569221496582, |
|
"rewards/rejected": -8.050745010375977, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.021978021978022e-06, |
|
"logits/chosen": -4.462647438049316, |
|
"logits/rejected": -3.762413501739502, |
|
"logps/chosen": -300.01239013671875, |
|
"logps/rejected": -449.92999267578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.1943695545196533, |
|
"rewards/margins": 13.079874038696289, |
|
"rewards/rejected": -9.885504722595215, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"logits/chosen": -4.499337196350098, |
|
"logits/rejected": -3.3283798694610596, |
|
"logps/chosen": -415.0854797363281, |
|
"logps/rejected": -453.6877136230469, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.19559645652771, |
|
"rewards/margins": 13.638141632080078, |
|
"rewards/rejected": -10.442544937133789, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.1318681318681323e-06, |
|
"logits/chosen": -4.473818778991699, |
|
"logits/rejected": -3.3775250911712646, |
|
"logps/chosen": -632.7835083007812, |
|
"logps/rejected": -332.44622802734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4099366664886475, |
|
"rewards/margins": 10.282444953918457, |
|
"rewards/rejected": -7.872508525848389, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.1868131868131867e-06, |
|
"logits/chosen": -4.390290260314941, |
|
"logits/rejected": -3.326557159423828, |
|
"logps/chosen": -386.25701904296875, |
|
"logps/rejected": -294.870361328125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3045289516448975, |
|
"rewards/margins": 8.1837739944458, |
|
"rewards/rejected": -5.879245281219482, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.2417582417582424e-06, |
|
"logits/chosen": -4.46834135055542, |
|
"logits/rejected": -4.094131946563721, |
|
"logps/chosen": -334.70654296875, |
|
"logps/rejected": -451.2972412109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4573609828948975, |
|
"rewards/margins": 12.338126182556152, |
|
"rewards/rejected": -9.880764961242676, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.2967032967032968e-06, |
|
"logits/chosen": -4.594233989715576, |
|
"logits/rejected": -3.5783090591430664, |
|
"logps/chosen": -278.1133117675781, |
|
"logps/rejected": -505.69195556640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8014190196990967, |
|
"rewards/margins": 13.821588516235352, |
|
"rewards/rejected": -11.020169258117676, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.3516483516483516e-06, |
|
"logits/chosen": -4.607097625732422, |
|
"logits/rejected": -3.978281021118164, |
|
"logps/chosen": -312.921142578125, |
|
"logps/rejected": -342.41351318359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.903662085533142, |
|
"rewards/margins": 10.762219429016113, |
|
"rewards/rejected": -8.85855770111084, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.406593406593407e-06, |
|
"logits/chosen": -4.4642014503479, |
|
"logits/rejected": -3.5378057956695557, |
|
"logps/chosen": -488.7149658203125, |
|
"logps/rejected": -365.5252380371094, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4717254638671875, |
|
"rewards/margins": 9.707379341125488, |
|
"rewards/rejected": -8.2356538772583, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.4615384615384617e-06, |
|
"logits/chosen": -4.361936569213867, |
|
"logits/rejected": -4.100497245788574, |
|
"logps/chosen": -287.7290344238281, |
|
"logps/rejected": -451.6803283691406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.551348924636841, |
|
"rewards/margins": 13.295758247375488, |
|
"rewards/rejected": -10.744409561157227, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.516483516483517e-06, |
|
"logits/chosen": -4.3527302742004395, |
|
"logits/rejected": -3.811634063720703, |
|
"logps/chosen": -360.6308898925781, |
|
"logps/rejected": -448.3175354003906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.832226514816284, |
|
"rewards/margins": 12.640280723571777, |
|
"rewards/rejected": -8.808053970336914, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"logits/chosen": -4.550887584686279, |
|
"logits/rejected": -4.550887584686279, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.6263736263736266e-06, |
|
"logits/chosen": -4.38459587097168, |
|
"logits/rejected": -3.529341697692871, |
|
"logps/chosen": -330.7218017578125, |
|
"logps/rejected": -371.4253234863281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.72430419921875, |
|
"rewards/margins": 10.868155479431152, |
|
"rewards/rejected": -9.143851280212402, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.681318681318682e-06, |
|
"logits/chosen": -4.437158584594727, |
|
"logits/rejected": -4.42078971862793, |
|
"logps/chosen": -243.7357177734375, |
|
"logps/rejected": -423.0533447265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9866089820861816, |
|
"rewards/margins": 12.251775741577148, |
|
"rewards/rejected": -9.265167236328125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.7362637362637367e-06, |
|
"logits/chosen": -4.38008451461792, |
|
"logits/rejected": -3.6854677200317383, |
|
"logps/chosen": -255.04901123046875, |
|
"logps/rejected": -424.14111328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8715972900390625, |
|
"rewards/margins": 14.87622356414795, |
|
"rewards/rejected": -11.004626274108887, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.7912087912087915e-06, |
|
"logits/chosen": -4.497178077697754, |
|
"logits/rejected": -3.6329329013824463, |
|
"logps/chosen": -349.28289794921875, |
|
"logps/rejected": -381.4493408203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.124981641769409, |
|
"rewards/margins": 10.515064239501953, |
|
"rewards/rejected": -8.390082359313965, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -4.524789333343506, |
|
"logits/rejected": -3.4265801906585693, |
|
"logps/chosen": -365.79644775390625, |
|
"logps/rejected": -475.97869873046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8129639625549316, |
|
"rewards/margins": 14.072290420532227, |
|
"rewards/rejected": -11.259325981140137, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.901098901098901e-06, |
|
"logits/chosen": -4.497722625732422, |
|
"logits/rejected": -3.3662238121032715, |
|
"logps/chosen": -472.6344909667969, |
|
"logps/rejected": -421.3455505371094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.5328400135040283, |
|
"rewards/margins": 12.811511039733887, |
|
"rewards/rejected": -9.278671264648438, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.9560439560439565e-06, |
|
"logits/chosen": -4.341458797454834, |
|
"logits/rejected": -3.5533199310302734, |
|
"logps/chosen": -340.4435119628906, |
|
"logps/rejected": -364.519775390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8503997325897217, |
|
"rewards/margins": 11.756946563720703, |
|
"rewards/rejected": -8.906546592712402, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.010989010989012e-06, |
|
"logits/chosen": -4.497629165649414, |
|
"logits/rejected": -4.373428821563721, |
|
"logps/chosen": -253.01666259765625, |
|
"logps/rejected": -411.61370849609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3009033203125, |
|
"rewards/margins": 12.612372398376465, |
|
"rewards/rejected": -9.311469078063965, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.065934065934066e-06, |
|
"logits/chosen": -4.67010498046875, |
|
"logits/rejected": -4.221648216247559, |
|
"logps/chosen": -267.68695068359375, |
|
"logps/rejected": -406.31768798828125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.4193084239959717, |
|
"rewards/margins": 8.557241439819336, |
|
"rewards/rejected": -5.137933254241943, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.120879120879121e-06, |
|
"logits/chosen": -4.276376724243164, |
|
"logits/rejected": -3.5325844287872314, |
|
"logps/chosen": -359.96893310546875, |
|
"logps/rejected": -416.0820617675781, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9439544677734375, |
|
"rewards/margins": 13.037528038024902, |
|
"rewards/rejected": -10.093573570251465, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.175824175824177e-06, |
|
"logits/chosen": -4.5093512535095215, |
|
"logits/rejected": -4.4456963539123535, |
|
"logps/chosen": -192.96502685546875, |
|
"logps/rejected": -362.227783203125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3808960020542145, |
|
"rewards/margins": 7.196664810180664, |
|
"rewards/rejected": -6.815768718719482, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.230769230769231e-06, |
|
"logits/chosen": -4.7614617347717285, |
|
"logits/rejected": -4.761462211608887, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"logits/chosen": -4.45823860168457, |
|
"logits/rejected": -3.9341721534729004, |
|
"logps/chosen": -418.939697265625, |
|
"logps/rejected": -429.19378662109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.361285448074341, |
|
"rewards/margins": 14.282876968383789, |
|
"rewards/rejected": -11.921591758728027, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.340659340659341e-06, |
|
"logits/chosen": -4.4410505294799805, |
|
"logits/rejected": -4.185164928436279, |
|
"logps/chosen": -232.5185546875, |
|
"logps/rejected": -447.5303649902344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2920806407928467, |
|
"rewards/margins": 13.843243598937988, |
|
"rewards/rejected": -11.551162719726562, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.395604395604396e-06, |
|
"logits/chosen": -4.416217803955078, |
|
"logits/rejected": -3.6590464115142822, |
|
"logps/chosen": -433.36480712890625, |
|
"logps/rejected": -375.2690734863281, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0846405029296875, |
|
"rewards/margins": 7.43996000289917, |
|
"rewards/rejected": -6.355319499969482, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.45054945054945e-06, |
|
"logits/chosen": -4.3372907638549805, |
|
"logits/rejected": -4.462950229644775, |
|
"logps/chosen": -397.9360656738281, |
|
"logps/rejected": -463.2792663574219, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9072632193565369, |
|
"rewards/margins": 6.424984931945801, |
|
"rewards/rejected": -7.332248210906982, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.505494505494506e-06, |
|
"logits/chosen": -4.51932954788208, |
|
"logits/rejected": -3.1768529415130615, |
|
"logps/chosen": -661.5889282226562, |
|
"logps/rejected": -380.08734130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3720276355743408, |
|
"rewards/margins": 14.595202445983887, |
|
"rewards/rejected": -13.223175048828125, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.560439560439561e-06, |
|
"logits/chosen": -4.376767158508301, |
|
"logits/rejected": -3.4610347747802734, |
|
"logps/chosen": -433.4891357421875, |
|
"logps/rejected": -476.48748779296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.76708984375, |
|
"rewards/margins": 15.365961074829102, |
|
"rewards/rejected": -13.598871231079102, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.615384615384616e-06, |
|
"logits/chosen": -4.398746490478516, |
|
"logits/rejected": -3.6896204948425293, |
|
"logps/chosen": -230.8444061279297, |
|
"logps/rejected": -394.7328796386719, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5491745471954346, |
|
"rewards/margins": 15.824689865112305, |
|
"rewards/rejected": -13.27551555633545, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.6703296703296706e-06, |
|
"logits/chosen": -4.4249114990234375, |
|
"logits/rejected": -3.499051570892334, |
|
"logps/chosen": -363.8670349121094, |
|
"logps/rejected": -453.15826416015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3732330799102783, |
|
"rewards/margins": 15.843539237976074, |
|
"rewards/rejected": -13.470306396484375, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.725274725274726e-06, |
|
"logits/chosen": -4.579809665679932, |
|
"logits/rejected": -3.963841199874878, |
|
"logps/chosen": -376.39263916015625, |
|
"logps/rejected": -475.49981689453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1896270513534546, |
|
"rewards/margins": 17.53704261779785, |
|
"rewards/rejected": -16.347415924072266, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.780219780219781e-06, |
|
"logits/chosen": -4.374417304992676, |
|
"logits/rejected": -4.248178005218506, |
|
"logps/chosen": -194.196533203125, |
|
"logps/rejected": -417.26776123046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.190634250640869, |
|
"rewards/margins": 15.594205856323242, |
|
"rewards/rejected": -13.403571128845215, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.8351648351648355e-06, |
|
"logits/chosen": -4.6437482833862305, |
|
"logits/rejected": -3.73307728767395, |
|
"logps/chosen": -413.7351379394531, |
|
"logps/rejected": -395.78302001953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.820913791656494, |
|
"rewards/margins": 16.67730712890625, |
|
"rewards/rejected": -12.856393814086914, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.890109890109891e-06, |
|
"logits/chosen": -4.21191930770874, |
|
"logits/rejected": -3.5639426708221436, |
|
"logps/chosen": -447.6401062011719, |
|
"logps/rejected": -457.892578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4168548583984375, |
|
"rewards/margins": 14.87121868133545, |
|
"rewards/rejected": -13.454363822937012, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.945054945054946e-06, |
|
"logits/chosen": -4.551489353179932, |
|
"logits/rejected": -4.471848964691162, |
|
"logps/chosen": -240.43063354492188, |
|
"logps/rejected": -433.2778625488281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8902206420898438, |
|
"rewards/margins": 14.926170349121094, |
|
"rewards/rejected": -14.03594970703125, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -4.397589206695557, |
|
"logits/rejected": -4.0388078689575195, |
|
"logps/chosen": -348.8073425292969, |
|
"logps/rejected": -452.66693115234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.624746799468994, |
|
"rewards/margins": 15.00168228149414, |
|
"rewards/rejected": -11.376935005187988, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.999981426489179e-06, |
|
"logits/chosen": -4.5778608322143555, |
|
"logits/rejected": -3.8527991771698, |
|
"logps/chosen": -359.4041748046875, |
|
"logps/rejected": -409.2432861328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.038177490234375, |
|
"rewards/margins": 16.7247314453125, |
|
"rewards/rejected": -13.686553955078125, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.999925706232695e-06, |
|
"logits/chosen": -4.530111312866211, |
|
"logits/rejected": -4.155897617340088, |
|
"logps/chosen": -234.99557495117188, |
|
"logps/rejected": -426.90765380859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.2188384532928467, |
|
"rewards/margins": 14.525558471679688, |
|
"rewards/rejected": -11.306719779968262, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9998328400584864e-06, |
|
"logits/chosen": -4.477783679962158, |
|
"logits/rejected": -4.133589267730713, |
|
"logps/chosen": -248.23715209960938, |
|
"logps/rejected": -445.22174072265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.41404128074646, |
|
"rewards/margins": 16.36020851135254, |
|
"rewards/rejected": -12.9461669921875, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.999702829346432e-06, |
|
"logits/chosen": -4.469297885894775, |
|
"logits/rejected": -4.458068370819092, |
|
"logps/chosen": -243.61090087890625, |
|
"logps/rejected": -386.2374267578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7676605582237244, |
|
"rewards/margins": 11.3004789352417, |
|
"rewards/rejected": -10.532818794250488, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.999535676028338e-06, |
|
"logits/chosen": -4.551413536071777, |
|
"logits/rejected": -3.6586270332336426, |
|
"logps/chosen": -347.52716064453125, |
|
"logps/rejected": -452.22930908203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1838836669921875, |
|
"rewards/margins": 17.132701873779297, |
|
"rewards/rejected": -14.948819160461426, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.999331382587901e-06, |
|
"logits/chosen": -4.324283599853516, |
|
"logits/rejected": -3.1241581439971924, |
|
"logps/chosen": -483.8208923339844, |
|
"logps/rejected": -419.9326171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6294128894805908, |
|
"rewards/margins": 13.845037460327148, |
|
"rewards/rejected": -12.215624809265137, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.999089952060681e-06, |
|
"logits/chosen": -4.464879035949707, |
|
"logits/rejected": -3.448148488998413, |
|
"logps/chosen": -426.8560485839844, |
|
"logps/rejected": -436.4939270019531, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1339569091796875, |
|
"rewards/margins": 15.621515274047852, |
|
"rewards/rejected": -13.487558364868164, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.998811388034046e-06, |
|
"logits/chosen": -4.5417256355285645, |
|
"logits/rejected": -4.166463851928711, |
|
"logps/chosen": -389.265625, |
|
"logps/rejected": -494.1796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.934008836746216, |
|
"rewards/margins": 20.474946975708008, |
|
"rewards/rejected": -17.540937423706055, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.998495694647127e-06, |
|
"logits/chosen": -4.398112773895264, |
|
"logits/rejected": -4.488321781158447, |
|
"logps/chosen": -314.2387390136719, |
|
"logps/rejected": -540.993408203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8422698974609375, |
|
"rewards/margins": 17.285541534423828, |
|
"rewards/rejected": -15.443270683288574, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.998142876590749e-06, |
|
"logits/chosen": -4.43848180770874, |
|
"logits/rejected": -3.8401870727539062, |
|
"logps/chosen": -357.3565673828125, |
|
"logps/rejected": -495.7008972167969, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.558154284954071, |
|
"rewards/margins": 18.115083694458008, |
|
"rewards/rejected": -17.556928634643555, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.997752939107372e-06, |
|
"logits/chosen": -4.379143714904785, |
|
"logits/rejected": -2.8701581954956055, |
|
"logps/chosen": -384.65484619140625, |
|
"logps/rejected": -369.762939453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.203588843345642, |
|
"rewards/margins": 10.755364418029785, |
|
"rewards/rejected": -9.551775932312012, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.997325887990999e-06, |
|
"logits/chosen": -4.355114936828613, |
|
"logits/rejected": -3.516786575317383, |
|
"logps/chosen": -395.44866943359375, |
|
"logps/rejected": -483.1910095214844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.061553955078125, |
|
"rewards/margins": 16.13475799560547, |
|
"rewards/rejected": -14.073203086853027, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.996861729587103e-06, |
|
"logits/chosen": -4.370433330535889, |
|
"logits/rejected": -3.369537353515625, |
|
"logps/chosen": -388.32037353515625, |
|
"logps/rejected": -497.56427001953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.486157178878784, |
|
"rewards/margins": 18.97348403930664, |
|
"rewards/rejected": -15.487326622009277, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.996360470792524e-06, |
|
"logits/chosen": -4.528186321258545, |
|
"logits/rejected": -3.8962700366973877, |
|
"logps/chosen": -284.3951110839844, |
|
"logps/rejected": -496.885009765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.5079925060272217, |
|
"rewards/margins": 17.01309585571289, |
|
"rewards/rejected": -13.50510311126709, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9958221190553705e-06, |
|
"logits/chosen": -4.446348190307617, |
|
"logits/rejected": -4.065671443939209, |
|
"logps/chosen": -374.7364501953125, |
|
"logps/rejected": -565.4706420898438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.9700074195861816, |
|
"rewards/margins": 15.873197555541992, |
|
"rewards/rejected": -11.903189659118652, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9952466823749076e-06, |
|
"logits/chosen": -4.366666316986084, |
|
"logits/rejected": -3.5727248191833496, |
|
"logps/chosen": -303.04241943359375, |
|
"logps/rejected": -456.4177551269531, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.866119384765625, |
|
"rewards/margins": 15.641772270202637, |
|
"rewards/rejected": -13.775652885437012, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.994634169301439e-06, |
|
"logits/chosen": -4.417354583740234, |
|
"logits/rejected": -4.194676399230957, |
|
"logps/chosen": -246.4547576904297, |
|
"logps/rejected": -482.65960693359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9938981533050537, |
|
"rewards/margins": 16.903947830200195, |
|
"rewards/rejected": -13.910049438476562, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.99398458893618e-06, |
|
"logits/chosen": -4.699535846710205, |
|
"logits/rejected": -4.523982524871826, |
|
"logps/chosen": -260.97991943359375, |
|
"logps/rejected": -504.26416015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6771606802940369, |
|
"rewards/margins": 14.61356258392334, |
|
"rewards/rejected": -15.290722846984863, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.993297950931121e-06, |
|
"logits/chosen": -4.534379005432129, |
|
"logits/rejected": -3.2884504795074463, |
|
"logps/chosen": -392.5225830078125, |
|
"logps/rejected": -483.8066101074219, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4821900129318237, |
|
"rewards/margins": 17.772390365600586, |
|
"rewards/rejected": -16.29020118713379, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.992574265488883e-06, |
|
"logits/chosen": -4.4783616065979, |
|
"logits/rejected": -3.9488227367401123, |
|
"logps/chosen": -268.69671630859375, |
|
"logps/rejected": -445.88934326171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.969137668609619, |
|
"rewards/margins": 18.754684448242188, |
|
"rewards/rejected": -14.785547256469727, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.991813543362572e-06, |
|
"logits/chosen": -4.584895610809326, |
|
"logits/rejected": -3.91094970703125, |
|
"logps/chosen": -306.7088623046875, |
|
"logps/rejected": -446.6007995605469, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.993066430091858, |
|
"rewards/margins": 13.759903907775879, |
|
"rewards/rejected": -11.766837120056152, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.991015795855611e-06, |
|
"logits/chosen": -4.340481281280518, |
|
"logits/rejected": -3.854095935821533, |
|
"logps/chosen": -297.0914001464844, |
|
"logps/rejected": -418.73492431640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8690094351768494, |
|
"rewards/margins": 13.999166488647461, |
|
"rewards/rejected": -13.130157470703125, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.990181034821578e-06, |
|
"logits/chosen": -4.376932144165039, |
|
"logits/rejected": -3.738924264907837, |
|
"logps/chosen": -426.25537109375, |
|
"logps/rejected": -413.3848876953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9265228509902954, |
|
"rewards/margins": 14.493753433227539, |
|
"rewards/rejected": -12.567230224609375, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.989309272664026e-06, |
|
"logits/chosen": -4.558651924133301, |
|
"logits/rejected": -3.5922670364379883, |
|
"logps/chosen": -469.27423095703125, |
|
"logps/rejected": -514.3047485351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9566650390625, |
|
"rewards/margins": 17.330196380615234, |
|
"rewards/rejected": -14.37353229522705, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.988400522336304e-06, |
|
"logits/chosen": -4.404550075531006, |
|
"logits/rejected": -3.672477960586548, |
|
"logps/chosen": -275.354248046875, |
|
"logps/rejected": -364.86968994140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3228882551193237, |
|
"rewards/margins": 13.743850708007812, |
|
"rewards/rejected": -12.4209623336792, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.987454797341358e-06, |
|
"logits/chosen": -4.511158466339111, |
|
"logits/rejected": -3.3894896507263184, |
|
"logps/chosen": -320.03155517578125, |
|
"logps/rejected": -423.61590576171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1824281215667725, |
|
"rewards/margins": 14.258829116821289, |
|
"rewards/rejected": -12.076400756835938, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.986472111731536e-06, |
|
"logits/chosen": -4.6026153564453125, |
|
"logits/rejected": -4.254034996032715, |
|
"logps/chosen": -234.12818908691406, |
|
"logps/rejected": -384.76104736328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7228440046310425, |
|
"rewards/margins": 12.307738304138184, |
|
"rewards/rejected": -10.584894180297852, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.985452480108376e-06, |
|
"logits/chosen": -4.389405727386475, |
|
"logits/rejected": -4.311841011047363, |
|
"logps/chosen": -345.70697021484375, |
|
"logps/rejected": -424.91094970703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3869384825229645, |
|
"rewards/margins": 11.403149604797363, |
|
"rewards/rejected": -11.01621150970459, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.984395917622387e-06, |
|
"logits/chosen": -4.487508296966553, |
|
"logits/rejected": -3.7850193977355957, |
|
"logps/chosen": -354.9103698730469, |
|
"logps/rejected": -464.49505615234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3494720458984375, |
|
"rewards/margins": 15.737408638000488, |
|
"rewards/rejected": -15.38793659210205, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.9833024399728295e-06, |
|
"logits/chosen": -4.406660079956055, |
|
"logits/rejected": -3.810431718826294, |
|
"logps/chosen": -335.26019287109375, |
|
"logps/rejected": -485.70513916015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31449586153030396, |
|
"rewards/margins": 15.32269287109375, |
|
"rewards/rejected": -15.008196830749512, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.982172063407479e-06, |
|
"logits/chosen": -4.340198993682861, |
|
"logits/rejected": -3.715421199798584, |
|
"logps/chosen": -288.08349609375, |
|
"logps/rejected": -427.89544677734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.296283006668091, |
|
"rewards/margins": 17.811279296875, |
|
"rewards/rejected": -15.514996528625488, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.981004804722384e-06, |
|
"logits/chosen": -4.355514049530029, |
|
"logits/rejected": -4.320943355560303, |
|
"logps/chosen": -344.5791320800781, |
|
"logps/rejected": -434.83441162109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7778748273849487, |
|
"rewards/margins": 15.279544830322266, |
|
"rewards/rejected": -13.501669883728027, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.979800681261619e-06, |
|
"logits/chosen": -4.434340476989746, |
|
"logits/rejected": -3.5260894298553467, |
|
"logps/chosen": -304.6591491699219, |
|
"logps/rejected": -368.31622314453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.424444556236267, |
|
"rewards/margins": 12.992469787597656, |
|
"rewards/rejected": -11.568025588989258, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.978559710917024e-06, |
|
"logits/chosen": -4.621590614318848, |
|
"logits/rejected": -4.356657028198242, |
|
"logps/chosen": -253.44842529296875, |
|
"logps/rejected": -314.619140625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.195851132273674, |
|
"rewards/margins": 5.029829502105713, |
|
"rewards/rejected": -4.833978176116943, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9772819121279395e-06, |
|
"logits/chosen": -4.356565475463867, |
|
"logits/rejected": -4.380559921264648, |
|
"logps/chosen": -248.25204467773438, |
|
"logps/rejected": -454.09014892578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.265228271484375, |
|
"rewards/margins": 15.274236679077148, |
|
"rewards/rejected": -16.539464950561523, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.975967303880933e-06, |
|
"logits/chosen": -4.506335258483887, |
|
"logits/rejected": -3.854314088821411, |
|
"logps/chosen": -328.2158508300781, |
|
"logps/rejected": -472.5008239746094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1369415521621704, |
|
"rewards/margins": 17.064838409423828, |
|
"rewards/rejected": -18.201780319213867, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.974615905709518e-06, |
|
"logits/chosen": -4.472858905792236, |
|
"logits/rejected": -3.3297882080078125, |
|
"logps/chosen": -406.04315185546875, |
|
"logps/rejected": -437.1851501464844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.828503429889679, |
|
"rewards/margins": 17.225427627563477, |
|
"rewards/rejected": -16.39692497253418, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.973227737693858e-06, |
|
"logits/chosen": -4.501963138580322, |
|
"logits/rejected": -4.339019298553467, |
|
"logps/chosen": -290.47528076171875, |
|
"logps/rejected": -569.775146484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.762200951576233, |
|
"rewards/margins": 17.014074325561523, |
|
"rewards/rejected": -18.776275634765625, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.971802820460481e-06, |
|
"logits/chosen": -4.366243839263916, |
|
"logits/rejected": -3.672136068344116, |
|
"logps/chosen": -432.7600402832031, |
|
"logps/rejected": -490.27880859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8038910031318665, |
|
"rewards/margins": 19.19203758239746, |
|
"rewards/rejected": -18.388147354125977, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.970341175181957e-06, |
|
"logits/chosen": -4.175065040588379, |
|
"logits/rejected": -3.344090461730957, |
|
"logps/chosen": -656.5657958984375, |
|
"logps/rejected": -555.3231201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.54205322265625, |
|
"rewards/margins": 20.33514404296875, |
|
"rewards/rejected": -20.877197265625, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.968842823576592e-06, |
|
"logits/chosen": -4.238167762756348, |
|
"logits/rejected": -3.387505054473877, |
|
"logps/chosen": -430.23291015625, |
|
"logps/rejected": -489.0005798339844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47160646319389343, |
|
"rewards/margins": 17.979446411132812, |
|
"rewards/rejected": -18.451053619384766, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.967307787908108e-06, |
|
"logits/chosen": -4.278386116027832, |
|
"logits/rejected": -3.415719509124756, |
|
"logps/chosen": -294.67205810546875, |
|
"logps/rejected": -477.9254150390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18892823159694672, |
|
"rewards/margins": 17.483989715576172, |
|
"rewards/rejected": -17.67291831970215, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.965736090985305e-06, |
|
"logits/chosen": -4.284654140472412, |
|
"logits/rejected": -3.7332231998443604, |
|
"logps/chosen": -395.2005615234375, |
|
"logps/rejected": -559.507568359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7245513796806335, |
|
"rewards/margins": 21.60517692565918, |
|
"rewards/rejected": -20.880624771118164, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.964127756161727e-06, |
|
"logits/chosen": -4.413480758666992, |
|
"logits/rejected": -3.4410207271575928, |
|
"logps/chosen": -453.9344177246094, |
|
"logps/rejected": -478.32720947265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7942901849746704, |
|
"rewards/margins": 19.258975982666016, |
|
"rewards/rejected": -18.464685440063477, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9624828073353144e-06, |
|
"logits/chosen": -4.612053871154785, |
|
"logits/rejected": -3.6660573482513428, |
|
"logps/chosen": -454.968994140625, |
|
"logps/rejected": -491.28668212890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.370809942483902, |
|
"rewards/margins": 17.405982971191406, |
|
"rewards/rejected": -17.776792526245117, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.960801268948047e-06, |
|
"logits/chosen": -4.630027770996094, |
|
"logits/rejected": -3.9804086685180664, |
|
"logps/chosen": -334.5860290527344, |
|
"logps/rejected": -470.1821594238281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2251189947128296, |
|
"rewards/margins": 15.139511108398438, |
|
"rewards/rejected": -13.914392471313477, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.959083165985581e-06, |
|
"logits/chosen": -4.38828706741333, |
|
"logits/rejected": -3.9800713062286377, |
|
"logps/chosen": -251.57284545898438, |
|
"logps/rejected": -336.22308349609375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5879532098770142, |
|
"rewards/margins": 8.483784675598145, |
|
"rewards/rejected": -9.071738243103027, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.957328523976879e-06, |
|
"logits/chosen": -4.331254482269287, |
|
"logits/rejected": -3.744624376296997, |
|
"logps/chosen": -555.8796997070312, |
|
"logps/rejected": -509.46636962890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.13983154296875, |
|
"rewards/margins": 23.415903091430664, |
|
"rewards/rejected": -20.276071548461914, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.9555373689938325e-06, |
|
"logits/chosen": -4.3973493576049805, |
|
"logits/rejected": -3.3343820571899414, |
|
"logps/chosen": -488.8701171875, |
|
"logps/rejected": -539.1553344726562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8417510986328125, |
|
"rewards/margins": 21.32379722595215, |
|
"rewards/rejected": -20.482046127319336, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.95370972765087e-06, |
|
"logits/chosen": -3.7870357036590576, |
|
"logits/rejected": -4.47447395324707, |
|
"logps/chosen": -113.8028793334961, |
|
"logps/rejected": -345.9452819824219, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3848892152309418, |
|
"rewards/margins": 9.808587074279785, |
|
"rewards/rejected": -9.423697471618652, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.951845627104565e-06, |
|
"logits/chosen": -4.488361835479736, |
|
"logits/rejected": -4.143915176391602, |
|
"logps/chosen": -271.135498046875, |
|
"logps/rejected": -533.8534545898438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.818115234375, |
|
"rewards/margins": 22.837081909179688, |
|
"rewards/rejected": -21.018966674804688, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.9499450950532305e-06, |
|
"logits/chosen": -4.404188632965088, |
|
"logits/rejected": -3.7939138412475586, |
|
"logps/chosen": -502.817626953125, |
|
"logps/rejected": -596.62646484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.257251024246216, |
|
"rewards/margins": 10.3175048828125, |
|
"rewards/rejected": -13.574755668640137, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.948008159736507e-06, |
|
"logits/chosen": -4.459652423858643, |
|
"logits/rejected": -3.455385208129883, |
|
"logps/chosen": -459.57611083984375, |
|
"logps/rejected": -548.26806640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31995850801467896, |
|
"rewards/margins": 21.58222198486328, |
|
"rewards/rejected": -21.902179718017578, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.9460348499349485e-06, |
|
"logits/chosen": -4.625619888305664, |
|
"logits/rejected": -3.69688081741333, |
|
"logps/chosen": -304.876953125, |
|
"logps/rejected": -458.91082763671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4879150390625, |
|
"rewards/margins": 19.09657859802246, |
|
"rewards/rejected": -19.58449363708496, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.944025194969586e-06, |
|
"logits/chosen": -4.478323459625244, |
|
"logits/rejected": -3.7782227993011475, |
|
"logps/chosen": -332.0870361328125, |
|
"logps/rejected": -599.0357666015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6869843006134033, |
|
"rewards/margins": 25.880760192871094, |
|
"rewards/rejected": -23.193775177001953, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.941979224701499e-06, |
|
"logits/chosen": -4.613561153411865, |
|
"logits/rejected": -4.613561153411865, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.939896969531367e-06, |
|
"logits/chosen": -4.662393093109131, |
|
"logits/rejected": -4.662393093109131, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.937778460399022e-06, |
|
"logits/chosen": -4.4899797439575195, |
|
"logits/rejected": -3.9571731090545654, |
|
"logps/chosen": -311.52081298828125, |
|
"logps/rejected": -528.713134765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.377178907394409, |
|
"rewards/margins": 24.917617797851562, |
|
"rewards/rejected": -21.54043960571289, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.935623728782986e-06, |
|
"logits/chosen": -4.431125164031982, |
|
"logits/rejected": -3.994685173034668, |
|
"logps/chosen": -318.579833984375, |
|
"logps/rejected": -469.0091552734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33842775225639343, |
|
"rewards/margins": 20.076812744140625, |
|
"rewards/rejected": -20.415241241455078, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.933432806700004e-06, |
|
"logits/chosen": -4.436576843261719, |
|
"logits/rejected": -3.4724605083465576, |
|
"logps/chosen": -499.44195556640625, |
|
"logps/rejected": -503.2141418457031, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4404419660568237, |
|
"rewards/margins": 17.919048309326172, |
|
"rewards/rejected": -16.478607177734375, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.931205726704566e-06, |
|
"logits/chosen": -4.575839519500732, |
|
"logits/rejected": -3.600397825241089, |
|
"logps/chosen": -335.92559814453125, |
|
"logps/rejected": -512.950439453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.44965821504592896, |
|
"rewards/margins": 20.810823440551758, |
|
"rewards/rejected": -20.36116600036621, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.928942521888431e-06, |
|
"logits/chosen": -4.4273834228515625, |
|
"logits/rejected": -3.1403005123138428, |
|
"logps/chosen": -641.9811401367188, |
|
"logps/rejected": -565.1820678710938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.003668213030323386, |
|
"rewards/margins": 20.497615814208984, |
|
"rewards/rejected": -20.493947982788086, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.926643225880123e-06, |
|
"logits/chosen": -4.4851250648498535, |
|
"logits/rejected": -3.762442111968994, |
|
"logps/chosen": -542.037353515625, |
|
"logps/rejected": -583.6822509765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6874328851699829, |
|
"rewards/margins": 22.295642852783203, |
|
"rewards/rejected": -21.60820960998535, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.924307872844444e-06, |
|
"logits/chosen": -4.5215559005737305, |
|
"logits/rejected": -3.9681222438812256, |
|
"logps/chosen": -452.0367736816406, |
|
"logps/rejected": -570.5191650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5621551871299744, |
|
"rewards/margins": 24.197097778320312, |
|
"rewards/rejected": -23.63494300842285, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.921936497481956e-06, |
|
"logits/chosen": -4.452681541442871, |
|
"logits/rejected": -4.026414394378662, |
|
"logps/chosen": -238.1045379638672, |
|
"logps/rejected": -457.61431884765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.171229600906372, |
|
"rewards/margins": 20.798799514770508, |
|
"rewards/rejected": -18.6275691986084, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.919529135028473e-06, |
|
"logits/chosen": -4.6069817543029785, |
|
"logits/rejected": -4.021325588226318, |
|
"logps/chosen": -443.1712951660156, |
|
"logps/rejected": -596.0953979492188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43758851289749146, |
|
"rewards/margins": 20.0040283203125, |
|
"rewards/rejected": -19.56644058227539, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.917085821254532e-06, |
|
"logits/chosen": -4.430685520172119, |
|
"logits/rejected": -3.408867835998535, |
|
"logps/chosen": -333.0233459472656, |
|
"logps/rejected": -514.176025390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3141602277755737, |
|
"rewards/margins": 21.7270450592041, |
|
"rewards/rejected": -20.412885665893555, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.914606592464865e-06, |
|
"logits/chosen": -4.614257335662842, |
|
"logits/rejected": -4.173254013061523, |
|
"logps/chosen": -392.7569274902344, |
|
"logps/rejected": -600.0775146484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3662140369415283, |
|
"rewards/margins": 26.564104080200195, |
|
"rewards/rejected": -25.19788932800293, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.91209148549786e-06, |
|
"logits/chosen": -4.470552444458008, |
|
"logits/rejected": -3.6645894050598145, |
|
"logps/chosen": -278.90374755859375, |
|
"logps/rejected": -585.414306640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.7279815673828125, |
|
"rewards/margins": 22.604557037353516, |
|
"rewards/rejected": -18.876575469970703, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.909540537725007e-06, |
|
"logits/chosen": -4.464867115020752, |
|
"logits/rejected": -3.679206609725952, |
|
"logps/chosen": -399.67230224609375, |
|
"logps/rejected": -459.1192932128906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3072266578674316, |
|
"rewards/margins": 20.884450912475586, |
|
"rewards/rejected": -17.577224731445312, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.906953787050354e-06, |
|
"logits/chosen": -4.506504535675049, |
|
"logits/rejected": -4.483677864074707, |
|
"logps/chosen": -212.6044921875, |
|
"logps/rejected": -577.6050415039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.012289524078369, |
|
"rewards/margins": 21.269466400146484, |
|
"rewards/rejected": -19.257177352905273, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.904331271909932e-06, |
|
"logits/chosen": -4.631925106048584, |
|
"logits/rejected": -3.576526403427124, |
|
"logps/chosen": -348.4132995605469, |
|
"logps/rejected": -490.9519348144531, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7574676871299744, |
|
"rewards/margins": 21.938953399658203, |
|
"rewards/rejected": -21.181486129760742, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.901673031271194e-06, |
|
"logits/chosen": -4.463425159454346, |
|
"logits/rejected": -3.718756914138794, |
|
"logps/chosen": -464.51141357421875, |
|
"logps/rejected": -558.5987548828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.619091808795929, |
|
"rewards/margins": 22.998170852661133, |
|
"rewards/rejected": -22.379079818725586, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.898979104632427e-06, |
|
"logits/chosen": -4.307651996612549, |
|
"logits/rejected": -3.490633010864258, |
|
"logps/chosen": -389.7247314453125, |
|
"logps/rejected": -523.1450805664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3679230213165283, |
|
"rewards/margins": 20.981260299682617, |
|
"rewards/rejected": -19.61333656311035, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.8962495320221714e-06, |
|
"logits/chosen": -4.304553031921387, |
|
"logits/rejected": -3.776304006576538, |
|
"logps/chosen": -323.0478515625, |
|
"logps/rejected": -520.6531982421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1722412109375, |
|
"rewards/margins": 20.817508697509766, |
|
"rewards/rejected": -18.645267486572266, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.8934843539986266e-06, |
|
"logits/chosen": -4.417074680328369, |
|
"logits/rejected": -3.540210008621216, |
|
"logps/chosen": -322.0855407714844, |
|
"logps/rejected": -536.3630981445312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3448638916015625, |
|
"rewards/margins": 22.750934600830078, |
|
"rewards/rejected": -20.406070709228516, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.890683611649041e-06, |
|
"logits/chosen": -4.454306125640869, |
|
"logits/rejected": -3.4941771030426025, |
|
"logps/chosen": -449.34722900390625, |
|
"logps/rejected": -584.3948974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.965466320514679, |
|
"rewards/margins": 24.43212890625, |
|
"rewards/rejected": -23.466663360595703, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.887847346589111e-06, |
|
"logits/chosen": -4.528407096862793, |
|
"logits/rejected": -4.528407096862793, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.884975600962355e-06, |
|
"logits/chosen": -4.461303234100342, |
|
"logits/rejected": -3.4503564834594727, |
|
"logps/chosen": -350.125, |
|
"logps/rejected": -544.8951416015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8698486685752869, |
|
"rewards/margins": 22.143985748291016, |
|
"rewards/rejected": -21.274137496948242, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.8820684174394935e-06, |
|
"logits/chosen": -4.2601318359375, |
|
"logits/rejected": -3.9379310607910156, |
|
"logps/chosen": -276.892822265625, |
|
"logps/rejected": -565.9779052734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.50067138671875, |
|
"rewards/margins": 27.015552520751953, |
|
"rewards/rejected": -22.514881134033203, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.879125839217808e-06, |
|
"logits/chosen": -4.442978382110596, |
|
"logits/rejected": -4.165872097015381, |
|
"logps/chosen": -276.66558837890625, |
|
"logps/rejected": -485.2083435058594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.557089328765869, |
|
"rewards/margins": 19.78055763244629, |
|
"rewards/rejected": -17.223468780517578, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.8761479100205085e-06, |
|
"logits/chosen": -4.185311794281006, |
|
"logits/rejected": -4.506956100463867, |
|
"logps/chosen": -302.510009765625, |
|
"logps/rejected": -368.14508056640625, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.349456787109375, |
|
"rewards/margins": 3.197528123855591, |
|
"rewards/rejected": -2.848071336746216, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.873134674096072e-06, |
|
"logits/chosen": -4.432115077972412, |
|
"logits/rejected": -4.062368392944336, |
|
"logps/chosen": -437.7923583984375, |
|
"logps/rejected": -577.2886962890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.143060207366943, |
|
"rewards/margins": 21.335046768188477, |
|
"rewards/rejected": -26.478107452392578, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.870086176217597e-06, |
|
"logits/chosen": -4.418601989746094, |
|
"logits/rejected": -4.349551677703857, |
|
"logps/chosen": -278.1993713378906, |
|
"logps/rejected": -625.385986328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.728192090988159, |
|
"rewards/margins": 23.60893440246582, |
|
"rewards/rejected": -27.337125778198242, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.867002461682129e-06, |
|
"logits/chosen": -4.347306251525879, |
|
"logits/rejected": -4.1640944480896, |
|
"logps/chosen": -397.3504638671875, |
|
"logps/rejected": -576.0902099609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7298157215118408, |
|
"rewards/margins": 20.283533096313477, |
|
"rewards/rejected": -22.013349533081055, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.863883576309991e-06, |
|
"logits/chosen": -4.444037437438965, |
|
"logits/rejected": -3.735938310623169, |
|
"logps/chosen": -439.25177001953125, |
|
"logps/rejected": -506.2371826171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.559460401535034, |
|
"rewards/margins": 16.74394416809082, |
|
"rewards/rejected": -20.303403854370117, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.860729566444106e-06, |
|
"logits/chosen": -4.453701972961426, |
|
"logits/rejected": -3.483694076538086, |
|
"logps/chosen": -435.145751953125, |
|
"logps/rejected": -411.5998840332031, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.5900208950042725, |
|
"rewards/margins": 14.451112747192383, |
|
"rewards/rejected": -18.041133880615234, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.857540478949302e-06, |
|
"logits/chosen": -4.345010757446289, |
|
"logits/rejected": -3.3772659301757812, |
|
"logps/chosen": -424.6817626953125, |
|
"logps/rejected": -536.5455322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.265347480773926, |
|
"rewards/margins": 19.152793884277344, |
|
"rewards/rejected": -23.418142318725586, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.854316361211619e-06, |
|
"logits/chosen": -4.456699848175049, |
|
"logits/rejected": -4.032180309295654, |
|
"logps/chosen": -479.0955505371094, |
|
"logps/rejected": -602.84130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.73911452293396, |
|
"rewards/margins": 17.996774673461914, |
|
"rewards/rejected": -21.735889434814453, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.851057261137608e-06, |
|
"logits/chosen": -4.340909481048584, |
|
"logits/rejected": -3.292670726776123, |
|
"logps/chosen": -462.56268310546875, |
|
"logps/rejected": -523.4163208007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.709277391433716, |
|
"rewards/margins": 19.967500686645508, |
|
"rewards/rejected": -22.67677879333496, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.847763227153612e-06, |
|
"logits/chosen": -4.428055286407471, |
|
"logits/rejected": -3.5385830402374268, |
|
"logps/chosen": -422.267822265625, |
|
"logps/rejected": -485.8017578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4887635707855225, |
|
"rewards/margins": 19.343963623046875, |
|
"rewards/rejected": -21.832727432250977, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.844434308205052e-06, |
|
"logits/chosen": -4.273122310638428, |
|
"logits/rejected": -3.7151196002960205, |
|
"logps/chosen": -546.6180419921875, |
|
"logps/rejected": -715.68896484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.0272216796875, |
|
"rewards/margins": 25.675113677978516, |
|
"rewards/rejected": -30.702335357666016, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.841070553755697e-06, |
|
"logits/chosen": -4.607006072998047, |
|
"logits/rejected": -4.249687671661377, |
|
"logps/chosen": -261.2702941894531, |
|
"logps/rejected": -548.1873779296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2804367542266846, |
|
"rewards/margins": 21.665143966674805, |
|
"rewards/rejected": -22.945581436157227, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.837672013786931e-06, |
|
"logits/chosen": -4.519053936004639, |
|
"logits/rejected": -4.209323883056641, |
|
"logps/chosen": -303.30255126953125, |
|
"logps/rejected": -551.4930419921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.127401828765869, |
|
"rewards/margins": 21.955106735229492, |
|
"rewards/rejected": -25.082508087158203, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.8342387387970105e-06, |
|
"logits/chosen": -4.35404634475708, |
|
"logits/rejected": -3.9764857292175293, |
|
"logps/chosen": -331.4461669921875, |
|
"logps/rejected": -582.4991455078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6374847888946533, |
|
"rewards/margins": 22.73183250427246, |
|
"rewards/rejected": -25.36931800842285, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.830770779800309e-06, |
|
"logits/chosen": -4.2908101081848145, |
|
"logits/rejected": -3.6881422996520996, |
|
"logps/chosen": -402.2869567871094, |
|
"logps/rejected": -572.225830078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.96708083152771, |
|
"rewards/margins": 22.76718521118164, |
|
"rewards/rejected": -25.73426628112793, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.827268188326567e-06, |
|
"logits/chosen": -4.4686408042907715, |
|
"logits/rejected": -3.9434425830841064, |
|
"logps/chosen": -478.5679626464844, |
|
"logps/rejected": -673.1861572265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.687936305999756, |
|
"rewards/margins": 21.72261619567871, |
|
"rewards/rejected": -26.410552978515625, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.823731016420122e-06, |
|
"logits/chosen": -4.402027606964111, |
|
"logits/rejected": -3.672158718109131, |
|
"logps/chosen": -521.04248046875, |
|
"logps/rejected": -567.2761840820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.248925685882568, |
|
"rewards/margins": 16.707509994506836, |
|
"rewards/rejected": -21.956436157226562, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.820159316639133e-06, |
|
"logits/chosen": -4.356218338012695, |
|
"logits/rejected": -4.386761665344238, |
|
"logps/chosen": -312.3507080078125, |
|
"logps/rejected": -663.8659057617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5384186506271362, |
|
"rewards/margins": 29.62138557434082, |
|
"rewards/rejected": -28.08296775817871, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.816553142054806e-06, |
|
"logits/chosen": -4.566652774810791, |
|
"logits/rejected": -4.032877445220947, |
|
"logps/chosen": -390.8731689453125, |
|
"logps/rejected": -503.9954833984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.9249114990234375, |
|
"rewards/margins": 17.718976974487305, |
|
"rewards/rejected": -22.643888473510742, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.812912546250595e-06, |
|
"logits/chosen": -4.595418453216553, |
|
"logits/rejected": -3.4654226303100586, |
|
"logps/chosen": -437.74615478515625, |
|
"logps/rejected": -564.276611328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.270282030105591, |
|
"rewards/margins": 22.324874877929688, |
|
"rewards/rejected": -24.595157623291016, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.809237583321421e-06, |
|
"logits/chosen": -4.55130672454834, |
|
"logits/rejected": -3.809107542037964, |
|
"logps/chosen": -288.273681640625, |
|
"logps/rejected": -529.8118286132812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.319549560546875, |
|
"rewards/margins": 21.799253463745117, |
|
"rewards/rejected": -25.118803024291992, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.8055283078728525e-06, |
|
"logits/chosen": -4.273970127105713, |
|
"logits/rejected": -3.532085657119751, |
|
"logps/chosen": -394.9105224609375, |
|
"logps/rejected": -580.9087524414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0409668684005737, |
|
"rewards/margins": 24.131925582885742, |
|
"rewards/rejected": -25.17289161682129, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.801784775020303e-06, |
|
"logits/chosen": -4.558983325958252, |
|
"logits/rejected": -3.609147071838379, |
|
"logps/chosen": -425.38165283203125, |
|
"logps/rejected": -579.5765991210938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.166571140289307, |
|
"rewards/margins": 21.48810577392578, |
|
"rewards/rejected": -25.65467643737793, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.798007040388212e-06, |
|
"logits/chosen": -4.523662567138672, |
|
"logits/rejected": -3.308692693710327, |
|
"logps/chosen": -537.5259399414062, |
|
"logps/rejected": -611.4498901367188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3234070539474487, |
|
"rewards/margins": 22.504859924316406, |
|
"rewards/rejected": -23.828266143798828, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.794195160109215e-06, |
|
"logits/chosen": -4.554165840148926, |
|
"logits/rejected": -4.412696361541748, |
|
"logps/chosen": -283.873046875, |
|
"logps/rejected": -653.0494384765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.464404344558716, |
|
"rewards/margins": 24.511882781982422, |
|
"rewards/rejected": -26.976287841796875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.790349190823313e-06, |
|
"logits/chosen": -4.461995601654053, |
|
"logits/rejected": -4.220631122589111, |
|
"logps/chosen": -378.62567138671875, |
|
"logps/rejected": -555.7577514648438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.84375, |
|
"rewards/margins": 15.135431289672852, |
|
"rewards/rejected": -22.97918128967285, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.786469189677026e-06, |
|
"logits/chosen": -4.626312732696533, |
|
"logits/rejected": -3.6043701171875, |
|
"logps/chosen": -404.515625, |
|
"logps/rejected": -554.8936767578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.731201171875, |
|
"rewards/margins": 19.18717384338379, |
|
"rewards/rejected": -22.91837501525879, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.78255521432255e-06, |
|
"logits/chosen": -4.456379413604736, |
|
"logits/rejected": -4.084070205688477, |
|
"logps/chosen": -233.46734619140625, |
|
"logps/rejected": -546.2238159179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8788391351699829, |
|
"rewards/margins": 24.65411376953125, |
|
"rewards/rejected": -25.5329532623291, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.778607322916896e-06, |
|
"logits/chosen": -4.25200891494751, |
|
"logits/rejected": -4.352073669433594, |
|
"logps/chosen": -287.51422119140625, |
|
"logps/rejected": -514.3617553710938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.367222785949707, |
|
"rewards/margins": 19.036060333251953, |
|
"rewards/rejected": -24.403284072875977, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.7746255741210256e-06, |
|
"logits/chosen": -4.257689952850342, |
|
"logits/rejected": -3.9616525173187256, |
|
"logps/chosen": -436.54052734375, |
|
"logps/rejected": -558.0086669921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.211642742156982, |
|
"rewards/margins": 16.815370559692383, |
|
"rewards/rejected": -23.027013778686523, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.770610027098983e-06, |
|
"logits/chosen": -4.314263820648193, |
|
"logits/rejected": -3.706144332885742, |
|
"logps/chosen": -385.21514892578125, |
|
"logps/rejected": -575.6632080078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6782593131065369, |
|
"rewards/margins": 22.845712661743164, |
|
"rewards/rejected": -22.16745376586914, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.766560741517014e-06, |
|
"logits/chosen": -4.261018753051758, |
|
"logits/rejected": -3.49680757522583, |
|
"logps/chosen": -443.4586181640625, |
|
"logps/rejected": -528.631103515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.9242799282073975, |
|
"rewards/margins": 18.376794815063477, |
|
"rewards/rejected": -22.301074981689453, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.762477777542676e-06, |
|
"logits/chosen": -4.405641078948975, |
|
"logits/rejected": -3.945005178451538, |
|
"logps/chosen": -405.1590576171875, |
|
"logps/rejected": -605.96142578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.02716064453125, |
|
"rewards/margins": 23.706918716430664, |
|
"rewards/rejected": -27.734079360961914, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7583611958439514e-06, |
|
"logits/chosen": -4.536804676055908, |
|
"logits/rejected": -3.4177980422973633, |
|
"logps/chosen": -509.9901123046875, |
|
"logps/rejected": -620.6497802734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6092407703399658, |
|
"rewards/margins": 23.246379852294922, |
|
"rewards/rejected": -24.855621337890625, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.75421105758834e-06, |
|
"logits/chosen": -4.660959243774414, |
|
"logits/rejected": -4.660958766937256, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.750027424441949e-06, |
|
"logits/chosen": -4.213469982147217, |
|
"logits/rejected": -4.410857200622559, |
|
"logps/chosen": -280.2644348144531, |
|
"logps/rejected": -588.2990112304688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.470999151468277, |
|
"rewards/margins": 24.910537719726562, |
|
"rewards/rejected": -24.439538955688477, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.745810358568588e-06, |
|
"logits/chosen": -4.407349586486816, |
|
"logits/rejected": -3.4045121669769287, |
|
"logps/chosen": -651.0145263671875, |
|
"logps/rejected": -581.0377807617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.35540771484375, |
|
"rewards/margins": 13.205488204956055, |
|
"rewards/rejected": -22.560895919799805, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.741559922628828e-06, |
|
"logits/chosen": -4.383808612823486, |
|
"logits/rejected": -3.694657325744629, |
|
"logps/chosen": -627.8776245117188, |
|
"logps/rejected": -550.0055541992188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.867218017578125, |
|
"rewards/margins": 19.274717330932617, |
|
"rewards/rejected": -24.141935348510742, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.7372761797790836e-06, |
|
"logits/chosen": -4.419149875640869, |
|
"logits/rejected": -3.9326839447021484, |
|
"logps/chosen": -456.3083801269531, |
|
"logps/rejected": -572.00732421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.1375579833984375, |
|
"rewards/margins": 19.8563232421875, |
|
"rewards/rejected": -23.993881225585938, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.732959193670672e-06, |
|
"logits/chosen": -4.390028476715088, |
|
"logits/rejected": -3.866453170776367, |
|
"logps/chosen": -332.904052734375, |
|
"logps/rejected": -565.65673828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6805603504180908, |
|
"rewards/margins": 21.7492733001709, |
|
"rewards/rejected": -23.429834365844727, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.728609028448862e-06, |
|
"logits/chosen": -4.234015941619873, |
|
"logits/rejected": -3.2613775730133057, |
|
"logps/chosen": -456.681640625, |
|
"logps/rejected": -508.46441650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.307135105133057, |
|
"rewards/margins": 17.87537384033203, |
|
"rewards/rejected": -22.18250846862793, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.7242257487519275e-06, |
|
"logits/chosen": -4.365071773529053, |
|
"logits/rejected": -3.898486852645874, |
|
"logps/chosen": -285.94317626953125, |
|
"logps/rejected": -444.3804931640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7177032828330994, |
|
"rewards/margins": 18.269119262695312, |
|
"rewards/rejected": -18.9868221282959, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.7198094197101826e-06, |
|
"logits/chosen": -4.433129787445068, |
|
"logits/rejected": -3.400374412536621, |
|
"logps/chosen": -395.3111267089844, |
|
"logps/rejected": -566.3218383789062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.34246826171875, |
|
"rewards/margins": 23.32318687438965, |
|
"rewards/rejected": -25.6656551361084, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.715360106945015e-06, |
|
"logits/chosen": -4.460170269012451, |
|
"logits/rejected": -4.486166477203369, |
|
"logps/chosen": -322.58245849609375, |
|
"logps/rejected": -571.2058715820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.377227783203125, |
|
"rewards/margins": 21.1688232421875, |
|
"rewards/rejected": -25.546051025390625, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.710877876567912e-06, |
|
"logits/chosen": -4.677746772766113, |
|
"logits/rejected": -3.628592014312744, |
|
"logps/chosen": -282.5626525878906, |
|
"logps/rejected": -498.66815185546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.01987624168396, |
|
"rewards/margins": 17.119985580444336, |
|
"rewards/rejected": -19.139862060546875, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.706362795179476e-06, |
|
"logits/chosen": -4.649206638336182, |
|
"logits/rejected": -3.9550185203552246, |
|
"logps/chosen": -317.6220703125, |
|
"logps/rejected": -532.2894897460938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.211251974105835, |
|
"rewards/margins": 20.70464515686035, |
|
"rewards/rejected": -22.915897369384766, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.70181492986844e-06, |
|
"logits/chosen": -4.574020862579346, |
|
"logits/rejected": -3.7530534267425537, |
|
"logps/chosen": -390.477783203125, |
|
"logps/rejected": -538.0514526367188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6195312738418579, |
|
"rewards/margins": 23.567447662353516, |
|
"rewards/rejected": -24.186979293823242, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.6972343482106615e-06, |
|
"logits/chosen": -4.381142616271973, |
|
"logits/rejected": -3.9664149284362793, |
|
"logps/chosen": -368.2343444824219, |
|
"logps/rejected": -584.2080688476562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.74643874168396, |
|
"rewards/margins": 23.20481300354004, |
|
"rewards/rejected": -26.951251983642578, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.6926211182681295e-06, |
|
"logits/chosen": -4.515586853027344, |
|
"logits/rejected": -4.021639823913574, |
|
"logps/chosen": -422.8552551269531, |
|
"logps/rejected": -605.24853515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3351776599884033, |
|
"rewards/margins": 23.986923217773438, |
|
"rewards/rejected": -27.322101593017578, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.687975308587944e-06, |
|
"logits/chosen": -4.370363235473633, |
|
"logits/rejected": -3.52022385597229, |
|
"logps/chosen": -421.40106201171875, |
|
"logps/rejected": -555.5052490234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.351022243499756, |
|
"rewards/margins": 20.446269989013672, |
|
"rewards/rejected": -25.797292709350586, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.683296988201301e-06, |
|
"logits/chosen": -4.45542049407959, |
|
"logits/rejected": -3.843148946762085, |
|
"logps/chosen": -250.9272003173828, |
|
"logps/rejected": -461.41656494140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5148300528526306, |
|
"rewards/margins": 22.641338348388672, |
|
"rewards/rejected": -22.126508712768555, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.6785862266224695e-06, |
|
"logits/chosen": -4.4527587890625, |
|
"logits/rejected": -3.8313660621643066, |
|
"logps/chosen": -435.7333068847656, |
|
"logps/rejected": -420.346435546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.598269939422607, |
|
"rewards/margins": 12.53331184387207, |
|
"rewards/rejected": -18.131582260131836, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.673843093847753e-06, |
|
"logits/chosen": -4.46143913269043, |
|
"logits/rejected": -4.0022873878479, |
|
"logps/chosen": -546.3322143554688, |
|
"logps/rejected": -678.267822265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.684521675109863, |
|
"rewards/margins": 22.149757385253906, |
|
"rewards/rejected": -28.834280014038086, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.669067660354456e-06, |
|
"logits/chosen": -4.453671932220459, |
|
"logits/rejected": -3.783543586730957, |
|
"logps/chosen": -489.1321105957031, |
|
"logps/rejected": -653.801513671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.456167697906494, |
|
"rewards/margins": 27.995878219604492, |
|
"rewards/rejected": -30.452045440673828, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.664259997099829e-06, |
|
"logits/chosen": -4.382017135620117, |
|
"logits/rejected": -4.235163688659668, |
|
"logps/chosen": -401.96905517578125, |
|
"logps/rejected": -611.9321899414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1131255626678467, |
|
"rewards/margins": 23.950090408325195, |
|
"rewards/rejected": -26.063215255737305, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.6594201755200205e-06, |
|
"logits/chosen": -4.401975154876709, |
|
"logits/rejected": -4.047399520874023, |
|
"logps/chosen": -347.8199768066406, |
|
"logps/rejected": -534.321533203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.148288249969482, |
|
"rewards/margins": 15.352182388305664, |
|
"rewards/rejected": -20.500471115112305, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.654548267529015e-06, |
|
"logits/chosen": -4.742393970489502, |
|
"logits/rejected": -3.528773546218872, |
|
"logps/chosen": -461.3231201171875, |
|
"logps/rejected": -502.95330810546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.834014892578125, |
|
"rewards/margins": 19.609724044799805, |
|
"rewards/rejected": -22.44373893737793, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.64964434551756e-06, |
|
"logits/chosen": -4.29525899887085, |
|
"logits/rejected": -3.517086982727051, |
|
"logps/chosen": -655.9037475585938, |
|
"logps/rejected": -467.7865295410156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.464410305023193, |
|
"rewards/margins": 16.863277435302734, |
|
"rewards/rejected": -21.327688217163086, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.644708482352093e-06, |
|
"logits/chosen": -4.491517066955566, |
|
"logits/rejected": -3.6783647537231445, |
|
"logps/chosen": -402.5279846191406, |
|
"logps/rejected": -560.4140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.5269012451171875, |
|
"rewards/margins": 17.657962799072266, |
|
"rewards/rejected": -22.184864044189453, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.639740751373663e-06, |
|
"logits/chosen": -4.2944111824035645, |
|
"logits/rejected": -3.600904703140259, |
|
"logps/chosen": -518.21533203125, |
|
"logps/rejected": -547.026611328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.848297119140625, |
|
"rewards/margins": 20.74945068359375, |
|
"rewards/rejected": -24.597747802734375, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.634741226396832e-06, |
|
"logits/chosen": -4.682287693023682, |
|
"logits/rejected": -3.6956443786621094, |
|
"logps/chosen": -314.2425537109375, |
|
"logps/rejected": -630.8653564453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.6687896251678467, |
|
"rewards/margins": 24.917322158813477, |
|
"rewards/rejected": -28.586111068725586, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.629709981708586e-06, |
|
"logits/chosen": -4.4025044441223145, |
|
"logits/rejected": -3.328565835952759, |
|
"logps/chosen": -596.0982666015625, |
|
"logps/rejected": -469.07025146484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.942303657531738, |
|
"rewards/margins": 13.943493843078613, |
|
"rewards/rejected": -21.88579750061035, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.624647092067226e-06, |
|
"logits/chosen": -4.347430229187012, |
|
"logits/rejected": -4.287944316864014, |
|
"logps/chosen": -307.51995849609375, |
|
"logps/rejected": -544.3563232421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6383118629455566, |
|
"rewards/margins": 22.841541290283203, |
|
"rewards/rejected": -25.4798526763916, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.619552632701263e-06, |
|
"logits/chosen": -4.473455905914307, |
|
"logits/rejected": -4.314274787902832, |
|
"logps/chosen": -277.60906982421875, |
|
"logps/rejected": -504.164306640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.467631816864014, |
|
"rewards/margins": 18.195838928222656, |
|
"rewards/rejected": -23.663471221923828, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.614426679308291e-06, |
|
"logits/chosen": -4.148372650146484, |
|
"logits/rejected": -3.744719982147217, |
|
"logps/chosen": -355.4638671875, |
|
"logps/rejected": -454.26422119140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.292315721511841, |
|
"rewards/margins": 16.010753631591797, |
|
"rewards/rejected": -18.303070068359375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.609269308053872e-06, |
|
"logits/chosen": -4.353799343109131, |
|
"logits/rejected": -3.8214452266693115, |
|
"logps/chosen": -393.32989501953125, |
|
"logps/rejected": -582.2720336914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2819581031799316, |
|
"rewards/margins": 22.352136611938477, |
|
"rewards/rejected": -24.63409423828125, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.604080595570399e-06, |
|
"logits/chosen": -4.46610689163208, |
|
"logits/rejected": -3.906358242034912, |
|
"logps/chosen": -322.1328125, |
|
"logps/rejected": -599.1141357421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1895081996917725, |
|
"rewards/margins": 23.97051429748535, |
|
"rewards/rejected": -26.160022735595703, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.598860618955957e-06, |
|
"logits/chosen": -4.385786056518555, |
|
"logits/rejected": -4.310035228729248, |
|
"logps/chosen": -400.26947021484375, |
|
"logps/rejected": -559.5322875976562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.774789571762085, |
|
"rewards/margins": 18.254159927368164, |
|
"rewards/rejected": -21.028949737548828, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.5936094557731815e-06, |
|
"logits/chosen": -4.240229606628418, |
|
"logits/rejected": -4.443663597106934, |
|
"logps/chosen": -345.6215515136719, |
|
"logps/rejected": -692.10107421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.39714357256889343, |
|
"rewards/margins": 27.15097427368164, |
|
"rewards/rejected": -26.753829956054688, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.588327184048099e-06, |
|
"logits/chosen": -4.332690238952637, |
|
"logits/rejected": -3.449024200439453, |
|
"logps/chosen": -496.02508544921875, |
|
"logps/rejected": -615.9970092773438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.7168214321136475, |
|
"rewards/margins": 22.373443603515625, |
|
"rewards/rejected": -26.09026527404785, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.5830138822689755e-06, |
|
"logits/chosen": -4.444944381713867, |
|
"logits/rejected": -3.7115731239318848, |
|
"logps/chosen": -449.7723083496094, |
|
"logps/rejected": -626.166015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.5469634532928467, |
|
"rewards/margins": 23.168373107910156, |
|
"rewards/rejected": -26.715335845947266, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.577669629385145e-06, |
|
"logits/chosen": -4.408154487609863, |
|
"logits/rejected": -3.651484489440918, |
|
"logps/chosen": -441.13018798828125, |
|
"logps/rejected": -643.3833618164062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.660876512527466, |
|
"rewards/margins": 22.676677703857422, |
|
"rewards/rejected": -25.337554931640625, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.572294504805841e-06, |
|
"logits/chosen": -4.424633502960205, |
|
"logits/rejected": -3.707329034805298, |
|
"logps/chosen": -487.3986511230469, |
|
"logps/rejected": -554.1790771484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.541256904602051, |
|
"rewards/margins": 15.3828706741333, |
|
"rewards/rejected": -20.92412757873535, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.566888588399007e-06, |
|
"logits/chosen": -4.403136730194092, |
|
"logits/rejected": -3.7492878437042236, |
|
"logps/chosen": -355.76708984375, |
|
"logps/rejected": -554.3052978515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.743695020675659, |
|
"rewards/margins": 20.683734893798828, |
|
"rewards/rejected": -24.42742919921875, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.561451960490123e-06, |
|
"logits/chosen": -4.675843715667725, |
|
"logits/rejected": -4.675843715667725, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.5559847018610034e-06, |
|
"logits/chosen": -4.456602096557617, |
|
"logits/rejected": -3.8642396926879883, |
|
"logps/chosen": -359.24835205078125, |
|
"logps/rejected": -608.4000244140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.042169213294983, |
|
"rewards/margins": 25.666967391967773, |
|
"rewards/rejected": -26.709136962890625, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.550486893748596e-06, |
|
"logits/chosen": -4.495731830596924, |
|
"logits/rejected": -4.421840190887451, |
|
"logps/chosen": -300.7923889160156, |
|
"logps/rejected": -369.44671630859375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9146941900253296, |
|
"rewards/margins": 9.700987815856934, |
|
"rewards/rejected": -11.615681648254395, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.544958617843782e-06, |
|
"logits/chosen": -4.233188629150391, |
|
"logits/rejected": -3.7297332286834717, |
|
"logps/chosen": -473.4989013671875, |
|
"logps/rejected": -492.3461608886719, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.732303142547607, |
|
"rewards/margins": 15.883966445922852, |
|
"rewards/rejected": -22.616270065307617, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.539399956290152e-06, |
|
"logits/chosen": -4.557020664215088, |
|
"logits/rejected": -3.960388660430908, |
|
"logps/chosen": -353.52593994140625, |
|
"logps/rejected": -604.6260986328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.712493896484375, |
|
"rewards/margins": 22.881561279296875, |
|
"rewards/rejected": -27.59405517578125, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.533810991682799e-06, |
|
"logits/chosen": -4.580037593841553, |
|
"logits/rejected": -4.580037593841553, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.528191807067074e-06, |
|
"logits/chosen": -4.481337547302246, |
|
"logits/rejected": -4.369873046875, |
|
"logps/chosen": -302.75152587890625, |
|
"logps/rejected": -500.8403625488281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4375977516174316, |
|
"rewards/margins": 18.154129028320312, |
|
"rewards/rejected": -21.591726303100586, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -4.541010856628418, |
|
"logits/rejected": -4.120445728302002, |
|
"logps/chosen": -266.5628662109375, |
|
"logps/rejected": -520.7110595703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6100586652755737, |
|
"rewards/margins": 22.605920791625977, |
|
"rewards/rejected": -24.215978622436523, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.516863112235864e-06, |
|
"logits/chosen": -4.432363033294678, |
|
"logits/rejected": -4.100122928619385, |
|
"logps/chosen": -329.5076599121094, |
|
"logps/rejected": -615.78515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16323547065258026, |
|
"rewards/margins": 28.81184196472168, |
|
"rewards/rejected": -28.97507667541504, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.511153770351288e-06, |
|
"logits/chosen": -4.500782012939453, |
|
"logits/rejected": -3.487731695175171, |
|
"logps/chosen": -464.27020263671875, |
|
"logps/rejected": -605.0491943359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.322766304016113, |
|
"rewards/margins": 23.01436996459961, |
|
"rewards/rejected": -28.33713722229004, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.505414545117658e-06, |
|
"logits/chosen": -4.300138473510742, |
|
"logits/rejected": -3.477233648300171, |
|
"logps/chosen": -533.3462524414062, |
|
"logps/rejected": -552.9212646484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.31990385055542, |
|
"rewards/margins": 18.3931884765625, |
|
"rewards/rejected": -23.713092803955078, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.499645521813024e-06, |
|
"logits/chosen": -4.700889587402344, |
|
"logits/rejected": -4.700889587402344, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.4938467861582e-06, |
|
"logits/chosen": -4.556081771850586, |
|
"logits/rejected": -4.198794841766357, |
|
"logps/chosen": -371.759033203125, |
|
"logps/rejected": -660.7146606445312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.564685106277466, |
|
"rewards/margins": 23.74681282043457, |
|
"rewards/rejected": -26.311498641967773, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.488018424315488e-06, |
|
"logits/chosen": -4.297285079956055, |
|
"logits/rejected": -4.350274562835693, |
|
"logps/chosen": -330.6549072265625, |
|
"logps/rejected": -572.1771240234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0300233364105225, |
|
"rewards/margins": 23.603017807006836, |
|
"rewards/rejected": -26.633041381835938, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.482160522887404e-06, |
|
"logits/chosen": -4.338238716125488, |
|
"logits/rejected": -3.436751127243042, |
|
"logps/chosen": -326.945556640625, |
|
"logps/rejected": -512.1168823242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8089507818222046, |
|
"rewards/margins": 21.56464385986328, |
|
"rewards/rejected": -23.373594284057617, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.476273168915382e-06, |
|
"logits/chosen": -5.034451007843018, |
|
"logits/rejected": -3.47391414642334, |
|
"logps/chosen": -1412.2579345703125, |
|
"logps/rejected": -403.28057861328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26282960176467896, |
|
"rewards/margins": 13.486255645751953, |
|
"rewards/rejected": -13.22342586517334, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.470356449878489e-06, |
|
"logits/chosen": -4.398158073425293, |
|
"logits/rejected": -3.418801784515381, |
|
"logps/chosen": -422.3399658203125, |
|
"logps/rejected": -592.7428588867188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.6789307594299316, |
|
"rewards/margins": 21.556623458862305, |
|
"rewards/rejected": -25.235553741455078, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.464410453692122e-06, |
|
"logits/chosen": -4.643823146820068, |
|
"logits/rejected": -3.279494285583496, |
|
"logps/chosen": -560.9666748046875, |
|
"logps/rejected": -519.3795776367188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4105286598205566, |
|
"rewards/margins": 20.03549575805664, |
|
"rewards/rejected": -22.44602394104004, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.458435268706699e-06, |
|
"logits/chosen": -4.174355506896973, |
|
"logits/rejected": -3.4881181716918945, |
|
"logps/chosen": -425.60882568359375, |
|
"logps/rejected": -419.9944763183594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6146514415740967, |
|
"rewards/margins": 16.718265533447266, |
|
"rewards/rejected": -19.332916259765625, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.452430983706351e-06, |
|
"logits/chosen": -4.392972469329834, |
|
"logits/rejected": -3.702141523361206, |
|
"logps/chosen": -366.05242919921875, |
|
"logps/rejected": -583.062255859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.865545630455017, |
|
"rewards/margins": 23.291095733642578, |
|
"rewards/rejected": -25.156641006469727, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.446397687907601e-06, |
|
"logits/chosen": -4.396094799041748, |
|
"logits/rejected": -4.039773464202881, |
|
"logps/chosen": -341.2452392578125, |
|
"logps/rejected": -622.6715087890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08438720554113388, |
|
"rewards/margins": 28.248329162597656, |
|
"rewards/rejected": -28.33271598815918, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.440335470958035e-06, |
|
"logits/chosen": -4.646708011627197, |
|
"logits/rejected": -3.6073200702667236, |
|
"logps/chosen": -276.8403625488281, |
|
"logps/rejected": -427.32745361328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.796478271484375, |
|
"rewards/margins": 17.417638778686523, |
|
"rewards/rejected": -18.2141170501709, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.434244422934976e-06, |
|
"logits/chosen": -4.129358768463135, |
|
"logits/rejected": -4.367154598236084, |
|
"logps/chosen": -171.61856079101562, |
|
"logps/rejected": -549.8220825195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7979736328125, |
|
"rewards/margins": 24.695449829101562, |
|
"rewards/rejected": -25.493423461914062, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.428124634344141e-06, |
|
"logits/chosen": -4.327607154846191, |
|
"logits/rejected": -3.922853946685791, |
|
"logps/chosen": -338.17413330078125, |
|
"logps/rejected": -567.5420532226562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.001150608062744, |
|
"rewards/margins": 21.21744155883789, |
|
"rewards/rejected": -25.218591690063477, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.421976196118297e-06, |
|
"logits/chosen": -4.414794445037842, |
|
"logits/rejected": -3.8170502185821533, |
|
"logps/chosen": -354.2364501953125, |
|
"logps/rejected": -535.7091064453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5273651480674744, |
|
"rewards/margins": 22.748092651367188, |
|
"rewards/rejected": -23.27545738220215, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.415799199615912e-06, |
|
"logits/chosen": -4.355689525604248, |
|
"logits/rejected": -3.721245050430298, |
|
"logps/chosen": -403.55718994140625, |
|
"logps/rejected": -614.30224609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.988238573074341, |
|
"rewards/margins": 26.3142032623291, |
|
"rewards/rejected": -29.30244255065918, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.409593736619795e-06, |
|
"logits/chosen": -4.30208158493042, |
|
"logits/rejected": -4.249013900756836, |
|
"logps/chosen": -330.4805603027344, |
|
"logps/rejected": -520.9468383789062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.18290114402771, |
|
"rewards/margins": 19.691268920898438, |
|
"rewards/rejected": -21.874170303344727, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.403359899335732e-06, |
|
"logits/chosen": -4.452451705932617, |
|
"logits/rejected": -3.5297634601593018, |
|
"logps/chosen": -407.70440673828125, |
|
"logps/rejected": -531.66455078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.366229295730591, |
|
"rewards/margins": 19.401885986328125, |
|
"rewards/rejected": -21.768115997314453, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.39709778039112e-06, |
|
"logits/chosen": -4.624291896820068, |
|
"logits/rejected": -3.607923984527588, |
|
"logps/chosen": -432.9666748046875, |
|
"logps/rejected": -430.0704650878906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.567608833312988, |
|
"rewards/margins": 15.570273399353027, |
|
"rewards/rejected": -20.137882232666016, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.390807472833585e-06, |
|
"logits/chosen": -4.4363179206848145, |
|
"logits/rejected": -4.285643100738525, |
|
"logps/chosen": -346.4957275390625, |
|
"logps/rejected": -557.8546752929688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2618744373321533, |
|
"rewards/margins": 22.152734756469727, |
|
"rewards/rejected": -24.414609909057617, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.384489070129604e-06, |
|
"logits/chosen": -4.686382293701172, |
|
"logits/rejected": -4.411667823791504, |
|
"logps/chosen": -297.61651611328125, |
|
"logps/rejected": -598.6287841796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.362146019935608, |
|
"rewards/margins": 23.59848976135254, |
|
"rewards/rejected": -24.960636138916016, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.378142666163114e-06, |
|
"logits/chosen": -4.398681163787842, |
|
"logits/rejected": -3.3622610569000244, |
|
"logps/chosen": -381.4431457519531, |
|
"logps/rejected": -480.4046630859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9724457263946533, |
|
"rewards/margins": 19.119897842407227, |
|
"rewards/rejected": -22.092344284057617, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.371768355234116e-06, |
|
"logits/chosen": -4.680546283721924, |
|
"logits/rejected": -4.680546283721924, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.365366232057279e-06, |
|
"logits/chosen": -4.3958964347839355, |
|
"logits/rejected": -3.414220094680786, |
|
"logps/chosen": -470.550537109375, |
|
"logps/rejected": -456.4300231933594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.259875535964966, |
|
"rewards/margins": 16.188610076904297, |
|
"rewards/rejected": -19.448486328125, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.358936391760524e-06, |
|
"logits/chosen": -4.416255950927734, |
|
"logits/rejected": -3.4653921127319336, |
|
"logps/chosen": -314.48199462890625, |
|
"logps/rejected": -503.40478515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4892303943634033, |
|
"rewards/margins": 21.383264541625977, |
|
"rewards/rejected": -23.872495651245117, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.3524789298836175e-06, |
|
"logits/chosen": -4.381935119628906, |
|
"logits/rejected": -3.548987627029419, |
|
"logps/chosen": -456.87030029296875, |
|
"logps/rejected": -514.978515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.680187940597534, |
|
"rewards/margins": 18.483985900878906, |
|
"rewards/rejected": -22.164173126220703, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.345993942376752e-06, |
|
"logits/chosen": -4.319192409515381, |
|
"logits/rejected": -3.6070122718811035, |
|
"logps/chosen": -535.0125122070312, |
|
"logps/rejected": -593.177490234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.068231105804443, |
|
"rewards/margins": 22.915740966796875, |
|
"rewards/rejected": -26.983972549438477, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.3394815255991135e-06, |
|
"logits/chosen": -4.557577610015869, |
|
"logits/rejected": -4.233284950256348, |
|
"logps/chosen": -340.40478515625, |
|
"logps/rejected": -559.3313598632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.232684373855591, |
|
"rewards/margins": 15.566218376159668, |
|
"rewards/rejected": -18.79890251159668, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.332941776317458e-06, |
|
"logits/chosen": -4.449789524078369, |
|
"logits/rejected": -3.7085957527160645, |
|
"logps/chosen": -460.09100341796875, |
|
"logps/rejected": -565.1859130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4739625453948975, |
|
"rewards/margins": 21.726104736328125, |
|
"rewards/rejected": -25.2000675201416, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.32637479170467e-06, |
|
"logits/chosen": -4.309577941894531, |
|
"logits/rejected": -3.4090349674224854, |
|
"logps/chosen": -488.4316711425781, |
|
"logps/rejected": -556.6356201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.466006755828857, |
|
"rewards/margins": 21.675212860107422, |
|
"rewards/rejected": -26.141220092773438, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.319780669338316e-06, |
|
"logits/chosen": -4.309078216552734, |
|
"logits/rejected": -3.30332612991333, |
|
"logps/chosen": -507.0978088378906, |
|
"logps/rejected": -519.831787109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4428741931915283, |
|
"rewards/margins": 14.903473854064941, |
|
"rewards/rejected": -18.34634780883789, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.313159507199197e-06, |
|
"logits/chosen": -4.414469242095947, |
|
"logits/rejected": -3.480733871459961, |
|
"logps/chosen": -471.8912048339844, |
|
"logps/rejected": -557.726318359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.649487257003784, |
|
"rewards/margins": 18.816673278808594, |
|
"rewards/rejected": -22.46615982055664, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.306511403669897e-06, |
|
"logits/chosen": -4.4712090492248535, |
|
"logits/rejected": -4.122826099395752, |
|
"logps/chosen": -487.50177001953125, |
|
"logps/rejected": -632.9717407226562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.310089111328125, |
|
"rewards/margins": 18.895023345947266, |
|
"rewards/rejected": -24.20511245727539, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.299836457533313e-06, |
|
"logits/chosen": -4.493587970733643, |
|
"logits/rejected": -4.127647399902344, |
|
"logps/chosen": -348.9983215332031, |
|
"logps/rejected": -613.3372802734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7200958728790283, |
|
"rewards/margins": 21.614328384399414, |
|
"rewards/rejected": -23.33442497253418, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.293134767971193e-06, |
|
"logits/chosen": -4.367143154144287, |
|
"logits/rejected": -3.770662784576416, |
|
"logps/chosen": -465.8094482421875, |
|
"logps/rejected": -645.4409790039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.2273621559143066, |
|
"rewards/margins": 24.89282989501953, |
|
"rewards/rejected": -28.12019157409668, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.286406434562659e-06, |
|
"logits/chosen": -4.329665660858154, |
|
"logits/rejected": -4.187549591064453, |
|
"logps/chosen": -327.8598937988281, |
|
"logps/rejected": -573.4243774414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.743408203125, |
|
"rewards/margins": 22.067590713500977, |
|
"rewards/rejected": -22.810998916625977, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.2796515572827305e-06, |
|
"logits/chosen": -4.589941501617432, |
|
"logits/rejected": -3.3189613819122314, |
|
"logps/chosen": -437.3631591796875, |
|
"logps/rejected": -500.00341796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.332589864730835, |
|
"rewards/margins": 17.248987197875977, |
|
"rewards/rejected": -20.58157730102539, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.2728702365008356e-06, |
|
"logits/chosen": -4.512362957000732, |
|
"logits/rejected": -3.4759039878845215, |
|
"logps/chosen": -452.6970520019531, |
|
"logps/rejected": -566.7788696289062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2685546875, |
|
"rewards/margins": 21.10858726501465, |
|
"rewards/rejected": -23.37714195251465, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.266062572979323e-06, |
|
"logits/chosen": -4.406839370727539, |
|
"logits/rejected": -3.6835734844207764, |
|
"logps/chosen": -479.810791015625, |
|
"logps/rejected": -554.9146728515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.451641798019409, |
|
"rewards/margins": 18.89285659790039, |
|
"rewards/rejected": -21.344497680664062, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.259228667871963e-06, |
|
"logits/chosen": -4.3280510902404785, |
|
"logits/rejected": -3.3908562660217285, |
|
"logps/chosen": -535.2221069335938, |
|
"logps/rejected": -520.6017456054688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.345489501953125, |
|
"rewards/margins": 21.740737915039062, |
|
"rewards/rejected": -24.086227416992188, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.252368622722443e-06, |
|
"logits/chosen": -4.508797645568848, |
|
"logits/rejected": -4.2981276512146, |
|
"logps/chosen": -387.08135986328125, |
|
"logps/rejected": -548.83203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.486175537109375, |
|
"rewards/margins": 17.451730728149414, |
|
"rewards/rejected": -22.93790626525879, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.245482539462861e-06, |
|
"logits/chosen": -4.583652973175049, |
|
"logits/rejected": -4.022202491760254, |
|
"logps/chosen": -302.711669921875, |
|
"logps/rejected": -516.1565551757812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4421050548553467, |
|
"rewards/margins": 20.776533126831055, |
|
"rewards/rejected": -24.218637466430664, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.2385705204122104e-06, |
|
"logits/chosen": -4.387404441833496, |
|
"logits/rejected": -3.931056261062622, |
|
"logps/chosen": -387.2518310546875, |
|
"logps/rejected": -686.424560546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.06422758102417, |
|
"rewards/margins": 27.97595977783203, |
|
"rewards/rejected": -32.04018783569336, |
|
"step": 302 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 906, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|