{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999198503873898, "eval_steps": 100, "global_step": 935, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "agreement_weights/mean": 0.9900000095367432, "agreement_weights/std": 0.0, "epoch": 0.0010686615014694097, "eta/annotator_0": 0.9900000095367432, "grad_norm": 40.65554414826802, "learning_rate": 7.446808510638298e-09, "loss": 1.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "agreement_weights/mean": 0.9900000095367432, "agreement_weights/std": 0.0, "epoch": 0.0021373230029388193, "eta/annotator_0": 0.9900000095367432, "grad_norm": 72.28528444608695, "learning_rate": 1.4893617021276595e-08, "loss": 1.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "agreement_weights/mean": 0.9899550676345825, "agreement_weights/std": 0.00023007948766462505, "epoch": 0.0032059845044082286, "eta/annotator_0": 0.9899987578392029, "grad_norm": 18.330013671421625, "learning_rate": 2.234042553191489e-08, "loss": 1.0013, "rewards/accuracies": 0.328125, "rewards/chosen": -6.866455078125e-05, "rewards/margins": -0.00040435791015625, "rewards/rejected": 0.000335693359375, "step": 3 }, { "agreement_weights/mean": 0.9900526404380798, "agreement_weights/std": 0.0004530585138127208, "epoch": 0.004274646005877639, "eta/annotator_0": 0.9899984002113342, "grad_norm": 56.88686887027273, "learning_rate": 2.978723404255319e-08, "loss": 0.9956, "rewards/accuracies": 0.34375, "rewards/chosen": -0.000732421875, "rewards/margins": 0.0009002685546875, "rewards/rejected": -0.0016326904296875, "step": 4 }, { "agreement_weights/mean": 0.989942729473114, "agreement_weights/std": 0.00021629800903610885, "epoch": 0.0053433075073470475, "eta/annotator_0": 0.989997386932373, "grad_norm": 44.98545684468203, "learning_rate": 3.723404255319149e-08, "loss": 1.0013, "rewards/accuracies": 0.34375, "rewards/chosen": -0.000186920166015625, "rewards/margins": -0.000461578369140625, "rewards/rejected": 0.000274658203125, "step": 5 }, { "agreement_weights/mean": 0.9899063110351562, "agreement_weights/std": 0.0008455903152935207, "epoch": 0.006411969008816457, "eta/annotator_0": 0.9899943470954895, "grad_norm": 61.08204652902013, "learning_rate": 4.468085106382978e-08, "loss": 1.0019, "rewards/accuracies": 0.171875, "rewards/chosen": 0.00081634521484375, "rewards/margins": -0.000518798828125, "rewards/rejected": 0.00133514404296875, "step": 6 }, { "agreement_weights/mean": 0.9899554252624512, "agreement_weights/std": 0.00023985517327673733, "epoch": 0.007480630510285867, "eta/annotator_0": 0.9899943470954895, "grad_norm": 34.831610814146565, "learning_rate": 5.212765957446808e-08, "loss": 1.0011, "rewards/accuracies": 0.234375, "rewards/chosen": -0.00034332275390625, "rewards/margins": -0.0003814697265625, "rewards/rejected": 3.814697265625e-05, "step": 7 }, { "agreement_weights/mean": 0.9899797439575195, "agreement_weights/std": 0.00023355684243142605, "epoch": 0.008549292011755277, "eta/annotator_0": 0.9899915456771851, "grad_norm": 28.698611736827132, "learning_rate": 5.957446808510638e-08, "loss": 1.0, "rewards/accuracies": 0.265625, "rewards/chosen": -0.0003204345703125, "rewards/margins": -1.52587890625e-05, "rewards/rejected": -0.00030517578125, "step": 8 }, { "agreement_weights/mean": 0.990046501159668, "agreement_weights/std": 0.00043108267709612846, "epoch": 0.009617953513224685, "eta/annotator_0": 0.9899905920028687, "grad_norm": 36.51449157412548, "learning_rate": 6.702127659574467e-08, "loss": 0.9963, "rewards/accuracies": 0.265625, "rewards/chosen": -0.0003662109375, "rewards/margins": 0.0008392333984375, "rewards/rejected": -0.0012054443359375, "step": 9 }, { "agreement_weights/mean": 0.9899859428405762, "agreement_weights/std": 0.00019523956871125847, "epoch": 0.010686615014694095, "eta/annotator_0": 0.9899920225143433, "grad_norm": 27.610790566763157, "learning_rate": 7.446808510638298e-08, "loss": 0.999, "rewards/accuracies": 0.3125, "rewards/chosen": 0.0010528564453125, "rewards/margins": 6.103515625e-05, "rewards/rejected": 0.0009918212890625, "step": 10 }, { "agreement_weights/mean": 0.9900118112564087, "agreement_weights/std": 0.00015272361633833498, "epoch": 0.011755276516163505, "eta/annotator_0": 0.9899964332580566, "grad_norm": 49.892618386117434, "learning_rate": 8.191489361702128e-08, "loss": 0.9982, "rewards/accuracies": 0.25, "rewards/chosen": 0.00060272216796875, "rewards/margins": 0.0002288818359375, "rewards/rejected": 0.00037384033203125, "step": 11 }, { "agreement_weights/mean": 0.989962100982666, "agreement_weights/std": 0.00018992779951076955, "epoch": 0.012823938017632914, "eta/annotator_0": 0.9899964332580566, "grad_norm": 32.39073845709968, "learning_rate": 8.936170212765956e-08, "loss": 0.9999, "rewards/accuracies": 0.328125, "rewards/chosen": -0.00012969970703125, "rewards/margins": -0.00023651123046875, "rewards/rejected": 0.0001068115234375, "step": 12 }, { "agreement_weights/mean": 0.9900062084197998, "agreement_weights/std": 0.0006172743160277605, "epoch": 0.013892599519102324, "eta/annotator_0": 0.9899946451187134, "grad_norm": 61.253917165874206, "learning_rate": 9.680851063829788e-08, "loss": 0.9973, "rewards/accuracies": 0.265625, "rewards/chosen": 0.00101470947265625, "rewards/margins": 0.00061798095703125, "rewards/rejected": 0.000396728515625, "step": 13 }, { "agreement_weights/mean": 0.990034818649292, "agreement_weights/std": 0.00016295124078169465, "epoch": 0.014961261020571734, "eta/annotator_0": 0.9899940490722656, "grad_norm": 47.88502073720433, "learning_rate": 1.0425531914893615e-07, "loss": 0.9964, "rewards/accuracies": 0.46875, "rewards/chosen": -0.00014495849609375, "rewards/margins": 0.000701904296875, "rewards/rejected": -0.00084686279296875, "step": 14 }, { "agreement_weights/mean": 0.989676833152771, "agreement_weights/std": 0.0013176084030419588, "epoch": 0.016029922522041145, "eta/annotator_0": 0.9899942278862, "grad_norm": 132.10712380164946, "learning_rate": 1.1170212765957447e-07, "loss": 1.01, "rewards/accuracies": 0.28125, "rewards/chosen": -0.002960205078125, "rewards/margins": -0.00257110595703125, "rewards/rejected": -0.00038909912109375, "step": 15 }, { "agreement_weights/mean": 0.9899830222129822, "agreement_weights/std": 0.0002492568746674806, "epoch": 0.017098584023510555, "eta/annotator_0": 0.9899947643280029, "grad_norm": 65.91829873617186, "learning_rate": 1.1914893617021276e-07, "loss": 0.9987, "rewards/accuracies": 0.328125, "rewards/chosen": -0.000213623046875, "rewards/margins": 0.0002899169921875, "rewards/rejected": -0.0005035400390625, "step": 16 }, { "agreement_weights/mean": 0.9899011850357056, "agreement_weights/std": 0.0003893629473168403, "epoch": 0.018167245524979964, "eta/annotator_0": 0.9899947643280029, "grad_norm": 28.780430635328614, "learning_rate": 1.2659574468085107e-07, "loss": 1.0024, "rewards/accuracies": 0.3125, "rewards/chosen": -6.866455078125e-05, "rewards/margins": -0.00074005126953125, "rewards/rejected": 0.00067138671875, "step": 17 }, { "agreement_weights/mean": 0.9901018142700195, "agreement_weights/std": 0.0007561356760561466, "epoch": 0.01923590702644937, "eta/annotator_0": 0.9900088310241699, "grad_norm": 56.12906888361625, "learning_rate": 1.3404255319148934e-07, "loss": 0.9924, "rewards/accuracies": 0.359375, "rewards/chosen": -0.00081634521484375, "rewards/margins": 0.00189208984375, "rewards/rejected": -0.00270843505859375, "step": 18 }, { "agreement_weights/mean": 0.9900038242340088, "agreement_weights/std": 0.00023469097504857928, "epoch": 0.02030456852791878, "eta/annotator_0": 0.9900134801864624, "grad_norm": 77.69136618853747, "learning_rate": 1.4148936170212765e-07, "loss": 0.9977, "rewards/accuracies": 0.359375, "rewards/chosen": -3.0517578125e-05, "rewards/margins": 0.000476837158203125, "rewards/rejected": -0.000507354736328125, "step": 19 }, { "agreement_weights/mean": 0.9900158643722534, "agreement_weights/std": 0.0001506400149082765, "epoch": 0.02137323002938819, "eta/annotator_0": 0.9900133609771729, "grad_norm": 38.2318118003449, "learning_rate": 1.4893617021276595e-07, "loss": 0.9974, "rewards/accuracies": 0.375, "rewards/chosen": -0.00022125244140625, "rewards/margins": 0.00046539306640625, "rewards/rejected": -0.0006866455078125, "step": 20 }, { "agreement_weights/mean": 0.9900034070014954, "agreement_weights/std": 0.00018002184515353292, "epoch": 0.0224418915308576, "eta/annotator_0": 0.990013062953949, "grad_norm": 54.55689164766514, "learning_rate": 1.5638297872340423e-07, "loss": 0.9986, "rewards/accuracies": 0.328125, "rewards/chosen": 0.0, "rewards/margins": 0.0003204345703125, "rewards/rejected": -0.0003204345703125, "step": 21 }, { "agreement_weights/mean": 0.9891497492790222, "agreement_weights/std": 0.003586401231586933, "epoch": 0.02351055303232701, "eta/annotator_0": 0.990013062953949, "grad_norm": 93.64164889591754, "learning_rate": 1.6382978723404256e-07, "loss": 1.0142, "rewards/accuracies": 0.390625, "rewards/chosen": -0.0040283203125, "rewards/margins": -0.00325775146484375, "rewards/rejected": -0.00077056884765625, "step": 22 }, { "agreement_weights/mean": 0.9900084137916565, "agreement_weights/std": 0.00036747241392731667, "epoch": 0.02457921453379642, "eta/annotator_0": 0.990014910697937, "grad_norm": 45.26514292320638, "learning_rate": 1.7127659574468084e-07, "loss": 0.9967, "rewards/accuracies": 0.28125, "rewards/chosen": 0.0007781982421875, "rewards/margins": 0.00080108642578125, "rewards/rejected": -2.288818359375e-05, "step": 23 }, { "agreement_weights/mean": 0.9898564219474792, "agreement_weights/std": 0.00092642119852826, "epoch": 0.02564787603526583, "eta/annotator_0": 0.9900156259536743, "grad_norm": 72.74736054898526, "learning_rate": 1.7872340425531912e-07, "loss": 1.0019, "rewards/accuracies": 0.265625, "rewards/chosen": -0.00042724609375, "rewards/margins": -0.00057220458984375, "rewards/rejected": 0.00014495849609375, "step": 24 }, { "agreement_weights/mean": 0.9898624420166016, "agreement_weights/std": 0.0007507300470024347, "epoch": 0.026716537536735238, "eta/annotator_0": 0.9900100827217102, "grad_norm": 39.170509696147406, "learning_rate": 1.8617021276595742e-07, "loss": 1.0024, "rewards/accuracies": 0.328125, "rewards/chosen": -0.0013675689697265625, "rewards/margins": -0.0007534027099609375, "rewards/rejected": -0.000614166259765625, "step": 25 }, { "agreement_weights/mean": 0.9899629354476929, "agreement_weights/std": 0.00040288284071721137, "epoch": 0.027785199038204648, "eta/annotator_0": 0.9899933934211731, "grad_norm": 33.663272960880136, "learning_rate": 1.9361702127659575e-07, "loss": 0.9977, "rewards/accuracies": 0.25, "rewards/chosen": -0.000560760498046875, "rewards/margins": 0.000392913818359375, "rewards/rejected": -0.00095367431640625, "step": 26 }, { "agreement_weights/mean": 0.9899723529815674, "agreement_weights/std": 0.00034520504414103925, "epoch": 0.028853860539674057, "eta/annotator_0": 0.9899933934211731, "grad_norm": 75.3825892061179, "learning_rate": 2.0106382978723406e-07, "loss": 0.9975, "rewards/accuracies": 0.328125, "rewards/chosen": 0.00057220458984375, "rewards/margins": 0.0003814697265625, "rewards/rejected": 0.00019073486328125, "step": 27 }, { "agreement_weights/mean": 0.989926815032959, "agreement_weights/std": 0.0003286846331320703, "epoch": 0.029922522041143467, "eta/annotator_0": 0.9899930357933044, "grad_norm": 40.15336072718543, "learning_rate": 2.085106382978723e-07, "loss": 0.9996, "rewards/accuracies": 0.28125, "rewards/chosen": -0.00058746337890625, "rewards/margins": -9.1552734375e-05, "rewards/rejected": -0.00049591064453125, "step": 28 }, { "agreement_weights/mean": 0.9899588823318481, "agreement_weights/std": 0.0006131303962320089, "epoch": 0.030991183542612877, "eta/annotator_0": 0.9899929165840149, "grad_norm": 39.072963289318054, "learning_rate": 2.1595744680851064e-07, "loss": 0.9973, "rewards/accuracies": 0.34375, "rewards/chosen": -0.00016021728515625, "rewards/margins": 0.0005340576171875, "rewards/rejected": -0.00069427490234375, "step": 29 }, { "agreement_weights/mean": 0.9900150299072266, "agreement_weights/std": 0.0006856718682684004, "epoch": 0.03205984504408229, "eta/annotator_0": 0.9899987578392029, "grad_norm": 52.413795513793985, "learning_rate": 2.2340425531914894e-07, "loss": 0.9942, "rewards/accuracies": 0.34375, "rewards/chosen": -0.00058746337890625, "rewards/margins": 0.00131988525390625, "rewards/rejected": -0.0019073486328125, "step": 30 }, { "agreement_weights/mean": 0.9900026917457581, "agreement_weights/std": 0.00031494026188738644, "epoch": 0.033128506545551696, "eta/annotator_0": 0.9900162220001221, "grad_norm": 33.57557195875846, "learning_rate": 2.308510638297872e-07, "loss": 0.9965, "rewards/accuracies": 0.3125, "rewards/chosen": 0.000335693359375, "rewards/margins": 0.00066375732421875, "rewards/rejected": -0.00032806396484375, "step": 31 }, { "agreement_weights/mean": 0.990012526512146, "agreement_weights/std": 0.0014336255844682455, "epoch": 0.03419716804702111, "eta/annotator_0": 0.9900162220001221, "grad_norm": 107.98924371712907, "learning_rate": 2.3829787234042553e-07, "loss": 0.9912, "rewards/accuracies": 0.34375, "rewards/chosen": 0.0027923583984375, "rewards/margins": 0.0025177001953125, "rewards/rejected": 0.000274658203125, "step": 32 }, { "agreement_weights/mean": 0.9900907278060913, "agreement_weights/std": 0.00039227932575158775, "epoch": 0.035265829548490515, "eta/annotator_0": 0.9900199770927429, "grad_norm": 63.74026740167272, "learning_rate": 2.4574468085106383e-07, "loss": 0.9921, "rewards/accuracies": 0.296875, "rewards/chosen": 0.00020599365234375, "rewards/margins": 0.001800537109375, "rewards/rejected": -0.00159454345703125, "step": 33 }, { "agreement_weights/mean": 0.9895988702774048, "agreement_weights/std": 0.0017273675184696913, "epoch": 0.03633449104995993, "eta/annotator_0": 0.9900212287902832, "grad_norm": 126.58313311165088, "learning_rate": 2.5319148936170213e-07, "loss": 1.0081, "rewards/accuracies": 0.40625, "rewards/chosen": 0.0002593994140625, "rewards/margins": -0.00177001953125, "rewards/rejected": 0.00203704833984375, "step": 34 }, { "agreement_weights/mean": 0.9897785782814026, "agreement_weights/std": 0.0009952880209311843, "epoch": 0.037403152551429335, "eta/annotator_0": 0.9899922609329224, "grad_norm": 72.0563270259774, "learning_rate": 2.606382978723404e-07, "loss": 1.0041, "rewards/accuracies": 0.375, "rewards/chosen": -0.002685546875, "rewards/margins": -0.00098419189453125, "rewards/rejected": -0.001708984375, "step": 35 }, { "agreement_weights/mean": 0.9899382591247559, "agreement_weights/std": 0.0003099889145232737, "epoch": 0.03847181405289874, "eta/annotator_0": 0.9899053573608398, "grad_norm": 46.22751124511231, "learning_rate": 2.680851063829787e-07, "loss": 0.9991, "rewards/accuracies": 0.265625, "rewards/chosen": 0.000152587890625, "rewards/margins": -2.288818359375e-05, "rewards/rejected": 0.00017547607421875, "step": 36 }, { "agreement_weights/mean": 0.9898942708969116, "agreement_weights/std": 0.00037811638321727514, "epoch": 0.039540475554368154, "eta/annotator_0": 0.9899053573608398, "grad_norm": 62.76650358163003, "learning_rate": 2.75531914893617e-07, "loss": 1.0008, "rewards/accuracies": 0.25, "rewards/chosen": -0.00109100341796875, "rewards/margins": -0.00048828125, "rewards/rejected": -0.00060272216796875, "step": 37 }, { "agreement_weights/mean": 0.9900622367858887, "agreement_weights/std": 0.0007023935904726386, "epoch": 0.04060913705583756, "eta/annotator_0": 0.9899024963378906, "grad_norm": 42.246334717222155, "learning_rate": 2.829787234042553e-07, "loss": 0.9905, "rewards/accuracies": 0.390625, "rewards/chosen": -0.0034027099609375, "rewards/margins": 0.00244903564453125, "rewards/rejected": -0.00585174560546875, "step": 38 }, { "agreement_weights/mean": 0.9900236129760742, "agreement_weights/std": 0.00034342246362939477, "epoch": 0.04167779855730697, "eta/annotator_0": 0.9899015426635742, "grad_norm": 22.188043648107858, "learning_rate": 2.904255319148936e-07, "loss": 0.9942, "rewards/accuracies": 0.46875, "rewards/chosen": -0.00016021728515625, "rewards/margins": 0.00145721435546875, "rewards/rejected": -0.001617431640625, "step": 39 }, { "agreement_weights/mean": 0.9896913170814514, "agreement_weights/std": 0.0012575883883982897, "epoch": 0.04274646005877638, "eta/annotator_0": 0.9899015426635742, "grad_norm": 57.183287147820344, "learning_rate": 2.978723404255319e-07, "loss": 1.0065, "rewards/accuracies": 0.359375, "rewards/chosen": -0.0028228759765625, "rewards/margins": -0.00160980224609375, "rewards/rejected": -0.00121307373046875, "step": 40 }, { "agreement_weights/mean": 0.9897152185440063, "agreement_weights/std": 0.0011086603626608849, "epoch": 0.04381512156024579, "eta/annotator_0": 0.9899015426635742, "grad_norm": 76.78377750813148, "learning_rate": 3.053191489361702e-07, "loss": 1.0077, "rewards/accuracies": 0.28125, "rewards/chosen": -0.00390625, "rewards/margins": -0.0019683837890625, "rewards/rejected": -0.0019378662109375, "step": 41 }, { "agreement_weights/mean": 0.9900558590888977, "agreement_weights/std": 0.0004379412275739014, "epoch": 0.0448837830617152, "eta/annotator_0": 0.9899015426635742, "grad_norm": 23.96319239572694, "learning_rate": 3.1276595744680846e-07, "loss": 0.9929, "rewards/accuracies": 0.375, "rewards/chosen": -0.00060272216796875, "rewards/margins": 0.0016937255859375, "rewards/rejected": -0.00229644775390625, "step": 42 }, { "agreement_weights/mean": 0.9901957511901855, "agreement_weights/std": 0.0009078345610760152, "epoch": 0.04595244456318461, "eta/annotator_0": 0.9898568391799927, "grad_norm": 71.12997869882813, "learning_rate": 3.2021276595744677e-07, "loss": 0.9842, "rewards/accuracies": 0.421875, "rewards/chosen": -5.7220458984375e-05, "rewards/margins": 0.004058837890625, "rewards/rejected": -0.00411224365234375, "step": 43 }, { "agreement_weights/mean": 0.9898695349693298, "agreement_weights/std": 0.0003736003418453038, "epoch": 0.04702110606465402, "eta/annotator_0": 0.9898419976234436, "grad_norm": 20.073812786769068, "learning_rate": 3.276595744680851e-07, "loss": 1.0019, "rewards/accuracies": 0.3125, "rewards/chosen": -0.001678466796875, "rewards/margins": -0.0006256103515625, "rewards/rejected": -0.0010528564453125, "step": 44 }, { "agreement_weights/mean": 0.9897381067276001, "agreement_weights/std": 0.0012365051079541445, "epoch": 0.04808976756612343, "eta/annotator_0": 0.9898409843444824, "grad_norm": 50.1223688296678, "learning_rate": 3.3510638297872343e-07, "loss": 1.0034, "rewards/accuracies": 0.453125, "rewards/chosen": -0.00246429443359375, "rewards/margins": -0.00077056884765625, "rewards/rejected": -0.0016937255859375, "step": 45 }, { "agreement_weights/mean": 0.9894956350326538, "agreement_weights/std": 0.0021385198924690485, "epoch": 0.04915842906759284, "eta/annotator_0": 0.9898379445075989, "grad_norm": 90.41510736271331, "learning_rate": 3.425531914893617e-07, "loss": 1.0123, "rewards/accuracies": 0.328125, "rewards/chosen": -0.004245758056640625, "rewards/margins": -0.003154754638671875, "rewards/rejected": -0.0010986328125, "step": 46 }, { "agreement_weights/mean": 0.9897025227546692, "agreement_weights/std": 0.001183731248602271, "epoch": 0.05022709056906225, "eta/annotator_0": 0.9898379445075989, "grad_norm": 89.5943558013204, "learning_rate": 3.5e-07, "loss": 1.0061, "rewards/accuracies": 0.390625, "rewards/chosen": -0.00395965576171875, "rewards/margins": -0.001495361328125, "rewards/rejected": -0.00246429443359375, "step": 47 }, { "agreement_weights/mean": 0.9899817109107971, "agreement_weights/std": 0.0007238064426928759, "epoch": 0.05129575207053166, "eta/annotator_0": 0.9898309111595154, "grad_norm": 54.465247780577116, "learning_rate": 3.5744680851063824e-07, "loss": 0.9943, "rewards/accuracies": 0.203125, "rewards/chosen": -0.0003204345703125, "rewards/margins": 0.001220703125, "rewards/rejected": -0.0015411376953125, "step": 48 }, { "agreement_weights/mean": 0.9899741411209106, "agreement_weights/std": 0.0003807473403867334, "epoch": 0.05236441357200107, "eta/annotator_0": 0.9898285269737244, "grad_norm": 17.046384807316343, "learning_rate": 3.648936170212766e-07, "loss": 0.9949, "rewards/accuracies": 0.40625, "rewards/chosen": -0.001617431640625, "rewards/margins": 0.00101470947265625, "rewards/rejected": -0.00263214111328125, "step": 49 }, { "agreement_weights/mean": 0.9899731874465942, "agreement_weights/std": 0.0006330495816655457, "epoch": 0.053433075073470476, "eta/annotator_0": 0.98982834815979, "grad_norm": 27.053424803109746, "learning_rate": 3.7234042553191484e-07, "loss": 0.9953, "rewards/accuracies": 0.375, "rewards/chosen": -0.00304412841796875, "rewards/margins": 0.00116729736328125, "rewards/rejected": -0.00421905517578125, "step": 50 }, { "agreement_weights/mean": 0.9901506900787354, "agreement_weights/std": 0.0011448926525190473, "epoch": 0.05450173657493989, "eta/annotator_0": 0.9898279905319214, "grad_norm": 68.31383469233803, "learning_rate": 3.7978723404255315e-07, "loss": 0.9824, "rewards/accuracies": 0.359375, "rewards/chosen": -0.00226593017578125, "rewards/margins": 0.00482940673828125, "rewards/rejected": -0.00708770751953125, "step": 51 }, { "agreement_weights/mean": 0.9899532794952393, "agreement_weights/std": 0.0004044414381496608, "epoch": 0.055570398076409296, "eta/annotator_0": 0.9898279905319214, "grad_norm": 28.73356347358311, "learning_rate": 3.872340425531915e-07, "loss": 0.997, "rewards/accuracies": 0.359375, "rewards/chosen": -0.00299835205078125, "rewards/margins": 0.000530242919921875, "rewards/rejected": -0.00353240966796875, "step": 52 }, { "agreement_weights/mean": 0.989941418170929, "agreement_weights/std": 0.0009140140027739108, "epoch": 0.05663905957787871, "eta/annotator_0": 0.9898361563682556, "grad_norm": 52.26315152319984, "learning_rate": 3.9468085106382976e-07, "loss": 0.9958, "rewards/accuracies": 0.375, "rewards/chosen": -0.00261688232421875, "rewards/margins": 0.0008392333984375, "rewards/rejected": -0.00345611572265625, "step": 53 }, { "agreement_weights/mean": 0.9900424480438232, "agreement_weights/std": 0.0009696033084765077, "epoch": 0.057707721079348115, "eta/annotator_0": 0.9898388981819153, "grad_norm": 46.29011481762137, "learning_rate": 4.021276595744681e-07, "loss": 0.9911, "rewards/accuracies": 0.453125, "rewards/chosen": 0.0003509521484375, "rewards/margins": 0.00225830078125, "rewards/rejected": -0.0019073486328125, "step": 54 }, { "agreement_weights/mean": 0.9899663329124451, "agreement_weights/std": 0.0012657021870836616, "epoch": 0.05877638258081753, "eta/annotator_0": 0.9898437857627869, "grad_norm": 29.543340897786656, "learning_rate": 4.0957446808510637e-07, "loss": 0.9916, "rewards/accuracies": 0.375, "rewards/chosen": 0.00146484375, "rewards/margins": 0.0024261474609375, "rewards/rejected": -0.00095367431640625, "step": 55 }, { "agreement_weights/mean": 0.9897550344467163, "agreement_weights/std": 0.0018292663153260946, "epoch": 0.059845044082286934, "eta/annotator_0": 0.9898583889007568, "grad_norm": 36.718388503814865, "learning_rate": 4.170212765957446e-07, "loss": 0.9997, "rewards/accuracies": 0.453125, "rewards/chosen": -0.00136566162109375, "rewards/margins": 0.000461578369140625, "rewards/rejected": -0.001827239990234375, "step": 56 }, { "agreement_weights/mean": 0.9898387789726257, "agreement_weights/std": 0.0011744822841137648, "epoch": 0.06091370558375635, "eta/annotator_0": 0.9898583889007568, "grad_norm": 93.86010993347404, "learning_rate": 4.24468085106383e-07, "loss": 0.9998, "rewards/accuracies": 0.5, "rewards/chosen": 0.00232696533203125, "rewards/margins": 0.00011444091796875, "rewards/rejected": 0.00220489501953125, "step": 57 }, { "agreement_weights/mean": 0.9808279275894165, "agreement_weights/std": 0.03611587733030319, "epoch": 0.061982367085225754, "eta/annotator_0": 0.9898730516433716, "grad_norm": 55.23549065125804, "learning_rate": 4.319148936170213e-07, "loss": 1.0185, "rewards/accuracies": 0.46875, "rewards/chosen": -0.0116119384765625, "rewards/margins": -0.0096282958984375, "rewards/rejected": -0.00197601318359375, "step": 58 }, { "agreement_weights/mean": 0.9897713661193848, "agreement_weights/std": 0.0014195613330230117, "epoch": 0.06305102858669516, "eta/annotator_0": 0.9898778796195984, "grad_norm": 19.4315557130068, "learning_rate": 4.3936170212765953e-07, "loss": 0.9854, "rewards/accuracies": 0.390625, "rewards/chosen": -0.00241851806640625, "rewards/margins": 0.00384521484375, "rewards/rejected": -0.00626373291015625, "step": 59 }, { "agreement_weights/mean": 0.98909592628479, "agreement_weights/std": 0.004107869230210781, "epoch": 0.06411969008816458, "eta/annotator_0": 0.9898331761360168, "grad_norm": 84.1990330407417, "learning_rate": 4.468085106382979e-07, "loss": 0.996, "rewards/accuracies": 0.375, "rewards/chosen": -0.00146484375, "rewards/margins": 0.002349853515625, "rewards/rejected": -0.0037994384765625, "step": 60 }, { "agreement_weights/mean": 0.9893831014633179, "agreement_weights/std": 0.0023015947081148624, "epoch": 0.06518835158963399, "eta/annotator_0": 0.9896991848945618, "grad_norm": 76.8786076582835, "learning_rate": 4.5425531914893614e-07, "loss": 0.9992, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0048370361328125, "rewards/margins": 0.0007476806640625, "rewards/rejected": -0.00557708740234375, "step": 61 }, { "agreement_weights/mean": 0.9893826246261597, "agreement_weights/std": 0.002007419476285577, "epoch": 0.06625701309110339, "eta/annotator_0": 0.9896991848945618, "grad_norm": 43.149711545780264, "learning_rate": 4.617021276595744e-07, "loss": 1.0014, "rewards/accuracies": 0.4375, "rewards/chosen": -0.00732421875, "rewards/margins": -0.0003204345703125, "rewards/rejected": -0.0070037841796875, "step": 62 }, { "agreement_weights/mean": 0.9889684915542603, "agreement_weights/std": 0.003849944332614541, "epoch": 0.0673256745925728, "eta/annotator_0": 0.9896953105926514, "grad_norm": 78.75613193930528, "learning_rate": 4.6914893617021275e-07, "loss": 1.0053, "rewards/accuracies": 0.34375, "rewards/chosen": -0.00150299072265625, "rewards/margins": -0.00122833251953125, "rewards/rejected": -0.0002593994140625, "step": 63 }, { "agreement_weights/mean": 0.9897327423095703, "agreement_weights/std": 0.0018563305493444204, "epoch": 0.06839433609404222, "eta/annotator_0": 0.9896939992904663, "grad_norm": 28.472868824366806, "learning_rate": 4.7659574468085105e-07, "loss": 0.9832, "rewards/accuracies": 0.4375, "rewards/chosen": -0.00478363037109375, "rewards/margins": 0.00479888916015625, "rewards/rejected": -0.00958251953125, "step": 64 }, { "agreement_weights/mean": 0.9896204471588135, "agreement_weights/std": 0.0013754473766312003, "epoch": 0.06946299759551162, "eta/annotator_0": 0.9896970987319946, "grad_norm": 42.965236730655015, "learning_rate": 4.840425531914894e-07, "loss": 0.9928, "rewards/accuracies": 0.40625, "rewards/chosen": -0.00040435791015625, "rewards/margins": 0.00180816650390625, "rewards/rejected": -0.00220489501953125, "step": 65 }, { "agreement_weights/mean": 0.9893931150436401, "agreement_weights/std": 0.001825765473768115, "epoch": 0.07053165909698103, "eta/annotator_0": 0.9897064566612244, "grad_norm": 58.8709716049148, "learning_rate": 4.914893617021277e-07, "loss": 1.0014, "rewards/accuracies": 0.40625, "rewards/chosen": -0.00574493408203125, "rewards/margins": -0.00049591064453125, "rewards/rejected": -0.0052490234375, "step": 66 }, { "agreement_weights/mean": 0.9886138439178467, "agreement_weights/std": 0.004755516070872545, "epoch": 0.07160032059845044, "eta/annotator_0": 0.9897064566612244, "grad_norm": 94.48984812598592, "learning_rate": 4.989361702127659e-07, "loss": 1.0147, "rewards/accuracies": 0.421875, "rewards/chosen": -0.0118865966796875, "rewards/margins": -0.0028228759765625, "rewards/rejected": -0.0090484619140625, "step": 67 }, { "agreement_weights/mean": 0.9897629022598267, "agreement_weights/std": 0.001915637869387865, "epoch": 0.07266898209991986, "eta/annotator_0": 0.9896971583366394, "grad_norm": 24.382132577502926, "learning_rate": 5.063829787234043e-07, "loss": 0.9739, "rewards/accuracies": 0.390625, "rewards/chosen": 0.012603759765625, "rewards/margins": 0.01081085205078125, "rewards/rejected": 0.00180816650390625, "step": 68 }, { "agreement_weights/mean": 0.9897735118865967, "agreement_weights/std": 0.0014363240916281939, "epoch": 0.07373764360138926, "eta/annotator_0": 0.9896940588951111, "grad_norm": 41.93082205917433, "learning_rate": 5.138297872340425e-07, "loss": 0.983, "rewards/accuracies": 0.4375, "rewards/chosen": 0.00310516357421875, "rewards/margins": 0.00441741943359375, "rewards/rejected": -0.001312255859375, "step": 69 }, { "agreement_weights/mean": 0.9896574020385742, "agreement_weights/std": 0.0015951944515109062, "epoch": 0.07480630510285867, "eta/annotator_0": 0.9897163510322571, "grad_norm": 23.021026348336743, "learning_rate": 5.212765957446808e-07, "loss": 0.9866, "rewards/accuracies": 0.359375, "rewards/chosen": -0.00482177734375, "rewards/margins": 0.00359344482421875, "rewards/rejected": -0.00839996337890625, "step": 70 }, { "agreement_weights/mean": 0.983590304851532, "agreement_weights/std": 0.025207938626408577, "epoch": 0.07587496660432808, "eta/annotator_0": 0.9897833466529846, "grad_norm": 105.86639850981466, "learning_rate": 5.287234042553191e-07, "loss": 1.0135, "rewards/accuracies": 0.359375, "rewards/chosen": -0.0087432861328125, "rewards/margins": -0.00308990478515625, "rewards/rejected": -0.0056610107421875, "step": 71 }, { "agreement_weights/mean": 0.9899378418922424, "agreement_weights/std": 0.0017958201933652163, "epoch": 0.07694362810579748, "eta/annotator_0": 0.9897833466529846, "grad_norm": 33.47696648200954, "learning_rate": 5.361702127659574e-07, "loss": 0.9711, "rewards/accuracies": 0.375, "rewards/chosen": -0.000640869140625, "rewards/margins": 0.008941650390625, "rewards/rejected": -0.0095672607421875, "step": 72 }, { "agreement_weights/mean": 0.9892181158065796, "agreement_weights/std": 0.003311418928205967, "epoch": 0.0780122896072669, "eta/annotator_0": 0.9897149801254272, "grad_norm": 55.398820475942394, "learning_rate": 5.436170212765957e-07, "loss": 0.991, "rewards/accuracies": 0.40625, "rewards/chosen": -0.0207061767578125, "rewards/margins": 0.0035247802734375, "rewards/rejected": -0.02423095703125, "step": 73 }, { "agreement_weights/mean": 0.9894064664840698, "agreement_weights/std": 0.0024716465268284082, "epoch": 0.07908095110873631, "eta/annotator_0": 0.9896921515464783, "grad_norm": 18.52027243434468, "learning_rate": 5.51063829787234e-07, "loss": 0.9799, "rewards/accuracies": 0.453125, "rewards/chosen": -0.0110626220703125, "rewards/margins": 0.0071868896484375, "rewards/rejected": -0.0182952880859375, "step": 74 }, { "agreement_weights/mean": 0.9895154237747192, "agreement_weights/std": 0.0029417339246720076, "epoch": 0.08014961261020571, "eta/annotator_0": 0.9897063970565796, "grad_norm": 52.6662464985199, "learning_rate": 5.585106382978723e-07, "loss": 0.9672, "rewards/accuracies": 0.4375, "rewards/chosen": 0.0002593994140625, "rewards/margins": 0.01216888427734375, "rewards/rejected": -0.0118865966796875, "step": 75 }, { "agreement_weights/mean": 0.9893014430999756, "agreement_weights/std": 0.003673410974442959, "epoch": 0.08121827411167512, "eta/annotator_0": 0.9897491931915283, "grad_norm": 32.481081687707025, "learning_rate": 5.659574468085106e-07, "loss": 0.9761, "rewards/accuracies": 0.5, "rewards/chosen": -0.00151824951171875, "rewards/margins": 0.010589599609375, "rewards/rejected": -0.0120849609375, "step": 76 }, { "agreement_weights/mean": 0.9813879728317261, "agreement_weights/std": 0.032393928617239, "epoch": 0.08228693561314454, "eta/annotator_0": 0.9897491931915283, "grad_norm": 71.1968290222655, "learning_rate": 5.73404255319149e-07, "loss": 1.0212, "rewards/accuracies": 0.40625, "rewards/chosen": -0.009124755859375, "rewards/margins": -0.00930023193359375, "rewards/rejected": 0.00018310546875, "step": 77 }, { "agreement_weights/mean": 0.9891752600669861, "agreement_weights/std": 0.0018004082376137376, "epoch": 0.08335559711461395, "eta/annotator_0": 0.9897647500038147, "grad_norm": 26.565448780660965, "learning_rate": 5.808510638297872e-07, "loss": 0.9919, "rewards/accuracies": 0.390625, "rewards/chosen": -0.0093841552734375, "rewards/margins": 0.002529144287109375, "rewards/rejected": -0.01192474365234375, "step": 78 }, { "agreement_weights/mean": 0.9759947657585144, "agreement_weights/std": 0.052200593054294586, "epoch": 0.08442425861608335, "eta/annotator_0": 0.9897699952125549, "grad_norm": 142.49520192325986, "learning_rate": 5.882978723404256e-07, "loss": 1.0226, "rewards/accuracies": 0.40625, "rewards/chosen": -0.0325775146484375, "rewards/margins": -0.01456451416015625, "rewards/rejected": -0.0178985595703125, "step": 79 }, { "agreement_weights/mean": 0.988405704498291, "agreement_weights/std": 0.004539561457931995, "epoch": 0.08549292011755276, "eta/annotator_0": 0.9897250533103943, "grad_norm": 57.04828448319423, "learning_rate": 5.957446808510638e-07, "loss": 1.0047, "rewards/accuracies": 0.453125, "rewards/chosen": -0.02117919921875, "rewards/margins": 0.0096435546875, "rewards/rejected": -0.0308380126953125, "step": 80 }, { "agreement_weights/mean": 0.9883245229721069, "agreement_weights/std": 0.0031945309601724148, "epoch": 0.08656158161902218, "eta/annotator_0": 0.9895901083946228, "grad_norm": 28.00722402918344, "learning_rate": 6.031914893617021e-07, "loss": 0.9964, "rewards/accuracies": 0.40625, "rewards/chosen": -0.0053253173828125, "rewards/margins": 0.0021514892578125, "rewards/rejected": -0.0074920654296875, "step": 81 }, { "agreement_weights/mean": 0.9891558885574341, "agreement_weights/std": 0.003422787878662348, "epoch": 0.08763024312049159, "eta/annotator_0": 0.9895901083946228, "grad_norm": 39.89228775614616, "learning_rate": 6.106382978723404e-07, "loss": 0.9495, "rewards/accuracies": 0.5, "rewards/chosen": 0.03887939453125, "rewards/margins": 0.01580810546875, "rewards/rejected": 0.0229644775390625, "step": 82 }, { "agreement_weights/mean": 0.9885740280151367, "agreement_weights/std": 0.00509039917960763, "epoch": 0.08869890462196099, "eta/annotator_0": 0.9895726442337036, "grad_norm": 49.17402959454627, "learning_rate": 6.180851063829787e-07, "loss": 0.9568, "rewards/accuracies": 0.5, "rewards/chosen": -0.00215911865234375, "rewards/margins": 0.0206756591796875, "rewards/rejected": -0.022796630859375, "step": 83 }, { "agreement_weights/mean": 0.9888392686843872, "agreement_weights/std": 0.0029701469466090202, "epoch": 0.0897675661234304, "eta/annotator_0": 0.9895669221878052, "grad_norm": 22.447405608958412, "learning_rate": 6.255319148936169e-07, "loss": 0.9732, "rewards/accuracies": 0.546875, "rewards/chosen": -0.00646209716796875, "rewards/margins": 0.008453369140625, "rewards/rejected": -0.01494598388671875, "step": 84 }, { "agreement_weights/mean": 0.989124596118927, "agreement_weights/std": 0.0031790873035788536, "epoch": 0.09083622762489982, "eta/annotator_0": 0.9895716905593872, "grad_norm": 30.567881393224674, "learning_rate": 6.329787234042553e-07, "loss": 0.9543, "rewards/accuracies": 0.421875, "rewards/chosen": 0.01953887939453125, "rewards/margins": 0.02001953125, "rewards/rejected": -0.0004425048828125, "step": 85 }, { "agreement_weights/mean": 0.9883219003677368, "agreement_weights/std": 0.004210181068629026, "epoch": 0.09190488912636922, "eta/annotator_0": 0.9895859360694885, "grad_norm": 64.22440556659711, "learning_rate": 6.404255319148935e-07, "loss": 0.9961, "rewards/accuracies": 0.484375, "rewards/chosen": 0.0287933349609375, "rewards/margins": 0.0150604248046875, "rewards/rejected": 0.0137176513671875, "step": 86 }, { "agreement_weights/mean": 0.9881447553634644, "agreement_weights/std": 0.004617498256266117, "epoch": 0.09297355062783863, "eta/annotator_0": 0.9895859360694885, "grad_norm": 39.48462687219018, "learning_rate": 6.478723404255319e-07, "loss": 0.9977, "rewards/accuracies": 0.5, "rewards/chosen": -0.00131988525390625, "rewards/margins": 0.00220489501953125, "rewards/rejected": -0.003509521484375, "step": 87 }, { "agreement_weights/mean": 0.9806337356567383, "agreement_weights/std": 0.03905501216650009, "epoch": 0.09404221212930804, "eta/annotator_0": 0.9895502328872681, "grad_norm": 118.09576512875286, "learning_rate": 6.553191489361702e-07, "loss": 0.9948, "rewards/accuracies": 0.5, "rewards/chosen": 0.10650634765625, "rewards/margins": 0.05816650390625, "rewards/rejected": 0.0481719970703125, "step": 88 }, { "agreement_weights/mean": 0.9889658093452454, "agreement_weights/std": 0.0030485575553029776, "epoch": 0.09511087363077746, "eta/annotator_0": 0.989538311958313, "grad_norm": 20.989799968839492, "learning_rate": 6.627659574468085e-07, "loss": 0.9536, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0105743408203125, "rewards/margins": 0.01535797119140625, "rewards/rejected": -0.0047607421875, "step": 89 }, { "agreement_weights/mean": 0.9874480962753296, "agreement_weights/std": 0.006730552297085524, "epoch": 0.09617953513224686, "eta/annotator_0": 0.9895597696304321, "grad_norm": 36.776069148324886, "learning_rate": 6.702127659574469e-07, "loss": 0.9891, "rewards/accuracies": 0.53125, "rewards/chosen": 0.0055694580078125, "rewards/margins": 0.00555419921875, "rewards/rejected": 7.62939453125e-06, "step": 90 }, { "agreement_weights/mean": 0.9785319566726685, "agreement_weights/std": 0.041218649595975876, "epoch": 0.09724819663371627, "eta/annotator_0": 0.9896240234375, "grad_norm": 70.35362076885963, "learning_rate": 6.776595744680851e-07, "loss": 1.0055, "rewards/accuracies": 0.546875, "rewards/chosen": 0.0266265869140625, "rewards/margins": 0.005279541015625, "rewards/rejected": 0.0213775634765625, "step": 91 }, { "agreement_weights/mean": 0.9882097244262695, "agreement_weights/std": 0.0036157267168164253, "epoch": 0.09831685813518568, "eta/annotator_0": 0.9896240234375, "grad_norm": 19.373931581035162, "learning_rate": 6.851063829787234e-07, "loss": 0.9873, "rewards/accuracies": 0.484375, "rewards/chosen": 0.00940704345703125, "rewards/margins": 0.0039520263671875, "rewards/rejected": 0.005466461181640625, "step": 92 }, { "agreement_weights/mean": 0.9883501529693604, "agreement_weights/std": 0.0029321881011128426, "epoch": 0.0993855196366551, "eta/annotator_0": 0.9896079301834106, "grad_norm": 60.02694837629806, "learning_rate": 6.925531914893617e-07, "loss": 1.0155, "rewards/accuracies": 0.5, "rewards/chosen": 0.00376129150390625, "rewards/margins": 0.02262115478515625, "rewards/rejected": -0.01877593994140625, "step": 93 }, { "agreement_weights/mean": 0.9880050420761108, "agreement_weights/std": 0.002709168242290616, "epoch": 0.1004541811381245, "eta/annotator_0": 0.9896026253700256, "grad_norm": 24.410472583470572, "learning_rate": 7e-07, "loss": 0.9853, "rewards/accuracies": 0.40625, "rewards/chosen": 0.03260040283203125, "rewards/margins": 0.0124969482421875, "rewards/rejected": 0.02010345458984375, "step": 94 }, { "agreement_weights/mean": 0.987248420715332, "agreement_weights/std": 0.00519372196868062, "epoch": 0.10152284263959391, "eta/annotator_0": 0.9896036982536316, "grad_norm": 40.18916154388461, "learning_rate": 6.999975580020363e-07, "loss": 0.992, "rewards/accuracies": 0.4375, "rewards/chosen": 0.0093536376953125, "rewards/margins": 0.00356292724609375, "rewards/rejected": 0.00579071044921875, "step": 95 }, { "agreement_weights/mean": 0.9876818060874939, "agreement_weights/std": 0.005025346763432026, "epoch": 0.10259150414106331, "eta/annotator_0": 0.9896069169044495, "grad_norm": 42.81082584784238, "learning_rate": 6.999902320422217e-07, "loss": 0.9772, "rewards/accuracies": 0.484375, "rewards/chosen": 0.00665283203125, "rewards/margins": 0.008453369140625, "rewards/rejected": -0.001800537109375, "step": 96 }, { "agreement_weights/mean": 0.9878517389297485, "agreement_weights/std": 0.0029517817310988903, "epoch": 0.10366016564253273, "eta/annotator_0": 0.9896069169044495, "grad_norm": 13.276369612112703, "learning_rate": 6.999780222227845e-07, "loss": 0.9862, "rewards/accuracies": 0.5625, "rewards/chosen": -0.00859832763671875, "rewards/margins": 0.004791259765625, "rewards/rejected": -0.0134124755859375, "step": 97 }, { "agreement_weights/mean": 0.988380491733551, "agreement_weights/std": 0.003354697022587061, "epoch": 0.10472882714400214, "eta/annotator_0": 0.9894604086875916, "grad_norm": 43.82198652149129, "learning_rate": 6.999609287141039e-07, "loss": 1.0002, "rewards/accuracies": 0.5625, "rewards/chosen": -0.00591278076171875, "rewards/margins": 0.03162384033203125, "rewards/rejected": -0.037567138671875, "step": 98 }, { "agreement_weights/mean": 0.9887106418609619, "agreement_weights/std": 0.003205983666703105, "epoch": 0.10579748864547155, "eta/annotator_0": 0.9894115328788757, "grad_norm": 57.855354652702985, "learning_rate": 6.999389517547075e-07, "loss": 0.9611, "rewards/accuracies": 0.578125, "rewards/chosen": -0.00177001953125, "rewards/margins": 0.03125, "rewards/rejected": -0.032985687255859375, "step": 99 }, { "agreement_weights/mean": 0.9878472685813904, "agreement_weights/std": 0.004612836055457592, "epoch": 0.10686615014694095, "eta/annotator_0": 0.9894112348556519, "grad_norm": 36.86398442391538, "learning_rate": 6.99912091651268e-07, "loss": 0.9677, "rewards/accuracies": 0.53125, "rewards/chosen": 0.00536346435546875, "rewards/margins": 0.0120086669921875, "rewards/rejected": -0.006683349609375, "step": 100 }, { "epoch": 0.10686615014694095, "eta/annotator_0": 0.9893665909767151, "eval_agreement_weights/mean": 0.9804137945175171, "eval_agreement_weights/std": 0.025761015713214874, "eval_loss": 0.979069709777832, "eval_rewards/accuracies": 0.5265920758247375, "eval_rewards/chosen": 0.0080897631123662, "eval_rewards/margins": 0.008312069810926914, "eval_rewards/rejected": -0.0002275172300869599, "eval_runtime": 134.0726, "eval_samples_per_second": 14.626, "eval_steps_per_second": 0.917, "step": 100 }, { "agreement_weights/mean": 0.9662894606590271, "agreement_weights/std": 0.062348511070013046, "epoch": 0.10793481164841037, "eta/annotator_0": 0.9851757287979126, "grad_norm": 83.72709747937553, "learning_rate": 6.99880348778598e-07, "loss": 0.9978, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0037841796875, "rewards/margins": 0.00107574462890625, "rewards/rejected": 0.00263214111328125, "step": 101 }, { "agreement_weights/mean": 0.9801944494247437, "agreement_weights/std": 0.01785244792699814, "epoch": 0.10900347314987978, "eta/annotator_0": 0.9847947359085083, "grad_norm": 83.14523979094982, "learning_rate": 6.998437235796468e-07, "loss": 0.9605, "rewards/accuracies": 0.53125, "rewards/chosen": 0.018421173095703125, "rewards/margins": 0.048831939697265625, "rewards/rejected": -0.0303955078125, "step": 102 }, { "agreement_weights/mean": 0.9679893255233765, "agreement_weights/std": 0.06478627026081085, "epoch": 0.11007213465134918, "eta/annotator_0": 0.984413743019104, "grad_norm": 32.476031553025265, "learning_rate": 6.998022165654923e-07, "loss": 0.9185, "rewards/accuracies": 0.578125, "rewards/chosen": 0.01277923583984375, "rewards/margins": 0.01531982421875, "rewards/rejected": -0.002532958984375, "step": 103 }, { "agreement_weights/mean": 0.9808060526847839, "agreement_weights/std": 0.006545063573867083, "epoch": 0.11114079615281859, "eta/annotator_0": 0.984413743019104, "grad_norm": 28.503679572784957, "learning_rate": 6.997558283153348e-07, "loss": 0.9915, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0056915283203125, "rewards/margins": 0.00298309326171875, "rewards/rejected": 0.00270843505859375, "step": 104 }, { "agreement_weights/mean": 0.9804457426071167, "agreement_weights/std": 0.0067315855994820595, "epoch": 0.112209457654288, "eta/annotator_0": 0.9845707416534424, "grad_norm": 18.949652986630017, "learning_rate": 6.997045594764886e-07, "loss": 0.9967, "rewards/accuracies": 0.53125, "rewards/chosen": -0.00113677978515625, "rewards/margins": 0.00138092041015625, "rewards/rejected": -0.0025177001953125, "step": 105 }, { "agreement_weights/mean": 0.9734346270561218, "agreement_weights/std": 0.035682037472724915, "epoch": 0.11327811915575742, "eta/annotator_0": 0.9845707416534424, "grad_norm": 42.500808349452235, "learning_rate": 6.99648410764373e-07, "loss": 0.9804, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0143890380859375, "rewards/margins": 0.0024261474609375, "rewards/rejected": -0.0167999267578125, "step": 106 }, { "agreement_weights/mean": 0.9772015810012817, "agreement_weights/std": 0.022044265642762184, "epoch": 0.11434678065722682, "eta/annotator_0": 0.983295202255249, "grad_norm": 28.997884821830134, "learning_rate": 6.995873829625028e-07, "loss": 0.9812, "rewards/accuracies": 0.46875, "rewards/chosen": 0.00457763671875, "rewards/margins": 0.0049896240234375, "rewards/rejected": -0.000396728515625, "step": 107 }, { "agreement_weights/mean": 0.9798094034194946, "agreement_weights/std": 0.006871323566883802, "epoch": 0.11541544215869623, "eta/annotator_0": 0.9820196628570557, "grad_norm": 26.423899277417984, "learning_rate": 6.995214769224766e-07, "loss": 1.0008, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0122528076171875, "rewards/margins": 7.62939453125e-06, "rewards/rejected": -0.01226043701171875, "step": 108 }, { "agreement_weights/mean": 0.9785584211349487, "agreement_weights/std": 0.014845185913145542, "epoch": 0.11648410366016564, "eta/annotator_0": 0.9820196628570557, "grad_norm": 102.14727752998408, "learning_rate": 6.994506935639651e-07, "loss": 0.9887, "rewards/accuracies": 0.5, "rewards/chosen": -0.0064697265625, "rewards/margins": 0.0077056884765625, "rewards/rejected": -0.014190673828125, "step": 109 }, { "agreement_weights/mean": 0.9807953834533691, "agreement_weights/std": 0.0059119947254657745, "epoch": 0.11755276516163506, "eta/annotator_0": 0.9819704294204712, "grad_norm": 41.12511278682982, "learning_rate": 6.99375033874699e-07, "loss": 0.9727, "rewards/accuracies": 0.5, "rewards/chosen": -0.00640106201171875, "rewards/margins": 0.00823974609375, "rewards/rejected": -0.0146636962890625, "step": 110 }, { "agreement_weights/mean": 0.981022834777832, "agreement_weights/std": 0.006476962473243475, "epoch": 0.11862142666310446, "eta/annotator_0": 0.9819704294204712, "grad_norm": 31.76887704239339, "learning_rate": 6.992944989104542e-07, "loss": 0.9689, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0141754150390625, "rewards/margins": 0.02175140380859375, "rewards/rejected": -0.03594970703125, "step": 111 }, { "agreement_weights/mean": 0.9810124039649963, "agreement_weights/std": 0.0059580872766673565, "epoch": 0.11969008816457387, "eta/annotator_0": 0.9819818735122681, "grad_norm": 255.76147218525867, "learning_rate": 6.992090897950376e-07, "loss": 1.0553, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0043792724609375, "rewards/margins": 0.041229248046875, "rewards/rejected": -0.045501708984375, "step": 112 }, { "agreement_weights/mean": 0.9802347421646118, "agreement_weights/std": 0.008374844677746296, "epoch": 0.12075874966604327, "eta/annotator_0": 0.9819933176040649, "grad_norm": 51.77431163742187, "learning_rate": 6.991188077202715e-07, "loss": 0.9718, "rewards/accuracies": 0.625, "rewards/chosen": -0.01259613037109375, "rewards/margins": 0.0092926025390625, "rewards/rejected": -0.021942138671875, "step": 113 }, { "agreement_weights/mean": 0.9681368470191956, "agreement_weights/std": 0.05475065857172012, "epoch": 0.1218274111675127, "eta/annotator_0": 0.9819933176040649, "grad_norm": 35.52010865761819, "learning_rate": 6.990236539459767e-07, "loss": 0.9785, "rewards/accuracies": 0.46875, "rewards/chosen": -0.01358795166015625, "rewards/margins": 0.00072479248046875, "rewards/rejected": -0.01427459716796875, "step": 114 }, { "agreement_weights/mean": 0.964770495891571, "agreement_weights/std": 0.06749631464481354, "epoch": 0.1228960726689821, "eta/annotator_0": 0.9821478724479675, "grad_norm": 128.12346700315322, "learning_rate": 6.989236297999551e-07, "loss": 1.0487, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0011138916015625, "rewards/margins": -0.014892578125, "rewards/rejected": 0.015960693359375, "step": 115 }, { "agreement_weights/mean": 0.9780231714248657, "agreement_weights/std": 0.013590422458946705, "epoch": 0.12396473417045151, "eta/annotator_0": 0.9821478724479675, "grad_norm": 24.368780081191943, "learning_rate": 6.98818736677971e-07, "loss": 0.9753, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0024566650390625, "rewards/margins": 0.0123291015625, "rewards/rejected": -0.0147705078125, "step": 116 }, { "agreement_weights/mean": 0.9802310466766357, "agreement_weights/std": 0.007697524502873421, "epoch": 0.12503339567192093, "eta/annotator_0": 0.9821962118148804, "grad_norm": 13.906670308161363, "learning_rate": 6.987089760437316e-07, "loss": 0.9434, "rewards/accuracies": 0.515625, "rewards/chosen": 0.00290679931640625, "rewards/margins": 0.02004241943359375, "rewards/rejected": -0.017120361328125, "step": 117 }, { "agreement_weights/mean": 0.9746713042259216, "agreement_weights/std": 0.022729000076651573, "epoch": 0.12610205717339032, "eta/annotator_0": 0.9822445511817932, "grad_norm": 27.204763753263165, "learning_rate": 6.985943494288672e-07, "loss": 1.005, "rewards/accuracies": 0.6875, "rewards/chosen": 0.003143310546875, "rewards/margins": -0.002044677734375, "rewards/rejected": 0.00518798828125, "step": 118 }, { "agreement_weights/mean": 0.9590339660644531, "agreement_weights/std": 0.06145976856350899, "epoch": 0.12717071867485974, "eta/annotator_0": 0.9822445511817932, "grad_norm": 56.159364202683896, "learning_rate": 6.984748584329087e-07, "loss": 1.0016, "rewards/accuracies": 0.53125, "rewards/chosen": 0.002655029296875, "rewards/margins": -0.0140533447265625, "rewards/rejected": 0.0167694091796875, "step": 119 }, { "agreement_weights/mean": 0.9802107810974121, "agreement_weights/std": 0.007310451939702034, "epoch": 0.12823938017632916, "eta/annotator_0": 0.9823174476623535, "grad_norm": 22.318880874017772, "learning_rate": 6.983505047232665e-07, "loss": 0.9356, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0052032470703125, "rewards/margins": 0.02222442626953125, "rewards/rejected": -0.0171051025390625, "step": 120 }, { "agreement_weights/mean": 0.9778263568878174, "agreement_weights/std": 0.010028881020843983, "epoch": 0.12930804167779855, "eta/annotator_0": 0.9823174476623535, "grad_norm": 38.7471750181916, "learning_rate": 6.982212900352061e-07, "loss": 1.0064, "rewards/accuracies": 0.640625, "rewards/chosen": 0.0123443603515625, "rewards/margins": 0.01622772216796875, "rewards/rejected": -0.00390625, "step": 121 }, { "agreement_weights/mean": 0.9799700975418091, "agreement_weights/std": 0.008470378816127777, "epoch": 0.13037670317926797, "eta/annotator_0": 0.9823428392410278, "grad_norm": 17.651330591986184, "learning_rate": 6.980872161718247e-07, "loss": 0.9291, "rewards/accuracies": 0.671875, "rewards/chosen": 0.00665283203125, "rewards/margins": 0.0264739990234375, "rewards/rejected": -0.0197906494140625, "step": 122 }, { "agreement_weights/mean": 0.97711580991745, "agreement_weights/std": 0.013051356188952923, "epoch": 0.13144536468073736, "eta/annotator_0": 0.9823682308197021, "grad_norm": 17.042606806462793, "learning_rate": 6.979482850040258e-07, "loss": 0.9909, "rewards/accuracies": 0.53125, "rewards/chosen": 0.0099945068359375, "rewards/margins": 0.00246429443359375, "rewards/rejected": 0.00754547119140625, "step": 123 }, { "agreement_weights/mean": 0.9790211319923401, "agreement_weights/std": 0.007742372807115316, "epoch": 0.13251402618220678, "eta/annotator_0": 0.9823682308197021, "grad_norm": 34.92844270025021, "learning_rate": 6.97804498470493e-07, "loss": 0.9704, "rewards/accuracies": 0.640625, "rewards/chosen": 0.025115966796875, "rewards/margins": 0.00913238525390625, "rewards/rejected": 0.0159759521484375, "step": 124 }, { "agreement_weights/mean": 0.9787303805351257, "agreement_weights/std": 0.007697452791035175, "epoch": 0.1335826876836762, "eta/annotator_0": 0.9824449419975281, "grad_norm": 17.429784120023665, "learning_rate": 6.976558585776631e-07, "loss": 0.9769, "rewards/accuracies": 0.59375, "rewards/chosen": 0.0124969482421875, "rewards/margins": 0.0063629150390625, "rewards/rejected": 0.00612640380859375, "step": 125 }, { "agreement_weights/mean": 0.9777780175209045, "agreement_weights/std": 0.00899648480117321, "epoch": 0.1346513491851456, "eta/annotator_0": 0.9824449419975281, "grad_norm": 19.736119902775627, "learning_rate": 6.975023673996977e-07, "loss": 0.9899, "rewards/accuracies": 0.625, "rewards/chosen": -0.00215911865234375, "rewards/margins": 0.01120758056640625, "rewards/rejected": -0.0133819580078125, "step": 126 }, { "agreement_weights/mean": 0.9780345559120178, "agreement_weights/std": 0.010918834246695042, "epoch": 0.13572001068661502, "eta/annotator_0": 0.9823905229568481, "grad_norm": 37.426476794601086, "learning_rate": 6.973440270784549e-07, "loss": 0.9866, "rewards/accuracies": 0.625, "rewards/chosen": 0.008270263671875, "rewards/margins": 0.0185699462890625, "rewards/rejected": -0.0102386474609375, "step": 127 }, { "agreement_weights/mean": 0.9786252379417419, "agreement_weights/std": 0.007618549279868603, "epoch": 0.13678867218808444, "eta/annotator_0": 0.9823360443115234, "grad_norm": 15.941319306967959, "learning_rate": 6.971808398234589e-07, "loss": 0.9843, "rewards/accuracies": 0.5625, "rewards/chosen": -0.000885009765625, "rewards/margins": 0.00446319580078125, "rewards/rejected": -0.00536346435546875, "step": 128 }, { "agreement_weights/mean": 0.9792047142982483, "agreement_weights/std": 0.007429856341332197, "epoch": 0.13785733368955383, "eta/annotator_0": 0.9823360443115234, "grad_norm": 22.857342191410368, "learning_rate": 6.970128079118693e-07, "loss": 0.9645, "rewards/accuracies": 0.671875, "rewards/chosen": 0.005218505859375, "rewards/margins": 0.01113128662109375, "rewards/rejected": -0.00592803955078125, "step": 129 }, { "agreement_weights/mean": 0.9780741333961487, "agreement_weights/std": 0.012857185676693916, "epoch": 0.13892599519102325, "eta/annotator_0": 0.9823591709136963, "grad_norm": 35.62239413405999, "learning_rate": 6.968399336884496e-07, "loss": 0.9543, "rewards/accuracies": 0.609375, "rewards/chosen": 0.0311431884765625, "rewards/margins": 0.0159759521484375, "rewards/rejected": 0.015167236328125, "step": 130 }, { "agreement_weights/mean": 0.9769886136054993, "agreement_weights/std": 0.017107143998146057, "epoch": 0.13999465669249264, "eta/annotator_0": 0.9823591709136963, "grad_norm": 36.99465600521463, "learning_rate": 6.966622195655339e-07, "loss": 0.9858, "rewards/accuracies": 0.53125, "rewards/chosen": 0.0280303955078125, "rewards/margins": 0.02245330810546875, "rewards/rejected": 0.0055999755859375, "step": 131 }, { "agreement_weights/mean": 0.9785327315330505, "agreement_weights/std": 0.012087204493582249, "epoch": 0.14106331819396206, "eta/annotator_0": 0.9823631048202515, "grad_norm": 23.031373177410813, "learning_rate": 6.96479668022994e-07, "loss": 0.9565, "rewards/accuracies": 0.703125, "rewards/chosen": 0.0163726806640625, "rewards/margins": 0.0139007568359375, "rewards/rejected": 0.00250244140625, "step": 132 }, { "agreement_weights/mean": 0.9762165546417236, "agreement_weights/std": 0.021058756858110428, "epoch": 0.14213197969543148, "eta/annotator_0": 0.9823670983314514, "grad_norm": 24.313871639129903, "learning_rate": 6.962922816082041e-07, "loss": 0.9537, "rewards/accuracies": 0.609375, "rewards/chosen": 0.014404296875, "rewards/margins": 0.0162200927734375, "rewards/rejected": -0.0018157958984375, "step": 133 }, { "agreement_weights/mean": 0.9727802276611328, "agreement_weights/std": 0.030746085569262505, "epoch": 0.14320064119690087, "eta/annotator_0": 0.9823670983314514, "grad_norm": 28.794935816490355, "learning_rate": 6.961000629360054e-07, "loss": 0.9871, "rewards/accuracies": 0.609375, "rewards/chosen": 0.0059051513671875, "rewards/margins": 0.001300811767578125, "rewards/rejected": 0.004608154296875, "step": 134 }, { "agreement_weights/mean": 0.9697278738021851, "agreement_weights/std": 0.04630139097571373, "epoch": 0.1442693026983703, "eta/annotator_0": 0.9825600385665894, "grad_norm": 71.96977670927676, "learning_rate": 6.959030146886703e-07, "loss": 0.9647, "rewards/accuracies": 0.75, "rewards/chosen": 0.0046234130859375, "rewards/margins": 0.03023529052734375, "rewards/rejected": -0.02569580078125, "step": 135 }, { "agreement_weights/mean": 0.9734283685684204, "agreement_weights/std": 0.02477671392261982, "epoch": 0.14533796419983971, "eta/annotator_0": 0.9825600385665894, "grad_norm": 45.469591950211644, "learning_rate": 6.957011396158636e-07, "loss": 1.0031, "rewards/accuracies": 0.6875, "rewards/chosen": 0.00960540771484375, "rewards/margins": -0.00213623046875, "rewards/rejected": 0.01177215576171875, "step": 136 }, { "agreement_weights/mean": 0.9765658974647522, "agreement_weights/std": 0.015486492775380611, "epoch": 0.1464066257013091, "eta/annotator_0": 0.9826580882072449, "grad_norm": 44.82839592295095, "learning_rate": 6.954944405346058e-07, "loss": 0.9699, "rewards/accuracies": 0.578125, "rewards/chosen": 0.00144195556640625, "rewards/margins": 0.01071929931640625, "rewards/rejected": -0.00928497314453125, "step": 137 }, { "agreement_weights/mean": 0.9629358053207397, "agreement_weights/std": 0.0624961219727993, "epoch": 0.14747528720277853, "eta/annotator_0": 0.9827561378479004, "grad_norm": 40.256723748169776, "learning_rate": 6.952829203292324e-07, "loss": 1.0008, "rewards/accuracies": 0.484375, "rewards/chosen": -0.013946533203125, "rewards/margins": -0.010223388671875, "rewards/rejected": -0.003692626953125, "step": 138 }, { "agreement_weights/mean": 0.9790233373641968, "agreement_weights/std": 0.01004537008702755, "epoch": 0.14854394870424792, "eta/annotator_0": 0.9827561378479004, "grad_norm": 36.93659090877445, "learning_rate": 6.950665819513541e-07, "loss": 0.9226, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01214599609375, "rewards/margins": 0.02686309814453125, "rewards/rejected": -0.0147247314453125, "step": 139 }, { "agreement_weights/mean": 0.9653189778327942, "agreement_weights/std": 0.055659160017967224, "epoch": 0.14961261020571734, "eta/annotator_0": 0.982871413230896, "grad_norm": 21.171555947273795, "learning_rate": 6.948454284198163e-07, "loss": 0.9844, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0104217529296875, "rewards/margins": -0.004486083984375, "rewards/rejected": -0.0059356689453125, "step": 140 }, { "agreement_weights/mean": 0.97681725025177, "agreement_weights/std": 0.010081904008984566, "epoch": 0.15068127170718676, "eta/annotator_0": 0.982871413230896, "grad_norm": 24.89367435360706, "learning_rate": 6.946194628206556e-07, "loss": 0.9926, "rewards/accuracies": 0.734375, "rewards/chosen": 0.00275421142578125, "rewards/margins": 0.00246429443359375, "rewards/rejected": 0.00028228759765625, "step": 141 }, { "agreement_weights/mean": 0.97612464427948, "agreement_weights/std": 0.011746834963560104, "epoch": 0.15174993320865615, "eta/annotator_0": 0.9829301238059998, "grad_norm": 29.92071804791039, "learning_rate": 6.943886883070581e-07, "loss": 0.9912, "rewards/accuracies": 0.609375, "rewards/chosen": -0.00557708740234375, "rewards/margins": 0.002838134765625, "rewards/rejected": -0.00838470458984375, "step": 142 }, { "agreement_weights/mean": 0.9769655466079712, "agreement_weights/std": 0.011853540316224098, "epoch": 0.15281859471012557, "eta/annotator_0": 0.9829888343811035, "grad_norm": 39.807341842342936, "learning_rate": 6.941531080993146e-07, "loss": 0.9586, "rewards/accuracies": 0.65625, "rewards/chosen": 0.0079345703125, "rewards/margins": 0.01560211181640625, "rewards/rejected": -0.00771331787109375, "step": 143 }, { "agreement_weights/mean": 0.9790558218955994, "agreement_weights/std": 0.011272291652858257, "epoch": 0.15388725621159496, "eta/annotator_0": 0.9829888343811035, "grad_norm": 74.10581177798827, "learning_rate": 6.93912725484776e-07, "loss": 1.0267, "rewards/accuracies": 0.6875, "rewards/chosen": 0.028961181640625, "rewards/margins": 0.07202911376953125, "rewards/rejected": -0.04317474365234375, "step": 144 }, { "agreement_weights/mean": 0.9731952548027039, "agreement_weights/std": 0.02568649873137474, "epoch": 0.15495591771306438, "eta/annotator_0": 0.9831590056419373, "grad_norm": 31.65107034456517, "learning_rate": 6.936675438178072e-07, "loss": 0.9826, "rewards/accuracies": 0.6875, "rewards/chosen": 0.00496673583984375, "rewards/margins": 0.00905609130859375, "rewards/rejected": -0.004150390625, "step": 145 }, { "agreement_weights/mean": 0.9545753598213196, "agreement_weights/std": 0.0968383178114891, "epoch": 0.1560245792145338, "eta/annotator_0": 0.9831590056419373, "grad_norm": 82.09044223773607, "learning_rate": 6.934175665197399e-07, "loss": 0.9773, "rewards/accuracies": 0.59375, "rewards/chosen": 0.008880615234375, "rewards/margins": -0.009185791015625, "rewards/rejected": 0.01813507080078125, "step": 146 }, { "agreement_weights/mean": 0.9636430740356445, "agreement_weights/std": 0.06272011995315552, "epoch": 0.1570932407160032, "eta/annotator_0": 0.9818698167800903, "grad_norm": 22.172428342091795, "learning_rate": 6.931627970788264e-07, "loss": 0.9623, "rewards/accuracies": 0.5, "rewards/chosen": -0.001007080078125, "rewards/margins": -0.00054168701171875, "rewards/rejected": -0.00042724609375, "step": 147 }, { "agreement_weights/mean": 0.9734736680984497, "agreement_weights/std": 0.022518489509820938, "epoch": 0.15816190221747262, "eta/annotator_0": 0.9805806875228882, "grad_norm": 26.028597697230026, "learning_rate": 6.929032390501891e-07, "loss": 0.9653, "rewards/accuracies": 0.65625, "rewards/chosen": 0.01947784423828125, "rewards/margins": 0.013095855712890625, "rewards/rejected": 0.00634002685546875, "step": 148 }, { "agreement_weights/mean": 0.9652773141860962, "agreement_weights/std": 0.0385996550321579, "epoch": 0.15923056371894204, "eta/annotator_0": 0.9805806875228882, "grad_norm": 62.38572237975921, "learning_rate": 6.926388960557719e-07, "loss": 1.0095, "rewards/accuracies": 0.578125, "rewards/chosen": 0.000244140625, "rewards/margins": -0.0046844482421875, "rewards/rejected": 0.004913330078125, "step": 149 }, { "agreement_weights/mean": 0.9726245403289795, "agreement_weights/std": 0.01970096118748188, "epoch": 0.16029922522041143, "eta/annotator_0": 0.9786587357521057, "grad_norm": 40.970418028903204, "learning_rate": 6.923697717842893e-07, "loss": 0.9791, "rewards/accuracies": 0.78125, "rewards/chosen": 0.0046844482421875, "rewards/margins": 0.024627685546875, "rewards/rejected": -0.01995849609375, "step": 150 }, { "agreement_weights/mean": 0.9761370420455933, "agreement_weights/std": 0.010180960409343243, "epoch": 0.16136788672188085, "eta/annotator_0": 0.9786587357521057, "grad_norm": 30.72712663731121, "learning_rate": 6.920958699911753e-07, "loss": 0.9586, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0035858154296875, "rewards/margins": 0.0169525146484375, "rewards/rejected": -0.0205841064453125, "step": 151 }, { "agreement_weights/mean": 0.9734891653060913, "agreement_weights/std": 0.018371913582086563, "epoch": 0.16243654822335024, "eta/annotator_0": 0.9787237048149109, "grad_norm": 23.760882900780405, "learning_rate": 6.918171944985303e-07, "loss": 0.9711, "rewards/accuracies": 0.640625, "rewards/chosen": -0.00072479248046875, "rewards/margins": 0.0160980224609375, "rewards/rejected": -0.01680755615234375, "step": 152 }, { "agreement_weights/mean": 0.9758032560348511, "agreement_weights/std": 0.011482280679047108, "epoch": 0.16350520972481966, "eta/annotator_0": 0.9787886142730713, "grad_norm": 13.719739007006524, "learning_rate": 6.915337491950688e-07, "loss": 0.9511, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0023040771484375, "rewards/margins": 0.016632080078125, "rewards/rejected": -0.01889801025390625, "step": 153 }, { "agreement_weights/mean": 0.9762541651725769, "agreement_weights/std": 0.010729135945439339, "epoch": 0.16457387122628908, "eta/annotator_0": 0.9787886142730713, "grad_norm": 19.57038559269043, "learning_rate": 6.912455380360642e-07, "loss": 0.9481, "rewards/accuracies": 0.6875, "rewards/chosen": -0.00295257568359375, "rewards/margins": 0.014892578125, "rewards/rejected": -0.0178985595703125, "step": 154 }, { "agreement_weights/mean": 0.9761942625045776, "agreement_weights/std": 0.014093001373112202, "epoch": 0.16564253272775847, "eta/annotator_0": 0.9790332913398743, "grad_norm": 35.89202567652181, "learning_rate": 6.909525650432939e-07, "loss": 0.9246, "rewards/accuracies": 0.609375, "rewards/chosen": 0.00262451171875, "rewards/margins": 0.02497100830078125, "rewards/rejected": -0.02234649658203125, "step": 155 }, { "agreement_weights/mean": 0.9753504395484924, "agreement_weights/std": 0.01687946543097496, "epoch": 0.1667111942292279, "eta/annotator_0": 0.9790332913398743, "grad_norm": 37.73273835910553, "learning_rate": 6.906548343049834e-07, "loss": 0.937, "rewards/accuracies": 0.703125, "rewards/chosen": -0.00447845458984375, "rewards/margins": 0.0313720703125, "rewards/rejected": -0.03575897216796875, "step": 156 }, { "agreement_weights/mean": 0.9731007814407349, "agreement_weights/std": 0.02273626998066902, "epoch": 0.1677798557306973, "eta/annotator_0": 0.9791064262390137, "grad_norm": 40.93549199408113, "learning_rate": 6.903523499757491e-07, "loss": 0.9599, "rewards/accuracies": 0.671875, "rewards/chosen": 0.01085662841796875, "rewards/margins": 0.014190673828125, "rewards/rejected": -0.0034027099609375, "step": 157 }, { "agreement_weights/mean": 0.9770574569702148, "agreement_weights/std": 0.009926600381731987, "epoch": 0.1688485172321667, "eta/annotator_0": 0.9791795015335083, "grad_norm": 44.86998428720927, "learning_rate": 6.9004511627654e-07, "loss": 0.9332, "rewards/accuracies": 0.734375, "rewards/chosen": 0.0152130126953125, "rewards/margins": 0.02109527587890625, "rewards/rejected": -0.005889892578125, "step": 158 }, { "agreement_weights/mean": 0.9770900011062622, "agreement_weights/std": 0.010753852315247059, "epoch": 0.16991717873363613, "eta/annotator_0": 0.9791795015335083, "grad_norm": 65.2149710509992, "learning_rate": 6.897331374945795e-07, "loss": 0.9271, "rewards/accuracies": 0.609375, "rewards/chosen": 0.0074462890625, "rewards/margins": 0.0345306396484375, "rewards/rejected": -0.027187347412109375, "step": 159 }, { "agreement_weights/mean": 0.9621210694313049, "agreement_weights/std": 0.06443238258361816, "epoch": 0.17098584023510552, "eta/annotator_0": 0.9794287085533142, "grad_norm": 17.370466155683182, "learning_rate": 6.894164179833052e-07, "loss": 0.9504, "rewards/accuracies": 0.703125, "rewards/chosen": -0.01372528076171875, "rewards/margins": 0.00562286376953125, "rewards/rejected": -0.01934814453125, "step": 160 }, { "agreement_weights/mean": 0.9760758280754089, "agreement_weights/std": 0.010678289458155632, "epoch": 0.17205450173657494, "eta/annotator_0": 0.9794287085533142, "grad_norm": 16.13289441437796, "learning_rate": 6.890949621623078e-07, "loss": 0.9582, "rewards/accuracies": 0.703125, "rewards/chosen": -0.00238037109375, "rewards/margins": 0.0156707763671875, "rewards/rejected": -0.0180511474609375, "step": 161 }, { "agreement_weights/mean": 0.9582445025444031, "agreement_weights/std": 0.07836854457855225, "epoch": 0.17312316323804436, "eta/annotator_0": 0.9794232249259949, "grad_norm": 29.586364356959937, "learning_rate": 6.887687745172697e-07, "loss": 0.9595, "rewards/accuracies": 0.625, "rewards/chosen": -0.0038299560546875, "rewards/margins": -0.0016326904296875, "rewards/rejected": -0.002227783203125, "step": 162 }, { "agreement_weights/mean": 0.964476466178894, "agreement_weights/std": 0.0491577610373497, "epoch": 0.17419182473951375, "eta/annotator_0": 0.9794177412986755, "grad_norm": 26.02416118263654, "learning_rate": 6.88437859599903e-07, "loss": 0.9692, "rewards/accuracies": 0.5625, "rewards/chosen": 0.00112152099609375, "rewards/margins": 0.00453948974609375, "rewards/rejected": -0.0033721923828125, "step": 163 }, { "agreement_weights/mean": 0.9429883360862732, "agreement_weights/std": 0.10752906650304794, "epoch": 0.17526048624098317, "eta/annotator_0": 0.9794177412986755, "grad_norm": 41.67526660645508, "learning_rate": 6.881022220278852e-07, "loss": 1.0063, "rewards/accuracies": 0.71875, "rewards/chosen": 0.0009765625, "rewards/margins": -0.0193023681640625, "rewards/rejected": 0.02032470703125, "step": 164 }, { "agreement_weights/mean": 0.9712125062942505, "agreement_weights/std": 0.021803000941872597, "epoch": 0.1763291477424526, "eta/annotator_0": 0.9786993265151978, "grad_norm": 38.64426637153513, "learning_rate": 6.877618664847947e-07, "loss": 0.9605, "rewards/accuracies": 0.59375, "rewards/chosen": 0.0084228515625, "rewards/margins": 0.0135955810546875, "rewards/rejected": -0.00524139404296875, "step": 165 }, { "agreement_weights/mean": 0.9607521891593933, "agreement_weights/std": 0.0506863035261631, "epoch": 0.17739780924392198, "eta/annotator_0": 0.9786993265151978, "grad_norm": 59.33508117056527, "learning_rate": 6.874167977200462e-07, "loss": 1.0033, "rewards/accuracies": 0.6875, "rewards/chosen": -0.00215911865234375, "rewards/margins": 0.01464080810546875, "rewards/rejected": -0.016815185546875, "step": 166 }, { "agreement_weights/mean": 0.9649646878242493, "agreement_weights/std": 0.035849928855895996, "epoch": 0.1784664707453914, "eta/annotator_0": 0.9786838889122009, "grad_norm": 27.910422588503227, "learning_rate": 6.87067020548824e-07, "loss": 0.9922, "rewards/accuracies": 0.640625, "rewards/chosen": 0.006195068359375, "rewards/margins": 0.0058746337890625, "rewards/rejected": 0.0002899169921875, "step": 167 }, { "agreement_weights/mean": 0.9748205542564392, "agreement_weights/std": 0.011766120791435242, "epoch": 0.1795351322468608, "eta/annotator_0": 0.9786683917045593, "grad_norm": 47.905094226500495, "learning_rate": 6.867125398520141e-07, "loss": 0.9303, "rewards/accuracies": 0.6875, "rewards/chosen": 0.00492095947265625, "rewards/margins": 0.04607391357421875, "rewards/rejected": -0.04105377197265625, "step": 168 }, { "agreement_weights/mean": 0.9573514461517334, "agreement_weights/std": 0.06895244121551514, "epoch": 0.18060379374833022, "eta/annotator_0": 0.9786683917045593, "grad_norm": 52.76006481868012, "learning_rate": 6.863533605761378e-07, "loss": 0.9472, "rewards/accuracies": 0.578125, "rewards/chosen": 0.026763916015625, "rewards/margins": 0.0191802978515625, "rewards/rejected": 0.0075225830078125, "step": 169 }, { "agreement_weights/mean": 0.9660154581069946, "agreement_weights/std": 0.0356830395758152, "epoch": 0.18167245524979964, "eta/annotator_0": 0.9791780710220337, "grad_norm": 45.822600198065444, "learning_rate": 6.85989487733281e-07, "loss": 0.9844, "rewards/accuracies": 0.484375, "rewards/chosen": 0.0194244384765625, "rewards/margins": 0.02667236328125, "rewards/rejected": -0.007171630859375, "step": 170 }, { "agreement_weights/mean": 0.9558113813400269, "agreement_weights/std": 0.07871359586715698, "epoch": 0.18274111675126903, "eta/annotator_0": 0.9791780710220337, "grad_norm": 39.13114177787466, "learning_rate": 6.856209264010249e-07, "loss": 0.9385, "rewards/accuracies": 0.6875, "rewards/chosen": -0.000762939453125, "rewards/margins": 0.00826263427734375, "rewards/rejected": -0.00909423828125, "step": 171 }, { "agreement_weights/mean": 0.9722248315811157, "agreement_weights/std": 0.012600093148648739, "epoch": 0.18380977825273845, "eta/annotator_0": 0.9793515801429749, "grad_norm": 16.926064305944013, "learning_rate": 6.852476817223756e-07, "loss": 0.954, "rewards/accuracies": 0.671875, "rewards/chosen": 0.00658416748046875, "rewards/margins": 0.0172119140625, "rewards/rejected": -0.01065826416015625, "step": 172 }, { "agreement_weights/mean": 0.9684218168258667, "agreement_weights/std": 0.025514913722872734, "epoch": 0.18487843975420787, "eta/annotator_0": 0.9795250296592712, "grad_norm": 27.462228428613866, "learning_rate": 6.848697589056914e-07, "loss": 0.9534, "rewards/accuracies": 0.65625, "rewards/chosen": -0.01214599609375, "rewards/margins": 0.0252685546875, "rewards/rejected": -0.037353515625, "step": 173 }, { "agreement_weights/mean": 0.9714298844337463, "agreement_weights/std": 0.018779896199703217, "epoch": 0.18594710125567726, "eta/annotator_0": 0.9795250296592712, "grad_norm": 19.395347973484316, "learning_rate": 6.844871632246113e-07, "loss": 0.9222, "rewards/accuracies": 0.734375, "rewards/chosen": 0.01670074462890625, "rewards/margins": 0.035430908203125, "rewards/rejected": -0.018768310546875, "step": 174 }, { "agreement_weights/mean": 0.9618352651596069, "agreement_weights/std": 0.05756700411438942, "epoch": 0.18701576275714668, "eta/annotator_0": 0.9796724319458008, "grad_norm": 55.35581828101787, "learning_rate": 6.840999000179798e-07, "loss": 0.9243, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0146484375, "rewards/margins": 0.050537109375, "rewards/rejected": -0.0651397705078125, "step": 175 }, { "agreement_weights/mean": 0.9688884615898132, "agreement_weights/std": 0.025065910071134567, "epoch": 0.18808442425861607, "eta/annotator_0": 0.9796724319458008, "grad_norm": 37.501005445877894, "learning_rate": 6.837079746897739e-07, "loss": 0.9317, "rewards/accuracies": 0.703125, "rewards/chosen": 0.014923095703125, "rewards/margins": 0.0343170166015625, "rewards/rejected": -0.0193023681640625, "step": 176 }, { "agreement_weights/mean": 0.9686243534088135, "agreement_weights/std": 0.021395301446318626, "epoch": 0.1891530857600855, "eta/annotator_0": 0.9791667461395264, "grad_norm": 54.11135674166695, "learning_rate": 6.833113927090275e-07, "loss": 1.0052, "rewards/accuracies": 0.65625, "rewards/chosen": 0.02655792236328125, "rewards/margins": 0.0285186767578125, "rewards/rejected": -0.0020294189453125, "step": 177 }, { "agreement_weights/mean": 0.9708232879638672, "agreement_weights/std": 0.015956416726112366, "epoch": 0.1902217472615549, "eta/annotator_0": 0.9786609411239624, "grad_norm": 16.03355166050802, "learning_rate": 6.829101596097536e-07, "loss": 0.9516, "rewards/accuracies": 0.75, "rewards/chosen": -0.0029144287109375, "rewards/margins": 0.01438140869140625, "rewards/rejected": -0.01734161376953125, "step": 178 }, { "agreement_weights/mean": 0.9702721834182739, "agreement_weights/std": 0.015421897172927856, "epoch": 0.1912904087630243, "eta/annotator_0": 0.9786609411239624, "grad_norm": 54.78991317778617, "learning_rate": 6.825042809908693e-07, "loss": 0.9645, "rewards/accuracies": 0.625, "rewards/chosen": 0.01068878173828125, "rewards/margins": 0.01071929931640625, "rewards/rejected": 7.62939453125e-06, "step": 179 }, { "agreement_weights/mean": 0.9573944807052612, "agreement_weights/std": 0.07014699280261993, "epoch": 0.19235907026449373, "eta/annotator_0": 0.9788932204246521, "grad_norm": 58.3967471436799, "learning_rate": 6.820937625161159e-07, "loss": 0.9529, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0181427001953125, "rewards/margins": -0.000244140625, "rewards/rejected": 0.0184173583984375, "step": 180 }, { "agreement_weights/mean": 0.9716646671295166, "agreement_weights/std": 0.01502122264355421, "epoch": 0.19342773176596312, "eta/annotator_0": 0.9788932204246521, "grad_norm": 23.694014976222896, "learning_rate": 6.816786099139808e-07, "loss": 0.9021, "rewards/accuracies": 0.6875, "rewards/chosen": 0.02686309814453125, "rewards/margins": 0.03981781005859375, "rewards/rejected": -0.0130615234375, "step": 181 }, { "agreement_weights/mean": 0.9392691850662231, "agreement_weights/std": 0.12607519328594208, "epoch": 0.19449639326743254, "eta/annotator_0": 0.9789991974830627, "grad_norm": 45.840151154466845, "learning_rate": 6.812588289776172e-07, "loss": 0.9892, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0372772216796875, "rewards/margins": -0.01800537109375, "rewards/rejected": -0.01934814453125, "step": 182 }, { "agreement_weights/mean": 0.965095043182373, "agreement_weights/std": 0.033535607159137726, "epoch": 0.19556505476890196, "eta/annotator_0": 0.9791051745414734, "grad_norm": 17.08347077703346, "learning_rate": 6.808344255647632e-07, "loss": 0.9577, "rewards/accuracies": 0.65625, "rewards/chosen": 0.038188934326171875, "rewards/margins": 0.019870758056640625, "rewards/rejected": 0.0183258056640625, "step": 183 }, { "agreement_weights/mean": 0.948822021484375, "agreement_weights/std": 0.10225702822208405, "epoch": 0.19663371627037135, "eta/annotator_0": 0.9791051745414734, "grad_norm": 126.66885520596384, "learning_rate": 6.804054055976605e-07, "loss": 0.9619, "rewards/accuracies": 0.703125, "rewards/chosen": -0.013763427734375, "rewards/margins": 0.001556396484375, "rewards/rejected": -0.015289306640625, "step": 184 }, { "agreement_weights/mean": 0.9681538343429565, "agreement_weights/std": 0.022921841591596603, "epoch": 0.19770237777184077, "eta/annotator_0": 0.9795130491256714, "grad_norm": 23.44378340828048, "learning_rate": 6.799717750629712e-07, "loss": 0.9297, "rewards/accuracies": 0.671875, "rewards/chosen": -0.003692626953125, "rewards/margins": 0.02503204345703125, "rewards/rejected": -0.02874755859375, "step": 185 }, { "agreement_weights/mean": 0.9703177213668823, "agreement_weights/std": 0.014836728572845459, "epoch": 0.1987710392733102, "eta/annotator_0": 0.9795130491256714, "grad_norm": 27.582597169048206, "learning_rate": 6.795335400116948e-07, "loss": 0.9533, "rewards/accuracies": 0.578125, "rewards/chosen": 0.0077972412109375, "rewards/margins": 0.0394134521484375, "rewards/rejected": -0.0315704345703125, "step": 186 }, { "agreement_weights/mean": 0.9711627960205078, "agreement_weights/std": 0.01807752624154091, "epoch": 0.19983970077477958, "eta/annotator_0": 0.979664146900177, "grad_norm": 27.741452682362592, "learning_rate": 6.79090706559083e-07, "loss": 0.914, "rewards/accuracies": 0.8125, "rewards/chosen": 0.006683349609375, "rewards/margins": 0.049560546875, "rewards/rejected": -0.042816162109375, "step": 187 }, { "agreement_weights/mean": 0.9701703786849976, "agreement_weights/std": 0.01703907921910286, "epoch": 0.200908362276249, "eta/annotator_0": 0.9798152446746826, "grad_norm": 64.14294757183241, "learning_rate": 6.786432808845555e-07, "loss": 0.9325, "rewards/accuracies": 0.625, "rewards/chosen": -0.0048828125, "rewards/margins": 0.041351318359375, "rewards/rejected": -0.04627227783203125, "step": 188 }, { "agreement_weights/mean": 0.9673048853874207, "agreement_weights/std": 0.015572085976600647, "epoch": 0.2019770237777184, "eta/annotator_0": 0.9798152446746826, "grad_norm": 18.28170380957026, "learning_rate": 6.781912692316125e-07, "loss": 0.9925, "rewards/accuracies": 0.625, "rewards/chosen": 0.00106048583984375, "rewards/margins": 0.002593994140625, "rewards/rejected": -0.00152587890625, "step": 189 }, { "agreement_weights/mean": 0.9685297012329102, "agreement_weights/std": 0.016577495262026787, "epoch": 0.20304568527918782, "eta/annotator_0": 0.9799811244010925, "grad_norm": 27.334477328443217, "learning_rate": 6.777346779077488e-07, "loss": 0.9487, "rewards/accuracies": 0.625, "rewards/chosen": -0.00800323486328125, "rewards/margins": 0.02980804443359375, "rewards/rejected": -0.0377655029296875, "step": 190 }, { "agreement_weights/mean": 0.9719998836517334, "agreement_weights/std": 0.014324720948934555, "epoch": 0.20411434678065724, "eta/annotator_0": 0.9799811244010925, "grad_norm": 55.68166168939438, "learning_rate": 6.772735132843646e-07, "loss": 0.9266, "rewards/accuracies": 0.734375, "rewards/chosen": 0.0087738037109375, "rewards/margins": 0.048919677734375, "rewards/rejected": -0.04029083251953125, "step": 191 }, { "agreement_weights/mean": 0.9701237082481384, "agreement_weights/std": 0.019794927909970284, "epoch": 0.20518300828212663, "eta/annotator_0": 0.9797989130020142, "grad_norm": 16.969984022802933, "learning_rate": 6.768077817966776e-07, "loss": 0.9141, "rewards/accuracies": 0.65625, "rewards/chosen": 0.00823974609375, "rewards/margins": 0.036773681640625, "rewards/rejected": -0.0284576416015625, "step": 192 }, { "agreement_weights/mean": 0.9713301062583923, "agreement_weights/std": 0.013323673978447914, "epoch": 0.20625166978359605, "eta/annotator_0": 0.9796167612075806, "grad_norm": 17.762064207840783, "learning_rate": 6.763374899436326e-07, "loss": 0.9341, "rewards/accuracies": 0.78125, "rewards/chosen": 0.0164337158203125, "rewards/margins": 0.0193634033203125, "rewards/rejected": -0.00290679931640625, "step": 193 }, { "agreement_weights/mean": 0.9533059597015381, "agreement_weights/std": 0.059649378061294556, "epoch": 0.20732033128506547, "eta/annotator_0": 0.9796167612075806, "grad_norm": 29.1729477192414, "learning_rate": 6.758626442878111e-07, "loss": 0.9486, "rewards/accuracies": 0.625, "rewards/chosen": -0.032623291015625, "rewards/margins": 0.010101318359375, "rewards/rejected": -0.04266357421875, "step": 194 }, { "agreement_weights/mean": 0.9673253893852234, "agreement_weights/std": 0.020648114383220673, "epoch": 0.20838899278653486, "eta/annotator_0": 0.9774292707443237, "grad_norm": 26.467564028290493, "learning_rate": 6.753832514553393e-07, "loss": 0.9685, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0099639892578125, "rewards/margins": 0.009368896484375, "rewards/rejected": -0.0192718505859375, "step": 195 }, { "agreement_weights/mean": 0.9547165632247925, "agreement_weights/std": 0.06921234726905823, "epoch": 0.20945765428800428, "eta/annotator_0": 0.9774292707443237, "grad_norm": 33.334914924164465, "learning_rate": 6.748993181357964e-07, "loss": 0.9526, "rewards/accuracies": 0.625, "rewards/chosen": 0.010955810546875, "rewards/margins": 0.0061798095703125, "rewards/rejected": 0.00467681884765625, "step": 196 }, { "agreement_weights/mean": 0.9702204465866089, "agreement_weights/std": 0.014599582180380821, "epoch": 0.21052631578947367, "eta/annotator_0": 0.9774952530860901, "grad_norm": 26.099567802075395, "learning_rate": 6.744108510821204e-07, "loss": 0.9134, "rewards/accuracies": 0.625, "rewards/chosen": 0.0296783447265625, "rewards/margins": 0.03265380859375, "rewards/rejected": -0.0029296875, "step": 197 }, { "agreement_weights/mean": 0.9709007740020752, "agreement_weights/std": 0.01271197572350502, "epoch": 0.2115949772909431, "eta/annotator_0": 0.9775612950325012, "grad_norm": 34.954806142961985, "learning_rate": 6.739178571105146e-07, "loss": 0.9316, "rewards/accuracies": 0.78125, "rewards/chosen": 0.010467529296875, "rewards/margins": 0.0357666015625, "rewards/rejected": -0.0254058837890625, "step": 198 }, { "agreement_weights/mean": 0.9649127721786499, "agreement_weights/std": 0.02013520896434784, "epoch": 0.2126636387924125, "eta/annotator_0": 0.9775612950325012, "grad_norm": 54.73740017687755, "learning_rate": 6.734203431003519e-07, "loss": 0.994, "rewards/accuracies": 0.640625, "rewards/chosen": 0.0023956298828125, "rewards/margins": 0.0019989013671875, "rewards/rejected": 0.000396728515625, "step": 199 }, { "agreement_weights/mean": 0.9548725485801697, "agreement_weights/std": 0.06826165318489075, "epoch": 0.2137323002938819, "eta/annotator_0": 0.9779037237167358, "grad_norm": 45.064514304356905, "learning_rate": 6.729183159940791e-07, "loss": 0.9629, "rewards/accuracies": 0.6875, "rewards/chosen": 0.010650634765625, "rewards/margins": 0.0352783203125, "rewards/rejected": -0.0246734619140625, "step": 200 }, { "epoch": 0.2137323002938819, "eta/annotator_0": 0.9780861735343933, "eval_agreement_weights/mean": 0.9604799151420593, "eval_agreement_weights/std": 0.038989465683698654, "eval_loss": 0.9538734555244446, "eval_rewards/accuracies": 0.6827574968338013, "eval_rewards/chosen": -0.0020283025223761797, "eval_rewards/margins": 0.02039405331015587, "eval_rewards/rejected": -0.022423721849918365, "eval_runtime": 135.4344, "eval_samples_per_second": 14.479, "eval_steps_per_second": 0.908, "step": 200 }, { "agreement_weights/mean": 0.9633479714393616, "agreement_weights/std": 0.018161069601774216, "epoch": 0.21480096179535133, "eta/annotator_0": 0.9788855314254761, "grad_norm": 19.668418961146088, "learning_rate": 6.724117827971202e-07, "loss": 0.9626, "rewards/accuracies": 0.5625, "rewards/chosen": -0.02630615234375, "rewards/margins": 0.015625, "rewards/rejected": -0.04193115234375, "step": 201 }, { "agreement_weights/mean": 0.9484650492668152, "agreement_weights/std": 0.06336817145347595, "epoch": 0.21586962329682075, "eta/annotator_0": 0.9790469408035278, "grad_norm": 54.94334094101246, "learning_rate": 6.719007505777782e-07, "loss": 0.992, "rewards/accuracies": 0.625, "rewards/chosen": 0.0215301513671875, "rewards/margins": 0.0245513916015625, "rewards/rejected": -0.00295257568359375, "step": 202 }, { "agreement_weights/mean": 0.9549992680549622, "agreement_weights/std": 0.05876603350043297, "epoch": 0.21693828479829014, "eta/annotator_0": 0.9790469408035278, "grad_norm": 19.206549574781395, "learning_rate": 6.713852264671366e-07, "loss": 0.9181, "rewards/accuracies": 0.734375, "rewards/chosen": 0.013824462890625, "rewards/margins": 0.0184478759765625, "rewards/rejected": -0.0046539306640625, "step": 203 }, { "agreement_weights/mean": 0.9394853711128235, "agreement_weights/std": 0.10346050560474396, "epoch": 0.21800694629975956, "eta/annotator_0": 0.9792140126228333, "grad_norm": 101.444755193148, "learning_rate": 6.708652176589604e-07, "loss": 0.997, "rewards/accuracies": 0.625, "rewards/chosen": 0.01763916015625, "rewards/margins": -0.01617431640625, "rewards/rejected": 0.03369140625, "step": 204 }, { "agreement_weights/mean": 0.9627442359924316, "agreement_weights/std": 0.01943862810730934, "epoch": 0.21907560780122895, "eta/annotator_0": 0.9792697429656982, "grad_norm": 30.02556386434498, "learning_rate": 6.703407314095949e-07, "loss": 0.9542, "rewards/accuracies": 0.609375, "rewards/chosen": 0.0812530517578125, "rewards/margins": 0.02175140380859375, "rewards/rejected": 0.0596466064453125, "step": 205 }, { "agreement_weights/mean": 0.962218165397644, "agreement_weights/std": 0.02345285378396511, "epoch": 0.22014426930269837, "eta/annotator_0": 0.9789354205131531, "grad_norm": 26.716900488141793, "learning_rate": 6.698117750378649e-07, "loss": 0.9512, "rewards/accuracies": 0.75, "rewards/chosen": 0.024658203125, "rewards/margins": 0.02759552001953125, "rewards/rejected": -0.00295257568359375, "step": 206 }, { "agreement_weights/mean": 0.956030011177063, "agreement_weights/std": 0.0458393357694149, "epoch": 0.2212129308041678, "eta/annotator_0": 0.9779325127601624, "grad_norm": 28.172506071014002, "learning_rate": 6.692783559249726e-07, "loss": 0.9314, "rewards/accuracies": 0.609375, "rewards/chosen": 0.01009368896484375, "rewards/margins": 0.0212554931640625, "rewards/rejected": -0.011138916015625, "step": 207 }, { "agreement_weights/mean": 0.9351367950439453, "agreement_weights/std": 0.1300864815711975, "epoch": 0.22228159230563718, "eta/annotator_0": 0.9779325127601624, "grad_norm": 128.79570977213797, "learning_rate": 6.687404815143947e-07, "loss": 1.0622, "rewards/accuracies": 0.765625, "rewards/chosen": 0.09881591796875, "rewards/margins": 0.0193023681640625, "rewards/rejected": 0.079254150390625, "step": 208 }, { "agreement_weights/mean": 0.9595288634300232, "agreement_weights/std": 0.025003734976053238, "epoch": 0.2233502538071066, "eta/annotator_0": 0.9777312874794006, "grad_norm": 20.376904697471353, "learning_rate": 6.681981593117779e-07, "loss": 0.9585, "rewards/accuracies": 0.65625, "rewards/chosen": 0.004180908203125, "rewards/margins": 0.01366424560546875, "rewards/rejected": -0.00949859619140625, "step": 209 }, { "agreement_weights/mean": 0.9529890418052673, "agreement_weights/std": 0.054165925830602646, "epoch": 0.224418915308576, "eta/annotator_0": 0.9776642322540283, "grad_norm": 32.427188934078615, "learning_rate": 6.676513968848349e-07, "loss": 0.9481, "rewards/accuracies": 0.703125, "rewards/chosen": 0.003662109375, "rewards/margins": 0.0245819091796875, "rewards/rejected": -0.020843505859375, "step": 210 }, { "agreement_weights/mean": 0.9576161503791809, "agreement_weights/std": 0.032264046370983124, "epoch": 0.22548757681004541, "eta/annotator_0": 0.9777202010154724, "grad_norm": 21.82307717670239, "learning_rate": 6.671002018632386e-07, "loss": 0.9526, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0026702880859375, "rewards/margins": 0.01663970947265625, "rewards/rejected": -0.013946533203125, "step": 211 }, { "agreement_weights/mean": 0.937619686126709, "agreement_weights/std": 0.12228831648826599, "epoch": 0.22655623831151483, "eta/annotator_0": 0.9778881669044495, "grad_norm": 37.09740984372681, "learning_rate": 6.665445819385154e-07, "loss": 0.8735, "rewards/accuracies": 0.734375, "rewards/chosen": 0.01325225830078125, "rewards/margins": 0.025360107421875, "rewards/rejected": -0.01202392578125, "step": 212 }, { "agreement_weights/mean": 0.9405927062034607, "agreement_weights/std": 0.08676572889089584, "epoch": 0.22762489981298423, "eta/annotator_0": 0.9778881669044495, "grad_norm": 60.27083976152999, "learning_rate": 6.659845448639378e-07, "loss": 0.9849, "rewards/accuracies": 0.578125, "rewards/chosen": -0.00782012939453125, "rewards/margins": 0.0179443359375, "rewards/rejected": -0.02581787109375, "step": 213 }, { "agreement_weights/mean": 0.9632972478866577, "agreement_weights/std": 0.020581407472491264, "epoch": 0.22869356131445365, "eta/annotator_0": 0.9746512174606323, "grad_norm": 28.448364597445522, "learning_rate": 6.654200984544167e-07, "loss": 0.8913, "rewards/accuracies": 0.671875, "rewards/chosen": 0.00650787353515625, "rewards/margins": 0.048553466796875, "rewards/rejected": -0.0419921875, "step": 214 }, { "agreement_weights/mean": 0.9608006477355957, "agreement_weights/std": 0.019109506160020828, "epoch": 0.22976222281592307, "eta/annotator_0": 0.9735721349716187, "grad_norm": 13.459789287094887, "learning_rate": 6.648512505863923e-07, "loss": 0.9468, "rewards/accuracies": 0.671875, "rewards/chosen": -0.00360107421875, "rewards/margins": 0.0159912109375, "rewards/rejected": -0.01959991455078125, "step": 215 }, { "agreement_weights/mean": 0.9593403339385986, "agreement_weights/std": 0.030728138983249664, "epoch": 0.23083088431739246, "eta/annotator_0": 0.9733719825744629, "grad_norm": 24.253963343648007, "learning_rate": 6.642780091977232e-07, "loss": 0.9196, "rewards/accuracies": 0.828125, "rewards/chosen": -0.015869140625, "rewards/margins": 0.039459228515625, "rewards/rejected": -0.055450439453125, "step": 216 }, { "agreement_weights/mean": 0.9602344632148743, "agreement_weights/std": 0.02631964161992073, "epoch": 0.23189954581886188, "eta/annotator_0": 0.9727715253829956, "grad_norm": 16.164570671898268, "learning_rate": 6.637003822875772e-07, "loss": 0.9186, "rewards/accuracies": 0.703125, "rewards/chosen": -0.008819580078125, "rewards/margins": 0.0295257568359375, "rewards/rejected": -0.038421630859375, "step": 217 }, { "agreement_weights/mean": 0.937279462814331, "agreement_weights/std": 0.09657324850559235, "epoch": 0.23296820732033127, "eta/annotator_0": 0.9727715253829956, "grad_norm": 52.01320873068105, "learning_rate": 6.631183779163183e-07, "loss": 0.9512, "rewards/accuracies": 0.59375, "rewards/chosen": -0.038604736328125, "rewards/margins": 0.00390625, "rewards/rejected": -0.04248046875, "step": 218 }, { "agreement_weights/mean": 0.9630419015884399, "agreement_weights/std": 0.020163461565971375, "epoch": 0.2340368688218007, "eta/annotator_0": 0.973158597946167, "grad_norm": 13.935370903327684, "learning_rate": 6.625320042053952e-07, "loss": 0.8998, "rewards/accuracies": 0.796875, "rewards/chosen": -0.01659393310546875, "rewards/margins": 0.031005859375, "rewards/rejected": -0.047607421875, "step": 219 }, { "agreement_weights/mean": 0.9596883058547974, "agreement_weights/std": 0.024910878390073776, "epoch": 0.2351055303232701, "eta/annotator_0": 0.9732876420021057, "grad_norm": 24.366116310483303, "learning_rate": 6.619412693372272e-07, "loss": 0.9369, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0238037109375, "rewards/margins": 0.029083251953125, "rewards/rejected": -0.0528564453125, "step": 220 }, { "agreement_weights/mean": 0.9513471126556396, "agreement_weights/std": 0.061630986630916595, "epoch": 0.2361741918247395, "eta/annotator_0": 0.9733278751373291, "grad_norm": 85.54122775422711, "learning_rate": 6.613461815550906e-07, "loss": 1.0684, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0086669921875, "rewards/margins": 0.05670166015625, "rewards/rejected": -0.065216064453125, "step": 221 }, { "agreement_weights/mean": 0.963257908821106, "agreement_weights/std": 0.023760905489325523, "epoch": 0.23724285332620892, "eta/annotator_0": 0.9734485149383545, "grad_norm": 28.69535680910714, "learning_rate": 6.607467491630036e-07, "loss": 0.8823, "rewards/accuracies": 0.8125, "rewards/chosen": -0.027557373046875, "rewards/margins": 0.06817626953125, "rewards/rejected": -0.0958251953125, "step": 222 }, { "agreement_weights/mean": 0.9456185698509216, "agreement_weights/std": 0.08353953808546066, "epoch": 0.23831151482767834, "eta/annotator_0": 0.9734485149383545, "grad_norm": 40.35498979590651, "learning_rate": 6.601429805256098e-07, "loss": 0.9205, "rewards/accuracies": 0.71875, "rewards/chosen": -0.02045440673828125, "rewards/margins": 0.0100250244140625, "rewards/rejected": -0.030517578125, "step": 223 }, { "agreement_weights/mean": 0.9374609589576721, "agreement_weights/std": 0.092196024954319, "epoch": 0.23938017632914774, "eta/annotator_0": 0.9736905097961426, "grad_norm": 32.8242966505304, "learning_rate": 6.595348840680624e-07, "loss": 0.8618, "rewards/accuracies": 0.796875, "rewards/chosen": -0.007293701171875, "rewards/margins": 0.025360107421875, "rewards/rejected": -0.03271484375, "step": 224 }, { "agreement_weights/mean": 0.9462652802467346, "agreement_weights/std": 0.07409913092851639, "epoch": 0.24044883783061716, "eta/annotator_0": 0.9737712144851685, "grad_norm": 36.206548152087244, "learning_rate": 6.589224682759061e-07, "loss": 0.9319, "rewards/accuracies": 0.671875, "rewards/chosen": -0.001190185546875, "rewards/margins": 0.008636474609375, "rewards/rejected": -0.009857177734375, "step": 225 }, { "agreement_weights/mean": 0.942173957824707, "agreement_weights/std": 0.08207502961158752, "epoch": 0.24151749933208655, "eta/annotator_0": 0.9725661873817444, "grad_norm": 29.10375101177535, "learning_rate": 6.583057416949585e-07, "loss": 0.9555, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0161895751953125, "rewards/margins": 0.0099334716796875, "rewards/rejected": -0.026031494140625, "step": 226 }, { "agreement_weights/mean": 0.9547814130783081, "agreement_weights/std": 0.04267825186252594, "epoch": 0.24258616083355597, "eta/annotator_0": 0.9689510464668274, "grad_norm": 32.7758313402149, "learning_rate": 6.576847129311916e-07, "loss": 0.9161, "rewards/accuracies": 0.75, "rewards/chosen": -0.01263427734375, "rewards/margins": 0.03338623046875, "rewards/rejected": -0.046051025390625, "step": 227 }, { "agreement_weights/mean": 0.958931028842926, "agreement_weights/std": 0.02724943310022354, "epoch": 0.2436548223350254, "eta/annotator_0": 0.9689510464668274, "grad_norm": 16.38767077868374, "learning_rate": 6.570593906506108e-07, "loss": 0.9164, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0228118896484375, "rewards/margins": 0.02764892578125, "rewards/rejected": -0.0504150390625, "step": 228 }, { "agreement_weights/mean": 0.9412965178489685, "agreement_weights/std": 0.09611333906650543, "epoch": 0.24472348383649478, "eta/annotator_0": 0.9690300226211548, "grad_norm": 63.675863441295455, "learning_rate": 6.564297835791345e-07, "loss": 0.9051, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0063934326171875, "rewards/margins": 0.0341949462890625, "rewards/rejected": -0.040557861328125, "step": 229 }, { "agreement_weights/mean": 0.9207655191421509, "agreement_weights/std": 0.14528930187225342, "epoch": 0.2457921453379642, "eta/annotator_0": 0.9690563678741455, "grad_norm": 66.00718888740998, "learning_rate": 6.557959005024727e-07, "loss": 0.9857, "rewards/accuracies": 0.59375, "rewards/chosen": 0.023406982421875, "rewards/margins": -0.0219573974609375, "rewards/rejected": 0.045379638671875, "step": 230 }, { "agreement_weights/mean": 0.9572017192840576, "agreement_weights/std": 0.03532657399773598, "epoch": 0.24686080683943362, "eta/annotator_0": 0.967217206954956, "grad_norm": 18.098153882606283, "learning_rate": 6.551577502660033e-07, "loss": 0.9026, "rewards/accuracies": 0.78125, "rewards/chosen": 0.016143798828125, "rewards/margins": 0.034149169921875, "rewards/rejected": -0.01793670654296875, "step": 231 }, { "agreement_weights/mean": 0.930273175239563, "agreement_weights/std": 0.08700281381607056, "epoch": 0.24792946834090301, "eta/annotator_0": 0.9616996645927429, "grad_norm": 33.74028657101383, "learning_rate": 6.545153417746496e-07, "loss": 0.9515, "rewards/accuracies": 0.59375, "rewards/chosen": -0.00673675537109375, "rewards/margins": -0.00048828125, "rewards/rejected": -0.00634765625, "step": 232 }, { "agreement_weights/mean": 0.9428408145904541, "agreement_weights/std": 0.08183562755584717, "epoch": 0.24899812984237243, "eta/annotator_0": 0.9616996645927429, "grad_norm": 23.050201847071676, "learning_rate": 6.53868683992756e-07, "loss": 0.9043, "rewards/accuracies": 0.6875, "rewards/chosen": 0.00118255615234375, "rewards/margins": 0.02203369140625, "rewards/rejected": -0.0208740234375, "step": 233 }, { "agreement_weights/mean": 0.9215424060821533, "agreement_weights/std": 0.10151238739490509, "epoch": 0.25006679134384185, "eta/annotator_0": 0.9563248157501221, "grad_norm": 111.89146070266577, "learning_rate": 6.53217785943962e-07, "loss": 1.1864, "rewards/accuracies": 0.6875, "rewards/chosen": 0.00344085693359375, "rewards/margins": 0.05572509765625, "rewards/rejected": -0.05230712890625, "step": 234 }, { "agreement_weights/mean": 0.9572738409042358, "agreement_weights/std": 0.021351102739572525, "epoch": 0.25113545284531125, "eta/annotator_0": 0.9545332193374634, "grad_norm": 97.68185571636155, "learning_rate": 6.525626567110778e-07, "loss": 1.051, "rewards/accuracies": 0.71875, "rewards/chosen": 0.02227783203125, "rewards/margins": 0.0507354736328125, "rewards/rejected": -0.02855682373046875, "step": 235 }, { "agreement_weights/mean": 0.9360005259513855, "agreement_weights/std": 0.08993753045797348, "epoch": 0.25220411434678064, "eta/annotator_0": 0.9547154903411865, "grad_norm": 72.58403168692787, "learning_rate": 6.51903305435956e-07, "loss": 0.9809, "rewards/accuracies": 0.703125, "rewards/chosen": -0.001953125, "rewards/margins": 0.03235626220703125, "rewards/rejected": -0.0343780517578125, "step": 236 }, { "agreement_weights/mean": 0.9310452938079834, "agreement_weights/std": 0.07244430482387543, "epoch": 0.2532727758482501, "eta/annotator_0": 0.9552621841430664, "grad_norm": 42.707772923628326, "learning_rate": 6.512397413193652e-07, "loss": 0.9716, "rewards/accuracies": 0.640625, "rewards/chosen": -0.02392578125, "rewards/margins": 0.0098876953125, "rewards/rejected": -0.0338287353515625, "step": 237 }, { "agreement_weights/mean": 0.9437007904052734, "agreement_weights/std": 0.07316192984580994, "epoch": 0.2543414373497195, "eta/annotator_0": 0.9552621841430664, "grad_norm": 30.691659365405656, "learning_rate": 6.505719736208607e-07, "loss": 0.9071, "rewards/accuracies": 0.671875, "rewards/chosen": 0.0006103515625, "rewards/margins": 0.03448486328125, "rewards/rejected": -0.03375244140625, "step": 238 }, { "agreement_weights/mean": 0.92875075340271, "agreement_weights/std": 0.10269168764352798, "epoch": 0.25541009885118887, "eta/annotator_0": 0.9542205333709717, "grad_norm": 42.318113050480164, "learning_rate": 6.499000116586562e-07, "loss": 0.9167, "rewards/accuracies": 0.703125, "rewards/chosen": -0.00347137451171875, "rewards/margins": 0.03656005859375, "rewards/rejected": -0.03985595703125, "step": 239 }, { "agreement_weights/mean": 0.9409088492393494, "agreement_weights/std": 0.06351719051599503, "epoch": 0.2564787603526583, "eta/annotator_0": 0.9538732767105103, "grad_norm": 30.45427432299174, "learning_rate": 6.49223864809493e-07, "loss": 0.9415, "rewards/accuracies": 0.75, "rewards/chosen": -0.0170135498046875, "rewards/margins": 0.0098419189453125, "rewards/rejected": -0.026824951171875, "step": 240 }, { "agreement_weights/mean": 0.925057053565979, "agreement_weights/std": 0.09977032244205475, "epoch": 0.2575474218541277, "eta/annotator_0": 0.9537525773048401, "grad_norm": 35.669762538028465, "learning_rate": 6.485435425085097e-07, "loss": 0.9854, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0118408203125, "rewards/margins": -0.0083160400390625, "rewards/rejected": -0.003509521484375, "step": 241 }, { "agreement_weights/mean": 0.948823094367981, "agreement_weights/std": 0.04125112295150757, "epoch": 0.2586160833555971, "eta/annotator_0": 0.95339035987854, "grad_norm": 33.30367948149001, "learning_rate": 6.478590542491098e-07, "loss": 0.9161, "rewards/accuracies": 0.75, "rewards/chosen": -0.02785491943359375, "rewards/margins": 0.02469635009765625, "rewards/rejected": -0.0525665283203125, "step": 242 }, { "agreement_weights/mean": 0.9511932134628296, "agreement_weights/std": 0.024816079065203667, "epoch": 0.25968474485706655, "eta/annotator_0": 0.95339035987854, "grad_norm": 35.17823483953403, "learning_rate": 6.471704095828305e-07, "loss": 0.9383, "rewards/accuracies": 0.671875, "rewards/chosen": -0.030487060546875, "rewards/margins": 0.019866943359375, "rewards/rejected": -0.05029296875, "step": 243 }, { "agreement_weights/mean": 0.9423929452896118, "agreement_weights/std": 0.0762510672211647, "epoch": 0.26075340635853594, "eta/annotator_0": 0.9542790651321411, "grad_norm": 62.77677262734024, "learning_rate": 6.464776181192079e-07, "loss": 0.8656, "rewards/accuracies": 0.75, "rewards/chosen": -0.0465087890625, "rewards/margins": 0.02911376953125, "rewards/rejected": -0.0755615234375, "step": 244 }, { "agreement_weights/mean": 0.955879271030426, "agreement_weights/std": 0.026513176038861275, "epoch": 0.26182206786000534, "eta/annotator_0": 0.9545753002166748, "grad_norm": 23.368257790964478, "learning_rate": 6.457806895256442e-07, "loss": 0.8705, "rewards/accuracies": 0.765625, "rewards/chosen": -0.029083251953125, "rewards/margins": 0.04364013671875, "rewards/rejected": -0.07281494140625, "step": 245 }, { "agreement_weights/mean": 0.9382665157318115, "agreement_weights/std": 0.08080232888460159, "epoch": 0.26289072936147473, "eta/annotator_0": 0.9534544944763184, "grad_norm": 63.48218203931672, "learning_rate": 6.450796335272718e-07, "loss": 0.9153, "rewards/accuracies": 0.65625, "rewards/chosen": -0.047119140625, "rewards/margins": 0.035491943359375, "rewards/rejected": -0.08258056640625, "step": 246 }, { "agreement_weights/mean": 0.9365394711494446, "agreement_weights/std": 0.09901509433984756, "epoch": 0.2639593908629442, "eta/annotator_0": 0.9500921368598938, "grad_norm": 35.14808393120187, "learning_rate": 6.443744599068182e-07, "loss": 0.8899, "rewards/accuracies": 0.765625, "rewards/chosen": -0.06756591796875, "rewards/margins": 0.038726806640625, "rewards/rejected": -0.1065673828125, "step": 247 }, { "agreement_weights/mean": 0.9415568113327026, "agreement_weights/std": 0.05331488698720932, "epoch": 0.26502805236441357, "eta/annotator_0": 0.9500921368598938, "grad_norm": 22.459445396786844, "learning_rate": 6.436651785044697e-07, "loss": 0.9568, "rewards/accuracies": 0.6875, "rewards/chosen": -0.06402587890625, "rewards/margins": 0.007110595703125, "rewards/rejected": -0.07122802734375, "step": 248 }, { "agreement_weights/mean": 0.9540218114852905, "agreement_weights/std": 0.034339290112257004, "epoch": 0.26609671386588296, "eta/annotator_0": 0.9507395029067993, "grad_norm": 36.110397553735744, "learning_rate": 6.429517992177332e-07, "loss": 0.8484, "rewards/accuracies": 0.734375, "rewards/chosen": -0.06079864501953125, "rewards/margins": 0.06801605224609375, "rewards/rejected": -0.12872314453125, "step": 249 }, { "agreement_weights/mean": 0.9298950433731079, "agreement_weights/std": 0.10303311049938202, "epoch": 0.2671653753673524, "eta/annotator_0": 0.9509552717208862, "grad_norm": 24.960231234121974, "learning_rate": 6.422343320012987e-07, "loss": 0.932, "rewards/accuracies": 0.59375, "rewards/chosen": -0.013275146484375, "rewards/margins": 0.012115478515625, "rewards/rejected": -0.025360107421875, "step": 250 }, { "agreement_weights/mean": 0.9314273595809937, "agreement_weights/std": 0.0786937028169632, "epoch": 0.2682340368688218, "eta/annotator_0": 0.9504642486572266, "grad_norm": 38.57946337927432, "learning_rate": 6.415127868669004e-07, "loss": 0.9034, "rewards/accuracies": 0.71875, "rewards/chosen": -0.040191650390625, "rewards/margins": 0.016204833984375, "rewards/rejected": -0.05645751953125, "step": 251 }, { "agreement_weights/mean": 0.9516934156417847, "agreement_weights/std": 0.023799974471330643, "epoch": 0.2693026983702912, "eta/annotator_0": 0.9489911794662476, "grad_norm": 14.661693480398124, "learning_rate": 6.40787173883177e-07, "loss": 0.9125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0222015380859375, "rewards/margins": 0.0335693359375, "rewards/rejected": -0.0557861328125, "step": 252 }, { "agreement_weights/mean": 0.9553577899932861, "agreement_weights/std": 0.026620063930749893, "epoch": 0.27037135987176064, "eta/annotator_0": 0.9489911794662476, "grad_norm": 24.204624030698497, "learning_rate": 6.400575031755309e-07, "loss": 0.8498, "rewards/accuracies": 0.765625, "rewards/chosen": -0.05218505859375, "rewards/margins": 0.0606689453125, "rewards/rejected": -0.1129150390625, "step": 253 }, { "agreement_weights/mean": 0.9202696084976196, "agreement_weights/std": 0.1152820736169815, "epoch": 0.27144002137323003, "eta/annotator_0": 0.9473097324371338, "grad_norm": 53.454331206744826, "learning_rate": 6.393237849259871e-07, "loss": 0.9382, "rewards/accuracies": 0.625, "rewards/chosen": -0.0765380859375, "rewards/margins": 0.005828857421875, "rewards/rejected": -0.082305908203125, "step": 254 }, { "agreement_weights/mean": 0.9555579423904419, "agreement_weights/std": 0.0240610521286726, "epoch": 0.2725086828746994, "eta/annotator_0": 0.946749210357666, "grad_norm": 79.81056532729703, "learning_rate": 6.38586029373051e-07, "loss": 0.915, "rewards/accuracies": 0.75, "rewards/chosen": -0.04046630859375, "rewards/margins": 0.07098388671875, "rewards/rejected": -0.111328125, "step": 255 }, { "agreement_weights/mean": 0.9473749399185181, "agreement_weights/std": 0.031468652188777924, "epoch": 0.2735773443761689, "eta/annotator_0": 0.946538507938385, "grad_norm": 38.021641361401734, "learning_rate": 6.378442468115661e-07, "loss": 0.9503, "rewards/accuracies": 0.609375, "rewards/chosen": -0.037109375, "rewards/margins": 0.02581787109375, "rewards/rejected": -0.06304931640625, "step": 256 }, { "agreement_weights/mean": 0.9535497426986694, "agreement_weights/std": 0.029965009540319443, "epoch": 0.27464600587763827, "eta/annotator_0": 0.9459063410758972, "grad_norm": 23.36191384004101, "learning_rate": 6.370984475925695e-07, "loss": 0.8611, "rewards/accuracies": 0.84375, "rewards/chosen": -0.03643798828125, "rewards/margins": 0.05560302734375, "rewards/rejected": -0.09222412109375, "step": 257 }, { "agreement_weights/mean": 0.9265732765197754, "agreement_weights/std": 0.12531569600105286, "epoch": 0.27571466737910766, "eta/annotator_0": 0.9459063410758972, "grad_norm": 43.124924247442515, "learning_rate": 6.363486421231477e-07, "loss": 0.9149, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0836181640625, "rewards/margins": 0.05364990234375, "rewards/rejected": -0.137451171875, "step": 258 }, { "agreement_weights/mean": 0.9429365992546082, "agreement_weights/std": 0.06112617626786232, "epoch": 0.27678332888057705, "eta/annotator_0": 0.9466494917869568, "grad_norm": 38.888025941053286, "learning_rate": 6.355948408662919e-07, "loss": 0.8886, "rewards/accuracies": 0.75, "rewards/chosen": -0.055389404296875, "rewards/margins": 0.02719879150390625, "rewards/rejected": -0.082763671875, "step": 259 }, { "agreement_weights/mean": 0.9501916170120239, "agreement_weights/std": 0.03908916190266609, "epoch": 0.2778519903820465, "eta/annotator_0": 0.9468972086906433, "grad_norm": 32.97190379197208, "learning_rate": 6.348370543407514e-07, "loss": 0.8748, "rewards/accuracies": 0.71875, "rewards/chosen": -0.023101806640625, "rewards/margins": 0.0609130859375, "rewards/rejected": -0.083984375, "step": 260 }, { "agreement_weights/mean": 0.9502074122428894, "agreement_weights/std": 0.042088042944669724, "epoch": 0.2789206518835159, "eta/annotator_0": 0.946899950504303, "grad_norm": 19.069906302284707, "learning_rate": 6.340752931208869e-07, "loss": 0.8639, "rewards/accuracies": 0.84375, "rewards/chosen": -0.04205322265625, "rewards/margins": 0.044677734375, "rewards/rejected": -0.086669921875, "step": 261 }, { "agreement_weights/mean": 0.9540023803710938, "agreement_weights/std": 0.02843172289431095, "epoch": 0.2799893133849853, "eta/annotator_0": 0.9469082355499268, "grad_norm": 33.40218016319639, "learning_rate": 6.333095678365236e-07, "loss": 0.8744, "rewards/accuracies": 0.734375, "rewards/chosen": -0.044708251953125, "rewards/margins": 0.0509033203125, "rewards/rejected": -0.09576416015625, "step": 262 }, { "agreement_weights/mean": 0.9568219184875488, "agreement_weights/std": 0.030983619391918182, "epoch": 0.28105797488645473, "eta/annotator_0": 0.9469082355499268, "grad_norm": 22.125682010178366, "learning_rate": 6.325398891728019e-07, "loss": 0.8152, "rewards/accuracies": 0.71875, "rewards/chosen": -0.02972412109375, "rewards/margins": 0.0760498046875, "rewards/rejected": -0.10546875, "step": 263 }, { "agreement_weights/mean": 0.9395923018455505, "agreement_weights/std": 0.07278600335121155, "epoch": 0.2821266363879241, "eta/annotator_0": 0.9472819566726685, "grad_norm": 35.36620654408963, "learning_rate": 6.317662678700285e-07, "loss": 0.9009, "rewards/accuracies": 0.671875, "rewards/chosen": -0.048980712890625, "rewards/margins": 0.057464599609375, "rewards/rejected": -0.10650634765625, "step": 264 }, { "agreement_weights/mean": 0.9465954303741455, "agreement_weights/std": 0.05348827689886093, "epoch": 0.2831952978893935, "eta/annotator_0": 0.947406530380249, "grad_norm": 34.929952809873875, "learning_rate": 6.309887147235273e-07, "loss": 0.8648, "rewards/accuracies": 0.703125, "rewards/chosen": -0.03900146484375, "rewards/margins": 0.05853271484375, "rewards/rejected": -0.0975341796875, "step": 265 }, { "agreement_weights/mean": 0.939507007598877, "agreement_weights/std": 0.0716746523976326, "epoch": 0.28426395939086296, "eta/annotator_0": 0.9474899768829346, "grad_norm": 35.92699360938539, "learning_rate": 6.302072405834881e-07, "loss": 0.9748, "rewards/accuracies": 0.6875, "rewards/chosen": -0.03204345703125, "rewards/margins": 0.03760528564453125, "rewards/rejected": -0.0697021484375, "step": 266 }, { "agreement_weights/mean": 0.9569126963615417, "agreement_weights/std": 0.03219468891620636, "epoch": 0.28533262089233236, "eta/annotator_0": 0.9477401971817017, "grad_norm": 24.218632748772922, "learning_rate": 6.294218563548152e-07, "loss": 0.8103, "rewards/accuracies": 0.8125, "rewards/chosen": -0.03125, "rewards/margins": 0.06878662109375, "rewards/rejected": -0.099853515625, "step": 267 }, { "agreement_weights/mean": 0.954809308052063, "agreement_weights/std": 0.022835996001958847, "epoch": 0.28640128239380175, "eta/annotator_0": 0.9477401971817017, "grad_norm": 23.80048460707773, "learning_rate": 6.286325729969752e-07, "loss": 0.8936, "rewards/accuracies": 0.734375, "rewards/chosen": -0.055938720703125, "rewards/margins": 0.033599853515625, "rewards/rejected": -0.08953857421875, "step": 268 }, { "agreement_weights/mean": 0.9559794664382935, "agreement_weights/std": 0.026643093675374985, "epoch": 0.2874699438952712, "eta/annotator_0": 0.948033332824707, "grad_norm": 29.579026402677883, "learning_rate": 6.278394015238448e-07, "loss": 0.8578, "rewards/accuracies": 0.71875, "rewards/chosen": -0.029022216796875, "rewards/margins": 0.05615997314453125, "rewards/rejected": -0.0853271484375, "step": 269 }, { "agreement_weights/mean": 0.9498591423034668, "agreement_weights/std": 0.04395833611488342, "epoch": 0.2885386053967406, "eta/annotator_0": 0.9481310844421387, "grad_norm": 22.5868653105931, "learning_rate": 6.270423530035557e-07, "loss": 0.8696, "rewards/accuracies": 0.765625, "rewards/chosen": -0.067291259765625, "rewards/margins": 0.04510498046875, "rewards/rejected": -0.1123046875, "step": 270 }, { "agreement_weights/mean": 0.9566851854324341, "agreement_weights/std": 0.026513714343309402, "epoch": 0.28960726689821, "eta/annotator_0": 0.9482514262199402, "grad_norm": 22.579559036741642, "learning_rate": 6.262414385583416e-07, "loss": 0.8473, "rewards/accuracies": 0.71875, "rewards/chosen": -0.03314208984375, "rewards/margins": 0.06524658203125, "rewards/rejected": -0.0985107421875, "step": 271 }, { "agreement_weights/mean": 0.9284167885780334, "agreement_weights/std": 0.12124088406562805, "epoch": 0.29067592839967943, "eta/annotator_0": 0.9486124515533447, "grad_norm": 25.55811133224918, "learning_rate": 6.254366693643821e-07, "loss": 0.8679, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0424652099609375, "rewards/margins": 0.02675628662109375, "rewards/rejected": -0.069122314453125, "step": 272 }, { "agreement_weights/mean": 0.9515237808227539, "agreement_weights/std": 0.03860624507069588, "epoch": 0.2917445899011488, "eta/annotator_0": 0.9486124515533447, "grad_norm": 138.51755947666427, "learning_rate": 6.246280566516473e-07, "loss": 1.1578, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01605224609375, "rewards/margins": 0.09881591796875, "rewards/rejected": -0.0831298828125, "step": 273 }, { "agreement_weights/mean": 0.9352878928184509, "agreement_weights/std": 0.09411316365003586, "epoch": 0.2928132514026182, "eta/annotator_0": 0.9457777142524719, "grad_norm": 27.730026773438265, "learning_rate": 6.238156117037403e-07, "loss": 0.9031, "rewards/accuracies": 0.734375, "rewards/chosen": -0.018157958984375, "rewards/margins": 0.0252685546875, "rewards/rejected": -0.04339599609375, "step": 274 }, { "agreement_weights/mean": 0.940967321395874, "agreement_weights/std": 0.060552239418029785, "epoch": 0.2938819129040876, "eta/annotator_0": 0.9448327422142029, "grad_norm": 29.02494747213747, "learning_rate": 6.22999345857741e-07, "loss": 0.9251, "rewards/accuracies": 0.671875, "rewards/chosen": -0.064697265625, "rewards/margins": 0.0152587890625, "rewards/rejected": -0.0799560546875, "step": 275 }, { "agreement_weights/mean": 0.9411128759384155, "agreement_weights/std": 0.0838838666677475, "epoch": 0.29495057440555705, "eta/annotator_0": 0.9451543092727661, "grad_norm": 29.464806570342805, "learning_rate": 6.221792705040464e-07, "loss": 0.8442, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0365447998046875, "rewards/margins": 0.04803466796875, "rewards/rejected": -0.08453369140625, "step": 276 }, { "agreement_weights/mean": 0.9315167665481567, "agreement_weights/std": 0.09002824127674103, "epoch": 0.29601923590702645, "eta/annotator_0": 0.9461191296577454, "grad_norm": 23.538320594016056, "learning_rate": 6.213553970862127e-07, "loss": 0.9193, "rewards/accuracies": 0.6875, "rewards/chosen": -0.045806884765625, "rewards/margins": 0.03155517578125, "rewards/rejected": -0.077362060546875, "step": 277 }, { "agreement_weights/mean": 0.9436126947402954, "agreement_weights/std": 0.07482896745204926, "epoch": 0.29708789740849584, "eta/annotator_0": 0.9461191296577454, "grad_norm": 24.12763453111441, "learning_rate": 6.205277371007956e-07, "loss": 0.8226, "rewards/accuracies": 0.8125, "rewards/chosen": -0.055267333984375, "rewards/margins": 0.04811859130859375, "rewards/rejected": -0.1033935546875, "step": 278 }, { "agreement_weights/mean": 0.9197744131088257, "agreement_weights/std": 0.10764560848474503, "epoch": 0.2981565589099653, "eta/annotator_0": 0.9452846050262451, "grad_norm": 52.01671044194528, "learning_rate": 6.196963020971893e-07, "loss": 0.8819, "rewards/accuracies": 0.734375, "rewards/chosen": -0.10076904296875, "rewards/margins": 0.0062255859375, "rewards/rejected": -0.10723876953125, "step": 279 }, { "agreement_weights/mean": 0.9483304619789124, "agreement_weights/std": 0.03345945477485657, "epoch": 0.2992252204114347, "eta/annotator_0": 0.9450064897537231, "grad_norm": 33.03808933780434, "learning_rate": 6.188611036774657e-07, "loss": 0.8973, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0748291015625, "rewards/margins": 0.042816162109375, "rewards/rejected": -0.117919921875, "step": 280 }, { "agreement_weights/mean": 0.9381481409072876, "agreement_weights/std": 0.06627361476421356, "epoch": 0.30029388191290407, "eta/annotator_0": 0.9442985653877258, "grad_norm": 36.42965903667574, "learning_rate": 6.180221534962126e-07, "loss": 0.8669, "rewards/accuracies": 0.703125, "rewards/chosen": -0.099853515625, "rewards/margins": 0.03826904296875, "rewards/rejected": -0.13818359375, "step": 281 }, { "agreement_weights/mean": 0.941310703754425, "agreement_weights/std": 0.05689692124724388, "epoch": 0.3013625434143735, "eta/annotator_0": 0.9421747326850891, "grad_norm": 43.448929239336344, "learning_rate": 6.171794632603704e-07, "loss": 0.9183, "rewards/accuracies": 0.734375, "rewards/chosen": -0.070404052734375, "rewards/margins": 0.03692626953125, "rewards/rejected": -0.1072998046875, "step": 282 }, { "agreement_weights/mean": 0.9294204711914062, "agreement_weights/std": 0.08750786632299423, "epoch": 0.3024312049158429, "eta/annotator_0": 0.9421747326850891, "grad_norm": 130.84999714945428, "learning_rate": 6.163330447290701e-07, "loss": 1.1517, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07763671875, "rewards/margins": -0.000152587890625, "rewards/rejected": -0.077484130859375, "step": 283 }, { "agreement_weights/mean": 0.9287240505218506, "agreement_weights/std": 0.07653923332691193, "epoch": 0.3034998664173123, "eta/annotator_0": 0.9423196315765381, "grad_norm": 32.509817438214796, "learning_rate": 6.154829097134675e-07, "loss": 0.9376, "rewards/accuracies": 0.625, "rewards/chosen": -0.0855712890625, "rewards/margins": 0.018310546875, "rewards/rejected": -0.103759765625, "step": 284 }, { "agreement_weights/mean": 0.944936990737915, "agreement_weights/std": 0.04706009849905968, "epoch": 0.30456852791878175, "eta/annotator_0": 0.9423679113388062, "grad_norm": 43.26075437361267, "learning_rate": 6.146290700765798e-07, "loss": 0.8892, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0848388671875, "rewards/margins": 0.035858154296875, "rewards/rejected": -0.120849609375, "step": 285 }, { "agreement_weights/mean": 0.9544681310653687, "agreement_weights/std": 0.024717330932617188, "epoch": 0.30563718942025114, "eta/annotator_0": 0.9422749876976013, "grad_norm": 23.99604935549273, "learning_rate": 6.137715377331192e-07, "loss": 0.8438, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0780029296875, "rewards/margins": 0.05291748046875, "rewards/rejected": -0.1307373046875, "step": 286 }, { "agreement_weights/mean": 0.9551631212234497, "agreement_weights/std": 0.022074665874242783, "epoch": 0.30670585092172054, "eta/annotator_0": 0.9419963359832764, "grad_norm": 20.489108704471196, "learning_rate": 6.129103246493272e-07, "loss": 0.8641, "rewards/accuracies": 0.71875, "rewards/chosen": -0.06695556640625, "rewards/margins": 0.0418701171875, "rewards/rejected": -0.10888671875, "step": 287 }, { "agreement_weights/mean": 0.9159344434738159, "agreement_weights/std": 0.13899928331375122, "epoch": 0.3077745124231899, "eta/annotator_0": 0.9419963359832764, "grad_norm": 40.806895943595336, "learning_rate": 6.120454428428069e-07, "loss": 0.8623, "rewards/accuracies": 0.734375, "rewards/chosen": -0.1031494140625, "rewards/margins": 0.01129150390625, "rewards/rejected": -0.1142578125, "step": 288 }, { "agreement_weights/mean": 0.9570690393447876, "agreement_weights/std": 0.02442445605993271, "epoch": 0.3088431739246594, "eta/annotator_0": 0.9427193403244019, "grad_norm": 16.448626242838657, "learning_rate": 6.111769043823564e-07, "loss": 0.8085, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08984375, "rewards/margins": 0.06817626953125, "rewards/rejected": -0.1580810546875, "step": 289 }, { "agreement_weights/mean": 0.9547990560531616, "agreement_weights/std": 0.031521886587142944, "epoch": 0.30991183542612877, "eta/annotator_0": 0.9429603815078735, "grad_norm": 25.623712110187025, "learning_rate": 6.103047213877994e-07, "loss": 0.8188, "rewards/accuracies": 0.796875, "rewards/chosen": -0.107666015625, "rewards/margins": 0.06427001953125, "rewards/rejected": -0.171630859375, "step": 290 }, { "agreement_weights/mean": 0.9515216946601868, "agreement_weights/std": 0.032314419746398926, "epoch": 0.31098049692759816, "eta/annotator_0": 0.9432920813560486, "grad_norm": 59.7604161520557, "learning_rate": 6.094289060298163e-07, "loss": 0.8937, "rewards/accuracies": 0.671875, "rewards/chosen": -0.085693359375, "rewards/margins": 0.094635009765625, "rewards/rejected": -0.1802978515625, "step": 291 }, { "agreement_weights/mean": 0.9473896026611328, "agreement_weights/std": 0.05250655114650726, "epoch": 0.3120491584290676, "eta/annotator_0": 0.9442870020866394, "grad_norm": 26.027134059995184, "learning_rate": 6.085494705297747e-07, "loss": 0.8441, "rewards/accuracies": 0.734375, "rewards/chosen": -0.1070556640625, "rewards/margins": 0.05694580078125, "rewards/rejected": -0.164306640625, "step": 292 }, { "agreement_weights/mean": 0.9508237838745117, "agreement_weights/std": 0.03786724805831909, "epoch": 0.313117819930537, "eta/annotator_0": 0.9442870020866394, "grad_norm": 41.66852967925339, "learning_rate": 6.076664271595586e-07, "loss": 0.8558, "rewards/accuracies": 0.71875, "rewards/chosen": -0.1343994140625, "rewards/margins": 0.0643310546875, "rewards/rejected": -0.198486328125, "step": 293 }, { "agreement_weights/mean": 0.9548330307006836, "agreement_weights/std": 0.030288521200418472, "epoch": 0.3141864814320064, "eta/annotator_0": 0.9427658915519714, "grad_norm": 90.9655701902884, "learning_rate": 6.067797882413971e-07, "loss": 0.9887, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0911865234375, "rewards/margins": 0.110107421875, "rewards/rejected": -0.201171875, "step": 294 }, { "agreement_weights/mean": 0.9384638667106628, "agreement_weights/std": 0.0732535570859909, "epoch": 0.31525514293347584, "eta/annotator_0": 0.9422588348388672, "grad_norm": 28.26218714733016, "learning_rate": 6.05889566147693e-07, "loss": 0.8759, "rewards/accuracies": 0.640625, "rewards/chosen": -0.1165771484375, "rewards/margins": 0.044708251953125, "rewards/rejected": -0.1611328125, "step": 295 }, { "agreement_weights/mean": 0.9566227197647095, "agreement_weights/std": 0.03703407198190689, "epoch": 0.31632380443494523, "eta/annotator_0": 0.9427278637886047, "grad_norm": 36.18182577726934, "learning_rate": 6.04995773300849e-07, "loss": 0.8297, "rewards/accuracies": 0.78125, "rewards/chosen": -0.1298828125, "rewards/margins": 0.09051513671875, "rewards/rejected": -0.22021484375, "step": 296 }, { "agreement_weights/mean": 0.9385548830032349, "agreement_weights/std": 0.07787124067544937, "epoch": 0.3173924659364146, "eta/annotator_0": 0.9441349506378174, "grad_norm": 36.77403979688562, "learning_rate": 6.040984221730958e-07, "loss": 0.8882, "rewards/accuracies": 0.75, "rewards/chosen": -0.09930419921875, "rewards/margins": 0.02288818359375, "rewards/rejected": -0.12225341796875, "step": 297 }, { "agreement_weights/mean": 0.9487975835800171, "agreement_weights/std": 0.04660636931657791, "epoch": 0.3184611274378841, "eta/annotator_0": 0.9441349506378174, "grad_norm": 39.49997002715132, "learning_rate": 6.031975252863167e-07, "loss": 0.8355, "rewards/accuracies": 0.78125, "rewards/chosen": -0.1375732421875, "rewards/margins": 0.07598876953125, "rewards/rejected": -0.213623046875, "step": 298 }, { "agreement_weights/mean": 0.953415036201477, "agreement_weights/std": 0.03475397825241089, "epoch": 0.31952978893935347, "eta/annotator_0": 0.9444112181663513, "grad_norm": 142.4160561309153, "learning_rate": 6.022930952118739e-07, "loss": 0.9478, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08270263671875, "rewards/margins": 0.1092529296875, "rewards/rejected": -0.191650390625, "step": 299 }, { "agreement_weights/mean": 0.9612940549850464, "agreement_weights/std": 0.026577813550829887, "epoch": 0.32059845044082286, "eta/annotator_0": 0.9445033073425293, "grad_norm": 96.57619597166423, "learning_rate": 6.013851445704326e-07, "loss": 1.1043, "rewards/accuracies": 0.75, "rewards/chosen": -0.1158447265625, "rewards/margins": 0.1336669921875, "rewards/rejected": -0.24951171875, "step": 300 }, { "epoch": 0.32059845044082286, "eta/annotator_0": 0.9475704431533813, "eval_agreement_weights/mean": 0.9427698850631714, "eval_agreement_weights/std": 0.06760270893573761, "eval_loss": 0.865753173828125, "eval_rewards/accuracies": 0.7278681397438049, "eval_rewards/chosen": -0.11184071749448776, "eval_rewards/margins": 0.05710682272911072, "eval_rewards/rejected": -0.16888774931430817, "eval_runtime": 134.8174, "eval_samples_per_second": 14.546, "eval_steps_per_second": 0.912, "step": 300 }, { "agreement_weights/mean": 0.955250084400177, "agreement_weights/std": 0.03881929814815521, "epoch": 0.3216671119422923, "eta/annotator_0": 0.948610246181488, "grad_norm": 37.83636936051908, "learning_rate": 6.004736860317849e-07, "loss": 0.7861, "rewards/accuracies": 0.703125, "rewards/chosen": -0.110595703125, "rewards/margins": 0.102294921875, "rewards/rejected": -0.212890625, "step": 301 }, { "agreement_weights/mean": 0.9279405474662781, "agreement_weights/std": 0.10267005860805511, "epoch": 0.3227357734437617, "eta/annotator_0": 0.948610246181488, "grad_norm": 71.246606621445, "learning_rate": 5.995587323146728e-07, "loss": 0.8985, "rewards/accuracies": 0.71875, "rewards/chosen": -0.149658203125, "rewards/margins": 0.0392608642578125, "rewards/rejected": -0.18896484375, "step": 302 }, { "agreement_weights/mean": 0.9547960162162781, "agreement_weights/std": 0.03940389305353165, "epoch": 0.3238044349452311, "eta/annotator_0": 0.9490470886230469, "grad_norm": 97.38386338054069, "learning_rate": 5.986402961866115e-07, "loss": 0.8307, "rewards/accuracies": 0.734375, "rewards/chosen": -0.1204833984375, "rewards/margins": 0.09527587890625, "rewards/rejected": -0.21533203125, "step": 303 }, { "agreement_weights/mean": 0.9211032390594482, "agreement_weights/std": 0.13290929794311523, "epoch": 0.3248730964467005, "eta/annotator_0": 0.9494838714599609, "grad_norm": 96.74008682664231, "learning_rate": 5.977183904637103e-07, "loss": 0.9898, "rewards/accuracies": 0.640625, "rewards/chosen": -0.1385498046875, "rewards/margins": 0.0728759765625, "rewards/rejected": -0.21142578125, "step": 304 }, { "agreement_weights/mean": 0.9234318733215332, "agreement_weights/std": 0.13008761405944824, "epoch": 0.32594175794816993, "eta/annotator_0": 0.9494838714599609, "grad_norm": 36.501566997180475, "learning_rate": 5.967930280104946e-07, "loss": 0.871, "rewards/accuracies": 0.703125, "rewards/chosen": -0.109375, "rewards/margins": 0.0252227783203125, "rewards/rejected": -0.13482666015625, "step": 305 }, { "agreement_weights/mean": 0.9465819597244263, "agreement_weights/std": 0.03860505670309067, "epoch": 0.3270104194496393, "eta/annotator_0": 0.9501209855079651, "grad_norm": 26.346659797009945, "learning_rate": 5.958642217397253e-07, "loss": 0.8878, "rewards/accuracies": 0.734375, "rewards/chosen": -0.099609375, "rewards/margins": 0.0383758544921875, "rewards/rejected": -0.13818359375, "step": 306 }, { "agreement_weights/mean": 0.9453064203262329, "agreement_weights/std": 0.07559485733509064, "epoch": 0.3280790809511087, "eta/annotator_0": 0.9501209855079651, "grad_norm": 36.58941859447347, "learning_rate": 5.9493198461222e-07, "loss": 0.8333, "rewards/accuracies": 0.78125, "rewards/chosen": -0.114990234375, "rewards/margins": 0.0858154296875, "rewards/rejected": -0.20068359375, "step": 307 }, { "agreement_weights/mean": 0.9452036619186401, "agreement_weights/std": 0.036912109702825546, "epoch": 0.32914774245257816, "eta/annotator_0": 0.9499382376670837, "grad_norm": 25.535188632811874, "learning_rate": 5.939963296366709e-07, "loss": 0.9033, "rewards/accuracies": 0.609375, "rewards/chosen": -0.074462890625, "rewards/margins": 0.039581298828125, "rewards/rejected": -0.114013671875, "step": 308 }, { "agreement_weights/mean": 0.9391476511955261, "agreement_weights/std": 0.07309576869010925, "epoch": 0.33021640395404755, "eta/annotator_0": 0.9497554898262024, "grad_norm": 45.13330555564565, "learning_rate": 5.930572698694641e-07, "loss": 0.8419, "rewards/accuracies": 0.703125, "rewards/chosen": -0.10107421875, "rewards/margins": 0.07257080078125, "rewards/rejected": -0.173583984375, "step": 309 }, { "agreement_weights/mean": 0.9503974318504333, "agreement_weights/std": 0.0395372174680233, "epoch": 0.33128506545551695, "eta/annotator_0": 0.9497554898262024, "grad_norm": 28.41056149847039, "learning_rate": 5.921148184144968e-07, "loss": 0.8833, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08782958984375, "rewards/margins": 0.0653076171875, "rewards/rejected": -0.153076171875, "step": 310 }, { "agreement_weights/mean": 0.9353450536727905, "agreement_weights/std": 0.09451915323734283, "epoch": 0.3323537269569864, "eta/annotator_0": 0.9513258934020996, "grad_norm": 25.6308320312519, "learning_rate": 5.911689884229951e-07, "loss": 0.8504, "rewards/accuracies": 0.71875, "rewards/chosen": -0.100341796875, "rewards/margins": 0.047882080078125, "rewards/rejected": -0.148193359375, "step": 311 }, { "agreement_weights/mean": 0.9294730424880981, "agreement_weights/std": 0.08619679510593414, "epoch": 0.3334223884584558, "eta/annotator_0": 0.9513258934020996, "grad_norm": 33.6373560463721, "learning_rate": 5.902197930933298e-07, "loss": 0.8924, "rewards/accuracies": 0.6875, "rewards/chosen": -0.076171875, "rewards/margins": 0.041046142578125, "rewards/rejected": -0.1173095703125, "step": 312 }, { "agreement_weights/mean": 0.951790452003479, "agreement_weights/std": 0.03551465645432472, "epoch": 0.3344910499599252, "eta/annotator_0": 0.9517030715942383, "grad_norm": 24.508219833537265, "learning_rate": 5.892672456708326e-07, "loss": 0.8498, "rewards/accuracies": 0.75, "rewards/chosen": -0.0679779052734375, "rewards/margins": 0.0670928955078125, "rewards/rejected": -0.13525390625, "step": 313 }, { "agreement_weights/mean": 0.9559763669967651, "agreement_weights/std": 0.04050162807106972, "epoch": 0.3355597114613946, "eta/annotator_0": 0.9520803689956665, "grad_norm": 30.347335160137327, "learning_rate": 5.883113594476117e-07, "loss": 0.7832, "rewards/accuracies": 0.828125, "rewards/chosen": -0.04736328125, "rewards/margins": 0.0928955078125, "rewards/rejected": -0.140380859375, "step": 314 }, { "agreement_weights/mean": 0.9352656602859497, "agreement_weights/std": 0.07855648547410965, "epoch": 0.336628372962864, "eta/annotator_0": 0.9520803689956665, "grad_norm": 29.75135672678236, "learning_rate": 5.873521477623649e-07, "loss": 0.8968, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0814208984375, "rewards/margins": 0.0393524169921875, "rewards/rejected": -0.12060546875, "step": 315 }, { "agreement_weights/mean": 0.9323169589042664, "agreement_weights/std": 0.0979207381606102, "epoch": 0.3376970344643334, "eta/annotator_0": 0.9517191052436829, "grad_norm": 99.30162868929659, "learning_rate": 5.863896240001953e-07, "loss": 0.9513, "rewards/accuracies": 0.671875, "rewards/chosen": -0.1102294921875, "rewards/margins": 0.029541015625, "rewards/rejected": -0.13979339599609375, "step": 316 }, { "agreement_weights/mean": 0.9375380277633667, "agreement_weights/std": 0.09353651851415634, "epoch": 0.3387656959658028, "eta/annotator_0": 0.9517191052436829, "grad_norm": 87.09771194980848, "learning_rate": 5.854238015924232e-07, "loss": 0.9443, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0674591064453125, "rewards/margins": 0.09912109375, "rewards/rejected": -0.16650390625, "step": 317 }, { "agreement_weights/mean": 0.9505991339683533, "agreement_weights/std": 0.05400260165333748, "epoch": 0.33983435746727225, "eta/annotator_0": 0.95186448097229, "grad_norm": 33.21762639223801, "learning_rate": 5.844546940163994e-07, "loss": 0.7814, "rewards/accuracies": 0.796875, "rewards/chosen": -0.0830078125, "rewards/margins": 0.07623291015625, "rewards/rejected": -0.159423828125, "step": 318 }, { "agreement_weights/mean": 0.9305624961853027, "agreement_weights/std": 0.09238345921039581, "epoch": 0.34090301896874164, "eta/annotator_0": 0.9520097970962524, "grad_norm": 114.57097272180913, "learning_rate": 5.834823147953162e-07, "loss": 1.103, "rewards/accuracies": 0.671875, "rewards/chosen": -0.1376953125, "rewards/margins": 0.0358123779296875, "rewards/rejected": -0.17333984375, "step": 319 }, { "agreement_weights/mean": 0.9395648837089539, "agreement_weights/std": 0.06808660179376602, "epoch": 0.34197168047021104, "eta/annotator_0": 0.9520097970962524, "grad_norm": 28.54589030195734, "learning_rate": 5.825066774980201e-07, "loss": 0.8582, "rewards/accuracies": 0.734375, "rewards/chosen": -0.097412109375, "rewards/margins": 0.040985107421875, "rewards/rejected": -0.1383056640625, "step": 320 }, { "agreement_weights/mean": 0.928691029548645, "agreement_weights/std": 0.10293145477771759, "epoch": 0.3430403419716805, "eta/annotator_0": 0.952533483505249, "grad_norm": 42.13772405889092, "learning_rate": 5.815277957388211e-07, "loss": 0.8793, "rewards/accuracies": 0.625, "rewards/chosen": -0.0722503662109375, "rewards/margins": 0.045745849609375, "rewards/rejected": -0.1180419921875, "step": 321 }, { "agreement_weights/mean": 0.9429168701171875, "agreement_weights/std": 0.06817039847373962, "epoch": 0.3441090034731499, "eta/annotator_0": 0.952533483505249, "grad_norm": 47.59226380034781, "learning_rate": 5.805456831773038e-07, "loss": 0.8744, "rewards/accuracies": 0.765625, "rewards/chosen": -0.1121826171875, "rewards/margins": 0.08997344970703125, "rewards/rejected": -0.2022705078125, "step": 322 }, { "agreement_weights/mean": 0.9527214765548706, "agreement_weights/std": 0.03583855181932449, "epoch": 0.34517766497461927, "eta/annotator_0": 0.9515057802200317, "grad_norm": 39.48800241842724, "learning_rate": 5.795603535181359e-07, "loss": 0.8405, "rewards/accuracies": 0.75, "rewards/chosen": -0.113525390625, "rewards/margins": 0.07366943359375, "rewards/rejected": -0.18701171875, "step": 323 }, { "agreement_weights/mean": 0.9477037191390991, "agreement_weights/std": 0.042721085250377655, "epoch": 0.3462463264760887, "eta/annotator_0": 0.9504779577255249, "grad_norm": 52.88541367971384, "learning_rate": 5.785718205108776e-07, "loss": 0.8683, "rewards/accuracies": 0.703125, "rewards/chosen": -0.1302490234375, "rewards/margins": 0.065765380859375, "rewards/rejected": -0.196044921875, "step": 324 }, { "agreement_weights/mean": 0.9567831754684448, "agreement_weights/std": 0.04272889345884323, "epoch": 0.3473149879775581, "eta/annotator_0": 0.9504779577255249, "grad_norm": 27.13606258502526, "learning_rate": 5.775800979497894e-07, "loss": 0.7432, "rewards/accuracies": 0.84375, "rewards/chosen": -0.1116943359375, "rewards/margins": 0.1087646484375, "rewards/rejected": -0.220947265625, "step": 325 }, { "agreement_weights/mean": 0.9414723515510559, "agreement_weights/std": 0.08514156192541122, "epoch": 0.3483836494790275, "eta/annotator_0": 0.9463915228843689, "grad_norm": 33.686978877840254, "learning_rate": 5.765851996736397e-07, "loss": 0.7913, "rewards/accuracies": 0.796875, "rewards/chosen": -0.1312255859375, "rewards/margins": 0.066497802734375, "rewards/rejected": -0.19775390625, "step": 326 }, { "agreement_weights/mean": 0.9562709927558899, "agreement_weights/std": 0.029553137719631195, "epoch": 0.34945231098049695, "eta/annotator_0": 0.9463915228843689, "grad_norm": 33.66636647047542, "learning_rate": 5.755871395655117e-07, "loss": 0.8353, "rewards/accuracies": 0.765625, "rewards/chosen": -0.126953125, "rewards/margins": 0.07586669921875, "rewards/rejected": -0.20263671875, "step": 327 }, { "agreement_weights/mean": 0.9465333223342896, "agreement_weights/std": 0.03941868990659714, "epoch": 0.35052097248196634, "eta/annotator_0": 0.9470200538635254, "grad_norm": 111.03729252620103, "learning_rate": 5.745859315526097e-07, "loss": 0.9725, "rewards/accuracies": 0.734375, "rewards/chosen": -0.15576171875, "rewards/margins": 0.0839691162109375, "rewards/rejected": -0.239501953125, "step": 328 }, { "agreement_weights/mean": 0.9424245357513428, "agreement_weights/std": 0.08417924493551254, "epoch": 0.35158963398343573, "eta/annotator_0": 0.9476485252380371, "grad_norm": 28.43306089853463, "learning_rate": 5.735815896060648e-07, "loss": 0.785, "rewards/accuracies": 0.78125, "rewards/chosen": -0.13134765625, "rewards/margins": 0.0559539794921875, "rewards/rejected": -0.187255859375, "step": 329 }, { "agreement_weights/mean": 0.9553465247154236, "agreement_weights/std": 0.023107318207621574, "epoch": 0.3526582954849052, "eta/annotator_0": 0.9476485252380371, "grad_norm": 33.80121582740315, "learning_rate": 5.725741277407398e-07, "loss": 0.8493, "rewards/accuracies": 0.796875, "rewards/chosen": -0.111083984375, "rewards/margins": 0.04974365234375, "rewards/rejected": -0.1607666015625, "step": 330 }, { "agreement_weights/mean": 0.9441205263137817, "agreement_weights/std": 0.06748949736356735, "epoch": 0.3537269569863746, "eta/annotator_0": 0.9488288760185242, "grad_norm": 46.337271918396326, "learning_rate": 5.715635600150336e-07, "loss": 0.8393, "rewards/accuracies": 0.734375, "rewards/chosen": -0.142333984375, "rewards/margins": 0.071533203125, "rewards/rejected": -0.2138671875, "step": 331 }, { "agreement_weights/mean": 0.9500930905342102, "agreement_weights/std": 0.035454194992780685, "epoch": 0.35479561848784397, "eta/annotator_0": 0.9488288760185242, "grad_norm": 104.78157826809175, "learning_rate": 5.705499005306851e-07, "loss": 1.1231, "rewards/accuracies": 0.65625, "rewards/chosen": -0.1356201171875, "rewards/margins": 0.0955810546875, "rewards/rejected": -0.231201171875, "step": 332 }, { "agreement_weights/mean": 0.9437679648399353, "agreement_weights/std": 0.08353368937969208, "epoch": 0.35586427998931336, "eta/annotator_0": 0.9487804174423218, "grad_norm": 33.50325947202919, "learning_rate": 5.695331634325771e-07, "loss": 0.7927, "rewards/accuracies": 0.78125, "rewards/chosen": -0.1416015625, "rewards/margins": 0.051025390625, "rewards/rejected": -0.192626953125, "step": 333 }, { "agreement_weights/mean": 0.9501356482505798, "agreement_weights/std": 0.04954961687326431, "epoch": 0.3569329414907828, "eta/annotator_0": 0.9487320184707642, "grad_norm": 30.20284651518934, "learning_rate": 5.685133629085371e-07, "loss": 0.8071, "rewards/accuracies": 0.734375, "rewards/chosen": -0.1351318359375, "rewards/margins": 0.0819091796875, "rewards/rejected": -0.21728515625, "step": 334 }, { "agreement_weights/mean": 0.9412158727645874, "agreement_weights/std": 0.06916482746601105, "epoch": 0.3580016029922522, "eta/annotator_0": 0.9487320184707642, "grad_norm": 49.59560054684916, "learning_rate": 5.674905131891414e-07, "loss": 0.8764, "rewards/accuracies": 0.75, "rewards/chosen": -0.13134765625, "rewards/margins": 0.063507080078125, "rewards/rejected": -0.19482421875, "step": 335 }, { "agreement_weights/mean": 0.9111082553863525, "agreement_weights/std": 0.17757678031921387, "epoch": 0.3590702644937216, "eta/annotator_0": 0.9486842751502991, "grad_norm": 74.87340672647167, "learning_rate": 5.664646285475152e-07, "loss": 0.8666, "rewards/accuracies": 0.734375, "rewards/chosen": -0.2119140625, "rewards/margins": 0.03289794921875, "rewards/rejected": -0.245361328125, "step": 336 }, { "agreement_weights/mean": 0.9498663544654846, "agreement_weights/std": 0.03576754033565521, "epoch": 0.36013892599519104, "eta/annotator_0": 0.9486842751502991, "grad_norm": 30.834184438681753, "learning_rate": 5.654357232991342e-07, "loss": 0.8684, "rewards/accuracies": 0.71875, "rewards/chosen": -0.14013671875, "rewards/margins": 0.039459228515625, "rewards/rejected": -0.1796875, "step": 337 }, { "agreement_weights/mean": 0.9141381978988647, "agreement_weights/std": 0.15540538728237152, "epoch": 0.36120758749666043, "eta/annotator_0": 0.9493794441223145, "grad_norm": 66.92307887383356, "learning_rate": 5.644038118016242e-07, "loss": 0.8206, "rewards/accuracies": 0.75, "rewards/chosen": -0.1409912109375, "rewards/margins": 0.052001953125, "rewards/rejected": -0.193115234375, "step": 338 }, { "agreement_weights/mean": 0.9241869449615479, "agreement_weights/std": 0.13375234603881836, "epoch": 0.3622762489981298, "eta/annotator_0": 0.9500746130943298, "grad_norm": 49.06281604990103, "learning_rate": 5.633689084545607e-07, "loss": 0.8885, "rewards/accuracies": 0.703125, "rewards/chosen": -0.16162109375, "rewards/margins": 0.014495849609375, "rewards/rejected": -0.17626953125, "step": 339 }, { "agreement_weights/mean": 0.9520750045776367, "agreement_weights/std": 0.06299105286598206, "epoch": 0.36334491049959927, "eta/annotator_0": 0.9500746130943298, "grad_norm": 36.10994191726589, "learning_rate": 5.623310276992691e-07, "loss": 0.7839, "rewards/accuracies": 0.796875, "rewards/chosen": -0.13671875, "rewards/margins": 0.10504150390625, "rewards/rejected": -0.24169921875, "step": 340 }, { "agreement_weights/mean": 0.9319523572921753, "agreement_weights/std": 0.09413771331310272, "epoch": 0.36441357200106866, "eta/annotator_0": 0.9511691927909851, "grad_norm": 31.866797210359824, "learning_rate": 5.612901840186216e-07, "loss": 0.8666, "rewards/accuracies": 0.671875, "rewards/chosen": -0.150390625, "rewards/margins": 0.033477783203125, "rewards/rejected": -0.18408203125, "step": 341 }, { "agreement_weights/mean": 0.9620287418365479, "agreement_weights/std": 0.026224317029118538, "epoch": 0.36548223350253806, "eta/annotator_0": 0.9511691927909851, "grad_norm": 27.711124424318825, "learning_rate": 5.602463919368363e-07, "loss": 0.7582, "rewards/accuracies": 0.796875, "rewards/chosen": -0.1357421875, "rewards/margins": 0.1148681640625, "rewards/rejected": -0.25048828125, "step": 342 }, { "agreement_weights/mean": 0.958702802658081, "agreement_weights/std": 0.030640654265880585, "epoch": 0.3665508950040075, "eta/annotator_0": 0.9504549503326416, "grad_norm": 27.931750069340662, "learning_rate": 5.591996660192741e-07, "loss": 0.7732, "rewards/accuracies": 0.765625, "rewards/chosen": -0.11083984375, "rewards/margins": 0.08966064453125, "rewards/rejected": -0.200439453125, "step": 343 }, { "agreement_weights/mean": 0.9454098343849182, "agreement_weights/std": 0.06414537876844406, "epoch": 0.3676195565054769, "eta/annotator_0": 0.9497407078742981, "grad_norm": 34.64649735187946, "learning_rate": 5.581500208722351e-07, "loss": 0.8346, "rewards/accuracies": 0.734375, "rewards/chosen": -0.136474609375, "rewards/margins": 0.08978271484375, "rewards/rejected": -0.226318359375, "step": 344 }, { "agreement_weights/mean": 0.9340512752532959, "agreement_weights/std": 0.13421286642551422, "epoch": 0.3686882180069463, "eta/annotator_0": 0.9497407078742981, "grad_norm": 85.00184058476411, "learning_rate": 5.570974711427556e-07, "loss": 0.7945, "rewards/accuracies": 0.796875, "rewards/chosen": -0.11956787109375, "rewards/margins": 0.0552978515625, "rewards/rejected": -0.17529296875, "step": 345 }, { "agreement_weights/mean": 0.9339848160743713, "agreement_weights/std": 0.08018407225608826, "epoch": 0.36975687950841574, "eta/annotator_0": 0.9469912052154541, "grad_norm": 27.478365498967495, "learning_rate": 5.560420315184031e-07, "loss": 0.8599, "rewards/accuracies": 0.625, "rewards/chosen": -0.13623046875, "rewards/margins": 0.0421295166015625, "rewards/rejected": -0.17822265625, "step": 346 }, { "agreement_weights/mean": 0.9551892876625061, "agreement_weights/std": 0.04080036282539368, "epoch": 0.37082554100988513, "eta/annotator_0": 0.9469912052154541, "grad_norm": 37.00923375750722, "learning_rate": 5.549837167270709e-07, "loss": 0.747, "rewards/accuracies": 0.765625, "rewards/chosen": -0.153564453125, "rewards/margins": 0.09063720703125, "rewards/rejected": -0.244140625, "step": 347 }, { "agreement_weights/mean": 0.960350513458252, "agreement_weights/std": 0.025887416675686836, "epoch": 0.3718942025113545, "eta/annotator_0": 0.9477843046188354, "grad_norm": 19.622462217068964, "learning_rate": 5.539225415367741e-07, "loss": 0.7506, "rewards/accuracies": 0.796875, "rewards/chosen": -0.1190185546875, "rewards/margins": 0.0963134765625, "rewards/rejected": -0.215576171875, "step": 348 }, { "agreement_weights/mean": 0.9306018352508545, "agreement_weights/std": 0.1480480283498764, "epoch": 0.3729628640128239, "eta/annotator_0": 0.9485774040222168, "grad_norm": 103.32096065748182, "learning_rate": 5.528585207554421e-07, "loss": 0.8202, "rewards/accuracies": 0.796875, "rewards/chosen": -0.16259765625, "rewards/margins": 0.0602569580078125, "rewards/rejected": -0.22265625, "step": 349 }, { "agreement_weights/mean": 0.9300565719604492, "agreement_weights/std": 0.11280800402164459, "epoch": 0.37403152551429336, "eta/annotator_0": 0.9485774040222168, "grad_norm": 25.783140248632886, "learning_rate": 5.517916692307123e-07, "loss": 0.7447, "rewards/accuracies": 0.75, "rewards/chosen": -0.1300048828125, "rewards/margins": 0.06396484375, "rewards/rejected": -0.194091796875, "step": 350 }, { "agreement_weights/mean": 0.9482638835906982, "agreement_weights/std": 0.07265797257423401, "epoch": 0.37510018701576275, "eta/annotator_0": 0.9503259062767029, "grad_norm": 30.92528319889217, "learning_rate": 5.507220018497235e-07, "loss": 0.7567, "rewards/accuracies": 0.8125, "rewards/chosen": -0.14599609375, "rewards/margins": 0.0916748046875, "rewards/rejected": -0.237548828125, "step": 351 }, { "agreement_weights/mean": 0.9372784495353699, "agreement_weights/std": 0.0712650790810585, "epoch": 0.37616884851723215, "eta/annotator_0": 0.9503259062767029, "grad_norm": 29.256208076622723, "learning_rate": 5.496495335389074e-07, "loss": 0.8334, "rewards/accuracies": 0.65625, "rewards/chosen": -0.1444091796875, "rewards/margins": 0.05816650390625, "rewards/rejected": -0.202880859375, "step": 352 }, { "agreement_weights/mean": 0.9543055891990662, "agreement_weights/std": 0.032139722257852554, "epoch": 0.3772375100187016, "eta/annotator_0": 0.9510211944580078, "grad_norm": 27.383738911204237, "learning_rate": 5.485742792637807e-07, "loss": 0.8261, "rewards/accuracies": 0.71875, "rewards/chosen": -0.16259765625, "rewards/margins": 0.07452392578125, "rewards/rejected": -0.237548828125, "step": 353 }, { "agreement_weights/mean": 0.9509296417236328, "agreement_weights/std": 0.07032308727502823, "epoch": 0.378306171520171, "eta/annotator_0": 0.9517165422439575, "grad_norm": 31.835038929947096, "learning_rate": 5.474962540287362e-07, "loss": 0.709, "rewards/accuracies": 0.875, "rewards/chosen": -0.173828125, "rewards/margins": 0.1019287109375, "rewards/rejected": -0.275390625, "step": 354 }, { "agreement_weights/mean": 0.9569829702377319, "agreement_weights/std": 0.05955237150192261, "epoch": 0.3793748330216404, "eta/annotator_0": 0.9517165422439575, "grad_norm": 30.42548343079745, "learning_rate": 5.464154728768339e-07, "loss": 0.6787, "rewards/accuracies": 0.84375, "rewards/chosen": -0.17822265625, "rewards/margins": 0.1285400390625, "rewards/rejected": -0.306640625, "step": 355 }, { "agreement_weights/mean": 0.9302628040313721, "agreement_weights/std": 0.1386040896177292, "epoch": 0.3804434945231098, "eta/annotator_0": 0.954156219959259, "grad_norm": 76.11690057099251, "learning_rate": 5.453319508895896e-07, "loss": 0.8111, "rewards/accuracies": 0.796875, "rewards/chosen": -0.230224609375, "rewards/margins": 0.0950927734375, "rewards/rejected": -0.3251953125, "step": 356 }, { "agreement_weights/mean": 0.958899974822998, "agreement_weights/std": 0.031801097095012665, "epoch": 0.3815121560245792, "eta/annotator_0": 0.954156219959259, "grad_norm": 62.78092240896229, "learning_rate": 5.442457031867667e-07, "loss": 0.7788, "rewards/accuracies": 0.828125, "rewards/chosen": -0.18603515625, "rewards/margins": 0.0870361328125, "rewards/rejected": -0.2734375, "step": 357 }, { "agreement_weights/mean": 0.9547988772392273, "agreement_weights/std": 0.08578068017959595, "epoch": 0.3825808175260486, "eta/annotator_0": 0.9531571269035339, "grad_norm": 38.37595303941495, "learning_rate": 5.431567449261628e-07, "loss": 0.6238, "rewards/accuracies": 0.875, "rewards/chosen": -0.173583984375, "rewards/margins": 0.129150390625, "rewards/rejected": -0.302734375, "step": 358 }, { "agreement_weights/mean": 0.938931941986084, "agreement_weights/std": 0.08669944852590561, "epoch": 0.38364947902751806, "eta/annotator_0": 0.9521580338478088, "grad_norm": 22.644843190987412, "learning_rate": 5.420650913034004e-07, "loss": 0.8172, "rewards/accuracies": 0.71875, "rewards/chosen": -0.21533203125, "rewards/margins": 0.05908203125, "rewards/rejected": -0.2744140625, "step": 359 }, { "agreement_weights/mean": 0.9292659759521484, "agreement_weights/std": 0.1028866171836853, "epoch": 0.38471814052898745, "eta/annotator_0": 0.9521580338478088, "grad_norm": 38.04253030526771, "learning_rate": 5.40970757551713e-07, "loss": 0.8132, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2431640625, "rewards/margins": 0.06341552734375, "rewards/rejected": -0.30712890625, "step": 360 }, { "agreement_weights/mean": 0.9355199933052063, "agreement_weights/std": 0.10378079116344452, "epoch": 0.38578680203045684, "eta/annotator_0": 0.9468991756439209, "grad_norm": 27.28276622722264, "learning_rate": 5.398737589417339e-07, "loss": 0.7386, "rewards/accuracies": 0.734375, "rewards/chosen": -0.171630859375, "rewards/margins": 0.0977783203125, "rewards/rejected": -0.269775390625, "step": 361 }, { "agreement_weights/mean": 0.952560544013977, "agreement_weights/std": 0.05252369865775108, "epoch": 0.38685546353192624, "eta/annotator_0": 0.9468991756439209, "grad_norm": 33.88064565097361, "learning_rate": 5.387741107812823e-07, "loss": 0.7647, "rewards/accuracies": 0.765625, "rewards/chosen": -0.238037109375, "rewards/margins": 0.1070556640625, "rewards/rejected": -0.3447265625, "step": 362 }, { "agreement_weights/mean": 0.9434411525726318, "agreement_weights/std": 0.06377717852592468, "epoch": 0.3879241250333957, "eta/annotator_0": 0.9477335214614868, "grad_norm": 29.497945210814198, "learning_rate": 5.376718284151499e-07, "loss": 0.8226, "rewards/accuracies": 0.734375, "rewards/chosen": -0.245361328125, "rewards/margins": 0.06878662109375, "rewards/rejected": -0.314453125, "step": 363 }, { "agreement_weights/mean": 0.948516845703125, "agreement_weights/std": 0.08732870221138, "epoch": 0.3889927865348651, "eta/annotator_0": 0.9485678672790527, "grad_norm": 76.17073687657135, "learning_rate": 5.365669272248873e-07, "loss": 0.6896, "rewards/accuracies": 0.828125, "rewards/chosen": -0.250732421875, "rewards/margins": 0.125732421875, "rewards/rejected": -0.37548828125, "step": 364 }, { "agreement_weights/mean": 0.9399405717849731, "agreement_weights/std": 0.09403529763221741, "epoch": 0.39006144803633447, "eta/annotator_0": 0.9485678672790527, "grad_norm": 31.999488813477797, "learning_rate": 5.354594226285882e-07, "loss": 0.7483, "rewards/accuracies": 0.75, "rewards/chosen": -0.288818359375, "rewards/margins": 0.0819091796875, "rewards/rejected": -0.37060546875, "step": 365 }, { "agreement_weights/mean": 0.9505530595779419, "agreement_weights/std": 0.07807415723800659, "epoch": 0.3911301095378039, "eta/annotator_0": 0.9470714330673218, "grad_norm": 31.627976534503038, "learning_rate": 5.343493300806755e-07, "loss": 0.69, "rewards/accuracies": 0.828125, "rewards/chosen": -0.2587890625, "rewards/margins": 0.1219482421875, "rewards/rejected": -0.38037109375, "step": 366 }, { "agreement_weights/mean": 0.9509766101837158, "agreement_weights/std": 0.0649639293551445, "epoch": 0.3921987710392733, "eta/annotator_0": 0.9470714330673218, "grad_norm": 49.79920765920013, "learning_rate": 5.332366650716846e-07, "loss": 0.7661, "rewards/accuracies": 0.765625, "rewards/chosen": -0.252197265625, "rewards/margins": 0.10888671875, "rewards/rejected": -0.361572265625, "step": 367 }, { "agreement_weights/mean": 0.9409750699996948, "agreement_weights/std": 0.08328406512737274, "epoch": 0.3932674325407427, "eta/annotator_0": 0.9464754462242126, "grad_norm": 31.032530070711587, "learning_rate": 5.321214431280482e-07, "loss": 0.7524, "rewards/accuracies": 0.8125, "rewards/chosen": -0.2578125, "rewards/margins": 0.072296142578125, "rewards/rejected": -0.330078125, "step": 368 }, { "agreement_weights/mean": 0.9394659996032715, "agreement_weights/std": 0.09653401374816895, "epoch": 0.39433609404221215, "eta/annotator_0": 0.9458794593811035, "grad_norm": 37.11075034207589, "learning_rate": 5.310036798118789e-07, "loss": 0.733, "rewards/accuracies": 0.75, "rewards/chosen": -0.29638671875, "rewards/margins": 0.10247802734375, "rewards/rejected": -0.39892578125, "step": 369 }, { "agreement_weights/mean": 0.9649299383163452, "agreement_weights/std": 0.039634786546230316, "epoch": 0.39540475554368154, "eta/annotator_0": 0.9458794593811035, "grad_norm": 44.940976330900675, "learning_rate": 5.298833907207525e-07, "loss": 0.6526, "rewards/accuracies": 0.828125, "rewards/chosen": -0.26416015625, "rewards/margins": 0.143798828125, "rewards/rejected": -0.408203125, "step": 370 }, { "agreement_weights/mean": 0.9538074731826782, "agreement_weights/std": 0.044352415949106216, "epoch": 0.39647341704515093, "eta/annotator_0": 0.9461991786956787, "grad_norm": 27.33558032364589, "learning_rate": 5.287605914874898e-07, "loss": 0.7569, "rewards/accuracies": 0.65625, "rewards/chosen": -0.2734375, "rewards/margins": 0.0899658203125, "rewards/rejected": -0.36328125, "step": 371 }, { "agreement_weights/mean": 0.949595034122467, "agreement_weights/std": 0.04465171694755554, "epoch": 0.3975420785466204, "eta/annotator_0": 0.9461991786956787, "grad_norm": 39.561032011002915, "learning_rate": 5.276352977799392e-07, "loss": 0.8529, "rewards/accuracies": 0.65625, "rewards/chosen": -0.26171875, "rewards/margins": 0.083831787109375, "rewards/rejected": -0.345703125, "step": 372 }, { "agreement_weights/mean": 0.9378488063812256, "agreement_weights/std": 0.08327393978834152, "epoch": 0.3986107400480898, "eta/annotator_0": 0.9460914134979248, "grad_norm": 32.09991642244634, "learning_rate": 5.265075253007574e-07, "loss": 0.7853, "rewards/accuracies": 0.734375, "rewards/chosen": -0.33056640625, "rewards/margins": 0.0631103515625, "rewards/rejected": -0.3935546875, "step": 373 }, { "agreement_weights/mean": 0.9485194683074951, "agreement_weights/std": 0.08459806442260742, "epoch": 0.39967940154955917, "eta/annotator_0": 0.9459836483001709, "grad_norm": 41.492854673175586, "learning_rate": 5.253772897871908e-07, "loss": 0.7119, "rewards/accuracies": 0.78125, "rewards/chosen": -0.260498046875, "rewards/margins": 0.10205078125, "rewards/rejected": -0.3623046875, "step": 374 }, { "agreement_weights/mean": 0.9363464117050171, "agreement_weights/std": 0.09958768635988235, "epoch": 0.4007480630510286, "eta/annotator_0": 0.9459836483001709, "grad_norm": 38.32437086434891, "learning_rate": 5.242446070108554e-07, "loss": 0.7944, "rewards/accuracies": 0.703125, "rewards/chosen": -0.3564453125, "rewards/margins": 0.05523681640625, "rewards/rejected": -0.4111328125, "step": 375 }, { "agreement_weights/mean": 0.9320700168609619, "agreement_weights/std": 0.12115712463855743, "epoch": 0.401816724552498, "eta/annotator_0": 0.9450877904891968, "grad_norm": 40.69552590229628, "learning_rate": 5.231094927775172e-07, "loss": 0.7075, "rewards/accuracies": 0.734375, "rewards/chosen": -0.39013671875, "rewards/margins": 0.1087646484375, "rewards/rejected": -0.49951171875, "step": 376 }, { "agreement_weights/mean": 0.9047492146492004, "agreement_weights/std": 0.1529785394668579, "epoch": 0.4028853860539674, "eta/annotator_0": 0.9450877904891968, "grad_norm": 56.4068925444026, "learning_rate": 5.219719629268712e-07, "loss": 0.7454, "rewards/accuracies": 0.6875, "rewards/chosen": -0.34619140625, "rewards/margins": 0.0863037109375, "rewards/rejected": -0.43310546875, "step": 377 }, { "agreement_weights/mean": 0.9332375526428223, "agreement_weights/std": 0.11756917834281921, "epoch": 0.4039540475554368, "eta/annotator_0": 0.9455844759941101, "grad_norm": 102.11865960816087, "learning_rate": 5.208320333323208e-07, "loss": 0.8527, "rewards/accuracies": 0.75, "rewards/chosen": -0.40869140625, "rewards/margins": 0.089111328125, "rewards/rejected": -0.49755859375, "step": 378 }, { "agreement_weights/mean": 0.9274170398712158, "agreement_weights/std": 0.12390643358230591, "epoch": 0.40502270905690624, "eta/annotator_0": 0.9460811018943787, "grad_norm": 74.60809926539108, "learning_rate": 5.196897199007561e-07, "loss": 0.778, "rewards/accuracies": 0.703125, "rewards/chosen": -0.335205078125, "rewards/margins": 0.11505126953125, "rewards/rejected": -0.4501953125, "step": 379 }, { "agreement_weights/mean": 0.9325735569000244, "agreement_weights/std": 0.14572666585445404, "epoch": 0.40609137055837563, "eta/annotator_0": 0.9460811018943787, "grad_norm": 40.540409602723194, "learning_rate": 5.185450385723316e-07, "loss": 0.6362, "rewards/accuracies": 0.796875, "rewards/chosen": -0.33349609375, "rewards/margins": 0.11724853515625, "rewards/rejected": -0.4501953125, "step": 380 }, { "agreement_weights/mean": 0.9411958456039429, "agreement_weights/std": 0.0757426768541336, "epoch": 0.407160032059845, "eta/annotator_0": 0.948322594165802, "grad_norm": 31.24213518645224, "learning_rate": 5.173980053202444e-07, "loss": 0.8012, "rewards/accuracies": 0.703125, "rewards/chosen": -0.37744140625, "rewards/margins": 0.09832763671875, "rewards/rejected": -0.4755859375, "step": 381 }, { "agreement_weights/mean": 0.9501733183860779, "agreement_weights/std": 0.06505744159221649, "epoch": 0.40822869356131447, "eta/annotator_0": 0.948322594165802, "grad_norm": 32.780815564035066, "learning_rate": 5.162486361505108e-07, "loss": 0.6652, "rewards/accuracies": 0.734375, "rewards/chosen": -0.322265625, "rewards/margins": 0.12164306640625, "rewards/rejected": -0.4443359375, "step": 382 }, { "agreement_weights/mean": 0.9306560754776001, "agreement_weights/std": 0.1142519861459732, "epoch": 0.40929735506278386, "eta/annotator_0": 0.9471826553344727, "grad_norm": 52.623265395637304, "learning_rate": 5.150969471017434e-07, "loss": 0.729, "rewards/accuracies": 0.78125, "rewards/chosen": -0.36865234375, "rewards/margins": 0.0954132080078125, "rewards/rejected": -0.4638671875, "step": 383 }, { "agreement_weights/mean": 0.9501175880432129, "agreement_weights/std": 0.07664522528648376, "epoch": 0.41036601656425326, "eta/annotator_0": 0.9460426568984985, "grad_norm": 61.241851346318875, "learning_rate": 5.139429542449265e-07, "loss": 0.7081, "rewards/accuracies": 0.765625, "rewards/chosen": -0.35693359375, "rewards/margins": 0.1480712890625, "rewards/rejected": -0.50439453125, "step": 384 }, { "agreement_weights/mean": 0.9458649158477783, "agreement_weights/std": 0.09623412787914276, "epoch": 0.4114346780657227, "eta/annotator_0": 0.9460426568984985, "grad_norm": 37.206978833369384, "learning_rate": 5.127866736831931e-07, "loss": 0.6682, "rewards/accuracies": 0.78125, "rewards/chosen": -0.37646484375, "rewards/margins": 0.140380859375, "rewards/rejected": -0.51611328125, "step": 385 }, { "agreement_weights/mean": 0.9380298852920532, "agreement_weights/std": 0.10377945005893707, "epoch": 0.4125033395671921, "eta/annotator_0": 0.9470074772834778, "grad_norm": 73.61157564121807, "learning_rate": 5.116281215515987e-07, "loss": 0.7109, "rewards/accuracies": 0.6875, "rewards/chosen": -0.39111328125, "rewards/margins": 0.1408538818359375, "rewards/rejected": -0.53173828125, "step": 386 }, { "agreement_weights/mean": 0.8927596807479858, "agreement_weights/std": 0.17604011297225952, "epoch": 0.4135720010686615, "eta/annotator_0": 0.9470074772834778, "grad_norm": 43.18249453728089, "learning_rate": 5.104673140168976e-07, "loss": 0.826, "rewards/accuracies": 0.65625, "rewards/chosen": -0.439453125, "rewards/margins": 0.050567626953125, "rewards/rejected": -0.490234375, "step": 387 }, { "agreement_weights/mean": 0.9190827012062073, "agreement_weights/std": 0.14515644311904907, "epoch": 0.41464066257013094, "eta/annotator_0": 0.9450864791870117, "grad_norm": 43.56803887700657, "learning_rate": 5.093042672773161e-07, "loss": 0.6986, "rewards/accuracies": 0.71875, "rewards/chosen": -0.4267578125, "rewards/margins": 0.10009765625, "rewards/rejected": -0.52685546875, "step": 388 }, { "agreement_weights/mean": 0.9400992393493652, "agreement_weights/std": 0.09316378086805344, "epoch": 0.41570932407160033, "eta/annotator_0": 0.9431654214859009, "grad_norm": 53.864533653005466, "learning_rate": 5.081389975623272e-07, "loss": 0.7945, "rewards/accuracies": 0.75, "rewards/chosen": -0.42236328125, "rewards/margins": 0.1304931640625, "rewards/rejected": -0.55322265625, "step": 389 }, { "agreement_weights/mean": 0.9330196380615234, "agreement_weights/std": 0.11605339497327805, "epoch": 0.4167779855730697, "eta/annotator_0": 0.9431654214859009, "grad_norm": 35.54514051531669, "learning_rate": 5.06971521132424e-07, "loss": 0.6387, "rewards/accuracies": 0.796875, "rewards/chosen": -0.39013671875, "rewards/margins": 0.144775390625, "rewards/rejected": -0.53466796875, "step": 390 }, { "agreement_weights/mean": 0.9461353421211243, "agreement_weights/std": 0.08024241775274277, "epoch": 0.4178466470745391, "eta/annotator_0": 0.9434669017791748, "grad_norm": 33.07027869516364, "learning_rate": 5.058018542788925e-07, "loss": 0.6219, "rewards/accuracies": 0.765625, "rewards/chosen": -0.37158203125, "rewards/margins": 0.144866943359375, "rewards/rejected": -0.51611328125, "step": 391 }, { "agreement_weights/mean": 0.9367616772651672, "agreement_weights/std": 0.09144885092973709, "epoch": 0.41891530857600856, "eta/annotator_0": 0.9434669017791748, "grad_norm": 59.308068599058956, "learning_rate": 5.046300133235843e-07, "loss": 0.7527, "rewards/accuracies": 0.6875, "rewards/chosen": -0.38525390625, "rewards/margins": 0.1064453125, "rewards/rejected": -0.49072265625, "step": 392 }, { "agreement_weights/mean": 0.949630856513977, "agreement_weights/std": 0.06564650684595108, "epoch": 0.41998397007747795, "eta/annotator_0": 0.9437921047210693, "grad_norm": 38.07899840794374, "learning_rate": 5.034560146186897e-07, "loss": 0.7046, "rewards/accuracies": 0.765625, "rewards/chosen": -0.3984375, "rewards/margins": 0.1409912109375, "rewards/rejected": -0.53955078125, "step": 393 }, { "agreement_weights/mean": 0.9588246941566467, "agreement_weights/std": 0.05029638856649399, "epoch": 0.42105263157894735, "eta/annotator_0": 0.9441172480583191, "grad_norm": 74.20688289100165, "learning_rate": 5.022798745465082e-07, "loss": 0.736, "rewards/accuracies": 0.796875, "rewards/chosen": -0.384765625, "rewards/margins": 0.179443359375, "rewards/rejected": -0.56494140625, "step": 394 }, { "agreement_weights/mean": 0.9635837078094482, "agreement_weights/std": 0.04498417675495148, "epoch": 0.4221212930804168, "eta/annotator_0": 0.9441172480583191, "grad_norm": 26.3552762640728, "learning_rate": 5.011016095192206e-07, "loss": 0.5764, "rewards/accuracies": 0.796875, "rewards/chosen": -0.41455078125, "rewards/margins": 0.18115234375, "rewards/rejected": -0.595703125, "step": 395 }, { "agreement_weights/mean": 0.9459441304206848, "agreement_weights/std": 0.10550408810377121, "epoch": 0.4231899545818862, "eta/annotator_0": 0.9454970359802246, "grad_norm": 39.5472629079614, "learning_rate": 4.999212359786601e-07, "loss": 0.6253, "rewards/accuracies": 0.84375, "rewards/chosen": -0.4736328125, "rewards/margins": 0.1563720703125, "rewards/rejected": -0.6298828125, "step": 396 }, { "agreement_weights/mean": 0.9275963306427002, "agreement_weights/std": 0.12498652935028076, "epoch": 0.4242586160833556, "eta/annotator_0": 0.9454970359802246, "grad_norm": 71.84003584792418, "learning_rate": 4.987387703960828e-07, "loss": 0.7585, "rewards/accuracies": 0.65625, "rewards/chosen": -0.4248046875, "rewards/margins": 0.10162353515625, "rewards/rejected": -0.52734375, "step": 397 }, { "agreement_weights/mean": 0.9247004389762878, "agreement_weights/std": 0.1322248876094818, "epoch": 0.425327277584825, "eta/annotator_0": 0.9462186098098755, "grad_norm": 40.2258311175152, "learning_rate": 4.975542292719374e-07, "loss": 0.7025, "rewards/accuracies": 0.6875, "rewards/chosen": -0.42626953125, "rewards/margins": 0.1007080078125, "rewards/rejected": -0.52734375, "step": 398 }, { "agreement_weights/mean": 0.9316996335983276, "agreement_weights/std": 0.1303340494632721, "epoch": 0.4263959390862944, "eta/annotator_0": 0.9469401836395264, "grad_norm": 35.047950205860545, "learning_rate": 4.963676291356352e-07, "loss": 0.6293, "rewards/accuracies": 0.796875, "rewards/chosen": -0.4189453125, "rewards/margins": 0.126220703125, "rewards/rejected": -0.544921875, "step": 399 }, { "agreement_weights/mean": 0.9114530682563782, "agreement_weights/std": 0.14493198692798615, "epoch": 0.4274646005877638, "eta/annotator_0": 0.9469401836395264, "grad_norm": 29.123632544412192, "learning_rate": 4.951789865453201e-07, "loss": 0.6976, "rewards/accuracies": 0.640625, "rewards/chosen": -0.4033203125, "rewards/margins": 0.081298828125, "rewards/rejected": -0.4853515625, "step": 400 }, { "epoch": 0.4274646005877638, "eta/annotator_0": 0.9420806765556335, "eval_agreement_weights/mean": 0.9354277849197388, "eval_agreement_weights/std": 0.10748600959777832, "eval_loss": 0.6722361445426941, "eval_rewards/accuracies": 0.7471770644187927, "eval_rewards/chosen": -0.42536044120788574, "eval_rewards/margins": 0.13878464698791504, "eval_rewards/rejected": -0.5641752481460571, "eval_runtime": 134.8829, "eval_samples_per_second": 14.539, "eval_steps_per_second": 0.912, "step": 400 }, { "agreement_weights/mean": 0.9179915189743042, "agreement_weights/std": 0.14457300305366516, "epoch": 0.42853326208923326, "eta/annotator_0": 0.9347814321517944, "grad_norm": 41.09593833338175, "learning_rate": 4.939883180876362e-07, "loss": 0.728, "rewards/accuracies": 0.71875, "rewards/chosen": -0.38427734375, "rewards/margins": 0.1046142578125, "rewards/rejected": -0.4892578125, "step": 401 }, { "agreement_weights/mean": 0.9437952041625977, "agreement_weights/std": 0.09301115572452545, "epoch": 0.42960192359070265, "eta/annotator_0": 0.9339626431465149, "grad_norm": 30.752883581801893, "learning_rate": 4.927956403774977e-07, "loss": 0.6004, "rewards/accuracies": 0.78125, "rewards/chosen": -0.419921875, "rewards/margins": 0.160888671875, "rewards/rejected": -0.580078125, "step": 402 }, { "agreement_weights/mean": 0.9403536319732666, "agreement_weights/std": 0.09911751002073288, "epoch": 0.43067058509217204, "eta/annotator_0": 0.9315064549446106, "grad_norm": 31.48547265044142, "learning_rate": 4.916009700578563e-07, "loss": 0.604, "rewards/accuracies": 0.765625, "rewards/chosen": -0.45556640625, "rewards/margins": 0.1695556640625, "rewards/rejected": -0.625, "step": 403 }, { "agreement_weights/mean": 0.91637122631073, "agreement_weights/std": 0.16332992911338806, "epoch": 0.4317392465936415, "eta/annotator_0": 0.9315064549446106, "grad_norm": 55.4212904493065, "learning_rate": 4.90404323799469e-07, "loss": 0.5967, "rewards/accuracies": 0.8125, "rewards/chosen": -0.46240234375, "rewards/margins": 0.1246337890625, "rewards/rejected": -0.58740234375, "step": 404 }, { "agreement_weights/mean": 0.9542344212532043, "agreement_weights/std": 0.07162505388259888, "epoch": 0.4328079080951109, "eta/annotator_0": 0.9322410225868225, "grad_norm": 35.11501262625934, "learning_rate": 4.892057183006656e-07, "loss": 0.5658, "rewards/accuracies": 0.828125, "rewards/chosen": -0.4150390625, "rewards/margins": 0.1982421875, "rewards/rejected": -0.61328125, "step": 405 }, { "agreement_weights/mean": 0.9559072256088257, "agreement_weights/std": 0.048725761473178864, "epoch": 0.4338765695965803, "eta/annotator_0": 0.9324858784675598, "grad_norm": 33.00634630111288, "learning_rate": 4.880051702871159e-07, "loss": 0.6778, "rewards/accuracies": 0.78125, "rewards/chosen": -0.44091796875, "rewards/margins": 0.1417236328125, "rewards/rejected": -0.58203125, "step": 406 }, { "agreement_weights/mean": 0.941891074180603, "agreement_weights/std": 0.10602220147848129, "epoch": 0.43494523109804967, "eta/annotator_0": 0.9331755638122559, "grad_norm": 87.3798331959893, "learning_rate": 4.868026965115957e-07, "loss": 0.7418, "rewards/accuracies": 0.765625, "rewards/chosen": -0.43896484375, "rewards/margins": 0.185546875, "rewards/rejected": -0.62548828125, "step": 407 }, { "agreement_weights/mean": 0.9335135221481323, "agreement_weights/std": 0.11605003476142883, "epoch": 0.4360138925995191, "eta/annotator_0": 0.9352446794509888, "grad_norm": 33.181163765052226, "learning_rate": 4.85598313753754e-07, "loss": 0.6131, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5263671875, "rewards/margins": 0.150634765625, "rewards/rejected": -0.6767578125, "step": 408 }, { "agreement_weights/mean": 0.8713384866714478, "agreement_weights/std": 0.21984301507472992, "epoch": 0.4370825541009885, "eta/annotator_0": 0.9352446794509888, "grad_norm": 51.45580197817427, "learning_rate": 4.843920388198775e-07, "loss": 0.5625, "rewards/accuracies": 0.734375, "rewards/chosen": -0.46630859375, "rewards/margins": 0.125701904296875, "rewards/rejected": -0.591796875, "step": 409 }, { "agreement_weights/mean": 0.9214959144592285, "agreement_weights/std": 0.12381689250469208, "epoch": 0.4381512156024579, "eta/annotator_0": 0.9284486174583435, "grad_norm": 36.4374495411632, "learning_rate": 4.831838885426574e-07, "loss": 0.7384, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5029296875, "rewards/margins": 0.1141357421875, "rewards/rejected": -0.6162109375, "step": 410 }, { "agreement_weights/mean": 0.9488735198974609, "agreement_weights/std": 0.07992416620254517, "epoch": 0.43921987710392735, "eta/annotator_0": 0.92618328332901, "grad_norm": 28.117570148064658, "learning_rate": 4.819738797809538e-07, "loss": 0.6265, "rewards/accuracies": 0.828125, "rewards/chosen": -0.505859375, "rewards/margins": 0.160888671875, "rewards/rejected": -0.6669921875, "step": 411 }, { "agreement_weights/mean": 0.9307898283004761, "agreement_weights/std": 0.10360471904277802, "epoch": 0.44028853860539674, "eta/annotator_0": 0.92670738697052, "grad_norm": 46.50538708244955, "learning_rate": 4.807620294195608e-07, "loss": 0.6862, "rewards/accuracies": 0.71875, "rewards/chosen": -0.46630859375, "rewards/margins": 0.12152099609375, "rewards/rejected": -0.58740234375, "step": 412 }, { "agreement_weights/mean": 0.9402911067008972, "agreement_weights/std": 0.08362613618373871, "epoch": 0.44135720010686613, "eta/annotator_0": 0.9282795190811157, "grad_norm": 83.09237980118523, "learning_rate": 4.795483543689701e-07, "loss": 0.8551, "rewards/accuracies": 0.75, "rewards/chosen": -0.4365234375, "rewards/margins": 0.171875, "rewards/rejected": -0.607421875, "step": 413 }, { "agreement_weights/mean": 0.942559003829956, "agreement_weights/std": 0.10409757494926453, "epoch": 0.4424258616083356, "eta/annotator_0": 0.9282795190811157, "grad_norm": 38.83489804089788, "learning_rate": 4.783328715651361e-07, "loss": 0.5844, "rewards/accuracies": 0.734375, "rewards/chosen": -0.50390625, "rewards/margins": 0.1448974609375, "rewards/rejected": -0.6484375, "step": 414 }, { "agreement_weights/mean": 0.9363652467727661, "agreement_weights/std": 0.11105993390083313, "epoch": 0.443494523109805, "eta/annotator_0": 0.9296146035194397, "grad_norm": 48.06544166436769, "learning_rate": 4.771155979692391e-07, "loss": 0.6573, "rewards/accuracies": 0.75, "rewards/chosen": -0.50634765625, "rewards/margins": 0.148681640625, "rewards/rejected": -0.654296875, "step": 415 }, { "agreement_weights/mean": 0.9490490555763245, "agreement_weights/std": 0.0829852744936943, "epoch": 0.44456318461127436, "eta/annotator_0": 0.9300596714019775, "grad_norm": 149.69972007521298, "learning_rate": 4.758965505674486e-07, "loss": 0.8506, "rewards/accuracies": 0.765625, "rewards/chosen": -0.42431640625, "rewards/margins": 0.23583984375, "rewards/rejected": -0.6591796875, "step": 416 }, { "agreement_weights/mean": 0.9493324756622314, "agreement_weights/std": 0.08508037030696869, "epoch": 0.4456318461127438, "eta/annotator_0": 0.9298475980758667, "grad_norm": 31.50178544467208, "learning_rate": 4.74675746370686e-07, "loss": 0.563, "rewards/accuracies": 0.78125, "rewards/chosen": -0.48291015625, "rewards/margins": 0.2054443359375, "rewards/rejected": -0.6884765625, "step": 417 }, { "agreement_weights/mean": 0.9314456582069397, "agreement_weights/std": 0.10597775876522064, "epoch": 0.4467005076142132, "eta/annotator_0": 0.929211437702179, "grad_norm": 64.25136611018178, "learning_rate": 4.734532024143875e-07, "loss": 0.7474, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5205078125, "rewards/margins": 0.203125, "rewards/rejected": -0.7236328125, "step": 418 }, { "agreement_weights/mean": 0.9302273392677307, "agreement_weights/std": 0.13446225225925446, "epoch": 0.4477691691156826, "eta/annotator_0": 0.929211437702179, "grad_norm": 49.642451722517244, "learning_rate": 4.722289357582669e-07, "loss": 0.586, "rewards/accuracies": 0.796875, "rewards/chosen": -0.54248046875, "rewards/margins": 0.1783447265625, "rewards/rejected": -0.7197265625, "step": 419 }, { "agreement_weights/mean": 0.9378600120544434, "agreement_weights/std": 0.11922898143529892, "epoch": 0.448837830617152, "eta/annotator_0": 0.9297010898590088, "grad_norm": 33.339252480214405, "learning_rate": 4.710029634860764e-07, "loss": 0.6026, "rewards/accuracies": 0.71875, "rewards/chosen": -0.51806640625, "rewards/margins": 0.1806640625, "rewards/rejected": -0.6982421875, "step": 420 }, { "agreement_weights/mean": 0.9289023876190186, "agreement_weights/std": 0.14499303698539734, "epoch": 0.44990649211862144, "eta/annotator_0": 0.9298642873764038, "grad_norm": 98.70231687582944, "learning_rate": 4.697753027053692e-07, "loss": 0.6592, "rewards/accuracies": 0.828125, "rewards/chosen": -0.55126953125, "rewards/margins": 0.1759033203125, "rewards/rejected": -0.728515625, "step": 421 }, { "agreement_weights/mean": 0.9552161693572998, "agreement_weights/std": 0.06708699464797974, "epoch": 0.45097515362009083, "eta/annotator_0": 0.9306774735450745, "grad_norm": 34.16923065000918, "learning_rate": 4.6854597054726014e-07, "loss": 0.5824, "rewards/accuracies": 0.796875, "rewards/chosen": -0.51904296875, "rewards/margins": 0.189208984375, "rewards/rejected": -0.7080078125, "step": 422 }, { "agreement_weights/mean": 0.9288409948348999, "agreement_weights/std": 0.12205024808645248, "epoch": 0.4520438151215602, "eta/annotator_0": 0.9331170916557312, "grad_norm": 39.04028751014513, "learning_rate": 4.6731498416618733e-07, "loss": 0.6726, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5380859375, "rewards/margins": 0.127838134765625, "rewards/rejected": -0.6650390625, "step": 423 }, { "agreement_weights/mean": 0.9308619499206543, "agreement_weights/std": 0.13256199657917023, "epoch": 0.45311247662302967, "eta/annotator_0": 0.9331170916557312, "grad_norm": 55.164876261530424, "learning_rate": 4.6608236073967193e-07, "loss": 0.5958, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5712890625, "rewards/margins": 0.13720703125, "rewards/rejected": -0.7080078125, "step": 424 }, { "agreement_weights/mean": 0.9029659032821655, "agreement_weights/std": 0.16361857950687408, "epoch": 0.45418113812449906, "eta/annotator_0": 0.9289695620536804, "grad_norm": 56.03425630759201, "learning_rate": 4.648481174680792e-07, "loss": 0.6748, "rewards/accuracies": 0.6875, "rewards/chosen": -0.578125, "rewards/margins": 0.1097412109375, "rewards/rejected": -0.6875, "step": 425 }, { "agreement_weights/mean": 0.9188280701637268, "agreement_weights/std": 0.13882429897785187, "epoch": 0.45524979962596845, "eta/annotator_0": 0.9275870323181152, "grad_norm": 70.8775367011661, "learning_rate": 4.6361227157437795e-07, "loss": 0.7554, "rewards/accuracies": 0.6875, "rewards/chosen": -0.54052734375, "rewards/margins": 0.150634765625, "rewards/rejected": -0.6923828125, "step": 426 }, { "agreement_weights/mean": 0.9305248856544495, "agreement_weights/std": 0.10340765863656998, "epoch": 0.4563184611274379, "eta/annotator_0": 0.9278191328048706, "grad_norm": 36.83087173858004, "learning_rate": 4.623748403039006e-07, "loss": 0.6892, "rewards/accuracies": 0.78125, "rewards/chosen": -0.55859375, "rewards/margins": 0.137786865234375, "rewards/rejected": -0.697265625, "step": 427 }, { "agreement_weights/mean": 0.920667827129364, "agreement_weights/std": 0.14765219390392303, "epoch": 0.4573871226289073, "eta/annotator_0": 0.9285155534744263, "grad_norm": 27.162853165824462, "learning_rate": 4.611358409241022e-07, "loss": 0.5951, "rewards/accuracies": 0.765625, "rewards/chosen": -0.556640625, "rewards/margins": 0.160888671875, "rewards/rejected": -0.7177734375, "step": 428 }, { "agreement_weights/mean": 0.9295822381973267, "agreement_weights/std": 0.12040191143751144, "epoch": 0.4584557841303767, "eta/annotator_0": 0.9285155534744263, "grad_norm": 42.15490304722067, "learning_rate": 4.598952907243195e-07, "loss": 0.5485, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5126953125, "rewards/margins": 0.192138671875, "rewards/rejected": -0.705078125, "step": 429 }, { "agreement_weights/mean": 0.886294960975647, "agreement_weights/std": 0.2047816812992096, "epoch": 0.45952444563184613, "eta/annotator_0": 0.9283809661865234, "grad_norm": 62.58418016993378, "learning_rate": 4.586532070155303e-07, "loss": 0.7066, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5703125, "rewards/margins": 0.081787109375, "rewards/rejected": -0.6513671875, "step": 430 }, { "agreement_weights/mean": 0.9325924515724182, "agreement_weights/std": 0.12267883121967316, "epoch": 0.4605931071333155, "eta/annotator_0": 0.9283361434936523, "grad_norm": 75.17215387327367, "learning_rate": 4.574096071301109e-07, "loss": 0.6339, "rewards/accuracies": 0.75, "rewards/chosen": -0.623046875, "rewards/margins": 0.17449951171875, "rewards/rejected": -0.796875, "step": 431 }, { "agreement_weights/mean": 0.9295076131820679, "agreement_weights/std": 0.1299046277999878, "epoch": 0.4616617686347849, "eta/annotator_0": 0.9275649785995483, "grad_norm": 53.99266948517106, "learning_rate": 4.5616450842159506e-07, "loss": 0.5625, "rewards/accuracies": 0.78125, "rewards/chosen": -0.626953125, "rewards/margins": 0.1798095703125, "rewards/rejected": -0.8076171875, "step": 432 }, { "agreement_weights/mean": 0.9318355917930603, "agreement_weights/std": 0.10017045587301254, "epoch": 0.46273043013625437, "eta/annotator_0": 0.9252516031265259, "grad_norm": 33.73310261870168, "learning_rate": 4.5491792826443125e-07, "loss": 0.678, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5341796875, "rewards/margins": 0.1383056640625, "rewards/rejected": -0.6708984375, "step": 433 }, { "agreement_weights/mean": 0.9132512807846069, "agreement_weights/std": 0.12356273084878922, "epoch": 0.46379909163772376, "eta/annotator_0": 0.9252516031265259, "grad_norm": 34.754606976070264, "learning_rate": 4.5366988405374076e-07, "loss": 0.6925, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6005859375, "rewards/margins": 0.1009674072265625, "rewards/rejected": -0.701171875, "step": 434 }, { "agreement_weights/mean": 0.939684271812439, "agreement_weights/std": 0.07840539515018463, "epoch": 0.46486775313919315, "eta/annotator_0": 0.9275141954421997, "grad_norm": 46.033745392814744, "learning_rate": 4.5242039320507464e-07, "loss": 0.6917, "rewards/accuracies": 0.6875, "rewards/chosen": -0.55078125, "rewards/margins": 0.1622314453125, "rewards/rejected": -0.712890625, "step": 435 }, { "agreement_weights/mean": 0.8976403474807739, "agreement_weights/std": 0.17955273389816284, "epoch": 0.46593641464066254, "eta/annotator_0": 0.928268313407898, "grad_norm": 51.668930461743955, "learning_rate": 4.5116947315417046e-07, "loss": 0.7337, "rewards/accuracies": 0.671875, "rewards/chosen": -0.591796875, "rewards/margins": 0.13311767578125, "rewards/rejected": -0.724609375, "step": 436 }, { "agreement_weights/mean": 0.9476768374443054, "agreement_weights/std": 0.06834861636161804, "epoch": 0.467005076142132, "eta/annotator_0": 0.9271516799926758, "grad_norm": 54.522796684076624, "learning_rate": 4.499171413567096e-07, "loss": 0.6109, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5615234375, "rewards/margins": 0.1741943359375, "rewards/rejected": -0.736328125, "step": 437 }, { "agreement_weights/mean": 0.9575808048248291, "agreement_weights/std": 0.06187209486961365, "epoch": 0.4680737376436014, "eta/annotator_0": 0.9238017797470093, "grad_norm": 47.44922765693947, "learning_rate": 4.4866341528807315e-07, "loss": 0.527, "rewards/accuracies": 0.859375, "rewards/chosen": -0.52294921875, "rewards/margins": 0.20458984375, "rewards/rejected": -0.7275390625, "step": 438 }, { "agreement_weights/mean": 0.9501718282699585, "agreement_weights/std": 0.0748201459646225, "epoch": 0.4691423991450708, "eta/annotator_0": 0.9238017797470093, "grad_norm": 45.08594577997583, "learning_rate": 4.4740831244309837e-07, "loss": 0.6227, "rewards/accuracies": 0.8125, "rewards/chosen": -0.544921875, "rewards/margins": 0.2099609375, "rewards/rejected": -0.75390625, "step": 439 }, { "agreement_weights/mean": 0.9511173963546753, "agreement_weights/std": 0.07031013816595078, "epoch": 0.4702110606465402, "eta/annotator_0": 0.9274531006813049, "grad_norm": 27.953362776400617, "learning_rate": 4.461518503358341e-07, "loss": 0.6303, "rewards/accuracies": 0.78125, "rewards/chosen": -0.56689453125, "rewards/margins": 0.1529541015625, "rewards/rejected": -0.7197265625, "step": 440 }, { "agreement_weights/mean": 0.9155597686767578, "agreement_weights/std": 0.16592727601528168, "epoch": 0.4712797221480096, "eta/annotator_0": 0.9286702275276184, "grad_norm": 67.78497843271495, "learning_rate": 4.448940464992973e-07, "loss": 0.6343, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5400390625, "rewards/margins": 0.187255859375, "rewards/rejected": -0.7275390625, "step": 441 }, { "agreement_weights/mean": 0.94297194480896, "agreement_weights/std": 0.11695007979869843, "epoch": 0.472348383649479, "eta/annotator_0": 0.9276463985443115, "grad_norm": 34.73666849705776, "learning_rate": 4.436349184852274e-07, "loss": 0.5396, "rewards/accuracies": 0.78125, "rewards/chosen": -0.49365234375, "rewards/margins": 0.1875, "rewards/rejected": -0.6796875, "step": 442 }, { "agreement_weights/mean": 0.8964833617210388, "agreement_weights/std": 0.20092037320137024, "epoch": 0.47341704515094846, "eta/annotator_0": 0.9245747327804565, "grad_norm": 31.098787052603107, "learning_rate": 4.4237448386384144e-07, "loss": 0.6051, "rewards/accuracies": 0.75, "rewards/chosen": -0.5771484375, "rewards/margins": 0.118896484375, "rewards/rejected": -0.6962890625, "step": 443 }, { "agreement_weights/mean": 0.9481425285339355, "agreement_weights/std": 0.11298349499702454, "epoch": 0.47448570665241785, "eta/annotator_0": 0.9245747327804565, "grad_norm": 43.286057518319254, "learning_rate": 4.411127602235898e-07, "loss": 0.4958, "rewards/accuracies": 0.796875, "rewards/chosen": -0.52392578125, "rewards/margins": 0.238037109375, "rewards/rejected": -0.7626953125, "step": 444 }, { "agreement_weights/mean": 0.9102756381034851, "agreement_weights/std": 0.18892483413219452, "epoch": 0.47555436815388724, "eta/annotator_0": 0.9242154359817505, "grad_norm": 45.32636870124321, "learning_rate": 4.398497651709102e-07, "loss": 0.5788, "rewards/accuracies": 0.8125, "rewards/chosen": -0.57421875, "rewards/margins": 0.177978515625, "rewards/rejected": -0.7509765625, "step": 445 }, { "agreement_weights/mean": 0.9267787933349609, "agreement_weights/std": 0.12237860262393951, "epoch": 0.4766230296553567, "eta/annotator_0": 0.9240957498550415, "grad_norm": 37.80388763852777, "learning_rate": 4.3858551632998154e-07, "loss": 0.615, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5732421875, "rewards/margins": 0.155029296875, "rewards/rejected": -0.7294921875, "step": 446 }, { "agreement_weights/mean": 0.9294930696487427, "agreement_weights/std": 0.13396616280078888, "epoch": 0.4776916911568261, "eta/annotator_0": 0.9251536130905151, "grad_norm": 52.05707137970486, "learning_rate": 4.3732003134247856e-07, "loss": 0.654, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6298828125, "rewards/margins": 0.172119140625, "rewards/rejected": -0.802734375, "step": 447 }, { "agreement_weights/mean": 0.9476921558380127, "agreement_weights/std": 0.07889731228351593, "epoch": 0.4787603526582955, "eta/annotator_0": 0.9283269643783569, "grad_norm": 59.46394101169703, "learning_rate": 4.360533278673258e-07, "loss": 0.6914, "rewards/accuracies": 0.8125, "rewards/chosen": -0.56787109375, "rewards/margins": 0.1953125, "rewards/rejected": -0.763671875, "step": 448 }, { "agreement_weights/mean": 0.8960087299346924, "agreement_weights/std": 0.16106556355953217, "epoch": 0.4798290141597649, "eta/annotator_0": 0.9283269643783569, "grad_norm": 55.653415843238335, "learning_rate": 4.3478542358045067e-07, "loss": 0.7525, "rewards/accuracies": 0.640625, "rewards/chosen": -0.59765625, "rewards/margins": 0.12109375, "rewards/rejected": -0.7177734375, "step": 449 }, { "agreement_weights/mean": 0.9419088363647461, "agreement_weights/std": 0.08810336142778397, "epoch": 0.4808976756612343, "eta/annotator_0": 0.9265053272247314, "grad_norm": 107.77307573717101, "learning_rate": 4.335163361745371e-07, "loss": 0.6216, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6083984375, "rewards/margins": 0.215087890625, "rewards/rejected": -0.82421875, "step": 450 }, { "agreement_weights/mean": 0.9405092000961304, "agreement_weights/std": 0.0863238200545311, "epoch": 0.4819663371627037, "eta/annotator_0": 0.9258981943130493, "grad_norm": 42.9898243986632, "learning_rate": 4.3224608335877844e-07, "loss": 0.6586, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5810546875, "rewards/margins": 0.1434326171875, "rewards/rejected": -0.7236328125, "step": 451 }, { "agreement_weights/mean": 0.9205235242843628, "agreement_weights/std": 0.12368767708539963, "epoch": 0.4830349986641731, "eta/annotator_0": 0.9259971380233765, "grad_norm": 54.27226140426254, "learning_rate": 4.3097468285863083e-07, "loss": 0.6849, "rewards/accuracies": 0.734375, "rewards/chosen": -0.556640625, "rewards/margins": 0.1839599609375, "rewards/rejected": -0.740234375, "step": 452 }, { "agreement_weights/mean": 0.9066325426101685, "agreement_weights/std": 0.15796375274658203, "epoch": 0.48410366016564255, "eta/annotator_0": 0.9262938499450684, "grad_norm": 72.07004622167123, "learning_rate": 4.2970215241556523e-07, "loss": 0.7122, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5712890625, "rewards/margins": 0.1124267578125, "rewards/rejected": -0.6826171875, "step": 453 }, { "agreement_weights/mean": 0.9280672073364258, "agreement_weights/std": 0.12473461031913757, "epoch": 0.48517232166711194, "eta/annotator_0": 0.9262938499450684, "grad_norm": 30.097293473214325, "learning_rate": 4.284285097868202e-07, "loss": 0.5825, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5693359375, "rewards/margins": 0.2015380859375, "rewards/rejected": -0.771484375, "step": 454 }, { "agreement_weights/mean": 0.9299594163894653, "agreement_weights/std": 0.10940495133399963, "epoch": 0.48624098316858133, "eta/annotator_0": 0.9275718331336975, "grad_norm": 51.933678441356314, "learning_rate": 4.271537727451541e-07, "loss": 0.6556, "rewards/accuracies": 0.6875, "rewards/chosen": -0.583984375, "rewards/margins": 0.1522216796875, "rewards/rejected": -0.736328125, "step": 455 }, { "agreement_weights/mean": 0.9456788301467896, "agreement_weights/std": 0.08971020579338074, "epoch": 0.4873096446700508, "eta/annotator_0": 0.927997887134552, "grad_norm": 39.91392700831833, "learning_rate": 4.2587795907859694e-07, "loss": 0.5432, "rewards/accuracies": 0.78125, "rewards/chosen": -0.517578125, "rewards/margins": 0.20654296875, "rewards/rejected": -0.72265625, "step": 456 }, { "agreement_weights/mean": 0.942789614200592, "agreement_weights/std": 0.12014167010784149, "epoch": 0.48837830617152017, "eta/annotator_0": 0.9285774230957031, "grad_norm": 51.24645716711908, "learning_rate": 4.2460108659020234e-07, "loss": 0.5962, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6064453125, "rewards/margins": 0.239013671875, "rewards/rejected": -0.845703125, "step": 457 }, { "agreement_weights/mean": 0.940678060054779, "agreement_weights/std": 0.09003544598817825, "epoch": 0.48944696767298956, "eta/annotator_0": 0.9303159117698669, "grad_norm": 42.927620359716364, "learning_rate": 4.2332317309779904e-07, "loss": 0.6622, "rewards/accuracies": 0.71875, "rewards/chosen": -0.564453125, "rewards/margins": 0.163330078125, "rewards/rejected": -0.7265625, "step": 458 }, { "agreement_weights/mean": 0.9170008897781372, "agreement_weights/std": 0.13736838102340698, "epoch": 0.490515629174459, "eta/annotator_0": 0.9303159117698669, "grad_norm": 36.000798909434316, "learning_rate": 4.220442364337419e-07, "loss": 0.636, "rewards/accuracies": 0.703125, "rewards/chosen": -0.546875, "rewards/margins": 0.1192626953125, "rewards/rejected": -0.666015625, "step": 459 }, { "agreement_weights/mean": 0.9157631993293762, "agreement_weights/std": 0.13756585121154785, "epoch": 0.4915842906759284, "eta/annotator_0": 0.931708574295044, "grad_norm": 48.46830021093893, "learning_rate": 4.2076429444466384e-07, "loss": 0.7202, "rewards/accuracies": 0.703125, "rewards/chosen": -0.6083984375, "rewards/margins": 0.158447265625, "rewards/rejected": -0.7666015625, "step": 460 }, { "agreement_weights/mean": 0.9284616112709045, "agreement_weights/std": 0.1379978507757187, "epoch": 0.4926529521773978, "eta/annotator_0": 0.9321728944778442, "grad_norm": 33.58198262710419, "learning_rate": 4.194833649912263e-07, "loss": 0.5886, "rewards/accuracies": 0.75, "rewards/chosen": -0.51416015625, "rewards/margins": 0.150634765625, "rewards/rejected": -0.6650390625, "step": 461 }, { "agreement_weights/mean": 0.9653503894805908, "agreement_weights/std": 0.06810532510280609, "epoch": 0.49372161367886724, "eta/annotator_0": 0.9328906536102295, "grad_norm": 34.95609624321963, "learning_rate": 4.1820146594786967e-07, "loss": 0.4778, "rewards/accuracies": 0.890625, "rewards/chosen": -0.6025390625, "rewards/margins": 0.28271484375, "rewards/rejected": -0.88671875, "step": 462 }, { "agreement_weights/mean": 0.9077703952789307, "agreement_weights/std": 0.16467536985874176, "epoch": 0.49479027518033664, "eta/annotator_0": 0.9350438117980957, "grad_norm": 29.19511162942957, "learning_rate": 4.1691861520256466e-07, "loss": 0.5993, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6015625, "rewards/margins": 0.1650390625, "rewards/rejected": -0.767578125, "step": 463 }, { "agreement_weights/mean": 0.9267793893814087, "agreement_weights/std": 0.13665784895420074, "epoch": 0.49585893668180603, "eta/annotator_0": 0.9350438117980957, "grad_norm": 37.480735510650284, "learning_rate": 4.156348306565623e-07, "loss": 0.629, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6083984375, "rewards/margins": 0.189453125, "rewards/rejected": -0.7998046875, "step": 464 }, { "agreement_weights/mean": 0.9298104643821716, "agreement_weights/std": 0.13772906363010406, "epoch": 0.4969275981832754, "eta/annotator_0": 0.9323989748954773, "grad_norm": 35.49418403536773, "learning_rate": 4.1435013022414387e-07, "loss": 0.5932, "rewards/accuracies": 0.765625, "rewards/chosen": -0.54345703125, "rewards/margins": 0.218505859375, "rewards/rejected": -0.76171875, "step": 465 }, { "agreement_weights/mean": 0.9444019794464111, "agreement_weights/std": 0.11026252061128616, "epoch": 0.49799625968474487, "eta/annotator_0": 0.9315174221992493, "grad_norm": 32.82176398770301, "learning_rate": 4.1306453183237133e-07, "loss": 0.4878, "rewards/accuracies": 0.765625, "rewards/chosen": -0.560546875, "rewards/margins": 0.24560546875, "rewards/rejected": -0.806640625, "step": 466 }, { "agreement_weights/mean": 0.9048218727111816, "agreement_weights/std": 0.1781749129295349, "epoch": 0.49906492118621426, "eta/annotator_0": 0.9305889010429382, "grad_norm": 90.07256106701772, "learning_rate": 4.11778053420837e-07, "loss": 0.7982, "rewards/accuracies": 0.734375, "rewards/chosen": -0.49853515625, "rewards/margins": 0.2034912109375, "rewards/rejected": -0.7021484375, "step": 467 }, { "agreement_weights/mean": 0.8984601497650146, "agreement_weights/std": 0.17158283293247223, "epoch": 0.5001335826876837, "eta/annotator_0": 0.9278032183647156, "grad_norm": 71.48193913791167, "learning_rate": 4.104907129414133e-07, "loss": 0.6972, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5966796875, "rewards/margins": 0.129730224609375, "rewards/rejected": -0.7265625, "step": 468 }, { "agreement_weights/mean": 0.9434419870376587, "agreement_weights/std": 0.08441269397735596, "epoch": 0.501202244189153, "eta/annotator_0": 0.9278032183647156, "grad_norm": 34.94028496283814, "learning_rate": 4.09202528358002e-07, "loss": 0.6008, "rewards/accuracies": 0.75, "rewards/chosen": -0.60009765625, "rewards/margins": 0.169677734375, "rewards/rejected": -0.7705078125, "step": 469 }, { "agreement_weights/mean": 0.9209120273590088, "agreement_weights/std": 0.14718285202980042, "epoch": 0.5022709056906225, "eta/annotator_0": 0.9261964559555054, "grad_norm": 42.501603363906156, "learning_rate": 4.0791351764628387e-07, "loss": 0.6641, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5029296875, "rewards/margins": 0.15185546875, "rewards/rejected": -0.654296875, "step": 470 }, { "agreement_weights/mean": 0.9418652057647705, "agreement_weights/std": 0.07706113159656525, "epoch": 0.5033395671920919, "eta/annotator_0": 0.9256609082221985, "grad_norm": 28.321398990477096, "learning_rate": 4.066236987934677e-07, "loss": 0.6518, "rewards/accuracies": 0.75, "rewards/chosen": -0.50341796875, "rewards/margins": 0.161376953125, "rewards/rejected": -0.6640625, "step": 471 }, { "agreement_weights/mean": 0.9382490515708923, "agreement_weights/std": 0.08945880830287933, "epoch": 0.5044082286935613, "eta/annotator_0": 0.9258279800415039, "grad_norm": 31.592658690715965, "learning_rate": 4.0533308979803916e-07, "loss": 0.6359, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5361328125, "rewards/margins": 0.150390625, "rewards/rejected": -0.6865234375, "step": 472 }, { "agreement_weights/mean": 0.9163960814476013, "agreement_weights/std": 0.1433635652065277, "epoch": 0.5054768901950307, "eta/annotator_0": 0.9263291358947754, "grad_norm": 51.48482446134812, "learning_rate": 4.040417086695101e-07, "loss": 0.6039, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5693359375, "rewards/margins": 0.131591796875, "rewards/rejected": -0.69921875, "step": 473 }, { "agreement_weights/mean": 0.911186933517456, "agreement_weights/std": 0.17390407621860504, "epoch": 0.5065455516965002, "eta/annotator_0": 0.9263291358947754, "grad_norm": 74.01072836540895, "learning_rate": 4.027495734281665e-07, "loss": 0.6412, "rewards/accuracies": 0.75, "rewards/chosen": -0.5712890625, "rewards/margins": 0.1376953125, "rewards/rejected": -0.708984375, "step": 474 }, { "agreement_weights/mean": 0.9384410381317139, "agreement_weights/std": 0.09556357562541962, "epoch": 0.5076142131979695, "eta/annotator_0": 0.9212261438369751, "grad_norm": 29.587393153881607, "learning_rate": 4.0145670210481794e-07, "loss": 0.6163, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5859375, "rewards/margins": 0.14453125, "rewards/rejected": -0.728515625, "step": 475 }, { "agreement_weights/mean": 0.9365406632423401, "agreement_weights/std": 0.1027691513299942, "epoch": 0.508682874699439, "eta/annotator_0": 0.9195252060890198, "grad_norm": 95.15131086094452, "learning_rate": 4.0016311274054504e-07, "loss": 0.7204, "rewards/accuracies": 0.8125, "rewards/chosen": -0.560546875, "rewards/margins": 0.24462890625, "rewards/rejected": -0.8037109375, "step": 476 }, { "agreement_weights/mean": 0.9304564595222473, "agreement_weights/std": 0.10583914071321487, "epoch": 0.5097515362009084, "eta/annotator_0": 0.9196527004241943, "grad_norm": 35.29744978760105, "learning_rate": 3.9886882338644834e-07, "loss": 0.6128, "rewards/accuracies": 0.71875, "rewards/chosen": -0.51611328125, "rewards/margins": 0.1943359375, "rewards/rejected": -0.708984375, "step": 477 }, { "agreement_weights/mean": 0.9022091627120972, "agreement_weights/std": 0.15292316675186157, "epoch": 0.5108201977023777, "eta/annotator_0": 0.9200351238250732, "grad_norm": 61.70867860998361, "learning_rate": 3.9757385210339623e-07, "loss": 0.6146, "rewards/accuracies": 0.71875, "rewards/chosen": -0.50927734375, "rewards/margins": 0.1661376953125, "rewards/rejected": -0.67578125, "step": 478 }, { "agreement_weights/mean": 0.9432966709136963, "agreement_weights/std": 0.0940169095993042, "epoch": 0.5118888592038472, "eta/annotator_0": 0.9200351238250732, "grad_norm": 39.920717877929405, "learning_rate": 3.9627821696177286e-07, "loss": 0.5634, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5400390625, "rewards/margins": 0.1729736328125, "rewards/rejected": -0.7119140625, "step": 479 }, { "agreement_weights/mean": 0.9397154450416565, "agreement_weights/std": 0.10216067731380463, "epoch": 0.5129575207053166, "eta/annotator_0": 0.9175282716751099, "grad_norm": 26.243142859292647, "learning_rate": 3.9498193604122625e-07, "loss": 0.5283, "rewards/accuracies": 0.828125, "rewards/chosen": -0.54736328125, "rewards/margins": 0.209228515625, "rewards/rejected": -0.7568359375, "step": 480 }, { "agreement_weights/mean": 0.9455174803733826, "agreement_weights/std": 0.0712900385260582, "epoch": 0.514026182206786, "eta/annotator_0": 0.9166926145553589, "grad_norm": 201.21610942122152, "learning_rate": 3.936850274304155e-07, "loss": 0.7634, "rewards/accuracies": 0.734375, "rewards/chosen": -0.53662109375, "rewards/margins": 0.2099609375, "rewards/rejected": -0.7470703125, "step": 481 }, { "agreement_weights/mean": 0.9389700293540955, "agreement_weights/std": 0.0930522009730339, "epoch": 0.5150948437082554, "eta/annotator_0": 0.9169044494628906, "grad_norm": 42.33715222473466, "learning_rate": 3.9238750922675887e-07, "loss": 0.5973, "rewards/accuracies": 0.78125, "rewards/chosen": -0.56396484375, "rewards/margins": 0.1685791015625, "rewards/rejected": -0.732421875, "step": 482 }, { "agreement_weights/mean": 0.9275095462799072, "agreement_weights/std": 0.14212074875831604, "epoch": 0.5161635052097249, "eta/annotator_0": 0.9175400733947754, "grad_norm": 228.04402504276678, "learning_rate": 3.910893995361811e-07, "loss": 0.7857, "rewards/accuracies": 0.75, "rewards/chosen": -0.455078125, "rewards/margins": 0.267333984375, "rewards/rejected": -0.7216796875, "step": 483 }, { "agreement_weights/mean": 0.9125629663467407, "agreement_weights/std": 0.13791416585445404, "epoch": 0.5172321667111942, "eta/annotator_0": 0.9175400733947754, "grad_norm": 34.87471971949488, "learning_rate": 3.8979071647286043e-07, "loss": 0.6148, "rewards/accuracies": 0.71875, "rewards/chosen": -0.587890625, "rewards/margins": 0.14385986328125, "rewards/rejected": -0.732421875, "step": 484 }, { "agreement_weights/mean": 0.9107152819633484, "agreement_weights/std": 0.15200546383857727, "epoch": 0.5183008282126637, "eta/annotator_0": 0.915610671043396, "grad_norm": 35.27388502790387, "learning_rate": 3.884914781589763e-07, "loss": 0.6286, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5283203125, "rewards/margins": 0.119140625, "rewards/rejected": -0.6484375, "step": 485 }, { "agreement_weights/mean": 0.9216769337654114, "agreement_weights/std": 0.16147330403327942, "epoch": 0.5193694897141331, "eta/annotator_0": 0.9149674773216248, "grad_norm": 39.8438742758798, "learning_rate": 3.871917027244563e-07, "loss": 0.5628, "rewards/accuracies": 0.796875, "rewards/chosen": -0.52734375, "rewards/margins": 0.176513671875, "rewards/rejected": -0.7021484375, "step": 486 }, { "agreement_weights/mean": 0.9435970783233643, "agreement_weights/std": 0.0853603184223175, "epoch": 0.5204381512156024, "eta/annotator_0": 0.9128120541572571, "grad_norm": 39.88878663847779, "learning_rate": 3.858914083067231e-07, "loss": 0.6273, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5908203125, "rewards/margins": 0.1806640625, "rewards/rejected": -0.7705078125, "step": 487 }, { "agreement_weights/mean": 0.9167801141738892, "agreement_weights/std": 0.16584131121635437, "epoch": 0.5215068127170719, "eta/annotator_0": 0.9063459038734436, "grad_norm": 37.3263343875788, "learning_rate": 3.845906130504414e-07, "loss": 0.6239, "rewards/accuracies": 0.75, "rewards/chosen": -0.59765625, "rewards/margins": 0.1676025390625, "rewards/rejected": -0.7646484375, "step": 488 }, { "agreement_weights/mean": 0.9534734487533569, "agreement_weights/std": 0.07862167805433273, "epoch": 0.5225754742185412, "eta/annotator_0": 0.9063459038734436, "grad_norm": 36.306778692596176, "learning_rate": 3.8328933510726456e-07, "loss": 0.5443, "rewards/accuracies": 0.875, "rewards/chosen": -0.5791015625, "rewards/margins": 0.211181640625, "rewards/rejected": -0.7900390625, "step": 489 }, { "agreement_weights/mean": 0.9015889763832092, "agreement_weights/std": 0.21119841933250427, "epoch": 0.5236441357200107, "eta/annotator_0": 0.9107836484909058, "grad_norm": 63.285063406242905, "learning_rate": 3.819875926355818e-07, "loss": 0.5969, "rewards/accuracies": 0.828125, "rewards/chosen": -0.60205078125, "rewards/margins": 0.18927001953125, "rewards/rejected": -0.7919921875, "step": 490 }, { "agreement_weights/mean": 0.9168173670768738, "agreement_weights/std": 0.12687566876411438, "epoch": 0.5247127972214801, "eta/annotator_0": 0.9122628569602966, "grad_norm": 31.04795043785807, "learning_rate": 3.806854038002643e-07, "loss": 0.6176, "rewards/accuracies": 0.703125, "rewards/chosen": -0.580078125, "rewards/margins": 0.159423828125, "rewards/rejected": -0.7392578125, "step": 491 }, { "agreement_weights/mean": 0.9229525327682495, "agreement_weights/std": 0.14175623655319214, "epoch": 0.5257814587229495, "eta/annotator_0": 0.9128334522247314, "grad_norm": 38.14695353628889, "learning_rate": 3.7938278677241193e-07, "loss": 0.6259, "rewards/accuracies": 0.734375, "rewards/chosen": -0.5888671875, "rewards/margins": 0.15234375, "rewards/rejected": -0.7421875, "step": 492 }, { "agreement_weights/mean": 0.931251049041748, "agreement_weights/std": 0.10870765894651413, "epoch": 0.5268501202244189, "eta/annotator_0": 0.9145452976226807, "grad_norm": 41.16879560718701, "learning_rate": 3.7807975972909984e-07, "loss": 0.6236, "rewards/accuracies": 0.75, "rewards/chosen": -0.52880859375, "rewards/margins": 0.1600341796875, "rewards/rejected": -0.689453125, "step": 493 }, { "agreement_weights/mean": 0.9244405031204224, "agreement_weights/std": 0.1235634833574295, "epoch": 0.5279187817258884, "eta/annotator_0": 0.9145452976226807, "grad_norm": 37.64676387484066, "learning_rate": 3.767763408531244e-07, "loss": 0.6776, "rewards/accuracies": 0.75, "rewards/chosen": -0.55810546875, "rewards/margins": 0.1444091796875, "rewards/rejected": -0.7021484375, "step": 494 }, { "agreement_weights/mean": 0.9590169191360474, "agreement_weights/std": 0.06895105540752411, "epoch": 0.5289874432273577, "eta/annotator_0": 0.9149592518806458, "grad_norm": 34.63547033916929, "learning_rate": 3.7547254833274995e-07, "loss": 0.4792, "rewards/accuracies": 0.890625, "rewards/chosen": -0.5498046875, "rewards/margins": 0.241455078125, "rewards/rejected": -0.791015625, "step": 495 }, { "agreement_weights/mean": 0.9085215330123901, "agreement_weights/std": 0.16053327918052673, "epoch": 0.5300561047288271, "eta/annotator_0": 0.915097177028656, "grad_norm": 60.43535932900263, "learning_rate": 3.741684003614545e-07, "loss": 0.6076, "rewards/accuracies": 0.8125, "rewards/chosen": -0.513671875, "rewards/margins": 0.2313232421875, "rewards/rejected": -0.7431640625, "step": 496 }, { "agreement_weights/mean": 0.9494044780731201, "agreement_weights/std": 0.08614341169595718, "epoch": 0.5311247662302966, "eta/annotator_0": 0.9157938957214355, "grad_norm": 101.68914171177242, "learning_rate": 3.728639151376765e-07, "loss": 0.7069, "rewards/accuracies": 0.8125, "rewards/chosen": -0.529296875, "rewards/margins": 0.273193359375, "rewards/rejected": -0.8017578125, "step": 497 }, { "agreement_weights/mean": 0.9407125115394592, "agreement_weights/std": 0.1229463517665863, "epoch": 0.5321934277317659, "eta/annotator_0": 0.9178842306137085, "grad_norm": 39.089890180571615, "learning_rate": 3.715591108645601e-07, "loss": 0.5169, "rewards/accuracies": 0.828125, "rewards/chosen": -0.54296875, "rewards/margins": 0.203369140625, "rewards/rejected": -0.7470703125, "step": 498 }, { "agreement_weights/mean": 0.9278267025947571, "agreement_weights/std": 0.12547828257083893, "epoch": 0.5332620892332354, "eta/annotator_0": 0.9178842306137085, "grad_norm": 85.15073084323652, "learning_rate": 3.702540057497019e-07, "loss": 0.7919, "rewards/accuracies": 0.75, "rewards/chosen": -0.5751953125, "rewards/margins": 0.2103271484375, "rewards/rejected": -0.78515625, "step": 499 }, { "agreement_weights/mean": 0.9380767941474915, "agreement_weights/std": 0.12165455520153046, "epoch": 0.5343307507347048, "eta/annotator_0": 0.9181987047195435, "grad_norm": 42.64443775094709, "learning_rate": 3.6894861800489616e-07, "loss": 0.5887, "rewards/accuracies": 0.828125, "rewards/chosen": -0.5986328125, "rewards/margins": 0.174072265625, "rewards/rejected": -0.7724609375, "step": 500 }, { "epoch": 0.5343307507347048, "eta/annotator_0": 0.9236183762550354, "eval_agreement_weights/mean": 0.9368982315063477, "eval_agreement_weights/std": 0.1115446612238884, "eval_loss": 0.6003810167312622, "eval_rewards/accuracies": 0.7746160626411438, "eval_rewards/chosen": -0.5754096508026123, "eval_rewards/margins": 0.18882888555526733, "eval_rewards/rejected": -0.7641958594322205, "eval_runtime": 133.8972, "eval_samples_per_second": 14.646, "eval_steps_per_second": 0.919, "step": 500 }, { "agreement_weights/mean": 0.9548032283782959, "agreement_weights/std": 0.08307648450136185, "epoch": 0.5353994122361742, "eta/annotator_0": 0.920089840888977, "grad_norm": 38.82958333783774, "learning_rate": 3.676429658458814e-07, "loss": 0.4744, "rewards/accuracies": 0.828125, "rewards/chosen": -0.646484375, "rewards/margins": 0.258544921875, "rewards/rejected": -0.9052734375, "step": 501 }, { "agreement_weights/mean": 0.9479148387908936, "agreement_weights/std": 0.08506707102060318, "epoch": 0.5364680737376436, "eta/annotator_0": 0.917837917804718, "grad_norm": 27.821681996547362, "learning_rate": 3.6633706749208573e-07, "loss": 0.5585, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5830078125, "rewards/margins": 0.1849365234375, "rewards/rejected": -0.767578125, "step": 502 }, { "agreement_weights/mean": 0.9327526688575745, "agreement_weights/std": 0.10945755243301392, "epoch": 0.537536735239113, "eta/annotator_0": 0.917837917804718, "grad_norm": 47.28483204194552, "learning_rate": 3.6503094116637263e-07, "loss": 0.6504, "rewards/accuracies": 0.734375, "rewards/chosen": -0.578125, "rewards/margins": 0.171875, "rewards/rejected": -0.7490234375, "step": 503 }, { "agreement_weights/mean": 0.934624195098877, "agreement_weights/std": 0.12078063189983368, "epoch": 0.5386053967405824, "eta/annotator_0": 0.9198232889175415, "grad_norm": 54.75004209198961, "learning_rate": 3.637246050947868e-07, "loss": 0.5862, "rewards/accuracies": 0.75, "rewards/chosen": -0.626953125, "rewards/margins": 0.194580078125, "rewards/rejected": -0.8212890625, "step": 504 }, { "agreement_weights/mean": 0.9248836636543274, "agreement_weights/std": 0.13274073600769043, "epoch": 0.5396740582420518, "eta/annotator_0": 0.9218086004257202, "grad_norm": 70.51057836891302, "learning_rate": 3.624180775063e-07, "loss": 0.6631, "rewards/accuracies": 0.75, "rewards/chosen": -0.6220703125, "rewards/margins": 0.1695556640625, "rewards/rejected": -0.79296875, "step": 505 }, { "agreement_weights/mean": 0.9420733451843262, "agreement_weights/std": 0.11411989480257034, "epoch": 0.5407427197435213, "eta/annotator_0": 0.9218086004257202, "grad_norm": 29.356244947174403, "learning_rate": 3.6111137663255605e-07, "loss": 0.5581, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6064453125, "rewards/margins": 0.1781005859375, "rewards/rejected": -0.7841796875, "step": 506 }, { "agreement_weights/mean": 0.9151833057403564, "agreement_weights/std": 0.17966023087501526, "epoch": 0.5418113812449906, "eta/annotator_0": 0.9232973456382751, "grad_norm": 35.79572282878557, "learning_rate": 3.598045207076172e-07, "loss": 0.5671, "rewards/accuracies": 0.78125, "rewards/chosen": -0.66796875, "rewards/margins": 0.1519775390625, "rewards/rejected": -0.8193359375, "step": 507 }, { "agreement_weights/mean": 0.9247288703918457, "agreement_weights/std": 0.14760513603687286, "epoch": 0.5428800427464601, "eta/annotator_0": 0.9232973456382751, "grad_norm": 47.835155106948505, "learning_rate": 3.5849752796770917e-07, "loss": 0.5478, "rewards/accuracies": 0.765625, "rewards/chosen": -0.654296875, "rewards/margins": 0.20654296875, "rewards/rejected": -0.8623046875, "step": 508 }, { "agreement_weights/mean": 0.9591065645217896, "agreement_weights/std": 0.049045272171497345, "epoch": 0.5439487042479295, "eta/annotator_0": 0.9238280057907104, "grad_norm": 73.52487738685221, "learning_rate": 3.5719041665096696e-07, "loss": 0.6189, "rewards/accuracies": 0.859375, "rewards/chosen": -0.52490234375, "rewards/margins": 0.252197265625, "rewards/rejected": -0.77734375, "step": 509 }, { "agreement_weights/mean": 0.9598895311355591, "agreement_weights/std": 0.05210903286933899, "epoch": 0.5450173657493989, "eta/annotator_0": 0.9243587255477905, "grad_norm": 46.884817458065676, "learning_rate": 3.5588320499718003e-07, "loss": 0.5766, "rewards/accuracies": 0.8125, "rewards/chosen": -0.55859375, "rewards/margins": 0.216064453125, "rewards/rejected": -0.7734375, "step": 510 }, { "agreement_weights/mean": 0.9349804520606995, "agreement_weights/std": 0.11939439177513123, "epoch": 0.5460860272508683, "eta/annotator_0": 0.9243587255477905, "grad_norm": 31.24925573330889, "learning_rate": 3.54575911247538e-07, "loss": 0.6315, "rewards/accuracies": 0.734375, "rewards/chosen": -0.591796875, "rewards/margins": 0.193115234375, "rewards/rejected": -0.783203125, "step": 511 }, { "agreement_weights/mean": 0.9297154545783997, "agreement_weights/std": 0.10767495632171631, "epoch": 0.5471546887523377, "eta/annotator_0": 0.9186702370643616, "grad_norm": 41.23406610696608, "learning_rate": 3.5326855364437643e-07, "loss": 0.645, "rewards/accuracies": 0.71875, "rewards/chosen": -0.56640625, "rewards/margins": 0.167877197265625, "rewards/rejected": -0.734375, "step": 512 }, { "agreement_weights/mean": 0.9436109066009521, "agreement_weights/std": 0.0787554532289505, "epoch": 0.5482233502538071, "eta/annotator_0": 0.9186702370643616, "grad_norm": 183.5630329210283, "learning_rate": 3.519611504309214e-07, "loss": 1.0396, "rewards/accuracies": 0.71875, "rewards/chosen": -0.66796875, "rewards/margins": 0.234375, "rewards/rejected": -0.9013671875, "step": 513 }, { "agreement_weights/mean": 0.9190218448638916, "agreement_weights/std": 0.11789419502019882, "epoch": 0.5492920117552765, "eta/annotator_0": 0.9199581742286682, "grad_norm": 31.902655650364242, "learning_rate": 3.5065371985103584e-07, "loss": 0.6625, "rewards/accuracies": 0.703125, "rewards/chosen": -0.62109375, "rewards/margins": 0.177978515625, "rewards/rejected": -0.7998046875, "step": 514 }, { "agreement_weights/mean": 0.9223611354827881, "agreement_weights/std": 0.14832128584384918, "epoch": 0.550360673256746, "eta/annotator_0": 0.9212461113929749, "grad_norm": 36.341855507427994, "learning_rate": 3.493462801489642e-07, "loss": 0.6234, "rewards/accuracies": 0.75, "rewards/chosen": -0.6689453125, "rewards/margins": 0.16796875, "rewards/rejected": -0.8359375, "step": 515 }, { "agreement_weights/mean": 0.9514025449752808, "agreement_weights/std": 0.07426433265209198, "epoch": 0.5514293347582153, "eta/annotator_0": 0.9212461113929749, "grad_norm": 43.483476127672326, "learning_rate": 3.4803884956907865e-07, "loss": 0.5517, "rewards/accuracies": 0.78125, "rewards/chosen": -0.53076171875, "rewards/margins": 0.21240234375, "rewards/rejected": -0.7451171875, "step": 516 }, { "agreement_weights/mean": 0.9127729535102844, "agreement_weights/std": 0.14503851532936096, "epoch": 0.5524979962596848, "eta/annotator_0": 0.9211886525154114, "grad_norm": 50.05817466844725, "learning_rate": 3.467314463556236e-07, "loss": 0.6032, "rewards/accuracies": 0.75, "rewards/chosen": -0.5791015625, "rewards/margins": 0.1328887939453125, "rewards/rejected": -0.7119140625, "step": 517 }, { "agreement_weights/mean": 0.9422519207000732, "agreement_weights/std": 0.11152628809213638, "epoch": 0.5535666577611541, "eta/annotator_0": 0.9211886525154114, "grad_norm": 36.63799673875774, "learning_rate": 3.454240887524619e-07, "loss": 0.4934, "rewards/accuracies": 0.828125, "rewards/chosen": -0.58203125, "rewards/margins": 0.215576171875, "rewards/rejected": -0.7978515625, "step": 518 }, { "agreement_weights/mean": 0.922091543674469, "agreement_weights/std": 0.15051385760307312, "epoch": 0.5546353192626235, "eta/annotator_0": 0.9156377911567688, "grad_norm": 36.79586851575014, "learning_rate": 3.4411679500282e-07, "loss": 0.5901, "rewards/accuracies": 0.734375, "rewards/chosen": -0.630859375, "rewards/margins": 0.1875, "rewards/rejected": -0.8173828125, "step": 519 }, { "agreement_weights/mean": 0.9550167918205261, "agreement_weights/std": 0.10072970390319824, "epoch": 0.555703980764093, "eta/annotator_0": 0.9100868701934814, "grad_norm": 39.896128650981446, "learning_rate": 3.4280958334903306e-07, "loss": 0.4445, "rewards/accuracies": 0.859375, "rewards/chosen": -0.62890625, "rewards/margins": 0.2255859375, "rewards/rejected": -0.853515625, "step": 520 }, { "agreement_weights/mean": 0.9246504306793213, "agreement_weights/std": 0.10943184792995453, "epoch": 0.5567726422655623, "eta/annotator_0": 0.9100868701934814, "grad_norm": 53.021757629419376, "learning_rate": 3.4150247203229075e-07, "loss": 0.6907, "rewards/accuracies": 0.734375, "rewards/chosen": -0.625, "rewards/margins": 0.140625, "rewards/rejected": -0.7646484375, "step": 521 }, { "agreement_weights/mean": 0.9254869818687439, "agreement_weights/std": 0.15619289875030518, "epoch": 0.5578413037670318, "eta/annotator_0": 0.9072425961494446, "grad_norm": 43.48986479550807, "learning_rate": 3.401954792923828e-07, "loss": 0.5754, "rewards/accuracies": 0.71875, "rewards/chosen": -0.583984375, "rewards/margins": 0.166259765625, "rewards/rejected": -0.75, "step": 522 }, { "agreement_weights/mean": 0.9548373222351074, "agreement_weights/std": 0.07300148904323578, "epoch": 0.5589099652685012, "eta/annotator_0": 0.9072425961494446, "grad_norm": 47.864409637176095, "learning_rate": 3.3888862336744397e-07, "loss": 0.5906, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5830078125, "rewards/margins": 0.225341796875, "rewards/rejected": -0.80859375, "step": 523 }, { "agreement_weights/mean": 0.928106963634491, "agreement_weights/std": 0.13697852194309235, "epoch": 0.5599786267699706, "eta/annotator_0": 0.9093517065048218, "grad_norm": 31.824237529363508, "learning_rate": 3.3758192249369995e-07, "loss": 0.5604, "rewards/accuracies": 0.765625, "rewards/chosen": -0.59375, "rewards/margins": 0.189208984375, "rewards/rejected": -0.7822265625, "step": 524 }, { "agreement_weights/mean": 0.929425835609436, "agreement_weights/std": 0.13902811706066132, "epoch": 0.56104728827144, "eta/annotator_0": 0.9114608764648438, "grad_norm": 93.37457848342375, "learning_rate": 3.3627539490521316e-07, "loss": 0.7662, "rewards/accuracies": 0.75, "rewards/chosen": -0.623046875, "rewards/margins": 0.167083740234375, "rewards/rejected": -0.7900390625, "step": 525 }, { "agreement_weights/mean": 0.9165904521942139, "agreement_weights/std": 0.13442540168762207, "epoch": 0.5621159497729095, "eta/annotator_0": 0.9114608764648438, "grad_norm": 58.872324548668225, "learning_rate": 3.3496905883362734e-07, "loss": 0.6074, "rewards/accuracies": 0.765625, "rewards/chosen": -0.52392578125, "rewards/margins": 0.177001953125, "rewards/rejected": -0.701171875, "step": 526 }, { "agreement_weights/mean": 0.9271196126937866, "agreement_weights/std": 0.11869855970144272, "epoch": 0.5631846112743788, "eta/annotator_0": 0.9123699069023132, "grad_norm": 35.38196217635175, "learning_rate": 3.3366293250791434e-07, "loss": 0.6021, "rewards/accuracies": 0.703125, "rewards/chosen": -0.58203125, "rewards/margins": 0.169921875, "rewards/rejected": -0.751953125, "step": 527 }, { "agreement_weights/mean": 0.934712290763855, "agreement_weights/std": 0.1382947564125061, "epoch": 0.5642532727758482, "eta/annotator_0": 0.9123699069023132, "grad_norm": 38.76285428028757, "learning_rate": 3.3235703415411863e-07, "loss": 0.4752, "rewards/accuracies": 0.828125, "rewards/chosen": -0.5458984375, "rewards/margins": 0.212158203125, "rewards/rejected": -0.7578125, "step": 528 }, { "agreement_weights/mean": 0.9421031475067139, "agreement_weights/std": 0.09719514846801758, "epoch": 0.5653219342773177, "eta/annotator_0": 0.9114477634429932, "grad_norm": 51.278762095605565, "learning_rate": 3.3105138199510386e-07, "loss": 0.6305, "rewards/accuracies": 0.796875, "rewards/chosen": -0.51025390625, "rewards/margins": 0.218017578125, "rewards/rejected": -0.728515625, "step": 529 }, { "agreement_weights/mean": 0.9150844812393188, "agreement_weights/std": 0.13388963043689728, "epoch": 0.566390595778787, "eta/annotator_0": 0.9105256199836731, "grad_norm": 44.06967691182464, "learning_rate": 3.297459942502982e-07, "loss": 0.6494, "rewards/accuracies": 0.734375, "rewards/chosen": -0.607421875, "rewards/margins": 0.12890625, "rewards/rejected": -0.736328125, "step": 530 }, { "agreement_weights/mean": 0.9247009754180908, "agreement_weights/std": 0.16410203278064728, "epoch": 0.5674592572802565, "eta/annotator_0": 0.9105256199836731, "grad_norm": 42.0922065769724, "learning_rate": 3.2844088913543987e-07, "loss": 0.584, "rewards/accuracies": 0.8125, "rewards/chosen": -0.548828125, "rewards/margins": 0.1708984375, "rewards/rejected": -0.7197265625, "step": 531 }, { "agreement_weights/mean": 0.9293538928031921, "agreement_weights/std": 0.15449216961860657, "epoch": 0.5685279187817259, "eta/annotator_0": 0.9048090577125549, "grad_norm": 35.16987580978066, "learning_rate": 3.2713608486232347e-07, "loss": 0.5113, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5771484375, "rewards/margins": 0.208251953125, "rewards/rejected": -0.78515625, "step": 532 }, { "agreement_weights/mean": 0.9448391199111938, "agreement_weights/std": 0.09170086681842804, "epoch": 0.5695965802831953, "eta/annotator_0": 0.9048090577125549, "grad_norm": 35.34046693053653, "learning_rate": 3.258315996385455e-07, "loss": 0.5775, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5576171875, "rewards/margins": 0.204833984375, "rewards/rejected": -0.763671875, "step": 533 }, { "agreement_weights/mean": 0.8995752930641174, "agreement_weights/std": 0.18216389417648315, "epoch": 0.5706652417846647, "eta/annotator_0": 0.9058165550231934, "grad_norm": 26.929744697471357, "learning_rate": 3.2452745166725007e-07, "loss": 0.6056, "rewards/accuracies": 0.703125, "rewards/chosen": -0.5810546875, "rewards/margins": 0.12786865234375, "rewards/rejected": -0.7099609375, "step": 534 }, { "agreement_weights/mean": 0.9680887460708618, "agreement_weights/std": 0.04344375431537628, "epoch": 0.5717339032861342, "eta/annotator_0": 0.9068241119384766, "grad_norm": 45.0016610001361, "learning_rate": 3.2322365914687566e-07, "loss": 0.4792, "rewards/accuracies": 0.828125, "rewards/chosen": -0.564453125, "rewards/margins": 0.255615234375, "rewards/rejected": -0.8203125, "step": 535 }, { "agreement_weights/mean": 0.9403042793273926, "agreement_weights/std": 0.08478496968746185, "epoch": 0.5728025647876035, "eta/annotator_0": 0.9068241119384766, "grad_norm": 40.769864931939466, "learning_rate": 3.219202402709002e-07, "loss": 0.609, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5654296875, "rewards/margins": 0.1607666015625, "rewards/rejected": -0.728515625, "step": 536 }, { "agreement_weights/mean": 0.9106950163841248, "agreement_weights/std": 0.19854885339736938, "epoch": 0.5738712262890729, "eta/annotator_0": 0.9109980463981628, "grad_norm": 34.94117930554764, "learning_rate": 3.2061721322758804e-07, "loss": 0.5567, "rewards/accuracies": 0.796875, "rewards/chosen": -0.580078125, "rewards/margins": 0.160400390625, "rewards/rejected": -0.7412109375, "step": 537 }, { "agreement_weights/mean": 0.9232354164123535, "agreement_weights/std": 0.1657385528087616, "epoch": 0.5749398877905424, "eta/annotator_0": 0.9109980463981628, "grad_norm": 47.7917859379344, "learning_rate": 3.1931459619973573e-07, "loss": 0.5715, "rewards/accuracies": 0.75, "rewards/chosen": -0.5654296875, "rewards/margins": 0.2060546875, "rewards/rejected": -0.771484375, "step": 538 }, { "agreement_weights/mean": 0.8831877708435059, "agreement_weights/std": 0.24589963257312775, "epoch": 0.5760085492920117, "eta/annotator_0": 0.9088696837425232, "grad_norm": 45.317592632392774, "learning_rate": 3.180124073644182e-07, "loss": 0.5309, "rewards/accuracies": 0.765625, "rewards/chosen": -0.57763671875, "rewards/margins": 0.150390625, "rewards/rejected": -0.728515625, "step": 539 }, { "agreement_weights/mean": 0.9331031441688538, "agreement_weights/std": 0.12425866723060608, "epoch": 0.5770772107934812, "eta/annotator_0": 0.9067413210868835, "grad_norm": 53.01563092232163, "learning_rate": 3.1671066489273536e-07, "loss": 0.5792, "rewards/accuracies": 0.765625, "rewards/chosen": -0.537109375, "rewards/margins": 0.142578125, "rewards/rejected": -0.6796875, "step": 540 }, { "agreement_weights/mean": 0.9205114841461182, "agreement_weights/std": 0.15448513627052307, "epoch": 0.5781458722949506, "eta/annotator_0": 0.9067413210868835, "grad_norm": 28.56297540641272, "learning_rate": 3.1540938694955865e-07, "loss": 0.5425, "rewards/accuracies": 0.78125, "rewards/chosen": -0.525390625, "rewards/margins": 0.17822265625, "rewards/rejected": -0.703125, "step": 541 }, { "agreement_weights/mean": 0.9496056437492371, "agreement_weights/std": 0.08604250103235245, "epoch": 0.57921453379642, "eta/annotator_0": 0.9075238108634949, "grad_norm": 75.87610671215427, "learning_rate": 3.1410859169327685e-07, "loss": 0.5409, "rewards/accuracies": 0.765625, "rewards/chosen": -0.55029296875, "rewards/margins": 0.245849609375, "rewards/rejected": -0.794921875, "step": 542 }, { "agreement_weights/mean": 0.9189584851264954, "agreement_weights/std": 0.1492231786251068, "epoch": 0.5802831952978894, "eta/annotator_0": 0.9075238108634949, "grad_norm": 33.242710035967185, "learning_rate": 3.128082972755436e-07, "loss": 0.6403, "rewards/accuracies": 0.75, "rewards/chosen": -0.56884765625, "rewards/margins": 0.1280517578125, "rewards/rejected": -0.6962890625, "step": 543 }, { "agreement_weights/mean": 0.9475041627883911, "agreement_weights/std": 0.10295344889163971, "epoch": 0.5813518567993589, "eta/annotator_0": 0.9064730405807495, "grad_norm": 46.824462551793296, "learning_rate": 3.115085218410237e-07, "loss": 0.5265, "rewards/accuracies": 0.859375, "rewards/chosen": -0.54248046875, "rewards/margins": 0.224609375, "rewards/rejected": -0.7666015625, "step": 544 }, { "agreement_weights/mean": 0.9022937417030334, "agreement_weights/std": 0.1362929493188858, "epoch": 0.5824205183008282, "eta/annotator_0": 0.9054223299026489, "grad_norm": 32.14305608557549, "learning_rate": 3.102092835271396e-07, "loss": 0.6186, "rewards/accuracies": 0.703125, "rewards/chosen": -0.53076171875, "rewards/margins": 0.143829345703125, "rewards/rejected": -0.67578125, "step": 545 }, { "agreement_weights/mean": 0.9483115673065186, "agreement_weights/std": 0.08759982883930206, "epoch": 0.5834891798022976, "eta/annotator_0": 0.9054223299026489, "grad_norm": 39.80011585424222, "learning_rate": 3.0891060046381903e-07, "loss": 0.5722, "rewards/accuracies": 0.796875, "rewards/chosen": -0.56787109375, "rewards/margins": 0.167236328125, "rewards/rejected": -0.734375, "step": 546 }, { "agreement_weights/mean": 0.958890974521637, "agreement_weights/std": 0.07417741417884827, "epoch": 0.584557841303767, "eta/annotator_0": 0.9074880480766296, "grad_norm": 37.46515718754711, "learning_rate": 3.0761249077324115e-07, "loss": 0.5176, "rewards/accuracies": 0.875, "rewards/chosen": -0.49658203125, "rewards/margins": 0.20654296875, "rewards/rejected": -0.7021484375, "step": 547 }, { "agreement_weights/mean": 0.9269754886627197, "agreement_weights/std": 0.11763142049312592, "epoch": 0.5856265028052364, "eta/annotator_0": 0.9074880480766296, "grad_norm": 32.65231866507491, "learning_rate": 3.0631497256958453e-07, "loss": 0.6659, "rewards/accuracies": 0.671875, "rewards/chosen": -0.5478515625, "rewards/margins": 0.13037109375, "rewards/rejected": -0.6787109375, "step": 548 }, { "agreement_weights/mean": 0.9350884556770325, "agreement_weights/std": 0.12680533528327942, "epoch": 0.5866951643067059, "eta/annotator_0": 0.908190906047821, "grad_norm": 32.59548427591769, "learning_rate": 3.0501806395877383e-07, "loss": 0.5657, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5009765625, "rewards/margins": 0.170166015625, "rewards/rejected": -0.669921875, "step": 549 }, { "agreement_weights/mean": 0.9231749773025513, "agreement_weights/std": 0.13142918050289154, "epoch": 0.5877638258081752, "eta/annotator_0": 0.9088937640190125, "grad_norm": 95.49722795649141, "learning_rate": 3.037217830382271e-07, "loss": 0.8287, "rewards/accuracies": 0.703125, "rewards/chosen": -0.60546875, "rewards/margins": 0.1427001953125, "rewards/rejected": -0.7490234375, "step": 550 }, { "agreement_weights/mean": 0.9551399946212769, "agreement_weights/std": 0.06387101858854294, "epoch": 0.5888324873096447, "eta/annotator_0": 0.9088937640190125, "grad_norm": 45.484873882932845, "learning_rate": 3.0242614789660374e-07, "loss": 0.6099, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5458984375, "rewards/margins": 0.220703125, "rewards/rejected": -0.7666015625, "step": 551 }, { "agreement_weights/mean": 0.9467599987983704, "agreement_weights/std": 0.08565915375947952, "epoch": 0.5899011488111141, "eta/annotator_0": 0.9137613773345947, "grad_norm": 31.297309046018796, "learning_rate": 3.0113117661355173e-07, "loss": 0.5223, "rewards/accuracies": 0.828125, "rewards/chosen": -0.57421875, "rewards/margins": 0.2164306640625, "rewards/rejected": -0.7890625, "step": 552 }, { "agreement_weights/mean": 0.9269555807113647, "agreement_weights/std": 0.14312297105789185, "epoch": 0.5909698103125834, "eta/annotator_0": 0.9137613773345947, "grad_norm": 43.79816729090528, "learning_rate": 2.99836887259455e-07, "loss": 0.6722, "rewards/accuracies": 0.765625, "rewards/chosen": -0.52099609375, "rewards/margins": 0.2255859375, "rewards/rejected": -0.74609375, "step": 553 }, { "agreement_weights/mean": 0.9507973790168762, "agreement_weights/std": 0.09486962854862213, "epoch": 0.5920384718140529, "eta/annotator_0": 0.9129003286361694, "grad_norm": 35.497731925272106, "learning_rate": 2.9854329789518197e-07, "loss": 0.493, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5712890625, "rewards/margins": 0.2958984375, "rewards/rejected": -0.869140625, "step": 554 }, { "agreement_weights/mean": 0.9271531701087952, "agreement_weights/std": 0.1270458996295929, "epoch": 0.5931071333155223, "eta/annotator_0": 0.9120393991470337, "grad_norm": 35.77833383683791, "learning_rate": 2.972504265718335e-07, "loss": 0.6212, "rewards/accuracies": 0.734375, "rewards/chosen": -0.576171875, "rewards/margins": 0.16552734375, "rewards/rejected": -0.7421875, "step": 555 }, { "agreement_weights/mean": 0.956514298915863, "agreement_weights/std": 0.08155786991119385, "epoch": 0.5941757948169917, "eta/annotator_0": 0.9120393991470337, "grad_norm": 34.58933889349784, "learning_rate": 2.959582913304899e-07, "loss": 0.4872, "rewards/accuracies": 0.8125, "rewards/chosen": -0.53076171875, "rewards/margins": 0.2205810546875, "rewards/rejected": -0.7509765625, "step": 556 }, { "agreement_weights/mean": 0.9458112716674805, "agreement_weights/std": 0.1035509929060936, "epoch": 0.5952444563184611, "eta/annotator_0": 0.9137051701545715, "grad_norm": 38.073109904480084, "learning_rate": 2.946669102019608e-07, "loss": 0.5762, "rewards/accuracies": 0.796875, "rewards/chosen": -0.513671875, "rewards/margins": 0.185791015625, "rewards/rejected": -0.7001953125, "step": 557 }, { "agreement_weights/mean": 0.946616530418396, "agreement_weights/std": 0.10818235576152802, "epoch": 0.5963131178199306, "eta/annotator_0": 0.9137051701545715, "grad_norm": 52.10584106593475, "learning_rate": 2.9337630120653233e-07, "loss": 0.5952, "rewards/accuracies": 0.828125, "rewards/chosen": -0.599609375, "rewards/margins": 0.209716796875, "rewards/rejected": -0.810546875, "step": 558 }, { "agreement_weights/mean": 0.9410592317581177, "agreement_weights/std": 0.09769255667924881, "epoch": 0.5973817793213999, "eta/annotator_0": 0.914833664894104, "grad_norm": 43.36678852706317, "learning_rate": 2.920864823537161e-07, "loss": 0.5359, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5869140625, "rewards/margins": 0.24365234375, "rewards/rejected": -0.830078125, "step": 559 }, { "agreement_weights/mean": 0.899520993232727, "agreement_weights/std": 0.16585636138916016, "epoch": 0.5984504408228694, "eta/annotator_0": 0.9159622192382812, "grad_norm": 57.15342285195582, "learning_rate": 2.9079747164199806e-07, "loss": 0.5822, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6044921875, "rewards/margins": 0.13177490234375, "rewards/rejected": -0.7353515625, "step": 560 }, { "agreement_weights/mean": 0.9394698143005371, "agreement_weights/std": 0.0979190543293953, "epoch": 0.5995191023243388, "eta/annotator_0": 0.9159622192382812, "grad_norm": 65.262018429608, "learning_rate": 2.895092870585867e-07, "loss": 0.6401, "rewards/accuracies": 0.765625, "rewards/chosen": -0.623046875, "rewards/margins": 0.212646484375, "rewards/rejected": -0.8359375, "step": 561 }, { "agreement_weights/mean": 0.936281681060791, "agreement_weights/std": 0.10408417880535126, "epoch": 0.6005877638258081, "eta/annotator_0": 0.9109718799591064, "grad_norm": 47.03765459855948, "learning_rate": 2.882219465791629e-07, "loss": 0.5934, "rewards/accuracies": 0.78125, "rewards/chosen": -0.580078125, "rewards/margins": 0.16943359375, "rewards/rejected": -0.7490234375, "step": 562 }, { "agreement_weights/mean": 0.9551497101783752, "agreement_weights/std": 0.08000927418470383, "epoch": 0.6016564253272776, "eta/annotator_0": 0.9109718799591064, "grad_norm": 39.99132074745771, "learning_rate": 2.8693546816762864e-07, "loss": 0.5265, "rewards/accuracies": 0.875, "rewards/chosen": -0.630859375, "rewards/margins": 0.22021484375, "rewards/rejected": -0.8515625, "step": 563 }, { "agreement_weights/mean": 0.9589588046073914, "agreement_weights/std": 0.07893750071525574, "epoch": 0.602725086828747, "eta/annotator_0": 0.9128603935241699, "grad_norm": 67.50828715636203, "learning_rate": 2.856498697758561e-07, "loss": 0.6069, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5771484375, "rewards/margins": 0.23876953125, "rewards/rejected": -0.81640625, "step": 564 }, { "agreement_weights/mean": 0.9477081298828125, "agreement_weights/std": 0.08308221399784088, "epoch": 0.6037937483302164, "eta/annotator_0": 0.9147489070892334, "grad_norm": 41.117204766283436, "learning_rate": 2.843651693434376e-07, "loss": 0.5781, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5986328125, "rewards/margins": 0.2001953125, "rewards/rejected": -0.7998046875, "step": 565 }, { "agreement_weights/mean": 0.9479438066482544, "agreement_weights/std": 0.10121527314186096, "epoch": 0.6048624098316858, "eta/annotator_0": 0.9147489070892334, "grad_norm": 38.28194344058386, "learning_rate": 2.830813847974353e-07, "loss": 0.5387, "rewards/accuracies": 0.796875, "rewards/chosen": -0.7001953125, "rewards/margins": 0.212890625, "rewards/rejected": -0.9130859375, "step": 566 }, { "agreement_weights/mean": 0.9480606317520142, "agreement_weights/std": 0.08684194087982178, "epoch": 0.6059310713331553, "eta/annotator_0": 0.9180116057395935, "grad_norm": 42.05745636494947, "learning_rate": 2.8179853405213035e-07, "loss": 0.6273, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6279296875, "rewards/margins": 0.18310546875, "rewards/rejected": -0.810546875, "step": 567 }, { "agreement_weights/mean": 0.9155803322792053, "agreement_weights/std": 0.129023477435112, "epoch": 0.6069997328346246, "eta/annotator_0": 0.9180116057395935, "grad_norm": 43.660756689926245, "learning_rate": 2.8051663500877383e-07, "loss": 0.6673, "rewards/accuracies": 0.703125, "rewards/chosen": -0.56689453125, "rewards/margins": 0.1727294921875, "rewards/rejected": -0.740234375, "step": 568 }, { "agreement_weights/mean": 0.9099870324134827, "agreement_weights/std": 0.1388932466506958, "epoch": 0.608068394336094, "eta/annotator_0": 0.917395830154419, "grad_norm": 190.29982723642559, "learning_rate": 2.792357055553361e-07, "loss": 0.8183, "rewards/accuracies": 0.65625, "rewards/chosen": -0.6171875, "rewards/margins": 0.1590576171875, "rewards/rejected": -0.7763671875, "step": 569 }, { "agreement_weights/mean": 0.9260088205337524, "agreement_weights/std": 0.12891796231269836, "epoch": 0.6091370558375635, "eta/annotator_0": 0.9167799949645996, "grad_norm": 41.78871589431891, "learning_rate": 2.7795576356625806e-07, "loss": 0.6629, "rewards/accuracies": 0.6875, "rewards/chosen": -0.66015625, "rewards/margins": 0.16650390625, "rewards/rejected": -0.8271484375, "step": 570 }, { "agreement_weights/mean": 0.91963130235672, "agreement_weights/std": 0.14099779725074768, "epoch": 0.6102057173390328, "eta/annotator_0": 0.9167799949645996, "grad_norm": 53.08096432388243, "learning_rate": 2.766768269022011e-07, "loss": 0.6609, "rewards/accuracies": 0.6875, "rewards/chosen": -0.65625, "rewards/margins": 0.174560546875, "rewards/rejected": -0.8310546875, "step": 571 }, { "agreement_weights/mean": 0.8894519805908203, "agreement_weights/std": 0.2129870355129242, "epoch": 0.6112743788405023, "eta/annotator_0": 0.9141392707824707, "grad_norm": 70.72250508097447, "learning_rate": 2.7539891340979763e-07, "loss": 0.6573, "rewards/accuracies": 0.6875, "rewards/chosen": -0.61767578125, "rewards/margins": 0.14495849609375, "rewards/rejected": -0.76171875, "step": 572 }, { "agreement_weights/mean": 0.9359692335128784, "agreement_weights/std": 0.14985889196395874, "epoch": 0.6123430403419717, "eta/annotator_0": 0.9141392707824707, "grad_norm": 46.316497440504875, "learning_rate": 2.7412204092140303e-07, "loss": 0.4771, "rewards/accuracies": 0.796875, "rewards/chosen": -0.60791015625, "rewards/margins": 0.206787109375, "rewards/rejected": -0.814453125, "step": 573 }, { "agreement_weights/mean": 0.9283039569854736, "agreement_weights/std": 0.12911370396614075, "epoch": 0.6134117018434411, "eta/annotator_0": 0.911532998085022, "grad_norm": 48.465977686250184, "learning_rate": 2.7284622725484594e-07, "loss": 0.6289, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6298828125, "rewards/margins": 0.184814453125, "rewards/rejected": -0.8154296875, "step": 574 }, { "agreement_weights/mean": 0.9496434330940247, "agreement_weights/std": 0.08262716978788376, "epoch": 0.6144803633449105, "eta/annotator_0": 0.9089266657829285, "grad_norm": 46.602670109359465, "learning_rate": 2.715714902131798e-07, "loss": 0.58, "rewards/accuracies": 0.828125, "rewards/chosen": -0.517578125, "rewards/margins": 0.235595703125, "rewards/rejected": -0.7529296875, "step": 575 }, { "agreement_weights/mean": 0.9370426535606384, "agreement_weights/std": 0.11756158620119095, "epoch": 0.6155490248463799, "eta/annotator_0": 0.9089266657829285, "grad_norm": 37.456849131839654, "learning_rate": 2.7029784758443484e-07, "loss": 0.4585, "rewards/accuracies": 0.84375, "rewards/chosen": -0.748046875, "rewards/margins": 0.2392578125, "rewards/rejected": -0.9873046875, "step": 576 }, { "agreement_weights/mean": 0.9204915165901184, "agreement_weights/std": 0.1521250158548355, "epoch": 0.6166176863478493, "eta/annotator_0": 0.909326434135437, "grad_norm": 57.32245906591869, "learning_rate": 2.6902531714136914e-07, "loss": 0.6188, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6318359375, "rewards/margins": 0.161865234375, "rewards/rejected": -0.7939453125, "step": 577 }, { "agreement_weights/mean": 0.9471827149391174, "agreement_weights/std": 0.10485843569040298, "epoch": 0.6176863478493188, "eta/annotator_0": 0.909326434135437, "grad_norm": 31.42326224173642, "learning_rate": 2.677539166412215e-07, "loss": 0.4907, "rewards/accuracies": 0.8125, "rewards/chosen": -0.654296875, "rewards/margins": 0.23779296875, "rewards/rejected": -0.892578125, "step": 578 }, { "agreement_weights/mean": 0.911949634552002, "agreement_weights/std": 0.1353457272052765, "epoch": 0.6187550093507881, "eta/annotator_0": 0.9106748104095459, "grad_norm": 292.2635501434228, "learning_rate": 2.6648366382546303e-07, "loss": 1.2432, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6103515625, "rewards/margins": 0.115966796875, "rewards/rejected": -0.7255859375, "step": 579 }, { "agreement_weights/mean": 0.9590042233467102, "agreement_weights/std": 0.04459843784570694, "epoch": 0.6198236708522575, "eta/annotator_0": 0.9120231866836548, "grad_norm": 40.263589001900286, "learning_rate": 2.6521457641954935e-07, "loss": 0.6407, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6337890625, "rewards/margins": 0.19580078125, "rewards/rejected": -0.830078125, "step": 580 }, { "agreement_weights/mean": 0.9526234865188599, "agreement_weights/std": 0.09312576800584793, "epoch": 0.620892332353727, "eta/annotator_0": 0.9120231866836548, "grad_norm": 33.857030487932924, "learning_rate": 2.6394667213267413e-07, "loss": 0.5367, "rewards/accuracies": 0.84375, "rewards/chosen": -0.6591796875, "rewards/margins": 0.245361328125, "rewards/rejected": -0.904296875, "step": 581 }, { "agreement_weights/mean": 0.92919921875, "agreement_weights/std": 0.14274269342422485, "epoch": 0.6219609938551963, "eta/annotator_0": 0.9148685336112976, "grad_norm": 28.71952926539672, "learning_rate": 2.6267996865752146e-07, "loss": 0.5508, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6181640625, "rewards/margins": 0.1895751953125, "rewards/rejected": -0.80859375, "step": 582 }, { "agreement_weights/mean": 0.9576365947723389, "agreement_weights/std": 0.07495222240686417, "epoch": 0.6230296553566658, "eta/annotator_0": 0.9148685336112976, "grad_norm": 27.90019575949514, "learning_rate": 2.614144836700185e-07, "loss": 0.487, "rewards/accuracies": 0.875, "rewards/chosen": -0.5888671875, "rewards/margins": 0.205078125, "rewards/rejected": -0.7939453125, "step": 583 }, { "agreement_weights/mean": 0.9603458642959595, "agreement_weights/std": 0.08768250048160553, "epoch": 0.6240983168581352, "eta/annotator_0": 0.9163795709609985, "grad_norm": 32.20212071401977, "learning_rate": 2.601502348290898e-07, "loss": 0.4158, "rewards/accuracies": 0.859375, "rewards/chosen": -0.55908203125, "rewards/margins": 0.25390625, "rewards/rejected": -0.814453125, "step": 584 }, { "agreement_weights/mean": 0.9437699317932129, "agreement_weights/std": 0.08922252058982849, "epoch": 0.6251669783596046, "eta/annotator_0": 0.9178906083106995, "grad_norm": 45.8769871357806, "learning_rate": 2.5888723977641015e-07, "loss": 0.5819, "rewards/accuracies": 0.796875, "rewards/chosen": -0.580078125, "rewards/margins": 0.1669921875, "rewards/rejected": -0.7470703125, "step": 585 }, { "agreement_weights/mean": 0.9490259885787964, "agreement_weights/std": 0.06902802735567093, "epoch": 0.626235639861074, "eta/annotator_0": 0.9178906083106995, "grad_norm": 37.259658888197954, "learning_rate": 2.576255161361586e-07, "loss": 0.6495, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6552734375, "rewards/margins": 0.1767578125, "rewards/rejected": -0.8330078125, "step": 586 }, { "agreement_weights/mean": 0.9481679201126099, "agreement_weights/std": 0.10308730602264404, "epoch": 0.6273043013625434, "eta/annotator_0": 0.921358585357666, "grad_norm": 34.45075549596514, "learning_rate": 2.5636508151477274e-07, "loss": 0.5688, "rewards/accuracies": 0.796875, "rewards/chosen": -0.568359375, "rewards/margins": 0.206298828125, "rewards/rejected": -0.775390625, "step": 587 }, { "agreement_weights/mean": 0.9334375858306885, "agreement_weights/std": 0.11595764756202698, "epoch": 0.6283729628640128, "eta/annotator_0": 0.921358585357666, "grad_norm": 34.31534646400481, "learning_rate": 2.5510595350070266e-07, "loss": 0.6277, "rewards/accuracies": 0.734375, "rewards/chosen": -0.640625, "rewards/margins": 0.13922119140625, "rewards/rejected": -0.7802734375, "step": 588 }, { "agreement_weights/mean": 0.9407309293746948, "agreement_weights/std": 0.12646740674972534, "epoch": 0.6294416243654822, "eta/annotator_0": 0.9218730926513672, "grad_norm": 29.9423130239541, "learning_rate": 2.5384814966416576e-07, "loss": 0.5019, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6220703125, "rewards/margins": 0.23828125, "rewards/rejected": -0.859375, "step": 589 }, { "agreement_weights/mean": 0.926185667514801, "agreement_weights/std": 0.16887547075748444, "epoch": 0.6305102858669517, "eta/annotator_0": 0.9223876595497131, "grad_norm": 38.501483338664436, "learning_rate": 2.5259168755690176e-07, "loss": 0.4909, "rewards/accuracies": 0.828125, "rewards/chosen": -0.62646484375, "rewards/margins": 0.2060546875, "rewards/rejected": -0.83203125, "step": 590 }, { "agreement_weights/mean": 0.9209963083267212, "agreement_weights/std": 0.14357580244541168, "epoch": 0.631578947368421, "eta/annotator_0": 0.9223876595497131, "grad_norm": 59.34003570284384, "learning_rate": 2.513365847119268e-07, "loss": 0.6759, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6162109375, "rewards/margins": 0.220947265625, "rewards/rejected": -0.8369140625, "step": 591 }, { "agreement_weights/mean": 0.9612010717391968, "agreement_weights/std": 0.06341227144002914, "epoch": 0.6326476088698905, "eta/annotator_0": 0.9256742000579834, "grad_norm": 73.90811731377616, "learning_rate": 2.5008285864329036e-07, "loss": 0.5748, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6259765625, "rewards/margins": 0.2470703125, "rewards/rejected": -0.873046875, "step": 592 }, { "agreement_weights/mean": 0.9341224431991577, "agreement_weights/std": 0.14596296846866608, "epoch": 0.6337162703713599, "eta/annotator_0": 0.9256742000579834, "grad_norm": 57.41194732182848, "learning_rate": 2.488305268458296e-07, "loss": 0.6006, "rewards/accuracies": 0.8125, "rewards/chosen": -0.57421875, "rewards/margins": 0.232666015625, "rewards/rejected": -0.806640625, "step": 593 }, { "agreement_weights/mean": 0.9418824911117554, "agreement_weights/std": 0.11536265909671783, "epoch": 0.6347849318728293, "eta/annotator_0": 0.9232529401779175, "grad_norm": 73.41078066361723, "learning_rate": 2.4757960679492544e-07, "loss": 0.5756, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6220703125, "rewards/margins": 0.189208984375, "rewards/rejected": -0.8115234375, "step": 594 }, { "agreement_weights/mean": 0.9148910045623779, "agreement_weights/std": 0.1566769778728485, "epoch": 0.6358535933742987, "eta/annotator_0": 0.9208316206932068, "grad_norm": 38.53416713547603, "learning_rate": 2.463301159462592e-07, "loss": 0.6324, "rewards/accuracies": 0.75, "rewards/chosen": -0.6318359375, "rewards/margins": 0.129150390625, "rewards/rejected": -0.7607421875, "step": 595 }, { "agreement_weights/mean": 0.9465201497077942, "agreement_weights/std": 0.11441721022129059, "epoch": 0.6369222548757681, "eta/annotator_0": 0.9208316206932068, "grad_norm": 40.858790554486845, "learning_rate": 2.450820717355687e-07, "loss": 0.5615, "rewards/accuracies": 0.828125, "rewards/chosen": -0.5966796875, "rewards/margins": 0.244140625, "rewards/rejected": -0.8427734375, "step": 596 }, { "agreement_weights/mean": 0.9369440078735352, "agreement_weights/std": 0.11289086937904358, "epoch": 0.6379909163772375, "eta/annotator_0": 0.9172875285148621, "grad_norm": 37.387583047017756, "learning_rate": 2.4383549157840497e-07, "loss": 0.5516, "rewards/accuracies": 0.765625, "rewards/chosen": -0.57470703125, "rewards/margins": 0.2247314453125, "rewards/rejected": -0.798828125, "step": 597 }, { "agreement_weights/mean": 0.9375609159469604, "agreement_weights/std": 0.11197095364332199, "epoch": 0.6390595778787069, "eta/annotator_0": 0.9172875285148621, "grad_norm": 40.80260382341391, "learning_rate": 2.425903928698892e-07, "loss": 0.6233, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6201171875, "rewards/margins": 0.153076171875, "rewards/rejected": -0.7724609375, "step": 598 }, { "agreement_weights/mean": 0.9515920877456665, "agreement_weights/std": 0.08857648074626923, "epoch": 0.6401282393801764, "eta/annotator_0": 0.9186608791351318, "grad_norm": 43.42301656281742, "learning_rate": 2.413467929844697e-07, "loss": 0.5532, "rewards/accuracies": 0.796875, "rewards/chosen": -0.62451171875, "rewards/margins": 0.217529296875, "rewards/rejected": -0.8408203125, "step": 599 }, { "agreement_weights/mean": 0.9623844027519226, "agreement_weights/std": 0.06983551383018494, "epoch": 0.6411969008816457, "eta/annotator_0": 0.9200342893600464, "grad_norm": 124.43186850713215, "learning_rate": 2.401047092756804e-07, "loss": 0.6617, "rewards/accuracies": 0.859375, "rewards/chosen": -0.53125, "rewards/margins": 0.267822265625, "rewards/rejected": -0.7998046875, "step": 600 }, { "epoch": 0.6411969008816457, "eta/annotator_0": 0.9256768226623535, "eval_agreement_weights/mean": 0.9486365914344788, "eval_agreement_weights/std": 0.09449105709791183, "eval_loss": 0.5720981955528259, "eval_rewards/accuracies": 0.7943202257156372, "eval_rewards/chosen": -0.5814119577407837, "eval_rewards/margins": 0.20089994370937347, "eval_rewards/rejected": -0.782329797744751, "eval_runtime": 134.6351, "eval_samples_per_second": 14.565, "eval_steps_per_second": 0.914, "step": 600 }, { "agreement_weights/mean": 0.9303597211837769, "agreement_weights/std": 0.125267893075943, "epoch": 0.6422655623831152, "eta/annotator_0": 0.9267285466194153, "grad_norm": 27.98887794232155, "learning_rate": 2.388641590758979e-07, "loss": 0.6091, "rewards/accuracies": 0.71875, "rewards/chosen": -0.64453125, "rewards/margins": 0.1678466796875, "rewards/rejected": -0.8125, "step": 601 }, { "agreement_weights/mean": 0.9405956864356995, "agreement_weights/std": 0.1299871802330017, "epoch": 0.6433342238845846, "eta/annotator_0": 0.9261094927787781, "grad_norm": 40.43803047780674, "learning_rate": 2.376251596960994e-07, "loss": 0.5951, "rewards/accuracies": 0.828125, "rewards/chosen": -0.55712890625, "rewards/margins": 0.237060546875, "rewards/rejected": -0.7939453125, "step": 602 }, { "agreement_weights/mean": 0.934689998626709, "agreement_weights/std": 0.10701686143875122, "epoch": 0.644402885386054, "eta/annotator_0": 0.9245393872261047, "grad_norm": 39.34274133943029, "learning_rate": 2.3638772842562196e-07, "loss": 0.5674, "rewards/accuracies": 0.765625, "rewards/chosen": -0.54638671875, "rewards/margins": 0.169677734375, "rewards/rejected": -0.7138671875, "step": 603 }, { "agreement_weights/mean": 0.9434391856193542, "agreement_weights/std": 0.13079681992530823, "epoch": 0.6454715468875234, "eta/annotator_0": 0.9198291301727295, "grad_norm": 88.51698722995313, "learning_rate": 2.3515188253192084e-07, "loss": 0.5517, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5947265625, "rewards/margins": 0.224365234375, "rewards/rejected": -0.8203125, "step": 604 }, { "agreement_weights/mean": 0.949661910533905, "agreement_weights/std": 0.08253749459981918, "epoch": 0.6465402083889927, "eta/annotator_0": 0.9198291301727295, "grad_norm": 43.21609738031398, "learning_rate": 2.339176392603281e-07, "loss": 0.6402, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5693359375, "rewards/margins": 0.189453125, "rewards/rejected": -0.759765625, "step": 605 }, { "agreement_weights/mean": 0.9254319667816162, "agreement_weights/std": 0.15084055066108704, "epoch": 0.6476088698904622, "eta/annotator_0": 0.9160492420196533, "grad_norm": 40.25484042177856, "learning_rate": 2.3268501583381275e-07, "loss": 0.5504, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6513671875, "rewards/margins": 0.2027587890625, "rewards/rejected": -0.853515625, "step": 606 }, { "agreement_weights/mean": 0.9473477602005005, "agreement_weights/std": 0.09401869773864746, "epoch": 0.6486775313919316, "eta/annotator_0": 0.9147893190383911, "grad_norm": 32.166424134747, "learning_rate": 2.3145402945273988e-07, "loss": 0.4885, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5400390625, "rewards/margins": 0.26123046875, "rewards/rejected": -0.8017578125, "step": 607 }, { "agreement_weights/mean": 0.9403899908065796, "agreement_weights/std": 0.1276710480451584, "epoch": 0.649746192893401, "eta/annotator_0": 0.9140280485153198, "grad_norm": 43.04807605220073, "learning_rate": 2.3022469729463085e-07, "loss": 0.624, "rewards/accuracies": 0.765625, "rewards/chosen": -0.546875, "rewards/margins": 0.168212890625, "rewards/rejected": -0.71484375, "step": 608 }, { "agreement_weights/mean": 0.9597711563110352, "agreement_weights/std": 0.09369666874408722, "epoch": 0.6508148543948704, "eta/annotator_0": 0.911744236946106, "grad_norm": 43.05431100343462, "learning_rate": 2.2899703651392368e-07, "loss": 0.4549, "rewards/accuracies": 0.859375, "rewards/chosen": -0.578125, "rewards/margins": 0.263916015625, "rewards/rejected": -0.8427734375, "step": 609 }, { "agreement_weights/mean": 0.9483532309532166, "agreement_weights/std": 0.10074485838413239, "epoch": 0.6518835158963399, "eta/annotator_0": 0.911744236946106, "grad_norm": 102.2206381053402, "learning_rate": 2.2777106424173315e-07, "loss": 0.547, "rewards/accuracies": 0.84375, "rewards/chosen": -0.48486328125, "rewards/margins": 0.2197265625, "rewards/rejected": -0.7041015625, "step": 610 }, { "agreement_weights/mean": 0.9335142374038696, "agreement_weights/std": 0.13716305792331696, "epoch": 0.6529521773978092, "eta/annotator_0": 0.9138504266738892, "grad_norm": 41.0365978014899, "learning_rate": 2.2654679758561238e-07, "loss": 0.635, "rewards/accuracies": 0.75, "rewards/chosen": -0.564453125, "rewards/margins": 0.1795654296875, "rewards/rejected": -0.7451171875, "step": 611 }, { "agreement_weights/mean": 0.9325442314147949, "agreement_weights/std": 0.12163199484348297, "epoch": 0.6540208388992786, "eta/annotator_0": 0.9145525097846985, "grad_norm": 40.141469424681006, "learning_rate": 2.253242536293141e-07, "loss": 0.6994, "rewards/accuracies": 0.734375, "rewards/chosen": -0.591796875, "rewards/margins": 0.1380615234375, "rewards/rejected": -0.73046875, "step": 612 }, { "agreement_weights/mean": 0.9280708432197571, "agreement_weights/std": 0.1387234479188919, "epoch": 0.6550895004007481, "eta/annotator_0": 0.914567232131958, "grad_norm": 40.61921350930008, "learning_rate": 2.2410344943255137e-07, "loss": 0.6725, "rewards/accuracies": 0.71875, "rewards/chosen": -0.58984375, "rewards/margins": 0.1746826171875, "rewards/rejected": -0.7646484375, "step": 613 }, { "agreement_weights/mean": 0.9347870945930481, "agreement_weights/std": 0.14356523752212524, "epoch": 0.6561581619022174, "eta/annotator_0": 0.9146113991737366, "grad_norm": 122.0726633902181, "learning_rate": 2.228844020307608e-07, "loss": 0.5741, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5712890625, "rewards/margins": 0.26171875, "rewards/rejected": -0.833984375, "step": 614 }, { "agreement_weights/mean": 0.9277690052986145, "agreement_weights/std": 0.15189117193222046, "epoch": 0.6572268234036869, "eta/annotator_0": 0.9146113991737366, "grad_norm": 48.38762327012169, "learning_rate": 2.216671284348639e-07, "loss": 0.6065, "rewards/accuracies": 0.75, "rewards/chosen": -0.583984375, "rewards/margins": 0.197021484375, "rewards/rejected": -0.7802734375, "step": 615 }, { "agreement_weights/mean": 0.9362135529518127, "agreement_weights/std": 0.1376955360174179, "epoch": 0.6582954849051563, "eta/annotator_0": 0.9159520864486694, "grad_norm": 55.153569907423915, "learning_rate": 2.2045164563102994e-07, "loss": 0.5878, "rewards/accuracies": 0.828125, "rewards/chosen": -0.5625, "rewards/margins": 0.205078125, "rewards/rejected": -0.767578125, "step": 616 }, { "agreement_weights/mean": 0.928075909614563, "agreement_weights/std": 0.13441593945026398, "epoch": 0.6593641464066257, "eta/annotator_0": 0.9163988828659058, "grad_norm": 31.371937494990714, "learning_rate": 2.1923797058043935e-07, "loss": 0.576, "rewards/accuracies": 0.734375, "rewards/chosen": -0.59375, "rewards/margins": 0.208404541015625, "rewards/rejected": -0.8017578125, "step": 617 }, { "agreement_weights/mean": 0.9555413722991943, "agreement_weights/std": 0.06723307073116302, "epoch": 0.6604328079080951, "eta/annotator_0": 0.9161254167556763, "grad_norm": 35.655544474676965, "learning_rate": 2.1802612021904617e-07, "loss": 0.6059, "rewards/accuracies": 0.8125, "rewards/chosen": -0.55517578125, "rewards/margins": 0.1629638671875, "rewards/rejected": -0.71875, "step": 618 }, { "agreement_weights/mean": 0.9509074687957764, "agreement_weights/std": 0.10252620279788971, "epoch": 0.6615014694095646, "eta/annotator_0": 0.9153050780296326, "grad_norm": 56.10839065439775, "learning_rate": 2.1681611145734256e-07, "loss": 0.536, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5703125, "rewards/margins": 0.186767578125, "rewards/rejected": -0.7568359375, "step": 619 }, { "agreement_weights/mean": 0.9340941309928894, "agreement_weights/std": 0.14444825053215027, "epoch": 0.6625701309110339, "eta/annotator_0": 0.9153050780296326, "grad_norm": 39.57251154419614, "learning_rate": 2.156079611801226e-07, "loss": 0.507, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6611328125, "rewards/margins": 0.238037109375, "rewards/rejected": -0.8984375, "step": 620 }, { "agreement_weights/mean": 0.9603488445281982, "agreement_weights/std": 0.07624902576208115, "epoch": 0.6636387924125033, "eta/annotator_0": 0.9160655736923218, "grad_norm": 42.833404218935804, "learning_rate": 2.1440168624624607e-07, "loss": 0.5667, "rewards/accuracies": 0.8125, "rewards/chosen": -0.57177734375, "rewards/margins": 0.24072265625, "rewards/rejected": -0.8125, "step": 621 }, { "agreement_weights/mean": 0.9626846313476562, "agreement_weights/std": 0.05489881709218025, "epoch": 0.6647074539139728, "eta/annotator_0": 0.9163190722465515, "grad_norm": 47.09842477402348, "learning_rate": 2.1319730348840421e-07, "loss": 0.5732, "rewards/accuracies": 0.8125, "rewards/chosen": -0.55078125, "rewards/margins": 0.216552734375, "rewards/rejected": -0.767578125, "step": 622 }, { "agreement_weights/mean": 0.9567136168479919, "agreement_weights/std": 0.08853264898061752, "epoch": 0.6657761154154421, "eta/annotator_0": 0.9161401987075806, "grad_norm": 36.13278572281204, "learning_rate": 2.1199482971288419e-07, "loss": 0.5411, "rewards/accuracies": 0.8125, "rewards/chosen": -0.580078125, "rewards/margins": 0.2255859375, "rewards/rejected": -0.806640625, "step": 623 }, { "agreement_weights/mean": 0.9412451982498169, "agreement_weights/std": 0.12008579075336456, "epoch": 0.6668447769169116, "eta/annotator_0": 0.915603518486023, "grad_norm": 94.16861717442582, "learning_rate": 2.107942816993344e-07, "loss": 0.5965, "rewards/accuracies": 0.8125, "rewards/chosen": -0.58935546875, "rewards/margins": 0.14923095703125, "rewards/rejected": -0.73828125, "step": 624 }, { "agreement_weights/mean": 0.9507788419723511, "agreement_weights/std": 0.089681476354599, "epoch": 0.667913438418381, "eta/annotator_0": 0.915603518486023, "grad_norm": 45.111787797042865, "learning_rate": 2.09595676200531e-07, "loss": 0.5852, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6328125, "rewards/margins": 0.191162109375, "rewards/rejected": -0.8251953125, "step": 625 }, { "agreement_weights/mean": 0.9292103052139282, "agreement_weights/std": 0.14386720955371857, "epoch": 0.6689820999198504, "eta/annotator_0": 0.9186360836029053, "grad_norm": 43.55719706719897, "learning_rate": 2.0839902994214372e-07, "loss": 0.5925, "rewards/accuracies": 0.75, "rewards/chosen": -0.60546875, "rewards/margins": 0.16357421875, "rewards/rejected": -0.7705078125, "step": 626 }, { "agreement_weights/mean": 0.9604015350341797, "agreement_weights/std": 0.07196030020713806, "epoch": 0.6700507614213198, "eta/annotator_0": 0.9196469187736511, "grad_norm": 182.78531177028452, "learning_rate": 2.0720435962250223e-07, "loss": 0.778, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6025390625, "rewards/margins": 0.2099609375, "rewards/rejected": -0.8115234375, "step": 627 }, { "agreement_weights/mean": 0.9375889301300049, "agreement_weights/std": 0.10297726094722748, "epoch": 0.6711194229227893, "eta/annotator_0": 0.919339656829834, "grad_norm": 45.00638542765039, "learning_rate": 2.0601168191236387e-07, "loss": 0.6542, "rewards/accuracies": 0.75, "rewards/chosen": -0.6044921875, "rewards/margins": 0.1826171875, "rewards/rejected": -0.7880859375, "step": 628 }, { "agreement_weights/mean": 0.9614737033843994, "agreement_weights/std": 0.05288107693195343, "epoch": 0.6721880844242586, "eta/annotator_0": 0.9184179902076721, "grad_norm": 48.78303371550856, "learning_rate": 2.0482101345468003e-07, "loss": 0.627, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5771484375, "rewards/margins": 0.2047119140625, "rewards/rejected": -0.7822265625, "step": 629 }, { "agreement_weights/mean": 0.9534388780593872, "agreement_weights/std": 0.08347099274396896, "epoch": 0.673256745925728, "eta/annotator_0": 0.9184179902076721, "grad_norm": 40.67987704375646, "learning_rate": 2.0363237086436475e-07, "loss": 0.5708, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6357421875, "rewards/margins": 0.1756591796875, "rewards/rejected": -0.8115234375, "step": 630 }, { "agreement_weights/mean": 0.9592984318733215, "agreement_weights/std": 0.05552481487393379, "epoch": 0.6743254074271975, "eta/annotator_0": 0.9202330708503723, "grad_norm": 38.56705589810788, "learning_rate": 2.0244577072806272e-07, "loss": 0.6218, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6083984375, "rewards/margins": 0.1875, "rewards/rejected": -0.7958984375, "step": 631 }, { "agreement_weights/mean": 0.9354554414749146, "agreement_weights/std": 0.1096133217215538, "epoch": 0.6753940689286668, "eta/annotator_0": 0.9208381175994873, "grad_norm": 50.396372041600834, "learning_rate": 2.0126122960391718e-07, "loss": 0.6709, "rewards/accuracies": 0.703125, "rewards/chosen": -0.5517578125, "rewards/margins": 0.15869140625, "rewards/rejected": -0.7099609375, "step": 632 }, { "agreement_weights/mean": 0.9610068798065186, "agreement_weights/std": 0.0565631128847599, "epoch": 0.6764627304301363, "eta/annotator_0": 0.9216228723526001, "grad_norm": 34.379348339223974, "learning_rate": 2.000787640213398e-07, "loss": 0.5795, "rewards/accuracies": 0.75, "rewards/chosen": -0.55908203125, "rewards/margins": 0.19140625, "rewards/rejected": -0.7509765625, "step": 633 }, { "agreement_weights/mean": 0.9420973062515259, "agreement_weights/std": 0.13033251464366913, "epoch": 0.6775313919316056, "eta/annotator_0": 0.923977255821228, "grad_norm": 53.17016095105233, "learning_rate": 1.988983904807795e-07, "loss": 0.5949, "rewards/accuracies": 0.75, "rewards/chosen": -0.59033203125, "rewards/margins": 0.1630859375, "rewards/rejected": -0.7529296875, "step": 634 }, { "agreement_weights/mean": 0.9689716100692749, "agreement_weights/std": 0.04067499563097954, "epoch": 0.6786000534330751, "eta/annotator_0": 0.923977255821228, "grad_norm": 47.10019291537987, "learning_rate": 1.9772012545349191e-07, "loss": 0.5528, "rewards/accuracies": 0.75, "rewards/chosen": -0.5517578125, "rewards/margins": 0.196533203125, "rewards/rejected": -0.7490234375, "step": 635 }, { "agreement_weights/mean": 0.9554789662361145, "agreement_weights/std": 0.08526583015918732, "epoch": 0.6796687149345445, "eta/annotator_0": 0.9273424744606018, "grad_norm": 46.01229966877789, "learning_rate": 1.9654398538131027e-07, "loss": 0.6445, "rewards/accuracies": 0.796875, "rewards/chosen": -0.59130859375, "rewards/margins": 0.185546875, "rewards/rejected": -0.77734375, "step": 636 }, { "agreement_weights/mean": 0.9408116340637207, "agreement_weights/std": 0.11166661977767944, "epoch": 0.6807373764360138, "eta/annotator_0": 0.9284641742706299, "grad_norm": 32.46133954648165, "learning_rate": 1.9536998667641572e-07, "loss": 0.5543, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6279296875, "rewards/margins": 0.1796875, "rewards/rejected": -0.8076171875, "step": 637 }, { "agreement_weights/mean": 0.9691193699836731, "agreement_weights/std": 0.045458536595106125, "epoch": 0.6818060379374833, "eta/annotator_0": 0.9295676946640015, "grad_norm": 43.14777650581309, "learning_rate": 1.9419814572110766e-07, "loss": 0.5246, "rewards/accuracies": 0.875, "rewards/chosen": -0.5673828125, "rewards/margins": 0.212158203125, "rewards/rejected": -0.779296875, "step": 638 }, { "agreement_weights/mean": 0.9714819192886353, "agreement_weights/std": 0.053452592343091965, "epoch": 0.6828746994389527, "eta/annotator_0": 0.9328781962394714, "grad_norm": 30.6112027418188, "learning_rate": 1.93028478867576e-07, "loss": 0.4397, "rewards/accuracies": 0.890625, "rewards/chosen": -0.5625, "rewards/margins": 0.25146484375, "rewards/rejected": -0.814453125, "step": 639 }, { "agreement_weights/mean": 0.9670782089233398, "agreement_weights/std": 0.04969583451747894, "epoch": 0.6839433609404221, "eta/annotator_0": 0.9328781962394714, "grad_norm": 42.93616371725341, "learning_rate": 1.918610024376727e-07, "loss": 0.5777, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6220703125, "rewards/margins": 0.229736328125, "rewards/rejected": -0.8515625, "step": 640 }, { "agreement_weights/mean": 0.9569634199142456, "agreement_weights/std": 0.07156334817409515, "epoch": 0.6850120224418915, "eta/annotator_0": 0.934898316860199, "grad_norm": 34.167931399337434, "learning_rate": 1.9069573272268378e-07, "loss": 0.5937, "rewards/accuracies": 0.828125, "rewards/chosen": -0.56201171875, "rewards/margins": 0.1962890625, "rewards/rejected": -0.7578125, "step": 641 }, { "agreement_weights/mean": 0.9602463841438293, "agreement_weights/std": 0.08029545843601227, "epoch": 0.686080683943361, "eta/annotator_0": 0.9355717301368713, "grad_norm": 35.21362786106944, "learning_rate": 1.895326859831024e-07, "loss": 0.5803, "rewards/accuracies": 0.78125, "rewards/chosen": -0.556640625, "rewards/margins": 0.207763671875, "rewards/rejected": -0.765625, "step": 642 }, { "agreement_weights/mean": 0.9679750800132751, "agreement_weights/std": 0.06001466140151024, "epoch": 0.6871493454448303, "eta/annotator_0": 0.9364656805992126, "grad_norm": 32.84306141079309, "learning_rate": 1.883718784484012e-07, "loss": 0.4921, "rewards/accuracies": 0.828125, "rewards/chosen": -0.580078125, "rewards/margins": 0.26025390625, "rewards/rejected": -0.83984375, "step": 643 }, { "agreement_weights/mean": 0.9575557708740234, "agreement_weights/std": 0.07270912826061249, "epoch": 0.6882180069462998, "eta/annotator_0": 0.9391475319862366, "grad_norm": 34.03503197701818, "learning_rate": 1.8721332631680683e-07, "loss": 0.6043, "rewards/accuracies": 0.765625, "rewards/chosen": -0.603515625, "rewards/margins": 0.19580078125, "rewards/rejected": -0.798828125, "step": 644 }, { "agreement_weights/mean": 0.9586673378944397, "agreement_weights/std": 0.06447800993919373, "epoch": 0.6892866684477692, "eta/annotator_0": 0.9391475319862366, "grad_norm": 41.31432154307874, "learning_rate": 1.8605704575507348e-07, "loss": 0.6184, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5693359375, "rewards/margins": 0.21044921875, "rewards/rejected": -0.779296875, "step": 645 }, { "agreement_weights/mean": 0.9665771126747131, "agreement_weights/std": 0.06667490303516388, "epoch": 0.6903553299492385, "eta/annotator_0": 0.9366187453269958, "grad_norm": 50.0913199909498, "learning_rate": 1.8490305289825663e-07, "loss": 0.5861, "rewards/accuracies": 0.796875, "rewards/chosen": -0.60546875, "rewards/margins": 0.244140625, "rewards/rejected": -0.849609375, "step": 646 }, { "agreement_weights/mean": 0.9741883873939514, "agreement_weights/std": 0.043632492423057556, "epoch": 0.691423991450708, "eta/annotator_0": 0.9357758164405823, "grad_norm": 27.297853091103857, "learning_rate": 1.8375136384948925e-07, "loss": 0.4758, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5556640625, "rewards/margins": 0.23388671875, "rewards/rejected": -0.791015625, "step": 647 }, { "agreement_weights/mean": 0.9563730955123901, "agreement_weights/std": 0.08539050817489624, "epoch": 0.6924926529521774, "eta/annotator_0": 0.9369992017745972, "grad_norm": 75.8166514159493, "learning_rate": 1.8260199467975563e-07, "loss": 0.7446, "rewards/accuracies": 0.796875, "rewards/chosen": -0.53173828125, "rewards/margins": 0.229736328125, "rewards/rejected": -0.7607421875, "step": 648 }, { "agreement_weights/mean": 0.9596377611160278, "agreement_weights/std": 0.0799289345741272, "epoch": 0.6935613144536468, "eta/annotator_0": 0.9406693577766418, "grad_norm": 37.273664695971014, "learning_rate": 1.814549614276684e-07, "loss": 0.5834, "rewards/accuracies": 0.84375, "rewards/chosen": -0.630859375, "rewards/margins": 0.152099609375, "rewards/rejected": -0.78125, "step": 649 }, { "agreement_weights/mean": 0.9611556529998779, "agreement_weights/std": 0.07619775831699371, "epoch": 0.6946299759551162, "eta/annotator_0": 0.9406693577766418, "grad_norm": 38.197793345631915, "learning_rate": 1.8031028009924398e-07, "loss": 0.525, "rewards/accuracies": 0.859375, "rewards/chosen": -0.560546875, "rewards/margins": 0.193603515625, "rewards/rejected": -0.75390625, "step": 650 }, { "agreement_weights/mean": 0.9497256278991699, "agreement_weights/std": 0.08397112041711807, "epoch": 0.6956986374565857, "eta/annotator_0": 0.9416694045066833, "grad_norm": 56.00489391829543, "learning_rate": 1.7916796666767914e-07, "loss": 0.6316, "rewards/accuracies": 0.75, "rewards/chosen": -0.56005859375, "rewards/margins": 0.185302734375, "rewards/rejected": -0.7451171875, "step": 651 }, { "agreement_weights/mean": 0.9748672246932983, "agreement_weights/std": 0.031697843223810196, "epoch": 0.696767298958055, "eta/annotator_0": 0.9420027732849121, "grad_norm": 45.86375764339488, "learning_rate": 1.7802803707312876e-07, "loss": 0.5749, "rewards/accuracies": 0.75, "rewards/chosen": -0.57666015625, "rewards/margins": 0.220458984375, "rewards/rejected": -0.798828125, "step": 652 }, { "agreement_weights/mean": 0.9576496481895447, "agreement_weights/std": 0.0722060576081276, "epoch": 0.6978359604595245, "eta/annotator_0": 0.942091166973114, "grad_norm": 37.6957500303809, "learning_rate": 1.7689050722248287e-07, "loss": 0.6213, "rewards/accuracies": 0.71875, "rewards/chosen": -0.640625, "rewards/margins": 0.1658935546875, "rewards/rejected": -0.806640625, "step": 653 }, { "agreement_weights/mean": 0.9751446843147278, "agreement_weights/std": 0.027854975312948227, "epoch": 0.6989046219609939, "eta/annotator_0": 0.9423564076423645, "grad_norm": 37.65265198562192, "learning_rate": 1.7575539298914463e-07, "loss": 0.6441, "rewards/accuracies": 0.78125, "rewards/chosen": -0.59912109375, "rewards/margins": 0.20556640625, "rewards/rejected": -0.8056640625, "step": 654 }, { "agreement_weights/mean": 0.9457140564918518, "agreement_weights/std": 0.13338758051395416, "epoch": 0.6999732834624632, "eta/annotator_0": 0.9423564076423645, "grad_norm": 65.3437983226153, "learning_rate": 1.746227102128092e-07, "loss": 0.554, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6689453125, "rewards/margins": 0.24169921875, "rewards/rejected": -0.91015625, "step": 655 }, { "agreement_weights/mean": 0.9634288549423218, "agreement_weights/std": 0.07088147848844528, "epoch": 0.7010419449639327, "eta/annotator_0": 0.9441945552825928, "grad_norm": 35.58746817353261, "learning_rate": 1.7349247469924264e-07, "loss": 0.5185, "rewards/accuracies": 0.84375, "rewards/chosen": -0.591796875, "rewards/margins": 0.208984375, "rewards/rejected": -0.7998046875, "step": 656 }, { "agreement_weights/mean": 0.9510853290557861, "agreement_weights/std": 0.10964293777942657, "epoch": 0.7021106064654021, "eta/annotator_0": 0.9448072910308838, "grad_norm": 32.27256901160451, "learning_rate": 1.7236470222006083e-07, "loss": 0.5343, "rewards/accuracies": 0.765625, "rewards/chosen": -0.66015625, "rewards/margins": 0.2144775390625, "rewards/rejected": -0.875, "step": 657 }, { "agreement_weights/mean": 0.9524763822555542, "agreement_weights/std": 0.10183306038379669, "epoch": 0.7031792679668715, "eta/annotator_0": 0.9437352418899536, "grad_norm": 135.37710239664068, "learning_rate": 1.7123940851251016e-07, "loss": 0.8069, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6748046875, "rewards/margins": 0.16082763671875, "rewards/rejected": -0.8369140625, "step": 658 }, { "agreement_weights/mean": 0.956710696220398, "agreement_weights/std": 0.07774607837200165, "epoch": 0.7042479294683409, "eta/annotator_0": 0.9405189752578735, "grad_norm": 40.671723897209745, "learning_rate": 1.7011660927924746e-07, "loss": 0.6491, "rewards/accuracies": 0.75, "rewards/chosen": -0.615234375, "rewards/margins": 0.17236328125, "rewards/rejected": -0.787109375, "step": 659 }, { "agreement_weights/mean": 0.9648303389549255, "agreement_weights/std": 0.06745314598083496, "epoch": 0.7053165909698104, "eta/annotator_0": 0.9405189752578735, "grad_norm": 44.37884771362375, "learning_rate": 1.6899632018812092e-07, "loss": 0.5241, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5849609375, "rewards/margins": 0.202880859375, "rewards/rejected": -0.787109375, "step": 660 }, { "agreement_weights/mean": 0.979529857635498, "agreement_weights/std": 0.037371598184108734, "epoch": 0.7063852524712797, "eta/annotator_0": 0.9424874186515808, "grad_norm": 55.38799004532307, "learning_rate": 1.6787855687195178e-07, "loss": 0.459, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5517578125, "rewards/margins": 0.27978515625, "rewards/rejected": -0.8310546875, "step": 661 }, { "agreement_weights/mean": 0.9794843196868896, "agreement_weights/std": 0.028132878243923187, "epoch": 0.7074539139727491, "eta/annotator_0": 0.9431436061859131, "grad_norm": 33.98207579443575, "learning_rate": 1.6676333492831536e-07, "loss": 0.4846, "rewards/accuracies": 0.859375, "rewards/chosen": -0.59765625, "rewards/margins": 0.244873046875, "rewards/rejected": -0.841796875, "step": 662 }, { "agreement_weights/mean": 0.9778258800506592, "agreement_weights/std": 0.036689162254333496, "epoch": 0.7085225754742186, "eta/annotator_0": 0.9439764618873596, "grad_norm": 28.57691226147559, "learning_rate": 1.656506699193245e-07, "loss": 0.4577, "rewards/accuracies": 0.875, "rewards/chosen": -0.6396484375, "rewards/margins": 0.24658203125, "rewards/rejected": -0.8857421875, "step": 663 }, { "agreement_weights/mean": 0.943852424621582, "agreement_weights/std": 0.11138604581356049, "epoch": 0.7095912369756879, "eta/annotator_0": 0.9464750289916992, "grad_norm": 40.81803222236026, "learning_rate": 1.6454057737141182e-07, "loss": 0.629, "rewards/accuracies": 0.75, "rewards/chosen": -0.6806640625, "rewards/margins": 0.1722412109375, "rewards/rejected": -0.853515625, "step": 664 }, { "agreement_weights/mean": 0.9279455542564392, "agreement_weights/std": 0.16816776990890503, "epoch": 0.7106598984771574, "eta/annotator_0": 0.9464750289916992, "grad_norm": 45.34048884004611, "learning_rate": 1.634330727751127e-07, "loss": 0.5739, "rewards/accuracies": 0.75, "rewards/chosen": -0.6142578125, "rewards/margins": 0.181884765625, "rewards/rejected": -0.7978515625, "step": 665 }, { "agreement_weights/mean": 0.9705685377120972, "agreement_weights/std": 0.036005206406116486, "epoch": 0.7117285599786267, "eta/annotator_0": 0.9415115118026733, "grad_norm": 47.42535278661807, "learning_rate": 1.6232817158484995e-07, "loss": 0.6438, "rewards/accuracies": 0.78125, "rewards/chosen": -0.619140625, "rewards/margins": 0.2099609375, "rewards/rejected": -0.8291015625, "step": 666 }, { "agreement_weights/mean": 0.958422064781189, "agreement_weights/std": 0.10956359654664993, "epoch": 0.7127972214800962, "eta/annotator_0": 0.9398569464683533, "grad_norm": 43.760350359561365, "learning_rate": 1.6122588921871776e-07, "loss": 0.5414, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6064453125, "rewards/margins": 0.264892578125, "rewards/rejected": -0.8720703125, "step": 667 }, { "agreement_weights/mean": 0.9551790952682495, "agreement_weights/std": 0.08261796087026596, "epoch": 0.7138658829815656, "eta/annotator_0": 0.9407854080200195, "grad_norm": 46.59015570609556, "learning_rate": 1.6012624105826611e-07, "loss": 0.5745, "rewards/accuracies": 0.8125, "rewards/chosen": -0.58251953125, "rewards/margins": 0.21630859375, "rewards/rejected": -0.7978515625, "step": 668 }, { "agreement_weights/mean": 0.9700945615768433, "agreement_weights/std": 0.05719522386789322, "epoch": 0.714934544483035, "eta/annotator_0": 0.9435707330703735, "grad_norm": 67.682409539064, "learning_rate": 1.5902924244828707e-07, "loss": 0.5289, "rewards/accuracies": 0.90625, "rewards/chosen": -0.556640625, "rewards/margins": 0.256103515625, "rewards/rejected": -0.8125, "step": 669 }, { "agreement_weights/mean": 0.9702168703079224, "agreement_weights/std": 0.04141305387020111, "epoch": 0.7160032059845044, "eta/annotator_0": 0.9435707330703735, "grad_norm": 48.717950117326374, "learning_rate": 1.579349086965997e-07, "loss": 0.6241, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6513671875, "rewards/margins": 0.187255859375, "rewards/rejected": -0.8388671875, "step": 670 }, { "agreement_weights/mean": 0.957324206829071, "agreement_weights/std": 0.083514504134655, "epoch": 0.7170718674859738, "eta/annotator_0": 0.9463272094726562, "grad_norm": 46.4029313278053, "learning_rate": 1.5684325507383715e-07, "loss": 0.5808, "rewards/accuracies": 0.75, "rewards/chosen": -0.56884765625, "rewards/margins": 0.181396484375, "rewards/rejected": -0.75, "step": 671 }, { "agreement_weights/mean": 0.9715442061424255, "agreement_weights/std": 0.05131712928414345, "epoch": 0.7181405289874432, "eta/annotator_0": 0.9472460746765137, "grad_norm": 57.40597590658776, "learning_rate": 1.5575429681323347e-07, "loss": 0.5447, "rewards/accuracies": 0.84375, "rewards/chosen": -0.634765625, "rewards/margins": 0.21044921875, "rewards/rejected": -0.845703125, "step": 672 }, { "agreement_weights/mean": 0.9717358350753784, "agreement_weights/std": 0.0522649772465229, "epoch": 0.7192091904889126, "eta/annotator_0": 0.9480736255645752, "grad_norm": 43.510024901677305, "learning_rate": 1.5466804911041039e-07, "loss": 0.4859, "rewards/accuracies": 0.828125, "rewards/chosen": -0.64453125, "rewards/margins": 0.247314453125, "rewards/rejected": -0.8916015625, "step": 673 }, { "agreement_weights/mean": 0.9746191501617432, "agreement_weights/std": 0.05380440503358841, "epoch": 0.7202778519903821, "eta/annotator_0": 0.9505563974380493, "grad_norm": 41.24929023219144, "learning_rate": 1.535845271231662e-07, "loss": 0.4521, "rewards/accuracies": 0.90625, "rewards/chosen": -0.611328125, "rewards/margins": 0.2744140625, "rewards/rejected": -0.884765625, "step": 674 }, { "agreement_weights/mean": 0.9684584140777588, "agreement_weights/std": 0.07091091573238373, "epoch": 0.7213465134918514, "eta/annotator_0": 0.9505563974380493, "grad_norm": 37.61913697401933, "learning_rate": 1.525037459712638e-07, "loss": 0.4827, "rewards/accuracies": 0.828125, "rewards/chosen": -0.60986328125, "rewards/margins": 0.22265625, "rewards/rejected": -0.8310546875, "step": 675 }, { "agreement_weights/mean": 0.9551772475242615, "agreement_weights/std": 0.07217834144830704, "epoch": 0.7224151749933209, "eta/annotator_0": 0.9493564367294312, "grad_norm": 50.19784852454357, "learning_rate": 1.5142572073621937e-07, "loss": 0.735, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6220703125, "rewards/margins": 0.183349609375, "rewards/rejected": -0.8037109375, "step": 676 }, { "agreement_weights/mean": 0.9559512734413147, "agreement_weights/std": 0.09159605205059052, "epoch": 0.7234838364947903, "eta/annotator_0": 0.9489564895629883, "grad_norm": 40.59366779210095, "learning_rate": 1.5035046646109267e-07, "loss": 0.6315, "rewards/accuracies": 0.78125, "rewards/chosen": -0.677734375, "rewards/margins": 0.204833984375, "rewards/rejected": -0.880859375, "step": 677 }, { "agreement_weights/mean": 0.9640968441963196, "agreement_weights/std": 0.06928896903991699, "epoch": 0.7245524979962596, "eta/annotator_0": 0.9486393928527832, "grad_norm": 85.892716918789, "learning_rate": 1.4927799815027647e-07, "loss": 0.7283, "rewards/accuracies": 0.765625, "rewards/chosen": -0.64501953125, "rewards/margins": 0.25390625, "rewards/rejected": -0.8984375, "step": 678 }, { "agreement_weights/mean": 0.9510443806648254, "agreement_weights/std": 0.11813227832317352, "epoch": 0.7256211594977291, "eta/annotator_0": 0.9476882219314575, "grad_norm": 37.268507978793764, "learning_rate": 1.4820833076928758e-07, "loss": 0.5447, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6865234375, "rewards/margins": 0.196044921875, "rewards/rejected": -0.8837890625, "step": 679 }, { "agreement_weights/mean": 0.9512507319450378, "agreement_weights/std": 0.10553485155105591, "epoch": 0.7266898209991985, "eta/annotator_0": 0.9476882219314575, "grad_norm": 53.4054853351542, "learning_rate": 1.4714147924455792e-07, "loss": 0.5086, "rewards/accuracies": 0.875, "rewards/chosen": -0.6328125, "rewards/margins": 0.194091796875, "rewards/rejected": -0.826171875, "step": 680 }, { "agreement_weights/mean": 0.9766183495521545, "agreement_weights/std": 0.04164055734872818, "epoch": 0.7277584825006679, "eta/annotator_0": 0.9460669755935669, "grad_norm": 76.82520203430494, "learning_rate": 1.460774584632258e-07, "loss": 0.7526, "rewards/accuracies": 0.828125, "rewards/chosen": -0.5859375, "rewards/margins": 0.3046875, "rewards/rejected": -0.890625, "step": 681 }, { "agreement_weights/mean": 0.9584197998046875, "agreement_weights/std": 0.07967985421419144, "epoch": 0.7288271440021373, "eta/annotator_0": 0.9455265998840332, "grad_norm": 38.33971766718476, "learning_rate": 1.4501628327292895e-07, "loss": 0.5879, "rewards/accuracies": 0.765625, "rewards/chosen": -0.662109375, "rewards/margins": 0.206787109375, "rewards/rejected": -0.8701171875, "step": 682 }, { "agreement_weights/mean": 0.9469456076622009, "agreement_weights/std": 0.11692676693201065, "epoch": 0.7298958055036068, "eta/annotator_0": 0.9458552002906799, "grad_norm": 39.45523954413076, "learning_rate": 1.4395796848159698e-07, "loss": 0.5745, "rewards/accuracies": 0.78125, "rewards/chosen": -0.626953125, "rewards/margins": 0.18896484375, "rewards/rejected": -0.81640625, "step": 683 }, { "agreement_weights/mean": 0.9488943815231323, "agreement_weights/std": 0.10078096389770508, "epoch": 0.7309644670050761, "eta/annotator_0": 0.9468410611152649, "grad_norm": 50.24977466783253, "learning_rate": 1.4290252885724427e-07, "loss": 0.6541, "rewards/accuracies": 0.796875, "rewards/chosen": -0.693359375, "rewards/margins": 0.2222900390625, "rewards/rejected": -0.9150390625, "step": 684 }, { "agreement_weights/mean": 0.9362011551856995, "agreement_weights/std": 0.11911093443632126, "epoch": 0.7320331285065456, "eta/annotator_0": 0.9468410611152649, "grad_norm": 43.111954449687374, "learning_rate": 1.4184997912776478e-07, "loss": 0.5949, "rewards/accuracies": 0.796875, "rewards/chosen": -0.65625, "rewards/margins": 0.17529296875, "rewards/rejected": -0.8310546875, "step": 685 }, { "agreement_weights/mean": 0.9538493752479553, "agreement_weights/std": 0.10223887860774994, "epoch": 0.733101790008015, "eta/annotator_0": 0.9440810084342957, "grad_norm": 60.369620939743065, "learning_rate": 1.4080033398072593e-07, "loss": 0.558, "rewards/accuracies": 0.78125, "rewards/chosen": -0.689453125, "rewards/margins": 0.191650390625, "rewards/rejected": -0.8818359375, "step": 686 }, { "agreement_weights/mean": 0.9643711447715759, "agreement_weights/std": 0.07340732216835022, "epoch": 0.7341704515094843, "eta/annotator_0": 0.9431610107421875, "grad_norm": 67.71860788807692, "learning_rate": 1.3975360806316365e-07, "loss": 0.673, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6123046875, "rewards/margins": 0.235107421875, "rewards/rejected": -0.8466796875, "step": 687 }, { "agreement_weights/mean": 0.9682733416557312, "agreement_weights/std": 0.07095304876565933, "epoch": 0.7352391130109538, "eta/annotator_0": 0.9441512823104858, "grad_norm": 42.079918252985244, "learning_rate": 1.387098159813785e-07, "loss": 0.4634, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6513671875, "rewards/margins": 0.235107421875, "rewards/rejected": -0.8876953125, "step": 688 }, { "agreement_weights/mean": 0.9482982158660889, "agreement_weights/std": 0.09473034739494324, "epoch": 0.7363077745124232, "eta/annotator_0": 0.9471220374107361, "grad_norm": 60.34152961787, "learning_rate": 1.3766897230073102e-07, "loss": 0.6468, "rewards/accuracies": 0.765625, "rewards/chosen": -0.66796875, "rewards/margins": 0.1866455078125, "rewards/rejected": -0.8544921875, "step": 689 }, { "agreement_weights/mean": 0.9685348272323608, "agreement_weights/std": 0.04933612793684006, "epoch": 0.7373764360138926, "eta/annotator_0": 0.9471220374107361, "grad_norm": 40.09419807882187, "learning_rate": 1.366310915454393e-07, "loss": 0.5735, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6787109375, "rewards/margins": 0.20941162109375, "rewards/rejected": -0.888671875, "step": 690 }, { "agreement_weights/mean": 0.9576905965805054, "agreement_weights/std": 0.07982119172811508, "epoch": 0.738445097515362, "eta/annotator_0": 0.9484106302261353, "grad_norm": 43.58102545425271, "learning_rate": 1.35596188198376e-07, "loss": 0.6439, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6806640625, "rewards/margins": 0.150146484375, "rewards/rejected": -0.8310546875, "step": 691 }, { "agreement_weights/mean": 0.9548745155334473, "agreement_weights/std": 0.09767884016036987, "epoch": 0.7395137590168315, "eta/annotator_0": 0.9488402009010315, "grad_norm": 60.13159658714432, "learning_rate": 1.345642767008658e-07, "loss": 0.6012, "rewards/accuracies": 0.8125, "rewards/chosen": -0.64453125, "rewards/margins": 0.24609375, "rewards/rejected": -0.8896484375, "step": 692 }, { "agreement_weights/mean": 0.9531097412109375, "agreement_weights/std": 0.10203325003385544, "epoch": 0.7405824205183008, "eta/annotator_0": 0.9485681056976318, "grad_norm": 87.44757727627088, "learning_rate": 1.3353537145248474e-07, "loss": 0.557, "rewards/accuracies": 0.828125, "rewards/chosen": -0.650390625, "rewards/margins": 0.237060546875, "rewards/rejected": -0.8876953125, "step": 693 }, { "agreement_weights/mean": 0.9762753248214722, "agreement_weights/std": 0.051340725272893906, "epoch": 0.7416510820197703, "eta/annotator_0": 0.9477518200874329, "grad_norm": 43.64459695392377, "learning_rate": 1.325094868108587e-07, "loss": 0.4797, "rewards/accuracies": 0.921875, "rewards/chosen": -0.6298828125, "rewards/margins": 0.279052734375, "rewards/rejected": -0.9072265625, "step": 694 }, { "agreement_weights/mean": 0.9540632963180542, "agreement_weights/std": 0.10796529799699783, "epoch": 0.7427197435212396, "eta/annotator_0": 0.9477518200874329, "grad_norm": 45.64357322065804, "learning_rate": 1.3148663709146299e-07, "loss": 0.4819, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5771484375, "rewards/margins": 0.2232666015625, "rewards/rejected": -0.7998046875, "step": 695 }, { "agreement_weights/mean": 0.9657195806503296, "agreement_weights/std": 0.05971314758062363, "epoch": 0.743788405022709, "eta/annotator_0": 0.9496124982833862, "grad_norm": 63.51519423215016, "learning_rate": 1.3046683656742294e-07, "loss": 0.6045, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5625, "rewards/margins": 0.239013671875, "rewards/rejected": -0.8017578125, "step": 696 }, { "agreement_weights/mean": 0.9648618698120117, "agreement_weights/std": 0.06796582043170929, "epoch": 0.7448570665241785, "eta/annotator_0": 0.9502326846122742, "grad_norm": 33.06174142322634, "learning_rate": 1.294500994693147e-07, "loss": 0.4953, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7373046875, "rewards/margins": 0.2216796875, "rewards/rejected": -0.958984375, "step": 697 }, { "agreement_weights/mean": 0.944225549697876, "agreement_weights/std": 0.11749176681041718, "epoch": 0.7459257280256478, "eta/annotator_0": 0.9507219195365906, "grad_norm": 37.770763068165735, "learning_rate": 1.284364399849663e-07, "loss": 0.617, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6904296875, "rewards/margins": 0.1527099609375, "rewards/rejected": -0.8427734375, "step": 698 }, { "agreement_weights/mean": 0.9486749172210693, "agreement_weights/std": 0.114102303981781, "epoch": 0.7469943895271173, "eta/annotator_0": 0.9521897435188293, "grad_norm": 45.60558311870531, "learning_rate": 1.274258722592602e-07, "loss": 0.6053, "rewards/accuracies": 0.78125, "rewards/chosen": -0.611328125, "rewards/margins": 0.185791015625, "rewards/rejected": -0.796875, "step": 699 }, { "agreement_weights/mean": 0.9472367167472839, "agreement_weights/std": 0.10915284603834152, "epoch": 0.7480630510285867, "eta/annotator_0": 0.9521897435188293, "grad_norm": 35.8359150005709, "learning_rate": 1.264184103939351e-07, "loss": 0.6272, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6162109375, "rewards/margins": 0.215087890625, "rewards/rejected": -0.8310546875, "step": 700 }, { "epoch": 0.7480630510285867, "eta/annotator_0": 0.9527943134307861, "eval_agreement_weights/mean": 0.9597077369689941, "eval_agreement_weights/std": 0.0806705579161644, "eval_loss": 0.5673193335533142, "eval_rewards/accuracies": 0.8009259104728699, "eval_rewards/chosen": -0.6401486396789551, "eval_rewards/margins": 0.21507400274276733, "eval_rewards/rejected": -0.8550558686256409, "eval_runtime": 135.222, "eval_samples_per_second": 14.502, "eval_steps_per_second": 0.91, "step": 700 }, { "agreement_weights/mean": 0.9449939727783203, "agreement_weights/std": 0.12398204207420349, "epoch": 0.7491317125300561, "eta/annotator_0": 0.9544016718864441, "grad_norm": 42.66831937300953, "learning_rate": 1.254140684473902e-07, "loss": 0.5106, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6796875, "rewards/margins": 0.235595703125, "rewards/rejected": -0.916015625, "step": 701 }, { "agreement_weights/mean": 0.9630590677261353, "agreement_weights/std": 0.06159099191427231, "epoch": 0.7502003740315255, "eta/annotator_0": 0.9544016718864441, "grad_norm": 52.605100021952154, "learning_rate": 1.2441286043448834e-07, "loss": 0.6987, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6103515625, "rewards/margins": 0.1998291015625, "rewards/rejected": -0.8115234375, "step": 702 }, { "agreement_weights/mean": 0.9818776845932007, "agreement_weights/std": 0.021882548928260803, "epoch": 0.751269035532995, "eta/annotator_0": 0.9507113695144653, "grad_norm": 44.7265428826485, "learning_rate": 1.2341480032636035e-07, "loss": 0.517, "rewards/accuracies": 0.90625, "rewards/chosen": -0.5478515625, "rewards/margins": 0.25390625, "rewards/rejected": -0.8017578125, "step": 703 }, { "agreement_weights/mean": 0.9684017896652222, "agreement_weights/std": 0.06882179528474808, "epoch": 0.7523376970344643, "eta/annotator_0": 0.9507113695144653, "grad_norm": 33.52787120236422, "learning_rate": 1.2241990205021057e-07, "loss": 0.4315, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6396484375, "rewards/margins": 0.25634765625, "rewards/rejected": -0.8955078125, "step": 704 }, { "agreement_weights/mean": 0.9746631383895874, "agreement_weights/std": 0.04047699272632599, "epoch": 0.7534063585359337, "eta/annotator_0": 0.9519948959350586, "grad_norm": 41.742813730439536, "learning_rate": 1.214281794891224e-07, "loss": 0.5489, "rewards/accuracies": 0.875, "rewards/chosen": -0.58984375, "rewards/margins": 0.21826171875, "rewards/rejected": -0.80859375, "step": 705 }, { "agreement_weights/mean": 0.9549882411956787, "agreement_weights/std": 0.09460203349590302, "epoch": 0.7544750200374032, "eta/annotator_0": 0.9532784223556519, "grad_norm": 46.362729789951445, "learning_rate": 1.2043964648186405e-07, "loss": 0.5546, "rewards/accuracies": 0.796875, "rewards/chosen": -0.7412109375, "rewards/margins": 0.220458984375, "rewards/rejected": -0.9609375, "step": 706 }, { "agreement_weights/mean": 0.9544867277145386, "agreement_weights/std": 0.1033538281917572, "epoch": 0.7555436815388725, "eta/annotator_0": 0.9532784223556519, "grad_norm": 45.56967302783537, "learning_rate": 1.1945431682269617e-07, "loss": 0.5683, "rewards/accuracies": 0.75, "rewards/chosen": -0.6064453125, "rewards/margins": 0.194091796875, "rewards/rejected": -0.7998046875, "step": 707 }, { "agreement_weights/mean": 0.9549784660339355, "agreement_weights/std": 0.128653421998024, "epoch": 0.756612343040342, "eta/annotator_0": 0.9544907808303833, "grad_norm": 62.27370171225922, "learning_rate": 1.1847220426117889e-07, "loss": 0.5565, "rewards/accuracies": 0.875, "rewards/chosen": -0.59423828125, "rewards/margins": 0.24609375, "rewards/rejected": -0.83984375, "step": 708 }, { "agreement_weights/mean": 0.982033371925354, "agreement_weights/std": 0.024804357439279556, "epoch": 0.7576810045418114, "eta/annotator_0": 0.9544907808303833, "grad_norm": 35.8154280503369, "learning_rate": 1.1749332250197992e-07, "loss": 0.4855, "rewards/accuracies": 0.90625, "rewards/chosen": -0.5703125, "rewards/margins": 0.224365234375, "rewards/rejected": -0.7939453125, "step": 709 }, { "agreement_weights/mean": 0.9529501795768738, "agreement_weights/std": 0.1057538241147995, "epoch": 0.7587496660432808, "eta/annotator_0": 0.9559385180473328, "grad_norm": 37.972431630618566, "learning_rate": 1.1651768520468385e-07, "loss": 0.6287, "rewards/accuracies": 0.75, "rewards/chosen": -0.6611328125, "rewards/margins": 0.17333984375, "rewards/rejected": -0.8349609375, "step": 710 }, { "agreement_weights/mean": 0.9622828364372253, "agreement_weights/std": 0.09022292494773865, "epoch": 0.7598183275447502, "eta/annotator_0": 0.9573861956596375, "grad_norm": 46.51143247829915, "learning_rate": 1.1554530598360068e-07, "loss": 0.501, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6513671875, "rewards/margins": 0.238037109375, "rewards/rejected": -0.888671875, "step": 711 }, { "agreement_weights/mean": 0.9754564166069031, "agreement_weights/std": 0.04188625514507294, "epoch": 0.7608869890462197, "eta/annotator_0": 0.9573861956596375, "grad_norm": 73.38895878357468, "learning_rate": 1.1457619840757669e-07, "loss": 0.6804, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6484375, "rewards/margins": 0.2607421875, "rewards/rejected": -0.91015625, "step": 712 }, { "agreement_weights/mean": 0.9690662026405334, "agreement_weights/std": 0.045888449996709824, "epoch": 0.761955650547689, "eta/annotator_0": 0.9544672966003418, "grad_norm": 35.64158589897432, "learning_rate": 1.1361037599980473e-07, "loss": 0.5733, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5634765625, "rewards/margins": 0.21826171875, "rewards/rejected": -0.78125, "step": 713 }, { "agreement_weights/mean": 0.9664050340652466, "agreement_weights/std": 0.07281643897294998, "epoch": 0.7630243120491584, "eta/annotator_0": 0.9544672966003418, "grad_norm": 41.33747522172429, "learning_rate": 1.126478522376351e-07, "loss": 0.4998, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6640625, "rewards/margins": 0.26513671875, "rewards/rejected": -0.9296875, "step": 714 }, { "agreement_weights/mean": 0.9517835378646851, "agreement_weights/std": 0.13330887258052826, "epoch": 0.7640929735506279, "eta/annotator_0": 0.9547767639160156, "grad_norm": 35.26193919465481, "learning_rate": 1.1168864055238839e-07, "loss": 0.4993, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6591796875, "rewards/margins": 0.190673828125, "rewards/rejected": -0.8505859375, "step": 715 }, { "agreement_weights/mean": 0.961007833480835, "agreement_weights/std": 0.09441328793764114, "epoch": 0.7651616350520972, "eta/annotator_0": 0.9550861120223999, "grad_norm": 65.16421869461045, "learning_rate": 1.1073275432916727e-07, "loss": 0.5385, "rewards/accuracies": 0.796875, "rewards/chosen": -0.650390625, "rewards/margins": 0.20654296875, "rewards/rejected": -0.857421875, "step": 716 }, { "agreement_weights/mean": 0.9202790856361389, "agreement_weights/std": 0.15053357183933258, "epoch": 0.7662302965535667, "eta/annotator_0": 0.9550861120223999, "grad_norm": 73.504010595169, "learning_rate": 1.0978020690667016e-07, "loss": 0.7668, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5830078125, "rewards/margins": 0.12640380859375, "rewards/rejected": -0.708984375, "step": 717 }, { "agreement_weights/mean": 0.9511942863464355, "agreement_weights/std": 0.13359639048576355, "epoch": 0.7672989580550361, "eta/annotator_0": 0.9520274996757507, "grad_norm": 35.12807128801527, "learning_rate": 1.0883101157700491e-07, "loss": 0.4848, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6826171875, "rewards/margins": 0.232177734375, "rewards/rejected": -0.9150390625, "step": 718 }, { "agreement_weights/mean": 0.9359288215637207, "agreement_weights/std": 0.1523725688457489, "epoch": 0.7683676195565055, "eta/annotator_0": 0.9520274996757507, "grad_norm": 83.63613171112353, "learning_rate": 1.0788518158550314e-07, "loss": 0.8274, "rewards/accuracies": 0.765625, "rewards/chosen": -0.681640625, "rewards/margins": 0.14447021484375, "rewards/rejected": -0.8271484375, "step": 719 }, { "agreement_weights/mean": 0.969689130783081, "agreement_weights/std": 0.050304289907217026, "epoch": 0.7694362810579749, "eta/annotator_0": 0.9507601857185364, "grad_norm": 27.12822942825533, "learning_rate": 1.0694273013053587e-07, "loss": 0.5056, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6181640625, "rewards/margins": 0.225830078125, "rewards/rejected": -0.84375, "step": 720 }, { "agreement_weights/mean": 0.9650056958198547, "agreement_weights/std": 0.05473656952381134, "epoch": 0.7705049425594444, "eta/annotator_0": 0.949492871761322, "grad_norm": 42.57251084390127, "learning_rate": 1.0600367036332907e-07, "loss": 0.6205, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6513671875, "rewards/margins": 0.2117919921875, "rewards/rejected": -0.86328125, "step": 721 }, { "agreement_weights/mean": 0.966475784778595, "agreement_weights/std": 0.05338292568922043, "epoch": 0.7715736040609137, "eta/annotator_0": 0.949492871761322, "grad_norm": 39.151530841560124, "learning_rate": 1.0506801538778e-07, "loss": 0.6219, "rewards/accuracies": 0.75, "rewards/chosen": -0.63671875, "rewards/margins": 0.1923828125, "rewards/rejected": -0.830078125, "step": 722 }, { "agreement_weights/mean": 0.9675735235214233, "agreement_weights/std": 0.05785664916038513, "epoch": 0.7726422655623831, "eta/annotator_0": 0.9514286518096924, "grad_norm": 45.07846006254177, "learning_rate": 1.0413577826027463e-07, "loss": 0.5532, "rewards/accuracies": 0.75, "rewards/chosen": -0.6552734375, "rewards/margins": 0.215576171875, "rewards/rejected": -0.873046875, "step": 723 }, { "agreement_weights/mean": 0.9607494473457336, "agreement_weights/std": 0.09278662502765656, "epoch": 0.7737109270638525, "eta/annotator_0": 0.9514286518096924, "grad_norm": 37.0464870353729, "learning_rate": 1.0320697198950548e-07, "loss": 0.5138, "rewards/accuracies": 0.828125, "rewards/chosen": -0.716796875, "rewards/margins": 0.238525390625, "rewards/rejected": -0.9560546875, "step": 724 }, { "agreement_weights/mean": 0.9619641304016113, "agreement_weights/std": 0.07654037326574326, "epoch": 0.7747795885653219, "eta/annotator_0": 0.9523402452468872, "grad_norm": 41.70845037407538, "learning_rate": 1.0228160953628966e-07, "loss": 0.5683, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6533203125, "rewards/margins": 0.2255859375, "rewards/rejected": -0.8779296875, "step": 725 }, { "agreement_weights/mean": 0.9424902200698853, "agreement_weights/std": 0.1196051836013794, "epoch": 0.7758482500667914, "eta/annotator_0": 0.9532518982887268, "grad_norm": 35.15874693678999, "learning_rate": 1.0135970381338852e-07, "loss": 0.5808, "rewards/accuracies": 0.796875, "rewards/chosen": -0.59326171875, "rewards/margins": 0.193603515625, "rewards/rejected": -0.7880859375, "step": 726 }, { "agreement_weights/mean": 0.9549755454063416, "agreement_weights/std": 0.07397236675024033, "epoch": 0.7769169115682607, "eta/annotator_0": 0.9532518982887268, "grad_norm": 44.747549429758394, "learning_rate": 1.0044126768532729e-07, "loss": 0.7079, "rewards/accuracies": 0.703125, "rewards/chosen": -0.6826171875, "rewards/margins": 0.17962646484375, "rewards/rejected": -0.8623046875, "step": 727 }, { "agreement_weights/mean": 0.9526090621948242, "agreement_weights/std": 0.08125147223472595, "epoch": 0.7779855730697302, "eta/annotator_0": 0.9528866410255432, "grad_norm": 51.04929633430076, "learning_rate": 9.952631396821521e-08, "loss": 0.653, "rewards/accuracies": 0.78125, "rewards/chosen": -0.52880859375, "rewards/margins": 0.225341796875, "rewards/rejected": -0.75390625, "step": 728 }, { "agreement_weights/mean": 0.9683235883712769, "agreement_weights/std": 0.055544763803482056, "epoch": 0.7790542345711996, "eta/annotator_0": 0.9528866410255432, "grad_norm": 36.76277439069273, "learning_rate": 9.861485542956747e-08, "loss": 0.5568, "rewards/accuracies": 0.828125, "rewards/chosen": -0.591796875, "rewards/margins": 0.21533203125, "rewards/rejected": -0.8076171875, "step": 729 }, { "agreement_weights/mean": 0.9565619230270386, "agreement_weights/std": 0.08082730323076248, "epoch": 0.7801228960726689, "eta/annotator_0": 0.9531131982803345, "grad_norm": 30.7061649843883, "learning_rate": 9.770690478812608e-08, "loss": 0.5188, "rewards/accuracies": 0.828125, "rewards/chosen": -0.619140625, "rewards/margins": 0.210693359375, "rewards/rejected": -0.830078125, "step": 730 }, { "agreement_weights/mean": 0.93034827709198, "agreement_weights/std": 0.16532976925373077, "epoch": 0.7811915575741384, "eta/annotator_0": 0.953339695930481, "grad_norm": 36.209948456387714, "learning_rate": 9.680247471368327e-08, "loss": 0.6014, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6396484375, "rewards/margins": 0.1748046875, "rewards/rejected": -0.814453125, "step": 731 }, { "agreement_weights/mean": 0.9654949307441711, "agreement_weights/std": 0.08510704338550568, "epoch": 0.7822602190756078, "eta/annotator_0": 0.953339695930481, "grad_norm": 31.734887818412403, "learning_rate": 9.590157782690428e-08, "loss": 0.471, "rewards/accuracies": 0.875, "rewards/chosen": -0.6259765625, "rewards/margins": 0.230712890625, "rewards/rejected": -0.8564453125, "step": 732 }, { "agreement_weights/mean": 0.9674408435821533, "agreement_weights/std": 0.05859784036874771, "epoch": 0.7833288805770772, "eta/annotator_0": 0.9522933959960938, "grad_norm": 37.0991708340988, "learning_rate": 9.500422669915097e-08, "loss": 0.5689, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6875, "rewards/margins": 0.2216796875, "rewards/rejected": -0.9091796875, "step": 733 }, { "agreement_weights/mean": 0.953021764755249, "agreement_weights/std": 0.1100422814488411, "epoch": 0.7843975420785466, "eta/annotator_0": 0.9522933959960938, "grad_norm": 43.363497345846945, "learning_rate": 9.411043385230703e-08, "loss": 0.5902, "rewards/accuracies": 0.828125, "rewards/chosen": -0.66796875, "rewards/margins": 0.196044921875, "rewards/rejected": -0.86328125, "step": 734 }, { "agreement_weights/mean": 0.9491719603538513, "agreement_weights/std": 0.10597305744886398, "epoch": 0.7854662035800161, "eta/annotator_0": 0.951563835144043, "grad_norm": 33.765952866569776, "learning_rate": 9.322021175860282e-08, "loss": 0.6086, "rewards/accuracies": 0.765625, "rewards/chosen": -0.69921875, "rewards/margins": 0.148681640625, "rewards/rejected": -0.84765625, "step": 735 }, { "agreement_weights/mean": 0.9652099013328552, "agreement_weights/std": 0.07400251179933548, "epoch": 0.7865348650814854, "eta/annotator_0": 0.9508342742919922, "grad_norm": 44.259480185540404, "learning_rate": 9.233357284044141e-08, "loss": 0.5111, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6123046875, "rewards/margins": 0.2919921875, "rewards/rejected": -0.9052734375, "step": 736 }, { "agreement_weights/mean": 0.985107421875, "agreement_weights/std": 0.020521271973848343, "epoch": 0.7876035265829548, "eta/annotator_0": 0.9508342742919922, "grad_norm": 34.729764772534416, "learning_rate": 9.145052947022527e-08, "loss": 0.4403, "rewards/accuracies": 0.890625, "rewards/chosen": -0.5869140625, "rewards/margins": 0.27294921875, "rewards/rejected": -0.8603515625, "step": 737 }, { "agreement_weights/mean": 0.9708507657051086, "agreement_weights/std": 0.05572209134697914, "epoch": 0.7886721880844243, "eta/annotator_0": 0.9521608352661133, "grad_norm": 42.55830756038985, "learning_rate": 9.057109397018373e-08, "loss": 0.5385, "rewards/accuracies": 0.890625, "rewards/chosen": -0.681640625, "rewards/margins": 0.2314453125, "rewards/rejected": -0.9130859375, "step": 738 }, { "agreement_weights/mean": 0.9279096126556396, "agreement_weights/std": 0.16248668730258942, "epoch": 0.7897408495858936, "eta/annotator_0": 0.9521608352661133, "grad_norm": 54.8121028167107, "learning_rate": 8.969527861220059e-08, "loss": 0.6826, "rewards/accuracies": 0.734375, "rewards/chosen": -0.671875, "rewards/margins": 0.17041015625, "rewards/rejected": -0.841796875, "step": 739 }, { "agreement_weights/mean": 0.9770103096961975, "agreement_weights/std": 0.03755531087517738, "epoch": 0.7908095110873631, "eta/annotator_0": 0.9526623487472534, "grad_norm": 46.20448644735305, "learning_rate": 8.882309561764356e-08, "loss": 0.5597, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6396484375, "rewards/margins": 0.236083984375, "rewards/rejected": -0.8759765625, "step": 740 }, { "agreement_weights/mean": 0.9706519842147827, "agreement_weights/std": 0.05947720259428024, "epoch": 0.7918781725888325, "eta/annotator_0": 0.9531639218330383, "grad_norm": 42.60510434921805, "learning_rate": 8.795455715719307e-08, "loss": 0.5392, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5947265625, "rewards/margins": 0.239990234375, "rewards/rejected": -0.8359375, "step": 741 }, { "agreement_weights/mean": 0.9626339077949524, "agreement_weights/std": 0.05719643086194992, "epoch": 0.7929468340903019, "eta/annotator_0": 0.9531639218330383, "grad_norm": 51.66610705485327, "learning_rate": 8.708967535067285e-08, "loss": 0.7044, "rewards/accuracies": 0.78125, "rewards/chosen": -0.7041015625, "rewards/margins": 0.234619140625, "rewards/rejected": -0.939453125, "step": 742 }, { "agreement_weights/mean": 0.9761002063751221, "agreement_weights/std": 0.03806145116686821, "epoch": 0.7940154955917713, "eta/annotator_0": 0.9521991014480591, "grad_norm": 32.69312239561437, "learning_rate": 8.622846226688084e-08, "loss": 0.5445, "rewards/accuracies": 0.859375, "rewards/chosen": -0.625, "rewards/margins": 0.2333984375, "rewards/rejected": -0.859375, "step": 743 }, { "agreement_weights/mean": 0.9718677997589111, "agreement_weights/std": 0.03967231139540672, "epoch": 0.7950841570932408, "eta/annotator_0": 0.9521991014480591, "grad_norm": 63.33335546986092, "learning_rate": 8.537092992342023e-08, "loss": 0.5969, "rewards/accuracies": 0.8125, "rewards/chosen": -0.603515625, "rewards/margins": 0.189453125, "rewards/rejected": -0.7919921875, "step": 744 }, { "agreement_weights/mean": 0.9570040702819824, "agreement_weights/std": 0.07470092922449112, "epoch": 0.7961528185947101, "eta/annotator_0": 0.951671302318573, "grad_norm": 52.58911272924747, "learning_rate": 8.451709028653246e-08, "loss": 0.6878, "rewards/accuracies": 0.703125, "rewards/chosen": -0.685546875, "rewards/margins": 0.1451416015625, "rewards/rejected": -0.8310546875, "step": 745 }, { "agreement_weights/mean": 0.9741562604904175, "agreement_weights/std": 0.06280773878097534, "epoch": 0.7972214800961795, "eta/annotator_0": 0.9511434435844421, "grad_norm": 33.07424089617897, "learning_rate": 8.366695527092996e-08, "loss": 0.4707, "rewards/accuracies": 0.875, "rewards/chosen": -0.638671875, "rewards/margins": 0.23681640625, "rewards/rejected": -0.8759765625, "step": 746 }, { "agreement_weights/mean": 0.9640330076217651, "agreement_weights/std": 0.0694015845656395, "epoch": 0.798290141597649, "eta/annotator_0": 0.9511434435844421, "grad_norm": 55.300658284342504, "learning_rate": 8.282053673962952e-08, "loss": 0.6752, "rewards/accuracies": 0.71875, "rewards/chosen": -0.619140625, "rewards/margins": 0.21142578125, "rewards/rejected": -0.830078125, "step": 747 }, { "agreement_weights/mean": 0.9769645929336548, "agreement_weights/std": 0.045341476798057556, "epoch": 0.7993588030991183, "eta/annotator_0": 0.9530718326568604, "grad_norm": 25.356211240509023, "learning_rate": 8.197784650378754e-08, "loss": 0.3945, "rewards/accuracies": 0.90625, "rewards/chosen": -0.55224609375, "rewards/margins": 0.2821044921875, "rewards/rejected": -0.833984375, "step": 748 }, { "agreement_weights/mean": 0.9461320042610168, "agreement_weights/std": 0.11907541751861572, "epoch": 0.8004274646005878, "eta/annotator_0": 0.9530718326568604, "grad_norm": 57.20686829501157, "learning_rate": 8.113889632253431e-08, "loss": 0.687, "rewards/accuracies": 0.75, "rewards/chosen": -0.62890625, "rewards/margins": 0.1585693359375, "rewards/rejected": -0.7880859375, "step": 749 }, { "agreement_weights/mean": 0.9735375046730042, "agreement_weights/std": 0.04874270781874657, "epoch": 0.8014961261020572, "eta/annotator_0": 0.9515140056610107, "grad_norm": 44.105587514641435, "learning_rate": 8.030369790281072e-08, "loss": 0.5587, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5888671875, "rewards/margins": 0.2060546875, "rewards/rejected": -0.7958984375, "step": 750 }, { "agreement_weights/mean": 0.9829747676849365, "agreement_weights/std": 0.027586083859205246, "epoch": 0.8025647876035266, "eta/annotator_0": 0.9499561190605164, "grad_norm": 37.53984272452736, "learning_rate": 7.94722628992045e-08, "loss": 0.4612, "rewards/accuracies": 0.84375, "rewards/chosen": -0.681640625, "rewards/margins": 0.262939453125, "rewards/rejected": -0.9443359375, "step": 751 }, { "agreement_weights/mean": 0.9471787810325623, "agreement_weights/std": 0.11754292249679565, "epoch": 0.803633449104996, "eta/annotator_0": 0.9499561190605164, "grad_norm": 42.11775610299522, "learning_rate": 7.864460291378736e-08, "loss": 0.6412, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6484375, "rewards/margins": 0.19384765625, "rewards/rejected": -0.841796875, "step": 752 }, { "agreement_weights/mean": 0.9556597471237183, "agreement_weights/std": 0.08274300396442413, "epoch": 0.8047021106064653, "eta/annotator_0": 0.9444608688354492, "grad_norm": 41.92681796722741, "learning_rate": 7.782072949595373e-08, "loss": 0.5868, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5986328125, "rewards/margins": 0.167236328125, "rewards/rejected": -0.765625, "step": 753 }, { "agreement_weights/mean": 0.9727975726127625, "agreement_weights/std": 0.04898206144571304, "epoch": 0.8057707721079348, "eta/annotator_0": 0.9444608688354492, "grad_norm": 38.952920291762844, "learning_rate": 7.700065414225904e-08, "loss": 0.5178, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6572265625, "rewards/margins": 0.2061767578125, "rewards/rejected": -0.86328125, "step": 754 }, { "agreement_weights/mean": 0.957103431224823, "agreement_weights/std": 0.07567860186100006, "epoch": 0.8068394336094042, "eta/annotator_0": 0.9452338218688965, "grad_norm": 124.01875001861187, "learning_rate": 7.618438829625956e-08, "loss": 0.6854, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6025390625, "rewards/margins": 0.248779296875, "rewards/rejected": -0.8525390625, "step": 755 }, { "agreement_weights/mean": 0.9624853134155273, "agreement_weights/std": 0.09386255592107773, "epoch": 0.8079080951108736, "eta/annotator_0": 0.9460066556930542, "grad_norm": 44.94353992061785, "learning_rate": 7.537194334835265e-08, "loss": 0.5469, "rewards/accuracies": 0.828125, "rewards/chosen": -0.578125, "rewards/margins": 0.183349609375, "rewards/rejected": -0.76171875, "step": 756 }, { "agreement_weights/mean": 0.9683018326759338, "agreement_weights/std": 0.054718926548957825, "epoch": 0.808976756612343, "eta/annotator_0": 0.9460066556930542, "grad_norm": 42.27982608500245, "learning_rate": 7.456333063561785e-08, "loss": 0.652, "rewards/accuracies": 0.75, "rewards/chosen": -0.6748046875, "rewards/margins": 0.21044921875, "rewards/rejected": -0.8876953125, "step": 757 }, { "agreement_weights/mean": 0.9627256989479065, "agreement_weights/std": 0.07392068207263947, "epoch": 0.8100454181138125, "eta/annotator_0": 0.9457410573959351, "grad_norm": 38.757006953061605, "learning_rate": 7.375856144165837e-08, "loss": 0.5424, "rewards/accuracies": 0.828125, "rewards/chosen": -0.59423828125, "rewards/margins": 0.23388671875, "rewards/rejected": -0.828125, "step": 758 }, { "agreement_weights/mean": 0.9546483755111694, "agreement_weights/std": 0.09081190079450607, "epoch": 0.8111140796152818, "eta/annotator_0": 0.9457410573959351, "grad_norm": 53.97153197881422, "learning_rate": 7.295764699644435e-08, "loss": 0.68, "rewards/accuracies": 0.78125, "rewards/chosen": -0.650390625, "rewards/margins": 0.21142578125, "rewards/rejected": -0.8642578125, "step": 759 }, { "agreement_weights/mean": 0.978107750415802, "agreement_weights/std": 0.041626378893852234, "epoch": 0.8121827411167513, "eta/annotator_0": 0.9453579783439636, "grad_norm": 36.094935059033936, "learning_rate": 7.216059847615522e-08, "loss": 0.5404, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6396484375, "rewards/margins": 0.21484375, "rewards/rejected": -0.8525390625, "step": 760 }, { "agreement_weights/mean": 0.949095606803894, "agreement_weights/std": 0.10853243619203568, "epoch": 0.8132514026182207, "eta/annotator_0": 0.944974958896637, "grad_norm": 48.7040504569229, "learning_rate": 7.136742700302468e-08, "loss": 0.645, "rewards/accuracies": 0.8125, "rewards/chosen": -0.607421875, "rewards/margins": 0.189697265625, "rewards/rejected": -0.7978515625, "step": 761 }, { "agreement_weights/mean": 0.9550979137420654, "agreement_weights/std": 0.11117570102214813, "epoch": 0.81432006411969, "eta/annotator_0": 0.944974958896637, "grad_norm": 30.46138859258402, "learning_rate": 7.057814364518484e-08, "loss": 0.5343, "rewards/accuracies": 0.8125, "rewards/chosen": -0.61328125, "rewards/margins": 0.1922607421875, "rewards/rejected": -0.8056640625, "step": 762 }, { "agreement_weights/mean": 0.962968111038208, "agreement_weights/std": 0.0890282541513443, "epoch": 0.8153887256211595, "eta/annotator_0": 0.9483380913734436, "grad_norm": 35.45815512170445, "learning_rate": 6.979275941651188e-08, "loss": 0.531, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5947265625, "rewards/margins": 0.2017822265625, "rewards/rejected": -0.794921875, "step": 763 }, { "agreement_weights/mean": 0.9536131024360657, "agreement_weights/std": 0.08807481080293655, "epoch": 0.8164573871226289, "eta/annotator_0": 0.9483380913734436, "grad_norm": 41.220455352878915, "learning_rate": 6.901128527647264e-08, "loss": 0.6185, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6025390625, "rewards/margins": 0.1754150390625, "rewards/rejected": -0.77734375, "step": 764 }, { "agreement_weights/mean": 0.9690433740615845, "agreement_weights/std": 0.05363684147596359, "epoch": 0.8175260486240983, "eta/annotator_0": 0.9490361213684082, "grad_norm": 45.40300627997314, "learning_rate": 6.823373212997159e-08, "loss": 0.6611, "rewards/accuracies": 0.765625, "rewards/chosen": -0.615234375, "rewards/margins": 0.159423828125, "rewards/rejected": -0.7734375, "step": 765 }, { "agreement_weights/mean": 0.9528319835662842, "agreement_weights/std": 0.1115921139717102, "epoch": 0.8185947101255677, "eta/annotator_0": 0.9497342109680176, "grad_norm": 37.00046420728177, "learning_rate": 6.746011082719821e-08, "loss": 0.632, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6328125, "rewards/margins": 0.1734619140625, "rewards/rejected": -0.8076171875, "step": 766 }, { "agreement_weights/mean": 0.962639331817627, "agreement_weights/std": 0.08556071668863297, "epoch": 0.8196633716270372, "eta/annotator_0": 0.9497342109680176, "grad_norm": 38.597065440708334, "learning_rate": 6.669043216347634e-08, "loss": 0.4897, "rewards/accuracies": 0.828125, "rewards/chosen": -0.634765625, "rewards/margins": 0.23779296875, "rewards/rejected": -0.8720703125, "step": 767 }, { "agreement_weights/mean": 0.972648024559021, "agreement_weights/std": 0.044218480587005615, "epoch": 0.8207320331285065, "eta/annotator_0": 0.94717937707901, "grad_norm": 55.95095135377183, "learning_rate": 6.592470687911302e-08, "loss": 0.6938, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6103515625, "rewards/margins": 0.2705078125, "rewards/rejected": -0.880859375, "step": 768 }, { "agreement_weights/mean": 0.9683252573013306, "agreement_weights/std": 0.06332657486200333, "epoch": 0.821800694629976, "eta/annotator_0": 0.94717937707901, "grad_norm": 42.51859650989134, "learning_rate": 6.516294565924867e-08, "loss": 0.5632, "rewards/accuracies": 0.796875, "rewards/chosen": -0.583984375, "rewards/margins": 0.20947265625, "rewards/rejected": -0.7939453125, "step": 769 }, { "agreement_weights/mean": 0.9770249128341675, "agreement_weights/std": 0.04091496020555496, "epoch": 0.8228693561314454, "eta/annotator_0": 0.9475195407867432, "grad_norm": 55.292110751031856, "learning_rate": 6.440515913370818e-08, "loss": 0.4931, "rewards/accuracies": 0.890625, "rewards/chosen": -0.587890625, "rewards/margins": 0.271240234375, "rewards/rejected": -0.8603515625, "step": 770 }, { "agreement_weights/mean": 0.9772862195968628, "agreement_weights/std": 0.03943904489278793, "epoch": 0.8239380176329147, "eta/annotator_0": 0.9478597044944763, "grad_norm": 188.0872888924968, "learning_rate": 6.365135787685236e-08, "loss": 0.6767, "rewards/accuracies": 0.84375, "rewards/chosen": -0.6064453125, "rewards/margins": 0.22802734375, "rewards/rejected": -0.8330078125, "step": 771 }, { "agreement_weights/mean": 0.9655579328536987, "agreement_weights/std": 0.08296015113592148, "epoch": 0.8250066791343842, "eta/annotator_0": 0.9478597044944763, "grad_norm": 104.31432582707117, "learning_rate": 6.290155240743054e-08, "loss": 0.669, "rewards/accuracies": 0.8125, "rewards/chosen": -0.59765625, "rewards/margins": 0.28369140625, "rewards/rejected": -0.8818359375, "step": 772 }, { "agreement_weights/mean": 0.956820547580719, "agreement_weights/std": 0.09424933791160583, "epoch": 0.8260753406358536, "eta/annotator_0": 0.9519186615943909, "grad_norm": 37.67412452454994, "learning_rate": 6.215575318843383e-08, "loss": 0.5843, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6181640625, "rewards/margins": 0.182861328125, "rewards/rejected": -0.8017578125, "step": 773 }, { "agreement_weights/mean": 0.9705896973609924, "agreement_weights/std": 0.05688171833753586, "epoch": 0.827144002137323, "eta/annotator_0": 0.9519186615943909, "grad_norm": 34.42803678606352, "learning_rate": 6.141397062694887e-08, "loss": 0.5449, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6396484375, "rewards/margins": 0.225830078125, "rewards/rejected": -0.8642578125, "step": 774 }, { "agreement_weights/mean": 0.9596270322799683, "agreement_weights/std": 0.08302868902683258, "epoch": 0.8282126636387924, "eta/annotator_0": 0.9499630331993103, "grad_norm": 48.953428934297975, "learning_rate": 6.067621507401287e-08, "loss": 0.6805, "rewards/accuracies": 0.78125, "rewards/chosen": -0.56103515625, "rewards/margins": 0.19189453125, "rewards/rejected": -0.75390625, "step": 775 }, { "agreement_weights/mean": 0.9710547924041748, "agreement_weights/std": 0.05222000554203987, "epoch": 0.8292813251402619, "eta/annotator_0": 0.9480074048042297, "grad_norm": 62.46543955582213, "learning_rate": 5.994249682446908e-08, "loss": 0.6278, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6376953125, "rewards/margins": 0.235107421875, "rewards/rejected": -0.8740234375, "step": 776 }, { "agreement_weights/mean": 0.9589319825172424, "agreement_weights/std": 0.09497959911823273, "epoch": 0.8303499866417312, "eta/annotator_0": 0.9480074048042297, "grad_norm": 55.21538309550181, "learning_rate": 5.9212826116822936e-08, "loss": 0.5361, "rewards/accuracies": 0.84375, "rewards/chosen": -0.68359375, "rewards/margins": 0.23291015625, "rewards/rejected": -0.9169921875, "step": 777 }, { "agreement_weights/mean": 0.9559142589569092, "agreement_weights/std": 0.09231086820363998, "epoch": 0.8314186481432007, "eta/annotator_0": 0.9476706385612488, "grad_norm": 46.29141062570145, "learning_rate": 5.848721313309957e-08, "loss": 0.5846, "rewards/accuracies": 0.765625, "rewards/chosen": -0.642578125, "rewards/margins": 0.177001953125, "rewards/rejected": -0.8193359375, "step": 778 }, { "agreement_weights/mean": 0.949777364730835, "agreement_weights/std": 0.08744501322507858, "epoch": 0.8324873096446701, "eta/annotator_0": 0.9476706385612488, "grad_norm": 44.00751269138489, "learning_rate": 5.7765667998701296e-08, "loss": 0.7092, "rewards/accuracies": 0.671875, "rewards/chosen": -0.61328125, "rewards/margins": 0.12255859375, "rewards/rejected": -0.734375, "step": 779 }, { "agreement_weights/mean": 0.979421854019165, "agreement_weights/std": 0.04074616730213165, "epoch": 0.8335559711461394, "eta/annotator_0": 0.9461958408355713, "grad_norm": 97.26142524345053, "learning_rate": 5.7048200782266757e-08, "loss": 0.9819, "rewards/accuracies": 0.875, "rewards/chosen": -0.55517578125, "rewards/margins": 0.270751953125, "rewards/rejected": -0.8251953125, "step": 780 }, { "agreement_weights/mean": 0.9544617533683777, "agreement_weights/std": 0.12118552625179291, "epoch": 0.8346246326476089, "eta/annotator_0": 0.9447210431098938, "grad_norm": 52.09420747679821, "learning_rate": 5.6334821495530276e-08, "loss": 0.5542, "rewards/accuracies": 0.828125, "rewards/chosen": -0.615234375, "rewards/margins": 0.218505859375, "rewards/rejected": -0.833984375, "step": 781 }, { "agreement_weights/mean": 0.9789129495620728, "agreement_weights/std": 0.04085639864206314, "epoch": 0.8356932941490782, "eta/annotator_0": 0.9447210431098938, "grad_norm": 48.814038893622744, "learning_rate": 5.562554009318166e-08, "loss": 0.5839, "rewards/accuracies": 0.859375, "rewards/chosen": -0.583984375, "rewards/margins": 0.2978515625, "rewards/rejected": -0.8828125, "step": 782 }, { "agreement_weights/mean": 0.9718151092529297, "agreement_weights/std": 0.046651728451251984, "epoch": 0.8367619556505477, "eta/annotator_0": 0.9459062218666077, "grad_norm": 40.13583250066579, "learning_rate": 5.49203664727282e-08, "loss": 0.6144, "rewards/accuracies": 0.8125, "rewards/chosen": -0.595703125, "rewards/margins": 0.2197265625, "rewards/rejected": -0.814453125, "step": 783 }, { "agreement_weights/mean": 0.9839168787002563, "agreement_weights/std": 0.024098915979266167, "epoch": 0.8378306171520171, "eta/annotator_0": 0.9459062218666077, "grad_norm": 32.979584026666345, "learning_rate": 5.4219310474355845e-08, "loss": 0.4472, "rewards/accuracies": 0.921875, "rewards/chosen": -0.57275390625, "rewards/margins": 0.2646484375, "rewards/rejected": -0.8388671875, "step": 784 }, { "agreement_weights/mean": 0.9655001163482666, "agreement_weights/std": 0.07212841510772705, "epoch": 0.8388992786534865, "eta/annotator_0": 0.9467037916183472, "grad_norm": 48.527491719459874, "learning_rate": 5.3522381880792034e-08, "loss": 0.5456, "rewards/accuracies": 0.84375, "rewards/chosen": -0.70703125, "rewards/margins": 0.207275390625, "rewards/rejected": -0.9140625, "step": 785 }, { "agreement_weights/mean": 0.9625097513198853, "agreement_weights/std": 0.08456508815288544, "epoch": 0.8399679401549559, "eta/annotator_0": 0.9475013613700867, "grad_norm": 44.77059459726653, "learning_rate": 5.282959041716946e-08, "loss": 0.5703, "rewards/accuracies": 0.796875, "rewards/chosen": -0.63720703125, "rewards/margins": 0.18701171875, "rewards/rejected": -0.82421875, "step": 786 }, { "agreement_weights/mean": 0.9554619789123535, "agreement_weights/std": 0.1272043138742447, "epoch": 0.8410366016564254, "eta/annotator_0": 0.9475013613700867, "grad_norm": 47.76576062790787, "learning_rate": 5.214094575089014e-08, "loss": 0.4986, "rewards/accuracies": 0.859375, "rewards/chosen": -0.59228515625, "rewards/margins": 0.2314453125, "rewards/rejected": -0.82421875, "step": 787 }, { "agreement_weights/mean": 0.9767647981643677, "agreement_weights/std": 0.053438782691955566, "epoch": 0.8421052631578947, "eta/annotator_0": 0.9455434679985046, "grad_norm": 43.72465880929715, "learning_rate": 5.145645749149034e-08, "loss": 0.5145, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5693359375, "rewards/margins": 0.263427734375, "rewards/rejected": -0.8330078125, "step": 788 }, { "agreement_weights/mean": 0.9670735001564026, "agreement_weights/std": 0.06667475402355194, "epoch": 0.8431739246593641, "eta/annotator_0": 0.9455434679985046, "grad_norm": 41.59535230502847, "learning_rate": 5.0776135190507005e-08, "loss": 0.5902, "rewards/accuracies": 0.8125, "rewards/chosen": -0.638671875, "rewards/margins": 0.169677734375, "rewards/rejected": -0.806640625, "step": 789 }, { "agreement_weights/mean": 0.9528321623802185, "agreement_weights/std": 0.09484586119651794, "epoch": 0.8442425861608336, "eta/annotator_0": 0.9469005465507507, "grad_norm": 38.001095603646114, "learning_rate": 5.009998834134383e-08, "loss": 0.5971, "rewards/accuracies": 0.765625, "rewards/chosen": -0.60302734375, "rewards/margins": 0.1611328125, "rewards/rejected": -0.7626953125, "step": 790 }, { "agreement_weights/mean": 0.9745066165924072, "agreement_weights/std": 0.038493312895298004, "epoch": 0.8453112476623029, "eta/annotator_0": 0.9482576847076416, "grad_norm": 36.930176909261164, "learning_rate": 4.942802637913931e-08, "loss": 0.5754, "rewards/accuracies": 0.796875, "rewards/chosen": -0.619140625, "rewards/margins": 0.186279296875, "rewards/rejected": -0.8046875, "step": 791 }, { "agreement_weights/mean": 0.9672581553459167, "agreement_weights/std": 0.07152906060218811, "epoch": 0.8463799091637724, "eta/annotator_0": 0.9482576847076416, "grad_norm": 107.31141046192127, "learning_rate": 4.8760258680634864e-08, "loss": 0.8447, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5947265625, "rewards/margins": 0.274169921875, "rewards/rejected": -0.869140625, "step": 792 }, { "agreement_weights/mean": 0.9728332161903381, "agreement_weights/std": 0.05552929639816284, "epoch": 0.8474485706652418, "eta/annotator_0": 0.9432702660560608, "grad_norm": 35.72429464346419, "learning_rate": 4.8096694564043977e-08, "loss": 0.4948, "rewards/accuracies": 0.890625, "rewards/chosen": -0.5810546875, "rewards/margins": 0.22607421875, "rewards/rejected": -0.8056640625, "step": 793 }, { "agreement_weights/mean": 0.954376220703125, "agreement_weights/std": 0.1009121835231781, "epoch": 0.8485172321667112, "eta/annotator_0": 0.9432702660560608, "grad_norm": 49.850641676926706, "learning_rate": 4.7437343288922174e-08, "loss": 0.572, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6103515625, "rewards/margins": 0.2255859375, "rewards/rejected": -0.8359375, "step": 794 }, { "agreement_weights/mean": 0.9707556962966919, "agreement_weights/std": 0.06085766479372978, "epoch": 0.8495858936681806, "eta/annotator_0": 0.9449037313461304, "grad_norm": 62.8818693474508, "learning_rate": 4.678221405603795e-08, "loss": 0.6211, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5888671875, "rewards/margins": 0.24462890625, "rewards/rejected": -0.8330078125, "step": 795 }, { "agreement_weights/mean": 0.9711627960205078, "agreement_weights/std": 0.05337417498230934, "epoch": 0.85065455516965, "eta/annotator_0": 0.9465371966362, "grad_norm": 40.93608784335791, "learning_rate": 4.6131316007244044e-08, "loss": 0.6173, "rewards/accuracies": 0.796875, "rewards/chosen": -0.630859375, "rewards/margins": 0.204345703125, "rewards/rejected": -0.8359375, "step": 796 }, { "agreement_weights/mean": 0.9584883451461792, "agreement_weights/std": 0.08898042142391205, "epoch": 0.8517232166711194, "eta/annotator_0": 0.9465371966362, "grad_norm": 39.7771298185961, "learning_rate": 4.548465822535032e-08, "loss": 0.6256, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5537109375, "rewards/margins": 0.15380859375, "rewards/rejected": -0.7080078125, "step": 797 }, { "agreement_weights/mean": 0.9620825052261353, "agreement_weights/std": 0.07815033942461014, "epoch": 0.8527918781725888, "eta/annotator_0": 0.9488078355789185, "grad_norm": 38.038660371869014, "learning_rate": 4.48422497339967e-08, "loss": 0.5701, "rewards/accuracies": 0.8125, "rewards/chosen": -0.654296875, "rewards/margins": 0.1558837890625, "rewards/rejected": -0.810546875, "step": 798 }, { "agreement_weights/mean": 0.9751351475715637, "agreement_weights/std": 0.05004357546567917, "epoch": 0.8538605396740583, "eta/annotator_0": 0.9488078355789185, "grad_norm": 47.95944939799711, "learning_rate": 4.420409949752726e-08, "loss": 0.5651, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5947265625, "rewards/margins": 0.222900390625, "rewards/rejected": -0.818359375, "step": 799 }, { "agreement_weights/mean": 0.9702588319778442, "agreement_weights/std": 0.06387639045715332, "epoch": 0.8549292011755276, "eta/annotator_0": 0.9496802687644958, "grad_norm": 53.42343049914736, "learning_rate": 4.357021642086545e-08, "loss": 0.5345, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6044921875, "rewards/margins": 0.232666015625, "rewards/rejected": -0.8369140625, "step": 800 }, { "epoch": 0.8549292011755276, "eta/annotator_0": 0.9526017904281616, "eval_agreement_weights/mean": 0.9664426445960999, "eval_agreement_weights/std": 0.06836311519145966, "eval_loss": 0.5669715404510498, "eval_rewards/accuracies": 0.798893392086029, "eval_rewards/chosen": -0.6190136075019836, "eval_rewards/margins": 0.20980413258075714, "eval_rewards/rejected": -0.8286648988723755, "eval_runtime": 135.09, "eval_samples_per_second": 14.516, "eval_steps_per_second": 0.911, "step": 800 }, { "agreement_weights/mean": 0.9591683149337769, "agreement_weights/std": 0.11332603543996811, "epoch": 0.8559978626769971, "eta/annotator_0": 0.9550223350524902, "grad_norm": 47.63584073748138, "learning_rate": 4.294060934938925e-08, "loss": 0.5473, "rewards/accuracies": 0.796875, "rewards/chosen": -0.64453125, "rewards/margins": 0.21533203125, "rewards/rejected": -0.859375, "step": 801 }, { "agreement_weights/mean": 0.9888231754302979, "agreement_weights/std": 0.013417162001132965, "epoch": 0.8570665241784665, "eta/annotator_0": 0.9555246233940125, "grad_norm": 29.434557915220243, "learning_rate": 4.23152870688084e-08, "loss": 0.4223, "rewards/accuracies": 0.90625, "rewards/chosen": -0.6513671875, "rewards/margins": 0.27734375, "rewards/rejected": -0.9287109375, "step": 802 }, { "agreement_weights/mean": 0.9783529043197632, "agreement_weights/std": 0.04502930864691734, "epoch": 0.8581351856799359, "eta/annotator_0": 0.9556919932365417, "grad_norm": 49.35516044601033, "learning_rate": 4.1694258305041484e-08, "loss": 0.519, "rewards/accuracies": 0.859375, "rewards/chosen": -0.57763671875, "rewards/margins": 0.240234375, "rewards/rejected": -0.818359375, "step": 803 }, { "agreement_weights/mean": 0.9697893261909485, "agreement_weights/std": 0.07076670229434967, "epoch": 0.8592038471814053, "eta/annotator_0": 0.9562961459159851, "grad_norm": 48.433840360440996, "learning_rate": 4.1077531724093935e-08, "loss": 0.559, "rewards/accuracies": 0.8125, "rewards/chosen": -0.67578125, "rewards/margins": 0.1884765625, "rewards/rejected": -0.86328125, "step": 804 }, { "agreement_weights/mean": 0.9535999894142151, "agreement_weights/std": 0.09629624336957932, "epoch": 0.8602725086828747, "eta/annotator_0": 0.95810866355896, "grad_norm": 35.42347759469177, "learning_rate": 4.046511593193756e-08, "loss": 0.616, "rewards/accuracies": 0.703125, "rewards/chosen": -0.58837890625, "rewards/margins": 0.1474609375, "rewards/rejected": -0.7353515625, "step": 805 }, { "agreement_weights/mean": 0.9798336625099182, "agreement_weights/std": 0.031460415571928024, "epoch": 0.8613411701843441, "eta/annotator_0": 0.95810866355896, "grad_norm": 39.97764264801187, "learning_rate": 3.985701947439024e-08, "loss": 0.557, "rewards/accuracies": 0.796875, "rewards/chosen": -0.609375, "rewards/margins": 0.222412109375, "rewards/rejected": -0.8330078125, "step": 806 }, { "agreement_weights/mean": 0.9750804305076599, "agreement_weights/std": 0.044317666441202164, "epoch": 0.8624098316858135, "eta/annotator_0": 0.9564492702484131, "grad_norm": 39.78537733381124, "learning_rate": 3.925325083699646e-08, "loss": 0.568, "rewards/accuracies": 0.796875, "rewards/chosen": -0.60546875, "rewards/margins": 0.204833984375, "rewards/rejected": -0.8095703125, "step": 807 }, { "agreement_weights/mean": 0.9433370232582092, "agreement_weights/std": 0.11527258157730103, "epoch": 0.863478493187283, "eta/annotator_0": 0.9558961391448975, "grad_norm": 61.39960854059664, "learning_rate": 3.8653818444909334e-08, "loss": 0.7442, "rewards/accuracies": 0.65625, "rewards/chosen": -0.662109375, "rewards/margins": 0.1756591796875, "rewards/rejected": -0.837890625, "step": 808 }, { "agreement_weights/mean": 0.955618143081665, "agreement_weights/std": 0.08206801861524582, "epoch": 0.8645471546887523, "eta/annotator_0": 0.9533412456512451, "grad_norm": 35.73697192642008, "learning_rate": 3.8058730662772844e-08, "loss": 0.6323, "rewards/accuracies": 0.75, "rewards/chosen": -0.603515625, "rewards/margins": 0.17919921875, "rewards/rejected": -0.783203125, "step": 809 }, { "agreement_weights/mean": 0.9651530981063843, "agreement_weights/std": 0.07212137430906296, "epoch": 0.8656158161902218, "eta/annotator_0": 0.9456766843795776, "grad_norm": 44.22312137596208, "learning_rate": 3.746799579460483e-08, "loss": 0.5174, "rewards/accuracies": 0.8125, "rewards/chosen": -0.625, "rewards/margins": 0.251220703125, "rewards/rejected": -0.8759765625, "step": 810 }, { "agreement_weights/mean": 0.942333459854126, "agreement_weights/std": 0.11300639808177948, "epoch": 0.8666844776916911, "eta/annotator_0": 0.9456766843795776, "grad_norm": 46.12174858547102, "learning_rate": 3.688162208368168e-08, "loss": 0.6611, "rewards/accuracies": 0.734375, "rewards/chosen": -0.60302734375, "rewards/margins": 0.2269287109375, "rewards/rejected": -0.8291015625, "step": 811 }, { "agreement_weights/mean": 0.9861633777618408, "agreement_weights/std": 0.02109459415078163, "epoch": 0.8677531391931605, "eta/annotator_0": 0.9466426372528076, "grad_norm": 34.485222749174035, "learning_rate": 3.62996177124228e-08, "loss": 0.452, "rewards/accuracies": 0.90625, "rewards/chosen": -0.6357421875, "rewards/margins": 0.23583984375, "rewards/rejected": -0.87109375, "step": 812 }, { "agreement_weights/mean": 0.9580209851264954, "agreement_weights/std": 0.0940166488289833, "epoch": 0.86882180069463, "eta/annotator_0": 0.9469646215438843, "grad_norm": 46.48604443134743, "learning_rate": 3.5721990802276716e-08, "loss": 0.6065, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6064453125, "rewards/margins": 0.164306640625, "rewards/rejected": -0.771484375, "step": 813 }, { "agreement_weights/mean": 0.9715348482131958, "agreement_weights/std": 0.05249042809009552, "epoch": 0.8698904621960993, "eta/annotator_0": 0.947526216506958, "grad_norm": 43.05854191008516, "learning_rate": 3.5148749413607765e-08, "loss": 0.5427, "rewards/accuracies": 0.796875, "rewards/chosen": -0.56201171875, "rewards/margins": 0.248291015625, "rewards/rejected": -0.8095703125, "step": 814 }, { "agreement_weights/mean": 0.9555210471153259, "agreement_weights/std": 0.10582873225212097, "epoch": 0.8709591236975688, "eta/annotator_0": 0.949211061000824, "grad_norm": 50.68156510562654, "learning_rate": 3.457990154558319e-08, "loss": 0.5655, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6103515625, "rewards/margins": 0.175048828125, "rewards/rejected": -0.7861328125, "step": 815 }, { "agreement_weights/mean": 0.9701818227767944, "agreement_weights/std": 0.04803713411092758, "epoch": 0.8720277851990382, "eta/annotator_0": 0.949211061000824, "grad_norm": 36.56881027490881, "learning_rate": 3.4015455136062165e-08, "loss": 0.5998, "rewards/accuracies": 0.75, "rewards/chosen": -0.58935546875, "rewards/margins": 0.189697265625, "rewards/rejected": -0.779296875, "step": 816 }, { "agreement_weights/mean": 0.9730526208877563, "agreement_weights/std": 0.05207952857017517, "epoch": 0.8730964467005076, "eta/annotator_0": 0.949537456035614, "grad_norm": 35.73581724925413, "learning_rate": 3.345541806148464e-08, "loss": 0.5075, "rewards/accuracies": 0.8125, "rewards/chosen": -0.634765625, "rewards/margins": 0.222900390625, "rewards/rejected": -0.857421875, "step": 817 }, { "agreement_weights/mean": 0.9448105096817017, "agreement_weights/std": 0.12071212381124496, "epoch": 0.874165108201977, "eta/annotator_0": 0.9496462345123291, "grad_norm": 37.4229484553345, "learning_rate": 3.289979813676135e-08, "loss": 0.6915, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6865234375, "rewards/margins": 0.140380859375, "rewards/rejected": -0.826171875, "step": 818 }, { "agreement_weights/mean": 0.9679641127586365, "agreement_weights/std": 0.05219431221485138, "epoch": 0.8752337697034465, "eta/annotator_0": 0.9483751058578491, "grad_norm": 36.15736407961811, "learning_rate": 3.2348603115165084e-08, "loss": 0.6108, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6552734375, "rewards/margins": 0.183837890625, "rewards/rejected": -0.8408203125, "step": 819 }, { "agreement_weights/mean": 0.972693145275116, "agreement_weights/std": 0.05907527729868889, "epoch": 0.8763024312049158, "eta/annotator_0": 0.9445616602897644, "grad_norm": 51.80175035702331, "learning_rate": 3.180184068822214e-08, "loss": 0.5527, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6376953125, "rewards/margins": 0.217041015625, "rewards/rejected": -0.85546875, "step": 820 }, { "agreement_weights/mean": 0.9696303009986877, "agreement_weights/std": 0.06368786096572876, "epoch": 0.8773710927063852, "eta/annotator_0": 0.9445616602897644, "grad_norm": 38.08735216284517, "learning_rate": 3.125951848560535e-08, "loss": 0.5552, "rewards/accuracies": 0.8125, "rewards/chosen": -0.59765625, "rewards/margins": 0.1943359375, "rewards/rejected": -0.7939453125, "step": 821 }, { "agreement_weights/mean": 0.9849168658256531, "agreement_weights/std": 0.01819106936454773, "epoch": 0.8784397542078547, "eta/annotator_0": 0.9468872547149658, "grad_norm": 43.95293996771825, "learning_rate": 3.0721644075027394e-08, "loss": 0.5017, "rewards/accuracies": 0.84375, "rewards/chosen": -0.56640625, "rewards/margins": 0.234130859375, "rewards/rejected": -0.798828125, "step": 822 }, { "agreement_weights/mean": 0.9477639198303223, "agreement_weights/std": 0.11653255671262741, "epoch": 0.879508415709324, "eta/annotator_0": 0.9476624727249146, "grad_norm": 97.79687777411121, "learning_rate": 3.0188224962135153e-08, "loss": 0.6828, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6796875, "rewards/margins": 0.155517578125, "rewards/rejected": -0.8349609375, "step": 823 }, { "agreement_weights/mean": 0.9638271331787109, "agreement_weights/std": 0.09022349119186401, "epoch": 0.8805770772107935, "eta/annotator_0": 0.948426365852356, "grad_norm": 45.45550417822725, "learning_rate": 2.965926859040514e-08, "loss": 0.548, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6064453125, "rewards/margins": 0.197998046875, "rewards/rejected": -0.8037109375, "step": 824 }, { "agreement_weights/mean": 0.9606655836105347, "agreement_weights/std": 0.05971311405301094, "epoch": 0.8816457387122629, "eta/annotator_0": 0.9507179856300354, "grad_norm": 74.94684210954453, "learning_rate": 2.913478234103962e-08, "loss": 0.8255, "rewards/accuracies": 0.703125, "rewards/chosen": -0.587890625, "rewards/margins": 0.211669921875, "rewards/rejected": -0.7998046875, "step": 825 }, { "agreement_weights/mean": 0.9712631702423096, "agreement_weights/std": 0.05707758292555809, "epoch": 0.8827144002137323, "eta/annotator_0": 0.9507179856300354, "grad_norm": 29.30801852246891, "learning_rate": 2.861477353286335e-08, "loss": 0.5353, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5830078125, "rewards/margins": 0.20654296875, "rewards/rejected": -0.7890625, "step": 826 }, { "agreement_weights/mean": 0.9795390963554382, "agreement_weights/std": 0.027966732159256935, "epoch": 0.8837830617152017, "eta/annotator_0": 0.9510392546653748, "grad_norm": 40.303830065504215, "learning_rate": 2.8099249422221856e-08, "loss": 0.5813, "rewards/accuracies": 0.8125, "rewards/chosen": -0.556640625, "rewards/margins": 0.195068359375, "rewards/rejected": -0.7529296875, "step": 827 }, { "agreement_weights/mean": 0.9807809591293335, "agreement_weights/std": 0.03255971521139145, "epoch": 0.8848517232166712, "eta/annotator_0": 0.9511463046073914, "grad_norm": 76.2059776454579, "learning_rate": 2.7588217202879848e-08, "loss": 0.587, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5810546875, "rewards/margins": 0.3291015625, "rewards/rejected": -0.91015625, "step": 828 }, { "agreement_weights/mean": 0.9713739156723022, "agreement_weights/std": 0.07321824133396149, "epoch": 0.8859203847181405, "eta/annotator_0": 0.9518699645996094, "grad_norm": 36.02085937854673, "learning_rate": 2.708168400592091e-08, "loss": 0.4555, "rewards/accuracies": 0.875, "rewards/chosen": -0.642578125, "rewards/margins": 0.255615234375, "rewards/rejected": -0.8974609375, "step": 829 }, { "agreement_weights/mean": 0.9681546688079834, "agreement_weights/std": 0.05719560384750366, "epoch": 0.88698904621961, "eta/annotator_0": 0.9540409445762634, "grad_norm": 39.418411166262125, "learning_rate": 2.657965689964817e-08, "loss": 0.6178, "rewards/accuracies": 0.78125, "rewards/chosen": -0.57373046875, "rewards/margins": 0.175537109375, "rewards/rejected": -0.7490234375, "step": 830 }, { "agreement_weights/mean": 0.961790144443512, "agreement_weights/std": 0.0963590070605278, "epoch": 0.8880577077210794, "eta/annotator_0": 0.9540409445762634, "grad_norm": 46.74010140277914, "learning_rate": 2.608214288948542e-08, "loss": 0.56, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6259765625, "rewards/margins": 0.21044921875, "rewards/rejected": -0.8369140625, "step": 831 }, { "agreement_weights/mean": 0.9622222185134888, "agreement_weights/std": 0.0846257209777832, "epoch": 0.8891263692225487, "eta/annotator_0": 0.952921450138092, "grad_norm": 35.616165134188826, "learning_rate": 2.5589148917879556e-08, "loss": 0.4914, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6982421875, "rewards/margins": 0.21337890625, "rewards/rejected": -0.912109375, "step": 832 }, { "agreement_weights/mean": 0.972683310508728, "agreement_weights/std": 0.06223394721746445, "epoch": 0.8901950307240182, "eta/annotator_0": 0.9525482654571533, "grad_norm": 50.05842465689208, "learning_rate": 2.510068186420362e-08, "loss": 0.4893, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6689453125, "rewards/margins": 0.228271484375, "rewards/rejected": -0.8974609375, "step": 833 }, { "agreement_weights/mean": 0.979125440120697, "agreement_weights/std": 0.035555314272642136, "epoch": 0.8912636922254876, "eta/annotator_0": 0.9531768560409546, "grad_norm": 33.889716919386565, "learning_rate": 2.4616748544660658e-08, "loss": 0.498, "rewards/accuracies": 0.78125, "rewards/chosen": -0.650390625, "rewards/margins": 0.2119140625, "rewards/rejected": -0.861328125, "step": 834 }, { "agreement_weights/mean": 0.9772122502326965, "agreement_weights/std": 0.04640253633260727, "epoch": 0.892332353726957, "eta/annotator_0": 0.9550626873970032, "grad_norm": 42.81227286980935, "learning_rate": 2.413735571218889e-08, "loss": 0.549, "rewards/accuracies": 0.875, "rewards/chosen": -0.630859375, "rewards/margins": 0.1787109375, "rewards/rejected": -0.8076171875, "step": 835 }, { "agreement_weights/mean": 0.9640552401542664, "agreement_weights/std": 0.08530205488204956, "epoch": 0.8934010152284264, "eta/annotator_0": 0.9550626873970032, "grad_norm": 54.89674561092988, "learning_rate": 2.3662510056367324e-08, "loss": 0.5302, "rewards/accuracies": 0.859375, "rewards/chosen": -0.623046875, "rewards/margins": 0.21826171875, "rewards/rejected": -0.83984375, "step": 836 }, { "agreement_weights/mean": 0.9786937236785889, "agreement_weights/std": 0.03493991494178772, "epoch": 0.8944696767298959, "eta/annotator_0": 0.9551790356636047, "grad_norm": 44.35850046896836, "learning_rate": 2.3192218203322318e-08, "loss": 0.5377, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6162109375, "rewards/margins": 0.208984375, "rewards/rejected": -0.826171875, "step": 837 }, { "agreement_weights/mean": 0.9430354237556458, "agreement_weights/std": 0.13182659447193146, "epoch": 0.8955383382313652, "eta/annotator_0": 0.9552178382873535, "grad_norm": 56.3537491559861, "learning_rate": 2.272648671563531e-08, "loss": 0.5979, "rewards/accuracies": 0.8125, "rewards/chosen": -0.65625, "rewards/margins": 0.1796875, "rewards/rejected": -0.8369140625, "step": 838 }, { "agreement_weights/mean": 0.959385871887207, "agreement_weights/std": 0.10311545431613922, "epoch": 0.8966069997328346, "eta/annotator_0": 0.9554860591888428, "grad_norm": 51.63080271776549, "learning_rate": 2.2265322092251183e-08, "loss": 0.6102, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6064453125, "rewards/margins": 0.23583984375, "rewards/rejected": -0.8408203125, "step": 839 }, { "agreement_weights/mean": 0.9493027329444885, "agreement_weights/std": 0.11297362297773361, "epoch": 0.897675661234304, "eta/annotator_0": 0.9562906622886658, "grad_norm": 34.972531338302936, "learning_rate": 2.180873076838741e-08, "loss": 0.6245, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6796875, "rewards/margins": 0.1707763671875, "rewards/rejected": -0.8505859375, "step": 840 }, { "agreement_weights/mean": 0.9663382172584534, "agreement_weights/std": 0.05696861445903778, "epoch": 0.8987443227357734, "eta/annotator_0": 0.9562906622886658, "grad_norm": 35.29194112816681, "learning_rate": 2.135671911544454e-08, "loss": 0.6371, "rewards/accuracies": 0.765625, "rewards/chosen": -0.58447265625, "rewards/margins": 0.186279296875, "rewards/rejected": -0.7705078125, "step": 841 }, { "agreement_weights/mean": 0.9642590880393982, "agreement_weights/std": 0.0910407304763794, "epoch": 0.8998129842372429, "eta/annotator_0": 0.9556595087051392, "grad_norm": 32.04148055809257, "learning_rate": 2.0909293440916965e-08, "loss": 0.4977, "rewards/accuracies": 0.78125, "rewards/chosen": -0.673828125, "rewards/margins": 0.201416015625, "rewards/rejected": -0.8759765625, "step": 842 }, { "agreement_weights/mean": 0.9594435691833496, "agreement_weights/std": 0.08756335079669952, "epoch": 0.9008816457387122, "eta/annotator_0": 0.955449104309082, "grad_norm": 41.21130056348855, "learning_rate": 2.0466459988305225e-08, "loss": 0.6719, "rewards/accuracies": 0.75, "rewards/chosen": -0.6474609375, "rewards/margins": 0.21923828125, "rewards/rejected": -0.8671875, "step": 843 }, { "agreement_weights/mean": 0.9574872255325317, "agreement_weights/std": 0.10610919445753098, "epoch": 0.9019503072401817, "eta/annotator_0": 0.9547351598739624, "grad_norm": 63.556954443924866, "learning_rate": 2.0028224937028783e-08, "loss": 0.646, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6181640625, "rewards/margins": 0.2220458984375, "rewards/rejected": -0.8388671875, "step": 844 }, { "agreement_weights/mean": 0.9642078876495361, "agreement_weights/std": 0.07261661440134048, "epoch": 0.9030189687416511, "eta/annotator_0": 0.952593207359314, "grad_norm": 38.59451959924764, "learning_rate": 1.95945944023395e-08, "loss": 0.555, "rewards/accuracies": 0.78125, "rewards/chosen": -0.62109375, "rewards/margins": 0.1796875, "rewards/rejected": -0.7998046875, "step": 845 }, { "agreement_weights/mean": 0.9826037883758545, "agreement_weights/std": 0.035837676376104355, "epoch": 0.9040876302431204, "eta/annotator_0": 0.952593207359314, "grad_norm": 50.47587964573878, "learning_rate": 1.9165574435236764e-08, "loss": 0.4467, "rewards/accuracies": 0.90625, "rewards/chosen": -0.6318359375, "rewards/margins": 0.27392578125, "rewards/rejected": -0.904296875, "step": 846 }, { "agreement_weights/mean": 0.9693632125854492, "agreement_weights/std": 0.06652949005365372, "epoch": 0.9051562917445899, "eta/annotator_0": 0.9535902142524719, "grad_norm": 37.59777807380897, "learning_rate": 1.874117102238283e-08, "loss": 0.5652, "rewards/accuracies": 0.828125, "rewards/chosen": -0.57177734375, "rewards/margins": 0.22607421875, "rewards/rejected": -0.7978515625, "step": 847 }, { "agreement_weights/mean": 0.9536927938461304, "agreement_weights/std": 0.08726195991039276, "epoch": 0.9062249532460593, "eta/annotator_0": 0.9539225101470947, "grad_norm": 42.84458317430555, "learning_rate": 1.8321390086019178e-08, "loss": 0.7482, "rewards/accuracies": 0.75, "rewards/chosen": -0.66015625, "rewards/margins": 0.170654296875, "rewards/rejected": -0.8310546875, "step": 848 }, { "agreement_weights/mean": 0.9786015748977661, "agreement_weights/std": 0.033098235726356506, "epoch": 0.9072936147475287, "eta/annotator_0": 0.9542803764343262, "grad_norm": 47.89118557730171, "learning_rate": 1.790623748388404e-08, "loss": 0.6064, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6064453125, "rewards/margins": 0.228759765625, "rewards/rejected": -0.8359375, "step": 849 }, { "agreement_weights/mean": 0.9736465215682983, "agreement_weights/std": 0.05183860659599304, "epoch": 0.9083622762489981, "eta/annotator_0": 0.9553540945053101, "grad_norm": 37.0104586050166, "learning_rate": 1.7495719009130667e-08, "loss": 0.4845, "rewards/accuracies": 0.875, "rewards/chosen": -0.6787109375, "rewards/margins": 0.261962890625, "rewards/rejected": -0.94140625, "step": 850 }, { "agreement_weights/mean": 0.9710091352462769, "agreement_weights/std": 0.055703304708004, "epoch": 0.9094309377504676, "eta/annotator_0": 0.9553540945053101, "grad_norm": 59.93883824352758, "learning_rate": 1.708984039024635e-08, "loss": 0.6406, "rewards/accuracies": 0.828125, "rewards/chosen": -0.60107421875, "rewards/margins": 0.24951171875, "rewards/rejected": -0.849609375, "step": 851 }, { "agreement_weights/mean": 0.970813512802124, "agreement_weights/std": 0.054186128079891205, "epoch": 0.9104995992519369, "eta/annotator_0": 0.9553767442703247, "grad_norm": 34.79409917226988, "learning_rate": 1.6688607290972624e-08, "loss": 0.563, "rewards/accuracies": 0.8125, "rewards/chosen": -0.68359375, "rewards/margins": 0.22900390625, "rewards/rejected": -0.9111328125, "step": 852 }, { "agreement_weights/mean": 0.9721664190292358, "agreement_weights/std": 0.05652683228254318, "epoch": 0.9115682607534064, "eta/annotator_0": 0.9553843140602112, "grad_norm": 55.06853877675671, "learning_rate": 1.6292025310226005e-08, "loss": 0.5872, "rewards/accuracies": 0.8125, "rewards/chosen": -0.591796875, "rewards/margins": 0.17578125, "rewards/rejected": -0.7666015625, "step": 853 }, { "agreement_weights/mean": 0.9837778210639954, "agreement_weights/std": 0.024101853370666504, "epoch": 0.9126369222548758, "eta/annotator_0": 0.9558706283569336, "grad_norm": 37.416869335130094, "learning_rate": 1.5900099982020226e-08, "loss": 0.4965, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6318359375, "rewards/margins": 0.22900390625, "rewards/rejected": -0.8623046875, "step": 854 }, { "agreement_weights/mean": 0.9445316791534424, "agreement_weights/std": 0.12904232740402222, "epoch": 0.9137055837563451, "eta/annotator_0": 0.9573294520378113, "grad_norm": 39.68954337035115, "learning_rate": 1.5512836775388742e-08, "loss": 0.5242, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6572265625, "rewards/margins": 0.235595703125, "rewards/rejected": -0.8935546875, "step": 855 }, { "agreement_weights/mean": 0.9464970827102661, "agreement_weights/std": 0.1053171306848526, "epoch": 0.9147742452578146, "eta/annotator_0": 0.9573294520378113, "grad_norm": 38.14566647071668, "learning_rate": 1.5130241094308454e-08, "loss": 0.5914, "rewards/accuracies": 0.734375, "rewards/chosen": -0.68359375, "rewards/margins": 0.1767578125, "rewards/rejected": -0.8603515625, "step": 856 }, { "agreement_weights/mean": 0.9834225177764893, "agreement_weights/std": 0.02768118493258953, "epoch": 0.915842906759284, "eta/annotator_0": 0.9544572234153748, "grad_norm": 34.58991522939772, "learning_rate": 1.475231827762436e-08, "loss": 0.4742, "rewards/accuracies": 0.890625, "rewards/chosen": -0.6064453125, "rewards/margins": 0.2578125, "rewards/rejected": -0.86328125, "step": 857 }, { "agreement_weights/mean": 0.9663619995117188, "agreement_weights/std": 0.08639577031135559, "epoch": 0.9169115682607534, "eta/annotator_0": 0.9534998536109924, "grad_norm": 35.39922897052788, "learning_rate": 1.4379073598975067e-08, "loss": 0.4776, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6435546875, "rewards/margins": 0.255615234375, "rewards/rejected": -0.8984375, "step": 858 }, { "agreement_weights/mean": 0.9538955688476562, "agreement_weights/std": 0.10505891591310501, "epoch": 0.9179802297622228, "eta/annotator_0": 0.9539728760719299, "grad_norm": 61.07271619897938, "learning_rate": 1.4010512266719044e-08, "loss": 0.6758, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6787109375, "rewards/margins": 0.198974609375, "rewards/rejected": -0.8798828125, "step": 859 }, { "agreement_weights/mean": 0.9838215708732605, "agreement_weights/std": 0.03499199077486992, "epoch": 0.9190488912636923, "eta/annotator_0": 0.9553918242454529, "grad_norm": 66.98096285649243, "learning_rate": 1.3646639423862183e-08, "loss": 0.5036, "rewards/accuracies": 0.890625, "rewards/chosen": -0.6376953125, "rewards/margins": 0.302978515625, "rewards/rejected": -0.939453125, "step": 860 }, { "agreement_weights/mean": 0.9627326726913452, "agreement_weights/std": 0.06933489441871643, "epoch": 0.9201175527651616, "eta/annotator_0": 0.9553918242454529, "grad_norm": 49.51306396611858, "learning_rate": 1.328746014798584e-08, "loss": 0.5831, "rewards/accuracies": 0.78125, "rewards/chosen": -0.60546875, "rewards/margins": 0.19287109375, "rewards/rejected": -0.7978515625, "step": 861 }, { "agreement_weights/mean": 0.9716871380805969, "agreement_weights/std": 0.06006474047899246, "epoch": 0.921186214266631, "eta/annotator_0": 0.9568341374397278, "grad_norm": 44.094641647422335, "learning_rate": 1.2932979451176074e-08, "loss": 0.5931, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6494140625, "rewards/margins": 0.169921875, "rewards/rejected": -0.8193359375, "step": 862 }, { "agreement_weights/mean": 0.9764956831932068, "agreement_weights/std": 0.046139925718307495, "epoch": 0.9222548757681005, "eta/annotator_0": 0.9573149085044861, "grad_norm": 39.22493641365572, "learning_rate": 1.2583202279953775e-08, "loss": 0.546, "rewards/accuracies": 0.78125, "rewards/chosen": -0.59375, "rewards/margins": 0.2149658203125, "rewards/rejected": -0.8076171875, "step": 863 }, { "agreement_weights/mean": 0.9553518891334534, "agreement_weights/std": 0.0806199237704277, "epoch": 0.9233235372695698, "eta/annotator_0": 0.9575512409210205, "grad_norm": 76.44929525749241, "learning_rate": 1.2238133515205273e-08, "loss": 0.8357, "rewards/accuracies": 0.703125, "rewards/chosen": -0.587890625, "rewards/margins": 0.1954345703125, "rewards/rejected": -0.78125, "step": 864 }, { "agreement_weights/mean": 0.9644626379013062, "agreement_weights/std": 0.061911582946777344, "epoch": 0.9243921987710393, "eta/annotator_0": 0.9582604169845581, "grad_norm": 40.53301397136782, "learning_rate": 1.1897777972114837e-08, "loss": 0.6493, "rewards/accuracies": 0.75, "rewards/chosen": -0.5771484375, "rewards/margins": 0.1829833984375, "rewards/rejected": -0.7607421875, "step": 865 }, { "agreement_weights/mean": 0.9243034720420837, "agreement_weights/std": 0.1314440220594406, "epoch": 0.9254608602725087, "eta/annotator_0": 0.9582604169845581, "grad_norm": 51.564621038531506, "learning_rate": 1.1562140400096982e-08, "loss": 0.7471, "rewards/accuracies": 0.671875, "rewards/chosen": -0.6611328125, "rewards/margins": 0.1304931640625, "rewards/rejected": -0.79296875, "step": 866 }, { "agreement_weights/mean": 0.9608522057533264, "agreement_weights/std": 0.09311814606189728, "epoch": 0.9265295217739781, "eta/annotator_0": 0.9539188146591187, "grad_norm": 80.36744224121917, "learning_rate": 1.12312254827303e-08, "loss": 0.6491, "rewards/accuracies": 0.796875, "rewards/chosen": -0.611328125, "rewards/margins": 0.22900390625, "rewards/rejected": -0.8408203125, "step": 867 }, { "agreement_weights/mean": 0.9780113697052002, "agreement_weights/std": 0.04321441054344177, "epoch": 0.9275981832754475, "eta/annotator_0": 0.9524716138839722, "grad_norm": 63.87163521681258, "learning_rate": 1.0905037837692332e-08, "loss": 0.5997, "rewards/accuracies": 0.859375, "rewards/chosen": -0.61083984375, "rewards/margins": 0.265869140625, "rewards/rejected": -0.8759765625, "step": 868 }, { "agreement_weights/mean": 0.9467607140541077, "agreement_weights/std": 0.12971453368663788, "epoch": 0.928666844776917, "eta/annotator_0": 0.953275203704834, "grad_norm": 51.362792668619875, "learning_rate": 1.0583582016694793e-08, "loss": 0.5923, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6748046875, "rewards/margins": 0.161865234375, "rewards/rejected": -0.8369140625, "step": 869 }, { "agreement_weights/mean": 0.9818884134292603, "agreement_weights/std": 0.037826068699359894, "epoch": 0.9297355062783863, "eta/annotator_0": 0.955686092376709, "grad_norm": 33.00243130968107, "learning_rate": 1.0266862505420382e-08, "loss": 0.4507, "rewards/accuracies": 0.859375, "rewards/chosen": -0.611328125, "rewards/margins": 0.30029296875, "rewards/rejected": -0.912109375, "step": 870 }, { "agreement_weights/mean": 0.9558581113815308, "agreement_weights/std": 0.10931459814310074, "epoch": 0.9308041677798558, "eta/annotator_0": 0.955686092376709, "grad_norm": 64.46341748768668, "learning_rate": 9.95488372346e-09, "loss": 0.6793, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6318359375, "rewards/margins": 0.199951171875, "rewards/rejected": -0.83203125, "step": 871 }, { "agreement_weights/mean": 0.9760449528694153, "agreement_weights/std": 0.049609407782554626, "epoch": 0.9318728292813251, "eta/annotator_0": 0.9542019367218018, "grad_norm": 43.56118375674164, "learning_rate": 9.647650024250958e-09, "loss": 0.4892, "rewards/accuracies": 0.859375, "rewards/chosen": -0.615234375, "rewards/margins": 0.225341796875, "rewards/rejected": -0.8408203125, "step": 872 }, { "agreement_weights/mean": 0.9508323669433594, "agreement_weights/std": 0.11794479936361313, "epoch": 0.9329414907827945, "eta/annotator_0": 0.9537072777748108, "grad_norm": 47.580414279241865, "learning_rate": 9.345165695016555e-09, "loss": 0.5978, "rewards/accuracies": 0.734375, "rewards/chosen": -0.65625, "rewards/margins": 0.1962890625, "rewards/rejected": -0.8525390625, "step": 873 }, { "agreement_weights/mean": 0.9735761880874634, "agreement_weights/std": 0.047664936631917953, "epoch": 0.934010152284264, "eta/annotator_0": 0.9526494741439819, "grad_norm": 45.090132198700566, "learning_rate": 9.04743495670608e-09, "loss": 0.5517, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6357421875, "rewards/margins": 0.189453125, "rewards/rejected": -0.8251953125, "step": 874 }, { "agreement_weights/mean": 0.9776684045791626, "agreement_weights/std": 0.029772112146019936, "epoch": 0.9350788137857333, "eta/annotator_0": 0.9494760036468506, "grad_norm": 50.7481007615497, "learning_rate": 8.754461963935789e-09, "loss": 0.5962, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6376953125, "rewards/margins": 0.16357421875, "rewards/rejected": -0.8017578125, "step": 875 }, { "agreement_weights/mean": 0.9551459550857544, "agreement_weights/std": 0.08972693979740143, "epoch": 0.9361474752872028, "eta/annotator_0": 0.9494760036468506, "grad_norm": 42.329428864793485, "learning_rate": 8.466250804931163e-09, "loss": 0.6609, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6884765625, "rewards/margins": 0.1334228515625, "rewards/rejected": -0.822265625, "step": 876 }, { "agreement_weights/mean": 0.9668631553649902, "agreement_weights/std": 0.07218743115663528, "epoch": 0.9372161367886722, "eta/annotator_0": 0.9519323110580444, "grad_norm": 73.80055517667682, "learning_rate": 8.182805501469669e-09, "loss": 0.5012, "rewards/accuracies": 0.859375, "rewards/chosen": -0.681640625, "rewards/margins": 0.2392578125, "rewards/rejected": -0.9208984375, "step": 877 }, { "agreement_weights/mean": 0.9743772745132446, "agreement_weights/std": 0.06478223204612732, "epoch": 0.9382847982901416, "eta/annotator_0": 0.9527510404586792, "grad_norm": 42.561634326133564, "learning_rate": 7.904130008824766e-09, "loss": 0.446, "rewards/accuracies": 0.90625, "rewards/chosen": -0.576171875, "rewards/margins": 0.28466796875, "rewards/rejected": -0.8603515625, "step": 878 }, { "agreement_weights/mean": 0.9644467234611511, "agreement_weights/std": 0.06652852892875671, "epoch": 0.939353459791611, "eta/annotator_0": 0.9532811641693115, "grad_norm": 39.670739023074326, "learning_rate": 7.630228215710687e-09, "loss": 0.5521, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6005859375, "rewards/margins": 0.21728515625, "rewards/rejected": -0.8173828125, "step": 879 }, { "agreement_weights/mean": 0.9706812500953674, "agreement_weights/std": 0.06266354769468307, "epoch": 0.9404221212930804, "eta/annotator_0": 0.9548715353012085, "grad_norm": 44.45640149511609, "learning_rate": 7.361103944228081e-09, "loss": 0.5547, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5966796875, "rewards/margins": 0.19970703125, "rewards/rejected": -0.796875, "step": 880 }, { "agreement_weights/mean": 0.9561400413513184, "agreement_weights/std": 0.09768036752939224, "epoch": 0.9414907827945498, "eta/annotator_0": 0.9548715353012085, "grad_norm": 37.2976655274895, "learning_rate": 7.096760949810848e-09, "loss": 0.6234, "rewards/accuracies": 0.765625, "rewards/chosen": -0.6630859375, "rewards/margins": 0.165771484375, "rewards/rejected": -0.830078125, "step": 881 }, { "agreement_weights/mean": 0.978463351726532, "agreement_weights/std": 0.03353984281420708, "epoch": 0.9425594442960192, "eta/annotator_0": 0.9550772905349731, "grad_norm": 39.34515842527289, "learning_rate": 6.837202921173535e-09, "loss": 0.5042, "rewards/accuracies": 0.828125, "rewards/chosen": -0.61328125, "rewards/margins": 0.2369384765625, "rewards/rejected": -0.8515625, "step": 882 }, { "agreement_weights/mean": 0.9690979719161987, "agreement_weights/std": 0.04581721872091293, "epoch": 0.9436281057974887, "eta/annotator_0": 0.9551458954811096, "grad_norm": 40.999572903574055, "learning_rate": 6.582433480259997e-09, "loss": 0.6488, "rewards/accuracies": 0.765625, "rewards/chosen": -0.716796875, "rewards/margins": 0.1793212890625, "rewards/rejected": -0.896484375, "step": 883 }, { "agreement_weights/mean": 0.9614397287368774, "agreement_weights/std": 0.09884421527385712, "epoch": 0.944696767298958, "eta/annotator_0": 0.9543485641479492, "grad_norm": 76.38821400244974, "learning_rate": 6.332456182192886e-09, "loss": 0.5683, "rewards/accuracies": 0.875, "rewards/chosen": -0.6552734375, "rewards/margins": 0.24169921875, "rewards/rejected": -0.8974609375, "step": 884 }, { "agreement_weights/mean": 0.965262770652771, "agreement_weights/std": 0.0771108940243721, "epoch": 0.9457654288004275, "eta/annotator_0": 0.9519564509391785, "grad_norm": 36.915039333652665, "learning_rate": 6.08727451522395e-09, "loss": 0.5706, "rewards/accuracies": 0.75, "rewards/chosen": -0.677734375, "rewards/margins": 0.2177734375, "rewards/rejected": -0.8955078125, "step": 885 }, { "agreement_weights/mean": 0.9518240094184875, "agreement_weights/std": 0.10404343903064728, "epoch": 0.9468340903018969, "eta/annotator_0": 0.9519564509391785, "grad_norm": 41.48429323380413, "learning_rate": 5.846891900685308e-09, "loss": 0.6084, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6044921875, "rewards/margins": 0.165283203125, "rewards/rejected": -0.7705078125, "step": 886 }, { "agreement_weights/mean": 0.9704297780990601, "agreement_weights/std": 0.07727585732936859, "epoch": 0.9479027518033663, "eta/annotator_0": 0.9540026187896729, "grad_norm": 47.00112933730438, "learning_rate": 5.6113116929418836e-09, "loss": 0.5219, "rewards/accuracies": 0.828125, "rewards/chosen": -0.56982421875, "rewards/margins": 0.2626953125, "rewards/rejected": -0.8310546875, "step": 887 }, { "agreement_weights/mean": 0.95713871717453, "agreement_weights/std": 0.06981819123029709, "epoch": 0.9489714133048357, "eta/annotator_0": 0.9546846151351929, "grad_norm": 67.0967028620549, "learning_rate": 5.38053717934443e-09, "loss": 0.6276, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5986328125, "rewards/margins": 0.1947021484375, "rewards/rejected": -0.7939453125, "step": 888 }, { "agreement_weights/mean": 0.9672361612319946, "agreement_weights/std": 0.08054178953170776, "epoch": 0.9500400748063051, "eta/annotator_0": 0.9535588622093201, "grad_norm": 44.94520383950516, "learning_rate": 5.154571580183792e-09, "loss": 0.5482, "rewards/accuracies": 0.84375, "rewards/chosen": -0.6103515625, "rewards/margins": 0.192626953125, "rewards/rejected": -0.8017578125, "step": 889 }, { "agreement_weights/mean": 0.9701375961303711, "agreement_weights/std": 0.07038462907075882, "epoch": 0.9511087363077745, "eta/annotator_0": 0.9501817226409912, "grad_norm": 32.18272218664583, "learning_rate": 4.933418048645871e-09, "loss": 0.5278, "rewards/accuracies": 0.859375, "rewards/chosen": -0.650390625, "rewards/margins": 0.213134765625, "rewards/rejected": -0.86328125, "step": 890 }, { "agreement_weights/mean": 0.9789856672286987, "agreement_weights/std": 0.03405822068452835, "epoch": 0.9521773978092439, "eta/annotator_0": 0.9501817226409912, "grad_norm": 36.61369752691562, "learning_rate": 4.717079670767637e-09, "loss": 0.5516, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5673828125, "rewards/margins": 0.2060546875, "rewards/rejected": -0.7734375, "step": 891 }, { "agreement_weights/mean": 0.9748294949531555, "agreement_weights/std": 0.0581703819334507, "epoch": 0.9532460593107134, "eta/annotator_0": 0.9487627744674683, "grad_norm": 48.44356258047221, "learning_rate": 4.505559465394154e-09, "loss": 0.5246, "rewards/accuracies": 0.875, "rewards/chosen": -0.6591796875, "rewards/margins": 0.222900390625, "rewards/rejected": -0.880859375, "step": 892 }, { "agreement_weights/mean": 0.9799805879592896, "agreement_weights/std": 0.03840738162398338, "epoch": 0.9543147208121827, "eta/annotator_0": 0.9482897520065308, "grad_norm": 40.42775412224534, "learning_rate": 4.298860384136299e-09, "loss": 0.5502, "rewards/accuracies": 0.859375, "rewards/chosen": -0.611328125, "rewards/margins": 0.22216796875, "rewards/rejected": -0.833984375, "step": 893 }, { "agreement_weights/mean": 0.9736531376838684, "agreement_weights/std": 0.05690830200910568, "epoch": 0.9553833823136522, "eta/annotator_0": 0.9491224884986877, "grad_norm": 32.582725656798345, "learning_rate": 4.096985311329731e-09, "loss": 0.527, "rewards/accuracies": 0.828125, "rewards/chosen": -0.66552734375, "rewards/margins": 0.2080078125, "rewards/rejected": -0.875, "step": 894 }, { "agreement_weights/mean": 0.9709547162055969, "agreement_weights/std": 0.06389644742012024, "epoch": 0.9564520438151216, "eta/annotator_0": 0.951620876789093, "grad_norm": 48.35715524616882, "learning_rate": 3.89993706399448e-09, "loss": 0.5281, "rewards/accuracies": 0.859375, "rewards/chosen": -0.576171875, "rewards/margins": 0.223876953125, "rewards/rejected": -0.7998046875, "step": 895 }, { "agreement_weights/mean": 0.9847393035888672, "agreement_weights/std": 0.02897360920906067, "epoch": 0.957520705316591, "eta/annotator_0": 0.951620876789093, "grad_norm": 40.43652274306033, "learning_rate": 3.7077183917958887e-09, "loss": 0.5227, "rewards/accuracies": 0.84375, "rewards/chosen": -0.64453125, "rewards/margins": 0.281005859375, "rewards/rejected": -0.92578125, "step": 896 }, { "agreement_weights/mean": 0.9735584259033203, "agreement_weights/std": 0.045924313366413116, "epoch": 0.9585893668180604, "eta/annotator_0": 0.953108012676239, "grad_norm": 57.9251261892434, "learning_rate": 3.5203319770059583e-09, "loss": 0.6479, "rewards/accuracies": 0.734375, "rewards/chosen": -0.73046875, "rewards/margins": 0.2164306640625, "rewards/rejected": -0.9482421875, "step": 897 }, { "agreement_weights/mean": 0.957804799079895, "agreement_weights/std": 0.08477681875228882, "epoch": 0.9596580283195298, "eta/annotator_0": 0.9536036849021912, "grad_norm": 54.02213683011034, "learning_rate": 3.3377804344660367e-09, "loss": 0.5612, "rewards/accuracies": 0.828125, "rewards/chosen": -0.65234375, "rewards/margins": 0.224853515625, "rewards/rejected": -0.8759765625, "step": 898 }, { "agreement_weights/mean": 0.94504314661026, "agreement_weights/std": 0.11697618663311005, "epoch": 0.9607266898209992, "eta/annotator_0": 0.9540001153945923, "grad_norm": 70.97227663100927, "learning_rate": 3.160066311550452e-09, "loss": 0.7078, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6416015625, "rewards/margins": 0.205078125, "rewards/rejected": -0.845703125, "step": 899 }, { "agreement_weights/mean": 0.9681756496429443, "agreement_weights/std": 0.07756836712360382, "epoch": 0.9617953513224686, "eta/annotator_0": 0.9551894068717957, "grad_norm": 38.27292324845378, "learning_rate": 2.9871920881306855e-09, "loss": 0.5315, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6474609375, "rewards/margins": 0.205810546875, "rewards/rejected": -0.853515625, "step": 900 }, { "epoch": 0.9617953513224686, "eta/annotator_0": 0.9569782614707947, "eval_agreement_weights/mean": 0.9680588245391846, "eval_agreement_weights/std": 0.06534840166568756, "eval_loss": 0.565337061882019, "eval_rewards/accuracies": 0.8014340400695801, "eval_rewards/chosen": -0.6247300505638123, "eval_rewards/margins": 0.21321415901184082, "eval_rewards/rejected": -0.8380335569381714, "eval_runtime": 135.0786, "eval_samples_per_second": 14.517, "eval_steps_per_second": 0.911, "step": 900 }, { "agreement_weights/mean": 0.9677245616912842, "agreement_weights/std": 0.06244734674692154, "epoch": 0.962864012823938, "eta/annotator_0": 0.9583417177200317, "grad_norm": 37.03766414210447, "learning_rate": 2.8191601765411744e-09, "loss": 0.6556, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6708984375, "rewards/margins": 0.1441650390625, "rewards/rejected": -0.81640625, "step": 901 }, { "agreement_weights/mean": 0.9803606271743774, "agreement_weights/std": 0.029250483959913254, "epoch": 0.9639326743254074, "eta/annotator_0": 0.9580344557762146, "grad_norm": 36.362117285565034, "learning_rate": 2.655972921545119e-09, "loss": 0.4753, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5654296875, "rewards/margins": 0.244873046875, "rewards/rejected": -0.8095703125, "step": 902 }, { "agreement_weights/mean": 0.9771341681480408, "agreement_weights/std": 0.027443068102002144, "epoch": 0.9650013358268769, "eta/annotator_0": 0.9580344557762146, "grad_norm": 55.614844080363206, "learning_rate": 2.4976326003022685e-09, "loss": 0.635, "rewards/accuracies": 0.78125, "rewards/chosen": -0.67578125, "rewards/margins": 0.152099609375, "rewards/rejected": -0.8271484375, "step": 903 }, { "agreement_weights/mean": 0.9850688576698303, "agreement_weights/std": 0.020540695637464523, "epoch": 0.9660699973283462, "eta/annotator_0": 0.9602892994880676, "grad_norm": 45.35908807700043, "learning_rate": 2.3441414223369038e-09, "loss": 0.5153, "rewards/accuracies": 0.875, "rewards/chosen": -0.6171875, "rewards/margins": 0.240234375, "rewards/rejected": -0.857421875, "step": 904 }, { "agreement_weights/mean": 0.9546188116073608, "agreement_weights/std": 0.1003277450799942, "epoch": 0.9671386588298156, "eta/annotator_0": 0.9602892994880676, "grad_norm": 42.69472752438907, "learning_rate": 2.1955015295069045e-09, "loss": 0.6416, "rewards/accuracies": 0.75, "rewards/chosen": -0.724609375, "rewards/margins": 0.14599609375, "rewards/rejected": -0.8701171875, "step": 905 }, { "agreement_weights/mean": 0.9638081192970276, "agreement_weights/std": 0.08073923736810684, "epoch": 0.9682073203312851, "eta/annotator_0": 0.9587496519088745, "grad_norm": 50.01617485289226, "learning_rate": 2.051714995974141e-09, "loss": 0.6408, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6318359375, "rewards/margins": 0.1923828125, "rewards/rejected": -0.8251953125, "step": 906 }, { "agreement_weights/mean": 0.9820942282676697, "agreement_weights/std": 0.028384681791067123, "epoch": 0.9692759818327544, "eta/annotator_0": 0.9572100639343262, "grad_norm": 41.4544292672899, "learning_rate": 1.9127838281752904e-09, "loss": 0.5249, "rewards/accuracies": 0.84375, "rewards/chosen": -0.576171875, "rewards/margins": 0.253173828125, "rewards/rejected": -0.8291015625, "step": 907 }, { "agreement_weights/mean": 0.9616868495941162, "agreement_weights/std": 0.06723620742559433, "epoch": 0.9703446433342239, "eta/annotator_0": 0.9572100639343262, "grad_norm": 44.38426075260318, "learning_rate": 1.7787099647939397e-09, "loss": 0.6669, "rewards/accuracies": 0.734375, "rewards/chosen": -0.689453125, "rewards/margins": 0.1630859375, "rewards/rejected": -0.8525390625, "step": 908 }, { "agreement_weights/mean": 0.9481978416442871, "agreement_weights/std": 0.11971831321716309, "epoch": 0.9714133048356933, "eta/annotator_0": 0.9590915441513062, "grad_norm": 70.34756490932548, "learning_rate": 1.6494952767334979e-09, "loss": 0.6245, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6015625, "rewards/margins": 0.2021484375, "rewards/rejected": -0.8037109375, "step": 909 }, { "agreement_weights/mean": 0.9530167579650879, "agreement_weights/std": 0.13494493067264557, "epoch": 0.9724819663371627, "eta/annotator_0": 0.9590915441513062, "grad_norm": 58.582906645145194, "learning_rate": 1.5251415670912427e-09, "loss": 0.6168, "rewards/accuracies": 0.796875, "rewards/chosen": -0.54150390625, "rewards/margins": 0.247314453125, "rewards/rejected": -0.7880859375, "step": 910 }, { "agreement_weights/mean": 0.9778558015823364, "agreement_weights/std": 0.04256213828921318, "epoch": 0.9735506278386321, "eta/annotator_0": 0.9574874043464661, "grad_norm": 45.94755881664879, "learning_rate": 1.4056505711327881e-09, "loss": 0.5173, "rewards/accuracies": 0.84375, "rewards/chosen": -0.6904296875, "rewards/margins": 0.21728515625, "rewards/rejected": -0.9072265625, "step": 911 }, { "agreement_weights/mean": 0.9705728888511658, "agreement_weights/std": 0.06000823900103569, "epoch": 0.9746192893401016, "eta/annotator_0": 0.955883264541626, "grad_norm": 97.92920548048394, "learning_rate": 1.2910239562683056e-09, "loss": 0.6322, "rewards/accuracies": 0.84375, "rewards/chosen": -0.564453125, "rewards/margins": 0.241455078125, "rewards/rejected": -0.806640625, "step": 912 }, { "agreement_weights/mean": 0.9712738990783691, "agreement_weights/std": 0.058383576571941376, "epoch": 0.9756879508415709, "eta/annotator_0": 0.955883264541626, "grad_norm": 33.3923773659715, "learning_rate": 1.1812633220290137e-09, "loss": 0.5073, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6025390625, "rewards/margins": 0.2412109375, "rewards/rejected": -0.84375, "step": 913 }, { "agreement_weights/mean": 0.9744900465011597, "agreement_weights/std": 0.05676336959004402, "epoch": 0.9767566123430403, "eta/annotator_0": 0.9578477740287781, "grad_norm": 48.54390460462348, "learning_rate": 1.076370200044835e-09, "loss": 0.471, "rewards/accuracies": 0.875, "rewards/chosen": -0.6650390625, "rewards/margins": 0.223388671875, "rewards/rejected": -0.8876953125, "step": 914 }, { "agreement_weights/mean": 0.9716757535934448, "agreement_weights/std": 0.054409634321928024, "epoch": 0.9778252738445098, "eta/annotator_0": 0.9578477740287781, "grad_norm": 63.03472392514688, "learning_rate": 9.763460540232194e-10, "loss": 0.5912, "rewards/accuracies": 0.78125, "rewards/chosen": -0.6240234375, "rewards/margins": 0.215087890625, "rewards/rejected": -0.8388671875, "step": 915 }, { "agreement_weights/mean": 0.9694978594779968, "agreement_weights/std": 0.05946382135152817, "epoch": 0.9788939353459791, "eta/annotator_0": 0.957904577255249, "grad_norm": 38.232687701582506, "learning_rate": 8.811922797284321e-10, "loss": 0.5399, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6162109375, "rewards/margins": 0.199951171875, "rewards/rejected": -0.8154296875, "step": 916 }, { "agreement_weights/mean": 0.9534528851509094, "agreement_weights/std": 0.10704923421144485, "epoch": 0.9799625968474486, "eta/annotator_0": 0.95796138048172, "grad_norm": 60.27224391606386, "learning_rate": 7.909102049623584e-10, "loss": 0.6179, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6611328125, "rewards/margins": 0.2142333984375, "rewards/rejected": -0.876953125, "step": 917 }, { "agreement_weights/mean": 0.9810919761657715, "agreement_weights/std": 0.03446976840496063, "epoch": 0.981031258348918, "eta/annotator_0": 0.95796138048172, "grad_norm": 33.33194657831, "learning_rate": 7.055010895457736e-10, "loss": 0.5201, "rewards/accuracies": 0.859375, "rewards/chosen": -0.6083984375, "rewards/margins": 0.22119140625, "rewards/rejected": -0.830078125, "step": 918 }, { "agreement_weights/mean": 0.9712321758270264, "agreement_weights/std": 0.06749338656663895, "epoch": 0.9820999198503874, "eta/annotator_0": 0.9610904455184937, "grad_norm": 46.3305156914332, "learning_rate": 6.249661253009742e-10, "loss": 0.5559, "rewards/accuracies": 0.875, "rewards/chosen": -0.62890625, "rewards/margins": 0.17578125, "rewards/rejected": -0.8046875, "step": 919 }, { "agreement_weights/mean": 0.9594389796257019, "agreement_weights/std": 0.11713945865631104, "epoch": 0.9831685813518568, "eta/annotator_0": 0.9610904455184937, "grad_norm": 84.72769903465299, "learning_rate": 5.493064360348354e-10, "loss": 0.6959, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6826171875, "rewards/margins": 0.275146484375, "rewards/rejected": -0.9560546875, "step": 920 }, { "agreement_weights/mean": 0.974616527557373, "agreement_weights/std": 0.06509727984666824, "epoch": 0.9842372428533263, "eta/annotator_0": 0.9599018692970276, "grad_norm": 36.17148257896495, "learning_rate": 4.785230775234239e-10, "loss": 0.4339, "rewards/accuracies": 0.875, "rewards/chosen": -0.580078125, "rewards/margins": 0.248291015625, "rewards/rejected": -0.8291015625, "step": 921 }, { "agreement_weights/mean": 0.9576011896133423, "agreement_weights/std": 0.10153590887784958, "epoch": 0.9853059043547956, "eta/annotator_0": 0.9587133526802063, "grad_norm": 62.66902876728884, "learning_rate": 4.1261703749715383e-10, "loss": 0.5945, "rewards/accuracies": 0.796875, "rewards/chosen": -0.65234375, "rewards/margins": 0.229248046875, "rewards/rejected": -0.8818359375, "step": 922 }, { "agreement_weights/mean": 0.9623050689697266, "agreement_weights/std": 0.07108275592327118, "epoch": 0.986374565856265, "eta/annotator_0": 0.9587133526802063, "grad_norm": 43.87316464960468, "learning_rate": 3.515892356269146e-10, "loss": 0.6978, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6044921875, "rewards/margins": 0.14312744140625, "rewards/rejected": -0.74609375, "step": 923 }, { "agreement_weights/mean": 0.9698142409324646, "agreement_weights/std": 0.06959850341081619, "epoch": 0.9874432273577345, "eta/annotator_0": 0.9601885676383972, "grad_norm": 43.991570858523716, "learning_rate": 2.954405235114421e-10, "loss": 0.5504, "rewards/accuracies": 0.796875, "rewards/chosen": -0.6591796875, "rewards/margins": 0.204833984375, "rewards/rejected": -0.8642578125, "step": 924 }, { "agreement_weights/mean": 0.971694827079773, "agreement_weights/std": 0.06103697419166565, "epoch": 0.9885118888592038, "eta/annotator_0": 0.9601885676383972, "grad_norm": 33.703769602259094, "learning_rate": 2.4417168466523417e-10, "loss": 0.5649, "rewards/accuracies": 0.828125, "rewards/chosen": -0.7001953125, "rewards/margins": 0.192626953125, "rewards/rejected": -0.8935546875, "step": 925 }, { "agreement_weights/mean": 0.9636772274971008, "agreement_weights/std": 0.09160663932561874, "epoch": 0.9895805503606733, "eta/annotator_0": 0.9614529609680176, "grad_norm": 45.79100825782087, "learning_rate": 1.977834345076701e-10, "loss": 0.5385, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6396484375, "rewards/margins": 0.204345703125, "rewards/rejected": -0.8447265625, "step": 926 }, { "agreement_weights/mean": 0.9741774797439575, "agreement_weights/std": 0.04952405020594597, "epoch": 0.9906492118621427, "eta/annotator_0": 0.9627174139022827, "grad_norm": 44.857960176874755, "learning_rate": 1.5627642035317968e-10, "loss": 0.5673, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5458984375, "rewards/margins": 0.1978759765625, "rewards/rejected": -0.7431640625, "step": 927 }, { "agreement_weights/mean": 0.9686168432235718, "agreement_weights/std": 0.054629869759082794, "epoch": 0.9917178733636121, "eta/annotator_0": 0.9627174139022827, "grad_norm": 62.060849137499005, "learning_rate": 1.1965122140195627e-10, "loss": 0.6723, "rewards/accuracies": 0.734375, "rewards/chosen": -0.6962890625, "rewards/margins": 0.180419921875, "rewards/rejected": -0.875, "step": 928 }, { "agreement_weights/mean": 0.9770854115486145, "agreement_weights/std": 0.06285350769758224, "epoch": 0.9927865348650815, "eta/annotator_0": 0.9646680355072021, "grad_norm": 36.243071213665054, "learning_rate": 8.790834873210751e-11, "loss": 0.4256, "rewards/accuracies": 0.875, "rewards/chosen": -0.64453125, "rewards/margins": 0.24365234375, "rewards/rejected": -0.888671875, "step": 929 }, { "agreement_weights/mean": 0.9757649898529053, "agreement_weights/std": 0.05200465768575668, "epoch": 0.9938551963665508, "eta/annotator_0": 0.9646680355072021, "grad_norm": 53.40789807607805, "learning_rate": 6.104824529238883e-11, "loss": 0.5013, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6494140625, "rewards/margins": 0.2236328125, "rewards/rejected": -0.8740234375, "step": 930 }, { "agreement_weights/mean": 0.9787781238555908, "agreement_weights/std": 0.03375673666596413, "epoch": 0.9949238578680203, "eta/annotator_0": 0.9656537771224976, "grad_norm": 32.60471291252973, "learning_rate": 3.9071285895986296e-11, "loss": 0.553, "rewards/accuracies": 0.8125, "rewards/chosen": -0.62646484375, "rewards/margins": 0.223876953125, "rewards/rejected": -0.8505859375, "step": 931 }, { "agreement_weights/mean": 0.9572635293006897, "agreement_weights/std": 0.08592593669891357, "epoch": 0.9959925193694897, "eta/annotator_0": 0.966639518737793, "grad_norm": 30.669109252243594, "learning_rate": 2.197777721546501e-11, "loss": 0.4737, "rewards/accuracies": 0.859375, "rewards/chosen": -0.60546875, "rewards/margins": 0.255859375, "rewards/rejected": -0.8603515625, "step": 932 }, { "agreement_weights/mean": 0.9843375086784363, "agreement_weights/std": 0.029552582651376724, "epoch": 0.9970611808709591, "eta/annotator_0": 0.966639518737793, "grad_norm": 47.18671437393699, "learning_rate": 9.76795777826167e-12, "loss": 0.5366, "rewards/accuracies": 0.890625, "rewards/chosen": -0.564453125, "rewards/margins": 0.263671875, "rewards/rejected": -0.8291015625, "step": 933 }, { "agreement_weights/mean": 0.9750593304634094, "agreement_weights/std": 0.05496371537446976, "epoch": 0.9981298423724285, "eta/annotator_0": 0.968090832233429, "grad_norm": 42.54286994167385, "learning_rate": 2.44199796365363e-12, "loss": 0.5365, "rewards/accuracies": 0.828125, "rewards/chosen": -0.56787109375, "rewards/margins": 0.26513671875, "rewards/rejected": -0.8330078125, "step": 934 }, { "agreement_weights/mean": 0.9599708318710327, "agreement_weights/std": 0.10262198746204376, "epoch": 0.999198503873898, "eta/annotator_0": 0.968090832233429, "grad_norm": 44.3566237790979, "learning_rate": 0.0, "loss": 0.5755, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6923828125, "rewards/margins": 0.224853515625, "rewards/rejected": -0.9169921875, "step": 935 }, { "epoch": 0.999198503873898, "step": 935, "total_flos": 0.0, "train_loss": 0.732532274946172, "train_runtime": 11043.6269, "train_samples_per_second": 5.422, "train_steps_per_second": 0.085 } ], "logging_steps": 1, "max_steps": 935, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }