|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9960291197882198, |
|
"eval_steps": 1000, |
|
"global_step": 754, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0026472534745201853, |
|
"grad_norm": 4.30932258831093, |
|
"learning_rate": 6.578947368421052e-09, |
|
"logits/chosen": -2.923454761505127, |
|
"logits/rejected": -3.022336483001709, |
|
"logps/chosen": -491.803955078125, |
|
"logps/rejected": -509.828369140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.026472534745201854, |
|
"grad_norm": 4.3524814860184176, |
|
"learning_rate": 6.578947368421052e-08, |
|
"logits/chosen": -2.849837303161621, |
|
"logits/rejected": -2.918842315673828, |
|
"logps/chosen": -482.8021240234375, |
|
"logps/rejected": -468.6262512207031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 1.7793978258850984e-05, |
|
"rewards/margins": 0.00012061676534358412, |
|
"rewards/rejected": -0.00010282275616191328, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05294506949040371, |
|
"grad_norm": 4.058367941679181, |
|
"learning_rate": 1.3157894736842104e-07, |
|
"logits/chosen": -2.8850979804992676, |
|
"logits/rejected": -2.9686431884765625, |
|
"logps/chosen": -492.1334533691406, |
|
"logps/rejected": -473.5765075683594, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00013926837709732354, |
|
"rewards/margins": 0.0005674505373463035, |
|
"rewards/rejected": -0.0004281821602489799, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07941760423560557, |
|
"grad_norm": 4.100822256989, |
|
"learning_rate": 1.9736842105263157e-07, |
|
"logits/chosen": -2.868659496307373, |
|
"logits/rejected": -2.9573891162872314, |
|
"logps/chosen": -475.2897033691406, |
|
"logps/rejected": -473.85064697265625, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.003203944070264697, |
|
"rewards/margins": 0.006764715071767569, |
|
"rewards/rejected": -0.0035607716999948025, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.10589013898080742, |
|
"grad_norm": 4.224597004906648, |
|
"learning_rate": 2.631578947368421e-07, |
|
"logits/chosen": -2.880610704421997, |
|
"logits/rejected": -2.9531335830688477, |
|
"logps/chosen": -477.7626953125, |
|
"logps/rejected": -471.15203857421875, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": 0.0102998660877347, |
|
"rewards/margins": 0.02004994824528694, |
|
"rewards/rejected": -0.009750082157552242, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13236267372600927, |
|
"grad_norm": 4.481790796785453, |
|
"learning_rate": 3.2894736842105264e-07, |
|
"logits/chosen": -2.910224199295044, |
|
"logits/rejected": -2.966555118560791, |
|
"logps/chosen": -496.03704833984375, |
|
"logps/rejected": -483.69970703125, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": 0.02056797966361046, |
|
"rewards/margins": 0.04747764393687248, |
|
"rewards/rejected": -0.026909660547971725, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15883520847121113, |
|
"grad_norm": 4.727420695183077, |
|
"learning_rate": 3.9473684210526315e-07, |
|
"logits/chosen": -2.908859968185425, |
|
"logits/rejected": -2.9846339225769043, |
|
"logps/chosen": -487.971923828125, |
|
"logps/rejected": -487.698486328125, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.047008998692035675, |
|
"rewards/margins": 0.12849071621894836, |
|
"rewards/rejected": -0.08148171752691269, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18530774321641297, |
|
"grad_norm": 5.3651750883138645, |
|
"learning_rate": 4.6052631578947365e-07, |
|
"logits/chosen": -2.9276702404022217, |
|
"logits/rejected": -2.958789348602295, |
|
"logps/chosen": -488.4877014160156, |
|
"logps/rejected": -489.53546142578125, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.025530118495225906, |
|
"rewards/margins": 0.22059020400047302, |
|
"rewards/rejected": -0.19506008923053741, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21178027796161483, |
|
"grad_norm": 3.6311630185736297, |
|
"learning_rate": 4.999570604073014e-07, |
|
"logits/chosen": -2.923982620239258, |
|
"logits/rejected": -2.9833145141601562, |
|
"logps/chosen": -503.56134033203125, |
|
"logps/rejected": -552.4190673828125, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.13993698358535767, |
|
"rewards/margins": 0.5944468975067139, |
|
"rewards/rejected": -0.7343839406967163, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23825281270681667, |
|
"grad_norm": 4.662865437270245, |
|
"learning_rate": 4.994741593733563e-07, |
|
"logits/chosen": -2.9195408821105957, |
|
"logits/rejected": -2.958824872970581, |
|
"logps/chosen": -534.3409423828125, |
|
"logps/rejected": -607.5826416015625, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.41773176193237305, |
|
"rewards/margins": 0.9263063669204712, |
|
"rewards/rejected": -1.3440382480621338, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26472534745201853, |
|
"grad_norm": 4.763826147961603, |
|
"learning_rate": 4.984557228946769e-07, |
|
"logits/chosen": -2.815560817718506, |
|
"logits/rejected": -2.8625988960266113, |
|
"logps/chosen": -611.6197509765625, |
|
"logps/rejected": -758.5867309570312, |
|
"loss": 0.3704, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.2842621803283691, |
|
"rewards/margins": 1.4864912033081055, |
|
"rewards/rejected": -2.7707533836364746, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29119788219722037, |
|
"grad_norm": 4.297222720122574, |
|
"learning_rate": 4.969039372050355e-07, |
|
"logits/chosen": -2.787930488586426, |
|
"logits/rejected": -2.8286757469177246, |
|
"logps/chosen": -622.7388305664062, |
|
"logps/rejected": -840.2802734375, |
|
"loss": 0.3234, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -1.4110974073410034, |
|
"rewards/margins": 2.2375450134277344, |
|
"rewards/rejected": -3.648642063140869, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31767041694242226, |
|
"grad_norm": 3.7535819941587065, |
|
"learning_rate": 4.948221334560093e-07, |
|
"logits/chosen": -2.831071615219116, |
|
"logits/rejected": -2.843599796295166, |
|
"logps/chosen": -592.7720947265625, |
|
"logps/rejected": -807.1361083984375, |
|
"loss": 0.3176, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -1.023418664932251, |
|
"rewards/margins": 2.2339682579040527, |
|
"rewards/rejected": -3.2573866844177246, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3441429516876241, |
|
"grad_norm": 4.4946406733821185, |
|
"learning_rate": 4.922147805661402e-07, |
|
"logits/chosen": -2.83577823638916, |
|
"logits/rejected": -2.875858783721924, |
|
"logps/chosen": -629.0977783203125, |
|
"logps/rejected": -913.8206787109375, |
|
"loss": 0.2875, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.3898423910140991, |
|
"rewards/margins": 2.828009605407715, |
|
"rewards/rejected": -4.2178521156311035, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.37061548643282594, |
|
"grad_norm": 4.541566634311875, |
|
"learning_rate": 4.890874756276999e-07, |
|
"logits/chosen": -2.7425622940063477, |
|
"logits/rejected": -2.8039002418518066, |
|
"logps/chosen": -634.9403686523438, |
|
"logps/rejected": -911.8308715820312, |
|
"loss": 0.2831, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5872424840927124, |
|
"rewards/margins": 2.6655924320220947, |
|
"rewards/rejected": -4.252835273742676, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3970880211780278, |
|
"grad_norm": 3.3580041177607978, |
|
"learning_rate": 4.854469318916532e-07, |
|
"logits/chosen": -2.8008108139038086, |
|
"logits/rejected": -2.8558998107910156, |
|
"logps/chosen": -646.1107177734375, |
|
"logps/rejected": -980.4015502929688, |
|
"loss": 0.2668, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -1.5013834238052368, |
|
"rewards/margins": 3.364828109741211, |
|
"rewards/rejected": -4.866211414337158, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42356055592322966, |
|
"grad_norm": 4.675914141471813, |
|
"learning_rate": 4.8130096435661e-07, |
|
"logits/chosen": -2.80204176902771, |
|
"logits/rejected": -2.8504693508148193, |
|
"logps/chosen": -631.5078125, |
|
"logps/rejected": -925.1160888671875, |
|
"loss": 0.2519, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -1.4621139764785767, |
|
"rewards/margins": 2.9786953926086426, |
|
"rewards/rejected": -4.440809726715088, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4500330906684315, |
|
"grad_norm": 6.483714450702139, |
|
"learning_rate": 4.766584729927049e-07, |
|
"logits/chosen": -2.768064498901367, |
|
"logits/rejected": -2.8086233139038086, |
|
"logps/chosen": -676.531982421875, |
|
"logps/rejected": -1029.0057373046875, |
|
"loss": 0.2799, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8374897241592407, |
|
"rewards/margins": 3.5798168182373047, |
|
"rewards/rejected": -5.417306900024414, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47650562541363334, |
|
"grad_norm": 5.396515481877322, |
|
"learning_rate": 4.7152942363641345e-07, |
|
"logits/chosen": -2.7385268211364746, |
|
"logits/rejected": -2.760967493057251, |
|
"logps/chosen": -642.5712890625, |
|
"logps/rejected": -997.8201904296875, |
|
"loss": 0.2558, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -1.627642273902893, |
|
"rewards/margins": 3.5839648246765137, |
|
"rewards/rejected": -5.211607456207275, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5029781601588352, |
|
"grad_norm": 5.261191188089043, |
|
"learning_rate": 4.6592482659732045e-07, |
|
"logits/chosen": -2.7847423553466797, |
|
"logits/rejected": -2.8355870246887207, |
|
"logps/chosen": -674.9547729492188, |
|
"logps/rejected": -972.1364135742188, |
|
"loss": 0.2536, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -1.8109451532363892, |
|
"rewards/margins": 3.033442258834839, |
|
"rewards/rejected": -4.844388008117676, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5294506949040371, |
|
"grad_norm": 4.9133890145951495, |
|
"learning_rate": 4.5985671302276166e-07, |
|
"logits/chosen": -2.7557764053344727, |
|
"logits/rejected": -2.8086917400360107, |
|
"logps/chosen": -681.2697143554688, |
|
"logps/rejected": -989.4937744140625, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.8896667957305908, |
|
"rewards/margins": 3.2132556438446045, |
|
"rewards/rejected": -5.102922439575195, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5559232296492389, |
|
"grad_norm": 4.892431641268176, |
|
"learning_rate": 4.533381090710776e-07, |
|
"logits/chosen": -2.7014384269714355, |
|
"logits/rejected": -2.7446448802948, |
|
"logps/chosen": -670.3836669921875, |
|
"logps/rejected": -988.2939453125, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -1.8962217569351196, |
|
"rewards/margins": 3.261091947555542, |
|
"rewards/rejected": -5.157313823699951, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5823957643944407, |
|
"grad_norm": 4.5229078992046166, |
|
"learning_rate": 4.463830079489196e-07, |
|
"logits/chosen": -2.7127437591552734, |
|
"logits/rejected": -2.735060930252075, |
|
"logps/chosen": -697.1866455078125, |
|
"logps/rejected": -1054.2486572265625, |
|
"loss": 0.2292, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -2.145174026489258, |
|
"rewards/margins": 3.7120985984802246, |
|
"rewards/rejected": -5.857272148132324, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6088682991396426, |
|
"grad_norm": 5.024605464390731, |
|
"learning_rate": 4.390063398726356e-07, |
|
"logits/chosen": -2.6947622299194336, |
|
"logits/rejected": -2.6960158348083496, |
|
"logps/chosen": -719.8890380859375, |
|
"logps/rejected": -1086.1702880859375, |
|
"loss": 0.2353, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -2.32169771194458, |
|
"rewards/margins": 3.803462266921997, |
|
"rewards/rejected": -6.12515926361084, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6353408338848445, |
|
"grad_norm": 4.497134908678504, |
|
"learning_rate": 4.3122394001821657e-07, |
|
"logits/chosen": -2.699171781539917, |
|
"logits/rejected": -2.7273011207580566, |
|
"logps/chosen": -715.7581176757812, |
|
"logps/rejected": -1008.9625244140625, |
|
"loss": 0.2481, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.174252986907959, |
|
"rewards/margins": 3.0458567142486572, |
|
"rewards/rejected": -5.220109462738037, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6618133686300464, |
|
"grad_norm": 4.8761682045650625, |
|
"learning_rate": 4.2305251452860566e-07, |
|
"logits/chosen": -2.7068121433258057, |
|
"logits/rejected": -2.7410686016082764, |
|
"logps/chosen": -667.6207885742188, |
|
"logps/rejected": -1025.73828125, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.91938054561615, |
|
"rewards/margins": 3.5805881023406982, |
|
"rewards/rejected": -5.4999680519104, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6882859033752482, |
|
"grad_norm": 6.933116753661563, |
|
"learning_rate": 4.1450960465134024e-07, |
|
"logits/chosen": -2.7003915309906006, |
|
"logits/rejected": -2.7421202659606934, |
|
"logps/chosen": -725.2587890625, |
|
"logps/rejected": -1111.21533203125, |
|
"loss": 0.2322, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.4127094745635986, |
|
"rewards/margins": 3.868704319000244, |
|
"rewards/rejected": -6.28141450881958, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.71475843812045, |
|
"grad_norm": 5.180639131738146, |
|
"learning_rate": 4.0561354908350977e-07, |
|
"logits/chosen": -2.7455577850341797, |
|
"logits/rejected": -2.768336057662964, |
|
"logps/chosen": -686.0287475585938, |
|
"logps/rejected": -1041.7474365234375, |
|
"loss": 0.2406, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0675699710845947, |
|
"rewards/margins": 3.5837669372558594, |
|
"rewards/rejected": -5.651337146759033, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7412309728656519, |
|
"grad_norm": 6.760828116437296, |
|
"learning_rate": 3.963834446048644e-07, |
|
"logits/chosen": -2.7979235649108887, |
|
"logits/rejected": -2.795571804046631, |
|
"logps/chosen": -757.2296142578125, |
|
"logps/rejected": -1096.9935302734375, |
|
"loss": 0.235, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -2.414430618286133, |
|
"rewards/margins": 3.682656764984131, |
|
"rewards/rejected": -6.097087383270264, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7677035076108537, |
|
"grad_norm": 5.924062579551431, |
|
"learning_rate": 3.868391050835793e-07, |
|
"logits/chosen": -2.707353353500366, |
|
"logits/rejected": -2.708177089691162, |
|
"logps/chosen": -735.2392578125, |
|
"logps/rejected": -1096.3634033203125, |
|
"loss": 0.2165, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.4990901947021484, |
|
"rewards/margins": 3.737868547439575, |
|
"rewards/rejected": -6.2369585037231445, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7941760423560555, |
|
"grad_norm": 6.316712123280126, |
|
"learning_rate": 3.770010189426761e-07, |
|
"logits/chosen": -2.696866512298584, |
|
"logits/rejected": -2.720412015914917, |
|
"logps/chosen": -788.0701904296875, |
|
"logps/rejected": -1174.900390625, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.9900288581848145, |
|
"rewards/margins": 3.7810866832733154, |
|
"rewards/rejected": -6.771115779876709, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8206485771012575, |
|
"grad_norm": 5.395016037418297, |
|
"learning_rate": 3.66890305178407e-07, |
|
"logits/chosen": -2.716305732727051, |
|
"logits/rejected": -2.716601848602295, |
|
"logps/chosen": -742.38232421875, |
|
"logps/rejected": -1122.4625244140625, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.601231098175049, |
|
"rewards/margins": 3.891951084136963, |
|
"rewards/rejected": -6.4931817054748535, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8471211118464593, |
|
"grad_norm": 6.246149943149367, |
|
"learning_rate": 3.565286680250138e-07, |
|
"logits/chosen": -2.748223066329956, |
|
"logits/rejected": -2.7793920040130615, |
|
"logps/chosen": -724.46484375, |
|
"logps/rejected": -1080.5015869140625, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -2.3395774364471436, |
|
"rewards/margins": 3.757481813430786, |
|
"rewards/rejected": -6.097059726715088, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8735936465916612, |
|
"grad_norm": 6.876926943284203, |
|
"learning_rate": 3.4593835036318225e-07, |
|
"logits/chosen": -2.691286563873291, |
|
"logits/rejected": -2.7156758308410645, |
|
"logps/chosen": -759.7913208007812, |
|
"logps/rejected": -1132.6741943359375, |
|
"loss": 0.2212, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -2.764772415161133, |
|
"rewards/margins": 3.8325703144073486, |
|
"rewards/rejected": -6.597343444824219, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.900066181336863, |
|
"grad_norm": 7.266749393582284, |
|
"learning_rate": 3.35142085972207e-07, |
|
"logits/chosen": -2.6851227283477783, |
|
"logits/rejected": -2.722938060760498, |
|
"logps/chosen": -754.1365356445312, |
|
"logps/rejected": -1132.1871337890625, |
|
"loss": 0.218, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -2.7608418464660645, |
|
"rewards/margins": 3.8383007049560547, |
|
"rewards/rejected": -6.599142551422119, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9265387160820648, |
|
"grad_norm": 5.8828162546054426, |
|
"learning_rate": 3.2416305072836555e-07, |
|
"logits/chosen": -2.703392267227173, |
|
"logits/rejected": -2.717411756515503, |
|
"logps/chosen": -759.862060546875, |
|
"logps/rejected": -1137.0098876953125, |
|
"loss": 0.2033, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -2.6198081970214844, |
|
"rewards/margins": 3.8972651958465576, |
|
"rewards/rejected": -6.517073154449463, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9530112508272667, |
|
"grad_norm": 6.02673405904732, |
|
"learning_rate": 3.1302481285426197e-07, |
|
"logits/chosen": -2.7140469551086426, |
|
"logits/rejected": -2.7299208641052246, |
|
"logps/chosen": -727.9823608398438, |
|
"logps/rejected": -1152.2041015625, |
|
"loss": 0.2146, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -2.4466357231140137, |
|
"rewards/margins": 4.253617763519287, |
|
"rewards/rejected": -6.700253486633301, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9794837855724685, |
|
"grad_norm": 5.891168724337083, |
|
"learning_rate": 3.017512823259373e-07, |
|
"logits/chosen": -2.7166543006896973, |
|
"logits/rejected": -2.7297706604003906, |
|
"logps/chosen": -730.9952392578125, |
|
"logps/rejected": -1117.7025146484375, |
|
"loss": 0.2011, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": -2.5781686305999756, |
|
"rewards/margins": 4.00323486328125, |
|
"rewards/rejected": -6.5814032554626465, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0059563203176705, |
|
"grad_norm": 5.4096108526897115, |
|
"learning_rate": 2.9036665954635264e-07, |
|
"logits/chosen": -2.7353827953338623, |
|
"logits/rejected": -2.765237331390381, |
|
"logps/chosen": -769.5081176757812, |
|
"logps/rejected": -1209.970947265625, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": -2.7778820991516113, |
|
"rewards/margins": 4.453129291534424, |
|
"rewards/rejected": -7.231011390686035, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0324288550628722, |
|
"grad_norm": 6.969996057859779, |
|
"learning_rate": 2.7889538339542523e-07, |
|
"logits/chosen": -2.715958833694458, |
|
"logits/rejected": -2.741973638534546, |
|
"logps/chosen": -760.3748779296875, |
|
"logps/rejected": -1181.979248046875, |
|
"loss": 0.1997, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": -2.8078932762145996, |
|
"rewards/margins": 4.318324089050293, |
|
"rewards/rejected": -7.126217842102051, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0589013898080741, |
|
"grad_norm": 6.27267698414733, |
|
"learning_rate": 2.6736207876813643e-07, |
|
"logits/chosen": -2.728332042694092, |
|
"logits/rejected": -2.7608368396759033, |
|
"logps/chosen": -768.7643432617188, |
|
"logps/rejected": -1208.543701171875, |
|
"loss": 0.1963, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -2.7519562244415283, |
|
"rewards/margins": 4.398838996887207, |
|
"rewards/rejected": -7.150795936584473, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0853739245532759, |
|
"grad_norm": 6.331116230324871, |
|
"learning_rate": 2.5579150371332953e-07, |
|
"logits/chosen": -2.7396435737609863, |
|
"logits/rejected": -2.7421391010284424, |
|
"logps/chosen": -809.5894775390625, |
|
"logps/rejected": -1248.327880859375, |
|
"loss": 0.1821, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": -3.0432112216949463, |
|
"rewards/margins": 4.567535400390625, |
|
"rewards/rejected": -7.61074686050415, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.1118464592984778, |
|
"grad_norm": 6.507894575900424, |
|
"learning_rate": 2.4420849628667045e-07, |
|
"logits/chosen": -2.7221198081970215, |
|
"logits/rejected": -2.7356066703796387, |
|
"logps/chosen": -795.4509887695312, |
|
"logps/rejected": -1229.848876953125, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": -3.027529716491699, |
|
"rewards/margins": 4.336198806762695, |
|
"rewards/rejected": -7.3637285232543945, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.1383189940436798, |
|
"grad_norm": 6.866126936883296, |
|
"learning_rate": 2.3263792123186352e-07, |
|
"logits/chosen": -2.7347493171691895, |
|
"logits/rejected": -2.7523412704467773, |
|
"logps/chosen": -788.9684448242188, |
|
"logps/rejected": -1187.0565185546875, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -2.9675726890563965, |
|
"rewards/margins": 4.024303913116455, |
|
"rewards/rejected": -6.99187707901001, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1647915287888815, |
|
"grad_norm": 5.664097996574406, |
|
"learning_rate": 2.211046166045748e-07, |
|
"logits/chosen": -2.699467420578003, |
|
"logits/rejected": -2.715172529220581, |
|
"logps/chosen": -787.1725463867188, |
|
"logps/rejected": -1219.5947265625, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -2.9378509521484375, |
|
"rewards/margins": 4.4215216636657715, |
|
"rewards/rejected": -7.359372138977051, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.1912640635340834, |
|
"grad_norm": 5.8438221376315, |
|
"learning_rate": 2.096333404536474e-07, |
|
"logits/chosen": -2.6697657108306885, |
|
"logits/rejected": -2.6788392066955566, |
|
"logps/chosen": -816.9085693359375, |
|
"logps/rejected": -1204.7374267578125, |
|
"loss": 0.1939, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.2424380779266357, |
|
"rewards/margins": 4.097726821899414, |
|
"rewards/rejected": -7.340165138244629, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2177365982792852, |
|
"grad_norm": 6.534950899522826, |
|
"learning_rate": 1.982487176740627e-07, |
|
"logits/chosen": -2.6479620933532715, |
|
"logits/rejected": -2.686981201171875, |
|
"logps/chosen": -760.5189208984375, |
|
"logps/rejected": -1172.6590576171875, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": -2.8477563858032227, |
|
"rewards/margins": 4.02510929107666, |
|
"rewards/rejected": -6.872865200042725, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.244209133024487, |
|
"grad_norm": 5.919455336024619, |
|
"learning_rate": 1.8697518714573804e-07, |
|
"logits/chosen": -2.7183327674865723, |
|
"logits/rejected": -2.7170839309692383, |
|
"logps/chosen": -790.9767456054688, |
|
"logps/rejected": -1145.7747802734375, |
|
"loss": 0.1854, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.942429304122925, |
|
"rewards/margins": 3.9219982624053955, |
|
"rewards/rejected": -6.864427089691162, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.270681667769689, |
|
"grad_norm": 8.510303191565324, |
|
"learning_rate": 1.758369492716345e-07, |
|
"logits/chosen": -2.7107906341552734, |
|
"logits/rejected": -2.7333004474639893, |
|
"logps/chosen": -831.3541870117188, |
|
"logps/rejected": -1250.9085693359375, |
|
"loss": 0.204, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -3.3519797325134277, |
|
"rewards/margins": 4.182629585266113, |
|
"rewards/rejected": -7.534609317779541, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.2971542025148908, |
|
"grad_norm": 6.158432969646956, |
|
"learning_rate": 1.648579140277931e-07, |
|
"logits/chosen": -2.7101292610168457, |
|
"logits/rejected": -2.715567111968994, |
|
"logps/chosen": -815.2239990234375, |
|
"logps/rejected": -1204.00830078125, |
|
"loss": 0.2016, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.184443712234497, |
|
"rewards/margins": 4.096536159515381, |
|
"rewards/rejected": -7.280980110168457, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.3236267372600927, |
|
"grad_norm": 6.688172890404941, |
|
"learning_rate": 1.5406164963681773e-07, |
|
"logits/chosen": -2.6899495124816895, |
|
"logits/rejected": -2.697331666946411, |
|
"logps/chosen": -788.6990356445312, |
|
"logps/rejected": -1184.682861328125, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -2.907625436782837, |
|
"rewards/margins": 4.130288124084473, |
|
"rewards/rejected": -7.037914276123047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3500992720052944, |
|
"grad_norm": 6.177665969794554, |
|
"learning_rate": 1.4347133197498618e-07, |
|
"logits/chosen": -2.720623016357422, |
|
"logits/rejected": -2.713228702545166, |
|
"logps/chosen": -813.38427734375, |
|
"logps/rejected": -1158.912353515625, |
|
"loss": 0.1947, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.203073024749756, |
|
"rewards/margins": 3.7714266777038574, |
|
"rewards/rejected": -6.974499702453613, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.3765718067504964, |
|
"grad_norm": 7.6140239843164785, |
|
"learning_rate": 1.3310969482159296e-07, |
|
"logits/chosen": -2.7323203086853027, |
|
"logits/rejected": -2.720503330230713, |
|
"logps/chosen": -822.5255737304688, |
|
"logps/rejected": -1203.3837890625, |
|
"loss": 0.206, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -3.2711524963378906, |
|
"rewards/margins": 4.040999889373779, |
|
"rewards/rejected": -7.312152862548828, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.4030443414956983, |
|
"grad_norm": 7.395646356776199, |
|
"learning_rate": 1.2299898105732384e-07, |
|
"logits/chosen": -2.664844036102295, |
|
"logits/rejected": -2.6827051639556885, |
|
"logps/chosen": -797.8216552734375, |
|
"logps/rejected": -1175.84326171875, |
|
"loss": 0.1975, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.197530746459961, |
|
"rewards/margins": 3.875856876373291, |
|
"rewards/rejected": -7.07338809967041, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4295168762409, |
|
"grad_norm": 5.37023800702416, |
|
"learning_rate": 1.1316089491642075e-07, |
|
"logits/chosen": -2.6801838874816895, |
|
"logits/rejected": -2.7150120735168457, |
|
"logps/chosen": -782.3211669921875, |
|
"logps/rejected": -1218.0201416015625, |
|
"loss": 0.177, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -3.0446789264678955, |
|
"rewards/margins": 4.468942642211914, |
|
"rewards/rejected": -7.5136213302612305, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.4559894109861018, |
|
"grad_norm": 6.848159521074984, |
|
"learning_rate": 1.0361655539513564e-07, |
|
"logits/chosen": -2.7019715309143066, |
|
"logits/rejected": -2.717634677886963, |
|
"logps/chosen": -816.9815063476562, |
|
"logps/rejected": -1239.1295166015625, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.2783799171447754, |
|
"rewards/margins": 4.237971305847168, |
|
"rewards/rejected": -7.516351222991943, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.4824619457313037, |
|
"grad_norm": 6.1903290336235965, |
|
"learning_rate": 9.438645091649028e-08, |
|
"logits/chosen": -2.703291416168213, |
|
"logits/rejected": -2.717991590499878, |
|
"logps/chosen": -835.4637451171875, |
|
"logps/rejected": -1262.481201171875, |
|
"loss": 0.1941, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -3.5229759216308594, |
|
"rewards/margins": 4.230559349060059, |
|
"rewards/rejected": -7.753535270690918, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.5089344804765057, |
|
"grad_norm": 7.319396362423722, |
|
"learning_rate": 8.549039534865979e-08, |
|
"logits/chosen": -2.6856772899627686, |
|
"logits/rejected": -2.7186999320983887, |
|
"logps/chosen": -839.0086059570312, |
|
"logps/rejected": -1260.779541015625, |
|
"loss": 0.1939, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.4230124950408936, |
|
"rewards/margins": 4.303016185760498, |
|
"rewards/rejected": -7.7260284423828125, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.5354070152217076, |
|
"grad_norm": 6.830716936510353, |
|
"learning_rate": 7.694748547139429e-08, |
|
"logits/chosen": -2.690796136856079, |
|
"logits/rejected": -2.6854541301727295, |
|
"logps/chosen": -858.9547119140625, |
|
"logps/rejected": -1268.125732421875, |
|
"loss": 0.1822, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.606029510498047, |
|
"rewards/margins": 4.35842752456665, |
|
"rewards/rejected": -7.964457035064697, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.5618795499669094, |
|
"grad_norm": 7.344475965596424, |
|
"learning_rate": 6.877605998178343e-08, |
|
"logits/chosen": -2.6852376461029053, |
|
"logits/rejected": -2.6851906776428223, |
|
"logps/chosen": -825.2852783203125, |
|
"logps/rejected": -1217.6024169921875, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.3112761974334717, |
|
"rewards/margins": 4.0954132080078125, |
|
"rewards/rejected": -7.4066901206970215, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.588352084712111, |
|
"grad_norm": 6.774677658543177, |
|
"learning_rate": 6.099366012736437e-08, |
|
"logits/chosen": -2.6621475219726562, |
|
"logits/rejected": -2.6802525520324707, |
|
"logps/chosen": -823.2789306640625, |
|
"logps/rejected": -1230.999755859375, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.3858489990234375, |
|
"rewards/margins": 4.153179168701172, |
|
"rewards/rejected": -7.539028167724609, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.614824619457313, |
|
"grad_norm": 6.63238227242087, |
|
"learning_rate": 5.3616992051080415e-08, |
|
"logits/chosen": -2.677306890487671, |
|
"logits/rejected": -2.712442636489868, |
|
"logps/chosen": -788.0901489257812, |
|
"logps/rejected": -1236.086181640625, |
|
"loss": 0.1818, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.1388726234436035, |
|
"rewards/margins": 4.399405002593994, |
|
"rewards/rejected": -7.538276672363281, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.641297154202515, |
|
"grad_norm": 7.377420865927689, |
|
"learning_rate": 4.666189092892245e-08, |
|
"logits/chosen": -2.681398630142212, |
|
"logits/rejected": -2.7074177265167236, |
|
"logps/chosen": -808.8873291015625, |
|
"logps/rejected": -1222.2945556640625, |
|
"loss": 0.192, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.247420072555542, |
|
"rewards/margins": 4.086552619934082, |
|
"rewards/rejected": -7.3339738845825195, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.6677696889477167, |
|
"grad_norm": 8.778568268489295, |
|
"learning_rate": 4.0143286977238345e-08, |
|
"logits/chosen": -2.732050895690918, |
|
"logits/rejected": -2.743983507156372, |
|
"logps/chosen": -833.8406372070312, |
|
"logps/rejected": -1255.4339599609375, |
|
"loss": 0.2052, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -3.412902355194092, |
|
"rewards/margins": 4.319065570831299, |
|
"rewards/rejected": -7.731966972351074, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.6942422236929184, |
|
"grad_norm": 9.757233734434815, |
|
"learning_rate": 3.407517340267957e-08, |
|
"logits/chosen": -2.704099655151367, |
|
"logits/rejected": -2.747129440307617, |
|
"logps/chosen": -825.1597900390625, |
|
"logps/rejected": -1279.721923828125, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -3.3202297687530518, |
|
"rewards/margins": 4.6456804275512695, |
|
"rewards/rejected": -7.965909481048584, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.7207147584381204, |
|
"grad_norm": 5.790336419749713, |
|
"learning_rate": 2.847057636358663e-08, |
|
"logits/chosen": -2.666538715362549, |
|
"logits/rejected": -2.6833436489105225, |
|
"logps/chosen": -791.0831298828125, |
|
"logps/rejected": -1239.421875, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.060492753982544, |
|
"rewards/margins": 4.618899345397949, |
|
"rewards/rejected": -7.679392337799072, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.7471872931833223, |
|
"grad_norm": 7.090658258583613, |
|
"learning_rate": 2.3341527007295107e-08, |
|
"logits/chosen": -2.724691867828369, |
|
"logits/rejected": -2.7381081581115723, |
|
"logps/chosen": -795.6064453125, |
|
"logps/rejected": -1221.3892822265625, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.000559091567993, |
|
"rewards/margins": 4.30582332611084, |
|
"rewards/rejected": -7.306382656097412, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.7736598279285243, |
|
"grad_norm": 7.043130947329383, |
|
"learning_rate": 1.8699035643389927e-08, |
|
"logits/chosen": -2.6909706592559814, |
|
"logits/rejected": -2.706798791885376, |
|
"logps/chosen": -813.7172241210938, |
|
"logps/rejected": -1210.0374755859375, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.150881290435791, |
|
"rewards/margins": 4.167717933654785, |
|
"rewards/rejected": -7.318598747253418, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.800132362673726, |
|
"grad_norm": 8.93025020392657, |
|
"learning_rate": 1.4553068108346778e-08, |
|
"logits/chosen": -2.7232556343078613, |
|
"logits/rejected": -2.758570671081543, |
|
"logps/chosen": -816.0042724609375, |
|
"logps/rejected": -1228.6907958984375, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -3.222668409347534, |
|
"rewards/margins": 4.130661487579346, |
|
"rewards/rejected": -7.353329658508301, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.8266048974189277, |
|
"grad_norm": 8.105462657724475, |
|
"learning_rate": 1.0912524372300031e-08, |
|
"logits/chosen": -2.6958508491516113, |
|
"logits/rejected": -2.7145791053771973, |
|
"logps/chosen": -800.2642211914062, |
|
"logps/rejected": -1204.597412109375, |
|
"loss": 0.1979, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": -3.0551960468292236, |
|
"rewards/margins": 4.35771369934082, |
|
"rewards/rejected": -7.412909507751465, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.8530774321641297, |
|
"grad_norm": 6.305134676239398, |
|
"learning_rate": 7.785219433859846e-09, |
|
"logits/chosen": -2.666156768798828, |
|
"logits/rejected": -2.6787142753601074, |
|
"logps/chosen": -795.4716186523438, |
|
"logps/rejected": -1216.070068359375, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.1992480754852295, |
|
"rewards/margins": 4.258774757385254, |
|
"rewards/rejected": -7.4580230712890625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.8795499669093316, |
|
"grad_norm": 8.35714287832233, |
|
"learning_rate": 5.177866543990689e-09, |
|
"logits/chosen": -2.6713449954986572, |
|
"logits/rejected": -2.7099664211273193, |
|
"logps/chosen": -809.7697143554688, |
|
"logps/rejected": -1260.0615234375, |
|
"loss": 0.1768, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.31097412109375, |
|
"rewards/margins": 4.475188255310059, |
|
"rewards/rejected": -7.786163330078125, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.9060225016545336, |
|
"grad_norm": 6.357842566297534, |
|
"learning_rate": 3.0960627949644105e-09, |
|
"logits/chosen": -2.661090850830078, |
|
"logits/rejected": -2.6854565143585205, |
|
"logps/chosen": -799.8544921875, |
|
"logps/rejected": -1197.60595703125, |
|
"loss": 0.195, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.179225206375122, |
|
"rewards/margins": 4.040897846221924, |
|
"rewards/rejected": -7.220122337341309, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9324950363997353, |
|
"grad_norm": 6.897936781273812, |
|
"learning_rate": 1.5442771053230663e-09, |
|
"logits/chosen": -2.654930353164673, |
|
"logits/rejected": -2.6900641918182373, |
|
"logps/chosen": -794.2493286132812, |
|
"logps/rejected": -1218.8597412109375, |
|
"loss": 0.1752, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.182406187057495, |
|
"rewards/margins": 4.386531352996826, |
|
"rewards/rejected": -7.5689377784729, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.958967571144937, |
|
"grad_norm": 6.958740857504488, |
|
"learning_rate": 5.25840626643681e-10, |
|
"logits/chosen": -2.731703519821167, |
|
"logits/rejected": -2.728083848953247, |
|
"logps/chosen": -804.791259765625, |
|
"logps/rejected": -1165.920654296875, |
|
"loss": 0.1871, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -3.141540765762329, |
|
"rewards/margins": 3.8939075469970703, |
|
"rewards/rejected": -7.035449028015137, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.985440105890139, |
|
"grad_norm": 7.712958898241185, |
|
"learning_rate": 4.293959269863201e-11, |
|
"logits/chosen": -2.6934590339660645, |
|
"logits/rejected": -2.702648639678955, |
|
"logps/chosen": -818.2516479492188, |
|
"logps/rejected": -1221.8408203125, |
|
"loss": 0.2042, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.207921266555786, |
|
"rewards/margins": 4.332727909088135, |
|
"rewards/rejected": -7.5406494140625, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.9960291197882198, |
|
"step": 754, |
|
"total_flos": 0.0, |
|
"train_loss": 0.26563876598520053, |
|
"train_runtime": 5267.2293, |
|
"train_samples_per_second": 36.699, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 754, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|