{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 563148,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0026635982015384943,
      "grad_norm": 0.7201167941093445,
      "learning_rate": 0.0001996,
      "loss": 9.4233,
      "step": 500
    },
    {
      "epoch": 0.005327196403076989,
      "grad_norm": 0.15531601011753082,
      "learning_rate": 0.0003996,
      "loss": 7.4925,
      "step": 1000
    },
    {
      "epoch": 0.007990794604615483,
      "grad_norm": 0.2483946532011032,
      "learning_rate": 0.0005996,
      "loss": 7.4229,
      "step": 1500
    },
    {
      "epoch": 0.010654392806153977,
      "grad_norm": 0.5883714556694031,
      "learning_rate": 0.0007996,
      "loss": 7.2323,
      "step": 2000
    },
    {
      "epoch": 0.013317991007692471,
      "grad_norm": 0.7867951989173889,
      "learning_rate": 0.0009996,
      "loss": 7.0605,
      "step": 2500
    },
    {
      "epoch": 0.015981589209230967,
      "grad_norm": 0.8444465398788452,
      "learning_rate": 0.0009991117421269675,
      "loss": 6.9306,
      "step": 3000
    },
    {
      "epoch": 0.01864518741076946,
      "grad_norm": 0.6867188215255737,
      "learning_rate": 0.00099821991695324,
      "loss": 6.8518,
      "step": 3500
    },
    {
      "epoch": 0.021308785612307955,
      "grad_norm": 0.5377506017684937,
      "learning_rate": 0.0009973280917795124,
      "loss": 6.7872,
      "step": 4000
    },
    {
      "epoch": 0.02397238381384645,
      "grad_norm": 0.6717762351036072,
      "learning_rate": 0.0009964362666057848,
      "loss": 6.7506,
      "step": 4500
    },
    {
      "epoch": 0.026635982015384942,
      "grad_norm": 1.001440167427063,
      "learning_rate": 0.0009955444414320573,
      "loss": 6.7159,
      "step": 5000
    },
    {
      "epoch": 0.029299580216923436,
      "grad_norm": 0.5917439460754395,
      "learning_rate": 0.0009946526162583297,
      "loss": 6.6817,
      "step": 5500
    },
    {
      "epoch": 0.031963178418461934,
      "grad_norm": 0.6403864026069641,
      "learning_rate": 0.0009937607910846021,
      "loss": 6.6561,
      "step": 6000
    },
    {
      "epoch": 0.034626776620000424,
      "grad_norm": 0.6477270126342773,
      "learning_rate": 0.0009928689659108746,
      "loss": 6.626,
      "step": 6500
    },
    {
      "epoch": 0.03729037482153892,
      "grad_norm": 0.8317912817001343,
      "learning_rate": 0.0009919789243874944,
      "loss": 6.6155,
      "step": 7000
    },
    {
      "epoch": 0.03995397302307741,
      "grad_norm": 0.81658536195755,
      "learning_rate": 0.0009910870992137668,
      "loss": 6.5983,
      "step": 7500
    },
    {
      "epoch": 0.04261757122461591,
      "grad_norm": 0.8080710768699646,
      "learning_rate": 0.0009901952740400395,
      "loss": 6.5712,
      "step": 8000
    },
    {
      "epoch": 0.045281169426154406,
      "grad_norm": 0.7330273985862732,
      "learning_rate": 0.000989303448866312,
      "loss": 6.5671,
      "step": 8500
    },
    {
      "epoch": 0.0479447676276929,
      "grad_norm": 0.5048246383666992,
      "learning_rate": 0.0009884134073429318,
      "loss": 6.5566,
      "step": 9000
    },
    {
      "epoch": 0.050608365829231394,
      "grad_norm": 0.60006183385849,
      "learning_rate": 0.0009875215821692042,
      "loss": 6.5299,
      "step": 9500
    },
    {
      "epoch": 0.053271964030769885,
      "grad_norm": 0.7553561329841614,
      "learning_rate": 0.0009866297569954767,
      "loss": 6.4984,
      "step": 10000
    },
    {
      "epoch": 0.05593556223230838,
      "grad_norm": 0.6969451904296875,
      "learning_rate": 0.000985737931821749,
      "loss": 6.4697,
      "step": 10500
    },
    {
      "epoch": 0.05859916043384687,
      "grad_norm": 0.8137800097465515,
      "learning_rate": 0.0009848461066480215,
      "loss": 6.4535,
      "step": 11000
    },
    {
      "epoch": 0.06126275863538537,
      "grad_norm": 0.6285300850868225,
      "learning_rate": 0.000983954281474294,
      "loss": 6.4259,
      "step": 11500
    },
    {
      "epoch": 0.06392635683692387,
      "grad_norm": 0.6301620006561279,
      "learning_rate": 0.0009830624563005664,
      "loss": 6.4174,
      "step": 12000
    },
    {
      "epoch": 0.06658995503846236,
      "grad_norm": 0.49541255831718445,
      "learning_rate": 0.0009821706311268388,
      "loss": 6.4134,
      "step": 12500
    },
    {
      "epoch": 0.06925355324000085,
      "grad_norm": 0.8492177128791809,
      "learning_rate": 0.000981280589603459,
      "loss": 6.394,
      "step": 13000
    },
    {
      "epoch": 0.07191715144153935,
      "grad_norm": 0.6284229755401611,
      "learning_rate": 0.0009803887644297313,
      "loss": 6.3861,
      "step": 13500
    },
    {
      "epoch": 0.07458074964307784,
      "grad_norm": 0.7854110598564148,
      "learning_rate": 0.0009794969392560038,
      "loss": 6.3795,
      "step": 14000
    },
    {
      "epoch": 0.07724434784461634,
      "grad_norm": 0.6952440738677979,
      "learning_rate": 0.0009786051140822762,
      "loss": 6.3679,
      "step": 14500
    },
    {
      "epoch": 0.07990794604615482,
      "grad_norm": Infinity,
      "learning_rate": 0.0009777132889085486,
      "loss": 6.363,
      "step": 15000
    },
    {
      "epoch": 0.08257154424769332,
      "grad_norm": 0.6554950475692749,
      "learning_rate": 0.0009768232473851685,
      "loss": 6.3597,
      "step": 15500
    },
    {
      "epoch": 0.08523514244923182,
      "grad_norm": 0.6918802261352539,
      "learning_rate": 0.000975931422211441,
      "loss": 6.3536,
      "step": 16000
    },
    {
      "epoch": 0.08789874065077032,
      "grad_norm": 0.749622642993927,
      "learning_rate": 0.0009750395970377135,
      "loss": 6.3438,
      "step": 16500
    },
    {
      "epoch": 0.09056233885230881,
      "grad_norm": 0.7492349743843079,
      "learning_rate": 0.000974147771863986,
      "loss": 6.3332,
      "step": 17000
    },
    {
      "epoch": 0.0932259370538473,
      "grad_norm": 0.6446586847305298,
      "learning_rate": 0.000973257730340606,
      "loss": 6.3241,
      "step": 17500
    },
    {
      "epoch": 0.0958895352553858,
      "grad_norm": 0.8464730978012085,
      "learning_rate": 0.0009723659051668784,
      "loss": 6.3194,
      "step": 18000
    },
    {
      "epoch": 0.09855313345692429,
      "grad_norm": 0.6281186938285828,
      "learning_rate": 0.0009714740799931508,
      "loss": 6.309,
      "step": 18500
    },
    {
      "epoch": 0.10121673165846279,
      "grad_norm": 0.8605656027793884,
      "learning_rate": 0.0009705822548194233,
      "loss": 6.2991,
      "step": 19000
    },
    {
      "epoch": 0.10388032986000127,
      "grad_norm": 0.7788176536560059,
      "learning_rate": 0.0009696922132960431,
      "loss": 6.3005,
      "step": 19500
    },
    {
      "epoch": 0.10654392806153977,
      "grad_norm": 0.6075990200042725,
      "learning_rate": 0.0009688003881223157,
      "loss": 6.2843,
      "step": 20000
    },
    {
      "epoch": 0.10920752626307827,
      "grad_norm": 0.7577124238014221,
      "learning_rate": 0.0009679085629485881,
      "loss": 6.2759,
      "step": 20500
    },
    {
      "epoch": 0.11187112446461676,
      "grad_norm": 0.8228011727333069,
      "learning_rate": 0.0009670167377748605,
      "loss": 6.2599,
      "step": 21000
    },
    {
      "epoch": 0.11453472266615526,
      "grad_norm": 0.7447388172149658,
      "learning_rate": 0.0009661266962514804,
      "loss": 6.2513,
      "step": 21500
    },
    {
      "epoch": 0.11719832086769374,
      "grad_norm": 0.9003899097442627,
      "learning_rate": 0.0009652348710777528,
      "loss": 6.2279,
      "step": 22000
    },
    {
      "epoch": 0.11986191906923224,
      "grad_norm": 1.0574650764465332,
      "learning_rate": 0.0009643430459040254,
      "loss": 6.2027,
      "step": 22500
    },
    {
      "epoch": 0.12252551727077074,
      "grad_norm": 0.9610631465911865,
      "learning_rate": 0.0009634512207302978,
      "loss": 6.1742,
      "step": 23000
    },
    {
      "epoch": 0.12518911547230924,
      "grad_norm": 1.1535989046096802,
      "learning_rate": 0.0009625611792069178,
      "loss": 6.1294,
      "step": 23500
    },
    {
      "epoch": 0.12785271367384773,
      "grad_norm": 1.1773658990859985,
      "learning_rate": 0.0009616711376835376,
      "loss": 6.097,
      "step": 24000
    },
    {
      "epoch": 0.13051631187538623,
      "grad_norm": 1.2815760374069214,
      "learning_rate": 0.0009607793125098101,
      "loss": 6.0634,
      "step": 24500
    },
    {
      "epoch": 0.13317991007692473,
      "grad_norm": 1.4569323062896729,
      "learning_rate": 0.0009598874873360826,
      "loss": 6.0457,
      "step": 25000
    },
    {
      "epoch": 0.1358435082784632,
      "grad_norm": 1.506204605102539,
      "learning_rate": 0.000958995662162355,
      "loss": 6.0186,
      "step": 25500
    },
    {
      "epoch": 0.1385071064800017,
      "grad_norm": 1.3472563028335571,
      "learning_rate": 0.0009581056206389749,
      "loss": 6.0086,
      "step": 26000
    },
    {
      "epoch": 0.1411707046815402,
      "grad_norm": 1.4809520244598389,
      "learning_rate": 0.0009572137954652473,
      "loss": 5.9898,
      "step": 26500
    },
    {
      "epoch": 0.1438343028830787,
      "grad_norm": 1.5233690738677979,
      "learning_rate": 0.0009563219702915198,
      "loss": 5.9781,
      "step": 27000
    },
    {
      "epoch": 0.1464979010846172,
      "grad_norm": 1.5101710557937622,
      "learning_rate": 0.0009554301451177923,
      "loss": 5.9561,
      "step": 27500
    },
    {
      "epoch": 0.14916149928615569,
      "grad_norm": 1.612731695175171,
      "learning_rate": 0.0009545401035944123,
      "loss": 5.9526,
      "step": 28000
    },
    {
      "epoch": 0.15182509748769418,
      "grad_norm": 1.7018260955810547,
      "learning_rate": 0.0009536482784206847,
      "loss": 5.9338,
      "step": 28500
    },
    {
      "epoch": 0.15448869568923268,
      "grad_norm": 1.7604913711547852,
      "learning_rate": 0.0009527564532469571,
      "loss": 5.9321,
      "step": 29000
    },
    {
      "epoch": 0.15715229389077118,
      "grad_norm": 1.721969485282898,
      "learning_rate": 0.0009518646280732296,
      "loss": 5.9175,
      "step": 29500
    },
    {
      "epoch": 0.15981589209230965,
      "grad_norm": 1.5823644399642944,
      "learning_rate": 0.0009509745865498494,
      "loss": 5.9153,
      "step": 30000
    },
    {
      "epoch": 0.16247949029384814,
      "grad_norm": 1.7854641675949097,
      "learning_rate": 0.000950082761376122,
      "loss": 5.9072,
      "step": 30500
    },
    {
      "epoch": 0.16514308849538664,
      "grad_norm": 1.7369080781936646,
      "learning_rate": 0.0009491909362023944,
      "loss": 5.9029,
      "step": 31000
    },
    {
      "epoch": 0.16780668669692514,
      "grad_norm": 1.674492597579956,
      "learning_rate": 0.0009482991110286668,
      "loss": 5.8841,
      "step": 31500
    },
    {
      "epoch": 0.17047028489846364,
      "grad_norm": 1.7058457136154175,
      "learning_rate": 0.0009474072858549393,
      "loss": 5.8883,
      "step": 32000
    },
    {
      "epoch": 0.17313388310000213,
      "grad_norm": 1.5853819847106934,
      "learning_rate": 0.0009465172443315591,
      "loss": 5.8775,
      "step": 32500
    },
    {
      "epoch": 0.17579748130154063,
      "grad_norm": 1.7525198459625244,
      "learning_rate": 0.0009456254191578317,
      "loss": 5.8717,
      "step": 33000
    },
    {
      "epoch": 0.17846107950307913,
      "grad_norm": 1.9233468770980835,
      "learning_rate": 0.0009447335939841041,
      "loss": 5.8608,
      "step": 33500
    },
    {
      "epoch": 0.18112467770461763,
      "grad_norm": 1.637522578239441,
      "learning_rate": 0.0009438417688103765,
      "loss": 5.8658,
      "step": 34000
    },
    {
      "epoch": 0.1837882759061561,
      "grad_norm": 1.8892813920974731,
      "learning_rate": 0.000942949943636649,
      "loss": 5.8523,
      "step": 34500
    },
    {
      "epoch": 0.1864518741076946,
      "grad_norm": 1.9510762691497803,
      "learning_rate": 0.0009420599021132689,
      "loss": 5.8404,
      "step": 35000
    },
    {
      "epoch": 0.1891154723092331,
      "grad_norm": 1.7907196283340454,
      "learning_rate": 0.0009411680769395415,
      "loss": 5.8396,
      "step": 35500
    },
    {
      "epoch": 0.1917790705107716,
      "grad_norm": 1.8805279731750488,
      "learning_rate": 0.0009402762517658139,
      "loss": 5.8293,
      "step": 36000
    },
    {
      "epoch": 0.19444266871231008,
      "grad_norm": 1.7272233963012695,
      "learning_rate": 0.0009393844265920863,
      "loss": 5.8268,
      "step": 36500
    },
    {
      "epoch": 0.19710626691384858,
      "grad_norm": 2.035203695297241,
      "learning_rate": 0.0009384926014183588,
      "loss": 5.8209,
      "step": 37000
    },
    {
      "epoch": 0.19976986511538708,
      "grad_norm": 1.8728936910629272,
      "learning_rate": 0.0009376007762446312,
      "loss": 5.8165,
      "step": 37500
    },
    {
      "epoch": 0.20243346331692558,
      "grad_norm": 1.9231390953063965,
      "learning_rate": 0.0009367089510709037,
      "loss": 5.8149,
      "step": 38000
    },
    {
      "epoch": 0.20509706151846407,
      "grad_norm": 1.7793642282485962,
      "learning_rate": 0.0009358171258971762,
      "loss": 5.8132,
      "step": 38500
    },
    {
      "epoch": 0.20776065972000254,
      "grad_norm": 1.7759062051773071,
      "learning_rate": 0.000934927084373796,
      "loss": 5.8065,
      "step": 39000
    },
    {
      "epoch": 0.21042425792154104,
      "grad_norm": 1.7528033256530762,
      "learning_rate": 0.0009340352592000685,
      "loss": 5.8023,
      "step": 39500
    },
    {
      "epoch": 0.21308785612307954,
      "grad_norm": 1.8702290058135986,
      "learning_rate": 0.0009331434340263409,
      "loss": 5.7909,
      "step": 40000
    },
    {
      "epoch": 0.21575145432461804,
      "grad_norm": 1.9332852363586426,
      "learning_rate": 0.000932253392502961,
      "loss": 5.7937,
      "step": 40500
    },
    {
      "epoch": 0.21841505252615653,
      "grad_norm": 1.8513240814208984,
      "learning_rate": 0.0009313615673292334,
      "loss": 5.7865,
      "step": 41000
    },
    {
      "epoch": 0.22107865072769503,
      "grad_norm": 1.8357592821121216,
      "learning_rate": 0.0009304697421555058,
      "loss": 5.7859,
      "step": 41500
    },
    {
      "epoch": 0.22374224892923353,
      "grad_norm": 1.7558057308197021,
      "learning_rate": 0.0009295779169817783,
      "loss": 5.7781,
      "step": 42000
    },
    {
      "epoch": 0.22640584713077203,
      "grad_norm": 1.7014683485031128,
      "learning_rate": 0.0009286860918080507,
      "loss": 5.7703,
      "step": 42500
    },
    {
      "epoch": 0.22906944533231052,
      "grad_norm": 1.8377306461334229,
      "learning_rate": 0.0009277942666343233,
      "loss": 5.7775,
      "step": 43000
    },
    {
      "epoch": 0.231733043533849,
      "grad_norm": 1.7670570611953735,
      "learning_rate": 0.0009269024414605957,
      "loss": 5.7606,
      "step": 43500
    },
    {
      "epoch": 0.2343966417353875,
      "grad_norm": 1.907322883605957,
      "learning_rate": 0.0009260106162868681,
      "loss": 5.7595,
      "step": 44000
    },
    {
      "epoch": 0.237060239936926,
      "grad_norm": 1.9192357063293457,
      "learning_rate": 0.000925120574763488,
      "loss": 5.7574,
      "step": 44500
    },
    {
      "epoch": 0.23972383813846448,
      "grad_norm": 1.801256775856018,
      "learning_rate": 0.0009242287495897604,
      "loss": 5.7623,
      "step": 45000
    },
    {
      "epoch": 0.24238743634000298,
      "grad_norm": 1.7864599227905273,
      "learning_rate": 0.000923336924416033,
      "loss": 5.7464,
      "step": 45500
    },
    {
      "epoch": 0.24505103454154148,
      "grad_norm": 2.0881760120391846,
      "learning_rate": 0.0009224450992423054,
      "loss": 5.7492,
      "step": 46000
    },
    {
      "epoch": 0.24771463274307998,
      "grad_norm": 2.0729496479034424,
      "learning_rate": 0.0009215550577189252,
      "loss": 5.7464,
      "step": 46500
    },
    {
      "epoch": 0.2503782309446185,
      "grad_norm": 1.807739496231079,
      "learning_rate": 0.0009206632325451977,
      "loss": 5.7391,
      "step": 47000
    },
    {
      "epoch": 0.25304182914615697,
      "grad_norm": 1.7898356914520264,
      "learning_rate": 0.0009197731910218176,
      "loss": 5.7399,
      "step": 47500
    },
    {
      "epoch": 0.25570542734769547,
      "grad_norm": 1.6668163537979126,
      "learning_rate": 0.0009188813658480901,
      "loss": 5.7316,
      "step": 48000
    },
    {
      "epoch": 0.25836902554923397,
      "grad_norm": 1.743788242340088,
      "learning_rate": 0.0009179895406743626,
      "loss": 5.7251,
      "step": 48500
    },
    {
      "epoch": 0.26103262375077246,
      "grad_norm": 1.7427009344100952,
      "learning_rate": 0.000917097715500635,
      "loss": 5.7231,
      "step": 49000
    },
    {
      "epoch": 0.26369622195231096,
      "grad_norm": 1.8911422491073608,
      "learning_rate": 0.0009162058903269075,
      "loss": 5.7272,
      "step": 49500
    },
    {
      "epoch": 0.26635982015384946,
      "grad_norm": 1.7783831357955933,
      "learning_rate": 0.0009153140651531799,
      "loss": 5.7193,
      "step": 50000
    },
    {
      "epoch": 0.2690234183553879,
      "grad_norm": 1.75882089138031,
      "learning_rate": 0.0009144222399794523,
      "loss": 5.7233,
      "step": 50500
    },
    {
      "epoch": 0.2716870165569264,
      "grad_norm": 1.8454984426498413,
      "learning_rate": 0.0009135304148057249,
      "loss": 5.7163,
      "step": 51000
    },
    {
      "epoch": 0.2743506147584649,
      "grad_norm": 1.8908592462539673,
      "learning_rate": 0.0009126403732823447,
      "loss": 5.7175,
      "step": 51500
    },
    {
      "epoch": 0.2770142129600034,
      "grad_norm": 1.6938859224319458,
      "learning_rate": 0.0009117485481086172,
      "loss": 5.7113,
      "step": 52000
    },
    {
      "epoch": 0.2796778111615419,
      "grad_norm": 1.8087745904922485,
      "learning_rate": 0.0009108567229348896,
      "loss": 5.7104,
      "step": 52500
    },
    {
      "epoch": 0.2823414093630804,
      "grad_norm": 1.9441509246826172,
      "learning_rate": 0.000909964897761162,
      "loss": 5.7006,
      "step": 53000
    },
    {
      "epoch": 0.2850050075646189,
      "grad_norm": 2.016289710998535,
      "learning_rate": 0.000909074856237782,
      "loss": 5.7084,
      "step": 53500
    },
    {
      "epoch": 0.2876686057661574,
      "grad_norm": 1.7924542427062988,
      "learning_rate": 0.0009081830310640544,
      "loss": 5.6967,
      "step": 54000
    },
    {
      "epoch": 0.2903322039676959,
      "grad_norm": 1.8578925132751465,
      "learning_rate": 0.0009072912058903269,
      "loss": 5.7058,
      "step": 54500
    },
    {
      "epoch": 0.2929958021692344,
      "grad_norm": 1.8592642545700073,
      "learning_rate": 0.0009063993807165993,
      "loss": 5.699,
      "step": 55000
    },
    {
      "epoch": 0.2956594003707729,
      "grad_norm": 1.726891040802002,
      "learning_rate": 0.0009055075555428717,
      "loss": 5.6873,
      "step": 55500
    },
    {
      "epoch": 0.29832299857231137,
      "grad_norm": 1.8885732889175415,
      "learning_rate": 0.0009046175140194918,
      "loss": 5.6859,
      "step": 56000
    },
    {
      "epoch": 0.30098659677384987,
      "grad_norm": 1.6777235269546509,
      "learning_rate": 0.0009037256888457643,
      "loss": 5.6843,
      "step": 56500
    },
    {
      "epoch": 0.30365019497538837,
      "grad_norm": 1.824777364730835,
      "learning_rate": 0.0009028338636720367,
      "loss": 5.6865,
      "step": 57000
    },
    {
      "epoch": 0.30631379317692686,
      "grad_norm": 1.6151602268218994,
      "learning_rate": 0.0009019420384983091,
      "loss": 5.6864,
      "step": 57500
    },
    {
      "epoch": 0.30897739137846536,
      "grad_norm": 1.7518750429153442,
      "learning_rate": 0.0009010502133245816,
      "loss": 5.6835,
      "step": 58000
    },
    {
      "epoch": 0.31164098958000386,
      "grad_norm": 1.9652341604232788,
      "learning_rate": 0.0009001583881508541,
      "loss": 5.6778,
      "step": 58500
    },
    {
      "epoch": 0.31430458778154235,
      "grad_norm": 1.8396164178848267,
      "learning_rate": 0.0008992665629771265,
      "loss": 5.6805,
      "step": 59000
    },
    {
      "epoch": 0.3169681859830808,
      "grad_norm": 1.7397726774215698,
      "learning_rate": 0.000898374737803399,
      "loss": 5.6809,
      "step": 59500
    },
    {
      "epoch": 0.3196317841846193,
      "grad_norm": 1.6550874710083008,
      "learning_rate": 0.0008974846962800188,
      "loss": 5.6713,
      "step": 60000
    },
    {
      "epoch": 0.3222953823861578,
      "grad_norm": 1.7428010702133179,
      "learning_rate": 0.0008965928711062913,
      "loss": 5.6777,
      "step": 60500
    },
    {
      "epoch": 0.3249589805876963,
      "grad_norm": 1.7465174198150635,
      "learning_rate": 0.0008957028295829112,
      "loss": 5.6668,
      "step": 61000
    },
    {
      "epoch": 0.3276225787892348,
      "grad_norm": 1.719190239906311,
      "learning_rate": 0.0008948110044091838,
      "loss": 5.6736,
      "step": 61500
    },
    {
      "epoch": 0.3302861769907733,
      "grad_norm": 1.6879175901412964,
      "learning_rate": 0.0008939191792354562,
      "loss": 5.6585,
      "step": 62000
    },
    {
      "epoch": 0.3329497751923118,
      "grad_norm": 1.6741931438446045,
      "learning_rate": 0.0008930273540617286,
      "loss": 5.6584,
      "step": 62500
    },
    {
      "epoch": 0.3356133733938503,
      "grad_norm": 1.8733186721801758,
      "learning_rate": 0.0008921355288880011,
      "loss": 5.6655,
      "step": 63000
    },
    {
      "epoch": 0.3382769715953888,
      "grad_norm": 1.8366929292678833,
      "learning_rate": 0.0008912454873646209,
      "loss": 5.6551,
      "step": 63500
    },
    {
      "epoch": 0.3409405697969273,
      "grad_norm": 1.7783548831939697,
      "learning_rate": 0.0008903536621908935,
      "loss": 5.6598,
      "step": 64000
    },
    {
      "epoch": 0.34360416799846577,
      "grad_norm": 1.739394187927246,
      "learning_rate": 0.0008894618370171659,
      "loss": 5.6568,
      "step": 64500
    },
    {
      "epoch": 0.34626776620000427,
      "grad_norm": 1.706986427307129,
      "learning_rate": 0.0008885700118434383,
      "loss": 5.6577,
      "step": 65000
    },
    {
      "epoch": 0.34893136440154277,
      "grad_norm": 1.7595592737197876,
      "learning_rate": 0.0008876781866697108,
      "loss": 5.6504,
      "step": 65500
    },
    {
      "epoch": 0.35159496260308126,
      "grad_norm": 1.7445604801177979,
      "learning_rate": 0.0008867863614959832,
      "loss": 5.6457,
      "step": 66000
    },
    {
      "epoch": 0.35425856080461976,
      "grad_norm": 1.7039164304733276,
      "learning_rate": 0.0008858945363222557,
      "loss": 5.652,
      "step": 66500
    },
    {
      "epoch": 0.35692215900615826,
      "grad_norm": 1.7117230892181396,
      "learning_rate": 0.0008850027111485282,
      "loss": 5.6456,
      "step": 67000
    },
    {
      "epoch": 0.35958575720769675,
      "grad_norm": 1.8759076595306396,
      "learning_rate": 0.000884112669625148,
      "loss": 5.6504,
      "step": 67500
    },
    {
      "epoch": 0.36224935540923525,
      "grad_norm": 1.5524253845214844,
      "learning_rate": 0.0008832208444514205,
      "loss": 5.6426,
      "step": 68000
    },
    {
      "epoch": 0.36491295361077375,
      "grad_norm": 1.648575782775879,
      "learning_rate": 0.0008823290192776929,
      "loss": 5.6401,
      "step": 68500
    },
    {
      "epoch": 0.3675765518123122,
      "grad_norm": 1.6062759160995483,
      "learning_rate": 0.0008814371941039654,
      "loss": 5.6466,
      "step": 69000
    },
    {
      "epoch": 0.3702401500138507,
      "grad_norm": 1.5237386226654053,
      "learning_rate": 0.0008805471525805854,
      "loss": 5.6381,
      "step": 69500
    },
    {
      "epoch": 0.3729037482153892,
      "grad_norm": 1.7291427850723267,
      "learning_rate": 0.0008796553274068578,
      "loss": 5.6337,
      "step": 70000
    },
    {
      "epoch": 0.3755673464169277,
      "grad_norm": 1.875213623046875,
      "learning_rate": 0.0008787635022331303,
      "loss": 5.6356,
      "step": 70500
    },
    {
      "epoch": 0.3782309446184662,
      "grad_norm": 1.8453514575958252,
      "learning_rate": 0.0008778716770594027,
      "loss": 5.6348,
      "step": 71000
    },
    {
      "epoch": 0.3808945428200047,
      "grad_norm": 1.725234866142273,
      "learning_rate": 0.0008769816355360227,
      "loss": 5.6318,
      "step": 71500
    },
    {
      "epoch": 0.3835581410215432,
      "grad_norm": 1.7739455699920654,
      "learning_rate": 0.0008760898103622951,
      "loss": 5.6296,
      "step": 72000
    },
    {
      "epoch": 0.3862217392230817,
      "grad_norm": 1.683827519416809,
      "learning_rate": 0.0008751979851885675,
      "loss": 5.6357,
      "step": 72500
    },
    {
      "epoch": 0.38888533742462017,
      "grad_norm": 1.5576590299606323,
      "learning_rate": 0.00087430616001484,
      "loss": 5.63,
      "step": 73000
    },
    {
      "epoch": 0.39154893562615867,
      "grad_norm": 1.666030764579773,
      "learning_rate": 0.0008734161184914598,
      "loss": 5.6178,
      "step": 73500
    },
    {
      "epoch": 0.39421253382769716,
      "grad_norm": 1.618916392326355,
      "learning_rate": 0.0008725242933177324,
      "loss": 5.6273,
      "step": 74000
    },
    {
      "epoch": 0.39687613202923566,
      "grad_norm": 1.69428551197052,
      "learning_rate": 0.0008716324681440048,
      "loss": 5.6188,
      "step": 74500
    },
    {
      "epoch": 0.39953973023077416,
      "grad_norm": 1.8516380786895752,
      "learning_rate": 0.0008707406429702772,
      "loss": 5.6235,
      "step": 75000
    },
    {
      "epoch": 0.40220332843231266,
      "grad_norm": 1.505953311920166,
      "learning_rate": 0.0008698506014468972,
      "loss": 5.6175,
      "step": 75500
    },
    {
      "epoch": 0.40486692663385115,
      "grad_norm": 1.5639010667800903,
      "learning_rate": 0.0008689587762731696,
      "loss": 5.6213,
      "step": 76000
    },
    {
      "epoch": 0.40753052483538965,
      "grad_norm": 1.7431727647781372,
      "learning_rate": 0.0008680669510994421,
      "loss": 5.6198,
      "step": 76500
    },
    {
      "epoch": 0.41019412303692815,
      "grad_norm": 1.676757574081421,
      "learning_rate": 0.0008671751259257146,
      "loss": 5.6252,
      "step": 77000
    },
    {
      "epoch": 0.41285772123846665,
      "grad_norm": 1.6216061115264893,
      "learning_rate": 0.0008662850844023345,
      "loss": 5.6211,
      "step": 77500
    },
    {
      "epoch": 0.4155213194400051,
      "grad_norm": 1.6766453981399536,
      "learning_rate": 0.0008653932592286069,
      "loss": 5.62,
      "step": 78000
    },
    {
      "epoch": 0.4181849176415436,
      "grad_norm": 1.6790215969085693,
      "learning_rate": 0.0008645014340548793,
      "loss": 5.6093,
      "step": 78500
    },
    {
      "epoch": 0.4208485158430821,
      "grad_norm": 1.8037434816360474,
      "learning_rate": 0.0008636096088811518,
      "loss": 5.6085,
      "step": 79000
    },
    {
      "epoch": 0.4235121140446206,
      "grad_norm": 1.6324502229690552,
      "learning_rate": 0.0008627195673577717,
      "loss": 5.6031,
      "step": 79500
    },
    {
      "epoch": 0.4261757122461591,
      "grad_norm": 1.6987981796264648,
      "learning_rate": 0.0008618277421840443,
      "loss": 5.6116,
      "step": 80000
    },
    {
      "epoch": 0.4288393104476976,
      "grad_norm": 1.6692321300506592,
      "learning_rate": 0.0008609359170103167,
      "loss": 5.6062,
      "step": 80500
    },
    {
      "epoch": 0.43150290864923607,
      "grad_norm": 1.6387773752212524,
      "learning_rate": 0.0008600440918365891,
      "loss": 5.6087,
      "step": 81000
    },
    {
      "epoch": 0.43416650685077457,
      "grad_norm": 1.792861819267273,
      "learning_rate": 0.000859154050313209,
      "loss": 5.608,
      "step": 81500
    },
    {
      "epoch": 0.43683010505231307,
      "grad_norm": 1.676076889038086,
      "learning_rate": 0.0008582622251394815,
      "loss": 5.6056,
      "step": 82000
    },
    {
      "epoch": 0.43949370325385156,
      "grad_norm": 1.772159218788147,
      "learning_rate": 0.000857370399965754,
      "loss": 5.6015,
      "step": 82500
    },
    {
      "epoch": 0.44215730145539006,
      "grad_norm": 1.7022145986557007,
      "learning_rate": 0.0008564785747920264,
      "loss": 5.6056,
      "step": 83000
    },
    {
      "epoch": 0.44482089965692856,
      "grad_norm": 1.6428086757659912,
      "learning_rate": 0.0008555885332686463,
      "loss": 5.596,
      "step": 83500
    },
    {
      "epoch": 0.44748449785846706,
      "grad_norm": 1.6144286394119263,
      "learning_rate": 0.0008546967080949187,
      "loss": 5.5974,
      "step": 84000
    },
    {
      "epoch": 0.45014809606000555,
      "grad_norm": 1.5918573141098022,
      "learning_rate": 0.0008538048829211912,
      "loss": 5.604,
      "step": 84500
    },
    {
      "epoch": 0.45281169426154405,
      "grad_norm": 1.7871578931808472,
      "learning_rate": 0.0008529130577474637,
      "loss": 5.5951,
      "step": 85000
    },
    {
      "epoch": 0.45547529246308255,
      "grad_norm": 1.6631501913070679,
      "learning_rate": 0.0008520230162240836,
      "loss": 5.6014,
      "step": 85500
    },
    {
      "epoch": 0.45813889066462105,
      "grad_norm": 1.6243520975112915,
      "learning_rate": 0.0008511311910503561,
      "loss": 5.5942,
      "step": 86000
    },
    {
      "epoch": 0.46080248886615954,
      "grad_norm": 1.5686520338058472,
      "learning_rate": 0.0008502393658766285,
      "loss": 5.5981,
      "step": 86500
    },
    {
      "epoch": 0.463466087067698,
      "grad_norm": 1.7691351175308228,
      "learning_rate": 0.0008493475407029009,
      "loss": 5.5984,
      "step": 87000
    },
    {
      "epoch": 0.4661296852692365,
      "grad_norm": 1.6885465383529663,
      "learning_rate": 0.0008484574991795209,
      "loss": 5.5851,
      "step": 87500
    },
    {
      "epoch": 0.468793283470775,
      "grad_norm": 1.6488664150238037,
      "learning_rate": 0.0008475656740057933,
      "loss": 5.5831,
      "step": 88000
    },
    {
      "epoch": 0.4714568816723135,
      "grad_norm": 1.5736653804779053,
      "learning_rate": 0.0008466738488320658,
      "loss": 5.582,
      "step": 88500
    },
    {
      "epoch": 0.474120479873852,
      "grad_norm": 1.7857962846755981,
      "learning_rate": 0.0008457820236583382,
      "loss": 5.5901,
      "step": 89000
    },
    {
      "epoch": 0.47678407807539047,
      "grad_norm": 1.7936720848083496,
      "learning_rate": 0.0008448919821349581,
      "loss": 5.5822,
      "step": 89500
    },
    {
      "epoch": 0.47944767627692897,
      "grad_norm": 1.546919345855713,
      "learning_rate": 0.0008440001569612306,
      "loss": 5.581,
      "step": 90000
    },
    {
      "epoch": 0.48211127447846747,
      "grad_norm": 1.778827428817749,
      "learning_rate": 0.000843108331787503,
      "loss": 5.5922,
      "step": 90500
    },
    {
      "epoch": 0.48477487268000596,
      "grad_norm": 1.495205044746399,
      "learning_rate": 0.0008422165066137755,
      "loss": 5.5821,
      "step": 91000
    },
    {
      "epoch": 0.48743847088154446,
      "grad_norm": 1.6151823997497559,
      "learning_rate": 0.0008413264650903954,
      "loss": 5.5801,
      "step": 91500
    },
    {
      "epoch": 0.49010206908308296,
      "grad_norm": 1.7652384042739868,
      "learning_rate": 0.0008404346399166679,
      "loss": 5.5785,
      "step": 92000
    },
    {
      "epoch": 0.49276566728462146,
      "grad_norm": 1.7062280178070068,
      "learning_rate": 0.0008395428147429404,
      "loss": 5.5784,
      "step": 92500
    },
    {
      "epoch": 0.49542926548615995,
      "grad_norm": 1.5986762046813965,
      "learning_rate": 0.0008386509895692128,
      "loss": 5.5814,
      "step": 93000
    },
    {
      "epoch": 0.49809286368769845,
      "grad_norm": 1.672861933708191,
      "learning_rate": 0.0008377609480458327,
      "loss": 5.5743,
      "step": 93500
    },
    {
      "epoch": 0.500756461889237,
      "grad_norm": 1.8104331493377686,
      "learning_rate": 0.0008368691228721051,
      "loss": 5.5709,
      "step": 94000
    },
    {
      "epoch": 0.5034200600907754,
      "grad_norm": 1.8253047466278076,
      "learning_rate": 0.0008359772976983776,
      "loss": 5.5642,
      "step": 94500
    },
    {
      "epoch": 0.5060836582923139,
      "grad_norm": 1.604465126991272,
      "learning_rate": 0.0008350854725246501,
      "loss": 5.5691,
      "step": 95000
    },
    {
      "epoch": 0.5087472564938524,
      "grad_norm": 1.7985742092132568,
      "learning_rate": 0.00083419543100127,
      "loss": 5.5611,
      "step": 95500
    },
    {
      "epoch": 0.5114108546953909,
      "grad_norm": 1.652733325958252,
      "learning_rate": 0.0008333036058275424,
      "loss": 5.5577,
      "step": 96000
    },
    {
      "epoch": 0.5140744528969294,
      "grad_norm": 1.8247016668319702,
      "learning_rate": 0.0008324117806538148,
      "loss": 5.5557,
      "step": 96500
    },
    {
      "epoch": 0.5167380510984679,
      "grad_norm": 1.784303069114685,
      "learning_rate": 0.0008315199554800873,
      "loss": 5.5554,
      "step": 97000
    },
    {
      "epoch": 0.5194016493000064,
      "grad_norm": 1.705725073814392,
      "learning_rate": 0.0008306299139567072,
      "loss": 5.5545,
      "step": 97500
    },
    {
      "epoch": 0.5220652475015449,
      "grad_norm": 1.8760724067687988,
      "learning_rate": 0.0008297380887829798,
      "loss": 5.5512,
      "step": 98000
    },
    {
      "epoch": 0.5247288457030834,
      "grad_norm": 1.7412986755371094,
      "learning_rate": 0.0008288462636092522,
      "loss": 5.5522,
      "step": 98500
    },
    {
      "epoch": 0.5273924439046219,
      "grad_norm": 2.0051610469818115,
      "learning_rate": 0.0008279544384355246,
      "loss": 5.5403,
      "step": 99000
    },
    {
      "epoch": 0.5300560421061604,
      "grad_norm": 1.6867221593856812,
      "learning_rate": 0.0008270643969121445,
      "loss": 5.544,
      "step": 99500
    },
    {
      "epoch": 0.5327196403076989,
      "grad_norm": 1.838189721107483,
      "learning_rate": 0.0008261725717384169,
      "loss": 5.5396,
      "step": 100000
    },
    {
      "epoch": 0.5353832385092374,
      "grad_norm": 1.655271291732788,
      "learning_rate": 0.0008252807465646895,
      "loss": 5.5358,
      "step": 100500
    },
    {
      "epoch": 0.5380468367107758,
      "grad_norm": 1.8378669023513794,
      "learning_rate": 0.0008243889213909619,
      "loss": 5.5419,
      "step": 101000
    },
    {
      "epoch": 0.5407104349123143,
      "grad_norm": 1.7509022951126099,
      "learning_rate": 0.0008234988798675818,
      "loss": 5.523,
      "step": 101500
    },
    {
      "epoch": 0.5433740331138528,
      "grad_norm": 1.9558390378952026,
      "learning_rate": 0.0008226070546938542,
      "loss": 5.5322,
      "step": 102000
    },
    {
      "epoch": 0.5460376313153913,
      "grad_norm": 2.0113561153411865,
      "learning_rate": 0.0008217152295201266,
      "loss": 5.5303,
      "step": 102500
    },
    {
      "epoch": 0.5487012295169298,
      "grad_norm": 1.989725112915039,
      "learning_rate": 0.0008208234043463993,
      "loss": 5.5257,
      "step": 103000
    },
    {
      "epoch": 0.5513648277184683,
      "grad_norm": 1.702812671661377,
      "learning_rate": 0.0008199315791726717,
      "loss": 5.5327,
      "step": 103500
    },
    {
      "epoch": 0.5540284259200068,
      "grad_norm": 1.8519411087036133,
      "learning_rate": 0.0008190397539989441,
      "loss": 5.5272,
      "step": 104000
    },
    {
      "epoch": 0.5566920241215453,
      "grad_norm": 1.856350064277649,
      "learning_rate": 0.0008181479288252166,
      "loss": 5.5211,
      "step": 104500
    },
    {
      "epoch": 0.5593556223230838,
      "grad_norm": 1.7010074853897095,
      "learning_rate": 0.000817256103651489,
      "loss": 5.5287,
      "step": 105000
    },
    {
      "epoch": 0.5620192205246223,
      "grad_norm": 1.6479413509368896,
      "learning_rate": 0.000816366062128109,
      "loss": 5.5279,
      "step": 105500
    },
    {
      "epoch": 0.5646828187261608,
      "grad_norm": 1.9108966588974,
      "learning_rate": 0.0008154742369543814,
      "loss": 5.5203,
      "step": 106000
    },
    {
      "epoch": 0.5673464169276993,
      "grad_norm": 1.9142667055130005,
      "learning_rate": 0.0008145824117806538,
      "loss": 5.5189,
      "step": 106500
    },
    {
      "epoch": 0.5700100151292378,
      "grad_norm": 1.8495519161224365,
      "learning_rate": 0.0008136905866069263,
      "loss": 5.5196,
      "step": 107000
    },
    {
      "epoch": 0.5726736133307763,
      "grad_norm": 2.063087224960327,
      "learning_rate": 0.0008128005450835461,
      "loss": 5.5132,
      "step": 107500
    },
    {
      "epoch": 0.5753372115323148,
      "grad_norm": 2.0009357929229736,
      "learning_rate": 0.0008119087199098186,
      "loss": 5.5177,
      "step": 108000
    },
    {
      "epoch": 0.5780008097338533,
      "grad_norm": 2.0125739574432373,
      "learning_rate": 0.0008110168947360911,
      "loss": 5.5112,
      "step": 108500
    },
    {
      "epoch": 0.5806644079353918,
      "grad_norm": 1.8415509462356567,
      "learning_rate": 0.0008101250695623635,
      "loss": 5.509,
      "step": 109000
    },
    {
      "epoch": 0.5833280061369303,
      "grad_norm": 1.7688753604888916,
      "learning_rate": 0.0008092350280389835,
      "loss": 5.5032,
      "step": 109500
    },
    {
      "epoch": 0.5859916043384688,
      "grad_norm": 1.8354215621948242,
      "learning_rate": 0.000808343202865256,
      "loss": 5.5129,
      "step": 110000
    },
    {
      "epoch": 0.5886552025400072,
      "grad_norm": 2.036357879638672,
      "learning_rate": 0.0008074513776915284,
      "loss": 5.5043,
      "step": 110500
    },
    {
      "epoch": 0.5913188007415457,
      "grad_norm": 1.8382165431976318,
      "learning_rate": 0.0008065595525178009,
      "loss": 5.5065,
      "step": 111000
    },
    {
      "epoch": 0.5939823989430842,
      "grad_norm": 2.001885175704956,
      "learning_rate": 0.0008056695109944208,
      "loss": 5.507,
      "step": 111500
    },
    {
      "epoch": 0.5966459971446227,
      "grad_norm": 1.872819423675537,
      "learning_rate": 0.0008047776858206932,
      "loss": 5.5081,
      "step": 112000
    },
    {
      "epoch": 0.5993095953461612,
      "grad_norm": 1.8629109859466553,
      "learning_rate": 0.0008038858606469656,
      "loss": 5.5078,
      "step": 112500
    },
    {
      "epoch": 0.6019731935476997,
      "grad_norm": 2.0044994354248047,
      "learning_rate": 0.0008029940354732381,
      "loss": 5.498,
      "step": 113000
    },
    {
      "epoch": 0.6046367917492382,
      "grad_norm": 1.9607182741165161,
      "learning_rate": 0.000802103993949858,
      "loss": 5.5092,
      "step": 113500
    },
    {
      "epoch": 0.6073003899507767,
      "grad_norm": 1.9605486392974854,
      "learning_rate": 0.0008012121687761305,
      "loss": 5.5013,
      "step": 114000
    },
    {
      "epoch": 0.6099639881523152,
      "grad_norm": 1.999872088432312,
      "learning_rate": 0.0008003203436024029,
      "loss": 5.497,
      "step": 114500
    },
    {
      "epoch": 0.6126275863538537,
      "grad_norm": 1.7834984064102173,
      "learning_rate": 0.0007994285184286753,
      "loss": 5.5001,
      "step": 115000
    },
    {
      "epoch": 0.6152911845553922,
      "grad_norm": 1.9666252136230469,
      "learning_rate": 0.0007985384769052953,
      "loss": 5.5004,
      "step": 115500
    },
    {
      "epoch": 0.6179547827569307,
      "grad_norm": 1.810936450958252,
      "learning_rate": 0.0007976484353819152,
      "loss": 5.4934,
      "step": 116000
    },
    {
      "epoch": 0.6206183809584692,
      "grad_norm": 1.8183609247207642,
      "learning_rate": 0.0007967566102081877,
      "loss": 5.4999,
      "step": 116500
    },
    {
      "epoch": 0.6232819791600077,
      "grad_norm": 2.1452646255493164,
      "learning_rate": 0.0007958647850344601,
      "loss": 5.4937,
      "step": 117000
    },
    {
      "epoch": 0.6259455773615462,
      "grad_norm": 1.984305739402771,
      "learning_rate": 0.0007949729598607326,
      "loss": 5.494,
      "step": 117500
    },
    {
      "epoch": 0.6286091755630847,
      "grad_norm": 2.1507790088653564,
      "learning_rate": 0.000794081134687005,
      "loss": 5.4915,
      "step": 118000
    },
    {
      "epoch": 0.6312727737646232,
      "grad_norm": 1.821390151977539,
      "learning_rate": 0.0007931910931636249,
      "loss": 5.4948,
      "step": 118500
    },
    {
      "epoch": 0.6339363719661616,
      "grad_norm": 1.901696801185608,
      "learning_rate": 0.0007922992679898974,
      "loss": 5.4944,
      "step": 119000
    },
    {
      "epoch": 0.6365999701677001,
      "grad_norm": 2.214447259902954,
      "learning_rate": 0.0007914074428161698,
      "loss": 5.4901,
      "step": 119500
    },
    {
      "epoch": 0.6392635683692386,
      "grad_norm": 1.8764078617095947,
      "learning_rate": 0.0007905156176424423,
      "loss": 5.4837,
      "step": 120000
    },
    {
      "epoch": 0.6419271665707771,
      "grad_norm": 1.9411547183990479,
      "learning_rate": 0.0007896237924687147,
      "loss": 5.4889,
      "step": 120500
    },
    {
      "epoch": 0.6445907647723156,
      "grad_norm": 1.8323979377746582,
      "learning_rate": 0.0007887319672949871,
      "loss": 5.49,
      "step": 121000
    },
    {
      "epoch": 0.6472543629738541,
      "grad_norm": 1.8666421175003052,
      "learning_rate": 0.0007878401421212597,
      "loss": 5.4911,
      "step": 121500
    },
    {
      "epoch": 0.6499179611753926,
      "grad_norm": 2.0501484870910645,
      "learning_rate": 0.0007869483169475321,
      "loss": 5.4894,
      "step": 122000
    },
    {
      "epoch": 0.6525815593769311,
      "grad_norm": 1.8784074783325195,
      "learning_rate": 0.0007860600590744995,
      "loss": 5.4911,
      "step": 122500
    },
    {
      "epoch": 0.6552451575784696,
      "grad_norm": 1.9021259546279907,
      "learning_rate": 0.000785168233900772,
      "loss": 5.4844,
      "step": 123000
    },
    {
      "epoch": 0.6579087557800081,
      "grad_norm": 2.053755283355713,
      "learning_rate": 0.0007842764087270444,
      "loss": 5.4884,
      "step": 123500
    },
    {
      "epoch": 0.6605723539815466,
      "grad_norm": 1.9320204257965088,
      "learning_rate": 0.0007833845835533169,
      "loss": 5.4822,
      "step": 124000
    },
    {
      "epoch": 0.6632359521830851,
      "grad_norm": 1.793219804763794,
      "learning_rate": 0.0007824945420299368,
      "loss": 5.4834,
      "step": 124500
    },
    {
      "epoch": 0.6658995503846236,
      "grad_norm": 2.0100185871124268,
      "learning_rate": 0.0007816027168562092,
      "loss": 5.4872,
      "step": 125000
    },
    {
      "epoch": 0.6685631485861621,
      "grad_norm": 2.0543274879455566,
      "learning_rate": 0.0007807108916824816,
      "loss": 5.4826,
      "step": 125500
    },
    {
      "epoch": 0.6712267467877006,
      "grad_norm": 1.9622262716293335,
      "learning_rate": 0.0007798190665087542,
      "loss": 5.4809,
      "step": 126000
    },
    {
      "epoch": 0.673890344989239,
      "grad_norm": 1.918966293334961,
      "learning_rate": 0.0007789272413350267,
      "loss": 5.4823,
      "step": 126500
    },
    {
      "epoch": 0.6765539431907776,
      "grad_norm": 1.8516751527786255,
      "learning_rate": 0.0007780354161612992,
      "loss": 5.4786,
      "step": 127000
    },
    {
      "epoch": 0.679217541392316,
      "grad_norm": 1.8985280990600586,
      "learning_rate": 0.000777145374637919,
      "loss": 5.4762,
      "step": 127500
    },
    {
      "epoch": 0.6818811395938545,
      "grad_norm": 2.030210018157959,
      "learning_rate": 0.0007762535494641915,
      "loss": 5.4786,
      "step": 128000
    },
    {
      "epoch": 0.684544737795393,
      "grad_norm": 1.9270013570785522,
      "learning_rate": 0.0007753617242904639,
      "loss": 5.4801,
      "step": 128500
    },
    {
      "epoch": 0.6872083359969315,
      "grad_norm": 1.7799612283706665,
      "learning_rate": 0.0007744698991167364,
      "loss": 5.4715,
      "step": 129000
    },
    {
      "epoch": 0.68987193419847,
      "grad_norm": 2.1841835975646973,
      "learning_rate": 0.0007735780739430089,
      "loss": 5.4726,
      "step": 129500
    },
    {
      "epoch": 0.6925355324000085,
      "grad_norm": 1.970680594444275,
      "learning_rate": 0.0007726862487692813,
      "loss": 5.4751,
      "step": 130000
    },
    {
      "epoch": 0.695199130601547,
      "grad_norm": 2.1457014083862305,
      "learning_rate": 0.0007717944235955537,
      "loss": 5.4754,
      "step": 130500
    },
    {
      "epoch": 0.6978627288030855,
      "grad_norm": 1.8095160722732544,
      "learning_rate": 0.0007709025984218262,
      "loss": 5.4723,
      "step": 131000
    },
    {
      "epoch": 0.700526327004624,
      "grad_norm": 1.8374313116073608,
      "learning_rate": 0.000770012556898446,
      "loss": 5.4774,
      "step": 131500
    },
    {
      "epoch": 0.7031899252061625,
      "grad_norm": 1.8603581190109253,
      "learning_rate": 0.0007691207317247186,
      "loss": 5.477,
      "step": 132000
    },
    {
      "epoch": 0.705853523407701,
      "grad_norm": 1.9838221073150635,
      "learning_rate": 0.0007682306902013385,
      "loss": 5.4732,
      "step": 132500
    },
    {
      "epoch": 0.7085171216092395,
      "grad_norm": 1.9500114917755127,
      "learning_rate": 0.000767338865027611,
      "loss": 5.4742,
      "step": 133000
    },
    {
      "epoch": 0.711180719810778,
      "grad_norm": 1.9748975038528442,
      "learning_rate": 0.0007664470398538834,
      "loss": 5.4675,
      "step": 133500
    },
    {
      "epoch": 0.7138443180123165,
      "grad_norm": 1.7860807180404663,
      "learning_rate": 0.0007655552146801558,
      "loss": 5.4711,
      "step": 134000
    },
    {
      "epoch": 0.716507916213855,
      "grad_norm": 2.076504945755005,
      "learning_rate": 0.0007646633895064284,
      "loss": 5.4691,
      "step": 134500
    },
    {
      "epoch": 0.7191715144153935,
      "grad_norm": 2.1392953395843506,
      "learning_rate": 0.0007637715643327008,
      "loss": 5.4763,
      "step": 135000
    },
    {
      "epoch": 0.721835112616932,
      "grad_norm": 1.7750567197799683,
      "learning_rate": 0.0007628797391589732,
      "loss": 5.4624,
      "step": 135500
    },
    {
      "epoch": 0.7244987108184705,
      "grad_norm": 2.1746318340301514,
      "learning_rate": 0.0007619879139852457,
      "loss": 5.4632,
      "step": 136000
    },
    {
      "epoch": 0.727162309020009,
      "grad_norm": 1.9568692445755005,
      "learning_rate": 0.0007610978724618655,
      "loss": 5.4702,
      "step": 136500
    },
    {
      "epoch": 0.7298259072215475,
      "grad_norm": 1.940618634223938,
      "learning_rate": 0.0007602060472881381,
      "loss": 5.4682,
      "step": 137000
    },
    {
      "epoch": 0.7324895054230859,
      "grad_norm": 2.0432674884796143,
      "learning_rate": 0.0007593142221144105,
      "loss": 5.4661,
      "step": 137500
    },
    {
      "epoch": 0.7351531036246244,
      "grad_norm": 1.989637017250061,
      "learning_rate": 0.0007584223969406829,
      "loss": 5.4643,
      "step": 138000
    },
    {
      "epoch": 0.7378167018261629,
      "grad_norm": 1.7842735052108765,
      "learning_rate": 0.0007575305717669554,
      "loss": 5.4633,
      "step": 138500
    },
    {
      "epoch": 0.7404803000277014,
      "grad_norm": 2.000488519668579,
      "learning_rate": 0.0007566405302435752,
      "loss": 5.4645,
      "step": 139000
    },
    {
      "epoch": 0.7431438982292399,
      "grad_norm": 1.9219857454299927,
      "learning_rate": 0.0007557487050698478,
      "loss": 5.4587,
      "step": 139500
    },
    {
      "epoch": 0.7458074964307784,
      "grad_norm": 1.8964563608169556,
      "learning_rate": 0.0007548568798961202,
      "loss": 5.4594,
      "step": 140000
    },
    {
      "epoch": 0.7484710946323169,
      "grad_norm": 2.0744431018829346,
      "learning_rate": 0.0007539650547223926,
      "loss": 5.4677,
      "step": 140500
    },
    {
      "epoch": 0.7511346928338554,
      "grad_norm": 2.0807344913482666,
      "learning_rate": 0.0007530732295486651,
      "loss": 5.4594,
      "step": 141000
    },
    {
      "epoch": 0.7537982910353939,
      "grad_norm": 1.9063740968704224,
      "learning_rate": 0.0007521814043749375,
      "loss": 5.4614,
      "step": 141500
    },
    {
      "epoch": 0.7564618892369324,
      "grad_norm": 1.8823788166046143,
      "learning_rate": 0.0007512913628515576,
      "loss": 5.4612,
      "step": 142000
    },
    {
      "epoch": 0.7591254874384709,
      "grad_norm": 2.027939558029175,
      "learning_rate": 0.00075039953767783,
      "loss": 5.457,
      "step": 142500
    },
    {
      "epoch": 0.7617890856400094,
      "grad_norm": 1.956814169883728,
      "learning_rate": 0.0007495077125041024,
      "loss": 5.4561,
      "step": 143000
    },
    {
      "epoch": 0.7644526838415479,
      "grad_norm": 1.8203577995300293,
      "learning_rate": 0.0007486158873303749,
      "loss": 5.4612,
      "step": 143500
    },
    {
      "epoch": 0.7671162820430864,
      "grad_norm": 2.0049407482147217,
      "learning_rate": 0.0007477240621566473,
      "loss": 5.4572,
      "step": 144000
    },
    {
      "epoch": 0.7697798802446248,
      "grad_norm": 2.0092926025390625,
      "learning_rate": 0.0007468322369829198,
      "loss": 5.4566,
      "step": 144500
    },
    {
      "epoch": 0.7724434784461633,
      "grad_norm": 1.9448853731155396,
      "learning_rate": 0.0007459421954595397,
      "loss": 5.4567,
      "step": 145000
    },
    {
      "epoch": 0.7751070766477018,
      "grad_norm": 1.9080660343170166,
      "learning_rate": 0.0007450503702858121,
      "loss": 5.4529,
      "step": 145500
    },
    {
      "epoch": 0.7777706748492403,
      "grad_norm": 2.0922887325286865,
      "learning_rate": 0.0007441585451120846,
      "loss": 5.4594,
      "step": 146000
    },
    {
      "epoch": 0.7804342730507788,
      "grad_norm": 2.102870464324951,
      "learning_rate": 0.000743266719938357,
      "loss": 5.4533,
      "step": 146500
    },
    {
      "epoch": 0.7830978712523173,
      "grad_norm": 1.8905880451202393,
      "learning_rate": 0.0007423748947646295,
      "loss": 5.4512,
      "step": 147000
    },
    {
      "epoch": 0.7857614694538558,
      "grad_norm": 1.937587857246399,
      "learning_rate": 0.000741483069590902,
      "loss": 5.4577,
      "step": 147500
    },
    {
      "epoch": 0.7884250676553943,
      "grad_norm": 2.2599427700042725,
      "learning_rate": 0.0007405912444171744,
      "loss": 5.4545,
      "step": 148000
    },
    {
      "epoch": 0.7910886658569328,
      "grad_norm": 2.1247055530548096,
      "learning_rate": 0.0007396994192434468,
      "loss": 5.4552,
      "step": 148500
    },
    {
      "epoch": 0.7937522640584713,
      "grad_norm": 1.8920656442642212,
      "learning_rate": 0.0007388093777200668,
      "loss": 5.4551,
      "step": 149000
    },
    {
      "epoch": 0.7964158622600098,
      "grad_norm": 2.05411696434021,
      "learning_rate": 0.0007379175525463393,
      "loss": 5.4581,
      "step": 149500
    },
    {
      "epoch": 0.7990794604615483,
      "grad_norm": 2.1096110343933105,
      "learning_rate": 0.0007370257273726118,
      "loss": 5.4553,
      "step": 150000
    },
    {
      "epoch": 0.8017430586630868,
      "grad_norm": 2.060760736465454,
      "learning_rate": 0.0007361339021988842,
      "loss": 5.4557,
      "step": 150500
    },
    {
      "epoch": 0.8044066568646253,
      "grad_norm": 1.7533081769943237,
      "learning_rate": 0.0007352438606755041,
      "loss": 5.4596,
      "step": 151000
    },
    {
      "epoch": 0.8070702550661638,
      "grad_norm": 1.948110580444336,
      "learning_rate": 0.0007343520355017765,
      "loss": 5.4581,
      "step": 151500
    },
    {
      "epoch": 0.8097338532677023,
      "grad_norm": 2.0876693725585938,
      "learning_rate": 0.000733460210328049,
      "loss": 5.4517,
      "step": 152000
    },
    {
      "epoch": 0.8123974514692408,
      "grad_norm": 1.8972123861312866,
      "learning_rate": 0.0007325701688046689,
      "loss": 5.4529,
      "step": 152500
    },
    {
      "epoch": 0.8150610496707793,
      "grad_norm": 2.0049657821655273,
      "learning_rate": 0.0007316783436309413,
      "loss": 5.4506,
      "step": 153000
    },
    {
      "epoch": 0.8177246478723178,
      "grad_norm": 1.9599244594573975,
      "learning_rate": 0.0007307865184572138,
      "loss": 5.4503,
      "step": 153500
    },
    {
      "epoch": 0.8203882460738563,
      "grad_norm": 2.090162992477417,
      "learning_rate": 0.0007298946932834862,
      "loss": 5.4487,
      "step": 154000
    },
    {
      "epoch": 0.8230518442753948,
      "grad_norm": 1.9685425758361816,
      "learning_rate": 0.0007290028681097586,
      "loss": 5.4459,
      "step": 154500
    },
    {
      "epoch": 0.8257154424769333,
      "grad_norm": 2.0231292247772217,
      "learning_rate": 0.0007281110429360312,
      "loss": 5.4519,
      "step": 155000
    },
    {
      "epoch": 0.8283790406784717,
      "grad_norm": 1.824242353439331,
      "learning_rate": 0.0007272192177623036,
      "loss": 5.4495,
      "step": 155500
    },
    {
      "epoch": 0.8310426388800102,
      "grad_norm": 1.8740367889404297,
      "learning_rate": 0.000726327392588576,
      "loss": 5.4514,
      "step": 156000
    },
    {
      "epoch": 0.8337062370815487,
      "grad_norm": 1.898790955543518,
      "learning_rate": 0.000725437351065196,
      "loss": 5.4442,
      "step": 156500
    },
    {
      "epoch": 0.8363698352830872,
      "grad_norm": 1.9713107347488403,
      "learning_rate": 0.0007245455258914684,
      "loss": 5.4481,
      "step": 157000
    },
    {
      "epoch": 0.8390334334846257,
      "grad_norm": 1.892471432685852,
      "learning_rate": 0.000723653700717741,
      "loss": 5.4514,
      "step": 157500
    },
    {
      "epoch": 0.8416970316861642,
      "grad_norm": 2.0477683544158936,
      "learning_rate": 0.0007227618755440134,
      "loss": 5.4402,
      "step": 158000
    },
    {
      "epoch": 0.8443606298877027,
      "grad_norm": 1.9651503562927246,
      "learning_rate": 0.0007218736176709807,
      "loss": 5.439,
      "step": 158500
    },
    {
      "epoch": 0.8470242280892412,
      "grad_norm": 1.9664440155029297,
      "learning_rate": 0.0007209817924972531,
      "loss": 5.4512,
      "step": 159000
    },
    {
      "epoch": 0.8496878262907797,
      "grad_norm": 1.9268772602081299,
      "learning_rate": 0.0007200899673235256,
      "loss": 5.4445,
      "step": 159500
    },
    {
      "epoch": 0.8523514244923182,
      "grad_norm": 2.0761542320251465,
      "learning_rate": 0.0007191981421497981,
      "loss": 5.4476,
      "step": 160000
    },
    {
      "epoch": 0.8550150226938567,
      "grad_norm": 2.080336570739746,
      "learning_rate": 0.0007183063169760705,
      "loss": 5.4472,
      "step": 160500
    },
    {
      "epoch": 0.8576786208953951,
      "grad_norm": 1.8157365322113037,
      "learning_rate": 0.000717414491802343,
      "loss": 5.4471,
      "step": 161000
    },
    {
      "epoch": 0.8603422190969336,
      "grad_norm": 1.7620859146118164,
      "learning_rate": 0.0007165226666286154,
      "loss": 5.4486,
      "step": 161500
    },
    {
      "epoch": 0.8630058172984721,
      "grad_norm": 1.8530540466308594,
      "learning_rate": 0.0007156326251052354,
      "loss": 5.4403,
      "step": 162000
    },
    {
      "epoch": 0.8656694155000106,
      "grad_norm": 1.91478431224823,
      "learning_rate": 0.0007147407999315079,
      "loss": 5.4453,
      "step": 162500
    },
    {
      "epoch": 0.8683330137015491,
      "grad_norm": 1.944806456565857,
      "learning_rate": 0.0007138489747577804,
      "loss": 5.4438,
      "step": 163000
    },
    {
      "epoch": 0.8709966119030876,
      "grad_norm": 1.941565752029419,
      "learning_rate": 0.0007129571495840528,
      "loss": 5.4403,
      "step": 163500
    },
    {
      "epoch": 0.8736602101046261,
      "grad_norm": 1.8101640939712524,
      "learning_rate": 0.0007120653244103252,
      "loss": 5.4352,
      "step": 164000
    },
    {
      "epoch": 0.8763238083061646,
      "grad_norm": 2.391594171524048,
      "learning_rate": 0.0007111752828869451,
      "loss": 5.4379,
      "step": 164500
    },
    {
      "epoch": 0.8789874065077031,
      "grad_norm": 1.946295142173767,
      "learning_rate": 0.0007102834577132175,
      "loss": 5.4385,
      "step": 165000
    },
    {
      "epoch": 0.8816510047092416,
      "grad_norm": 2.1615066528320312,
      "learning_rate": 0.00070939163253949,
      "loss": 5.4439,
      "step": 165500
    },
    {
      "epoch": 0.8843146029107801,
      "grad_norm": 2.0320687294006348,
      "learning_rate": 0.0007084998073657625,
      "loss": 5.4434,
      "step": 166000
    },
    {
      "epoch": 0.8869782011123186,
      "grad_norm": 1.8692481517791748,
      "learning_rate": 0.0007076079821920349,
      "loss": 5.437,
      "step": 166500
    },
    {
      "epoch": 0.8896417993138571,
      "grad_norm": 2.007511854171753,
      "learning_rate": 0.0007067161570183073,
      "loss": 5.4327,
      "step": 167000
    },
    {
      "epoch": 0.8923053975153956,
      "grad_norm": 2.02004337310791,
      "learning_rate": 0.0007058243318445799,
      "loss": 5.4393,
      "step": 167500
    },
    {
      "epoch": 0.8949689957169341,
      "grad_norm": 1.7644096612930298,
      "learning_rate": 0.0007049325066708523,
      "loss": 5.4304,
      "step": 168000
    },
    {
      "epoch": 0.8976325939184726,
      "grad_norm": 2.0698578357696533,
      "learning_rate": 0.0007040424651474723,
      "loss": 5.4301,
      "step": 168500
    },
    {
      "epoch": 0.9002961921200111,
      "grad_norm": 1.881465196609497,
      "learning_rate": 0.0007031506399737447,
      "loss": 5.4399,
      "step": 169000
    },
    {
      "epoch": 0.9029597903215496,
      "grad_norm": 2.0607750415802,
      "learning_rate": 0.0007022588148000172,
      "loss": 5.4311,
      "step": 169500
    },
    {
      "epoch": 0.9056233885230881,
      "grad_norm": 2.1066737174987793,
      "learning_rate": 0.0007013669896262897,
      "loss": 5.4348,
      "step": 170000
    },
    {
      "epoch": 0.9082869867246266,
      "grad_norm": 2.0234835147857666,
      "learning_rate": 0.0007004769481029096,
      "loss": 5.4337,
      "step": 170500
    },
    {
      "epoch": 0.9109505849261651,
      "grad_norm": 1.8877592086791992,
      "learning_rate": 0.000699585122929182,
      "loss": 5.4389,
      "step": 171000
    },
    {
      "epoch": 0.9136141831277036,
      "grad_norm": 2.117302417755127,
      "learning_rate": 0.0006986932977554544,
      "loss": 5.4333,
      "step": 171500
    },
    {
      "epoch": 0.9162777813292421,
      "grad_norm": 2.073172092437744,
      "learning_rate": 0.0006978014725817269,
      "loss": 5.4318,
      "step": 172000
    },
    {
      "epoch": 0.9189413795307806,
      "grad_norm": 2.064408540725708,
      "learning_rate": 0.0006969114310583467,
      "loss": 5.431,
      "step": 172500
    },
    {
      "epoch": 0.9216049777323191,
      "grad_norm": 1.9481194019317627,
      "learning_rate": 0.0006960196058846193,
      "loss": 5.4321,
      "step": 173000
    },
    {
      "epoch": 0.9242685759338576,
      "grad_norm": 2.010923147201538,
      "learning_rate": 0.0006951277807108917,
      "loss": 5.4342,
      "step": 173500
    },
    {
      "epoch": 0.926932174135396,
      "grad_norm": 1.9323519468307495,
      "learning_rate": 0.0006942359555371641,
      "loss": 5.4303,
      "step": 174000
    },
    {
      "epoch": 0.9295957723369345,
      "grad_norm": 2.2859385013580322,
      "learning_rate": 0.0006933459140137841,
      "loss": 5.4352,
      "step": 174500
    },
    {
      "epoch": 0.932259370538473,
      "grad_norm": 2.055107593536377,
      "learning_rate": 0.000692455872490404,
      "loss": 5.4352,
      "step": 175000
    },
    {
      "epoch": 0.9349229687400115,
      "grad_norm": 1.9875715970993042,
      "learning_rate": 0.0006915640473166765,
      "loss": 5.4392,
      "step": 175500
    },
    {
      "epoch": 0.93758656694155,
      "grad_norm": 2.097477912902832,
      "learning_rate": 0.0006906722221429489,
      "loss": 5.4291,
      "step": 176000
    },
    {
      "epoch": 0.9402501651430885,
      "grad_norm": 1.8664289712905884,
      "learning_rate": 0.0006897803969692214,
      "loss": 5.423,
      "step": 176500
    },
    {
      "epoch": 0.942913763344627,
      "grad_norm": 2.0907797813415527,
      "learning_rate": 0.0006888885717954938,
      "loss": 5.4322,
      "step": 177000
    },
    {
      "epoch": 0.9455773615461655,
      "grad_norm": 1.9234920740127563,
      "learning_rate": 0.0006879967466217662,
      "loss": 5.4303,
      "step": 177500
    },
    {
      "epoch": 0.948240959747704,
      "grad_norm": 2.0696797370910645,
      "learning_rate": 0.0006871049214480388,
      "loss": 5.4251,
      "step": 178000
    },
    {
      "epoch": 0.9509045579492424,
      "grad_norm": 2.0838043689727783,
      "learning_rate": 0.0006862130962743112,
      "loss": 5.4244,
      "step": 178500
    },
    {
      "epoch": 0.9535681561507809,
      "grad_norm": 2.1029279232025146,
      "learning_rate": 0.0006853230547509311,
      "loss": 5.4323,
      "step": 179000
    },
    {
      "epoch": 0.9562317543523194,
      "grad_norm": 2.1586649417877197,
      "learning_rate": 0.000684433013227551,
      "loss": 5.4329,
      "step": 179500
    },
    {
      "epoch": 0.9588953525538579,
      "grad_norm": 1.8636375665664673,
      "learning_rate": 0.0006835411880538235,
      "loss": 5.43,
      "step": 180000
    },
{ |
|
"epoch": 0.9615589507553964, |
|
"grad_norm": 1.9289181232452393, |
|
"learning_rate": 0.0006826493628800959, |
|
"loss": 5.4193, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.9642225489569349, |
|
"grad_norm": 1.9578914642333984, |
|
"learning_rate": 0.0006817575377063684, |
|
"loss": 5.4298, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.9668861471584734, |
|
"grad_norm": 2.0745270252227783, |
|
"learning_rate": 0.0006808657125326409, |
|
"loss": 5.4315, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.9695497453600119, |
|
"grad_norm": 1.9545907974243164, |
|
"learning_rate": 0.0006799738873589133, |
|
"loss": 5.425, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.9722133435615504, |
|
"grad_norm": 1.9709100723266602, |
|
"learning_rate": 0.0006790820621851857, |
|
"loss": 5.425, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.9748769417630889, |
|
"grad_norm": 1.8214976787567139, |
|
"learning_rate": 0.0006781902370114582, |
|
"loss": 5.4307, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.9775405399646274, |
|
"grad_norm": 1.8456212282180786, |
|
"learning_rate": 0.0006773001954880781, |
|
"loss": 5.4277, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.9802041381661659, |
|
"grad_norm": 2.0278677940368652, |
|
"learning_rate": 0.0006764083703143506, |
|
"loss": 5.425, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.9828677363677044, |
|
"grad_norm": 1.8401942253112793, |
|
"learning_rate": 0.000675516545140623, |
|
"loss": 5.4228, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.9855313345692429, |
|
"grad_norm": 2.0018155574798584, |
|
"learning_rate": 0.0006746247199668954, |
|
"loss": 5.4272, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.9881949327707814, |
|
"grad_norm": 1.9544193744659424, |
|
"learning_rate": 0.0006737346784435153, |
|
"loss": 5.4297, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.9908585309723199, |
|
"grad_norm": 1.8701244592666626, |
|
"learning_rate": 0.0006728428532697878, |
|
"loss": 5.4305, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.9935221291738584, |
|
"grad_norm": 1.9702414274215698, |
|
"learning_rate": 0.0006719510280960603, |
|
"loss": 5.4272, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.9961857273753969, |
|
"grad_norm": 2.005018472671509, |
|
"learning_rate": 0.0006710592029223327, |
|
"loss": 5.4259, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.9988493255769354, |
|
"grad_norm": 1.9745688438415527, |
|
"learning_rate": 0.0006701691613989527, |
|
"loss": 5.4255, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 1.001512923778474, |
|
"grad_norm": 2.119936466217041, |
|
"learning_rate": 0.0006692773362252251, |
|
"loss": 5.4282, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 1.0041765219800123, |
|
"grad_norm": 1.8192147016525269, |
|
"learning_rate": 0.0006683855110514976, |
|
"loss": 5.4272, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 1.006840120181551, |
|
"grad_norm": 2.0825536251068115, |
|
"learning_rate": 0.0006674936858777701, |
|
"loss": 5.4191, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 1.0095037183830893, |
|
"grad_norm": 2.034301519393921, |
|
"learning_rate": 0.0006666036443543899, |
|
"loss": 5.4212, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 1.0121673165846279, |
|
"grad_norm": 2.013160467147827, |
|
"learning_rate": 0.0006657118191806624, |
|
"loss": 5.4216, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.0148309147861663, |
|
"grad_norm": 1.9328818321228027, |
|
"learning_rate": 0.0006648199940069348, |
|
"loss": 5.4286, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 1.0174945129877049, |
|
"grad_norm": 2.011674642562866, |
|
"learning_rate": 0.0006639281688332073, |
|
"loss": 5.426, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 1.0201581111892433, |
|
"grad_norm": 2.1039912700653076, |
|
"learning_rate": 0.0006630381273098273, |
|
"loss": 5.4261, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 1.0228217093907819, |
|
"grad_norm": 1.8038475513458252, |
|
"learning_rate": 0.0006621480857864472, |
|
"loss": 5.4201, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.0254853075923203, |
|
"grad_norm": 1.8866719007492065, |
|
"learning_rate": 0.0006612562606127196, |
|
"loss": 5.4156, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 1.0281489057938589, |
|
"grad_norm": 1.9180611371994019, |
|
"learning_rate": 0.000660364435438992, |
|
"loss": 5.4219, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 1.0308125039953973, |
|
"grad_norm": 1.83159339427948, |
|
"learning_rate": 0.0006594726102652645, |
|
"loss": 5.4158, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 1.0334761021969359, |
|
"grad_norm": 1.8638277053833008, |
|
"learning_rate": 0.000658580785091537, |
|
"loss": 5.4196, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 1.0361397003984743, |
|
"grad_norm": 1.8679394721984863, |
|
"learning_rate": 0.0006576889599178094, |
|
"loss": 5.4221, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 1.0388032986000129, |
|
"grad_norm": 1.8080953359603882, |
|
"learning_rate": 0.0006567971347440819, |
|
"loss": 5.4168, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.0414668968015512, |
|
"grad_norm": 2.044064521789551, |
|
"learning_rate": 0.0006559053095703543, |
|
"loss": 5.4152, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 1.0441304950030899, |
|
"grad_norm": 2.067416191101074, |
|
"learning_rate": 0.0006550152680469742, |
|
"loss": 5.4197, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 1.0467940932046282, |
|
"grad_norm": 1.8547744750976562, |
|
"learning_rate": 0.0006541234428732467, |
|
"loss": 5.416, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 1.0494576914061668, |
|
"grad_norm": 2.1002390384674072, |
|
"learning_rate": 0.0006532316176995191, |
|
"loss": 5.414, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 1.0521212896077052, |
|
"grad_norm": 1.8542534112930298, |
|
"learning_rate": 0.0006523397925257916, |
|
"loss": 5.4176, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 1.0547848878092438, |
|
"grad_norm": 1.8873697519302368, |
|
"learning_rate": 0.000651447967352064, |
|
"loss": 5.4155, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 1.0574484860107822, |
|
"grad_norm": 2.0172159671783447, |
|
"learning_rate": 0.0006505561421783364, |
|
"loss": 5.4234, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 1.0601120842123208, |
|
"grad_norm": 1.9374735355377197, |
|
"learning_rate": 0.000649664317004609, |
|
"loss": 5.4131, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 1.0627756824138592, |
|
"grad_norm": 2.141655921936035, |
|
"learning_rate": 0.0006487724918308814, |
|
"loss": 5.4134, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 1.0654392806153978, |
|
"grad_norm": 1.9056235551834106, |
|
"learning_rate": 0.0006478824503075014, |
|
"loss": 5.4173, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.0681028788169362, |
|
"grad_norm": 2.3003177642822266, |
|
"learning_rate": 0.0006469906251337738, |
|
"loss": 5.4049, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 1.0707664770184748, |
|
"grad_norm": 2.1843066215515137, |
|
"learning_rate": 0.0006460987999600462, |
|
"loss": 5.411, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 1.0734300752200132, |
|
"grad_norm": 2.0827953815460205, |
|
"learning_rate": 0.0006452069747863188, |
|
"loss": 5.4175, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 1.0760936734215516, |
|
"grad_norm": 2.02587890625, |
|
"learning_rate": 0.0006443169332629386, |
|
"loss": 5.4183, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 1.0787572716230902, |
|
"grad_norm": 1.8049343824386597, |
|
"learning_rate": 0.0006434251080892111, |
|
"loss": 5.4142, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 1.0814208698246286, |
|
"grad_norm": 2.1238086223602295, |
|
"learning_rate": 0.0006425332829154835, |
|
"loss": 5.4155, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 1.0840844680261672, |
|
"grad_norm": 1.9311139583587646, |
|
"learning_rate": 0.0006416414577417559, |
|
"loss": 5.4132, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 1.0867480662277056, |
|
"grad_norm": 1.970428228378296, |
|
"learning_rate": 0.0006407514162183758, |
|
"loss": 5.4073, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 1.0894116644292442, |
|
"grad_norm": 1.7967313528060913, |
|
"learning_rate": 0.0006398595910446483, |
|
"loss": 5.4113, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 1.0920752626307826, |
|
"grad_norm": 1.7493606805801392, |
|
"learning_rate": 0.0006389677658709208, |
|
"loss": 5.4106, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 1.0947388608323212, |
|
"grad_norm": 1.868148922920227, |
|
"learning_rate": 0.0006380777243475407, |
|
"loss": 5.4125, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 1.0974024590338596, |
|
"grad_norm": 2.0261473655700684, |
|
"learning_rate": 0.0006371858991738132, |
|
"loss": 5.4119, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 1.1000660572353982, |
|
"grad_norm": 1.8863203525543213, |
|
"learning_rate": 0.0006362940740000856, |
|
"loss": 5.4085, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 1.1027296554369366, |
|
"grad_norm": 1.97225821018219, |
|
"learning_rate": 0.0006354022488263581, |
|
"loss": 5.4106, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 1.1053932536384752, |
|
"grad_norm": 2.2650508880615234, |
|
"learning_rate": 0.0006345104236526306, |
|
"loss": 5.4128, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 1.1080568518400136, |
|
"grad_norm": 1.9305511713027954, |
|
"learning_rate": 0.000633618598478903, |
|
"loss": 5.4084, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.1107204500415522, |
|
"grad_norm": 2.110548973083496, |
|
"learning_rate": 0.0006327285569555229, |
|
"loss": 5.4078, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 1.1133840482430906, |
|
"grad_norm": 2.0234880447387695, |
|
"learning_rate": 0.0006318367317817953, |
|
"loss": 5.4125, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 1.1160476464446292, |
|
"grad_norm": 1.8949861526489258, |
|
"learning_rate": 0.0006309449066080678, |
|
"loss": 5.4077, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 1.1187112446461676, |
|
"grad_norm": 1.9646226167678833, |
|
"learning_rate": 0.0006300530814343403, |
|
"loss": 5.4112, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 1.1213748428477062, |
|
"grad_norm": 1.9960238933563232, |
|
"learning_rate": 0.0006291612562606127, |
|
"loss": 5.4062, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 1.1240384410492446, |
|
"grad_norm": 2.0510716438293457, |
|
"learning_rate": 0.0006282694310868851, |
|
"loss": 5.4094, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 1.1267020392507832, |
|
"grad_norm": 1.969011664390564, |
|
"learning_rate": 0.0006273776059131576, |
|
"loss": 5.4123, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 1.1293656374523215, |
|
"grad_norm": 2.0459535121917725, |
|
"learning_rate": 0.0006264857807394301, |
|
"loss": 5.4077, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 1.1320292356538602, |
|
"grad_norm": 2.093336343765259, |
|
"learning_rate": 0.0006255957392160501, |
|
"loss": 5.4107, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 1.1346928338553985, |
|
"grad_norm": 1.8615410327911377, |
|
"learning_rate": 0.0006247056976926699, |
|
"loss": 5.4078, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 1.1373564320569371, |
|
"grad_norm": 1.9422777891159058, |
|
"learning_rate": 0.0006238138725189424, |
|
"loss": 5.4115, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 1.1400200302584755, |
|
"grad_norm": 1.9412380456924438, |
|
"learning_rate": 0.0006229220473452148, |
|
"loss": 5.4013, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 1.1426836284600141, |
|
"grad_norm": 2.2532691955566406, |
|
"learning_rate": 0.0006220302221714873, |
|
"loss": 5.4061, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 1.1453472266615525, |
|
"grad_norm": 1.7372703552246094, |
|
"learning_rate": 0.0006211383969977598, |
|
"loss": 5.41, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 1.1480108248630911, |
|
"grad_norm": 1.9771249294281006, |
|
"learning_rate": 0.0006202465718240322, |
|
"loss": 5.4032, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 1.1506744230646295, |
|
"grad_norm": 1.802037000656128, |
|
"learning_rate": 0.0006193547466503046, |
|
"loss": 5.4026, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.1533380212661681, |
|
"grad_norm": 1.958177924156189, |
|
"learning_rate": 0.0006184629214765771, |
|
"loss": 5.4043, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 1.1560016194677065, |
|
"grad_norm": 1.9318652153015137, |
|
"learning_rate": 0.000617572879953197, |
|
"loss": 5.4044, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 1.158665217669245, |
|
"grad_norm": 1.917920470237732, |
|
"learning_rate": 0.0006166810547794695, |
|
"loss": 5.4051, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 1.1613288158707835, |
|
"grad_norm": 1.9815441370010376, |
|
"learning_rate": 0.0006157892296057419, |
|
"loss": 5.4036, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 1.1639924140723221, |
|
"grad_norm": 2.0141518115997314, |
|
"learning_rate": 0.0006148974044320143, |
|
"loss": 5.4093, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 1.1666560122738605, |
|
"grad_norm": 2.0144686698913574, |
|
"learning_rate": 0.0006140073629086343, |
|
"loss": 5.3992, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 1.169319610475399, |
|
"grad_norm": 1.848953127861023, |
|
"learning_rate": 0.0006131155377349069, |
|
"loss": 5.4069, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 1.1719832086769375, |
|
"grad_norm": 1.8711676597595215, |
|
"learning_rate": 0.0006122237125611793, |
|
"loss": 5.4058, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 1.1746468068784761, |
|
"grad_norm": 2.1549181938171387, |
|
"learning_rate": 0.0006113318873874517, |
|
"loss": 5.4057, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 1.1773104050800145, |
|
"grad_norm": 2.136955738067627, |
|
"learning_rate": 0.0006104418458640716, |
|
"loss": 5.4047, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 1.1799740032815529, |
|
"grad_norm": 1.984183430671692, |
|
"learning_rate": 0.000609550020690344, |
|
"loss": 5.397, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 1.1826376014830915, |
|
"grad_norm": 2.173187732696533, |
|
"learning_rate": 0.0006086581955166164, |
|
"loss": 5.3996, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 1.1853011996846299, |
|
"grad_norm": 2.0700299739837646, |
|
"learning_rate": 0.000607766370342889, |
|
"loss": 5.3976, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 1.1879647978861685, |
|
"grad_norm": 2.1351547241210938, |
|
"learning_rate": 0.0006068763288195088, |
|
"loss": 5.4113, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 1.1906283960877069, |
|
"grad_norm": 1.9995781183242798, |
|
"learning_rate": 0.0006059845036457813, |
|
"loss": 5.4012, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 1.1932919942892455, |
|
"grad_norm": 2.2745988368988037, |
|
"learning_rate": 0.0006050926784720537, |
|
"loss": 5.4093, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.1959555924907839, |
|
"grad_norm": 2.5383615493774414, |
|
"learning_rate": 0.0006042026369486737, |
|
"loss": 5.3934, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 1.1986191906923225, |
|
"grad_norm": 2.132570266723633, |
|
"learning_rate": 0.0006033108117749462, |
|
"loss": 5.4143, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 1.2012827888938609, |
|
"grad_norm": 1.9985568523406982, |
|
"learning_rate": 0.0006024189866012187, |
|
"loss": 5.3987, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 1.2039463870953995, |
|
"grad_norm": 1.9169471263885498, |
|
"learning_rate": 0.0006015271614274911, |
|
"loss": 5.4005, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 1.2066099852969379, |
|
"grad_norm": 1.9423543214797974, |
|
"learning_rate": 0.0006006353362537635, |
|
"loss": 5.4016, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 1.2092735834984765, |
|
"grad_norm": 2.0575485229492188, |
|
"learning_rate": 0.000599743511080036, |
|
"loss": 5.393, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 1.2119371817000149, |
|
"grad_norm": 2.034454584121704, |
|
"learning_rate": 0.0005988516859063085, |
|
"loss": 5.3946, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 1.2146007799015535, |
|
"grad_norm": 1.9063221216201782, |
|
"learning_rate": 0.0005979598607325809, |
|
"loss": 5.4005, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 1.2172643781030918, |
|
"grad_norm": 2.094717025756836, |
|
"learning_rate": 0.0005970698192092008, |
|
"loss": 5.3943, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 1.2199279763046305, |
|
"grad_norm": 1.9740791320800781, |
|
"learning_rate": 0.0005961779940354732, |
|
"loss": 5.399, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 1.2225915745061688, |
|
"grad_norm": 1.95699143409729, |
|
"learning_rate": 0.0005952861688617457, |
|
"loss": 5.3971, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 1.2252551727077075, |
|
"grad_norm": 1.9305535554885864, |
|
"learning_rate": 0.0005943943436880182, |
|
"loss": 5.399, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 1.2279187709092458, |
|
"grad_norm": 1.8926870822906494, |
|
"learning_rate": 0.000593504302164638, |
|
"loss": 5.3967, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 1.2305823691107844, |
|
"grad_norm": 1.91937255859375, |
|
"learning_rate": 0.0005926124769909105, |
|
"loss": 5.3966, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 1.2332459673123228, |
|
"grad_norm": 1.9494017362594604, |
|
"learning_rate": 0.0005917224354675305, |
|
"loss": 5.3988, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 1.2359095655138614, |
|
"grad_norm": 1.7676622867584229, |
|
"learning_rate": 0.0005908306102938029, |
|
"loss": 5.3954, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.2385731637153998, |
|
"grad_norm": 1.9707027673721313, |
|
"learning_rate": 0.0005899387851200753, |
|
"loss": 5.3987, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 1.2412367619169384, |
|
"grad_norm": 1.8651105165481567, |
|
"learning_rate": 0.0005890469599463479, |
|
"loss": 5.3913, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 1.2439003601184768, |
|
"grad_norm": 2.2256948947906494, |
|
"learning_rate": 0.0005881551347726203, |
|
"loss": 5.4022, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 1.2465639583200154, |
|
"grad_norm": 2.0236611366271973, |
|
"learning_rate": 0.0005872633095988927, |
|
"loss": 5.3928, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 1.2492275565215538, |
|
"grad_norm": 2.07328724861145, |
|
"learning_rate": 0.0005863714844251652, |
|
"loss": 5.3964, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 1.2518911547230922, |
|
"grad_norm": 2.011497974395752, |
|
"learning_rate": 0.000585481442901785, |
|
"loss": 5.4, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 1.2545547529246308, |
|
"grad_norm": 1.891579270362854, |
|
"learning_rate": 0.0005845896177280576, |
|
"loss": 5.3931, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 1.2572183511261694, |
|
"grad_norm": 1.8369475603103638, |
|
"learning_rate": 0.00058369779255433, |
|
"loss": 5.388, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 1.2598819493277078, |
|
"grad_norm": 2.316582441329956, |
|
"learning_rate": 0.0005828059673806024, |
|
"loss": 5.3878, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 1.2625455475292462, |
|
"grad_norm": 1.8466497659683228, |
|
"learning_rate": 0.0005819141422068749, |
|
"loss": 5.3942, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 1.2652091457307848, |
|
"grad_norm": 1.9420734643936157, |
|
"learning_rate": 0.0005810223170331473, |
|
"loss": 5.3907, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 1.2678727439323234, |
|
"grad_norm": 1.9229456186294556, |
|
"learning_rate": 0.0005801304918594198, |
|
"loss": 5.394, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 1.2705363421338618, |
|
"grad_norm": 2.126213788986206, |
|
"learning_rate": 0.0005792386666856923, |
|
"loss": 5.3875, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 1.2731999403354002, |
|
"grad_norm": 1.9714566469192505, |
|
"learning_rate": 0.0005783486251623122, |
|
"loss": 5.3938, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 1.2758635385369388, |
|
"grad_norm": 2.244844436645508, |
|
"learning_rate": 0.0005774567999885847, |
|
"loss": 5.3974, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 1.2785271367384774, |
|
"grad_norm": 2.083517551422119, |
|
"learning_rate": 0.0005765649748148571, |
|
"loss": 5.3827, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.2811907349400158, |
|
"grad_norm": 2.1155362129211426, |
|
"learning_rate": 0.0005756749332914771, |
|
"loss": 5.3908, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 1.2838543331415542, |
|
"grad_norm": 2.0415351390838623, |
|
"learning_rate": 0.0005747831081177495, |
|
"loss": 5.3904, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 1.2865179313430928, |
|
"grad_norm": 2.4744224548339844, |
|
"learning_rate": 0.0005738912829440219, |
|
"loss": 5.3825, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 1.2891815295446314, |
|
"grad_norm": 1.9680261611938477, |
|
"learning_rate": 0.0005729994577702944, |
|
"loss": 5.3915, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 1.2918451277461698, |
|
"grad_norm": 2.4636471271514893, |
|
"learning_rate": 0.0005721076325965668, |
|
"loss": 5.3946, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 1.2945087259477082, |
|
"grad_norm": 1.8884419202804565, |
|
"learning_rate": 0.0005712158074228393, |
|
"loss": 5.3905, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 1.2971723241492468, |
|
"grad_norm": 2.192204236984253, |
|
"learning_rate": 0.0005703257658994592, |
|
"loss": 5.3891, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 1.2998359223507852, |
|
"grad_norm": 1.963740587234497, |
|
"learning_rate": 0.0005694339407257316, |
|
"loss": 5.389, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 1.3024995205523238, |
|
"grad_norm": 2.2511630058288574, |
|
"learning_rate": 0.0005685421155520041, |
|
"loss": 5.3988, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 1.3051631187538622, |
|
"grad_norm": 1.8933221101760864, |
|
"learning_rate": 0.0005676502903782765, |
|
"loss": 5.39, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 1.3078267169554008, |
|
"grad_norm": 1.813040852546692, |
|
"learning_rate": 0.000566758465204549, |
|
"loss": 5.3884, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 1.3104903151569391, |
|
"grad_norm": 2.3987181186676025, |
|
"learning_rate": 0.0005658666400308215, |
|
"loss": 5.3888, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 1.3131539133584778, |
|
"grad_norm": 2.0762851238250732, |
|
"learning_rate": 0.0005649748148570939, |
|
"loss": 5.3881, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 1.3158175115600161, |
|
"grad_norm": 2.3197662830352783, |
|
"learning_rate": 0.0005640829896833663, |
|
"loss": 5.3876, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 1.3184811097615547, |
|
"grad_norm": 1.9953910112380981, |
|
"learning_rate": 0.0005631929481599863, |
|
"loss": 5.3892, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 1.3211447079630931, |
|
"grad_norm": 2.20346999168396, |
|
"learning_rate": 0.0005623011229862588, |
|
"loss": 5.3844, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.3238083061646317, |
|
"grad_norm": 1.9688447713851929, |
|
"learning_rate": 0.0005614092978125313, |
|
"loss": 5.3924, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 1.3264719043661701, |
|
"grad_norm": 1.950621485710144, |
|
"learning_rate": 0.0005605174726388037, |
|
"loss": 5.382, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 1.3291355025677087, |
|
"grad_norm": 2.0261106491088867, |
|
"learning_rate": 0.0005596274311154236, |
|
"loss": 5.3889, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 1.3317991007692471, |
|
"grad_norm": 1.819598913192749, |
|
"learning_rate": 0.000558735605941696, |
|
"loss": 5.3879, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 1.3344626989707857, |
|
"grad_norm": 2.092658042907715, |
|
"learning_rate": 0.0005578437807679685, |
|
"loss": 5.3897, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 1.3371262971723241, |
|
"grad_norm": 1.8927563428878784, |
|
"learning_rate": 0.000556951955594241, |
|
"loss": 5.3888, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 1.3397898953738627, |
|
"grad_norm": 1.91410493850708, |
|
"learning_rate": 0.0005560619140708608, |
|
"loss": 5.3865, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 1.3424534935754011, |
|
"grad_norm": 1.923710584640503, |
|
"learning_rate": 0.0005551700888971333, |
|
"loss": 5.3831, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 1.3451170917769395, |
|
"grad_norm": 2.011301279067993, |
|
"learning_rate": 0.0005542782637234058, |
|
"loss": 5.3832, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 1.347780689978478, |
|
"grad_norm": 1.8271079063415527, |
|
"learning_rate": 0.0005533864385496783, |
|
"loss": 5.3843, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 1.3504442881800167, |
|
"grad_norm": 2.0028188228607178, |
|
"learning_rate": 0.0005524963970262982, |
|
"loss": 5.383, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 1.353107886381555, |
|
"grad_norm": 1.8386844396591187, |
|
"learning_rate": 0.0005516045718525706, |
|
"loss": 5.3873, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 1.3557714845830935, |
|
"grad_norm": 1.8750890493392944, |
|
"learning_rate": 0.0005507127466788431, |
|
"loss": 5.3794, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 1.358435082784632, |
|
"grad_norm": 1.9305578470230103, |
|
"learning_rate": 0.0005498209215051155, |
|
"loss": 5.3863, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 1.3610986809861707, |
|
"grad_norm": 2.1922383308410645, |
|
"learning_rate": 0.0005489308799817354, |
|
"loss": 5.3889, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 1.363762279187709, |
|
"grad_norm": 2.006162405014038, |
|
"learning_rate": 0.0005480390548080079, |
|
"loss": 5.3793, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.3664258773892475, |
|
"grad_norm": 2.1891300678253174, |
|
"learning_rate": 0.0005471472296342803, |
|
"loss": 5.3805, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 1.369089475590786, |
|
"grad_norm": 2.036553144454956, |
|
"learning_rate": 0.0005462554044605528, |
|
"loss": 5.3809, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 1.3717530737923247, |
|
"grad_norm": 1.9189977645874023, |
|
"learning_rate": 0.0005453653629371727, |
|
"loss": 5.3766, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 1.374416671993863, |
|
"grad_norm": 1.98636794090271, |
|
"learning_rate": 0.0005444735377634452, |
|
"loss": 5.39, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 1.3770802701954015, |
|
"grad_norm": 1.897522211074829, |
|
"learning_rate": 0.0005435834962400651, |
|
"loss": 5.3839, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 1.37974386839694, |
|
"grad_norm": 2.0826635360717773, |
|
"learning_rate": 0.0005426916710663376, |
|
"loss": 5.383, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 1.3824074665984787, |
|
"grad_norm": 1.8267229795455933, |
|
"learning_rate": 0.00054179984589261, |
|
"loss": 5.3866, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 1.385071064800017, |
|
"grad_norm": 2.1117184162139893, |
|
"learning_rate": 0.0005409080207188824, |
|
"loss": 5.3787, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 1.3877346630015555, |
|
"grad_norm": 1.9132159948349, |
|
"learning_rate": 0.0005400161955451549, |
|
"loss": 5.3812, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 1.390398261203094, |
|
"grad_norm": 1.9600298404693604, |
|
"learning_rate": 0.0005391243703714274, |
|
"loss": 5.381, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 1.3930618594046325, |
|
"grad_norm": 2.000422716140747, |
|
"learning_rate": 0.0005382325451976998, |
|
"loss": 5.3823, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 1.395725457606171, |
|
"grad_norm": 2.2225003242492676, |
|
"learning_rate": 0.0005373407200239723, |
|
"loss": 5.3776, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 1.3983890558077094, |
|
"grad_norm": 2.084779977798462, |
|
"learning_rate": 0.0005364506785005921, |
|
"loss": 5.3781, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 1.401052654009248, |
|
"grad_norm": 2.126775026321411, |
|
"learning_rate": 0.0005355588533268646, |
|
"loss": 5.3832, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 1.4037162522107864, |
|
"grad_norm": 1.9713746309280396, |
|
"learning_rate": 0.0005346670281531371, |
|
"loss": 5.3792, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 1.406379850412325, |
|
"grad_norm": 2.0785419940948486, |
|
"learning_rate": 0.0005337752029794095, |
|
"loss": 5.3825, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.4090434486138634, |
|
"grad_norm": 2.3811593055725098, |
|
"learning_rate": 0.0005328851614560295, |
|
"loss": 5.3826, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 1.411707046815402, |
|
"grad_norm": 2.1196324825286865, |
|
"learning_rate": 0.0005319933362823019, |
|
"loss": 5.3785, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 1.4143706450169404, |
|
"grad_norm": 2.06736421585083, |
|
"learning_rate": 0.0005311015111085744, |
|
"loss": 5.3796, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 1.417034243218479, |
|
"grad_norm": 2.1438751220703125, |
|
"learning_rate": 0.0005302096859348468, |
|
"loss": 5.3747, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 1.4196978414200174, |
|
"grad_norm": 2.0328142642974854, |
|
"learning_rate": 0.0005293196444114668, |
|
"loss": 5.3726, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 1.422361439621556, |
|
"grad_norm": 1.9709652662277222, |
|
"learning_rate": 0.0005284278192377392, |
|
"loss": 5.3835, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 1.4250250378230944, |
|
"grad_norm": 2.0982072353363037, |
|
"learning_rate": 0.0005275359940640116, |
|
"loss": 5.3719, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 1.427688636024633, |
|
"grad_norm": 2.3335447311401367, |
|
"learning_rate": 0.0005266441688902841, |
|
"loss": 5.3824, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 1.4303522342261714, |
|
"grad_norm": 1.9240329265594482, |
|
"learning_rate": 0.0005257541273669039, |
|
"loss": 5.3754, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 1.43301583242771, |
|
"grad_norm": 2.0762813091278076, |
|
"learning_rate": 0.0005248623021931765, |
|
"loss": 5.3754, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 1.4356794306292484, |
|
"grad_norm": 1.9223084449768066, |
|
"learning_rate": 0.0005239704770194489, |
|
"loss": 5.3751, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 1.4383430288307868, |
|
"grad_norm": 1.9600517749786377, |
|
"learning_rate": 0.0005230786518457213, |
|
"loss": 5.3726, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.4410066270323254, |
|
"grad_norm": 2.0275826454162598, |
|
"learning_rate": 0.0005221886103223413, |
|
"loss": 5.3755, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 1.443670225233864, |
|
"grad_norm": 2.0879909992218018, |
|
"learning_rate": 0.0005212967851486137, |
|
"loss": 5.371, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 1.4463338234354024, |
|
"grad_norm": 2.2107584476470947, |
|
"learning_rate": 0.0005204049599748863, |
|
"loss": 5.3775, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 1.4489974216369408, |
|
"grad_norm": 1.9889525175094604, |
|
"learning_rate": 0.0005195131348011587, |
|
"loss": 5.369, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.4516610198384794, |
|
"grad_norm": 1.8878706693649292, |
|
"learning_rate": 0.0005186230932777786, |
|
"loss": 5.3762, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 1.454324618040018, |
|
"grad_norm": 2.0804665088653564, |
|
"learning_rate": 0.000517731268104051, |
|
"loss": 5.3731, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 1.4569882162415564, |
|
"grad_norm": 2.3155815601348877, |
|
"learning_rate": 0.0005168394429303234, |
|
"loss": 5.3696, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 1.4596518144430948, |
|
"grad_norm": 2.2707676887512207, |
|
"learning_rate": 0.000515947617756596, |
|
"loss": 5.3763, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 1.4623154126446334, |
|
"grad_norm": 1.947204828262329, |
|
"learning_rate": 0.0005150575762332158, |
|
"loss": 5.3689, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 1.464979010846172, |
|
"grad_norm": 1.9428602457046509, |
|
"learning_rate": 0.0005141657510594883, |
|
"loss": 5.3797, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.4676426090477104, |
|
"grad_norm": 2.4003546237945557, |
|
"learning_rate": 0.0005132739258857608, |
|
"loss": 5.3672, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 1.4703062072492488, |
|
"grad_norm": 2.047048330307007, |
|
"learning_rate": 0.0005123821007120333, |
|
"loss": 5.3761, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.4729698054507874, |
|
"grad_norm": 2.0965404510498047, |
|
"learning_rate": 0.0005114920591886531, |
|
"loss": 5.3645, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 1.475633403652326, |
|
"grad_norm": 1.9648233652114868, |
|
"learning_rate": 0.0005106002340149257, |
|
"loss": 5.37, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.4782970018538644, |
|
"grad_norm": 1.8992446660995483, |
|
"learning_rate": 0.0005097084088411981, |
|
"loss": 5.3679, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 1.4809606000554028, |
|
"grad_norm": 2.125126838684082, |
|
"learning_rate": 0.0005088165836674705, |
|
"loss": 5.3702, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.4836241982569414, |
|
"grad_norm": 2.030409574508667, |
|
"learning_rate": 0.0005079265421440904, |
|
"loss": 5.3691, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 1.4862877964584797, |
|
"grad_norm": 1.9816679954528809, |
|
"learning_rate": 0.0005070347169703628, |
|
"loss": 5.3723, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.4889513946600184, |
|
"grad_norm": 2.032564401626587, |
|
"learning_rate": 0.0005061428917966354, |
|
"loss": 5.3695, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 1.4916149928615567, |
|
"grad_norm": 2.0342843532562256, |
|
"learning_rate": 0.0005052510666229078, |
|
"loss": 5.3681, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.4942785910630954, |
|
"grad_norm": 1.9113322496414185, |
|
"learning_rate": 0.0005043610250995278, |
|
"loss": 5.3713, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 1.4969421892646337, |
|
"grad_norm": 2.1201562881469727, |
|
"learning_rate": 0.0005034691999258002, |
|
"loss": 5.375, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.4996057874661723, |
|
"grad_norm": 2.1695244312286377, |
|
"learning_rate": 0.0005025773747520726, |
|
"loss": 5.3666, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 1.5022693856677107, |
|
"grad_norm": 2.2736222743988037, |
|
"learning_rate": 0.0005016873332286925, |
|
"loss": 5.3728, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.5049329838692493, |
|
"grad_norm": 1.9306550025939941, |
|
"learning_rate": 0.000500795508054965, |
|
"loss": 5.3607, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 1.507596582070788, |
|
"grad_norm": 1.970550537109375, |
|
"learning_rate": 0.0004999036828812375, |
|
"loss": 5.372, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.5102601802723261, |
|
"grad_norm": 1.7387876510620117, |
|
"learning_rate": 0.0004990118577075099, |
|
"loss": 5.3728, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 1.5129237784738647, |
|
"grad_norm": 2.364816188812256, |
|
"learning_rate": 0.0004981200325337823, |
|
"loss": 5.3667, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.5155873766754033, |
|
"grad_norm": 1.959367036819458, |
|
"learning_rate": 0.0004972282073600549, |
|
"loss": 5.3672, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 1.5182509748769417, |
|
"grad_norm": 2.4462456703186035, |
|
"learning_rate": 0.0004963363821863273, |
|
"loss": 5.3669, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.52091457307848, |
|
"grad_norm": 1.949645757675171, |
|
"learning_rate": 0.0004954445570125997, |
|
"loss": 5.3669, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 1.5235781712800187, |
|
"grad_norm": 2.0255677700042725, |
|
"learning_rate": 0.0004945545154892197, |
|
"loss": 5.3689, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.5262417694815573, |
|
"grad_norm": 2.0761642456054688, |
|
"learning_rate": 0.0004936644739658396, |
|
"loss": 5.3633, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 1.5289053676830957, |
|
"grad_norm": 2.1219048500061035, |
|
"learning_rate": 0.000492772648792112, |
|
"loss": 5.3617, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.531568965884634, |
|
"grad_norm": 1.83650803565979, |
|
"learning_rate": 0.0004918808236183844, |
|
"loss": 5.3735, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 1.5342325640861727, |
|
"grad_norm": 2.0275492668151855, |
|
"learning_rate": 0.0004909889984446568, |
|
"loss": 5.3636, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.5368961622877113, |
|
"grad_norm": 1.9854780435562134, |
|
"learning_rate": 0.0004900971732709294, |
|
"loss": 5.3595, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 1.5395597604892497, |
|
"grad_norm": 2.282017707824707, |
|
"learning_rate": 0.0004892053480972018, |
|
"loss": 5.3673, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.542223358690788, |
|
"grad_norm": 2.0435492992401123, |
|
"learning_rate": 0.0004883135229234743, |
|
"loss": 5.3771, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 1.5448869568923267, |
|
"grad_norm": 2.4702582359313965, |
|
"learning_rate": 0.0004874216977497467, |
|
"loss": 5.3592, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.5475505550938653, |
|
"grad_norm": 2.032315731048584, |
|
"learning_rate": 0.00048653165622636666, |
|
"loss": 5.3688, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 1.5502141532954037, |
|
"grad_norm": 2.13460636138916, |
|
"learning_rate": 0.0004856398310526391, |
|
"loss": 5.3624, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.552877751496942, |
|
"grad_norm": 1.9628610610961914, |
|
"learning_rate": 0.0004847480058789115, |
|
"loss": 5.3647, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 1.5555413496984807, |
|
"grad_norm": 1.8896455764770508, |
|
"learning_rate": 0.000483856180705184, |
|
"loss": 5.3693, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.5582049479000193, |
|
"grad_norm": 1.92352294921875, |
|
"learning_rate": 0.0004829661391818039, |
|
"loss": 5.3551, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 1.5608685461015577, |
|
"grad_norm": 2.061492919921875, |
|
"learning_rate": 0.0004820743140080764, |
|
"loss": 5.3618, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.563532144303096, |
|
"grad_norm": 2.0767364501953125, |
|
"learning_rate": 0.0004811842724846963, |
|
"loss": 5.3596, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 1.5661957425046347, |
|
"grad_norm": 2.103719472885132, |
|
"learning_rate": 0.00048029244731096876, |
|
"loss": 5.3547, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.5688593407061733, |
|
"grad_norm": 2.096832275390625, |
|
"learning_rate": 0.00047940062213724124, |
|
"loss": 5.3635, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 1.5715229389077117, |
|
"grad_norm": 2.053567409515381, |
|
"learning_rate": 0.0004785087969635137, |
|
"loss": 5.3683, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.57418653710925, |
|
"grad_norm": 2.040846586227417, |
|
"learning_rate": 0.00047761697178978616, |
|
"loss": 5.3623, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 1.5768501353107887, |
|
"grad_norm": 2.0361154079437256, |
|
"learning_rate": 0.0004767251466160586, |
|
"loss": 5.3572, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.5795137335123273, |
|
"grad_norm": 2.006989002227783, |
|
"learning_rate": 0.00047583332144233103, |
|
"loss": 5.3702, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 1.5821773317138657, |
|
"grad_norm": 2.0891811847686768, |
|
"learning_rate": 0.0004749414962686035, |
|
"loss": 5.3664, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.584840929915404, |
|
"grad_norm": 2.023730754852295, |
|
"learning_rate": 0.0004740514547452234, |
|
"loss": 5.3668, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 1.5875045281169426, |
|
"grad_norm": 1.8560234308242798, |
|
"learning_rate": 0.0004731596295714958, |
|
"loss": 5.3688, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.5901681263184813, |
|
"grad_norm": 1.84561288356781, |
|
"learning_rate": 0.0004722678043977683, |
|
"loss": 5.3595, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 1.5928317245200196, |
|
"grad_norm": 2.0453810691833496, |
|
"learning_rate": 0.0004713759792240407, |
|
"loss": 5.3612, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.595495322721558, |
|
"grad_norm": 2.03952956199646, |
|
"learning_rate": 0.0004704859377006607, |
|
"loss": 5.3595, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 1.5981589209230966, |
|
"grad_norm": 2.175218343734741, |
|
"learning_rate": 0.00046959411252693313, |
|
"loss": 5.3599, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.6008225191246352, |
|
"grad_norm": 1.9432867765426636, |
|
"learning_rate": 0.00046870228735320556, |
|
"loss": 5.3579, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 1.6034861173261736, |
|
"grad_norm": 2.0046420097351074, |
|
"learning_rate": 0.00046781046217947805, |
|
"loss": 5.3506, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.606149715527712, |
|
"grad_norm": 1.9781187772750854, |
|
"learning_rate": 0.00046692042065609796, |
|
"loss": 5.3585, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 1.6088133137292506, |
|
"grad_norm": 2.0884523391723633, |
|
"learning_rate": 0.0004660285954823704, |
|
"loss": 5.36, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.611476911930789, |
|
"grad_norm": 2.0299806594848633, |
|
"learning_rate": 0.0004651367703086429, |
|
"loss": 5.3609, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 1.6141405101323274, |
|
"grad_norm": 2.0034475326538086, |
|
"learning_rate": 0.0004642449451349153, |
|
"loss": 5.3621, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.616804108333866, |
|
"grad_norm": 2.027804136276245, |
|
"learning_rate": 0.00046335490361153523, |
|
"loss": 5.3617, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 1.6194677065354046, |
|
"grad_norm": 2.2879958152770996, |
|
"learning_rate": 0.0004624630784378077, |
|
"loss": 5.3597, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.622131304736943, |
|
"grad_norm": 2.0821385383605957, |
|
"learning_rate": 0.00046157125326408015, |
|
"loss": 5.3539, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 1.6247949029384814, |
|
"grad_norm": 2.0150811672210693, |
|
"learning_rate": 0.00046067942809035263, |
|
"loss": 5.3568, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.62745850114002, |
|
"grad_norm": 1.944470763206482, |
|
"learning_rate": 0.0004597893865669725, |
|
"loss": 5.3618, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 1.6301220993415586, |
|
"grad_norm": 1.8767342567443848, |
|
"learning_rate": 0.000458897561393245, |
|
"loss": 5.3572, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.632785697543097, |
|
"grad_norm": 2.100074291229248, |
|
"learning_rate": 0.0004580057362195174, |
|
"loss": 5.3557, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 1.6354492957446354, |
|
"grad_norm": 1.8953720331192017, |
|
"learning_rate": 0.00045711569469613733, |
|
"loss": 5.3603, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.638112893946174, |
|
"grad_norm": 2.099968433380127, |
|
"learning_rate": 0.0004562238695224098, |
|
"loss": 5.3459, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 1.6407764921477126, |
|
"grad_norm": 2.21608567237854, |
|
"learning_rate": 0.00045533204434868225, |
|
"loss": 5.3602, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.643440090349251, |
|
"grad_norm": 2.0884177684783936, |
|
"learning_rate": 0.0004544402191749547, |
|
"loss": 5.3538, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 1.6461036885507894, |
|
"grad_norm": 2.0560896396636963, |
|
"learning_rate": 0.00045354839400122717, |
|
"loss": 5.3618, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.648767286752328, |
|
"grad_norm": 2.3166544437408447, |
|
"learning_rate": 0.0004526565688274996, |
|
"loss": 5.3446, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 1.6514308849538666, |
|
"grad_norm": 1.9376626014709473, |
|
"learning_rate": 0.0004517647436537721, |
|
"loss": 5.3565, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.654094483155405, |
|
"grad_norm": 1.8356984853744507, |
|
"learning_rate": 0.0004508729184800445, |
|
"loss": 5.3585, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 1.6567580813569434, |
|
"grad_norm": 2.0316951274871826, |
|
"learning_rate": 0.00044998287695666443, |
|
"loss": 5.3615, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.659421679558482, |
|
"grad_norm": 2.1165359020233154, |
|
"learning_rate": 0.00044909283543328435, |
|
"loss": 5.357, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 1.6620852777600206, |
|
"grad_norm": 2.1769607067108154, |
|
"learning_rate": 0.0004482010102595568, |
|
"loss": 5.3567, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.664748875961559, |
|
"grad_norm": 2.0454256534576416, |
|
"learning_rate": 0.0004473091850858292, |
|
"loss": 5.3573, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 1.6674124741630973, |
|
"grad_norm": 2.1431968212127686, |
|
"learning_rate": 0.0004464173599121017, |
|
"loss": 5.3509, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.670076072364636, |
|
"grad_norm": 2.0397841930389404, |
|
"learning_rate": 0.00044552553473837413, |
|
"loss": 5.3532, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 1.6727396705661746, |
|
"grad_norm": 2.080476999282837, |
|
"learning_rate": 0.0004446337095646467, |
|
"loss": 5.3558, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.675403268767713, |
|
"grad_norm": 1.9653671979904175, |
|
"learning_rate": 0.0004437418843909191, |
|
"loss": 5.3481, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 1.6780668669692513, |
|
"grad_norm": 2.2119712829589844, |
|
"learning_rate": 0.0004428500592171916, |
|
"loss": 5.3555, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.68073046517079, |
|
"grad_norm": 1.990404486656189, |
|
"learning_rate": 0.00044196001769381145, |
|
"loss": 5.3567, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 1.6833940633723286, |
|
"grad_norm": 2.0500054359436035, |
|
"learning_rate": 0.0004410681925200839, |
|
"loss": 5.3503, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.686057661573867, |
|
"grad_norm": 2.205277919769287, |
|
"learning_rate": 0.00044017636734635637, |
|
"loss": 5.3553, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 1.6887212597754053, |
|
"grad_norm": 1.9659850597381592, |
|
"learning_rate": 0.0004392845421726288, |
|
"loss": 5.3456, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.691384857976944, |
|
"grad_norm": 2.029604196548462, |
|
"learning_rate": 0.0004383927169989013, |
|
"loss": 5.3554, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 1.6940484561784825, |
|
"grad_norm": 2.041193723678589, |
|
"learning_rate": 0.0004375008918251737, |
|
"loss": 5.3534, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.696712054380021, |
|
"grad_norm": 2.068268299102783, |
|
"learning_rate": 0.00043661085030179364, |
|
"loss": 5.3564, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 1.6993756525815593, |
|
"grad_norm": 2.0078883171081543, |
|
"learning_rate": 0.0004357190251280661, |
|
"loss": 5.3518, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.702039250783098, |
|
"grad_norm": 1.9186288118362427, |
|
"learning_rate": 0.00043482719995433856, |
|
"loss": 5.3471, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 1.7047028489846365, |
|
"grad_norm": 2.0289323329925537, |
|
"learning_rate": 0.000433935374780611, |
|
"loss": 5.3513, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.7073664471861747, |
|
"grad_norm": 1.69050133228302, |
|
"learning_rate": 0.0004330435496068835, |
|
"loss": 5.3513, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 1.7100300453877133, |
|
"grad_norm": 2.0047898292541504, |
|
"learning_rate": 0.0004321517244331559, |
|
"loss": 5.3531, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.712693643589252, |
|
"grad_norm": 2.1100831031799316, |
|
"learning_rate": 0.0004312616829097759, |
|
"loss": 5.3494, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 1.7153572417907903, |
|
"grad_norm": 2.053802013397217, |
|
"learning_rate": 0.0004303698577360483, |
|
"loss": 5.3573, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.7180208399923287, |
|
"grad_norm": 1.9370436668395996, |
|
"learning_rate": 0.00042947803256232074, |
|
"loss": 5.3457, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 1.7206844381938673, |
|
"grad_norm": 2.062244176864624, |
|
"learning_rate": 0.00042858620738859323, |
|
"loss": 5.3532, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.723348036395406, |
|
"grad_norm": 2.129863739013672, |
|
"learning_rate": 0.00042769438221486566, |
|
"loss": 5.3469, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 1.7260116345969443, |
|
"grad_norm": 2.1496474742889404, |
|
"learning_rate": 0.0004268043406914855, |
|
"loss": 5.3494, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.7286752327984827, |
|
"grad_norm": 2.0887863636016846, |
|
"learning_rate": 0.00042591251551775806, |
|
"loss": 5.3483, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 1.7313388310000213, |
|
"grad_norm": 2.4094293117523193, |
|
"learning_rate": 0.0004250206903440305, |
|
"loss": 5.3485, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.73400242920156, |
|
"grad_norm": 2.046931266784668, |
|
"learning_rate": 0.000424128865170303, |
|
"loss": 5.345, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 1.7366660274030983, |
|
"grad_norm": 2.1520516872406006, |
|
"learning_rate": 0.0004232370399965754, |
|
"loss": 5.351, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.7393296256046367, |
|
"grad_norm": 2.006589651107788, |
|
"learning_rate": 0.0004223469984731953, |
|
"loss": 5.3511, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 1.7419932238061753, |
|
"grad_norm": 1.9035310745239258, |
|
"learning_rate": 0.00042145517329946776, |
|
"loss": 5.3457, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.7446568220077139, |
|
"grad_norm": 2.0777719020843506, |
|
"learning_rate": 0.0004205633481257402, |
|
"loss": 5.3519, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 1.7473204202092523, |
|
"grad_norm": 2.2958412170410156, |
|
"learning_rate": 0.0004196715229520127, |
|
"loss": 5.3455, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.7499840184107907, |
|
"grad_norm": 2.3482723236083984, |
|
"learning_rate": 0.0004187796977782851, |
|
"loss": 5.3513, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 1.7526476166123293, |
|
"grad_norm": 2.4552931785583496, |
|
"learning_rate": 0.00041788787260455755, |
|
"loss": 5.3496, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.7553112148138679, |
|
"grad_norm": 2.0816726684570312, |
|
"learning_rate": 0.00041699604743083003, |
|
"loss": 5.3434, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 1.7579748130154063, |
|
"grad_norm": 1.869194746017456, |
|
"learning_rate": 0.00041610600590744995, |
|
"loss": 5.349, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.7606384112169446, |
|
"grad_norm": 2.020172595977783, |
|
"learning_rate": 0.00041521418073372243, |
|
"loss": 5.3489, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 1.7633020094184833, |
|
"grad_norm": 2.1260483264923096, |
|
"learning_rate": 0.00041432235555999487, |
|
"loss": 5.3523, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.7659656076200219, |
|
"grad_norm": 2.1546857357025146, |
|
"learning_rate": 0.0004134305303862673, |
|
"loss": 5.3414, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 1.7686292058215602, |
|
"grad_norm": 2.2955052852630615, |
|
"learning_rate": 0.0004125387052125398, |
|
"loss": 5.3489, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.7712928040230986, |
|
"grad_norm": 2.0505149364471436, |
|
"learning_rate": 0.0004116468800388122, |
|
"loss": 5.3543, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 1.7739564022246372, |
|
"grad_norm": 1.9976879358291626, |
|
"learning_rate": 0.0004107550548650847, |
|
"loss": 5.3455, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 1.7766200004261758, |
|
"grad_norm": 2.1872785091400146, |
|
"learning_rate": 0.00040986322969135714, |
|
"loss": 5.345, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 1.7792835986277142, |
|
"grad_norm": 2.025681257247925, |
|
"learning_rate": 0.00040897318816797705, |
|
"loss": 5.3559, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 1.7819471968292526, |
|
"grad_norm": 2.051701307296753, |
|
"learning_rate": 0.00040808136299424954, |
|
"loss": 5.3424, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 1.7846107950307912, |
|
"grad_norm": 2.161292314529419, |
|
"learning_rate": 0.00040718953782052197, |
|
"loss": 5.3418, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 1.7872743932323298, |
|
"grad_norm": 2.1306283473968506, |
|
"learning_rate": 0.00040629771264679446, |
|
"loss": 5.352, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 1.7899379914338682, |
|
"grad_norm": 2.1994986534118652, |
|
"learning_rate": 0.00040540767112341437, |
|
"loss": 5.348, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.7926015896354066, |
|
"grad_norm": 2.3227968215942383, |
|
"learning_rate": 0.00040451762960003423, |
|
"loss": 5.3444, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 1.7952651878369452, |
|
"grad_norm": 2.1397862434387207, |
|
"learning_rate": 0.0004036258044263067, |
|
"loss": 5.3556, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 1.7979287860384838, |
|
"grad_norm": 2.0676870346069336, |
|
"learning_rate": 0.00040273397925257915, |
|
"loss": 5.3471, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 1.8005923842400222, |
|
"grad_norm": 2.2523062229156494, |
|
"learning_rate": 0.0004018421540788516, |
|
"loss": 5.3431, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 1.8032559824415606, |
|
"grad_norm": 2.1115000247955322, |
|
"learning_rate": 0.00040095211255547155, |
|
"loss": 5.3467, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 1.8059195806430992, |
|
"grad_norm": 2.0157132148742676, |
|
"learning_rate": 0.000400060287381744, |
|
"loss": 5.3462, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 1.8085831788446376, |
|
"grad_norm": 2.1384365558624268, |
|
"learning_rate": 0.0003991684622080165, |
|
"loss": 5.3381, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 1.811246777046176, |
|
"grad_norm": 2.016707420349121, |
|
"learning_rate": 0.0003982766370342889, |
|
"loss": 5.3424, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.8139103752477146, |
|
"grad_norm": 1.9890104532241821, |
|
"learning_rate": 0.00039738481186056134, |
|
"loss": 5.3459, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 1.8165739734492532, |
|
"grad_norm": 1.997981309890747, |
|
"learning_rate": 0.0003964947703371813, |
|
"loss": 5.3415, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 1.8192375716507916, |
|
"grad_norm": 2.077340602874756, |
|
"learning_rate": 0.00039560294516345374, |
|
"loss": 5.3401, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 1.82190116985233, |
|
"grad_norm": 1.9495571851730347, |
|
"learning_rate": 0.00039471111998972617, |
|
"loss": 5.3461, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 1.8245647680538686, |
|
"grad_norm": 2.086167097091675, |
|
"learning_rate": 0.00039381929481599866, |
|
"loss": 5.3457, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 1.8272283662554072, |
|
"grad_norm": 1.9157156944274902, |
|
"learning_rate": 0.0003929274696422711, |
|
"loss": 5.3374, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 1.8298919644569456, |
|
"grad_norm": 2.2283830642700195, |
|
"learning_rate": 0.0003920356444685436, |
|
"loss": 5.3403, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 1.832555562658484, |
|
"grad_norm": 2.155780553817749, |
|
"learning_rate": 0.00039114560294516344, |
|
"loss": 5.3403, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.8352191608600226, |
|
"grad_norm": 2.0122015476226807, |
|
"learning_rate": 0.00039025377777143587, |
|
"loss": 5.3485, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 1.8378827590615612, |
|
"grad_norm": 2.1252944469451904, |
|
"learning_rate": 0.00038936195259770836, |
|
"loss": 5.3534, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 1.8405463572630996, |
|
"grad_norm": 2.16573166847229, |
|
"learning_rate": 0.00038847012742398084, |
|
"loss": 5.3407, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 1.843209955464638, |
|
"grad_norm": 2.043785810470581, |
|
"learning_rate": 0.0003875800859006007, |
|
"loss": 5.3441, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 1.8458735536661766, |
|
"grad_norm": 2.0578818321228027, |
|
"learning_rate": 0.0003866882607268732, |
|
"loss": 5.344, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 1.8485371518677152, |
|
"grad_norm": 2.344649076461792, |
|
"learning_rate": 0.0003857964355531456, |
|
"loss": 5.3401, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 1.8512007500692536, |
|
"grad_norm": 2.2246205806732178, |
|
"learning_rate": 0.0003849046103794181, |
|
"loss": 5.3474, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 1.853864348270792, |
|
"grad_norm": 2.3041775226593018, |
|
"learning_rate": 0.00038401278520569054, |
|
"loss": 5.3403, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 1.8565279464723305, |
|
"grad_norm": 2.0579144954681396, |
|
"learning_rate": 0.00038312096003196303, |
|
"loss": 5.3388, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 1.8591915446738692, |
|
"grad_norm": 2.1944098472595215, |
|
"learning_rate": 0.00038223091850858294, |
|
"loss": 5.3412, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 1.8618551428754075, |
|
"grad_norm": 2.0834217071533203, |
|
"learning_rate": 0.0003813390933348554, |
|
"loss": 5.3465, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 1.864518741076946, |
|
"grad_norm": 1.9777040481567383, |
|
"learning_rate": 0.00038044726816112786, |
|
"loss": 5.3394, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.8671823392784845, |
|
"grad_norm": 2.341625690460205, |
|
"learning_rate": 0.0003795554429874003, |
|
"loss": 5.3414, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 1.8698459374800231, |
|
"grad_norm": 1.9645224809646606, |
|
"learning_rate": 0.0003786636178136728, |
|
"loss": 5.3429, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 1.8725095356815615, |
|
"grad_norm": 2.217845916748047, |
|
"learning_rate": 0.0003777717926399452, |
|
"loss": 5.3485, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 1.8751731338831, |
|
"grad_norm": 2.2836930751800537, |
|
"learning_rate": 0.00037687996746621765, |
|
"loss": 5.3369, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.8778367320846385, |
|
"grad_norm": 2.1809890270233154, |
|
"learning_rate": 0.00037598814229249013, |
|
"loss": 5.3375, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 1.8805003302861771, |
|
"grad_norm": 2.4111125469207764, |
|
"learning_rate": 0.00037509810076911005, |
|
"loss": 5.3453, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 1.8831639284877155, |
|
"grad_norm": 2.264157295227051, |
|
"learning_rate": 0.0003742062755953825, |
|
"loss": 5.3412, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 1.885827526689254, |
|
"grad_norm": 2.232529878616333, |
|
"learning_rate": 0.00037331445042165497, |
|
"loss": 5.3481, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 1.8884911248907925, |
|
"grad_norm": 2.0301549434661865, |
|
"learning_rate": 0.00037242440889827483, |
|
"loss": 5.3351, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 1.8911547230923311, |
|
"grad_norm": 2.040621757507324, |
|
"learning_rate": 0.0003715325837245473, |
|
"loss": 5.3442, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 1.8938183212938695, |
|
"grad_norm": 2.085535764694214, |
|
"learning_rate": 0.0003706407585508198, |
|
"loss": 5.3302, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 1.896481919495408, |
|
"grad_norm": 2.1077394485473633, |
|
"learning_rate": 0.00036974893337709223, |
|
"loss": 5.3383, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 1.8991455176969465, |
|
"grad_norm": 2.242241621017456, |
|
"learning_rate": 0.0003688571082033647, |
|
"loss": 5.3315, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 1.901809115898485, |
|
"grad_norm": 2.2890877723693848, |
|
"learning_rate": 0.00036796528302963715, |
|
"loss": 5.3378, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.9044727141000233, |
|
"grad_norm": 2.3517234325408936, |
|
"learning_rate": 0.000367075241506257, |
|
"loss": 5.3369, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 1.9071363123015619, |
|
"grad_norm": 2.3767483234405518, |
|
"learning_rate": 0.0003661834163325295, |
|
"loss": 5.3365, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.9097999105031005, |
|
"grad_norm": 2.2238335609436035, |
|
"learning_rate": 0.00036529159115880193, |
|
"loss": 5.3353, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 1.9124635087046389, |
|
"grad_norm": 2.0594356060028076, |
|
"learning_rate": 0.0003643997659850744, |
|
"loss": 5.3346, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 1.9151271069061773, |
|
"grad_norm": 2.1106550693511963, |
|
"learning_rate": 0.00036350794081134685, |
|
"loss": 5.3317, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 1.9177907051077159, |
|
"grad_norm": 2.0819623470306396, |
|
"learning_rate": 0.00036261611563761934, |
|
"loss": 5.332, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.9204543033092545, |
|
"grad_norm": 1.9421486854553223, |
|
"learning_rate": 0.00036172607411423925, |
|
"loss": 5.3425, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 1.9231179015107929, |
|
"grad_norm": 2.304370641708374, |
|
"learning_rate": 0.0003608342489405117, |
|
"loss": 5.3278, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 1.9257814997123313, |
|
"grad_norm": 1.9409058094024658, |
|
"learning_rate": 0.00035994242376678417, |
|
"loss": 5.3364, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 1.9284450979138699, |
|
"grad_norm": 2.199068307876587, |
|
"learning_rate": 0.0003590505985930566, |
|
"loss": 5.3375, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 1.9311086961154085, |
|
"grad_norm": 2.4809699058532715, |
|
"learning_rate": 0.0003581587734193291, |
|
"loss": 5.3304, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 1.9337722943169469, |
|
"grad_norm": 1.8762375116348267, |
|
"learning_rate": 0.000357268731895949, |
|
"loss": 5.3396, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 1.9364358925184852, |
|
"grad_norm": 2.14876651763916, |
|
"learning_rate": 0.00035637690672222144, |
|
"loss": 5.3295, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 1.9390994907200239, |
|
"grad_norm": 2.0710737705230713, |
|
"learning_rate": 0.0003554850815484939, |
|
"loss": 5.3319, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 1.9417630889215625, |
|
"grad_norm": 2.1879022121429443, |
|
"learning_rate": 0.00035459325637476636, |
|
"loss": 5.3353, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 1.9444266871231008, |
|
"grad_norm": 2.2101471424102783, |
|
"learning_rate": 0.0003537014312010388, |
|
"loss": 5.3365, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 1.9470902853246392, |
|
"grad_norm": 2.1538619995117188, |
|
"learning_rate": 0.0003528113896776587, |
|
"loss": 5.3345, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 1.9497538835261778, |
|
"grad_norm": 2.3958141803741455, |
|
"learning_rate": 0.0003519195645039312, |
|
"loss": 5.3298, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 1.9524174817277165, |
|
"grad_norm": 2.2059667110443115, |
|
"learning_rate": 0.0003510277393302037, |
|
"loss": 5.3228, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 1.9550810799292548, |
|
"grad_norm": 2.0048577785491943, |
|
"learning_rate": 0.0003501359141564761, |
|
"loss": 5.3336, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 1.9577446781307932, |
|
"grad_norm": 2.0165789127349854, |
|
"learning_rate": 0.00034924408898274854, |
|
"loss": 5.3342, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 1.9604082763323318, |
|
"grad_norm": 2.2053885459899902, |
|
"learning_rate": 0.00034835226380902103, |
|
"loss": 5.3359, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.9630718745338704, |
|
"grad_norm": 2.316288948059082, |
|
"learning_rate": 0.0003474622222856409, |
|
"loss": 5.3344, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 1.9657354727354088, |
|
"grad_norm": 2.385871410369873, |
|
"learning_rate": 0.0003465703971119133, |
|
"loss": 5.3364, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 1.9683990709369472, |
|
"grad_norm": 2.3206396102905273, |
|
"learning_rate": 0.0003456785719381858, |
|
"loss": 5.3309, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 1.9710626691384858, |
|
"grad_norm": 2.172229766845703, |
|
"learning_rate": 0.00034478674676445824, |
|
"loss": 5.3338, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.9737262673400244, |
|
"grad_norm": 2.3812954425811768, |
|
"learning_rate": 0.0003438967052410782, |
|
"loss": 5.3306, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 1.9763898655415628, |
|
"grad_norm": 2.1423757076263428, |
|
"learning_rate": 0.00034300488006735064, |
|
"loss": 5.3406, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 1.9790534637431012, |
|
"grad_norm": 2.2044973373413086, |
|
"learning_rate": 0.0003421130548936231, |
|
"loss": 5.3371, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 1.9817170619446398, |
|
"grad_norm": 1.944014549255371, |
|
"learning_rate": 0.00034122122971989556, |
|
"loss": 5.3348, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 1.9843806601461784, |
|
"grad_norm": 2.3091371059417725, |
|
"learning_rate": 0.000340329404546168, |
|
"loss": 5.3283, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 1.9870442583477168, |
|
"grad_norm": 2.600417137145996, |
|
"learning_rate": 0.0003394375793724405, |
|
"loss": 5.3292, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 1.9897078565492552, |
|
"grad_norm": 2.0236728191375732, |
|
"learning_rate": 0.0003385457541987129, |
|
"loss": 5.3353, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 1.9923714547507938, |
|
"grad_norm": 2.298342227935791, |
|
"learning_rate": 0.00033765392902498535, |
|
"loss": 5.3355, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 1.9950350529523324, |
|
"grad_norm": 1.945620059967041, |
|
"learning_rate": 0.0003367638875016053, |
|
"loss": 5.3302, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 1.9976986511538706, |
|
"grad_norm": 2.1642651557922363, |
|
"learning_rate": 0.0003358738459782252, |
|
"loss": 5.3259, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 2.000362249355409, |
|
"grad_norm": 2.149771213531494, |
|
"learning_rate": 0.0003349820208044976, |
|
"loss": 5.3347, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 2.003025847556948, |
|
"grad_norm": 2.2164316177368164, |
|
"learning_rate": 0.0003340901956307701, |
|
"loss": 5.3308, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 2.0056894457584864, |
|
"grad_norm": 2.2055323123931885, |
|
"learning_rate": 0.0003331983704570426, |
|
"loss": 5.332, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 2.0083530439600246, |
|
"grad_norm": 2.1814560890197754, |
|
"learning_rate": 0.00033230654528331507, |
|
"loss": 5.3239, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 2.011016642161563, |
|
"grad_norm": 2.1237363815307617, |
|
"learning_rate": 0.0003314147201095875, |
|
"loss": 5.3364, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 2.013680240363102, |
|
"grad_norm": 2.1073851585388184, |
|
"learning_rate": 0.00033052467858620736, |
|
"loss": 5.3209, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 2.0163438385646404, |
|
"grad_norm": 1.9759477376937866, |
|
"learning_rate": 0.00032963285341247985, |
|
"loss": 5.3272, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 2.0190074367661786, |
|
"grad_norm": 2.100966691970825, |
|
"learning_rate": 0.0003287410282387523, |
|
"loss": 5.3226, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 2.021671034967717, |
|
"grad_norm": 2.141537666320801, |
|
"learning_rate": 0.00032784920306502477, |
|
"loss": 5.3305, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 2.0243346331692558, |
|
"grad_norm": 2.2714550495147705, |
|
"learning_rate": 0.0003269573778912972, |
|
"loss": 5.3335, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 2.0269982313707944, |
|
"grad_norm": 2.1945018768310547, |
|
"learning_rate": 0.0003260673363679171, |
|
"loss": 5.3267, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 2.0296618295723325, |
|
"grad_norm": 2.269015312194824, |
|
"learning_rate": 0.0003251755111941896, |
|
"loss": 5.3346, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 2.032325427773871, |
|
"grad_norm": 2.194460391998291, |
|
"learning_rate": 0.00032428368602046203, |
|
"loss": 5.3216, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 2.0349890259754098, |
|
"grad_norm": 2.1248984336853027, |
|
"learning_rate": 0.0003233918608467345, |
|
"loss": 5.3294, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 2.0376526241769484, |
|
"grad_norm": 2.213801622390747, |
|
"learning_rate": 0.00032250003567300695, |
|
"loss": 5.3282, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 2.0403162223784865, |
|
"grad_norm": 2.0801334381103516, |
|
"learning_rate": 0.0003216082104992794, |
|
"loss": 5.3293, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 2.042979820580025, |
|
"grad_norm": 2.191882371902466, |
|
"learning_rate": 0.00032071816897589935, |
|
"loss": 5.3297, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 2.0456434187815637, |
|
"grad_norm": 2.238471031188965, |
|
"learning_rate": 0.0003198263438021718, |
|
"loss": 5.3274, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 2.0483070169831024, |
|
"grad_norm": 2.0454585552215576, |
|
"learning_rate": 0.0003189345186284443, |
|
"loss": 5.3335, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 2.0509706151846405, |
|
"grad_norm": 2.449857473373413, |
|
"learning_rate": 0.0003180426934547167, |
|
"loss": 5.3243, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 2.053634213386179, |
|
"grad_norm": 2.182969331741333, |
|
"learning_rate": 0.00031715265193133657, |
|
"loss": 5.3239, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 2.0562978115877177, |
|
"grad_norm": 2.3800108432769775, |
|
"learning_rate": 0.00031626082675760905, |
|
"loss": 5.3263, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 2.058961409789256, |
|
"grad_norm": 2.4917428493499756, |
|
"learning_rate": 0.0003153690015838815, |
|
"loss": 5.3252, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 2.0616250079907945, |
|
"grad_norm": 2.25253963470459, |
|
"learning_rate": 0.00031447717641015397, |
|
"loss": 5.3323, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 2.064288606192333, |
|
"grad_norm": 2.1959807872772217, |
|
"learning_rate": 0.00031358535123642646, |
|
"loss": 5.3257, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 2.0669522043938717, |
|
"grad_norm": 2.202449321746826, |
|
"learning_rate": 0.0003126935260626989, |
|
"loss": 5.3256, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 2.06961580259541, |
|
"grad_norm": 2.093303918838501, |
|
"learning_rate": 0.0003118017008889714, |
|
"loss": 5.3302, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 2.0722794007969485, |
|
"grad_norm": 2.139282464981079, |
|
"learning_rate": 0.0003109098757152438, |
|
"loss": 5.3298, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 2.074942998998487, |
|
"grad_norm": 2.004852533340454, |
|
"learning_rate": 0.00031001983419186367, |
|
"loss": 5.3329, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 2.0776065972000257, |
|
"grad_norm": 2.385274648666382, |
|
"learning_rate": 0.00030912800901813616, |
|
"loss": 5.3266, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 2.080270195401564, |
|
"grad_norm": 2.218735456466675, |
|
"learning_rate": 0.0003082361838444086, |
|
"loss": 5.329, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 2.0829337936031025, |
|
"grad_norm": 2.271380662918091, |
|
"learning_rate": 0.0003073443586706811, |
|
"loss": 5.3239, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 2.085597391804641, |
|
"grad_norm": 2.526583433151245, |
|
"learning_rate": 0.000306454317147301, |
|
"loss": 5.3287, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 2.0882609900061797, |
|
"grad_norm": 2.1075544357299805, |
|
"learning_rate": 0.0003055624919735734, |
|
"loss": 5.3264, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 2.090924588207718, |
|
"grad_norm": 2.0297112464904785, |
|
"learning_rate": 0.0003046706667998459, |
|
"loss": 5.3279, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 2.0935881864092565, |
|
"grad_norm": 2.0166475772857666, |
|
"learning_rate": 0.00030377884162611834, |
|
"loss": 5.3279, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 2.096251784610795, |
|
"grad_norm": 2.398573398590088, |
|
"learning_rate": 0.00030288880010273826, |
|
"loss": 5.325, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 2.0989153828123337, |
|
"grad_norm": 2.2096564769744873, |
|
"learning_rate": 0.00030199697492901075, |
|
"loss": 5.3241, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 2.101578981013872, |
|
"grad_norm": 2.2474560737609863, |
|
"learning_rate": 0.0003011051497552832, |
|
"loss": 5.3232, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 2.1042425792154105, |
|
"grad_norm": 2.2487635612487793, |
|
"learning_rate": 0.00030021332458155566, |
|
"loss": 5.3191, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 2.106906177416949, |
|
"grad_norm": 2.094921112060547, |
|
"learning_rate": 0.0002993214994078281, |
|
"loss": 5.3354, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 2.1095697756184877, |
|
"grad_norm": 2.2288858890533447, |
|
"learning_rate": 0.00029843145788444796, |
|
"loss": 5.3254, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 2.112233373820026, |
|
"grad_norm": 2.166731595993042, |
|
"learning_rate": 0.00029753963271072044, |
|
"loss": 5.3239, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 2.1148969720215645, |
|
"grad_norm": 2.05653715133667, |
|
"learning_rate": 0.00029664780753699293, |
|
"loss": 5.3305, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 2.117560570223103, |
|
"grad_norm": 2.08963942527771, |
|
"learning_rate": 0.0002957559823632654, |
|
"loss": 5.3255, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 2.1202241684246417, |
|
"grad_norm": 2.268559217453003, |
|
"learning_rate": 0.0002948659408398853, |
|
"loss": 5.3238, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 2.12288776662618, |
|
"grad_norm": 2.9195141792297363, |
|
"learning_rate": 0.0002939741156661577, |
|
"loss": 5.3211, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 2.1255513648277184, |
|
"grad_norm": 2.2552900314331055, |
|
"learning_rate": 0.0002930822904924302, |
|
"loss": 5.3251, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 2.128214963029257, |
|
"grad_norm": 2.294832706451416, |
|
"learning_rate": 0.00029219046531870263, |
|
"loss": 5.32, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 2.1308785612307957, |
|
"grad_norm": 2.3486320972442627, |
|
"learning_rate": 0.0002912986401449751, |
|
"loss": 5.3197, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 2.133542159432334, |
|
"grad_norm": 2.497387647628784, |
|
"learning_rate": 0.00029040681497124755, |
|
"loss": 5.3235, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 2.1362057576338724, |
|
"grad_norm": 2.3829433917999268, |
|
"learning_rate": 0.00028951498979752, |
|
"loss": 5.3145, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 2.138869355835411, |
|
"grad_norm": 2.064811944961548, |
|
"learning_rate": 0.00028862316462379247, |
|
"loss": 5.3168, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 2.1415329540369497, |
|
"grad_norm": 2.194028377532959, |
|
"learning_rate": 0.0002877331231004124, |
|
"loss": 5.3221, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 2.144196552238488, |
|
"grad_norm": 2.1182937622070312, |
|
"learning_rate": 0.0002868412979266848, |
|
"loss": 5.321, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 2.1468601504400264, |
|
"grad_norm": 2.3992223739624023, |
|
"learning_rate": 0.0002859494727529573, |
|
"loss": 5.3237, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 2.149523748641565, |
|
"grad_norm": 2.256955623626709, |
|
"learning_rate": 0.00028505764757922973, |
|
"loss": 5.3144, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 2.152187346843103, |
|
"grad_norm": 2.3727059364318848, |
|
"learning_rate": 0.0002841658224055022, |
|
"loss": 5.3238, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 2.154850945044642, |
|
"grad_norm": 2.1184160709381104, |
|
"learning_rate": 0.00028327399723177465, |
|
"loss": 5.3196, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 2.1575145432461804, |
|
"grad_norm": 2.1502108573913574, |
|
"learning_rate": 0.00028238217205804714, |
|
"loss": 5.3141, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 2.160178141447719, |
|
"grad_norm": 2.176964521408081, |
|
"learning_rate": 0.00028149034688431957, |
|
"loss": 5.3187, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 2.162841739649257, |
|
"grad_norm": 2.144890069961548, |
|
"learning_rate": 0.0002806003053609395, |
|
"loss": 5.3199, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 2.165505337850796, |
|
"grad_norm": 2.17976975440979, |
|
"learning_rate": 0.000279708480187212, |
|
"loss": 5.318, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 2.1681689360523344, |
|
"grad_norm": 2.181568145751953, |
|
"learning_rate": 0.0002788166550134844, |
|
"loss": 5.3214, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 2.170832534253873, |
|
"grad_norm": 2.299090623855591, |
|
"learning_rate": 0.0002779248298397569, |
|
"loss": 5.3225, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 2.173496132455411, |
|
"grad_norm": 2.189419746398926, |
|
"learning_rate": 0.0002770347883163768, |
|
"loss": 5.3193, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 2.17615973065695, |
|
"grad_norm": 2.274648904800415, |
|
"learning_rate": 0.00027614296314264924, |
|
"loss": 5.3218, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 2.1788233288584884, |
|
"grad_norm": 2.1534972190856934, |
|
"learning_rate": 0.0002752511379689217, |
|
"loss": 5.3173, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 2.181486927060027, |
|
"grad_norm": 2.3284084796905518, |
|
"learning_rate": 0.00027435931279519416, |
|
"loss": 5.3126, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 2.184150525261565, |
|
"grad_norm": 2.286384344100952, |
|
"learning_rate": 0.0002734674876214666, |
|
"loss": 5.3232, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 2.1868141234631038, |
|
"grad_norm": 2.111091375350952, |
|
"learning_rate": 0.0002725774460980865, |
|
"loss": 5.3163, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 2.1894777216646424, |
|
"grad_norm": 2.361741304397583, |
|
"learning_rate": 0.00027168562092435894, |
|
"loss": 5.3212, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 2.192141319866181, |
|
"grad_norm": 2.497840642929077, |
|
"learning_rate": 0.0002707937957506314, |
|
"loss": 5.3238, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 2.194804918067719, |
|
"grad_norm": 2.227203607559204, |
|
"learning_rate": 0.00026990197057690386, |
|
"loss": 5.323, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 2.1974685162692578, |
|
"grad_norm": 2.2768001556396484, |
|
"learning_rate": 0.00026901192905352377, |
|
"loss": 5.3182, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 2.2001321144707964, |
|
"grad_norm": 2.157787799835205, |
|
"learning_rate": 0.00026812010387979626, |
|
"loss": 5.3246, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 2.202795712672335, |
|
"grad_norm": 2.3759965896606445, |
|
"learning_rate": 0.0002672282787060687, |
|
"loss": 5.3207, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 2.205459310873873, |
|
"grad_norm": 2.210963487625122, |
|
"learning_rate": 0.0002663364535323411, |
|
"loss": 5.3155, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 2.2081229090754118, |
|
"grad_norm": 2.265197277069092, |
|
"learning_rate": 0.0002654446283586136, |
|
"loss": 5.3194, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 2.2107865072769504, |
|
"grad_norm": 2.110173225402832, |
|
"learning_rate": 0.0002645545868352335, |
|
"loss": 5.3144, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 2.213450105478489, |
|
"grad_norm": 2.235196590423584, |
|
"learning_rate": 0.000263662761661506, |
|
"loss": 5.323, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 2.216113703680027, |
|
"grad_norm": 2.305601119995117, |
|
"learning_rate": 0.00026277093648777844, |
|
"loss": 5.3187, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 2.2187773018815657, |
|
"grad_norm": 2.401959180831909, |
|
"learning_rate": 0.0002618791113140509, |
|
"loss": 5.3175, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 2.2214409000831044, |
|
"grad_norm": 2.163121223449707, |
|
"learning_rate": 0.0002609890697906708, |
|
"loss": 5.3169, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 2.224104498284643, |
|
"grad_norm": 2.265998363494873, |
|
"learning_rate": 0.0002600972446169432, |
|
"loss": 5.3173, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 2.226768096486181, |
|
"grad_norm": 2.236154317855835, |
|
"learning_rate": 0.00025920541944321577, |
|
"loss": 5.3167, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 2.2294316946877197, |
|
"grad_norm": 2.1707651615142822, |
|
"learning_rate": 0.0002583135942694882, |
|
"loss": 5.3184, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 2.2320952928892583, |
|
"grad_norm": 2.121073007583618, |
|
"learning_rate": 0.00025742355274610806, |
|
"loss": 5.3171, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 2.234758891090797, |
|
"grad_norm": 2.2292840480804443, |
|
"learning_rate": 0.00025653172757238055, |
|
"loss": 5.3185, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 2.237422489292335, |
|
"grad_norm": 2.2376914024353027, |
|
"learning_rate": 0.000255639902398653, |
|
"loss": 5.3143, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 2.2400860874938737, |
|
"grad_norm": 2.2844974994659424, |
|
"learning_rate": 0.0002547480772249254, |
|
"loss": 5.3039, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 2.2427496856954123, |
|
"grad_norm": 2.278136968612671, |
|
"learning_rate": 0.0002538562520511979, |
|
"loss": 5.3159, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 2.2454132838969505, |
|
"grad_norm": 2.3182220458984375, |
|
"learning_rate": 0.00025296442687747033, |
|
"loss": 5.319, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 2.248076882098489, |
|
"grad_norm": 2.5095927715301514, |
|
"learning_rate": 0.0002520743853540903, |
|
"loss": 5.3174, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 2.2507404803000277, |
|
"grad_norm": 2.3167264461517334, |
|
"learning_rate": 0.00025118256018036273, |
|
"loss": 5.3131, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 2.2534040785015663, |
|
"grad_norm": 2.211766481399536, |
|
"learning_rate": 0.00025029073500663516, |
|
"loss": 5.325, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 2.256067676703105, |
|
"grad_norm": 2.1502010822296143, |
|
"learning_rate": 0.00024939890983290765, |
|
"loss": 5.3139, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 2.258731274904643, |
|
"grad_norm": 2.1429567337036133, |
|
"learning_rate": 0.00024850886830952756, |
|
"loss": 5.3147, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 2.2613948731061817, |
|
"grad_norm": 2.272367238998413, |
|
"learning_rate": 0.0002476170431358, |
|
"loss": 5.3128, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 2.2640584713077203, |
|
"grad_norm": 2.6372079849243164, |
|
"learning_rate": 0.00024672521796207243, |
|
"loss": 5.3134, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 2.2667220695092585, |
|
"grad_norm": 2.4213263988494873, |
|
"learning_rate": 0.0002458333927883449, |
|
"loss": 5.3142, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 2.269385667710797, |
|
"grad_norm": 2.2919113636016846, |
|
"learning_rate": 0.0002449415676146174, |
|
"loss": 5.3199, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 2.2720492659123357, |
|
"grad_norm": 2.1887030601501465, |
|
"learning_rate": 0.0002440515260912373, |
|
"loss": 5.3168, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 2.2747128641138743, |
|
"grad_norm": 2.2401158809661865, |
|
"learning_rate": 0.00024315970091750975, |
|
"loss": 5.3142, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 2.2773764623154125, |
|
"grad_norm": 2.264155864715576, |
|
"learning_rate": 0.0002422678757437822, |
|
"loss": 5.3063, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 2.280040060516951, |
|
"grad_norm": 2.372823476791382, |
|
"learning_rate": 0.00024137605057005467, |
|
"loss": 5.3146, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 2.2827036587184897, |
|
"grad_norm": 2.5441572666168213, |
|
"learning_rate": 0.00024048600904667456, |
|
"loss": 5.3129, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 2.2853672569200283, |
|
"grad_norm": 2.107741594314575, |
|
"learning_rate": 0.00023959418387294702, |
|
"loss": 5.3112, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 2.2880308551215665, |
|
"grad_norm": 2.1812095642089844, |
|
"learning_rate": 0.00023870235869921948, |
|
"loss": 5.3144, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 2.290694453323105, |
|
"grad_norm": 2.3959500789642334, |
|
"learning_rate": 0.00023781053352549194, |
|
"loss": 5.3108, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 2.2933580515246437, |
|
"grad_norm": 2.3315865993499756, |
|
"learning_rate": 0.00023691870835176437, |
|
"loss": 5.3093, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 2.2960216497261823, |
|
"grad_norm": 2.0199296474456787, |
|
"learning_rate": 0.00023602688317803685, |
|
"loss": 5.3209, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 2.2986852479277204, |
|
"grad_norm": 2.2393200397491455, |
|
"learning_rate": 0.00023513684165465677, |
|
"loss": 5.3074, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 2.301348846129259, |
|
"grad_norm": 2.4474637508392334, |
|
"learning_rate": 0.00023424501648092923, |
|
"loss": 5.3158, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 2.3040124443307977, |
|
"grad_norm": 2.3248863220214844, |
|
"learning_rate": 0.00023335319130720166, |
|
"loss": 5.3157, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 2.3066760425323363, |
|
"grad_norm": 2.4158935546875, |
|
"learning_rate": 0.00023246136613347412, |
|
"loss": 5.3092, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 2.3093396407338744, |
|
"grad_norm": 2.084850549697876, |
|
"learning_rate": 0.00023156954095974658, |
|
"loss": 5.3178, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 2.312003238935413, |
|
"grad_norm": 2.319776773452759, |
|
"learning_rate": 0.0002306794994363665, |
|
"loss": 5.3074, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 2.3146668371369516, |
|
"grad_norm": 2.2137837409973145, |
|
"learning_rate": 0.00022978767426263893, |
|
"loss": 5.3073, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 2.31733043533849, |
|
"grad_norm": 2.4062960147857666, |
|
"learning_rate": 0.0002288958490889114, |
|
"loss": 5.3112, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 2.3199940335400284, |
|
"grad_norm": 2.27229380607605, |
|
"learning_rate": 0.00022800402391518385, |
|
"loss": 5.3114, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 2.322657631741567, |
|
"grad_norm": 2.499032974243164, |
|
"learning_rate": 0.00022711219874145633, |
|
"loss": 5.3132, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 2.3253212299431056, |
|
"grad_norm": 2.071829080581665, |
|
"learning_rate": 0.00022622215721807625, |
|
"loss": 5.3181, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 2.3279848281446442, |
|
"grad_norm": 2.4178686141967773, |
|
"learning_rate": 0.00022533033204434868, |
|
"loss": 5.3079, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 2.3306484263461824, |
|
"grad_norm": 2.431913375854492, |
|
"learning_rate": 0.00022443850687062114, |
|
"loss": 5.311, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 2.333312024547721, |
|
"grad_norm": 2.3519508838653564, |
|
"learning_rate": 0.0002235466816968936, |
|
"loss": 5.3149, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 2.3359756227492596, |
|
"grad_norm": 2.286878824234009, |
|
"learning_rate": 0.00022265485652316606, |
|
"loss": 5.312, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 2.338639220950798, |
|
"grad_norm": 2.3200433254241943, |
|
"learning_rate": 0.00022176481499978595, |
|
"loss": 5.2989, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 2.3413028191523364, |
|
"grad_norm": 2.165735960006714, |
|
"learning_rate": 0.0002208729898260584, |
|
"loss": 5.3169, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 2.343966417353875, |
|
"grad_norm": 2.0269339084625244, |
|
"learning_rate": 0.00021998116465233087, |
|
"loss": 5.3088, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 2.3466300155554136, |
|
"grad_norm": 2.2074029445648193, |
|
"learning_rate": 0.00021908933947860333, |
|
"loss": 5.3096, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 2.3492936137569522, |
|
"grad_norm": 2.7109835147857666, |
|
"learning_rate": 0.0002181975143048758, |
|
"loss": 5.3089, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 2.3519572119584904, |
|
"grad_norm": 2.2240071296691895, |
|
"learning_rate": 0.0002173074727814957, |
|
"loss": 5.3148, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 2.354620810160029, |
|
"grad_norm": 2.26788330078125, |
|
"learning_rate": 0.00021641564760776816, |
|
"loss": 5.3113, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 2.3572844083615676, |
|
"grad_norm": 2.389122486114502, |
|
"learning_rate": 0.00021552382243404062, |
|
"loss": 5.3133, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 2.3599480065631058, |
|
"grad_norm": 2.382267475128174, |
|
"learning_rate": 0.00021463199726031308, |
|
"loss": 5.3129, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 2.3626116047646444, |
|
"grad_norm": 2.411574363708496, |
|
"learning_rate": 0.00021374195573693297, |
|
"loss": 5.3022, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 2.365275202966183, |
|
"grad_norm": 2.348522424697876, |
|
"learning_rate": 0.00021285013056320543, |
|
"loss": 5.3137, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 2.3679388011677216, |
|
"grad_norm": 2.3230319023132324, |
|
"learning_rate": 0.00021195830538947789, |
|
"loss": 5.3059, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 2.3706023993692598, |
|
"grad_norm": 2.2816174030303955, |
|
"learning_rate": 0.00021106648021575035, |
|
"loss": 5.3117, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 2.3732659975707984, |
|
"grad_norm": 2.400097370147705, |
|
"learning_rate": 0.0002101746550420228, |
|
"loss": 5.3095, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 2.375929595772337, |
|
"grad_norm": 2.470815896987915, |
|
"learning_rate": 0.00020928461351864272, |
|
"loss": 5.3027, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 2.3785931939738756, |
|
"grad_norm": 2.1947262287139893, |
|
"learning_rate": 0.00020839278834491518, |
|
"loss": 5.3031, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 2.3812567921754138, |
|
"grad_norm": 2.3549935817718506, |
|
"learning_rate": 0.00020750096317118764, |
|
"loss": 5.3083, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 2.3839203903769524, |
|
"grad_norm": 2.457932949066162, |
|
"learning_rate": 0.0002066091379974601, |
|
"loss": 5.3052, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 2.386583988578491, |
|
"grad_norm": 2.2867889404296875, |
|
"learning_rate": 0.00020571909647407999, |
|
"loss": 5.3155, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 2.3892475867800296, |
|
"grad_norm": 2.061497688293457, |
|
"learning_rate": 0.00020482727130035245, |
|
"loss": 5.3087, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 2.3919111849815677, |
|
"grad_norm": 2.2757697105407715, |
|
"learning_rate": 0.0002039354461266249, |
|
"loss": 5.3095, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 2.3945747831831063, |
|
"grad_norm": 2.4835853576660156, |
|
"learning_rate": 0.00020304362095289736, |
|
"loss": 5.3091, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 2.397238381384645, |
|
"grad_norm": 2.2896037101745605, |
|
"learning_rate": 0.00020215357942951728, |
|
"loss": 5.3124, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 2.3999019795861836, |
|
"grad_norm": 2.31545090675354, |
|
"learning_rate": 0.00020126175425578974, |
|
"loss": 5.31, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 2.4025655777877217, |
|
"grad_norm": 2.296827554702759, |
|
"learning_rate": 0.0002003699290820622, |
|
"loss": 5.3027, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 2.4052291759892603, |
|
"grad_norm": 2.60396671295166, |
|
"learning_rate": 0.00019947810390833466, |
|
"loss": 5.312, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 2.407892774190799, |
|
"grad_norm": 2.500142812728882, |
|
"learning_rate": 0.00019858627873460712, |
|
"loss": 5.2995, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 2.4105563723923376, |
|
"grad_norm": 2.179241180419922, |
|
"learning_rate": 0.000197696237211227, |
|
"loss": 5.3034, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 2.4132199705938757, |
|
"grad_norm": 2.5400588512420654, |
|
"learning_rate": 0.00019680441203749947, |
|
"loss": 5.3074, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 2.4158835687954143, |
|
"grad_norm": 2.4482738971710205, |
|
"learning_rate": 0.00019591258686377192, |
|
"loss": 5.301, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 2.418547166996953, |
|
"grad_norm": 2.3452165126800537, |
|
"learning_rate": 0.00019502076169004438, |
|
"loss": 5.311, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 2.4212107651984915, |
|
"grad_norm": 2.1771457195281982, |
|
"learning_rate": 0.0001941307201666643, |
|
"loss": 5.3035, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 2.4238743634000297, |
|
"grad_norm": 2.195034980773926, |
|
"learning_rate": 0.00019323889499293676, |
|
"loss": 5.3069, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 2.4265379616015683, |
|
"grad_norm": 2.3099453449249268, |
|
"learning_rate": 0.00019234706981920922, |
|
"loss": 5.3075, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 2.429201559803107, |
|
"grad_norm": 2.5112428665161133, |
|
"learning_rate": 0.00019145524464548168, |
|
"loss": 5.3093, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 2.431865158004645, |
|
"grad_norm": 2.470879316329956, |
|
"learning_rate": 0.00019056520312210157, |
|
"loss": 5.3021, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 2.4345287562061837, |
|
"grad_norm": 2.381201982498169, |
|
"learning_rate": 0.00018967337794837403, |
|
"loss": 5.304, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 2.4371923544077223, |
|
"grad_norm": 2.30584454536438, |
|
"learning_rate": 0.00018878155277464648, |
|
"loss": 5.3063, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 2.439855952609261, |
|
"grad_norm": 2.1264095306396484, |
|
"learning_rate": 0.00018788972760091894, |
|
"loss": 5.303, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 2.4425195508107995, |
|
"grad_norm": 2.5097908973693848, |
|
"learning_rate": 0.0001869979024271914, |
|
"loss": 5.3028, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 2.4451831490123377, |
|
"grad_norm": 2.1753334999084473, |
|
"learning_rate": 0.00018610786090381132, |
|
"loss": 5.303, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 2.4478467472138763, |
|
"grad_norm": 2.393508195877075, |
|
"learning_rate": 0.00018521603573008378, |
|
"loss": 5.3065, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 2.450510345415415, |
|
"grad_norm": 2.4845023155212402, |
|
"learning_rate": 0.00018432421055635624, |
|
"loss": 5.3055, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 2.453173943616953, |
|
"grad_norm": 2.286433458328247, |
|
"learning_rate": 0.0001834323853826287, |
|
"loss": 5.3093, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 2.4558375418184917, |
|
"grad_norm": 2.3205184936523438, |
|
"learning_rate": 0.00018254056020890113, |
|
"loss": 5.3046, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 2.4585011400200303, |
|
"grad_norm": 2.2458608150482178, |
|
"learning_rate": 0.00018165051868552104, |
|
"loss": 5.3034, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 2.461164738221569, |
|
"grad_norm": 2.4838719367980957, |
|
"learning_rate": 0.0001807586935117935, |
|
"loss": 5.3067, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 2.463828336423107, |
|
"grad_norm": 2.363417148590088, |
|
"learning_rate": 0.00017986686833806596, |
|
"loss": 5.3075, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 2.4664919346246457, |
|
"grad_norm": 2.1464176177978516, |
|
"learning_rate": 0.0001789750431643384, |
|
"loss": 5.2936, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 2.4691555328261843, |
|
"grad_norm": 2.1444778442382812, |
|
"learning_rate": 0.00017808321799061086, |
|
"loss": 5.3012, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 2.471819131027723, |
|
"grad_norm": 2.1136202812194824, |
|
"learning_rate": 0.0001771931764672308, |
|
"loss": 5.2991, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 2.474482729229261, |
|
"grad_norm": 2.325840950012207, |
|
"learning_rate": 0.00017630135129350326, |
|
"loss": 5.3005, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 2.4771463274307997, |
|
"grad_norm": 2.1854569911956787, |
|
"learning_rate": 0.00017540952611977572, |
|
"loss": 5.3041, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 2.4798099256323383, |
|
"grad_norm": 2.247187614440918, |
|
"learning_rate": 0.00017451770094604815, |
|
"loss": 5.3038, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 2.482473523833877, |
|
"grad_norm": 2.3324661254882812, |
|
"learning_rate": 0.0001736258757723206, |
|
"loss": 5.2999, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 2.485137122035415, |
|
"grad_norm": 2.3304693698883057, |
|
"learning_rate": 0.00017273583424894052, |
|
"loss": 5.3022, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 2.4878007202369536, |
|
"grad_norm": 2.5459063053131104, |
|
"learning_rate": 0.00017184400907521298, |
|
"loss": 5.3082, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 2.4904643184384923, |
|
"grad_norm": 2.280992031097412, |
|
"learning_rate": 0.00017095218390148542, |
|
"loss": 5.3027, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 2.493127916640031, |
|
"grad_norm": 2.204409599304199, |
|
"learning_rate": 0.00017006035872775787, |
|
"loss": 5.3056, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 2.495791514841569, |
|
"grad_norm": 2.7257113456726074, |
|
"learning_rate": 0.0001691703172043778, |
|
"loss": 5.3043, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 2.4984551130431076, |
|
"grad_norm": 2.262225866317749, |
|
"learning_rate": 0.00016827849203065025, |
|
"loss": 5.3022, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 2.5011187112446462, |
|
"grad_norm": 2.167947769165039, |
|
"learning_rate": 0.0001673866668569227, |
|
"loss": 5.2977, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 2.5037823094461844, |
|
"grad_norm": 2.434269428253174, |
|
"learning_rate": 0.00016649484168319517, |
|
"loss": 5.3003, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 2.506445907647723, |
|
"grad_norm": 2.2088136672973633, |
|
"learning_rate": 0.00016560480015981508, |
|
"loss": 5.3048, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 2.5091095058492616, |
|
"grad_norm": 2.268261194229126, |
|
"learning_rate": 0.00016471297498608754, |
|
"loss": 5.3048, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 2.5117731040508002, |
|
"grad_norm": 2.462432384490967, |
|
"learning_rate": 0.00016382114981235998, |
|
"loss": 5.305, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 2.514436702252339, |
|
"grad_norm": 2.6072680950164795, |
|
"learning_rate": 0.00016292932463863243, |
|
"loss": 5.2986, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.517100300453877, |
|
"grad_norm": 2.600860118865967, |
|
"learning_rate": 0.0001620374994649049, |
|
"loss": 5.299, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 2.5197638986554156, |
|
"grad_norm": 2.3521888256073, |
|
"learning_rate": 0.00016114567429117735, |
|
"loss": 5.2936, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 2.522427496856954, |
|
"grad_norm": 2.712414026260376, |
|
"learning_rate": 0.00016025563276779724, |
|
"loss": 5.303, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 2.5250910950584924, |
|
"grad_norm": 2.267749071121216, |
|
"learning_rate": 0.0001593638075940697, |
|
"loss": 5.3026, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 2.527754693260031, |
|
"grad_norm": 2.206207275390625, |
|
"learning_rate": 0.0001584719824203422, |
|
"loss": 5.2967, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 2.5304182914615696, |
|
"grad_norm": 2.3536181449890137, |
|
"learning_rate": 0.00015758015724661465, |
|
"loss": 5.2971, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 2.533081889663108, |
|
"grad_norm": 2.229966163635254, |
|
"learning_rate": 0.00015669011572323456, |
|
"loss": 5.3002, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 2.535745487864647, |
|
"grad_norm": 2.391902208328247, |
|
"learning_rate": 0.000155798290549507, |
|
"loss": 5.3046, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 2.538409086066185, |
|
"grad_norm": 2.367274522781372, |
|
"learning_rate": 0.00015490646537577945, |
|
"loss": 5.3014, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 2.5410726842677236, |
|
"grad_norm": 2.398796319961548, |
|
"learning_rate": 0.00015401464020205191, |
|
"loss": 5.3012, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 2.543736282469262, |
|
"grad_norm": 2.2506918907165527, |
|
"learning_rate": 0.00015312281502832437, |
|
"loss": 5.3034, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 2.5463998806708004, |
|
"grad_norm": 2.4038991928100586, |
|
"learning_rate": 0.00015223277350494426, |
|
"loss": 5.298, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 2.549063478872339, |
|
"grad_norm": 2.2355668544769287, |
|
"learning_rate": 0.00015134094833121672, |
|
"loss": 5.2999, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 2.5517270770738776, |
|
"grad_norm": 2.312537908554077, |
|
"learning_rate": 0.00015044912315748918, |
|
"loss": 5.2987, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 2.554390675275416, |
|
"grad_norm": 2.4338889122009277, |
|
"learning_rate": 0.00014955729798376164, |
|
"loss": 5.2936, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 2.557054273476955, |
|
"grad_norm": 2.303349018096924, |
|
"learning_rate": 0.00014866725646038155, |
|
"loss": 5.2941, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.559717871678493, |
|
"grad_norm": 2.27744197845459, |
|
"learning_rate": 0.00014777543128665401, |
|
"loss": 5.2961, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 2.5623814698800316, |
|
"grad_norm": 2.364135265350342, |
|
"learning_rate": 0.00014688360611292647, |
|
"loss": 5.2982, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 2.56504506808157, |
|
"grad_norm": 2.652825355529785, |
|
"learning_rate": 0.00014599178093919893, |
|
"loss": 5.3072, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 2.5677086662831083, |
|
"grad_norm": 2.2864181995391846, |
|
"learning_rate": 0.00014510173941581882, |
|
"loss": 5.3027, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 2.570372264484647, |
|
"grad_norm": 2.1780378818511963, |
|
"learning_rate": 0.00014420991424209128, |
|
"loss": 5.2988, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 2.5730358626861856, |
|
"grad_norm": 2.4762122631073, |
|
"learning_rate": 0.00014331808906836374, |
|
"loss": 5.2963, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 2.575699460887724, |
|
"grad_norm": 2.3064920902252197, |
|
"learning_rate": 0.0001424262638946362, |
|
"loss": 5.304, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 2.5783630590892628, |
|
"grad_norm": 2.17753529548645, |
|
"learning_rate": 0.00014153443872090866, |
|
"loss": 5.2909, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 2.581026657290801, |
|
"grad_norm": 2.442643404006958, |
|
"learning_rate": 0.00014064439719752857, |
|
"loss": 5.3033, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 2.5836902554923395, |
|
"grad_norm": 2.5781943798065186, |
|
"learning_rate": 0.00013975257202380103, |
|
"loss": 5.2969, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 2.586353853693878, |
|
"grad_norm": 2.1409718990325928, |
|
"learning_rate": 0.0001388607468500735, |
|
"loss": 5.2987, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 2.5890174518954163, |
|
"grad_norm": 2.23543381690979, |
|
"learning_rate": 0.00013796892167634595, |
|
"loss": 5.2989, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 2.591681050096955, |
|
"grad_norm": 2.418957233428955, |
|
"learning_rate": 0.00013707888015296584, |
|
"loss": 5.2972, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 2.5943446482984935, |
|
"grad_norm": 2.292370080947876, |
|
"learning_rate": 0.0001361870549792383, |
|
"loss": 5.3005, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 2.5970082465000317, |
|
"grad_norm": 2.360339403152466, |
|
"learning_rate": 0.00013529522980551076, |
|
"loss": 5.2974, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 2.5996718447015703, |
|
"grad_norm": 2.2026000022888184, |
|
"learning_rate": 0.00013440340463178322, |
|
"loss": 5.3012, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.602335442903109, |
|
"grad_norm": 2.273235559463501, |
|
"learning_rate": 0.00013351336310840313, |
|
"loss": 5.2955, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 2.6049990411046475, |
|
"grad_norm": 2.349081516265869, |
|
"learning_rate": 0.0001326215379346756, |
|
"loss": 5.3026, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 2.607662639306186, |
|
"grad_norm": 2.4691007137298584, |
|
"learning_rate": 0.00013172971276094805, |
|
"loss": 5.2999, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 2.6103262375077243, |
|
"grad_norm": 2.3375978469848633, |
|
"learning_rate": 0.0001308378875872205, |
|
"loss": 5.2849, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 2.612989835709263, |
|
"grad_norm": 2.3784444332122803, |
|
"learning_rate": 0.0001299478460638404, |
|
"loss": 5.2937, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 2.6156534339108015, |
|
"grad_norm": 2.4842257499694824, |
|
"learning_rate": 0.00012905602089011286, |
|
"loss": 5.2919, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 2.6183170321123397, |
|
"grad_norm": 2.2826011180877686, |
|
"learning_rate": 0.00012816419571638532, |
|
"loss": 5.2902, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 2.6209806303138783, |
|
"grad_norm": 2.300616979598999, |
|
"learning_rate": 0.00012727237054265778, |
|
"loss": 5.3024, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 2.623644228515417, |
|
"grad_norm": 2.4524025917053223, |
|
"learning_rate": 0.00012638232901927772, |
|
"loss": 5.2908, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 2.6263078267169555, |
|
"grad_norm": 2.3518335819244385, |
|
"learning_rate": 0.00012549050384555015, |
|
"loss": 5.2977, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 2.628971424918494, |
|
"grad_norm": 2.5559749603271484, |
|
"learning_rate": 0.0001245986786718226, |
|
"loss": 5.2933, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 2.6316350231200323, |
|
"grad_norm": 2.32487416267395, |
|
"learning_rate": 0.00012370685349809507, |
|
"loss": 5.2941, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 2.634298621321571, |
|
"grad_norm": 2.384162187576294, |
|
"learning_rate": 0.000122816811974715, |
|
"loss": 5.2978, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 2.6369622195231095, |
|
"grad_norm": 2.7350683212280273, |
|
"learning_rate": 0.00012192498680098743, |
|
"loss": 5.3015, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 2.6396258177246477, |
|
"grad_norm": 2.5397427082061768, |
|
"learning_rate": 0.00012103316162725988, |
|
"loss": 5.2924, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 2.6422894159261863, |
|
"grad_norm": 2.4719595909118652, |
|
"learning_rate": 0.00012014133645353234, |
|
"loss": 5.2982, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.644953014127725, |
|
"grad_norm": 2.7110893726348877, |
|
"learning_rate": 0.0001192495112798048, |
|
"loss": 5.2908, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 2.6476166123292635, |
|
"grad_norm": 2.5090041160583496, |
|
"learning_rate": 0.00011835946975642471, |
|
"loss": 5.2939, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 2.650280210530802, |
|
"grad_norm": 2.5113580226898193, |
|
"learning_rate": 0.00011746764458269717, |
|
"loss": 5.2935, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 2.6529438087323403, |
|
"grad_norm": 2.4266409873962402, |
|
"learning_rate": 0.00011657581940896962, |
|
"loss": 5.2931, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 2.655607406933879, |
|
"grad_norm": 2.4426701068878174, |
|
"learning_rate": 0.00011568399423524208, |
|
"loss": 5.2909, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 2.6582710051354175, |
|
"grad_norm": 2.5790412425994873, |
|
"learning_rate": 0.00011479216906151452, |
|
"loss": 5.2919, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 2.6609346033369556, |
|
"grad_norm": 2.309144973754883, |
|
"learning_rate": 0.00011390212753813445, |
|
"loss": 5.2967, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 2.6635982015384942, |
|
"grad_norm": 2.297360420227051, |
|
"learning_rate": 0.0001130103023644069, |
|
"loss": 5.2918, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 2.666261799740033, |
|
"grad_norm": 2.539792776107788, |
|
"learning_rate": 0.00011211847719067936, |
|
"loss": 5.2914, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 2.6689253979415715, |
|
"grad_norm": 2.246025800704956, |
|
"learning_rate": 0.00011122665201695182, |
|
"loss": 5.2968, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 2.67158899614311, |
|
"grad_norm": 2.34342885017395, |
|
"learning_rate": 0.00011033661049357173, |
|
"loss": 5.2885, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 2.6742525943446482, |
|
"grad_norm": 2.4776382446289062, |
|
"learning_rate": 0.00010944478531984418, |
|
"loss": 5.2944, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 2.676916192546187, |
|
"grad_norm": 2.583674907684326, |
|
"learning_rate": 0.00010855296014611664, |
|
"loss": 5.2907, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 2.6795797907477255, |
|
"grad_norm": 2.3661584854125977, |
|
"learning_rate": 0.0001076611349723891, |
|
"loss": 5.2969, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 2.6822433889492636, |
|
"grad_norm": 2.3716771602630615, |
|
"learning_rate": 0.00010677109344900901, |
|
"loss": 5.2993, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 2.6849069871508022, |
|
"grad_norm": 2.3315460681915283, |
|
"learning_rate": 0.00010587926827528146, |
|
"loss": 5.2914, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.687570585352341, |
|
"grad_norm": 2.2361655235290527, |
|
"learning_rate": 0.00010498744310155392, |
|
"loss": 5.288, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 2.690234183553879, |
|
"grad_norm": 2.3718972206115723, |
|
"learning_rate": 0.00010409561792782638, |
|
"loss": 5.2933, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 2.6928977817554176, |
|
"grad_norm": 2.414783477783203, |
|
"learning_rate": 0.0001032055764044463, |
|
"loss": 5.2905, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 2.695561379956956, |
|
"grad_norm": 2.5909764766693115, |
|
"learning_rate": 0.00010231375123071875, |
|
"loss": 5.2889, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 2.698224978158495, |
|
"grad_norm": 2.2361748218536377, |
|
"learning_rate": 0.0001014219260569912, |
|
"loss": 5.2884, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 2.7008885763600334, |
|
"grad_norm": 2.3554787635803223, |
|
"learning_rate": 0.00010053010088326366, |
|
"loss": 5.283, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 2.7035521745615716, |
|
"grad_norm": 2.4235968589782715, |
|
"learning_rate": 9.96382757095361e-05, |
|
"loss": 5.2991, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 2.70621577276311, |
|
"grad_norm": 2.334272861480713, |
|
"learning_rate": 9.874645053580856e-05, |
|
"loss": 5.2921, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 2.708879370964649, |
|
"grad_norm": 2.443535566329956, |
|
"learning_rate": 9.785640901242848e-05, |
|
"loss": 5.2934, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 2.711542969166187, |
|
"grad_norm": 2.4466655254364014, |
|
"learning_rate": 9.696458383870094e-05, |
|
"loss": 5.2915, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 2.7142065673677256, |
|
"grad_norm": 2.1013219356536865, |
|
"learning_rate": 9.60727586649734e-05, |
|
"loss": 5.2942, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 2.716870165569264, |
|
"grad_norm": 2.486953020095825, |
|
"learning_rate": 9.518093349124584e-05, |
|
"loss": 5.2948, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 2.719533763770803, |
|
"grad_norm": 2.246967077255249, |
|
"learning_rate": 9.429089196786576e-05, |
|
"loss": 5.288, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 2.7221973619723414, |
|
"grad_norm": 2.308177947998047, |
|
"learning_rate": 9.339906679413822e-05, |
|
"loss": 5.2925, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 2.7248609601738796, |
|
"grad_norm": 2.3832600116729736, |
|
"learning_rate": 9.250724162041068e-05, |
|
"loss": 5.2925, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 2.727524558375418, |
|
"grad_norm": 2.2219245433807373, |
|
"learning_rate": 9.161541644668312e-05, |
|
"loss": 5.294, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.730188156576957, |
|
"grad_norm": 2.4265191555023193, |
|
"learning_rate": 9.072537492330303e-05, |
|
"loss": 5.2875, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 2.732851754778495, |
|
"grad_norm": 2.553427219390869, |
|
"learning_rate": 8.98335497495755e-05, |
|
"loss": 5.2984, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 2.7355153529800336, |
|
"grad_norm": 2.3475024700164795, |
|
"learning_rate": 8.894172457584796e-05, |
|
"loss": 5.2827, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 2.738178951181572, |
|
"grad_norm": 2.5305187702178955, |
|
"learning_rate": 8.80498994021204e-05, |
|
"loss": 5.2937, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 2.740842549383111, |
|
"grad_norm": 2.4398436546325684, |
|
"learning_rate": 8.71598578787403e-05, |
|
"loss": 5.2948, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 2.7435061475846494, |
|
"grad_norm": 2.4077444076538086, |
|
"learning_rate": 8.626803270501276e-05, |
|
"loss": 5.2882, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 2.7461697457861876, |
|
"grad_norm": 2.346778392791748, |
|
"learning_rate": 8.537620753128524e-05, |
|
"loss": 5.2875, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 2.748833343987726, |
|
"grad_norm": 2.4900453090667725, |
|
"learning_rate": 8.448438235755768e-05, |
|
"loss": 5.2835, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 2.7514969421892648, |
|
"grad_norm": 2.4355154037475586, |
|
"learning_rate": 8.359255718383014e-05, |
|
"loss": 5.29, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 2.754160540390803, |
|
"grad_norm": 2.18061900138855, |
|
"learning_rate": 8.270251566045004e-05, |
|
"loss": 5.288, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 2.7568241385923415, |
|
"grad_norm": 2.3646693229675293, |
|
"learning_rate": 8.18106904867225e-05, |
|
"loss": 5.2789, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 2.75948773679388, |
|
"grad_norm": 2.369717836380005, |
|
"learning_rate": 8.091886531299498e-05, |
|
"loss": 5.2901, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 2.7621513349954188, |
|
"grad_norm": 2.4666647911071777, |
|
"learning_rate": 8.002704013926742e-05, |
|
"loss": 5.2858, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 2.7648149331969574, |
|
"grad_norm": 2.3375349044799805, |
|
"learning_rate": 7.913699861588732e-05, |
|
"loss": 5.2854, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 2.7674785313984955, |
|
"grad_norm": 2.2538347244262695, |
|
"learning_rate": 7.824517344215978e-05, |
|
"loss": 5.2899, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 2.770142129600034, |
|
"grad_norm": 2.5232772827148438, |
|
"learning_rate": 7.735334826843224e-05, |
|
"loss": 5.2948, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.7728057278015728, |
|
"grad_norm": 2.3963685035705566, |
|
"learning_rate": 7.646152309470469e-05, |
|
"loss": 5.2919, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 2.775469326003111, |
|
"grad_norm": 2.0667736530303955, |
|
"learning_rate": 7.55714815713246e-05, |
|
"loss": 5.2825, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 2.7781329242046495, |
|
"grad_norm": 2.421602725982666, |
|
"learning_rate": 7.467965639759706e-05, |
|
"loss": 5.2949, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 2.780796522406188, |
|
"grad_norm": 2.3447656631469727, |
|
"learning_rate": 7.378783122386952e-05, |
|
"loss": 5.2871, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 2.7834601206077263, |
|
"grad_norm": 2.1411802768707275, |
|
"learning_rate": 7.289600605014197e-05, |
|
"loss": 5.2861, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 2.786123718809265, |
|
"grad_norm": 2.5163323879241943, |
|
"learning_rate": 7.200418087641443e-05, |
|
"loss": 5.286, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 2.7887873170108035, |
|
"grad_norm": 2.482067108154297, |
|
"learning_rate": 7.111413935303434e-05, |
|
"loss": 5.2863, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 2.791450915212342, |
|
"grad_norm": 2.3614418506622314, |
|
"learning_rate": 7.02223141793068e-05, |
|
"loss": 5.2799, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 2.7941145134138807, |
|
"grad_norm": 2.333521842956543, |
|
"learning_rate": 6.933048900557925e-05, |
|
"loss": 5.2873, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 2.796778111615419, |
|
"grad_norm": 2.2536137104034424, |
|
"learning_rate": 6.843866383185171e-05, |
|
"loss": 5.2909, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 2.7994417098169575, |
|
"grad_norm": 2.516286849975586, |
|
"learning_rate": 6.754862230847162e-05, |
|
"loss": 5.2944, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 2.802105308018496, |
|
"grad_norm": 2.361598253250122, |
|
"learning_rate": 6.665679713474408e-05, |
|
"loss": 5.2872, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 2.8047689062200343, |
|
"grad_norm": 2.387085199356079, |
|
"learning_rate": 6.576497196101654e-05, |
|
"loss": 5.291, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 2.807432504421573, |
|
"grad_norm": 2.2874443531036377, |
|
"learning_rate": 6.487314678728899e-05, |
|
"loss": 5.29, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 2.8100961026231115, |
|
"grad_norm": 2.4107890129089355, |
|
"learning_rate": 6.39831052639089e-05, |
|
"loss": 5.2781, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 2.81275970082465, |
|
"grad_norm": 2.3214197158813477, |
|
"learning_rate": 6.309128009018136e-05, |
|
"loss": 5.2851, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.8154232990261887, |
|
"grad_norm": 2.3806910514831543, |
|
"learning_rate": 6.219945491645382e-05, |
|
"loss": 5.2824, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 2.818086897227727, |
|
"grad_norm": 2.4679012298583984, |
|
"learning_rate": 6.130762974272627e-05, |
|
"loss": 5.291, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 2.8207504954292655, |
|
"grad_norm": 2.30574631690979, |
|
"learning_rate": 6.041758821934619e-05, |
|
"loss": 5.2901, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 2.823414093630804, |
|
"grad_norm": 2.309056043624878, |
|
"learning_rate": 5.9525763045618644e-05, |
|
"loss": 5.2778, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 2.8260776918323423, |
|
"grad_norm": 2.378755569458008, |
|
"learning_rate": 5.8633937871891097e-05, |
|
"loss": 5.2815, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 2.828741290033881, |
|
"grad_norm": 2.6057322025299072, |
|
"learning_rate": 5.7742112698163556e-05, |
|
"loss": 5.2866, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 2.8314048882354195, |
|
"grad_norm": 2.3079919815063477, |
|
"learning_rate": 5.685028752443601e-05, |
|
"loss": 5.2791, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 2.834068486436958, |
|
"grad_norm": 2.2242472171783447, |
|
"learning_rate": 5.5960246001055924e-05, |
|
"loss": 5.2865, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 2.8367320846384967, |
|
"grad_norm": 2.3489010334014893, |
|
"learning_rate": 5.5068420827328383e-05, |
|
"loss": 5.2872, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 2.839395682840035, |
|
"grad_norm": 2.9294140338897705, |
|
"learning_rate": 5.4176595653600836e-05, |
|
"loss": 5.2796, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 2.8420592810415735, |
|
"grad_norm": 2.325824499130249, |
|
"learning_rate": 5.328477047987329e-05, |
|
"loss": 5.2878, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 2.844722879243112, |
|
"grad_norm": 2.3206863403320312, |
|
"learning_rate": 5.23947289564932e-05, |
|
"loss": 5.2827, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 2.8473864774446502, |
|
"grad_norm": 2.241338014602661, |
|
"learning_rate": 5.150290378276566e-05, |
|
"loss": 5.2862, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 2.850050075646189, |
|
"grad_norm": 2.3662049770355225, |
|
"learning_rate": 5.0611078609038116e-05, |
|
"loss": 5.2868, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 2.8527136738477274, |
|
"grad_norm": 2.0729544162750244, |
|
"learning_rate": 4.971925343531057e-05, |
|
"loss": 5.2851, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 2.855377272049266, |
|
"grad_norm": 2.1059601306915283, |
|
"learning_rate": 4.8829211911930484e-05, |
|
"loss": 5.2809, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.8580408702508047, |
|
"grad_norm": 2.70766282081604, |
|
"learning_rate": 4.793738673820294e-05, |
|
"loss": 5.2896, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 2.860704468452343, |
|
"grad_norm": 2.526292562484741, |
|
"learning_rate": 4.704556156447539e-05, |
|
"loss": 5.2828, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 2.8633680666538814, |
|
"grad_norm": 2.246443510055542, |
|
"learning_rate": 4.6153736390747856e-05, |
|
"loss": 5.2847, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 2.86603166485542, |
|
"grad_norm": 2.5226643085479736, |
|
"learning_rate": 4.5263694867367764e-05, |
|
"loss": 5.2871, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 2.868695263056958, |
|
"grad_norm": 2.416816473007202, |
|
"learning_rate": 4.437186969364022e-05, |
|
"loss": 5.2825, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 2.871358861258497, |
|
"grad_norm": 2.5631511211395264, |
|
"learning_rate": 4.348004451991267e-05, |
|
"loss": 5.2815, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 2.8740224594600354, |
|
"grad_norm": 2.2883377075195312, |
|
"learning_rate": 4.258821934618513e-05, |
|
"loss": 5.2824, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 2.8766860576615736, |
|
"grad_norm": 2.4545071125030518, |
|
"learning_rate": 4.1698177822805044e-05, |
|
"loss": 5.278, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 2.879349655863112, |
|
"grad_norm": 2.2015092372894287, |
|
"learning_rate": 4.08063526490775e-05, |
|
"loss": 5.2806, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 2.882013254064651, |
|
"grad_norm": 2.7558255195617676, |
|
"learning_rate": 3.9914527475349956e-05, |
|
"loss": 5.2857, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 2.8846768522661894, |
|
"grad_norm": 2.376549005508423, |
|
"learning_rate": 3.902270230162241e-05, |
|
"loss": 5.2792, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 2.887340450467728, |
|
"grad_norm": 2.3727259635925293, |
|
"learning_rate": 3.813266077824232e-05, |
|
"loss": 5.2843, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 2.890004048669266, |
|
"grad_norm": 2.3833839893341064, |
|
"learning_rate": 3.724083560451478e-05, |
|
"loss": 5.2785, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 2.892667646870805, |
|
"grad_norm": 2.4702396392822266, |
|
"learning_rate": 3.6349010430787236e-05, |
|
"loss": 5.2785, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 2.8953312450723434, |
|
"grad_norm": 2.54264497756958, |
|
"learning_rate": 3.545718525705969e-05, |
|
"loss": 5.2813, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 2.8979948432738816, |
|
"grad_norm": 2.356501579284668, |
|
"learning_rate": 3.456536008333214e-05, |
|
"loss": 5.2886, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.90065844147542, |
|
"grad_norm": 2.546325445175171, |
|
"learning_rate": 3.367531855995206e-05, |
|
"loss": 5.2778, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 2.903322039676959, |
|
"grad_norm": 2.3812687397003174, |
|
"learning_rate": 3.2783493386224516e-05, |
|
"loss": 5.284, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 2.9059856378784974, |
|
"grad_norm": 2.3538711071014404, |
|
"learning_rate": 3.189166821249697e-05, |
|
"loss": 5.2755, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 2.908649236080036, |
|
"grad_norm": 2.2477262020111084, |
|
"learning_rate": 3.099984303876943e-05, |
|
"loss": 5.2876, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 2.911312834281574, |
|
"grad_norm": 2.2652475833892822, |
|
"learning_rate": 3.0109801515389333e-05, |
|
"loss": 5.2777, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 2.9139764324831128, |
|
"grad_norm": 2.468841791152954, |
|
"learning_rate": 2.9217976341661793e-05, |
|
"loss": 5.2779, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 2.9166400306846514, |
|
"grad_norm": 2.151130437850952, |
|
"learning_rate": 2.832615116793425e-05, |
|
"loss": 5.2883, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 2.9193036288861895, |
|
"grad_norm": 2.464799404144287, |
|
"learning_rate": 2.74343259942067e-05, |
|
"loss": 5.2843, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 2.921967227087728, |
|
"grad_norm": 2.6122734546661377, |
|
"learning_rate": 2.6544284470826617e-05, |
|
"loss": 5.2854, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 2.9246308252892668, |
|
"grad_norm": 2.257554769515991, |
|
"learning_rate": 2.565245929709907e-05, |
|
"loss": 5.277, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 2.9272944234908054, |
|
"grad_norm": 2.2422280311584473, |
|
"learning_rate": 2.476063412337153e-05, |
|
"loss": 5.2804, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 2.929958021692344, |
|
"grad_norm": 2.4912326335906982, |
|
"learning_rate": 2.3868808949643985e-05, |
|
"loss": 5.2758, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 2.932621619893882, |
|
"grad_norm": 2.305392265319824, |
|
"learning_rate": 2.2978767426263897e-05, |
|
"loss": 5.2831, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 2.9352852180954208, |
|
"grad_norm": 2.699528217315674, |
|
"learning_rate": 2.2086942252536353e-05, |
|
"loss": 5.2841, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 2.9379488162969594, |
|
"grad_norm": 2.3196749687194824, |
|
"learning_rate": 2.1195117078808806e-05, |
|
"loss": 5.2792, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 2.9406124144984975, |
|
"grad_norm": 2.134294033050537, |
|
"learning_rate": 2.0303291905081265e-05, |
|
"loss": 5.2845, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.943276012700036, |
|
"grad_norm": 2.25675892829895, |
|
"learning_rate": 1.941146673135372e-05, |
|
"loss": 5.2778, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 2.9459396109015747, |
|
"grad_norm": 2.141127824783325, |
|
"learning_rate": 1.852142520797363e-05, |
|
"loss": 5.2738, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 2.9486032091031134, |
|
"grad_norm": 2.3503618240356445, |
|
"learning_rate": 1.762960003424609e-05, |
|
"loss": 5.277, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 2.951266807304652, |
|
"grad_norm": 2.2987284660339355, |
|
"learning_rate": 1.673777486051854e-05, |
|
"loss": 5.2864, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 2.95393040550619, |
|
"grad_norm": 2.384070873260498, |
|
"learning_rate": 1.5845949686791e-05, |
|
"loss": 5.2798, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 2.9565940037077287, |
|
"grad_norm": 2.272744655609131, |
|
"learning_rate": 1.4954124513063455e-05, |
|
"loss": 5.2806, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 2.9592576019092673, |
|
"grad_norm": 2.2945611476898193, |
|
"learning_rate": 1.4064082989683367e-05, |
|
"loss": 5.2852, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 2.9619212001108055, |
|
"grad_norm": 2.5340495109558105, |
|
"learning_rate": 1.3172257815955822e-05, |
|
"loss": 5.2764, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 2.964584798312344, |
|
"grad_norm": 2.3637685775756836, |
|
"learning_rate": 1.228043264222828e-05, |
|
"loss": 5.28, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 2.9672483965138827, |
|
"grad_norm": 2.401252031326294, |
|
"learning_rate": 1.1388607468500735e-05, |
|
"loss": 5.2809, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 2.9699119947154213, |
|
"grad_norm": 2.256577253341675, |
|
"learning_rate": 1.0498565945120647e-05, |
|
"loss": 5.2798, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 2.9725755929169595, |
|
"grad_norm": 2.1444365978240967, |
|
"learning_rate": 9.606740771393103e-06, |
|
"loss": 5.2761, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 2.975239191118498, |
|
"grad_norm": 2.325979471206665, |
|
"learning_rate": 8.714915597665558e-06, |
|
"loss": 5.2804, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 2.9779027893200367, |
|
"grad_norm": 2.1250107288360596, |
|
"learning_rate": 7.823090423938014e-06, |
|
"loss": 5.2767, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 2.9805663875215753, |
|
"grad_norm": 2.4525716304779053, |
|
"learning_rate": 6.933048900557926e-06, |
|
"loss": 5.2805, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 2.9832299857231135, |
|
"grad_norm": 2.176084041595459, |
|
"learning_rate": 6.0412237268303826e-06, |
|
"loss": 5.278, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.985893583924652, |
|
"grad_norm": 2.607921600341797, |
|
"learning_rate": 5.149398553102839e-06, |
|
"loss": 5.2778, |
|
"step": 560500 |
|
}, |
|
{ |
|
"epoch": 2.9885571821261907, |
|
"grad_norm": 2.287775993347168, |
|
"learning_rate": 4.257573379375294e-06, |
|
"loss": 5.2721, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 2.991220780327729, |
|
"grad_norm": 2.258080005645752, |
|
"learning_rate": 3.3657482056477507e-06, |
|
"loss": 5.2754, |
|
"step": 561500 |
|
}, |
|
{ |
|
"epoch": 2.9938843785292675, |
|
"grad_norm": 2.214787244796753, |
|
"learning_rate": 2.475706682267662e-06, |
|
"loss": 5.2853, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 2.996547976730806, |
|
"grad_norm": 2.4470176696777344, |
|
"learning_rate": 1.583881508540118e-06, |
|
"loss": 5.2732, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 2.9992115749323447, |
|
"grad_norm": 2.2027597427368164, |
|
"learning_rate": 6.920563348125741e-07, |
|
"loss": 5.2723, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 563148, |
|
"total_flos": 1.7947279651188326e+17, |
|
"train_loss": 5.458770358526144, |
|
"train_runtime": 36904.8634, |
|
"train_samples_per_second": 976.604, |
|
"train_steps_per_second": 15.259 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 563148, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7947279651188326e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|