|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0775354794046383, |
|
"eval_steps": 500, |
|
"global_step": 2250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009230414214837892, |
|
"grad_norm": 0.0994892492890358, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5722, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.018460828429675783, |
|
"grad_norm": 0.062255557626485825, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2813, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.027691242644513673, |
|
"grad_norm": 0.12040963768959045, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3037, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.036921656859351566, |
|
"grad_norm": 0.04763123765587807, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3666, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.046152071074189456, |
|
"grad_norm": 0.12753669917583466, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1438, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.055382485289027346, |
|
"grad_norm": 0.10537329316139221, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4241, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06461289950386524, |
|
"grad_norm": 0.03980427607893944, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2929, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07384331371870313, |
|
"grad_norm": 0.12119753658771515, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2938, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08307372793354102, |
|
"grad_norm": 0.062459882348775864, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3273, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09230414214837891, |
|
"grad_norm": 0.15507832169532776, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1885, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1015345563632168, |
|
"grad_norm": 0.09834089875221252, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3693, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.11076497057805469, |
|
"grad_norm": 0.05203542485833168, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2587, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11999538479289258, |
|
"grad_norm": 0.12015814334154129, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2815, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12922579900773049, |
|
"grad_norm": 0.06197419390082359, |
|
"learning_rate": 0.0002, |
|
"loss": 1.429, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13845621322256838, |
|
"grad_norm": 0.17958974838256836, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0941, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14768662743740626, |
|
"grad_norm": 0.09950366616249084, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4066, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.15691704165224415, |
|
"grad_norm": 0.050984274595975876, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2131, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16614745586708204, |
|
"grad_norm": 0.150551900267601, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2422, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.17537787008191993, |
|
"grad_norm": 0.06124914437532425, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3434, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.18460828429675782, |
|
"grad_norm": 0.17724090814590454, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0898, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19383869851159571, |
|
"grad_norm": 0.0937797948718071, |
|
"learning_rate": 0.0002, |
|
"loss": 1.432, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2030691127264336, |
|
"grad_norm": 0.059550438076257706, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1605, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2122995269412715, |
|
"grad_norm": 0.10598563402891159, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2004, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.22152994115610938, |
|
"grad_norm": 0.059932854026556015, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2944, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.23076035537094727, |
|
"grad_norm": 0.16503147780895233, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1301, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23999076958578516, |
|
"grad_norm": 0.08444368839263916, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4195, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.24922118380062305, |
|
"grad_norm": 0.05936718359589577, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2805, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.25845159801546097, |
|
"grad_norm": 0.12579703330993652, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2394, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.26768201223029886, |
|
"grad_norm": 0.06689989566802979, |
|
"learning_rate": 0.0002, |
|
"loss": 1.298, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.27691242644513675, |
|
"grad_norm": 0.17793583869934082, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0807, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.28614284065997464, |
|
"grad_norm": 0.08275260776281357, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3433, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.29537325487481253, |
|
"grad_norm": 0.05578906834125519, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2197, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3046036690896504, |
|
"grad_norm": 0.13691303133964539, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2487, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3138340833044883, |
|
"grad_norm": 0.055021870881319046, |
|
"learning_rate": 0.0002, |
|
"loss": 1.275, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3230644975193262, |
|
"grad_norm": 0.1495254933834076, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0445, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3322949117341641, |
|
"grad_norm": 0.09207426011562347, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3997, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.341525325949002, |
|
"grad_norm": 0.05421067774295807, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1986, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.35075574016383987, |
|
"grad_norm": 0.12197154760360718, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2589, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.35998615437867776, |
|
"grad_norm": 0.06101464852690697, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3766, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.36921656859351565, |
|
"grad_norm": 0.19063565135002136, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0677, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.37844698280835354, |
|
"grad_norm": 0.09075415134429932, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4032, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.38767739702319143, |
|
"grad_norm": 0.053780850023031235, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2328, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3969078112380293, |
|
"grad_norm": 0.11555945128202438, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2205, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4061382254528672, |
|
"grad_norm": 0.0680965855717659, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2906, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4153686396677051, |
|
"grad_norm": 0.17800922691822052, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0823, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.424599053882543, |
|
"grad_norm": 0.10128472000360489, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3619, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4338294680973809, |
|
"grad_norm": 0.049957141280174255, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1817, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.44305988231221877, |
|
"grad_norm": 0.1359386295080185, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2062, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.45229029652705666, |
|
"grad_norm": 0.0684947818517685, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3084, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.46152071074189455, |
|
"grad_norm": 0.1941768378019333, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0832, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47075112495673244, |
|
"grad_norm": 0.09089575707912445, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3617, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4799815391715703, |
|
"grad_norm": 0.06254442036151886, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1961, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4892119533864082, |
|
"grad_norm": 0.15948618948459625, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2078, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4984423676012461, |
|
"grad_norm": 0.06899358332157135, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3514, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.507672781816084, |
|
"grad_norm": 0.16271327435970306, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0975, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5169031960309219, |
|
"grad_norm": 0.0963057279586792, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3987, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5261336102457598, |
|
"grad_norm": 0.05728481337428093, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1942, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5353640244605977, |
|
"grad_norm": 0.1385851353406906, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1708, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5445944386754356, |
|
"grad_norm": 0.06655001640319824, |
|
"learning_rate": 0.0002, |
|
"loss": 1.339, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5538248528902735, |
|
"grad_norm": 0.1913049817085266, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0967, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5630552671051113, |
|
"grad_norm": 0.11070458590984344, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4031, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5722856813199493, |
|
"grad_norm": 0.05270432308316231, |
|
"learning_rate": 0.0002, |
|
"loss": 1.203, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5815160955347871, |
|
"grad_norm": 0.12132929265499115, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1632, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5907465097496251, |
|
"grad_norm": 0.06843800842761993, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3555, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5999769239644629, |
|
"grad_norm": 0.1670321673154831, |
|
"learning_rate": 0.0002, |
|
"loss": 1.059, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6092073381793008, |
|
"grad_norm": 0.10655465722084045, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3544, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6184377523941387, |
|
"grad_norm": 0.05795924738049507, |
|
"learning_rate": 0.0002, |
|
"loss": 1.25, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6276681666089766, |
|
"grad_norm": 0.1298709660768509, |
|
"learning_rate": 0.0002, |
|
"loss": 1.222, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6368985808238145, |
|
"grad_norm": 0.0654703825712204, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3189, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6461289950386524, |
|
"grad_norm": 0.18943524360656738, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0885, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6553594092534902, |
|
"grad_norm": 0.11117199063301086, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3343, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6645898234683282, |
|
"grad_norm": 0.05172109976410866, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1328, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.673820237683166, |
|
"grad_norm": 0.13359065353870392, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2447, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.683050651898004, |
|
"grad_norm": 0.059676457196474075, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2718, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6922810661128418, |
|
"grad_norm": 0.1960563063621521, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0669, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7015114803276797, |
|
"grad_norm": 0.11250808089971542, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3465, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7107418945425176, |
|
"grad_norm": 0.05576665699481964, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1737, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7199723087573555, |
|
"grad_norm": 0.16185137629508972, |
|
"learning_rate": 0.0002, |
|
"loss": 1.187, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7292027229721934, |
|
"grad_norm": 0.06587795913219452, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3014, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7384331371870313, |
|
"grad_norm": 0.21672724187374115, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1078, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7476635514018691, |
|
"grad_norm": 0.09454522281885147, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3883, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7568939656167071, |
|
"grad_norm": 0.05657172575592995, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2014, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7661243798315449, |
|
"grad_norm": 0.12518398463726044, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1624, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7753547940463829, |
|
"grad_norm": 0.0718185231089592, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2899, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7845852082612207, |
|
"grad_norm": 0.1621280312538147, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0833, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7938156224760586, |
|
"grad_norm": 0.1027815043926239, |
|
"learning_rate": 0.0002, |
|
"loss": 1.352, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8030460366908965, |
|
"grad_norm": 0.056900862604379654, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2106, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8122764509057344, |
|
"grad_norm": 0.138154998421669, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1345, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8215068651205722, |
|
"grad_norm": 0.06931914389133453, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3355, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8307372793354102, |
|
"grad_norm": 0.185394287109375, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0777, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.839967693550248, |
|
"grad_norm": 0.09037897735834122, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3078, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.849198107765086, |
|
"grad_norm": 0.05863342061638832, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2147, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8584285219799238, |
|
"grad_norm": 0.14246703684329987, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1953, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8676589361947618, |
|
"grad_norm": 0.07498980313539505, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2837, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8768893504095996, |
|
"grad_norm": 0.23275145888328552, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0615, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8861197646244375, |
|
"grad_norm": 0.09213528782129288, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3883, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8953501788392754, |
|
"grad_norm": 0.05749181658029556, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2223, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.9045805930541133, |
|
"grad_norm": 0.12675738334655762, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1542, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9138110072689511, |
|
"grad_norm": 0.06891526281833649, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2883, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9230414214837891, |
|
"grad_norm": 0.1506439447402954, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1152, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9322718356986269, |
|
"grad_norm": 0.08551948517560959, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3746, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9415022499134649, |
|
"grad_norm": 0.05816769599914551, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2117, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9507326641283027, |
|
"grad_norm": 0.15679936110973358, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2107, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9599630783431407, |
|
"grad_norm": 0.06274525076150894, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2783, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9691934925579785, |
|
"grad_norm": 0.1663607805967331, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0381, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9784239067728164, |
|
"grad_norm": 0.10284286737442017, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4281, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 0.06172878295183182, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1913, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.9968847352024922, |
|
"grad_norm": 0.11540690809488297, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2357, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.0064612899503864, |
|
"grad_norm": 0.07405360043048859, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3859, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.0156917041652245, |
|
"grad_norm": 0.07363928109407425, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9267, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.0249221183800623, |
|
"grad_norm": 0.10718333721160889, |
|
"learning_rate": 0.0002, |
|
"loss": 1.242, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.0341525325949001, |
|
"grad_norm": 0.06949968636035919, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0087, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.043382946809738, |
|
"grad_norm": 0.15124961733818054, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0951, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.052613361024576, |
|
"grad_norm": 0.08505109697580338, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2338, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.0618437752394139, |
|
"grad_norm": 0.0844084769487381, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9387, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.0710741894542517, |
|
"grad_norm": 0.12017443031072617, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2559, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.0803046036690898, |
|
"grad_norm": 0.07678249478340149, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0786, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.0895350178839276, |
|
"grad_norm": 0.18587274849414825, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0891, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.0987654320987654, |
|
"grad_norm": 0.087877057492733, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2243, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.1079958463136033, |
|
"grad_norm": 0.08151056617498398, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9319, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.1172262605284413, |
|
"grad_norm": 0.13637030124664307, |
|
"learning_rate": 0.0002, |
|
"loss": 1.342, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.1264566747432792, |
|
"grad_norm": 0.07987112551927567, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0845, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.135687088958117, |
|
"grad_norm": 0.17300938069820404, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1104, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.1449175031729548, |
|
"grad_norm": 0.0821269229054451, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2084, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.1541479173877929, |
|
"grad_norm": 0.08363176882266998, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9193, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.1633783316026307, |
|
"grad_norm": 0.12400569021701813, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3398, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.1726087458174685, |
|
"grad_norm": 0.0741545781493187, |
|
"learning_rate": 0.0002, |
|
"loss": 1.07, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.1818391600323064, |
|
"grad_norm": 0.18392737209796906, |
|
"learning_rate": 0.0002, |
|
"loss": 1.102, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.1910695742471444, |
|
"grad_norm": 0.10395547747612, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1766, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.2002999884619823, |
|
"grad_norm": 0.08065596967935562, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9593, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.20953040267682, |
|
"grad_norm": 0.13076524436473846, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2912, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.218760816891658, |
|
"grad_norm": 0.08445240557193756, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0545, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.227991231106496, |
|
"grad_norm": 0.20568707585334778, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0472, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.2372216453213338, |
|
"grad_norm": 0.0978812500834465, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1746, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.2464520595361717, |
|
"grad_norm": 0.08013073354959488, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9361, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.2556824737510095, |
|
"grad_norm": 0.15785863995552063, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3058, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.2649128879658473, |
|
"grad_norm": 0.08266527205705643, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0807, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.2741433021806854, |
|
"grad_norm": 0.18238036334514618, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0587, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.2833737163955232, |
|
"grad_norm": 0.09454452991485596, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2299, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.292604130610361, |
|
"grad_norm": 0.09221120178699493, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9915, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.3018345448251991, |
|
"grad_norm": 0.13429689407348633, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3109, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.311064959040037, |
|
"grad_norm": 0.08239381015300751, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0279, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.3202953732548748, |
|
"grad_norm": 0.20243394374847412, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1165, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.3295257874697128, |
|
"grad_norm": 0.10623496025800705, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1892, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.3387562016845507, |
|
"grad_norm": 0.08493519574403763, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9719, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.3479866158993885, |
|
"grad_norm": 0.16793687641620636, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2933, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.3572170301142263, |
|
"grad_norm": 0.0803951844573021, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0914, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.3664474443290642, |
|
"grad_norm": 0.18061946332454681, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0951, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.3756778585439022, |
|
"grad_norm": 0.09481924027204514, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2137, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.38490827275874, |
|
"grad_norm": 0.08988731354475021, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0406, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.394138686973578, |
|
"grad_norm": 0.16403962671756744, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2664, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.403369101188416, |
|
"grad_norm": 0.0818110927939415, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0402, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.4125995154032538, |
|
"grad_norm": 0.16447734832763672, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1246, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.4218299296180916, |
|
"grad_norm": 0.10654182732105255, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1963, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.4310603438329295, |
|
"grad_norm": 0.09120084345340729, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0389, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.4402907580477673, |
|
"grad_norm": 0.14979740977287292, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2998, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.4495211722626054, |
|
"grad_norm": 0.07872021943330765, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0553, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.4587515864774432, |
|
"grad_norm": 0.1971125304698944, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0648, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.467982000692281, |
|
"grad_norm": 0.10692698508501053, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2246, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.477212414907119, |
|
"grad_norm": 0.0899726077914238, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8801, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.486442829121957, |
|
"grad_norm": 0.14624738693237305, |
|
"learning_rate": 0.0002, |
|
"loss": 1.268, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.4956732433367947, |
|
"grad_norm": 0.07690660655498505, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0909, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.5049036575516326, |
|
"grad_norm": 0.18284741044044495, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0576, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.5141340717664704, |
|
"grad_norm": 0.09398135542869568, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2297, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.5233644859813085, |
|
"grad_norm": 0.08855324983596802, |
|
"learning_rate": 0.0002, |
|
"loss": 0.97, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.5325949001961463, |
|
"grad_norm": 0.15404872596263885, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3044, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.5418253144109841, |
|
"grad_norm": 0.08170903474092484, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0842, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.5510557286258222, |
|
"grad_norm": 0.18026649951934814, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0973, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.56028614284066, |
|
"grad_norm": 0.09690876305103302, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2005, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.5695165570554979, |
|
"grad_norm": 0.09389860183000565, |
|
"learning_rate": 0.0002, |
|
"loss": 0.97, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.578746971270336, |
|
"grad_norm": 0.15237314999103546, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2745, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.5879773854851735, |
|
"grad_norm": 0.09236445277929306, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0959, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.5972077997000116, |
|
"grad_norm": 0.193682461977005, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1163, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.6064382139148494, |
|
"grad_norm": 0.1000017300248146, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2142, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.6156686281296873, |
|
"grad_norm": 0.0881427600979805, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9718, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.6248990423445253, |
|
"grad_norm": 0.16513171792030334, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2881, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.6341294565593631, |
|
"grad_norm": 0.0824236124753952, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0153, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.643359870774201, |
|
"grad_norm": 0.18334250152111053, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0999, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.652590284989039, |
|
"grad_norm": 0.09759881347417831, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2143, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.6618206992038767, |
|
"grad_norm": 0.09835653752088547, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9948, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.6710511134187147, |
|
"grad_norm": 0.1422744244337082, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2753, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.6802815276335525, |
|
"grad_norm": 0.0781414732336998, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0262, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.6895119418483904, |
|
"grad_norm": 0.2046748399734497, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0542, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.6987423560632284, |
|
"grad_norm": 0.09908697754144669, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1942, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.7079727702780663, |
|
"grad_norm": 0.09801312536001205, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9554, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.717203184492904, |
|
"grad_norm": 0.1688520461320877, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2497, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.7264335987077422, |
|
"grad_norm": 0.08527534455060959, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0928, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.7356640129225798, |
|
"grad_norm": 0.19082818925380707, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0729, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.7448944271374178, |
|
"grad_norm": 0.10551278293132782, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2043, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.7541248413522557, |
|
"grad_norm": 0.0871649906039238, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9317, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.7633552555670935, |
|
"grad_norm": 0.13540509343147278, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2648, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.7725856697819315, |
|
"grad_norm": 0.0853731706738472, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0521, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.7818160839967694, |
|
"grad_norm": 0.20368990302085876, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1177, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.7910464982116072, |
|
"grad_norm": 0.09358594566583633, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1872, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.8002769124264453, |
|
"grad_norm": 0.08881039917469025, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9926, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.8095073266412829, |
|
"grad_norm": 0.14714112877845764, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2216, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.818737740856121, |
|
"grad_norm": 0.08117840439081192, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0313, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.8279681550709588, |
|
"grad_norm": 0.2248132824897766, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1024, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.8371985692857966, |
|
"grad_norm": 0.11052978783845901, |
|
"learning_rate": 0.0002, |
|
"loss": 1.18, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.8464289835006347, |
|
"grad_norm": 0.08844051510095596, |
|
"learning_rate": 0.0002, |
|
"loss": 0.99, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.8556593977154725, |
|
"grad_norm": 0.17737261950969696, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3163, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.8648898119303103, |
|
"grad_norm": 0.09116645157337189, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0379, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.8741202261451484, |
|
"grad_norm": 0.16292434930801392, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0473, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.883350640359986, |
|
"grad_norm": 0.09686768054962158, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1816, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.892581054574824, |
|
"grad_norm": 0.09110133349895477, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9796, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.901811468789662, |
|
"grad_norm": 0.1265280693769455, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3238, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.9110418830044997, |
|
"grad_norm": 0.07903092354536057, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0891, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.9202722972193378, |
|
"grad_norm": 0.17232394218444824, |
|
"learning_rate": 0.0002, |
|
"loss": 1.134, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.9295027114341756, |
|
"grad_norm": 0.10463748872280121, |
|
"learning_rate": 0.0002, |
|
"loss": 1.219, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.9387331256490135, |
|
"grad_norm": 0.09100574254989624, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9918, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.9479635398638515, |
|
"grad_norm": 0.16899889707565308, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2809, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.9571939540786891, |
|
"grad_norm": 0.08439228683710098, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0566, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.9664243682935272, |
|
"grad_norm": 0.1993444412946701, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0572, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.975654782508365, |
|
"grad_norm": 0.1062462106347084, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2122, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.9848851967232028, |
|
"grad_norm": 0.08482355624437332, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9347, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.994115610938041, |
|
"grad_norm": 0.13126371800899506, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2604, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.0036921656859352, |
|
"grad_norm": 0.09182780981063843, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1431, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.012922579900773, |
|
"grad_norm": 0.1420363485813141, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8912, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.022152994115611, |
|
"grad_norm": 0.1376965492963791, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1347, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.031383408330449, |
|
"grad_norm": 0.07587343454360962, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7146, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.0406138225452866, |
|
"grad_norm": 0.20123699307441711, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1783, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.0498442367601246, |
|
"grad_norm": 0.10732077807188034, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8584, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.0590746509749627, |
|
"grad_norm": 0.19853752851486206, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9121, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.0683050651898003, |
|
"grad_norm": 0.13605843484401703, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1323, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.0775354794046383, |
|
"grad_norm": 0.07920292764902115, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6811, |
|
"step": 2250 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.194849501524787e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|