| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.1599997440004096, | |
| "eval_steps": 200000, | |
| "global_step": 100000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0001599997440004096, | |
| "grad_norm": 84.32501983642578, | |
| "learning_rate": 3.103950336794611e-08, | |
| "loss": 10.8792, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0003199994880008192, | |
| "grad_norm": 60.63747024536133, | |
| "learning_rate": 6.303899137613798e-08, | |
| "loss": 10.9284, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.00047999923200122877, | |
| "grad_norm": 55.71075439453125, | |
| "learning_rate": 9.503847938432986e-08, | |
| "loss": 10.6466, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0006399989760016384, | |
| "grad_norm": 57.63307189941406, | |
| "learning_rate": 1.2703796739252173e-07, | |
| "loss": 10.841, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.000799998720002048, | |
| "grad_norm": 89.1032485961914, | |
| "learning_rate": 1.590374554007136e-07, | |
| "loss": 10.8094, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0009599984640024575, | |
| "grad_norm": 57.2479362487793, | |
| "learning_rate": 1.9103694340890547e-07, | |
| "loss": 10.4323, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0011199982080028672, | |
| "grad_norm": 51.17530059814453, | |
| "learning_rate": 2.2303643141709733e-07, | |
| "loss": 10.3032, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.0012799979520032767, | |
| "grad_norm": 60.76409912109375, | |
| "learning_rate": 2.550359194252892e-07, | |
| "loss": 10.4006, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0014399976960036865, | |
| "grad_norm": 67.00859069824219, | |
| "learning_rate": 2.870354074334811e-07, | |
| "loss": 10.4743, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.001599997440004096, | |
| "grad_norm": 68.4343032836914, | |
| "learning_rate": 3.19034895441673e-07, | |
| "loss": 10.2334, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0017599971840045055, | |
| "grad_norm": 48.704105377197266, | |
| "learning_rate": 3.510343834498648e-07, | |
| "loss": 10.0135, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.001919996928004915, | |
| "grad_norm": 45.30134963989258, | |
| "learning_rate": 3.830338714580567e-07, | |
| "loss": 9.7874, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.002079996672005325, | |
| "grad_norm": 84.56024169921875, | |
| "learning_rate": 4.150333594662486e-07, | |
| "loss": 9.7419, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.0022399964160057344, | |
| "grad_norm": 45.73213195800781, | |
| "learning_rate": 4.470328474744404e-07, | |
| "loss": 9.7412, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.002399996160006144, | |
| "grad_norm": 50.21996307373047, | |
| "learning_rate": 4.790323354826324e-07, | |
| "loss": 9.4585, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.0025599959040065534, | |
| "grad_norm": 59.475799560546875, | |
| "learning_rate": 5.110318234908241e-07, | |
| "loss": 9.5339, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.002719995648006963, | |
| "grad_norm": 82.53620910644531, | |
| "learning_rate": 5.43031311499016e-07, | |
| "loss": 9.4345, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.002879995392007373, | |
| "grad_norm": 39.44235610961914, | |
| "learning_rate": 5.750307995072079e-07, | |
| "loss": 9.1733, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.0030399951360077825, | |
| "grad_norm": 37.58698654174805, | |
| "learning_rate": 6.070302875153998e-07, | |
| "loss": 8.9952, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.003199994880008192, | |
| "grad_norm": 40.35204315185547, | |
| "learning_rate": 6.390297755235917e-07, | |
| "loss": 8.9669, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.0033599946240086016, | |
| "grad_norm": 57.84451675415039, | |
| "learning_rate": 6.707092686517017e-07, | |
| "loss": 8.8152, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.003519994368009011, | |
| "grad_norm": 40.126953125, | |
| "learning_rate": 7.027087566598935e-07, | |
| "loss": 8.7936, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.0036799941120094206, | |
| "grad_norm": 35.435707092285156, | |
| "learning_rate": 7.347082446680854e-07, | |
| "loss": 8.6771, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.00383999385600983, | |
| "grad_norm": 42.3509635925293, | |
| "learning_rate": 7.667077326762773e-07, | |
| "loss": 8.4648, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.00399999360001024, | |
| "grad_norm": 33.58556365966797, | |
| "learning_rate": 7.987072206844691e-07, | |
| "loss": 8.5764, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.00415999334401065, | |
| "grad_norm": 34.014678955078125, | |
| "learning_rate": 8.30706708692661e-07, | |
| "loss": 8.4587, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.004319993088011059, | |
| "grad_norm": 36.43831253051758, | |
| "learning_rate": 8.627061967008528e-07, | |
| "loss": 8.2966, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.004479992832011469, | |
| "grad_norm": 31.411684036254883, | |
| "learning_rate": 8.947056847090448e-07, | |
| "loss": 8.2329, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.004639992576011879, | |
| "grad_norm": 47.570125579833984, | |
| "learning_rate": 9.267051727172366e-07, | |
| "loss": 8.1415, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.004799992320012288, | |
| "grad_norm": 30.771928787231445, | |
| "learning_rate": 9.587046607254284e-07, | |
| "loss": 8.0404, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.004959992064012698, | |
| "grad_norm": 26.92803955078125, | |
| "learning_rate": 9.907041487336204e-07, | |
| "loss": 7.9698, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.005119991808013107, | |
| "grad_norm": 31.121917724609375, | |
| "learning_rate": 1.0227036367418122e-06, | |
| "loss": 7.9205, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.005279991552013517, | |
| "grad_norm": 33.991416931152344, | |
| "learning_rate": 1.054703124750004e-06, | |
| "loss": 7.8314, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.005439991296013926, | |
| "grad_norm": 31.278030395507812, | |
| "learning_rate": 1.086702612758196e-06, | |
| "loss": 7.8369, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.005599991040014336, | |
| "grad_norm": 28.116140365600586, | |
| "learning_rate": 1.1187021007663878e-06, | |
| "loss": 7.6403, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.005759990784014746, | |
| "grad_norm": 30.954113006591797, | |
| "learning_rate": 1.1507015887745798e-06, | |
| "loss": 7.5842, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.005919990528015155, | |
| "grad_norm": 36.53567886352539, | |
| "learning_rate": 1.1827010767827715e-06, | |
| "loss": 7.5812, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.006079990272015565, | |
| "grad_norm": 36.81153106689453, | |
| "learning_rate": 1.2147005647909635e-06, | |
| "loss": 7.4335, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.006239990016015974, | |
| "grad_norm": 22.556833267211914, | |
| "learning_rate": 1.2467000527991553e-06, | |
| "loss": 7.4917, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.006399989760016384, | |
| "grad_norm": 40.195579528808594, | |
| "learning_rate": 1.278699540807347e-06, | |
| "loss": 7.3204, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.006559989504016793, | |
| "grad_norm": 21.862642288208008, | |
| "learning_rate": 1.310699028815539e-06, | |
| "loss": 7.2971, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.006719989248017203, | |
| "grad_norm": 29.61161231994629, | |
| "learning_rate": 1.3426985168237308e-06, | |
| "loss": 7.2233, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.006879988992017613, | |
| "grad_norm": 22.342451095581055, | |
| "learning_rate": 1.3746980048319228e-06, | |
| "loss": 7.2081, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.007039988736018022, | |
| "grad_norm": 36.36684799194336, | |
| "learning_rate": 1.4066974928401148e-06, | |
| "loss": 7.1364, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.007199988480018432, | |
| "grad_norm": 25.563953399658203, | |
| "learning_rate": 1.4386969808483064e-06, | |
| "loss": 7.0663, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.007359988224018841, | |
| "grad_norm": 22.50385856628418, | |
| "learning_rate": 1.4706964688564984e-06, | |
| "loss": 6.9601, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.007519987968019251, | |
| "grad_norm": 31.61231231689453, | |
| "learning_rate": 1.5026959568646904e-06, | |
| "loss": 6.9546, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.00767998771201966, | |
| "grad_norm": 18.862520217895508, | |
| "learning_rate": 1.5346954448728822e-06, | |
| "loss": 6.9019, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.00783998745602007, | |
| "grad_norm": 32.594539642333984, | |
| "learning_rate": 1.5666949328810741e-06, | |
| "loss": 6.8801, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.00799998720002048, | |
| "grad_norm": 21.06804084777832, | |
| "learning_rate": 1.598694420889266e-06, | |
| "loss": 6.7734, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.00815998694402089, | |
| "grad_norm": 31.783803939819336, | |
| "learning_rate": 1.6303739140173757e-06, | |
| "loss": 6.7648, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.0083199866880213, | |
| "grad_norm": 49.79084777832031, | |
| "learning_rate": 1.6623734020255677e-06, | |
| "loss": 6.7498, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.008479986432021708, | |
| "grad_norm": 26.1977481842041, | |
| "learning_rate": 1.6943728900337597e-06, | |
| "loss": 6.6872, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.008639986176022118, | |
| "grad_norm": 21.942001342773438, | |
| "learning_rate": 1.7263723780419515e-06, | |
| "loss": 6.6264, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.008799985920022528, | |
| "grad_norm": 32.572959899902344, | |
| "learning_rate": 1.7583718660501433e-06, | |
| "loss": 6.579, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.008959985664022938, | |
| "grad_norm": 20.728240966796875, | |
| "learning_rate": 1.7903713540583353e-06, | |
| "loss": 6.6001, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.009119985408023347, | |
| "grad_norm": 24.334205627441406, | |
| "learning_rate": 1.822370842066527e-06, | |
| "loss": 6.5971, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.009279985152023757, | |
| "grad_norm": 27.025753021240234, | |
| "learning_rate": 1.854370330074719e-06, | |
| "loss": 6.4694, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.009439984896024167, | |
| "grad_norm": 23.506013870239258, | |
| "learning_rate": 1.8863698180829106e-06, | |
| "loss": 6.3983, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.009599984640024576, | |
| "grad_norm": 35.65713882446289, | |
| "learning_rate": 1.9183693060911026e-06, | |
| "loss": 6.4477, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.009759984384024985, | |
| "grad_norm": 22.977373123168945, | |
| "learning_rate": 1.950368794099295e-06, | |
| "loss": 6.4308, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.009919984128025396, | |
| "grad_norm": 22.127635955810547, | |
| "learning_rate": 1.982368282107486e-06, | |
| "loss": 6.4248, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.010079983872025805, | |
| "grad_norm": 33.53960418701172, | |
| "learning_rate": 2.0143677701156784e-06, | |
| "loss": 6.2642, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.010239983616026214, | |
| "grad_norm": 24.39597511291504, | |
| "learning_rate": 2.04636725812387e-06, | |
| "loss": 6.2763, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.010399983360026625, | |
| "grad_norm": 24.471288681030273, | |
| "learning_rate": 2.078366746132062e-06, | |
| "loss": 6.3878, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.010559983104027034, | |
| "grad_norm": 34.05498123168945, | |
| "learning_rate": 2.110366234140254e-06, | |
| "loss": 6.2601, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.010719982848027443, | |
| "grad_norm": 30.60455322265625, | |
| "learning_rate": 2.142365722148446e-06, | |
| "loss": 6.1789, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.010879982592027852, | |
| "grad_norm": 27.737686157226562, | |
| "learning_rate": 2.1743652101566377e-06, | |
| "loss": 6.1773, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.011039982336028263, | |
| "grad_norm": 24.246810913085938, | |
| "learning_rate": 2.2063646981648294e-06, | |
| "loss": 6.1439, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.011199982080028672, | |
| "grad_norm": 27.53533363342285, | |
| "learning_rate": 2.2383641861730217e-06, | |
| "loss": 6.1863, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.011359981824029081, | |
| "grad_norm": 27.81687355041504, | |
| "learning_rate": 2.2703636741812134e-06, | |
| "loss": 6.0513, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.011519981568029492, | |
| "grad_norm": 28.00519371032715, | |
| "learning_rate": 2.3020431673093234e-06, | |
| "loss": 6.0671, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.011679981312029901, | |
| "grad_norm": 29.347061157226562, | |
| "learning_rate": 2.3340426553175152e-06, | |
| "loss": 6.0212, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.01183998105603031, | |
| "grad_norm": 29.621200561523438, | |
| "learning_rate": 2.365722148445625e-06, | |
| "loss": 6.0043, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.011999980800030719, | |
| "grad_norm": 31.689117431640625, | |
| "learning_rate": 2.397721636453817e-06, | |
| "loss": 6.0166, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.01215998054403113, | |
| "grad_norm": 46.79508972167969, | |
| "learning_rate": 2.429721124462009e-06, | |
| "loss": 5.9754, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.012319980288031539, | |
| "grad_norm": 28.857833862304688, | |
| "learning_rate": 2.4617206124702006e-06, | |
| "loss": 5.9211, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.012479980032031948, | |
| "grad_norm": 58.34132766723633, | |
| "learning_rate": 2.4937201004783928e-06, | |
| "loss": 5.7867, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.012639979776032359, | |
| "grad_norm": 49.33425521850586, | |
| "learning_rate": 2.525719588486584e-06, | |
| "loss": 5.8534, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.012799979520032768, | |
| "grad_norm": 39.17392349243164, | |
| "learning_rate": 2.5577190764947763e-06, | |
| "loss": 5.7708, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.012959979264033177, | |
| "grad_norm": 45.94136428833008, | |
| "learning_rate": 2.589718564502968e-06, | |
| "loss": 5.8328, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.013119979008033586, | |
| "grad_norm": 36.19196319580078, | |
| "learning_rate": 2.6217180525111603e-06, | |
| "loss": 5.7417, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.013279978752033997, | |
| "grad_norm": 37.051658630371094, | |
| "learning_rate": 2.653717540519352e-06, | |
| "loss": 5.8097, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.013439978496034406, | |
| "grad_norm": 90.0757064819336, | |
| "learning_rate": 2.6857170285275435e-06, | |
| "loss": 5.7578, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.013599978240034815, | |
| "grad_norm": 92.7857894897461, | |
| "learning_rate": 2.7177165165357357e-06, | |
| "loss": 5.643, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.013759977984035226, | |
| "grad_norm": 26.648149490356445, | |
| "learning_rate": 2.7497160045439274e-06, | |
| "loss": 5.6401, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.013919977728035635, | |
| "grad_norm": 45.42919158935547, | |
| "learning_rate": 2.7817154925521196e-06, | |
| "loss": 5.6627, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.014079977472036044, | |
| "grad_norm": 48.3182487487793, | |
| "learning_rate": 2.8137149805603114e-06, | |
| "loss": 5.6167, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.014239977216036454, | |
| "grad_norm": 51.463653564453125, | |
| "learning_rate": 2.8457144685685028e-06, | |
| "loss": 5.6539, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.014399976960036864, | |
| "grad_norm": 47.81680679321289, | |
| "learning_rate": 2.877713956576695e-06, | |
| "loss": 5.4513, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.014559976704037273, | |
| "grad_norm": 42.410667419433594, | |
| "learning_rate": 2.9097134445848868e-06, | |
| "loss": 5.4132, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.014719976448037683, | |
| "grad_norm": 55.33562088012695, | |
| "learning_rate": 2.941712932593079e-06, | |
| "loss": 5.4714, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.014879976192038093, | |
| "grad_norm": 38.538246154785156, | |
| "learning_rate": 2.9737124206012707e-06, | |
| "loss": 5.4786, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.015039975936038502, | |
| "grad_norm": 43.42023468017578, | |
| "learning_rate": 3.0057119086094625e-06, | |
| "loss": 5.3928, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.015199975680038912, | |
| "grad_norm": 24.861467361450195, | |
| "learning_rate": 3.037391401737572e-06, | |
| "loss": 5.4774, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.01535997542403932, | |
| "grad_norm": 98.92141723632812, | |
| "learning_rate": 3.0693908897457643e-06, | |
| "loss": 5.2881, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.015519975168039732, | |
| "grad_norm": 62.839866638183594, | |
| "learning_rate": 3.101390377753956e-06, | |
| "loss": 5.3699, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.01567997491204014, | |
| "grad_norm": 46.006065368652344, | |
| "learning_rate": 3.133069870882066e-06, | |
| "loss": 5.1483, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.01583997465604055, | |
| "grad_norm": 89.62445068359375, | |
| "learning_rate": 3.1650693588902583e-06, | |
| "loss": 5.3051, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.01599997440004096, | |
| "grad_norm": 41.113609313964844, | |
| "learning_rate": 3.19706884689845e-06, | |
| "loss": 5.2546, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.01615997414404137, | |
| "grad_norm": 46.37376403808594, | |
| "learning_rate": 3.2290683349066414e-06, | |
| "loss": 5.2314, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.01631997388804178, | |
| "grad_norm": 60.3846321105957, | |
| "learning_rate": 3.2610678229148337e-06, | |
| "loss": 5.1783, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.016479973632042188, | |
| "grad_norm": 145.4359130859375, | |
| "learning_rate": 3.2930673109230254e-06, | |
| "loss": 5.2074, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.0166399733760426, | |
| "grad_norm": 69.00183868408203, | |
| "learning_rate": 3.325066798931217e-06, | |
| "loss": 5.2825, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.01679997312004301, | |
| "grad_norm": 48.03580093383789, | |
| "learning_rate": 3.3570662869394094e-06, | |
| "loss": 5.1715, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.016959972864043417, | |
| "grad_norm": 58.56736755371094, | |
| "learning_rate": 3.389065774947601e-06, | |
| "loss": 5.087, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.017119972608043828, | |
| "grad_norm": 54.484527587890625, | |
| "learning_rate": 3.421065262955793e-06, | |
| "loss": 5.082, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.017279972352044235, | |
| "grad_norm": 74.30866241455078, | |
| "learning_rate": 3.4530647509639847e-06, | |
| "loss": 4.9111, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.017439972096044646, | |
| "grad_norm": 60.489505767822266, | |
| "learning_rate": 3.4850642389721765e-06, | |
| "loss": 5.0213, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.017599971840045057, | |
| "grad_norm": 61.25093460083008, | |
| "learning_rate": 3.5170637269803687e-06, | |
| "loss": 4.9898, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.017759971584045464, | |
| "grad_norm": 51.98568344116211, | |
| "learning_rate": 3.5490632149885605e-06, | |
| "loss": 4.7734, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.017919971328045875, | |
| "grad_norm": 64.08167266845703, | |
| "learning_rate": 3.581062702996752e-06, | |
| "loss": 4.9511, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.018079971072046286, | |
| "grad_norm": 61.8354606628418, | |
| "learning_rate": 3.613062191004944e-06, | |
| "loss": 5.0481, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.018239970816046693, | |
| "grad_norm": 97.53675842285156, | |
| "learning_rate": 3.645061679013136e-06, | |
| "loss": 4.8441, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.018399970560047104, | |
| "grad_norm": 49.35017013549805, | |
| "learning_rate": 3.677061167021328e-06, | |
| "loss": 4.873, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.018559970304047515, | |
| "grad_norm": 44.33409118652344, | |
| "learning_rate": 3.70906065502952e-06, | |
| "loss": 4.9988, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.018719970048047922, | |
| "grad_norm": 140.5505828857422, | |
| "learning_rate": 3.741060143037712e-06, | |
| "loss": 4.7653, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.018879969792048333, | |
| "grad_norm": 68.21163177490234, | |
| "learning_rate": 3.7730596310459034e-06, | |
| "loss": 4.804, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.019039969536048744, | |
| "grad_norm": 48.678226470947266, | |
| "learning_rate": 3.805059119054095e-06, | |
| "loss": 4.8288, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.01919996928004915, | |
| "grad_norm": 76.32611083984375, | |
| "learning_rate": 3.837058607062287e-06, | |
| "loss": 4.7053, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.019359969024049562, | |
| "grad_norm": 70.85586547851562, | |
| "learning_rate": 3.869058095070479e-06, | |
| "loss": 4.6887, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.01951996876804997, | |
| "grad_norm": 66.46036529541016, | |
| "learning_rate": 3.901057583078671e-06, | |
| "loss": 4.7832, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.01967996851205038, | |
| "grad_norm": 165.13221740722656, | |
| "learning_rate": 3.9330570710868636e-06, | |
| "loss": 4.6817, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.01983996825605079, | |
| "grad_norm": 118.48895263671875, | |
| "learning_rate": 3.965056559095055e-06, | |
| "loss": 4.6252, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.0199999680000512, | |
| "grad_norm": 64.3436050415039, | |
| "learning_rate": 3.997056047103246e-06, | |
| "loss": 4.5936, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.02015996774405161, | |
| "grad_norm": 42.27592468261719, | |
| "learning_rate": 4.0290555351114385e-06, | |
| "loss": 4.7452, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.02031996748805202, | |
| "grad_norm": 60.829036712646484, | |
| "learning_rate": 4.061055023119631e-06, | |
| "loss": 4.5321, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.020479967232052428, | |
| "grad_norm": 161.975830078125, | |
| "learning_rate": 4.093054511127823e-06, | |
| "loss": 4.4964, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.02063996697605284, | |
| "grad_norm": 99.2963638305664, | |
| "learning_rate": 4.125053999136014e-06, | |
| "loss": 4.4421, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.02079996672005325, | |
| "grad_norm": 68.78880310058594, | |
| "learning_rate": 4.156733492264124e-06, | |
| "loss": 4.3782, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.020959966464053657, | |
| "grad_norm": 80.74951171875, | |
| "learning_rate": 4.188732980272316e-06, | |
| "loss": 4.5169, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.021119966208054067, | |
| "grad_norm": 157.87254333496094, | |
| "learning_rate": 4.220412473400426e-06, | |
| "loss": 4.533, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.02127996595205448, | |
| "grad_norm": 148.68331909179688, | |
| "learning_rate": 4.252411961408618e-06, | |
| "loss": 4.3725, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.021439965696054886, | |
| "grad_norm": 72.9531021118164, | |
| "learning_rate": 4.28441144941681e-06, | |
| "loss": 4.2911, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.021599965440055297, | |
| "grad_norm": 73.24847412109375, | |
| "learning_rate": 4.316410937425001e-06, | |
| "loss": 4.2261, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.021759965184055704, | |
| "grad_norm": 94.57313537597656, | |
| "learning_rate": 4.348410425433194e-06, | |
| "loss": 4.2467, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.021919964928056115, | |
| "grad_norm": 105.674560546875, | |
| "learning_rate": 4.380409913441385e-06, | |
| "loss": 4.1558, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.022079964672056526, | |
| "grad_norm": 63.658287048339844, | |
| "learning_rate": 4.412409401449577e-06, | |
| "loss": 4.2794, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.022239964416056933, | |
| "grad_norm": 77.69287109375, | |
| "learning_rate": 4.444408889457769e-06, | |
| "loss": 4.2383, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.022399964160057344, | |
| "grad_norm": 82.83360290527344, | |
| "learning_rate": 4.4764083774659615e-06, | |
| "loss": 4.1654, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.022559963904057755, | |
| "grad_norm": 47.373531341552734, | |
| "learning_rate": 4.508407865474153e-06, | |
| "loss": 4.158, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.022719963648058162, | |
| "grad_norm": 97.64757537841797, | |
| "learning_rate": 4.540407353482344e-06, | |
| "loss": 4.1299, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.022879963392058573, | |
| "grad_norm": 54.75618362426758, | |
| "learning_rate": 4.5724068414905365e-06, | |
| "loss": 4.1902, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.023039963136058984, | |
| "grad_norm": 258.4887390136719, | |
| "learning_rate": 4.604406329498729e-06, | |
| "loss": 3.7853, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.02319996288005939, | |
| "grad_norm": 104.63798522949219, | |
| "learning_rate": 4.63640581750692e-06, | |
| "loss": 4.0514, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.023359962624059802, | |
| "grad_norm": 60.090843200683594, | |
| "learning_rate": 4.668405305515112e-06, | |
| "loss": 4.1655, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.023519962368060213, | |
| "grad_norm": 44.36670684814453, | |
| "learning_rate": 4.7004047935233036e-06, | |
| "loss": 4.051, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.02367996211206062, | |
| "grad_norm": 41.61213302612305, | |
| "learning_rate": 4.732404281531496e-06, | |
| "loss": 4.078, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.02383996185606103, | |
| "grad_norm": 73.2448501586914, | |
| "learning_rate": 4.764403769539688e-06, | |
| "loss": 4.1193, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.023999961600061438, | |
| "grad_norm": 77.30301666259766, | |
| "learning_rate": 4.796403257547879e-06, | |
| "loss": 4.1536, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.02415996134406185, | |
| "grad_norm": 48.1458854675293, | |
| "learning_rate": 4.8284027455560715e-06, | |
| "loss": 3.935, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.02431996108806226, | |
| "grad_norm": 129.59295654296875, | |
| "learning_rate": 4.860402233564263e-06, | |
| "loss": 3.9535, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.024479960832062667, | |
| "grad_norm": 163.0813751220703, | |
| "learning_rate": 4.892401721572455e-06, | |
| "loss": 3.7051, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.024639960576063078, | |
| "grad_norm": 102.2786865234375, | |
| "learning_rate": 4.924401209580647e-06, | |
| "loss": 3.8329, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.02479996032006349, | |
| "grad_norm": 160.66392517089844, | |
| "learning_rate": 4.956400697588839e-06, | |
| "loss": 3.9412, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.024959960064063896, | |
| "grad_norm": 136.77218627929688, | |
| "learning_rate": 4.988400185597031e-06, | |
| "loss": 3.6668, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.025119959808064307, | |
| "grad_norm": 63.87991714477539, | |
| "learning_rate": 5.0200796787251404e-06, | |
| "loss": 3.7758, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.025279959552064718, | |
| "grad_norm": 352.977294921875, | |
| "learning_rate": 5.052079166733333e-06, | |
| "loss": 3.8805, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.025439959296065125, | |
| "grad_norm": 148.54776000976562, | |
| "learning_rate": 5.084078654741524e-06, | |
| "loss": 3.8848, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.025599959040065536, | |
| "grad_norm": 105.01113891601562, | |
| "learning_rate": 5.116078142749716e-06, | |
| "loss": 3.75, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.025759958784065947, | |
| "grad_norm": 170.62828063964844, | |
| "learning_rate": 5.148077630757908e-06, | |
| "loss": 3.5685, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.025919958528066354, | |
| "grad_norm": 164.85324096679688, | |
| "learning_rate": 5.180077118766101e-06, | |
| "loss": 3.7016, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.026079958272066765, | |
| "grad_norm": 79.85810852050781, | |
| "learning_rate": 5.212076606774292e-06, | |
| "loss": 4.0955, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.026239958016067173, | |
| "grad_norm": 109.73529815673828, | |
| "learning_rate": 5.244076094782484e-06, | |
| "loss": 3.7577, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.026399957760067583, | |
| "grad_norm": 105.98066711425781, | |
| "learning_rate": 5.276075582790676e-06, | |
| "loss": 3.7485, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.026559957504067994, | |
| "grad_norm": 71.02545166015625, | |
| "learning_rate": 5.3080750707988686e-06, | |
| "loss": 3.8263, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.0267199572480684, | |
| "grad_norm": 245.44224548339844, | |
| "learning_rate": 5.340074558807059e-06, | |
| "loss": 3.6922, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.026879956992068813, | |
| "grad_norm": 42.178157806396484, | |
| "learning_rate": 5.372074046815251e-06, | |
| "loss": 3.6568, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.027039956736069223, | |
| "grad_norm": 114.55894470214844, | |
| "learning_rate": 5.404073534823443e-06, | |
| "loss": 3.7317, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.02719995648006963, | |
| "grad_norm": 86.70626831054688, | |
| "learning_rate": 5.436073022831635e-06, | |
| "loss": 3.5089, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.02735995622407004, | |
| "grad_norm": 202.02505493164062, | |
| "learning_rate": 5.468072510839827e-06, | |
| "loss": 3.7377, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.027519955968070452, | |
| "grad_norm": 114.00701141357422, | |
| "learning_rate": 5.500071998848019e-06, | |
| "loss": 3.6206, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.02767995571207086, | |
| "grad_norm": 152.38311767578125, | |
| "learning_rate": 5.532071486856211e-06, | |
| "loss": 3.3702, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.02783995545607127, | |
| "grad_norm": 156.1048126220703, | |
| "learning_rate": 5.564070974864403e-06, | |
| "loss": 3.5126, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.02799995520007168, | |
| "grad_norm": 117.87386322021484, | |
| "learning_rate": 5.596070462872595e-06, | |
| "loss": 3.4841, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.02815995494407209, | |
| "grad_norm": 616.7991333007812, | |
| "learning_rate": 5.628069950880786e-06, | |
| "loss": 3.1464, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.0283199546880725, | |
| "grad_norm": 131.32760620117188, | |
| "learning_rate": 5.6600694388889786e-06, | |
| "loss": 3.7012, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.028479954432072907, | |
| "grad_norm": 60.172969818115234, | |
| "learning_rate": 5.69206892689717e-06, | |
| "loss": 3.5802, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.028639954176073318, | |
| "grad_norm": 169.24374389648438, | |
| "learning_rate": 5.724068414905361e-06, | |
| "loss": 3.4952, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.02879995392007373, | |
| "grad_norm": 158.77391052246094, | |
| "learning_rate": 5.7560679029135535e-06, | |
| "loss": 3.1174, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.028959953664074136, | |
| "grad_norm": 218.98867797851562, | |
| "learning_rate": 5.787747396041664e-06, | |
| "loss": 3.3134, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.029119953408074547, | |
| "grad_norm": 185.3249053955078, | |
| "learning_rate": 5.819746884049856e-06, | |
| "loss": 3.3578, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.029279953152074958, | |
| "grad_norm": 93.69242858886719, | |
| "learning_rate": 5.851746372058048e-06, | |
| "loss": 3.0209, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.029439952896075365, | |
| "grad_norm": 85.82784271240234, | |
| "learning_rate": 5.883745860066239e-06, | |
| "loss": 3.3796, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.029599952640075776, | |
| "grad_norm": 125.96697998046875, | |
| "learning_rate": 5.915745348074431e-06, | |
| "loss": 3.2287, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.029759952384076187, | |
| "grad_norm": 235.71075439453125, | |
| "learning_rate": 5.947744836082623e-06, | |
| "loss": 3.1537, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.029919952128076594, | |
| "grad_norm": 139.5558319091797, | |
| "learning_rate": 5.979744324090815e-06, | |
| "loss": 2.9073, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.030079951872077005, | |
| "grad_norm": 204.2928924560547, | |
| "learning_rate": 6.011743812099007e-06, | |
| "loss": 3.3444, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.030239951616077416, | |
| "grad_norm": 165.4457244873047, | |
| "learning_rate": 6.043743300107199e-06, | |
| "loss": 3.1341, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.030399951360077823, | |
| "grad_norm": 66.5983657836914, | |
| "learning_rate": 6.07574278811539e-06, | |
| "loss": 2.8862, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.030559951104078234, | |
| "grad_norm": 219.95774841308594, | |
| "learning_rate": 6.1077422761235826e-06, | |
| "loss": 3.2033, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.03071995084807864, | |
| "grad_norm": 125.15766906738281, | |
| "learning_rate": 6.139741764131775e-06, | |
| "loss": 3.2764, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.030879950592079052, | |
| "grad_norm": 207.95970153808594, | |
| "learning_rate": 6.171741252139967e-06, | |
| "loss": 3.0725, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.031039950336079463, | |
| "grad_norm": 368.32781982421875, | |
| "learning_rate": 6.203740740148158e-06, | |
| "loss": 3.0436, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.03119995008007987, | |
| "grad_norm": 412.2764587402344, | |
| "learning_rate": 6.23574022815635e-06, | |
| "loss": 3.3493, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.03135994982408028, | |
| "grad_norm": 155.46766662597656, | |
| "learning_rate": 6.267739716164542e-06, | |
| "loss": 3.0141, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.03151994956808069, | |
| "grad_norm": 89.32569885253906, | |
| "learning_rate": 6.299739204172733e-06, | |
| "loss": 2.779, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.0316799493120811, | |
| "grad_norm": 241.4378204345703, | |
| "learning_rate": 6.3317386921809254e-06, | |
| "loss": 3.3543, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.03183994905608151, | |
| "grad_norm": 13.20569133758545, | |
| "learning_rate": 6.363738180189118e-06, | |
| "loss": 3.1526, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.03199994880008192, | |
| "grad_norm": 270.6402893066406, | |
| "learning_rate": 6.395737668197309e-06, | |
| "loss": 2.7896, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.03215994854408233, | |
| "grad_norm": 106.38632202148438, | |
| "learning_rate": 6.427737156205501e-06, | |
| "loss": 2.9398, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.03231994828808274, | |
| "grad_norm": 191.7210693359375, | |
| "learning_rate": 6.459416649333611e-06, | |
| "loss": 3.1254, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.03247994803208315, | |
| "grad_norm": 143.96151733398438, | |
| "learning_rate": 6.491416137341803e-06, | |
| "loss": 2.8832, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.03263994777608356, | |
| "grad_norm": 150.26368713378906, | |
| "learning_rate": 6.523415625349994e-06, | |
| "loss": 3.0542, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.032799947520083965, | |
| "grad_norm": 178.11705017089844, | |
| "learning_rate": 6.5554151133581865e-06, | |
| "loss": 2.9722, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.032959947264084376, | |
| "grad_norm": 222.4794921875, | |
| "learning_rate": 6.587414601366379e-06, | |
| "loss": 2.9321, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.03311994700808479, | |
| "grad_norm": 155.37796020507812, | |
| "learning_rate": 6.619414089374571e-06, | |
| "loss": 2.6448, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.0332799467520852, | |
| "grad_norm": 155.5786590576172, | |
| "learning_rate": 6.651413577382762e-06, | |
| "loss": 3.4006, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.03343994649608561, | |
| "grad_norm": 684.525146484375, | |
| "learning_rate": 6.6834130653909545e-06, | |
| "loss": 3.0022, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.03359994624008602, | |
| "grad_norm": 545.5623168945312, | |
| "learning_rate": 6.715412553399147e-06, | |
| "loss": 2.6366, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.03375994598408642, | |
| "grad_norm": 292.9093017578125, | |
| "learning_rate": 6.747412041407339e-06, | |
| "loss": 3.0112, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.033919945728086834, | |
| "grad_norm": 2.531680107116699, | |
| "learning_rate": 6.7794115294155294e-06, | |
| "loss": 2.7856, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.034079945472087245, | |
| "grad_norm": 216.7860565185547, | |
| "learning_rate": 6.811411017423722e-06, | |
| "loss": 3.0967, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.034239945216087656, | |
| "grad_norm": 138.73028564453125, | |
| "learning_rate": 6.843410505431913e-06, | |
| "loss": 2.8754, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.034399944960088066, | |
| "grad_norm": 78.2362060546875, | |
| "learning_rate": 6.875409993440105e-06, | |
| "loss": 3.1269, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.03455994470408847, | |
| "grad_norm": 144.1228790283203, | |
| "learning_rate": 6.907409481448297e-06, | |
| "loss": 2.8235, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.03471994444808888, | |
| "grad_norm": 275.1159973144531, | |
| "learning_rate": 6.93940896945649e-06, | |
| "loss": 2.4912, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.03487994419208929, | |
| "grad_norm": 216.12060546875, | |
| "learning_rate": 6.971408457464681e-06, | |
| "loss": 2.5079, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.0350399439360897, | |
| "grad_norm": 398.5049133300781, | |
| "learning_rate": 7.003407945472873e-06, | |
| "loss": 3.2942, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.035199943680090114, | |
| "grad_norm": 116.13761901855469, | |
| "learning_rate": 7.035407433481065e-06, | |
| "loss": 2.4184, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.035359943424090524, | |
| "grad_norm": 425.1556091308594, | |
| "learning_rate": 7.067406921489257e-06, | |
| "loss": 2.782, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.03551994316809093, | |
| "grad_norm": 17.029335021972656, | |
| "learning_rate": 7.099086414617366e-06, | |
| "loss": 2.7652, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.03567994291209134, | |
| "grad_norm": 307.45343017578125, | |
| "learning_rate": 7.1310859026255585e-06, | |
| "loss": 3.113, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.03583994265609175, | |
| "grad_norm": 69.89311981201172, | |
| "learning_rate": 7.163085390633751e-06, | |
| "loss": 2.7451, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.03599994240009216, | |
| "grad_norm": 28.0865535736084, | |
| "learning_rate": 7.195084878641943e-06, | |
| "loss": 2.7473, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.03615994214409257, | |
| "grad_norm": 108.03202056884766, | |
| "learning_rate": 7.227084366650134e-06, | |
| "loss": 2.5116, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.03631994188809298, | |
| "grad_norm": 299.888427734375, | |
| "learning_rate": 7.2590838546583265e-06, | |
| "loss": 2.8531, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.036479941632093386, | |
| "grad_norm": 87.79664611816406, | |
| "learning_rate": 7.291083342666519e-06, | |
| "loss": 2.9171, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.0366399413760938, | |
| "grad_norm": 388.6871337890625, | |
| "learning_rate": 7.323082830674709e-06, | |
| "loss": 2.7954, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.03679994112009421, | |
| "grad_norm": 87.27410888671875, | |
| "learning_rate": 7.355082318682901e-06, | |
| "loss": 2.5376, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.03695994086409462, | |
| "grad_norm": 159.74534606933594, | |
| "learning_rate": 7.387081806691094e-06, | |
| "loss": 3.2488, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.03711994060809503, | |
| "grad_norm": 169.96243286132812, | |
| "learning_rate": 7.419081294699285e-06, | |
| "loss": 2.6131, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.037279940352095434, | |
| "grad_norm": 221.1896514892578, | |
| "learning_rate": 7.451080782707477e-06, | |
| "loss": 3.1343, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.037439940096095845, | |
| "grad_norm": 67.28482818603516, | |
| "learning_rate": 7.482760275835588e-06, | |
| "loss": 2.3159, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.037599939840096255, | |
| "grad_norm": 341.05975341796875, | |
| "learning_rate": 7.514759763843779e-06, | |
| "loss": 2.4225, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.037759939584096666, | |
| "grad_norm": 250.44683837890625, | |
| "learning_rate": 7.54675925185197e-06, | |
| "loss": 2.5034, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.03791993932809708, | |
| "grad_norm": 423.6518249511719, | |
| "learning_rate": 7.5787587398601625e-06, | |
| "loss": 3.0067, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.03807993907209749, | |
| "grad_norm": 169.45944213867188, | |
| "learning_rate": 7.610758227868355e-06, | |
| "loss": 2.313, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.03823993881609789, | |
| "grad_norm": 80.43399047851562, | |
| "learning_rate": 7.642757715876546e-06, | |
| "loss": 2.5363, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.0383999385600983, | |
| "grad_norm": 248.08848571777344, | |
| "learning_rate": 7.674757203884739e-06, | |
| "loss": 2.7929, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.03855993830409871, | |
| "grad_norm": 3.7647440433502197, | |
| "learning_rate": 7.70675669189293e-06, | |
| "loss": 2.617, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.038719938048099124, | |
| "grad_norm": 3.100020170211792, | |
| "learning_rate": 7.738756179901122e-06, | |
| "loss": 2.9711, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.038879937792099535, | |
| "grad_norm": 69.79640197753906, | |
| "learning_rate": 7.770755667909315e-06, | |
| "loss": 2.7726, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.03903993753609994, | |
| "grad_norm": 190.2179412841797, | |
| "learning_rate": 7.802755155917506e-06, | |
| "loss": 2.5849, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.03919993728010035, | |
| "grad_norm": 75.47491455078125, | |
| "learning_rate": 7.834754643925698e-06, | |
| "loss": 2.3231, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.03935993702410076, | |
| "grad_norm": 13.3529691696167, | |
| "learning_rate": 7.866754131933889e-06, | |
| "loss": 2.2477, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.03951993676810117, | |
| "grad_norm": 280.162109375, | |
| "learning_rate": 7.89875361994208e-06, | |
| "loss": 2.5487, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.03967993651210158, | |
| "grad_norm": 376.9624938964844, | |
| "learning_rate": 7.930753107950273e-06, | |
| "loss": 2.5175, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.03983993625610199, | |
| "grad_norm": 341.099609375, | |
| "learning_rate": 7.962752595958465e-06, | |
| "loss": 2.6758, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.0399999360001024, | |
| "grad_norm": 436.5195007324219, | |
| "learning_rate": 7.994752083966658e-06, | |
| "loss": 2.7313, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.04015993574410281, | |
| "grad_norm": 274.91363525390625, | |
| "learning_rate": 8.026751571974849e-06, | |
| "loss": 2.4846, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.04031993548810322, | |
| "grad_norm": 183.5716094970703, | |
| "learning_rate": 8.05875105998304e-06, | |
| "loss": 2.8697, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.04047993523210363, | |
| "grad_norm": 70.23844909667969, | |
| "learning_rate": 8.090750547991234e-06, | |
| "loss": 2.5289, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.04063993497610404, | |
| "grad_norm": 139.3669891357422, | |
| "learning_rate": 8.122750035999425e-06, | |
| "loss": 2.235, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.04079993472010445, | |
| "grad_norm": 242.79315185546875, | |
| "learning_rate": 8.154429529127534e-06, | |
| "loss": 2.5028, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.040959934464104855, | |
| "grad_norm": 257.0070495605469, | |
| "learning_rate": 8.186429017135727e-06, | |
| "loss": 2.6295, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.041119934208105266, | |
| "grad_norm": 314.8670959472656, | |
| "learning_rate": 8.218428505143918e-06, | |
| "loss": 2.6159, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.04127993395210568, | |
| "grad_norm": 284.12762451171875, | |
| "learning_rate": 8.250427993152111e-06, | |
| "loss": 2.4447, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.04143993369610609, | |
| "grad_norm": 5.427358627319336, | |
| "learning_rate": 8.282427481160302e-06, | |
| "loss": 2.7233, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.0415999334401065, | |
| "grad_norm": 240.23260498046875, | |
| "learning_rate": 8.314426969168494e-06, | |
| "loss": 2.5651, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.0417599331841069, | |
| "grad_norm": 15.093184471130371, | |
| "learning_rate": 8.346426457176687e-06, | |
| "loss": 2.1317, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.04191993292810731, | |
| "grad_norm": 14.953177452087402, | |
| "learning_rate": 8.378425945184878e-06, | |
| "loss": 2.6157, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.042079932672107724, | |
| "grad_norm": 242.84718322753906, | |
| "learning_rate": 8.410105438312987e-06, | |
| "loss": 2.7385, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.042239932416108135, | |
| "grad_norm": 1.3409643173217773, | |
| "learning_rate": 8.44210492632118e-06, | |
| "loss": 2.4642, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.042399932160108546, | |
| "grad_norm": 90.02801513671875, | |
| "learning_rate": 8.474104414329371e-06, | |
| "loss": 2.0621, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.04255993190410896, | |
| "grad_norm": 11.879080772399902, | |
| "learning_rate": 8.506103902337564e-06, | |
| "loss": 2.3864, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.04271993164810936, | |
| "grad_norm": 598.356689453125, | |
| "learning_rate": 8.538103390345756e-06, | |
| "loss": 2.6951, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.04287993139210977, | |
| "grad_norm": 144.25924682617188, | |
| "learning_rate": 8.570102878353947e-06, | |
| "loss": 2.2628, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.04303993113611018, | |
| "grad_norm": 521.1145629882812, | |
| "learning_rate": 8.602102366362138e-06, | |
| "loss": 2.7538, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.04319993088011059, | |
| "grad_norm": 86.13031005859375, | |
| "learning_rate": 8.63410185437033e-06, | |
| "loss": 2.6871, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.043359930624111004, | |
| "grad_norm": 268.4532775878906, | |
| "learning_rate": 8.666101342378523e-06, | |
| "loss": 2.2453, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.04351993036811141, | |
| "grad_norm": 531.1592407226562, | |
| "learning_rate": 8.698100830386714e-06, | |
| "loss": 1.6334, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.04367993011211182, | |
| "grad_norm": 166.83230590820312, | |
| "learning_rate": 8.730100318394906e-06, | |
| "loss": 2.666, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.04383992985611223, | |
| "grad_norm": 208.4716033935547, | |
| "learning_rate": 8.762099806403099e-06, | |
| "loss": 2.128, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.04399992960011264, | |
| "grad_norm": 257.9130859375, | |
| "learning_rate": 8.79409929441129e-06, | |
| "loss": 2.7573, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.04415992934411305, | |
| "grad_norm": 85.08763885498047, | |
| "learning_rate": 8.826098782419481e-06, | |
| "loss": 2.5276, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.04431992908811346, | |
| "grad_norm": 8.960221290588379, | |
| "learning_rate": 8.858098270427674e-06, | |
| "loss": 2.2438, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.044479928832113866, | |
| "grad_norm": 404.66558837890625, | |
| "learning_rate": 8.890097758435866e-06, | |
| "loss": 2.3156, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.04463992857611428, | |
| "grad_norm": 151.23495483398438, | |
| "learning_rate": 8.922097246444059e-06, | |
| "loss": 2.1735, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.04479992832011469, | |
| "grad_norm": 151.7221221923828, | |
| "learning_rate": 8.95409673445225e-06, | |
| "loss": 2.1733, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.0449599280641151, | |
| "grad_norm": 228.95974731445312, | |
| "learning_rate": 8.986096222460441e-06, | |
| "loss": 2.4094, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.04511992780811551, | |
| "grad_norm": 522.6806640625, | |
| "learning_rate": 9.018095710468633e-06, | |
| "loss": 2.8484, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.04527992755211592, | |
| "grad_norm": 16.065011978149414, | |
| "learning_rate": 9.050095198476824e-06, | |
| "loss": 2.4507, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.045439927296116324, | |
| "grad_norm": 227.2984619140625, | |
| "learning_rate": 9.082094686485017e-06, | |
| "loss": 2.6822, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.045599927040116735, | |
| "grad_norm": 430.3262634277344, | |
| "learning_rate": 9.114094174493209e-06, | |
| "loss": 2.1191, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.045759926784117146, | |
| "grad_norm": 0.1830236166715622, | |
| "learning_rate": 9.1460936625014e-06, | |
| "loss": 2.0696, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.045919926528117556, | |
| "grad_norm": 97.45941162109375, | |
| "learning_rate": 9.178093150509593e-06, | |
| "loss": 2.4027, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.04607992627211797, | |
| "grad_norm": 22.469968795776367, | |
| "learning_rate": 9.210092638517784e-06, | |
| "loss": 1.7958, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.04623992601611837, | |
| "grad_norm": 103.27215576171875, | |
| "learning_rate": 9.242092126525977e-06, | |
| "loss": 2.5874, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.04639992576011878, | |
| "grad_norm": 578.951171875, | |
| "learning_rate": 9.274091614534169e-06, | |
| "loss": 2.2679, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.04655992550411919, | |
| "grad_norm": 6.261137008666992, | |
| "learning_rate": 9.30609110254236e-06, | |
| "loss": 2.6394, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.046719925248119604, | |
| "grad_norm": 113.35989379882812, | |
| "learning_rate": 9.338090590550551e-06, | |
| "loss": 1.7998, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.046879924992120015, | |
| "grad_norm": 116.46363830566406, | |
| "learning_rate": 9.370090078558743e-06, | |
| "loss": 2.6834, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.047039924736120425, | |
| "grad_norm": 84.5538101196289, | |
| "learning_rate": 9.402089566566936e-06, | |
| "loss": 2.1242, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.04719992448012083, | |
| "grad_norm": 150.44454956054688, | |
| "learning_rate": 9.434089054575127e-06, | |
| "loss": 2.0039, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.04735992422412124, | |
| "grad_norm": 12.482616424560547, | |
| "learning_rate": 9.466088542583319e-06, | |
| "loss": 2.018, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.04751992396812165, | |
| "grad_norm": 1.1050609350204468, | |
| "learning_rate": 9.498088030591512e-06, | |
| "loss": 2.9357, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.04767992371212206, | |
| "grad_norm": 256.4771423339844, | |
| "learning_rate": 9.530087518599703e-06, | |
| "loss": 2.1914, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.04783992345612247, | |
| "grad_norm": 178.9422149658203, | |
| "learning_rate": 9.562087006607896e-06, | |
| "loss": 2.0968, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.047999923200122876, | |
| "grad_norm": 160.1494140625, | |
| "learning_rate": 9.594086494616087e-06, | |
| "loss": 1.9762, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.04815992294412329, | |
| "grad_norm": 86.46272277832031, | |
| "learning_rate": 9.626085982624279e-06, | |
| "loss": 2.1436, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.0483199226881237, | |
| "grad_norm": 76.13285064697266, | |
| "learning_rate": 9.658085470632472e-06, | |
| "loss": 2.1919, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.04847992243212411, | |
| "grad_norm": 2.952242374420166, | |
| "learning_rate": 9.690084958640661e-06, | |
| "loss": 1.9683, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.04863992217612452, | |
| "grad_norm": 33.4036979675293, | |
| "learning_rate": 9.722084446648855e-06, | |
| "loss": 2.3543, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.04879992192012493, | |
| "grad_norm": 173.6257781982422, | |
| "learning_rate": 9.753763939776965e-06, | |
| "loss": 2.0642, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.048959921664125335, | |
| "grad_norm": 0.08548393100500107, | |
| "learning_rate": 9.785763427785156e-06, | |
| "loss": 1.8447, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.049119921408125745, | |
| "grad_norm": 111.82203674316406, | |
| "learning_rate": 9.817762915793348e-06, | |
| "loss": 2.3467, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.049279921152126156, | |
| "grad_norm": 142.97500610351562, | |
| "learning_rate": 9.84976240380154e-06, | |
| "loss": 2.6461, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.04943992089612657, | |
| "grad_norm": 417.88677978515625, | |
| "learning_rate": 9.881761891809732e-06, | |
| "loss": 2.028, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.04959992064012698, | |
| "grad_norm": 4.543129920959473, | |
| "learning_rate": 9.913761379817923e-06, | |
| "loss": 1.4188, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.04975992038412739, | |
| "grad_norm": 205.02293395996094, | |
| "learning_rate": 9.945760867826115e-06, | |
| "loss": 2.7219, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.04991992012812779, | |
| "grad_norm": 123.40583038330078, | |
| "learning_rate": 9.977760355834308e-06, | |
| "loss": 2.2345, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.050079919872128204, | |
| "grad_norm": 0.9410820603370667, | |
| "learning_rate": 1.00097598438425e-05, | |
| "loss": 2.201, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.050239919616128614, | |
| "grad_norm": 51.27448272705078, | |
| "learning_rate": 1.004175933185069e-05, | |
| "loss": 2.092, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.050399919360129025, | |
| "grad_norm": 258.7269592285156, | |
| "learning_rate": 1.0073758819858884e-05, | |
| "loss": 2.2871, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.050559919104129436, | |
| "grad_norm": 108.11058044433594, | |
| "learning_rate": 1.0105758307867075e-05, | |
| "loss": 2.0167, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.05071991884812984, | |
| "grad_norm": 229.5725555419922, | |
| "learning_rate": 1.0137757795875266e-05, | |
| "loss": 1.9175, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.05087991859213025, | |
| "grad_norm": 204.41357421875, | |
| "learning_rate": 1.016975728388346e-05, | |
| "loss": 2.2229, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.05103991833613066, | |
| "grad_norm": 8.951689720153809, | |
| "learning_rate": 1.020175677189165e-05, | |
| "loss": 2.1196, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.05119991808013107, | |
| "grad_norm": 275.85198974609375, | |
| "learning_rate": 1.0233756259899844e-05, | |
| "loss": 2.2192, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.05135991782413148, | |
| "grad_norm": 359.066650390625, | |
| "learning_rate": 1.0265755747908035e-05, | |
| "loss": 1.6462, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.051519917568131894, | |
| "grad_norm": 0.10183493793010712, | |
| "learning_rate": 1.0297755235916226e-05, | |
| "loss": 2.099, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.0516799173121323, | |
| "grad_norm": 43.3016357421875, | |
| "learning_rate": 1.0329434729044337e-05, | |
| "loss": 2.0914, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.05183991705613271, | |
| "grad_norm": 97.42915344238281, | |
| "learning_rate": 1.0361434217052528e-05, | |
| "loss": 2.3295, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.05199991680013312, | |
| "grad_norm": 1.9172292947769165, | |
| "learning_rate": 1.039343370506072e-05, | |
| "loss": 2.256, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.05215991654413353, | |
| "grad_norm": 157.83743286132812, | |
| "learning_rate": 1.0425433193068913e-05, | |
| "loss": 1.7662, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.05231991628813394, | |
| "grad_norm": 1.3025041818618774, | |
| "learning_rate": 1.0457432681077104e-05, | |
| "loss": 1.7234, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.052479916032134345, | |
| "grad_norm": 234.8426971435547, | |
| "learning_rate": 1.0489432169085297e-05, | |
| "loss": 1.984, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.052639915776134756, | |
| "grad_norm": 9.249500274658203, | |
| "learning_rate": 1.0521431657093488e-05, | |
| "loss": 2.1815, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.05279991552013517, | |
| "grad_norm": 164.6519012451172, | |
| "learning_rate": 1.055343114510168e-05, | |
| "loss": 1.4987, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.05295991526413558, | |
| "grad_norm": 145.9049072265625, | |
| "learning_rate": 1.0585430633109873e-05, | |
| "loss": 2.0034, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.05311991500813599, | |
| "grad_norm": 79.73159790039062, | |
| "learning_rate": 1.0617430121118062e-05, | |
| "loss": 2.6008, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.0532799147521364, | |
| "grad_norm": 131.95318603515625, | |
| "learning_rate": 1.0649429609126254e-05, | |
| "loss": 2.4585, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.0534399144961368, | |
| "grad_norm": 44.75098419189453, | |
| "learning_rate": 1.0681429097134445e-05, | |
| "loss": 1.881, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.053599914240137214, | |
| "grad_norm": 0.9141740202903748, | |
| "learning_rate": 1.0713428585142638e-05, | |
| "loss": 1.8738, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.053759913984137625, | |
| "grad_norm": 248.49734497070312, | |
| "learning_rate": 1.074542807315083e-05, | |
| "loss": 1.9726, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.053919913728138036, | |
| "grad_norm": 167.88706970214844, | |
| "learning_rate": 1.0777427561159021e-05, | |
| "loss": 2.3734, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.05407991347213845, | |
| "grad_norm": 0.43971773982048035, | |
| "learning_rate": 1.0809427049167214e-05, | |
| "loss": 1.6898, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.05423991321613886, | |
| "grad_norm": 713.7942504882812, | |
| "learning_rate": 1.0841426537175405e-05, | |
| "loss": 2.2171, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.05439991296013926, | |
| "grad_norm": 57.55624771118164, | |
| "learning_rate": 1.0873426025183598e-05, | |
| "loss": 1.4453, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.05455991270413967, | |
| "grad_norm": 409.5030822753906, | |
| "learning_rate": 1.090542551319179e-05, | |
| "loss": 1.5057, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.05471991244814008, | |
| "grad_norm": 60.115047454833984, | |
| "learning_rate": 1.0937425001199981e-05, | |
| "loss": 2.1497, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.054879912192140494, | |
| "grad_norm": 0.7692262530326843, | |
| "learning_rate": 1.0969424489208174e-05, | |
| "loss": 1.8618, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.055039911936140905, | |
| "grad_norm": 698.8638916015625, | |
| "learning_rate": 1.1001423977216366e-05, | |
| "loss": 1.7878, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.05519991168014131, | |
| "grad_norm": 0.5103877186775208, | |
| "learning_rate": 1.1033423465224557e-05, | |
| "loss": 1.8199, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.05535991142414172, | |
| "grad_norm": 347.1667175292969, | |
| "learning_rate": 1.106542295323275e-05, | |
| "loss": 2.1649, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.05551991116814213, | |
| "grad_norm": 99.95459747314453, | |
| "learning_rate": 1.1097422441240941e-05, | |
| "loss": 1.7906, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.05567991091214254, | |
| "grad_norm": 211.90087890625, | |
| "learning_rate": 1.1129421929249133e-05, | |
| "loss": 1.6816, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.05583991065614295, | |
| "grad_norm": 60.790199279785156, | |
| "learning_rate": 1.1161421417257326e-05, | |
| "loss": 2.1464, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.05599991040014336, | |
| "grad_norm": 585.09716796875, | |
| "learning_rate": 1.1193420905265517e-05, | |
| "loss": 2.0039, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.05615991014414377, | |
| "grad_norm": 0.2061644047498703, | |
| "learning_rate": 1.122542039327371e-05, | |
| "loss": 1.735, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.05631990988814418, | |
| "grad_norm": 204.5592498779297, | |
| "learning_rate": 1.1257419881281901e-05, | |
| "loss": 1.853, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.05647990963214459, | |
| "grad_norm": 695.4961547851562, | |
| "learning_rate": 1.1289419369290093e-05, | |
| "loss": 1.6068, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.056639909376145, | |
| "grad_norm": 220.02767944335938, | |
| "learning_rate": 1.1321418857298282e-05, | |
| "loss": 1.6349, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.05679990912014541, | |
| "grad_norm": 0.07823936641216278, | |
| "learning_rate": 1.1353418345306476e-05, | |
| "loss": 1.9571, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.056959908864145814, | |
| "grad_norm": 31.91838836669922, | |
| "learning_rate": 1.1385417833314667e-05, | |
| "loss": 1.5854, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.057119908608146225, | |
| "grad_norm": 1040.179931640625, | |
| "learning_rate": 1.1417417321322858e-05, | |
| "loss": 1.9756, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.057279908352146636, | |
| "grad_norm": 16.008800506591797, | |
| "learning_rate": 1.1449416809331051e-05, | |
| "loss": 1.9816, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.057439908096147047, | |
| "grad_norm": 226.522705078125, | |
| "learning_rate": 1.1481416297339243e-05, | |
| "loss": 1.6758, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.05759990784014746, | |
| "grad_norm": 85.04449462890625, | |
| "learning_rate": 1.1513415785347436e-05, | |
| "loss": 2.2583, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.05775990758414787, | |
| "grad_norm": 3.989626884460449, | |
| "learning_rate": 1.1545415273355627e-05, | |
| "loss": 1.7584, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.05791990732814827, | |
| "grad_norm": 63.272911071777344, | |
| "learning_rate": 1.1577414761363818e-05, | |
| "loss": 1.9894, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.05807990707214868, | |
| "grad_norm": 175.4257049560547, | |
| "learning_rate": 1.1609414249372011e-05, | |
| "loss": 2.3922, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.058239906816149094, | |
| "grad_norm": 160.36253356933594, | |
| "learning_rate": 1.164109374250012e-05, | |
| "loss": 2.0077, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.058399906560149505, | |
| "grad_norm": 95.23787689208984, | |
| "learning_rate": 1.1673093230508312e-05, | |
| "loss": 2.3684, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.058559906304149915, | |
| "grad_norm": 173.092041015625, | |
| "learning_rate": 1.1705092718516505e-05, | |
| "loss": 2.1103, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.058719906048150326, | |
| "grad_norm": 719.3712768554688, | |
| "learning_rate": 1.1736772211644613e-05, | |
| "loss": 2.0728, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.05887990579215073, | |
| "grad_norm": 1.7120122909545898, | |
| "learning_rate": 1.1768771699652806e-05, | |
| "loss": 1.9364, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.05903990553615114, | |
| "grad_norm": 120.16387176513672, | |
| "learning_rate": 1.1800771187660998e-05, | |
| "loss": 2.5203, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.05919990528015155, | |
| "grad_norm": 46.504329681396484, | |
| "learning_rate": 1.1832770675669189e-05, | |
| "loss": 1.8473, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.05935990502415196, | |
| "grad_norm": 255.33987426757812, | |
| "learning_rate": 1.1864770163677382e-05, | |
| "loss": 1.8076, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.059519904768152374, | |
| "grad_norm": 130.05715942382812, | |
| "learning_rate": 1.1896769651685574e-05, | |
| "loss": 2.0157, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.05967990451215278, | |
| "grad_norm": 201.22866821289062, | |
| "learning_rate": 1.1928769139693765e-05, | |
| "loss": 2.1587, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.05983990425615319, | |
| "grad_norm": 0.2600236237049103, | |
| "learning_rate": 1.1960768627701958e-05, | |
| "loss": 1.9825, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.0599999040001536, | |
| "grad_norm": 0.20701654255390167, | |
| "learning_rate": 1.199276811571015e-05, | |
| "loss": 2.0693, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.06015990374415401, | |
| "grad_norm": 247.0039520263672, | |
| "learning_rate": 1.202476760371834e-05, | |
| "loss": 1.5505, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.06031990348815442, | |
| "grad_norm": 15.698258399963379, | |
| "learning_rate": 1.2056767091726534e-05, | |
| "loss": 1.5472, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.06047990323215483, | |
| "grad_norm": 357.7092590332031, | |
| "learning_rate": 1.2088766579734725e-05, | |
| "loss": 2.0568, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.060639902976155236, | |
| "grad_norm": 54.52446365356445, | |
| "learning_rate": 1.2120766067742918e-05, | |
| "loss": 1.9219, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.060799902720155646, | |
| "grad_norm": 240.81784057617188, | |
| "learning_rate": 1.215276555575111e-05, | |
| "loss": 2.091, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.06095990246415606, | |
| "grad_norm": 0.14063161611557007, | |
| "learning_rate": 1.21847650437593e-05, | |
| "loss": 2.0523, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.06111990220815647, | |
| "grad_norm": 101.88555145263672, | |
| "learning_rate": 1.2216764531767494e-05, | |
| "loss": 1.7628, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.06127990195215688, | |
| "grad_norm": 1.7761729955673218, | |
| "learning_rate": 1.2248764019775685e-05, | |
| "loss": 1.8753, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.06143990169615728, | |
| "grad_norm": 183.46917724609375, | |
| "learning_rate": 1.2280763507783877e-05, | |
| "loss": 1.846, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.061599901440157694, | |
| "grad_norm": 0.008245576173067093, | |
| "learning_rate": 1.231276299579207e-05, | |
| "loss": 1.803, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.061759901184158104, | |
| "grad_norm": 389.3524169921875, | |
| "learning_rate": 1.2344762483800261e-05, | |
| "loss": 2.1226, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.061919900928158515, | |
| "grad_norm": 457.38519287109375, | |
| "learning_rate": 1.2376761971808452e-05, | |
| "loss": 2.0906, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.062079900672158926, | |
| "grad_norm": 95.94575500488281, | |
| "learning_rate": 1.2408441464936563e-05, | |
| "loss": 1.4321, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.06223990041615934, | |
| "grad_norm": 0.09420862793922424, | |
| "learning_rate": 1.2440440952944754e-05, | |
| "loss": 2.5214, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.06239990016015974, | |
| "grad_norm": 7.472883224487305, | |
| "learning_rate": 1.2472440440952947e-05, | |
| "loss": 1.5412, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.06255989990416015, | |
| "grad_norm": 198.42828369140625, | |
| "learning_rate": 1.2504439928961139e-05, | |
| "loss": 1.4382, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.06271989964816056, | |
| "grad_norm": 1.2646727561950684, | |
| "learning_rate": 1.253643941696933e-05, | |
| "loss": 1.8417, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.06287989939216097, | |
| "grad_norm": 85.20125579833984, | |
| "learning_rate": 1.2568438904977523e-05, | |
| "loss": 2.1105, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.06303989913616138, | |
| "grad_norm": 6.063973903656006, | |
| "learning_rate": 1.2600438392985714e-05, | |
| "loss": 1.6347, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.0631998988801618, | |
| "grad_norm": 1.7712761163711548, | |
| "learning_rate": 1.2632437880993904e-05, | |
| "loss": 2.0372, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.0633598986241622, | |
| "grad_norm": 105.22515106201172, | |
| "learning_rate": 1.2664437369002095e-05, | |
| "loss": 1.6222, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.06351989836816262, | |
| "grad_norm": 152.34910583496094, | |
| "learning_rate": 1.2696436857010288e-05, | |
| "loss": 1.8033, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.06367989811216301, | |
| "grad_norm": 0.4972204864025116, | |
| "learning_rate": 1.272843634501848e-05, | |
| "loss": 1.9847, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.06383989785616342, | |
| "grad_norm": 145.8481903076172, | |
| "learning_rate": 1.2760435833026673e-05, | |
| "loss": 2.1354, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.06399989760016384, | |
| "grad_norm": 0.4929490089416504, | |
| "learning_rate": 1.2792435321034864e-05, | |
| "loss": 1.6792, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.06415989734416425, | |
| "grad_norm": 0.004757192451506853, | |
| "learning_rate": 1.2824434809043055e-05, | |
| "loss": 2.1055, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.06431989708816466, | |
| "grad_norm": 133.86878967285156, | |
| "learning_rate": 1.2856434297051249e-05, | |
| "loss": 2.0657, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.06447989683216507, | |
| "grad_norm": 75.14216613769531, | |
| "learning_rate": 1.288843378505944e-05, | |
| "loss": 1.9618, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.06463989657616548, | |
| "grad_norm": 0.47655782103538513, | |
| "learning_rate": 1.2920433273067631e-05, | |
| "loss": 1.5807, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.06479989632016589, | |
| "grad_norm": 0.25797244906425476, | |
| "learning_rate": 1.2952432761075824e-05, | |
| "loss": 1.6451, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.0649598960641663, | |
| "grad_norm": 0.013840774074196815, | |
| "learning_rate": 1.2984432249084016e-05, | |
| "loss": 2.1299, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.06511989580816671, | |
| "grad_norm": 0.016265127807855606, | |
| "learning_rate": 1.3016431737092207e-05, | |
| "loss": 1.9912, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.06527989555216712, | |
| "grad_norm": 91.05821228027344, | |
| "learning_rate": 1.30484312251004e-05, | |
| "loss": 1.6392, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.06543989529616753, | |
| "grad_norm": 0.5753430724143982, | |
| "learning_rate": 1.3080430713108591e-05, | |
| "loss": 1.8049, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.06559989504016793, | |
| "grad_norm": 1.7056798934936523, | |
| "learning_rate": 1.3112430201116784e-05, | |
| "loss": 1.9832, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.06575989478416834, | |
| "grad_norm": 115.96708679199219, | |
| "learning_rate": 1.3144109694244893e-05, | |
| "loss": 2.0309, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.06591989452816875, | |
| "grad_norm": 128.86553955078125, | |
| "learning_rate": 1.3176109182253085e-05, | |
| "loss": 1.8362, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.06607989427216916, | |
| "grad_norm": 8.644057273864746, | |
| "learning_rate": 1.3208108670261278e-05, | |
| "loss": 2.2709, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.06623989401616957, | |
| "grad_norm": 105.3166732788086, | |
| "learning_rate": 1.3240108158269469e-05, | |
| "loss": 2.0785, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.06639989376016998, | |
| "grad_norm": 66.77593231201172, | |
| "learning_rate": 1.327210764627766e-05, | |
| "loss": 1.5627, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.0665598935041704, | |
| "grad_norm": 0.6800107359886169, | |
| "learning_rate": 1.3304107134285853e-05, | |
| "loss": 1.6058, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.0667198932481708, | |
| "grad_norm": 138.5995330810547, | |
| "learning_rate": 1.3336106622294045e-05, | |
| "loss": 1.7099, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.06687989299217122, | |
| "grad_norm": 0.2328547090291977, | |
| "learning_rate": 1.3368106110302238e-05, | |
| "loss": 1.7096, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.06703989273617163, | |
| "grad_norm": 82.12950897216797, | |
| "learning_rate": 1.3400105598310429e-05, | |
| "loss": 1.6429, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.06719989248017204, | |
| "grad_norm": 1.3431618213653564, | |
| "learning_rate": 1.343210508631862e-05, | |
| "loss": 1.2514, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.06735989222417244, | |
| "grad_norm": 30.410139083862305, | |
| "learning_rate": 1.3464104574326814e-05, | |
| "loss": 1.5746, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.06751989196817285, | |
| "grad_norm": 106.41495513916016, | |
| "learning_rate": 1.3496104062335005e-05, | |
| "loss": 1.7186, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.06767989171217326, | |
| "grad_norm": 200.46978759765625, | |
| "learning_rate": 1.3528103550343195e-05, | |
| "loss": 1.8152, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.06783989145617367, | |
| "grad_norm": 0.09822285175323486, | |
| "learning_rate": 1.3560103038351386e-05, | |
| "loss": 1.705, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.06799989120017408, | |
| "grad_norm": 438.903564453125, | |
| "learning_rate": 1.3592102526359579e-05, | |
| "loss": 1.6779, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.06815989094417449, | |
| "grad_norm": 0.03262553736567497, | |
| "learning_rate": 1.362410201436777e-05, | |
| "loss": 1.8157, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.0683198906881749, | |
| "grad_norm": 376.0313720703125, | |
| "learning_rate": 1.3656101502375962e-05, | |
| "loss": 1.8464, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.06847989043217531, | |
| "grad_norm": 29.421518325805664, | |
| "learning_rate": 1.3688100990384155e-05, | |
| "loss": 1.748, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.06863989017617572, | |
| "grad_norm": 183.51832580566406, | |
| "learning_rate": 1.3720100478392346e-05, | |
| "loss": 1.6836, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.06879988992017613, | |
| "grad_norm": 0.0013067092513665557, | |
| "learning_rate": 1.3752099966400539e-05, | |
| "loss": 1.65, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.06895988966417654, | |
| "grad_norm": 0.006181403063237667, | |
| "learning_rate": 1.378409945440873e-05, | |
| "loss": 1.5632, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.06911988940817694, | |
| "grad_norm": 0.134628027677536, | |
| "learning_rate": 1.3816098942416922e-05, | |
| "loss": 2.0987, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.06927988915217735, | |
| "grad_norm": 235.39088439941406, | |
| "learning_rate": 1.3848098430425115e-05, | |
| "loss": 1.5783, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.06943988889617776, | |
| "grad_norm": 89.28943634033203, | |
| "learning_rate": 1.3880097918433306e-05, | |
| "loss": 1.8029, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.06959988864017817, | |
| "grad_norm": 197.04258728027344, | |
| "learning_rate": 1.3911777411561415e-05, | |
| "loss": 1.7154, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.06975988838417858, | |
| "grad_norm": 96.05148315429688, | |
| "learning_rate": 1.3943776899569608e-05, | |
| "loss": 1.663, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.069919888128179, | |
| "grad_norm": 8.378194808959961, | |
| "learning_rate": 1.39757763875778e-05, | |
| "loss": 1.4403, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.0700798878721794, | |
| "grad_norm": 1932.4417724609375, | |
| "learning_rate": 1.4007775875585992e-05, | |
| "loss": 1.6513, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.07023988761617982, | |
| "grad_norm": 185.06163024902344, | |
| "learning_rate": 1.4039775363594184e-05, | |
| "loss": 2.2041, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.07039988736018023, | |
| "grad_norm": 1.7904412746429443, | |
| "learning_rate": 1.4071774851602375e-05, | |
| "loss": 2.3908, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.07055988710418064, | |
| "grad_norm": 0.07365602254867554, | |
| "learning_rate": 1.4103774339610568e-05, | |
| "loss": 1.7153, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.07071988684818105, | |
| "grad_norm": 117.01744842529297, | |
| "learning_rate": 1.413577382761876e-05, | |
| "loss": 2.2112, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.07087988659218145, | |
| "grad_norm": 509.897216796875, | |
| "learning_rate": 1.4167773315626951e-05, | |
| "loss": 1.8663, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.07103988633618186, | |
| "grad_norm": 188.78509521484375, | |
| "learning_rate": 1.4199772803635144e-05, | |
| "loss": 1.8206, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.07119988608018227, | |
| "grad_norm": 122.20122528076172, | |
| "learning_rate": 1.4231772291643335e-05, | |
| "loss": 2.2269, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.07135988582418268, | |
| "grad_norm": 894.0123901367188, | |
| "learning_rate": 1.4263771779651527e-05, | |
| "loss": 1.8159, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.07151988556818309, | |
| "grad_norm": 217.02325439453125, | |
| "learning_rate": 1.429577126765972e-05, | |
| "loss": 1.9257, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.0716798853121835, | |
| "grad_norm": 0.4048191010951996, | |
| "learning_rate": 1.4327770755667911e-05, | |
| "loss": 2.087, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.07183988505618391, | |
| "grad_norm": 85.02055358886719, | |
| "learning_rate": 1.4359770243676104e-05, | |
| "loss": 1.3623, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.07199988480018432, | |
| "grad_norm": 90.52297973632812, | |
| "learning_rate": 1.4391769731684295e-05, | |
| "loss": 1.5747, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.07215988454418473, | |
| "grad_norm": 28.14681053161621, | |
| "learning_rate": 1.4423769219692485e-05, | |
| "loss": 1.8051, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.07231988428818514, | |
| "grad_norm": 0.31391066312789917, | |
| "learning_rate": 1.4455768707700676e-05, | |
| "loss": 2.3691, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.07247988403218555, | |
| "grad_norm": 0.08174788951873779, | |
| "learning_rate": 1.448776819570887e-05, | |
| "loss": 2.1125, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.07263988377618597, | |
| "grad_norm": 107.4333267211914, | |
| "learning_rate": 1.4519767683717061e-05, | |
| "loss": 1.566, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.07279988352018636, | |
| "grad_norm": 119.36717224121094, | |
| "learning_rate": 1.4551767171725252e-05, | |
| "loss": 1.5042, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.07295988326418677, | |
| "grad_norm": 157.90757751464844, | |
| "learning_rate": 1.4583446664853364e-05, | |
| "loss": 1.9469, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.07311988300818718, | |
| "grad_norm": 172.0279998779297, | |
| "learning_rate": 1.4615446152861554e-05, | |
| "loss": 1.9346, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.0732798827521876, | |
| "grad_norm": 961.58935546875, | |
| "learning_rate": 1.4647445640869747e-05, | |
| "loss": 1.4362, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.073439882496188, | |
| "grad_norm": 0.0017953349743038416, | |
| "learning_rate": 1.4679445128877938e-05, | |
| "loss": 1.9164, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.07359988224018842, | |
| "grad_norm": 67.61031341552734, | |
| "learning_rate": 1.471144461688613e-05, | |
| "loss": 1.511, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.07375988198418883, | |
| "grad_norm": 223.40682983398438, | |
| "learning_rate": 1.4743444104894323e-05, | |
| "loss": 1.4523, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.07391988172818924, | |
| "grad_norm": 86.30171966552734, | |
| "learning_rate": 1.4775443592902514e-05, | |
| "loss": 1.1247, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.07407988147218965, | |
| "grad_norm": 147.9749755859375, | |
| "learning_rate": 1.4807443080910706e-05, | |
| "loss": 1.9694, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.07423988121619006, | |
| "grad_norm": 0.01015425007790327, | |
| "learning_rate": 1.4839442568918899e-05, | |
| "loss": 2.1909, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.07439988096019047, | |
| "grad_norm": 259.0996398925781, | |
| "learning_rate": 1.487144205692709e-05, | |
| "loss": 2.0247, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.07455988070419087, | |
| "grad_norm": 95.31226348876953, | |
| "learning_rate": 1.4903441544935281e-05, | |
| "loss": 1.2061, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.07471988044819128, | |
| "grad_norm": 173.83978271484375, | |
| "learning_rate": 1.4935441032943474e-05, | |
| "loss": 1.6151, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.07487988019219169, | |
| "grad_norm": 2.386795997619629, | |
| "learning_rate": 1.4967440520951666e-05, | |
| "loss": 1.6184, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.0750398799361921, | |
| "grad_norm": 429.2137756347656, | |
| "learning_rate": 1.4999440008959859e-05, | |
| "loss": 2.0375, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.07519987968019251, | |
| "grad_norm": 387.4931945800781, | |
| "learning_rate": 1.503143949696805e-05, | |
| "loss": 1.8357, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.07535987942419292, | |
| "grad_norm": 412.69384765625, | |
| "learning_rate": 1.5063438984976241e-05, | |
| "loss": 1.7605, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.07551987916819333, | |
| "grad_norm": 64.52519989013672, | |
| "learning_rate": 1.5095438472984435e-05, | |
| "loss": 2.1139, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.07567987891219374, | |
| "grad_norm": 2.706088066101074, | |
| "learning_rate": 1.5127437960992626e-05, | |
| "loss": 1.2971, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.07583987865619415, | |
| "grad_norm": 3.714489459991455, | |
| "learning_rate": 1.5159437449000817e-05, | |
| "loss": 1.7242, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.07599987840019456, | |
| "grad_norm": 5.220536708831787, | |
| "learning_rate": 1.519143693700901e-05, | |
| "loss": 1.2726, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.07615987814419498, | |
| "grad_norm": 133.04861450195312, | |
| "learning_rate": 1.5223436425017202e-05, | |
| "loss": 1.9947, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.07631987788819537, | |
| "grad_norm": 0.19635449349880219, | |
| "learning_rate": 1.5255435913025393e-05, | |
| "loss": 2.2796, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.07647987763219578, | |
| "grad_norm": 168.06861877441406, | |
| "learning_rate": 1.5287115406153503e-05, | |
| "loss": 1.6232, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.0766398773761962, | |
| "grad_norm": 0.004633053671568632, | |
| "learning_rate": 1.5319114894161697e-05, | |
| "loss": 1.3513, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.0767998771201966, | |
| "grad_norm": 0.0009558099554851651, | |
| "learning_rate": 1.5351114382169886e-05, | |
| "loss": 1.291, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.07695987686419702, | |
| "grad_norm": 1.2679246664047241, | |
| "learning_rate": 1.538311387017808e-05, | |
| "loss": 1.5954, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.07711987660819743, | |
| "grad_norm": 511.6206970214844, | |
| "learning_rate": 1.5415113358186272e-05, | |
| "loss": 1.6232, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.07727987635219784, | |
| "grad_norm": 190.66940307617188, | |
| "learning_rate": 1.5447112846194462e-05, | |
| "loss": 1.8858, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.07743987609619825, | |
| "grad_norm": 368.29022216796875, | |
| "learning_rate": 1.5479112334202655e-05, | |
| "loss": 1.6235, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.07759987584019866, | |
| "grad_norm": 0.00299979280680418, | |
| "learning_rate": 1.5511111822210848e-05, | |
| "loss": 1.9061, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.07775987558419907, | |
| "grad_norm": 96.37437438964844, | |
| "learning_rate": 1.5543111310219038e-05, | |
| "loss": 1.5919, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.07791987532819948, | |
| "grad_norm": 141.4491729736328, | |
| "learning_rate": 1.5575110798227227e-05, | |
| "loss": 1.8474, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.07807987507219988, | |
| "grad_norm": 9.810319900512695, | |
| "learning_rate": 1.560711028623542e-05, | |
| "loss": 1.7112, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.07823987481620029, | |
| "grad_norm": 0.20426060259342194, | |
| "learning_rate": 1.5639109774243613e-05, | |
| "loss": 1.8007, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.0783998745602007, | |
| "grad_norm": 2.3212544918060303, | |
| "learning_rate": 1.5671109262251803e-05, | |
| "loss": 1.7499, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.07855987430420111, | |
| "grad_norm": 2386.313232421875, | |
| "learning_rate": 1.5703108750259996e-05, | |
| "loss": 1.4046, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.07871987404820152, | |
| "grad_norm": 123.7901611328125, | |
| "learning_rate": 1.573510823826819e-05, | |
| "loss": 2.0843, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.07887987379220193, | |
| "grad_norm": 190.3510284423828, | |
| "learning_rate": 1.576710772627638e-05, | |
| "loss": 1.52, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.07903987353620234, | |
| "grad_norm": 0.0007205315632745624, | |
| "learning_rate": 1.5799107214284572e-05, | |
| "loss": 1.8708, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.07919987328020275, | |
| "grad_norm": 0.013503137975931168, | |
| "learning_rate": 1.5831106702292765e-05, | |
| "loss": 1.673, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.07935987302420316, | |
| "grad_norm": 65.62171936035156, | |
| "learning_rate": 1.5863106190300958e-05, | |
| "loss": 1.8457, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.07951987276820358, | |
| "grad_norm": 92.97589874267578, | |
| "learning_rate": 1.5895105678309148e-05, | |
| "loss": 1.5627, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.07967987251220399, | |
| "grad_norm": 21.424842834472656, | |
| "learning_rate": 1.5926785171437256e-05, | |
| "loss": 1.6497, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.07983987225620438, | |
| "grad_norm": 0.0011259341845288873, | |
| "learning_rate": 1.595878465944545e-05, | |
| "loss": 1.5787, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.0799998720002048, | |
| "grad_norm": 0.01448867842555046, | |
| "learning_rate": 1.5990784147453643e-05, | |
| "loss": 1.8507, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.0801598717442052, | |
| "grad_norm": 66.43374633789062, | |
| "learning_rate": 1.6022783635461832e-05, | |
| "loss": 1.4336, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.08031987148820562, | |
| "grad_norm": 27.450525283813477, | |
| "learning_rate": 1.6054783123470025e-05, | |
| "loss": 2.152, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.08047987123220603, | |
| "grad_norm": 33.92656707763672, | |
| "learning_rate": 1.6086782611478218e-05, | |
| "loss": 1.6311, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.08063987097620644, | |
| "grad_norm": 20.14742660522461, | |
| "learning_rate": 1.611878209948641e-05, | |
| "loss": 1.7442, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.08079987072020685, | |
| "grad_norm": 190.49342346191406, | |
| "learning_rate": 1.61507815874946e-05, | |
| "loss": 1.8063, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.08095987046420726, | |
| "grad_norm": 673.4315185546875, | |
| "learning_rate": 1.6182781075502794e-05, | |
| "loss": 1.4, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.08111987020820767, | |
| "grad_norm": 87.93296813964844, | |
| "learning_rate": 1.6214780563510987e-05, | |
| "loss": 1.6401, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.08127986995220808, | |
| "grad_norm": 10.013740539550781, | |
| "learning_rate": 1.6246780051519177e-05, | |
| "loss": 1.9426, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.08143986969620849, | |
| "grad_norm": 84.70770263671875, | |
| "learning_rate": 1.627877953952737e-05, | |
| "loss": 2.0937, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.0815998694402089, | |
| "grad_norm": 66.33674621582031, | |
| "learning_rate": 1.6310779027535563e-05, | |
| "loss": 1.8187, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.0817598691842093, | |
| "grad_norm": 0.013598043471574783, | |
| "learning_rate": 1.6342778515543753e-05, | |
| "loss": 2.1751, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.08191986892820971, | |
| "grad_norm": 0.8764291405677795, | |
| "learning_rate": 1.6374778003551946e-05, | |
| "loss": 2.1703, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.08207986867221012, | |
| "grad_norm": 14.436594009399414, | |
| "learning_rate": 1.640677749156014e-05, | |
| "loss": 1.4443, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.08223986841621053, | |
| "grad_norm": 0.27148157358169556, | |
| "learning_rate": 1.6438776979568328e-05, | |
| "loss": 1.9266, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.08239986816021094, | |
| "grad_norm": 11.139505386352539, | |
| "learning_rate": 1.6470776467576518e-05, | |
| "loss": 1.8226, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.08255986790421135, | |
| "grad_norm": 105.84121704101562, | |
| "learning_rate": 1.650277595558471e-05, | |
| "loss": 1.4394, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.08271986764821176, | |
| "grad_norm": 161.04141235351562, | |
| "learning_rate": 1.6534775443592904e-05, | |
| "loss": 1.052, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.08287986739221218, | |
| "grad_norm": 11.454148292541504, | |
| "learning_rate": 1.6566774931601094e-05, | |
| "loss": 1.0614, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.08303986713621259, | |
| "grad_norm": 0.03253089264035225, | |
| "learning_rate": 1.6598774419609287e-05, | |
| "loss": 1.4591, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.083199866880213, | |
| "grad_norm": 0.2750859558582306, | |
| "learning_rate": 1.663077390761748e-05, | |
| "loss": 1.6479, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.08335986662421341, | |
| "grad_norm": 3.381882667541504, | |
| "learning_rate": 1.666245340074559e-05, | |
| "loss": 1.7548, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.0835198663682138, | |
| "grad_norm": 102.45317840576172, | |
| "learning_rate": 1.669445288875378e-05, | |
| "loss": 1.6293, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.08367986611221422, | |
| "grad_norm": 122.25707244873047, | |
| "learning_rate": 1.672645237676197e-05, | |
| "loss": 1.7183, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.08383986585621463, | |
| "grad_norm": 1.0929410457611084, | |
| "learning_rate": 1.6758451864770164e-05, | |
| "loss": 1.2329, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.08399986560021504, | |
| "grad_norm": 0.0009238706552423537, | |
| "learning_rate": 1.6790451352778357e-05, | |
| "loss": 1.5292, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.08415986534421545, | |
| "grad_norm": 15.874957084655762, | |
| "learning_rate": 1.6822450840786547e-05, | |
| "loss": 1.6752, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.08431986508821586, | |
| "grad_norm": 8.129535675048828, | |
| "learning_rate": 1.685445032879474e-05, | |
| "loss": 1.3228, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.08447986483221627, | |
| "grad_norm": 196.6626434326172, | |
| "learning_rate": 1.6886449816802933e-05, | |
| "loss": 1.485, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.08463986457621668, | |
| "grad_norm": 257.9208679199219, | |
| "learning_rate": 1.6918449304811123e-05, | |
| "loss": 1.4228, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.08479986432021709, | |
| "grad_norm": 126.92493438720703, | |
| "learning_rate": 1.6950448792819316e-05, | |
| "loss": 1.1385, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.0849598640642175, | |
| "grad_norm": 218.52455139160156, | |
| "learning_rate": 1.698244828082751e-05, | |
| "loss": 1.1812, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.08511986380821791, | |
| "grad_norm": 0.32875338196754456, | |
| "learning_rate": 1.70144477688357e-05, | |
| "loss": 1.4763, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.08527986355221831, | |
| "grad_norm": 6.30516242980957, | |
| "learning_rate": 1.704644725684389e-05, | |
| "loss": 1.9444, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.08543986329621872, | |
| "grad_norm": 0.10023212432861328, | |
| "learning_rate": 1.7078446744852085e-05, | |
| "loss": 1.5316, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.08559986304021913, | |
| "grad_norm": 0.16311447322368622, | |
| "learning_rate": 1.7110446232860278e-05, | |
| "loss": 1.6928, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.08575986278421954, | |
| "grad_norm": 113.52496337890625, | |
| "learning_rate": 1.7142445720868467e-05, | |
| "loss": 1.4466, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.08591986252821995, | |
| "grad_norm": 202.2323760986328, | |
| "learning_rate": 1.717444520887666e-05, | |
| "loss": 1.438, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.08607986227222036, | |
| "grad_norm": 0.009465747512876987, | |
| "learning_rate": 1.7206444696884853e-05, | |
| "loss": 1.1629, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.08623986201622078, | |
| "grad_norm": 36.7415771484375, | |
| "learning_rate": 1.7238444184893043e-05, | |
| "loss": 1.3017, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.08639986176022119, | |
| "grad_norm": 238.30662536621094, | |
| "learning_rate": 1.7270443672901236e-05, | |
| "loss": 1.6614, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.0865598615042216, | |
| "grad_norm": 92.3208236694336, | |
| "learning_rate": 1.730244316090943e-05, | |
| "loss": 1.4535, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.08671986124822201, | |
| "grad_norm": 78.44776153564453, | |
| "learning_rate": 1.733444264891762e-05, | |
| "loss": 1.7061, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.08687986099222242, | |
| "grad_norm": 0.22147144377231598, | |
| "learning_rate": 1.736644213692581e-05, | |
| "loss": 1.4681, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.08703986073622282, | |
| "grad_norm": 11.244450569152832, | |
| "learning_rate": 1.7398441624934e-05, | |
| "loss": 1.3449, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.08719986048022323, | |
| "grad_norm": 0.002357147866860032, | |
| "learning_rate": 1.7430441112942195e-05, | |
| "loss": 1.8814, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.08735986022422364, | |
| "grad_norm": 0.008252524770796299, | |
| "learning_rate": 1.7462440600950384e-05, | |
| "loss": 1.5989, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.08751985996822405, | |
| "grad_norm": 61.25815200805664, | |
| "learning_rate": 1.7494440088958577e-05, | |
| "loss": 1.3711, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.08767985971222446, | |
| "grad_norm": 0.536085307598114, | |
| "learning_rate": 1.752643957696677e-05, | |
| "loss": 1.3199, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.08783985945622487, | |
| "grad_norm": 261.3522033691406, | |
| "learning_rate": 1.755811907009488e-05, | |
| "loss": 1.3713, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.08799985920022528, | |
| "grad_norm": 0.18219026923179626, | |
| "learning_rate": 1.7590118558103072e-05, | |
| "loss": 1.441, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.08815985894422569, | |
| "grad_norm": 2.0045886039733887, | |
| "learning_rate": 1.7622118046111262e-05, | |
| "loss": 1.268, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.0883198586882261, | |
| "grad_norm": 119.64205932617188, | |
| "learning_rate": 1.7654117534119455e-05, | |
| "loss": 1.1648, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.08847985843222651, | |
| "grad_norm": 1316.0831298828125, | |
| "learning_rate": 1.7686117022127648e-05, | |
| "loss": 1.8108, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.08863985817622692, | |
| "grad_norm": 0.016518862918019295, | |
| "learning_rate": 1.7718116510135838e-05, | |
| "loss": 1.4904, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.08879985792022732, | |
| "grad_norm": 0.0020672741811722517, | |
| "learning_rate": 1.774979600326395e-05, | |
| "loss": 1.2555, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.08895985766422773, | |
| "grad_norm": 0.0013748366618528962, | |
| "learning_rate": 1.778179549127214e-05, | |
| "loss": 1.2733, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.08911985740822814, | |
| "grad_norm": 156.89892578125, | |
| "learning_rate": 1.7813794979280332e-05, | |
| "loss": 1.5194, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.08927985715222855, | |
| "grad_norm": 0.034991975873708725, | |
| "learning_rate": 1.7845794467288526e-05, | |
| "loss": 1.7587, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.08943985689622896, | |
| "grad_norm": 25.602022171020508, | |
| "learning_rate": 1.7877793955296715e-05, | |
| "loss": 1.6183, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.08959985664022938, | |
| "grad_norm": 0.6393762230873108, | |
| "learning_rate": 1.7909793443304908e-05, | |
| "loss": 1.3596, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.08975985638422979, | |
| "grad_norm": 48.19321823120117, | |
| "learning_rate": 1.79417929313131e-05, | |
| "loss": 1.5248, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.0899198561282302, | |
| "grad_norm": 88.53876495361328, | |
| "learning_rate": 1.797379241932129e-05, | |
| "loss": 1.5177, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.09007985587223061, | |
| "grad_norm": 4.195464611053467, | |
| "learning_rate": 1.8005791907329484e-05, | |
| "loss": 1.7579, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.09023985561623102, | |
| "grad_norm": 4.309329986572266, | |
| "learning_rate": 1.8037791395337677e-05, | |
| "loss": 1.5508, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.09039985536023143, | |
| "grad_norm": 781.6338500976562, | |
| "learning_rate": 1.8069790883345867e-05, | |
| "loss": 1.5965, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.09055985510423184, | |
| "grad_norm": 0.2752499580383301, | |
| "learning_rate": 1.810179037135406e-05, | |
| "loss": 1.5762, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.09071985484823224, | |
| "grad_norm": 107.10926818847656, | |
| "learning_rate": 1.8133789859362253e-05, | |
| "loss": 1.7441, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.09087985459223265, | |
| "grad_norm": 97.79568481445312, | |
| "learning_rate": 1.8165789347370442e-05, | |
| "loss": 2.0257, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.09103985433623306, | |
| "grad_norm": 0.0016732424264773726, | |
| "learning_rate": 1.8197788835378635e-05, | |
| "loss": 1.1371, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.09119985408023347, | |
| "grad_norm": 7.662989139556885, | |
| "learning_rate": 1.822978832338683e-05, | |
| "loss": 1.8825, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.09135985382423388, | |
| "grad_norm": 16.23094940185547, | |
| "learning_rate": 1.8261787811395018e-05, | |
| "loss": 1.0455, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.09151985356823429, | |
| "grad_norm": 0.025669243186712265, | |
| "learning_rate": 1.829378729940321e-05, | |
| "loss": 1.5889, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.0916798533122347, | |
| "grad_norm": 0.4638320505619049, | |
| "learning_rate": 1.8325786787411404e-05, | |
| "loss": 1.192, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.09183985305623511, | |
| "grad_norm": 61.32036209106445, | |
| "learning_rate": 1.8357786275419597e-05, | |
| "loss": 1.5374, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.09199985280023552, | |
| "grad_norm": 0.0012545910431072116, | |
| "learning_rate": 1.8389785763427787e-05, | |
| "loss": 1.6236, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.09215985254423593, | |
| "grad_norm": 92.61576080322266, | |
| "learning_rate": 1.842178525143598e-05, | |
| "loss": 1.8945, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.09231985228823635, | |
| "grad_norm": 22.349824905395508, | |
| "learning_rate": 1.845378473944417e-05, | |
| "loss": 1.607, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.09247985203223674, | |
| "grad_norm": 126.0189208984375, | |
| "learning_rate": 1.8485784227452363e-05, | |
| "loss": 1.8133, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.09263985177623715, | |
| "grad_norm": 78.0487060546875, | |
| "learning_rate": 1.8517783715460552e-05, | |
| "loss": 1.5777, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.09279985152023756, | |
| "grad_norm": 0.0007238721009343863, | |
| "learning_rate": 1.8549783203468745e-05, | |
| "loss": 1.5043, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.09295985126423797, | |
| "grad_norm": 123.57425689697266, | |
| "learning_rate": 1.858178269147694e-05, | |
| "loss": 1.7681, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.09311985100823839, | |
| "grad_norm": 0.2985190153121948, | |
| "learning_rate": 1.8613782179485128e-05, | |
| "loss": 1.623, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.0932798507522388, | |
| "grad_norm": 64.27520751953125, | |
| "learning_rate": 1.864578166749332e-05, | |
| "loss": 2.2137, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.09343985049623921, | |
| "grad_norm": 207.88841247558594, | |
| "learning_rate": 1.8677781155501514e-05, | |
| "loss": 2.2447, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.09359985024023962, | |
| "grad_norm": 94.58351135253906, | |
| "learning_rate": 1.8709780643509704e-05, | |
| "loss": 2.3013, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.09375984998424003, | |
| "grad_norm": 0.29524990916252136, | |
| "learning_rate": 1.8741780131517897e-05, | |
| "loss": 1.3105, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.09391984972824044, | |
| "grad_norm": 110.85052490234375, | |
| "learning_rate": 1.877377961952609e-05, | |
| "loss": 1.4461, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.09407984947224085, | |
| "grad_norm": 44.61641311645508, | |
| "learning_rate": 1.880577910753428e-05, | |
| "loss": 2.1321, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.09423984921624125, | |
| "grad_norm": 699.353759765625, | |
| "learning_rate": 1.8837778595542473e-05, | |
| "loss": 1.7541, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.09439984896024166, | |
| "grad_norm": 88.22161865234375, | |
| "learning_rate": 1.8869778083550666e-05, | |
| "loss": 1.7894, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.09455984870424207, | |
| "grad_norm": 25.957782745361328, | |
| "learning_rate": 1.8901777571558855e-05, | |
| "loss": 1.693, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.09471984844824248, | |
| "grad_norm": 1.7580465078353882, | |
| "learning_rate": 1.8933457064686968e-05, | |
| "loss": 1.7073, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.09487984819224289, | |
| "grad_norm": 5.568783283233643, | |
| "learning_rate": 1.8965456552695157e-05, | |
| "loss": 2.0305, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.0950398479362433, | |
| "grad_norm": 0.21757324039936066, | |
| "learning_rate": 1.899745604070335e-05, | |
| "loss": 1.3684, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.09519984768024371, | |
| "grad_norm": 123.5767593383789, | |
| "learning_rate": 1.9029455528711543e-05, | |
| "loss": 1.8754, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.09535984742424412, | |
| "grad_norm": 66.91508483886719, | |
| "learning_rate": 1.9061455016719733e-05, | |
| "loss": 2.0225, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.09551984716824453, | |
| "grad_norm": 0.00018894312961492687, | |
| "learning_rate": 1.9093454504727926e-05, | |
| "loss": 2.1975, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.09567984691224495, | |
| "grad_norm": 84.60813903808594, | |
| "learning_rate": 1.912545399273612e-05, | |
| "loss": 1.7173, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.09583984665624536, | |
| "grad_norm": 67.51477813720703, | |
| "learning_rate": 1.915745348074431e-05, | |
| "loss": 1.4302, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.09599984640024575, | |
| "grad_norm": 59.47672653198242, | |
| "learning_rate": 1.9189452968752502e-05, | |
| "loss": 1.2497, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.09615984614424616, | |
| "grad_norm": 19.75477409362793, | |
| "learning_rate": 1.9221452456760695e-05, | |
| "loss": 1.4058, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.09631984588824657, | |
| "grad_norm": 91.08583068847656, | |
| "learning_rate": 1.9253451944768885e-05, | |
| "loss": 1.0956, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.09647984563224699, | |
| "grad_norm": 133.5473175048828, | |
| "learning_rate": 1.9285451432777078e-05, | |
| "loss": 1.3731, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.0966398453762474, | |
| "grad_norm": 0.010973370634019375, | |
| "learning_rate": 1.931745092078527e-05, | |
| "loss": 1.2953, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.09679984512024781, | |
| "grad_norm": 0.08579988777637482, | |
| "learning_rate": 1.934945040879346e-05, | |
| "loss": 1.0987, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.09695984486424822, | |
| "grad_norm": 1.1617801189422607, | |
| "learning_rate": 1.9381449896801653e-05, | |
| "loss": 1.5104, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.09711984460824863, | |
| "grad_norm": 0.3544386029243469, | |
| "learning_rate": 1.9413449384809843e-05, | |
| "loss": 1.5224, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.09727984435224904, | |
| "grad_norm": 44.28148651123047, | |
| "learning_rate": 1.9445448872818036e-05, | |
| "loss": 1.3982, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.09743984409624945, | |
| "grad_norm": 167.9832305908203, | |
| "learning_rate": 1.947744836082623e-05, | |
| "loss": 1.2785, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.09759984384024986, | |
| "grad_norm": 129.58119201660156, | |
| "learning_rate": 1.950944784883442e-05, | |
| "loss": 1.6018, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.09775984358425026, | |
| "grad_norm": 0.048871856182813644, | |
| "learning_rate": 1.9541447336842612e-05, | |
| "loss": 1.4968, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.09791984332825067, | |
| "grad_norm": 0.14592894911766052, | |
| "learning_rate": 1.9573446824850805e-05, | |
| "loss": 1.2423, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.09807984307225108, | |
| "grad_norm": 0.548117458820343, | |
| "learning_rate": 1.9605446312858995e-05, | |
| "loss": 1.9973, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.09823984281625149, | |
| "grad_norm": 52.4393424987793, | |
| "learning_rate": 1.9637445800867188e-05, | |
| "loss": 1.2149, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.0983998425602519, | |
| "grad_norm": 101.79759216308594, | |
| "learning_rate": 1.966944528887538e-05, | |
| "loss": 1.731, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.09855984230425231, | |
| "grad_norm": 0.04176723212003708, | |
| "learning_rate": 1.970144477688357e-05, | |
| "loss": 1.2889, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.09871984204825272, | |
| "grad_norm": 8.380585670471191, | |
| "learning_rate": 1.9733444264891763e-05, | |
| "loss": 1.856, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.09887984179225313, | |
| "grad_norm": 0.014852323569357395, | |
| "learning_rate": 1.9765443752899956e-05, | |
| "loss": 0.8942, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.09903984153625355, | |
| "grad_norm": 0.3229600787162781, | |
| "learning_rate": 1.9797443240908146e-05, | |
| "loss": 1.3371, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.09919984128025396, | |
| "grad_norm": 141.8211212158203, | |
| "learning_rate": 1.982944272891634e-05, | |
| "loss": 1.5222, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.09935984102425437, | |
| "grad_norm": 0.025253353640437126, | |
| "learning_rate": 1.9861442216924532e-05, | |
| "loss": 1.5435, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.09951984076825478, | |
| "grad_norm": 0.0009790909243747592, | |
| "learning_rate": 1.9893441704932722e-05, | |
| "loss": 1.1172, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.09967984051225517, | |
| "grad_norm": 43.73761749267578, | |
| "learning_rate": 1.9925441192940915e-05, | |
| "loss": 1.6024, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.09983984025625559, | |
| "grad_norm": 18.400936126708984, | |
| "learning_rate": 1.9957440680949108e-05, | |
| "loss": 1.3914, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.099999840000256, | |
| "grad_norm": 937.4790649414062, | |
| "learning_rate": 1.9989440168957298e-05, | |
| "loss": 1.4714, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.10015983974425641, | |
| "grad_norm": 0.14736099541187286, | |
| "learning_rate": 1.999761777777778e-05, | |
| "loss": 1.2922, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.10031983948825682, | |
| "grad_norm": 0.606549084186554, | |
| "learning_rate": 1.999409777777778e-05, | |
| "loss": 1.4263, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.10047983923225723, | |
| "grad_norm": 426.79400634765625, | |
| "learning_rate": 1.9990542222222224e-05, | |
| "loss": 1.4586, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.10063983897625764, | |
| "grad_norm": 0.007887039333581924, | |
| "learning_rate": 1.9986986666666668e-05, | |
| "loss": 1.6312, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.10079983872025805, | |
| "grad_norm": 0.9442864060401917, | |
| "learning_rate": 1.9983431111111113e-05, | |
| "loss": 1.9607, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.10095983846425846, | |
| "grad_norm": 0.002317711478099227, | |
| "learning_rate": 1.9979875555555557e-05, | |
| "loss": 1.5771, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.10111983820825887, | |
| "grad_norm": 75.09770965576172, | |
| "learning_rate": 1.9976320000000002e-05, | |
| "loss": 1.6721, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.10127983795225928, | |
| "grad_norm": 135.64022827148438, | |
| "learning_rate": 1.9972764444444446e-05, | |
| "loss": 1.8461, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.10143983769625968, | |
| "grad_norm": 0.1608121395111084, | |
| "learning_rate": 1.996920888888889e-05, | |
| "loss": 1.5256, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.10159983744026009, | |
| "grad_norm": 266.8143615722656, | |
| "learning_rate": 1.9965653333333336e-05, | |
| "loss": 1.9736, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.1017598371842605, | |
| "grad_norm": 125.29386138916016, | |
| "learning_rate": 1.996209777777778e-05, | |
| "loss": 1.4735, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.10191983692826091, | |
| "grad_norm": 1.8401292562484741, | |
| "learning_rate": 1.9958542222222225e-05, | |
| "loss": 1.4619, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.10207983667226132, | |
| "grad_norm": 352.0743103027344, | |
| "learning_rate": 1.995498666666667e-05, | |
| "loss": 1.6571, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.10223983641626173, | |
| "grad_norm": 546.5570068359375, | |
| "learning_rate": 1.9951431111111114e-05, | |
| "loss": 1.5888, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.10239983616026214, | |
| "grad_norm": 0.0009566646185703576, | |
| "learning_rate": 1.994787555555556e-05, | |
| "loss": 2.0457, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.10255983590426256, | |
| "grad_norm": 717.4028930664062, | |
| "learning_rate": 1.9944320000000003e-05, | |
| "loss": 1.7843, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.10271983564826297, | |
| "grad_norm": 0.16068622469902039, | |
| "learning_rate": 1.9940764444444447e-05, | |
| "loss": 1.5116, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.10287983539226338, | |
| "grad_norm": 69.0772705078125, | |
| "learning_rate": 1.9937208888888892e-05, | |
| "loss": 1.6682, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.10303983513626379, | |
| "grad_norm": 0.0007585228304378688, | |
| "learning_rate": 1.9933653333333337e-05, | |
| "loss": 1.2137, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.10319983488026419, | |
| "grad_norm": 140.95750427246094, | |
| "learning_rate": 1.9930097777777778e-05, | |
| "loss": 1.1308, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.1033598346242646, | |
| "grad_norm": 1.2280133962631226, | |
| "learning_rate": 1.9926542222222226e-05, | |
| "loss": 2.031, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.103519834368265, | |
| "grad_norm": 51.01097106933594, | |
| "learning_rate": 1.9922986666666667e-05, | |
| "loss": 1.6903, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.10367983411226542, | |
| "grad_norm": 354.4974365234375, | |
| "learning_rate": 1.9919431111111115e-05, | |
| "loss": 1.3365, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.10383983385626583, | |
| "grad_norm": 98.43709564208984, | |
| "learning_rate": 1.9915875555555556e-05, | |
| "loss": 1.5736, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.10399983360026624, | |
| "grad_norm": 176.26661682128906, | |
| "learning_rate": 1.991232e-05, | |
| "loss": 1.7264, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.10415983334426665, | |
| "grad_norm": 55.52714920043945, | |
| "learning_rate": 1.9908764444444445e-05, | |
| "loss": 1.1781, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.10431983308826706, | |
| "grad_norm": 0.0009407736943103373, | |
| "learning_rate": 1.990520888888889e-05, | |
| "loss": 1.2503, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.10447983283226747, | |
| "grad_norm": 0.001376794883981347, | |
| "learning_rate": 1.9901653333333334e-05, | |
| "loss": 0.9432, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.10463983257626788, | |
| "grad_norm": 52.175819396972656, | |
| "learning_rate": 1.9898133333333335e-05, | |
| "loss": 1.264, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.1047998323202683, | |
| "grad_norm": 147.7506866455078, | |
| "learning_rate": 1.989457777777778e-05, | |
| "loss": 1.2086, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.10495983206426869, | |
| "grad_norm": 31.214550018310547, | |
| "learning_rate": 1.9891022222222224e-05, | |
| "loss": 1.8692, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.1051198318082691, | |
| "grad_norm": 168.40858459472656, | |
| "learning_rate": 1.988746666666667e-05, | |
| "loss": 1.2745, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.10527983155226951, | |
| "grad_norm": 401.31842041015625, | |
| "learning_rate": 1.9883911111111113e-05, | |
| "loss": 1.6839, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.10543983129626992, | |
| "grad_norm": 139.64588928222656, | |
| "learning_rate": 1.9880355555555558e-05, | |
| "loss": 1.4509, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.10559983104027033, | |
| "grad_norm": 125.26469421386719, | |
| "learning_rate": 1.98768e-05, | |
| "loss": 1.1615, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.10575983078427074, | |
| "grad_norm": 0.1609152853488922, | |
| "learning_rate": 1.9873244444444447e-05, | |
| "loss": 1.4458, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.10591983052827116, | |
| "grad_norm": 0.000580300809815526, | |
| "learning_rate": 1.9869688888888888e-05, | |
| "loss": 1.8329, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.10607983027227157, | |
| "grad_norm": 101.93132781982422, | |
| "learning_rate": 1.9866133333333336e-05, | |
| "loss": 1.567, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.10623983001627198, | |
| "grad_norm": 99.72083282470703, | |
| "learning_rate": 1.9862577777777777e-05, | |
| "loss": 1.6746, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.10639982976027239, | |
| "grad_norm": 1651.70263671875, | |
| "learning_rate": 1.9859022222222225e-05, | |
| "loss": 1.65, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.1065598295042728, | |
| "grad_norm": 132.80343627929688, | |
| "learning_rate": 1.9855466666666666e-05, | |
| "loss": 1.5497, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.1067198292482732, | |
| "grad_norm": 0.004364237189292908, | |
| "learning_rate": 1.9851911111111114e-05, | |
| "loss": 1.4009, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.1068798289922736, | |
| "grad_norm": 4.1050825119018555, | |
| "learning_rate": 1.9848355555555556e-05, | |
| "loss": 2.058, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.10703982873627402, | |
| "grad_norm": 0.047410767525434494, | |
| "learning_rate": 1.9844800000000004e-05, | |
| "loss": 1.6306, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.10719982848027443, | |
| "grad_norm": 6.651243686676025, | |
| "learning_rate": 1.9841244444444445e-05, | |
| "loss": 1.4377, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.10735982822427484, | |
| "grad_norm": 0.010524190030992031, | |
| "learning_rate": 1.983768888888889e-05, | |
| "loss": 1.4501, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.10751982796827525, | |
| "grad_norm": 0.0009414692758582532, | |
| "learning_rate": 1.9834133333333334e-05, | |
| "loss": 1.2648, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.10767982771227566, | |
| "grad_norm": 1019.7636108398438, | |
| "learning_rate": 1.983057777777778e-05, | |
| "loss": 1.3186, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.10783982745627607, | |
| "grad_norm": 0.006541971582919359, | |
| "learning_rate": 1.982705777777778e-05, | |
| "loss": 1.1313, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.10799982720027648, | |
| "grad_norm": 323.63226318359375, | |
| "learning_rate": 1.9823502222222224e-05, | |
| "loss": 2.2523, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.1081598269442769, | |
| "grad_norm": 114.84791564941406, | |
| "learning_rate": 1.981994666666667e-05, | |
| "loss": 1.9146, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.1083198266882773, | |
| "grad_norm": 4.059427738189697, | |
| "learning_rate": 1.9816391111111113e-05, | |
| "loss": 1.7334, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.10847982643227772, | |
| "grad_norm": 3.274331569671631, | |
| "learning_rate": 1.9812835555555558e-05, | |
| "loss": 1.7195, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.10863982617627811, | |
| "grad_norm": 0.0480005145072937, | |
| "learning_rate": 1.9809280000000002e-05, | |
| "loss": 1.4661, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.10879982592027852, | |
| "grad_norm": 45.43354415893555, | |
| "learning_rate": 1.9805724444444447e-05, | |
| "loss": 1.3503, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.10895982566427893, | |
| "grad_norm": 0.0006582220084965229, | |
| "learning_rate": 1.980216888888889e-05, | |
| "loss": 1.0129, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.10911982540827934, | |
| "grad_norm": 111.87661743164062, | |
| "learning_rate": 1.9798613333333332e-05, | |
| "loss": 1.6036, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.10927982515227976, | |
| "grad_norm": 122.35249328613281, | |
| "learning_rate": 1.979505777777778e-05, | |
| "loss": 0.9312, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.10943982489628017, | |
| "grad_norm": 0.5635089874267578, | |
| "learning_rate": 1.979150222222222e-05, | |
| "loss": 1.5817, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.10959982464028058, | |
| "grad_norm": 2.6275858879089355, | |
| "learning_rate": 1.978794666666667e-05, | |
| "loss": 1.2024, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.10975982438428099, | |
| "grad_norm": 0.6521372199058533, | |
| "learning_rate": 1.978439111111111e-05, | |
| "loss": 0.985, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.1099198241282814, | |
| "grad_norm": 2.0386836528778076, | |
| "learning_rate": 1.978083555555556e-05, | |
| "loss": 1.1712, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.11007982387228181, | |
| "grad_norm": 132.18045043945312, | |
| "learning_rate": 1.977728e-05, | |
| "loss": 1.5874, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.11023982361628222, | |
| "grad_norm": 0.0068659852258861065, | |
| "learning_rate": 1.9773724444444448e-05, | |
| "loss": 1.8551, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.11039982336028262, | |
| "grad_norm": 84.89590454101562, | |
| "learning_rate": 1.977016888888889e-05, | |
| "loss": 1.232, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.11055982310428303, | |
| "grad_norm": 191.7918243408203, | |
| "learning_rate": 1.9766613333333337e-05, | |
| "loss": 1.4688, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.11071982284828344, | |
| "grad_norm": 10.109711647033691, | |
| "learning_rate": 1.9763057777777778e-05, | |
| "loss": 1.1107, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.11087982259228385, | |
| "grad_norm": 63.09272766113281, | |
| "learning_rate": 1.9759502222222226e-05, | |
| "loss": 1.6495, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.11103982233628426, | |
| "grad_norm": 66.24422454833984, | |
| "learning_rate": 1.9755946666666667e-05, | |
| "loss": 1.6278, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.11119982208028467, | |
| "grad_norm": 95.56941223144531, | |
| "learning_rate": 1.975239111111111e-05, | |
| "loss": 1.7135, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.11135982182428508, | |
| "grad_norm": 324.1082763671875, | |
| "learning_rate": 1.9748835555555556e-05, | |
| "loss": 1.5108, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.1115198215682855, | |
| "grad_norm": 123.81194305419922, | |
| "learning_rate": 1.974528e-05, | |
| "loss": 1.4056, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.1116798213122859, | |
| "grad_norm": 0.9657291769981384, | |
| "learning_rate": 1.9741724444444445e-05, | |
| "loss": 0.9324, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.11183982105628631, | |
| "grad_norm": 21.34449005126953, | |
| "learning_rate": 1.973816888888889e-05, | |
| "loss": 1.3613, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.11199982080028673, | |
| "grad_norm": 51.35042953491211, | |
| "learning_rate": 1.9734613333333334e-05, | |
| "loss": 1.5283, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.11215982054428712, | |
| "grad_norm": 16.527353286743164, | |
| "learning_rate": 1.973105777777778e-05, | |
| "loss": 1.3809, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.11231982028828753, | |
| "grad_norm": 0.032987259328365326, | |
| "learning_rate": 1.9727502222222224e-05, | |
| "loss": 1.5552, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.11247982003228794, | |
| "grad_norm": 96.99321746826172, | |
| "learning_rate": 1.9723946666666668e-05, | |
| "loss": 1.4567, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.11263981977628836, | |
| "grad_norm": 291.11444091796875, | |
| "learning_rate": 1.9720391111111113e-05, | |
| "loss": 1.4404, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.11279981952028877, | |
| "grad_norm": 1.33843195438385, | |
| "learning_rate": 1.9716835555555557e-05, | |
| "loss": 1.1805, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.11295981926428918, | |
| "grad_norm": 340.20355224609375, | |
| "learning_rate": 1.9713280000000002e-05, | |
| "loss": 2.514, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.11311981900828959, | |
| "grad_norm": 0.20241181552410126, | |
| "learning_rate": 1.9709724444444446e-05, | |
| "loss": 1.4821, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.11327981875229, | |
| "grad_norm": 0.044828109443187714, | |
| "learning_rate": 1.970616888888889e-05, | |
| "loss": 1.5156, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.11343981849629041, | |
| "grad_norm": 121.0267105102539, | |
| "learning_rate": 1.9702648888888892e-05, | |
| "loss": 1.5925, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.11359981824029082, | |
| "grad_norm": 1217.32373046875, | |
| "learning_rate": 1.9699093333333333e-05, | |
| "loss": 1.9517, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.11375981798429123, | |
| "grad_norm": 102.1255111694336, | |
| "learning_rate": 1.969553777777778e-05, | |
| "loss": 1.2685, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.11391981772829163, | |
| "grad_norm": 0.0009581278427504003, | |
| "learning_rate": 1.9691982222222222e-05, | |
| "loss": 1.6314, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.11407981747229204, | |
| "grad_norm": 105.03948974609375, | |
| "learning_rate": 1.968842666666667e-05, | |
| "loss": 1.5252, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.11423981721629245, | |
| "grad_norm": 2.6692819595336914, | |
| "learning_rate": 1.968487111111111e-05, | |
| "loss": 1.5176, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.11439981696029286, | |
| "grad_norm": 93.15460205078125, | |
| "learning_rate": 1.968131555555556e-05, | |
| "loss": 1.3461, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.11455981670429327, | |
| "grad_norm": 37.849117279052734, | |
| "learning_rate": 1.967776e-05, | |
| "loss": 1.3832, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.11471981644829368, | |
| "grad_norm": 3.6809959411621094, | |
| "learning_rate": 1.967420444444445e-05, | |
| "loss": 1.2962, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.11487981619229409, | |
| "grad_norm": 40.560264587402344, | |
| "learning_rate": 1.967064888888889e-05, | |
| "loss": 1.5179, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.1150398159362945, | |
| "grad_norm": 0.17644475400447845, | |
| "learning_rate": 1.9667093333333334e-05, | |
| "loss": 1.1041, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.11519981568029491, | |
| "grad_norm": 0.05514904111623764, | |
| "learning_rate": 1.966353777777778e-05, | |
| "loss": 1.5031, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.11535981542429533, | |
| "grad_norm": 23.659364700317383, | |
| "learning_rate": 1.9659982222222223e-05, | |
| "loss": 1.5412, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.11551981516829574, | |
| "grad_norm": 0.0025822233874350786, | |
| "learning_rate": 1.9656426666666668e-05, | |
| "loss": 1.2971, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.11567981491229615, | |
| "grad_norm": 2.382300853729248, | |
| "learning_rate": 1.9652871111111112e-05, | |
| "loss": 1.0979, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.11583981465629654, | |
| "grad_norm": 150.96646118164062, | |
| "learning_rate": 1.9649315555555557e-05, | |
| "loss": 1.307, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.11599981440029696, | |
| "grad_norm": 0.022950541228055954, | |
| "learning_rate": 1.964576e-05, | |
| "loss": 1.3418, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.11615981414429737, | |
| "grad_norm": 21.7007999420166, | |
| "learning_rate": 1.9642204444444446e-05, | |
| "loss": 1.7298, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.11631981388829778, | |
| "grad_norm": 100.60992431640625, | |
| "learning_rate": 1.963864888888889e-05, | |
| "loss": 1.68, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.11647981363229819, | |
| "grad_norm": 104.22400665283203, | |
| "learning_rate": 1.9635093333333335e-05, | |
| "loss": 1.3106, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.1166398133762986, | |
| "grad_norm": 0.01572352461516857, | |
| "learning_rate": 1.963153777777778e-05, | |
| "loss": 1.0954, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.11679981312029901, | |
| "grad_norm": 0.1720964014530182, | |
| "learning_rate": 1.9627982222222224e-05, | |
| "loss": 1.5994, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.11695981286429942, | |
| "grad_norm": 90.89932250976562, | |
| "learning_rate": 1.962442666666667e-05, | |
| "loss": 1.5953, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.11711981260829983, | |
| "grad_norm": 94.24946594238281, | |
| "learning_rate": 1.9620871111111113e-05, | |
| "loss": 1.9498, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.11727981235230024, | |
| "grad_norm": 0.08061110228300095, | |
| "learning_rate": 1.9617315555555554e-05, | |
| "loss": 0.9937, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.11743981209630065, | |
| "grad_norm": 9.990059852600098, | |
| "learning_rate": 1.9613760000000002e-05, | |
| "loss": 1.4753, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.11759981184030105, | |
| "grad_norm": 1.572757601737976, | |
| "learning_rate": 1.9610204444444444e-05, | |
| "loss": 1.417, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.11775981158430146, | |
| "grad_norm": 23.618915557861328, | |
| "learning_rate": 1.960664888888889e-05, | |
| "loss": 1.596, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.11791981132830187, | |
| "grad_norm": 90.75736999511719, | |
| "learning_rate": 1.9603093333333333e-05, | |
| "loss": 1.8794, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.11807981107230228, | |
| "grad_norm": 0.07401008158922195, | |
| "learning_rate": 1.959953777777778e-05, | |
| "loss": 1.3118, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.11823981081630269, | |
| "grad_norm": 80.61852264404297, | |
| "learning_rate": 1.9595982222222222e-05, | |
| "loss": 1.732, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.1183998105603031, | |
| "grad_norm": 0.18783807754516602, | |
| "learning_rate": 1.959242666666667e-05, | |
| "loss": 1.4504, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.11855981030430351, | |
| "grad_norm": 0.0010747779160737991, | |
| "learning_rate": 1.958887111111111e-05, | |
| "loss": 1.0878, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.11871981004830393, | |
| "grad_norm": 0.0007994744810275733, | |
| "learning_rate": 1.958531555555556e-05, | |
| "loss": 1.2488, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.11887980979230434, | |
| "grad_norm": 16.047822952270508, | |
| "learning_rate": 1.958176e-05, | |
| "loss": 1.3887, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.11903980953630475, | |
| "grad_norm": 105.44868469238281, | |
| "learning_rate": 1.9578204444444448e-05, | |
| "loss": 1.2265, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.11919980928030516, | |
| "grad_norm": 0.0008991442155092955, | |
| "learning_rate": 1.957464888888889e-05, | |
| "loss": 1.4668, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.11935980902430555, | |
| "grad_norm": 0.04507048800587654, | |
| "learning_rate": 1.9571093333333334e-05, | |
| "loss": 1.6258, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.11951980876830597, | |
| "grad_norm": 0.00036178340087644756, | |
| "learning_rate": 1.9567537777777778e-05, | |
| "loss": 1.9551, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.11967980851230638, | |
| "grad_norm": 0.0032805718947201967, | |
| "learning_rate": 1.9563982222222223e-05, | |
| "loss": 1.1811, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.11983980825630679, | |
| "grad_norm": 126.1537857055664, | |
| "learning_rate": 1.9560426666666667e-05, | |
| "loss": 1.2119, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.1199998080003072, | |
| "grad_norm": 1.2026222944259644, | |
| "learning_rate": 1.9556871111111112e-05, | |
| "loss": 1.4051, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.12015980774430761, | |
| "grad_norm": 1.8911128044128418, | |
| "learning_rate": 1.9553351111111113e-05, | |
| "loss": 1.2587, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.12031980748830802, | |
| "grad_norm": 0.4351516664028168, | |
| "learning_rate": 1.9549795555555558e-05, | |
| "loss": 1.4563, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.12047980723230843, | |
| "grad_norm": 0.004506128840148449, | |
| "learning_rate": 1.9546240000000002e-05, | |
| "loss": 1.5581, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.12063980697630884, | |
| "grad_norm": 0.0002510923077352345, | |
| "learning_rate": 1.9542684444444447e-05, | |
| "loss": 1.5457, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.12079980672030925, | |
| "grad_norm": 0.15088102221488953, | |
| "learning_rate": 1.953912888888889e-05, | |
| "loss": 1.2675, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.12095980646430966, | |
| "grad_norm": 0.05502159520983696, | |
| "learning_rate": 1.9535573333333336e-05, | |
| "loss": 1.0948, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.12111980620831006, | |
| "grad_norm": 0.09219387173652649, | |
| "learning_rate": 1.953201777777778e-05, | |
| "loss": 1.2045, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.12127980595231047, | |
| "grad_norm": 199.2202911376953, | |
| "learning_rate": 1.9528462222222225e-05, | |
| "loss": 1.5964, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.12143980569631088, | |
| "grad_norm": 11.821746826171875, | |
| "learning_rate": 1.9524906666666666e-05, | |
| "loss": 1.0517, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.12159980544031129, | |
| "grad_norm": 106.8429946899414, | |
| "learning_rate": 1.9521351111111114e-05, | |
| "loss": 1.2883, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.1217598051843117, | |
| "grad_norm": 0.08651433885097504, | |
| "learning_rate": 1.9517795555555555e-05, | |
| "loss": 1.2276, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.12191980492831211, | |
| "grad_norm": 79.67507934570312, | |
| "learning_rate": 1.9514240000000003e-05, | |
| "loss": 1.2463, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.12207980467231253, | |
| "grad_norm": 0.6488481163978577, | |
| "learning_rate": 1.9510684444444444e-05, | |
| "loss": 1.241, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.12223980441631294, | |
| "grad_norm": 6.853870391845703, | |
| "learning_rate": 1.9507128888888892e-05, | |
| "loss": 1.8648, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.12239980416031335, | |
| "grad_norm": 0.0021333652548491955, | |
| "learning_rate": 1.9503573333333333e-05, | |
| "loss": 1.4848, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.12255980390431376, | |
| "grad_norm": 0.0014837757917121053, | |
| "learning_rate": 1.950001777777778e-05, | |
| "loss": 1.413, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.12271980364831417, | |
| "grad_norm": 196.25413513183594, | |
| "learning_rate": 1.9496462222222222e-05, | |
| "loss": 1.594, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.12287980339231457, | |
| "grad_norm": 117.68331909179688, | |
| "learning_rate": 1.949290666666667e-05, | |
| "loss": 1.3682, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.12303980313631498, | |
| "grad_norm": 3.5699806213378906, | |
| "learning_rate": 1.948935111111111e-05, | |
| "loss": 1.159, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.12319980288031539, | |
| "grad_norm": 0.051335014402866364, | |
| "learning_rate": 1.9485795555555556e-05, | |
| "loss": 1.4702, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.1233598026243158, | |
| "grad_norm": 0.012798790819942951, | |
| "learning_rate": 1.948224e-05, | |
| "loss": 1.3251, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.12351980236831621, | |
| "grad_norm": 3.92969012260437, | |
| "learning_rate": 1.9478684444444445e-05, | |
| "loss": 1.0538, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.12367980211231662, | |
| "grad_norm": 2.8302226066589355, | |
| "learning_rate": 1.947512888888889e-05, | |
| "loss": 1.1708, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.12383980185631703, | |
| "grad_norm": 0.11278839409351349, | |
| "learning_rate": 1.947160888888889e-05, | |
| "loss": 1.2864, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.12399980160031744, | |
| "grad_norm": 99.1993408203125, | |
| "learning_rate": 1.9468053333333335e-05, | |
| "loss": 1.6501, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.12415980134431785, | |
| "grad_norm": 0.22377841174602509, | |
| "learning_rate": 1.946449777777778e-05, | |
| "loss": 1.0104, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.12431980108831826, | |
| "grad_norm": 99.57634735107422, | |
| "learning_rate": 1.9460942222222225e-05, | |
| "loss": 1.7969, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.12447980083231867, | |
| "grad_norm": 0.9174038767814636, | |
| "learning_rate": 1.945738666666667e-05, | |
| "loss": 1.0293, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.12463980057631908, | |
| "grad_norm": 61.01045227050781, | |
| "learning_rate": 1.9453831111111114e-05, | |
| "loss": 1.5593, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.12479980032031948, | |
| "grad_norm": 2412.276611328125, | |
| "learning_rate": 1.9450275555555558e-05, | |
| "loss": 0.9902, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.12495980006431989, | |
| "grad_norm": 241.99363708496094, | |
| "learning_rate": 1.9446720000000003e-05, | |
| "loss": 1.058, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.1251197998083203, | |
| "grad_norm": 96.88700866699219, | |
| "learning_rate": 1.9443164444444447e-05, | |
| "loss": 1.4039, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.12527979955232071, | |
| "grad_norm": 56.962181091308594, | |
| "learning_rate": 1.943960888888889e-05, | |
| "loss": 1.008, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.12543979929632113, | |
| "grad_norm": 23.272607803344727, | |
| "learning_rate": 1.9436053333333336e-05, | |
| "loss": 1.4593, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.12559979904032154, | |
| "grad_norm": 97.3494644165039, | |
| "learning_rate": 1.9432497777777778e-05, | |
| "loss": 1.563, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.12575979878432195, | |
| "grad_norm": 3.625567674636841, | |
| "learning_rate": 1.9428942222222226e-05, | |
| "loss": 1.1569, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.12591979852832236, | |
| "grad_norm": 63.88728332519531, | |
| "learning_rate": 1.9425386666666667e-05, | |
| "loss": 1.3886, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.12607979827232277, | |
| "grad_norm": 0.533359169960022, | |
| "learning_rate": 1.9421831111111115e-05, | |
| "loss": 1.061, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.12623979801632318, | |
| "grad_norm": 0.0005107554607093334, | |
| "learning_rate": 1.9418275555555556e-05, | |
| "loss": 1.2085, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.1263997977603236, | |
| "grad_norm": 66.7668685913086, | |
| "learning_rate": 1.9414720000000004e-05, | |
| "loss": 1.8553, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.126559797504324, | |
| "grad_norm": 2.3458669185638428, | |
| "learning_rate": 1.9411164444444445e-05, | |
| "loss": 1.7144, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.1267197972483244, | |
| "grad_norm": 101.0086669921875, | |
| "learning_rate": 1.9407608888888893e-05, | |
| "loss": 1.2216, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.12687979699232482, | |
| "grad_norm": 14.662532806396484, | |
| "learning_rate": 1.9404053333333334e-05, | |
| "loss": 1.1646, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.12703979673632523, | |
| "grad_norm": 70.46912384033203, | |
| "learning_rate": 1.9400497777777782e-05, | |
| "loss": 1.7768, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.12719979648032564, | |
| "grad_norm": 3.7776920795440674, | |
| "learning_rate": 1.9396942222222223e-05, | |
| "loss": 1.1314, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.12735979622432603, | |
| "grad_norm": 0.05991614609956741, | |
| "learning_rate": 1.9393386666666668e-05, | |
| "loss": 1.2374, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.12751979596832644, | |
| "grad_norm": 1.138396978378296, | |
| "learning_rate": 1.9389831111111112e-05, | |
| "loss": 1.2681, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.12767979571232685, | |
| "grad_norm": 117.04296875, | |
| "learning_rate": 1.9386275555555557e-05, | |
| "loss": 1.2624, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.12783979545632726, | |
| "grad_norm": 165.1708984375, | |
| "learning_rate": 1.9382755555555558e-05, | |
| "loss": 1.6775, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.12799979520032767, | |
| "grad_norm": 127.26524353027344, | |
| "learning_rate": 1.9379200000000002e-05, | |
| "loss": 1.3587, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.12815979494432808, | |
| "grad_norm": 128.8250274658203, | |
| "learning_rate": 1.9375644444444447e-05, | |
| "loss": 1.7402, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.1283197946883285, | |
| "grad_norm": 83.64952850341797, | |
| "learning_rate": 1.937208888888889e-05, | |
| "loss": 1.5349, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.1284797944323289, | |
| "grad_norm": 2.9033825397491455, | |
| "learning_rate": 1.9368533333333336e-05, | |
| "loss": 0.8546, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.12863979417632931, | |
| "grad_norm": 1.8563624620437622, | |
| "learning_rate": 1.936497777777778e-05, | |
| "loss": 1.3903, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.12879979392032972, | |
| "grad_norm": 0.020641742274165154, | |
| "learning_rate": 1.9361422222222225e-05, | |
| "loss": 1.0712, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.12895979366433014, | |
| "grad_norm": 0.030105268582701683, | |
| "learning_rate": 1.935786666666667e-05, | |
| "loss": 1.6633, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.12911979340833055, | |
| "grad_norm": 7.39204216003418, | |
| "learning_rate": 1.935431111111111e-05, | |
| "loss": 1.4125, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.12927979315233096, | |
| "grad_norm": 0.6996489763259888, | |
| "learning_rate": 1.935075555555556e-05, | |
| "loss": 0.6973, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.12943979289633137, | |
| "grad_norm": 45.01316452026367, | |
| "learning_rate": 1.93472e-05, | |
| "loss": 1.1729, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.12959979264033178, | |
| "grad_norm": 0.2586953938007355, | |
| "learning_rate": 1.9343644444444448e-05, | |
| "loss": 1.2217, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.1297597923843322, | |
| "grad_norm": 0.02437330223619938, | |
| "learning_rate": 1.934008888888889e-05, | |
| "loss": 1.3184, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.1299197921283326, | |
| "grad_norm": 86.13786315917969, | |
| "learning_rate": 1.9336533333333334e-05, | |
| "loss": 1.2718, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.130079791872333, | |
| "grad_norm": 129.18377685546875, | |
| "learning_rate": 1.9332977777777778e-05, | |
| "loss": 1.1913, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.13023979161633342, | |
| "grad_norm": 0.21126429736614227, | |
| "learning_rate": 1.9329422222222223e-05, | |
| "loss": 1.4728, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.13039979136033383, | |
| "grad_norm": 17.239547729492188, | |
| "learning_rate": 1.9325902222222224e-05, | |
| "loss": 1.1221, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.13055979110433424, | |
| "grad_norm": 3.2373907566070557, | |
| "learning_rate": 1.932234666666667e-05, | |
| "loss": 1.235, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.13071979084833465, | |
| "grad_norm": 5.152343273162842, | |
| "learning_rate": 1.9318791111111113e-05, | |
| "loss": 1.3497, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.13087979059233507, | |
| "grad_norm": 88.2583236694336, | |
| "learning_rate": 1.9315235555555558e-05, | |
| "loss": 1.2361, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.13103979033633545, | |
| "grad_norm": 0.001005143509246409, | |
| "learning_rate": 1.9311680000000002e-05, | |
| "loss": 2.0015, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.13119979008033586, | |
| "grad_norm": 0.3949466347694397, | |
| "learning_rate": 1.9308124444444447e-05, | |
| "loss": 1.2259, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.13135978982433627, | |
| "grad_norm": 0.6978406310081482, | |
| "learning_rate": 1.930456888888889e-05, | |
| "loss": 0.9236, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.13151978956833668, | |
| "grad_norm": 0.6740103363990784, | |
| "learning_rate": 1.9301013333333332e-05, | |
| "loss": 1.5339, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.1316797893123371, | |
| "grad_norm": 0.007084805518388748, | |
| "learning_rate": 1.929745777777778e-05, | |
| "loss": 1.2036, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.1318397890563375, | |
| "grad_norm": 88.51764678955078, | |
| "learning_rate": 1.929390222222222e-05, | |
| "loss": 1.2631, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.1319997888003379, | |
| "grad_norm": 0.000969950866419822, | |
| "learning_rate": 1.929034666666667e-05, | |
| "loss": 1.0858, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.13215978854433832, | |
| "grad_norm": 0.10399264842271805, | |
| "learning_rate": 1.928679111111111e-05, | |
| "loss": 1.635, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.13231978828833874, | |
| "grad_norm": 39.86758804321289, | |
| "learning_rate": 1.928323555555556e-05, | |
| "loss": 1.285, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.13247978803233915, | |
| "grad_norm": 2.6627957820892334, | |
| "learning_rate": 1.927968e-05, | |
| "loss": 1.1209, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.13263978777633956, | |
| "grad_norm": 0.2310008406639099, | |
| "learning_rate": 1.9276124444444448e-05, | |
| "loss": 1.4032, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.13279978752033997, | |
| "grad_norm": 76.39102935791016, | |
| "learning_rate": 1.927256888888889e-05, | |
| "loss": 1.1279, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.13295978726434038, | |
| "grad_norm": 0.5016289949417114, | |
| "learning_rate": 1.9269013333333337e-05, | |
| "loss": 1.5145, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.1331197870083408, | |
| "grad_norm": 0.2468506544828415, | |
| "learning_rate": 1.9265457777777778e-05, | |
| "loss": 1.4923, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.1332797867523412, | |
| "grad_norm": 0.03473009541630745, | |
| "learning_rate": 1.9261902222222222e-05, | |
| "loss": 0.9845, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.1334397864963416, | |
| "grad_norm": 15.979637145996094, | |
| "learning_rate": 1.9258346666666667e-05, | |
| "loss": 1.3847, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.13359978624034202, | |
| "grad_norm": 0.13443760573863983, | |
| "learning_rate": 1.925479111111111e-05, | |
| "loss": 1.0149, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.13375978598434243, | |
| "grad_norm": 0.09114881604909897, | |
| "learning_rate": 1.9251235555555556e-05, | |
| "loss": 1.2644, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.13391978572834284, | |
| "grad_norm": 67.14397430419922, | |
| "learning_rate": 1.924768e-05, | |
| "loss": 1.2981, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.13407978547234325, | |
| "grad_norm": 9.479357719421387, | |
| "learning_rate": 1.9244124444444445e-05, | |
| "loss": 1.6903, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.13423978521634367, | |
| "grad_norm": 222.48973083496094, | |
| "learning_rate": 1.924056888888889e-05, | |
| "loss": 1.2846, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.13439978496034408, | |
| "grad_norm": 0.24070465564727783, | |
| "learning_rate": 1.9237013333333334e-05, | |
| "loss": 1.4647, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.13455978470434446, | |
| "grad_norm": 1444.743896484375, | |
| "learning_rate": 1.923345777777778e-05, | |
| "loss": 1.1213, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.13471978444834487, | |
| "grad_norm": 1.174815058708191, | |
| "learning_rate": 1.9229902222222223e-05, | |
| "loss": 1.1379, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.13487978419234528, | |
| "grad_norm": 120.12804412841797, | |
| "learning_rate": 1.9226346666666668e-05, | |
| "loss": 1.2793, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.1350397839363457, | |
| "grad_norm": 89.50218200683594, | |
| "learning_rate": 1.9222791111111113e-05, | |
| "loss": 1.343, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.1351997836803461, | |
| "grad_norm": 100.86327362060547, | |
| "learning_rate": 1.9219235555555557e-05, | |
| "loss": 1.8342, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.1353597834243465, | |
| "grad_norm": 99.5421371459961, | |
| "learning_rate": 1.921568e-05, | |
| "loss": 1.0487, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.13551978316834692, | |
| "grad_norm": 0.0015393303474411368, | |
| "learning_rate": 1.9212124444444446e-05, | |
| "loss": 1.1531, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.13567978291234734, | |
| "grad_norm": 3.457564353942871, | |
| "learning_rate": 1.920856888888889e-05, | |
| "loss": 0.8552, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.13583978265634775, | |
| "grad_norm": 0.00041561800753697753, | |
| "learning_rate": 1.9205013333333335e-05, | |
| "loss": 1.1422, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.13599978240034816, | |
| "grad_norm": 0.0012223550584167242, | |
| "learning_rate": 1.920145777777778e-05, | |
| "loss": 1.0918, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.13615978214434857, | |
| "grad_norm": 0.21084783971309662, | |
| "learning_rate": 1.9197902222222224e-05, | |
| "loss": 1.2873, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.13631978188834898, | |
| "grad_norm": 24.241910934448242, | |
| "learning_rate": 1.919434666666667e-05, | |
| "loss": 1.547, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.1364797816323494, | |
| "grad_norm": 47.714027404785156, | |
| "learning_rate": 1.9190791111111114e-05, | |
| "loss": 1.5094, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.1366397813763498, | |
| "grad_norm": 6.054490089416504, | |
| "learning_rate": 1.9187235555555558e-05, | |
| "loss": 1.051, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.1367997811203502, | |
| "grad_norm": 0.001112865749746561, | |
| "learning_rate": 1.9183680000000003e-05, | |
| "loss": 0.9952, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.13695978086435062, | |
| "grad_norm": 0.015406353399157524, | |
| "learning_rate": 1.9180124444444447e-05, | |
| "loss": 1.1978, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.13711978060835103, | |
| "grad_norm": 17.65171241760254, | |
| "learning_rate": 1.9176604444444445e-05, | |
| "loss": 1.5221, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.13727978035235144, | |
| "grad_norm": 14.018333435058594, | |
| "learning_rate": 1.917304888888889e-05, | |
| "loss": 1.3841, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.13743978009635185, | |
| "grad_norm": 0.0006000687135383487, | |
| "learning_rate": 1.9169493333333334e-05, | |
| "loss": 1.3999, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.13759977984035227, | |
| "grad_norm": 1715.1507568359375, | |
| "learning_rate": 1.916593777777778e-05, | |
| "loss": 1.5574, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.13775977958435268, | |
| "grad_norm": 4.6950907707214355, | |
| "learning_rate": 1.9162382222222223e-05, | |
| "loss": 1.3267, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.1379197793283531, | |
| "grad_norm": 0.8909225463867188, | |
| "learning_rate": 1.9158826666666668e-05, | |
| "loss": 1.358, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.1380797790723535, | |
| "grad_norm": 27.72040367126465, | |
| "learning_rate": 1.9155271111111112e-05, | |
| "loss": 1.5441, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.13823977881635388, | |
| "grad_norm": 120.01333618164062, | |
| "learning_rate": 1.9151715555555557e-05, | |
| "loss": 1.4124, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.1383997785603543, | |
| "grad_norm": 27.406797409057617, | |
| "learning_rate": 1.914816e-05, | |
| "loss": 0.8352, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.1385597783043547, | |
| "grad_norm": 0.07549207657575607, | |
| "learning_rate": 1.9144604444444446e-05, | |
| "loss": 1.2549, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.1387197780483551, | |
| "grad_norm": 1.3746123313903809, | |
| "learning_rate": 1.914104888888889e-05, | |
| "loss": 1.4328, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.13887977779235552, | |
| "grad_norm": 0.002391360467299819, | |
| "learning_rate": 1.9137493333333335e-05, | |
| "loss": 1.2577, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.13903977753635594, | |
| "grad_norm": 39.3692626953125, | |
| "learning_rate": 1.913393777777778e-05, | |
| "loss": 1.4417, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.13919977728035635, | |
| "grad_norm": 0.0015022088773548603, | |
| "learning_rate": 1.9130382222222224e-05, | |
| "loss": 1.1927, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.13935977702435676, | |
| "grad_norm": 3.776437520980835, | |
| "learning_rate": 1.912682666666667e-05, | |
| "loss": 1.4435, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.13951977676835717, | |
| "grad_norm": 9.693673133850098, | |
| "learning_rate": 1.9123271111111113e-05, | |
| "loss": 1.3579, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.13967977651235758, | |
| "grad_norm": 47.54679870605469, | |
| "learning_rate": 1.9119751111111114e-05, | |
| "loss": 1.3883, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.139839776256358, | |
| "grad_norm": 57.24945068359375, | |
| "learning_rate": 1.9116195555555555e-05, | |
| "loss": 1.2645, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.1399997760003584, | |
| "grad_norm": 0.0025031184777617455, | |
| "learning_rate": 1.9112640000000003e-05, | |
| "loss": 1.1366, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.1401597757443588, | |
| "grad_norm": 0.015484058298170567, | |
| "learning_rate": 1.9109084444444445e-05, | |
| "loss": 1.4566, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.14031977548835922, | |
| "grad_norm": 0.24919560551643372, | |
| "learning_rate": 1.9105528888888893e-05, | |
| "loss": 1.447, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.14047977523235963, | |
| "grad_norm": 74.2865219116211, | |
| "learning_rate": 1.9101973333333334e-05, | |
| "loss": 1.0701, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.14063977497636004, | |
| "grad_norm": 127.0066909790039, | |
| "learning_rate": 1.909841777777778e-05, | |
| "loss": 1.3449, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.14079977472036045, | |
| "grad_norm": 87.54583740234375, | |
| "learning_rate": 1.9094862222222223e-05, | |
| "loss": 1.4331, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.14095977446436087, | |
| "grad_norm": 0.001399531727656722, | |
| "learning_rate": 1.909130666666667e-05, | |
| "loss": 1.3965, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.14111977420836128, | |
| "grad_norm": 69.87310028076172, | |
| "learning_rate": 1.9087751111111112e-05, | |
| "loss": 1.347, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.1412797739523617, | |
| "grad_norm": 69.38946533203125, | |
| "learning_rate": 1.9084195555555556e-05, | |
| "loss": 1.0262, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.1414397736963621, | |
| "grad_norm": 18.69589614868164, | |
| "learning_rate": 1.908064e-05, | |
| "loss": 1.0787, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.1415997734403625, | |
| "grad_norm": 0.20538243651390076, | |
| "learning_rate": 1.9077084444444446e-05, | |
| "loss": 1.3829, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.1417597731843629, | |
| "grad_norm": 0.0005450706230476499, | |
| "learning_rate": 1.907352888888889e-05, | |
| "loss": 1.2001, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.1419197729283633, | |
| "grad_norm": 2.548616409301758, | |
| "learning_rate": 1.9069973333333335e-05, | |
| "loss": 1.2407, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.1420797726723637, | |
| "grad_norm": 139.437744140625, | |
| "learning_rate": 1.906641777777778e-05, | |
| "loss": 1.6291, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.14223977241636412, | |
| "grad_norm": 10.629435539245605, | |
| "learning_rate": 1.9062862222222224e-05, | |
| "loss": 1.1502, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.14239977216036454, | |
| "grad_norm": 3.494685411453247, | |
| "learning_rate": 1.905930666666667e-05, | |
| "loss": 1.2155, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.14255977190436495, | |
| "grad_norm": 107.18891143798828, | |
| "learning_rate": 1.9055751111111113e-05, | |
| "loss": 1.3381, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.14271977164836536, | |
| "grad_norm": 2575.91796875, | |
| "learning_rate": 1.9052195555555557e-05, | |
| "loss": 0.819, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.14287977139236577, | |
| "grad_norm": 302.19500732421875, | |
| "learning_rate": 1.9048640000000002e-05, | |
| "loss": 1.0402, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.14303977113636618, | |
| "grad_norm": 87.07076263427734, | |
| "learning_rate": 1.9045084444444447e-05, | |
| "loss": 1.1062, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.1431997708803666, | |
| "grad_norm": 5.228755950927734, | |
| "learning_rate": 1.9041528888888888e-05, | |
| "loss": 1.6693, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.143359770624367, | |
| "grad_norm": 0.20638461410999298, | |
| "learning_rate": 1.9037973333333336e-05, | |
| "loss": 1.1991, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.1435197703683674, | |
| "grad_norm": 114.9300308227539, | |
| "learning_rate": 1.9034417777777777e-05, | |
| "loss": 1.3535, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.14367977011236782, | |
| "grad_norm": 86.26241302490234, | |
| "learning_rate": 1.9030862222222225e-05, | |
| "loss": 1.6776, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.14383976985636823, | |
| "grad_norm": 78.20118713378906, | |
| "learning_rate": 1.9027306666666666e-05, | |
| "loss": 1.2221, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.14399976960036864, | |
| "grad_norm": 7.4184088706970215, | |
| "learning_rate": 1.9023751111111114e-05, | |
| "loss": 1.0253, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.14415976934436905, | |
| "grad_norm": 99.789794921875, | |
| "learning_rate": 1.9020195555555555e-05, | |
| "loss": 1.0469, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.14431976908836947, | |
| "grad_norm": 0.003789502428844571, | |
| "learning_rate": 1.9016640000000003e-05, | |
| "loss": 1.2465, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.14447976883236988, | |
| "grad_norm": 0.27373766899108887, | |
| "learning_rate": 1.9013084444444444e-05, | |
| "loss": 1.4068, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.1446397685763703, | |
| "grad_norm": 101.30089569091797, | |
| "learning_rate": 1.9009528888888892e-05, | |
| "loss": 1.5961, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.1447997683203707, | |
| "grad_norm": 0.24238981306552887, | |
| "learning_rate": 1.9005973333333333e-05, | |
| "loss": 1.0579, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.1449597680643711, | |
| "grad_norm": 0.6612280011177063, | |
| "learning_rate": 1.900241777777778e-05, | |
| "loss": 0.941, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.14511976780837152, | |
| "grad_norm": 3.1052684783935547, | |
| "learning_rate": 1.8998862222222222e-05, | |
| "loss": 1.1861, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.14527976755237193, | |
| "grad_norm": 9.876090049743652, | |
| "learning_rate": 1.8995306666666667e-05, | |
| "loss": 1.4697, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.1454397672963723, | |
| "grad_norm": 32.829795837402344, | |
| "learning_rate": 1.899175111111111e-05, | |
| "loss": 0.6486, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.14559976704037272, | |
| "grad_norm": 2219.107177734375, | |
| "learning_rate": 1.8988195555555556e-05, | |
| "loss": 1.3865, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.14575976678437313, | |
| "grad_norm": 0.2465362697839737, | |
| "learning_rate": 1.898464e-05, | |
| "loss": 1.1494, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.14591976652837355, | |
| "grad_norm": 0.06304822117090225, | |
| "learning_rate": 1.8981084444444445e-05, | |
| "loss": 1.3623, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.14607976627237396, | |
| "grad_norm": 0.0003378583351150155, | |
| "learning_rate": 1.897752888888889e-05, | |
| "loss": 1.2193, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.14623976601637437, | |
| "grad_norm": 0.023890919983386993, | |
| "learning_rate": 1.8973973333333334e-05, | |
| "loss": 1.3003, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.14639976576037478, | |
| "grad_norm": 0.20042432844638824, | |
| "learning_rate": 1.8970453333333335e-05, | |
| "loss": 1.2608, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.1465597655043752, | |
| "grad_norm": 0.4310738742351532, | |
| "learning_rate": 1.896689777777778e-05, | |
| "loss": 1.2544, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.1467197652483756, | |
| "grad_norm": 6.881536960601807, | |
| "learning_rate": 1.8963342222222224e-05, | |
| "loss": 1.332, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.146879764992376, | |
| "grad_norm": 34.862266540527344, | |
| "learning_rate": 1.895978666666667e-05, | |
| "loss": 1.3548, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.14703976473637642, | |
| "grad_norm": 41.60286331176758, | |
| "learning_rate": 1.8956231111111114e-05, | |
| "loss": 1.54, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.14719976448037683, | |
| "grad_norm": 0.21723419427871704, | |
| "learning_rate": 1.8952675555555558e-05, | |
| "loss": 1.3125, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.14735976422437724, | |
| "grad_norm": 12.313715934753418, | |
| "learning_rate": 1.894912e-05, | |
| "loss": 0.897, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.14751976396837765, | |
| "grad_norm": 9.945670171873644e-05, | |
| "learning_rate": 1.8945564444444447e-05, | |
| "loss": 1.1594, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.14767976371237806, | |
| "grad_norm": 0.00018985375936608762, | |
| "learning_rate": 1.894200888888889e-05, | |
| "loss": 0.9194, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.14783976345637848, | |
| "grad_norm": 6.04590368270874, | |
| "learning_rate": 1.8938453333333336e-05, | |
| "loss": 1.2209, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.1479997632003789, | |
| "grad_norm": 0.19547709822654724, | |
| "learning_rate": 1.8934897777777777e-05, | |
| "loss": 1.0027, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.1481597629443793, | |
| "grad_norm": 0.00870482623577118, | |
| "learning_rate": 1.8931342222222225e-05, | |
| "loss": 1.4675, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.1483197626883797, | |
| "grad_norm": 112.5121078491211, | |
| "learning_rate": 1.8927786666666667e-05, | |
| "loss": 1.3982, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.14847976243238012, | |
| "grad_norm": 0.000352471019141376, | |
| "learning_rate": 1.8924231111111115e-05, | |
| "loss": 0.8595, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.14863976217638053, | |
| "grad_norm": 0.20730045437812805, | |
| "learning_rate": 1.8920675555555556e-05, | |
| "loss": 1.572, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.14879976192038094, | |
| "grad_norm": 5.9496917724609375, | |
| "learning_rate": 1.8917120000000004e-05, | |
| "loss": 1.2832, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.14895976166438132, | |
| "grad_norm": 0.012922318652272224, | |
| "learning_rate": 1.8913564444444445e-05, | |
| "loss": 1.2838, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.14911976140838173, | |
| "grad_norm": 40.53496170043945, | |
| "learning_rate": 1.8910008888888893e-05, | |
| "loss": 1.6535, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.14927976115238215, | |
| "grad_norm": 106.17526245117188, | |
| "learning_rate": 1.8906453333333334e-05, | |
| "loss": 1.5996, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.14943976089638256, | |
| "grad_norm": 93.25550079345703, | |
| "learning_rate": 1.890289777777778e-05, | |
| "loss": 1.058, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.14959976064038297, | |
| "grad_norm": 83.99794006347656, | |
| "learning_rate": 1.8899342222222223e-05, | |
| "loss": 1.3316, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.14975976038438338, | |
| "grad_norm": 0.0036302392836660147, | |
| "learning_rate": 1.8895822222222224e-05, | |
| "loss": 0.8627, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.1499197601283838, | |
| "grad_norm": 4.188179969787598, | |
| "learning_rate": 1.889226666666667e-05, | |
| "loss": 1.4411, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.1500797598723842, | |
| "grad_norm": 0.38291868567466736, | |
| "learning_rate": 1.8888711111111113e-05, | |
| "loss": 0.9331, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.1502397596163846, | |
| "grad_norm": 2.517091751098633, | |
| "learning_rate": 1.8885155555555558e-05, | |
| "loss": 1.0032, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.15039975936038502, | |
| "grad_norm": 128.78472900390625, | |
| "learning_rate": 1.8881600000000002e-05, | |
| "loss": 1.2341, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.15055975910438543, | |
| "grad_norm": 0.0004920060164295137, | |
| "learning_rate": 1.8878044444444447e-05, | |
| "loss": 1.3369, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.15071975884838584, | |
| "grad_norm": 36.320411682128906, | |
| "learning_rate": 1.887448888888889e-05, | |
| "loss": 1.2324, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.15087975859238625, | |
| "grad_norm": 34.70246887207031, | |
| "learning_rate": 1.8870933333333336e-05, | |
| "loss": 1.6952, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.15103975833638666, | |
| "grad_norm": 0.5894516110420227, | |
| "learning_rate": 1.886737777777778e-05, | |
| "loss": 1.2401, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.15119975808038708, | |
| "grad_norm": 0.0024842778220772743, | |
| "learning_rate": 1.8863822222222222e-05, | |
| "loss": 1.2998, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.1513597578243875, | |
| "grad_norm": 0.16776002943515778, | |
| "learning_rate": 1.886026666666667e-05, | |
| "loss": 1.1458, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.1515197575683879, | |
| "grad_norm": 0.008235426619648933, | |
| "learning_rate": 1.885671111111111e-05, | |
| "loss": 1.0211, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.1516797573123883, | |
| "grad_norm": 82.82616424560547, | |
| "learning_rate": 1.8853191111111112e-05, | |
| "loss": 0.9866, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.15183975705638872, | |
| "grad_norm": 0.0073872278444468975, | |
| "learning_rate": 1.8849635555555556e-05, | |
| "loss": 1.3636, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.15199975680038913, | |
| "grad_norm": 11.451104164123535, | |
| "learning_rate": 1.884608e-05, | |
| "loss": 1.1485, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.15215975654438954, | |
| "grad_norm": 100.47103118896484, | |
| "learning_rate": 1.8842524444444446e-05, | |
| "loss": 0.7671, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.15231975628838995, | |
| "grad_norm": 0.4101124703884125, | |
| "learning_rate": 1.883896888888889e-05, | |
| "loss": 1.0069, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.15247975603239033, | |
| "grad_norm": 37.40227508544922, | |
| "learning_rate": 1.8835413333333335e-05, | |
| "loss": 1.1276, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.15263975577639075, | |
| "grad_norm": 141.687744140625, | |
| "learning_rate": 1.883185777777778e-05, | |
| "loss": 1.4477, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.15279975552039116, | |
| "grad_norm": 0.008716798387467861, | |
| "learning_rate": 1.8828302222222224e-05, | |
| "loss": 0.9887, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.15295975526439157, | |
| "grad_norm": 7.543517858721316e-05, | |
| "learning_rate": 1.882474666666667e-05, | |
| "loss": 1.065, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.15311975500839198, | |
| "grad_norm": 6.7989821434021, | |
| "learning_rate": 1.8821191111111113e-05, | |
| "loss": 0.982, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.1532797547523924, | |
| "grad_norm": 27.62921714782715, | |
| "learning_rate": 1.8817635555555557e-05, | |
| "loss": 1.1166, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.1534397544963928, | |
| "grad_norm": 28.467132568359375, | |
| "learning_rate": 1.8814080000000002e-05, | |
| "loss": 1.3949, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.1535997542403932, | |
| "grad_norm": 89.31570434570312, | |
| "learning_rate": 1.8810524444444447e-05, | |
| "loss": 1.4164, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.15375975398439362, | |
| "grad_norm": 0.30763378739356995, | |
| "learning_rate": 1.880696888888889e-05, | |
| "loss": 1.7997, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.15391975372839403, | |
| "grad_norm": 90.01514434814453, | |
| "learning_rate": 1.8803413333333336e-05, | |
| "loss": 1.3941, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.15407975347239444, | |
| "grad_norm": 0.9498651027679443, | |
| "learning_rate": 1.879985777777778e-05, | |
| "loss": 1.0592, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 0.15423975321639485, | |
| "grad_norm": 88.10832977294922, | |
| "learning_rate": 1.8796302222222225e-05, | |
| "loss": 1.1661, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.15439975296039526, | |
| "grad_norm": 3.867802143096924, | |
| "learning_rate": 1.879274666666667e-05, | |
| "loss": 1.5968, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.15455975270439568, | |
| "grad_norm": 114.89385986328125, | |
| "learning_rate": 1.8789191111111114e-05, | |
| "loss": 1.2586, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.1547197524483961, | |
| "grad_norm": 0.009951179847121239, | |
| "learning_rate": 1.878563555555556e-05, | |
| "loss": 1.5164, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 0.1548797521923965, | |
| "grad_norm": 1.848288893699646, | |
| "learning_rate": 1.8782080000000003e-05, | |
| "loss": 1.5942, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.1550397519363969, | |
| "grad_norm": 56.26310348510742, | |
| "learning_rate": 1.8778524444444448e-05, | |
| "loss": 0.6635, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 0.15519975168039732, | |
| "grad_norm": 0.2863824963569641, | |
| "learning_rate": 1.8774968888888892e-05, | |
| "loss": 1.3037, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.15535975142439773, | |
| "grad_norm": 0.006738504860550165, | |
| "learning_rate": 1.8771413333333333e-05, | |
| "loss": 1.3557, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 0.15551975116839814, | |
| "grad_norm": 0.24526001513004303, | |
| "learning_rate": 1.876785777777778e-05, | |
| "loss": 1.0864, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.15567975091239855, | |
| "grad_norm": 70.70162963867188, | |
| "learning_rate": 1.8764302222222222e-05, | |
| "loss": 1.3139, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 0.15583975065639896, | |
| "grad_norm": 0.7548888921737671, | |
| "learning_rate": 1.8760746666666667e-05, | |
| "loss": 0.7139, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.15599975040039937, | |
| "grad_norm": 0.08793803304433823, | |
| "learning_rate": 1.875719111111111e-05, | |
| "loss": 1.1084, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.15615975014439976, | |
| "grad_norm": 8.044859886169434, | |
| "learning_rate": 1.8753635555555556e-05, | |
| "loss": 1.2294, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.15631974988840017, | |
| "grad_norm": 0.4635624587535858, | |
| "learning_rate": 1.875008e-05, | |
| "loss": 0.9581, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 0.15647974963240058, | |
| "grad_norm": 0.0022484343498945236, | |
| "learning_rate": 1.8746524444444445e-05, | |
| "loss": 1.2983, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.156639749376401, | |
| "grad_norm": 2.357697010040283, | |
| "learning_rate": 1.874296888888889e-05, | |
| "loss": 1.8281, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 0.1567997491204014, | |
| "grad_norm": 78.0554428100586, | |
| "learning_rate": 1.8739413333333334e-05, | |
| "loss": 1.2914, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.1569597488644018, | |
| "grad_norm": 0.6091700196266174, | |
| "learning_rate": 1.873585777777778e-05, | |
| "loss": 0.8656, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 0.15711974860840222, | |
| "grad_norm": 0.20535144209861755, | |
| "learning_rate": 1.8732302222222223e-05, | |
| "loss": 1.3438, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.15727974835240263, | |
| "grad_norm": 0.029342494904994965, | |
| "learning_rate": 1.8728746666666668e-05, | |
| "loss": 1.465, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 0.15743974809640304, | |
| "grad_norm": 0.20423032343387604, | |
| "learning_rate": 1.8725191111111112e-05, | |
| "loss": 1.2253, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.15759974784040345, | |
| "grad_norm": 0.020203936845064163, | |
| "learning_rate": 1.8721635555555557e-05, | |
| "loss": 1.3481, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.15775974758440386, | |
| "grad_norm": 0.001091059297323227, | |
| "learning_rate": 1.871808e-05, | |
| "loss": 1.5131, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.15791974732840428, | |
| "grad_norm": 42.3817253112793, | |
| "learning_rate": 1.8714524444444446e-05, | |
| "loss": 1.4852, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 0.15807974707240469, | |
| "grad_norm": 11.986414909362793, | |
| "learning_rate": 1.871096888888889e-05, | |
| "loss": 1.1317, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.1582397468164051, | |
| "grad_norm": 6.878232002258301, | |
| "learning_rate": 1.8707413333333335e-05, | |
| "loss": 1.0395, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 0.1583997465604055, | |
| "grad_norm": 0.011188351549208164, | |
| "learning_rate": 1.8703893333333333e-05, | |
| "loss": 0.9256, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.15855974630440592, | |
| "grad_norm": 0.03425045683979988, | |
| "learning_rate": 1.870033777777778e-05, | |
| "loss": 0.9774, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 0.15871974604840633, | |
| "grad_norm": 26.11473846435547, | |
| "learning_rate": 1.8696782222222222e-05, | |
| "loss": 0.9756, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.15887974579240674, | |
| "grad_norm": 0.0001582380209583789, | |
| "learning_rate": 1.869322666666667e-05, | |
| "loss": 1.4885, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 0.15903974553640715, | |
| "grad_norm": 0.1462339162826538, | |
| "learning_rate": 1.868967111111111e-05, | |
| "loss": 1.2373, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.15919974528040756, | |
| "grad_norm": 20.499425888061523, | |
| "learning_rate": 1.8686115555555556e-05, | |
| "loss": 1.3868, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.15935974502440797, | |
| "grad_norm": 89.42505645751953, | |
| "learning_rate": 1.868256e-05, | |
| "loss": 0.9238, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.15951974476840838, | |
| "grad_norm": 4.4118266105651855, | |
| "learning_rate": 1.8679004444444445e-05, | |
| "loss": 1.0793, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 0.15967974451240877, | |
| "grad_norm": 0.08320512622594833, | |
| "learning_rate": 1.867544888888889e-05, | |
| "loss": 1.2405, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.15983974425640918, | |
| "grad_norm": 1.5630072355270386, | |
| "learning_rate": 1.8671893333333334e-05, | |
| "loss": 1.2417, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 0.1599997440004096, | |
| "grad_norm": 1.7299790382385254, | |
| "learning_rate": 1.866833777777778e-05, | |
| "loss": 1.1264, | |
| "step": 100000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 625001, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |